<p>如果您想直接查看数据工作,请参阅“从1开始”部分。作为一个比我回答得早的人,您应该使用reforme-wide(在python:pivot中),但在我的例子中,我将数据本身进行了透视,并将其放入新的变量中
(<code>new=pd.pivot(data,index=['id','level1','strate','level2'],values=['level3','value'],columns='no')</code>)</p>
<p>(*另外:在您输入整个示例数据后,我只需编辑一点代码.)</p>
<pre><code>#00.module
import pandas as pd
import numpy as np
# 0. data
d=[['', '1', 'Recruitement', '', 'Recruitement start date', 'Total', '2021-07-01'],
['', '1', 'Recruitement', '', 'Recruitement start date', 'Ivory Coast', '2021-07-01'],
['', '1', 'Recruitement', '', 'Recruitement start date', 'Madagascar', '2021-07-01'],
['', '1', 'Recruitement', '', 'Recruitement start date', 'South Africa', '2021-07-01'],
['', '2', 'Recruitement', '', 'Sex (Woman), %', 'Total', '47.0'],
['', '2', 'Recruitement', '', 'Sex (Woman), %', 'Ivory Coast', '40.0'],
['', '2', 'Recruitement', '', 'Sex (Woman), %', 'Madagascar', '40.0'],
['', '2', 'Recruitement', '', 'Sex (Woman), %', 'South Africa', '60.0'],
['5-Total', '3', 'Follow up', '', 'D7 visits: N performed', 'Total', '4'],
['5-Ivory Coast', '3', 'Follow up', '', 'D7 visits: N performed', 'Ivory Coast', '1'],
['5-Madagascar', '3', 'Follow up', '', 'D7 visits: N performed', 'Madagascar', '2'],
['5-South Africa', '3', 'Follow up', '', 'D7 visits: N performed', 'South Africa', '1'],
['5-Total', '4', 'Follow up', '', 'D7 visits: N expected', 'Total', '12'],
['5-Ivory Coast', '4', 'Follow up', '', 'D7 visits: N expected', 'Ivory Coast', '4'],
['5-Madagascar', '4', 'Follow up', '', 'D7 visits: N expected', 'Madagascar', '4'],
['5-South Africa', '4', 'Follow up', '', 'D7 visits: N expected', 'South Africa', '4'],
['', '6', 'Recruitement', '', 'Age, median', 'Total', '35.0'],
['', '6', 'Recruitement', '', 'Age, median', 'Ivory Coast', '35.0'],
['', '6', 'Recruitement', '', 'Age, median', 'Madagascar', '31.0'],
['', '6', 'Recruitement', '', 'Age, median', 'South Africa', '36.0'],
['', '7', 'Recruitement', '', 'Age, IQR', 'Total', '(26.5-48.5)'],
['', '7', 'Recruitement', '', 'Age, IQR', 'Ivory Coast', '(26.0-48.0)'],
['', '7', 'Recruitement', '', 'Age, IQR', 'Madagascar', '(26.0-49.0)'],
['', '7', 'Recruitement', '', 'Age, IQR', 'South Africa', '(28.0-40.0)'],
['', '8', 'Recruitement', '', 'Randomized all, N', 'Total', '15'],
['', '8', 'Recruitement', '', 'Randomized all, N', 'Ivory Coast', '5'],
['', '8', 'Recruitement', '', 'Randomized all, N', 'Madagascar', '5'],
['', '8', 'Recruitement', '', 'Randomized all, N', 'South Africa', '5'],
['15-Total', '9', 'Recruitement', '', 'Included severity stage 1, N', 'Total', '1'],
['15-South Africa', '9', 'Recruitement', '', 'Included severity stage 1, N', 'South Africa', '1'],
['16-Total', '10', 'Recruitement', '', 'Included severity stage 2, N', 'Total', '8'],
['16-Ivory Coast', '10', 'Recruitement', '', 'Included severity stage 2, N', 'Ivory Coast', '4'],
['16-Madagascar', '10', 'Recruitement', '', 'Included severity stage 2, N', 'Madagascar', '1'],
['16-South Africa', '10', 'Recruitement', '', 'Included severity stage 2, N', 'South Africa', '3'],
['17-Total', '11', 'Recruitement', '', 'Included severity stage 3, N', 'Total', '6'],
['17-Ivory Coast', '11', 'Recruitement', '', 'Included severity stage 3, N', 'Ivory Coast', '1'],
['17-Madagascar', '11', 'Recruitement', '', 'Included severity stage 3, N', 'Madagascar', '4'],
['17-South Africa', '11', 'Recruitement', '', 'Included severity stage 3, N', 'South Africa', '1'],
['15-Total', '12', 'Recruitement', '', 'Included severity stage 1, %', 'Total', '7.0'],
['15-Ivory Coast', '12', 'Recruitement', '', 'Included severity stage 1, %', 'Ivory Coast', 'np.nan'],
['15-Madagascar', '12', 'Recruitement', '', 'Included severity stage 1, %', 'Madagascar', 'np.nan'],
['15-South Africa', '12', 'Recruitement', '', 'Included severity stage 1, %', 'South Africa', '20.0'],
['16-Total', '13', 'Recruitement', '', 'Included severity stage 2, %', 'Total', '53.0'],
['16-Ivory Coast', '13', 'Recruitement', '', 'Included severity stage 2, %', 'Ivory Coast', '80.0'],
['16-Madagascar', '13', 'Recruitement', '', 'Included severity stage 2, %', 'Madagascar', '20.0'],
['16-South Africa', '13', 'Recruitement', '', 'Included severity stage 2, %', 'South Africa', '60.0'],
['17-Total', '14', 'Recruitement', '', 'Included severity stage 3, %', 'Total', '40.0'],
['17-Ivory Coast', '14', 'Recruitement', '', 'Included severity stage 3, %', 'Ivory Coast', '20.0'],
['17-Madagascar', '14', 'Recruitement', '', 'Included severity stage 3, %', 'Madagascar', '80.0'],
['17-South Africa', '14', 'Recruitement', '', 'Included severity stage 3, %', 'South Africa', '20.0']]
data=pd.DataFrame(d,columns=['id', 'index', 'level1', 'level2', 'level3', 'strate', 'value'])
data=data.applymap(lambda x: np.nan if (x=='np.nan') or (x=='') else x)
print(data)
backup=data
# 1.1 make natural number by ['id','level1','strate']
data=data.loc[~(data['id'].isna()),:]
data['value']=data.value.astype('float')
data['concat']=data['id']+data['level1']+data['strate']
for i in data['concat'].unique():
print(i)
data.loc[data['concat']==i,'no']=list(range(1,data.loc[data['concat']==i,:].shape[0]+1))
print(data)
# 1.2 reshape wide(in python: pivot)
new=pd.pivot(data,index=['id','level1','strate','level2'],values=['level3','value'],columns='no')
new.columns=new.columns.map(lambda x: f'{x[0]}_{x[1]}')
new=new.reset_index()
print(new)
new[['level3_1.0', 'level3_2.0', 'value_1.0', 'value_2.0']]
# 1.3 generate value and level3 columns and drop ['level3_1.0', 'level3_2.0', 'value_1.0', 'value_2.0'] columns
new['value']=new['value_1.0']/new['value_2.0']
new['level3']='D7 visits: N performed/expected %'
new=new.drop(columns=new.filter(regex='.0$'))
print(new)
# 1.4 append
backup.loc[(backup['id'].isna()),:].append(new,ignore_index=True)
</code></pre>
<p>我还在Stata中添加了相同的工作。处理数据会容易得多</p>
<pre><code>clear
edit // open data editor(editing mode)
* 0) make a dataset
input str30(id index level1 level2 level3 strate value)
"" "1" "Recruitement" . "Recruitement start date" "Total" "2021-07-01"
"" "1" "Recruitement" . "Recruitement start date" "Ivory Coast" "2021-07-01"
"" "1" "Recruitement" . "Recruitement start date" "Madagascar" "2021-07-01"
"" "1" "Recruitement" . "Recruitement start date" "South Africa" "2021-07-01"
"" "2" "Recruitement" . "Sex (Woman), %" "Total" "47.0"
"" "2" "Recruitement" . "Sex (Woman), %" "Ivory Coast" "40.0"
"" "2" "Recruitement" . "Sex (Woman), %" "Madagascar" "40.0"
"" "2" "Recruitement" . "Sex (Woman), %" "South Africa" "60.0"
"5-Total" "3" "Follow up" . "D7 visits: N performed" "Total" "4"
"5-Ivory Coast" "3" "Follow up" . "D7 visits: N performed" "Ivory Coast" "1"
"5-Madagascar" "3" "Follow up" . "D7 visits: N performed" "Madagascar" "2"
"5-South Africa" "3" "Follow up" . "D7 visits: N performed" "South Africa" "1"
"5-Total" "4" "Follow up" . "D7 visits: N expected" "Total" "12"
"5-Ivory Coast" "4" "Follow up" . "D7 visits: N expected" "Ivory Coast" "4"
"5-Madagascar" "4" "Follow up" . "D7 visits: N expected" "Madagascar" "4"
"5-South Africa" "4" "Follow up" . "D7 visits: N expected" "South Africa" "4"
"" "6" "Recruitement" . "Age, median" "Total" "35.0"
"" "6" "Recruitement" . "Age, median" "Ivory Coast" "35.0"
"" "6" "Recruitement" . "Age, median" "Madagascar" "31.0"
"" "6" "Recruitement" . "Age, median" "South Africa" "36.0"
"" "7" "Recruitement" . "Age, IQR" "Total" "(26.5-48.5)"
"" "7" "Recruitement" . "Age, IQR" "Ivory Coast" "(26.0-48.0)"
"" "7" "Recruitement" . "Age, IQR" "Madagascar" "(26.0-49.0)"
"" "7" "Recruitement" . "Age, IQR" "South Africa" "(28.0-40.0)"
"" "8" "Recruitement" . "Randomized all, N" "Total" "15"
"" "8" "Recruitement" . "Randomized all, N" "Ivory Coast" "5"
"" "8" "Recruitement" . "Randomized all, N" "Madagascar" "5"
"" "8" "Recruitement" . "Randomized all, N" "South Africa" "5"
"15-Total" "9" "Recruitement" . "Included severity stage 1, N" "Total" "1"
"15-South Africa" "9" "Recruitement" . "Included severity stage 1, N" "South Africa" "1"
"16-Total" "10" "Recruitement" . "Included severity stage 2, N" "Total" "8"
"16-Ivory Coast" "10" "Recruitement" . "Included severity stage 2, N" "Ivory Coast" "4"
"16-Madagascar" "10" "Recruitement" . "Included severity stage 2, N" "Madagascar" "1"
"16-South Africa" "10" "Recruitement" . "Included severity stage 2, N" "South Africa" "3"
"17-Total" "11" "Recruitement" . "Included severity stage 3, N" "Total" "6"
"17-Ivory Coast" "11" "Recruitement" . "Included severity stage 3, N" "Ivory Coast" "1"
"17-Madagascar" "11" "Recruitement" . "Included severity stage 3, N" "Madagascar" "4"
"17-South Africa" "11" "Recruitement" . "Included severity stage 3, N" "South Africa" "1"
"15-Total" "12" "Recruitement" . "Included severity stage 1, %" "Total" "7.0"
"15-Ivory Coast" "12" "Recruitement" . "Included severity stage 1, %" "Ivory Coast" "nan"
"15-Madagascar" "12" "Recruitement" . "Included severity stage 1, %" "Madagascar" "nan"
"15-South Africa" "12" "Recruitement" . "Included severity stage 1, %" "South Africa" "20.0"
"16-Total" "13" "Recruitement" . "Included severity stage 2, %" "Total" "53.0"
"16-Ivory Coast" "13" "Recruitement" . "Included severity stage 2, %" "Ivory Coast" "80.0"
"16-Madagascar" "13" "Recruitement" . "Included severity stage 2, %" "Madagascar" "20.0"
"16-South Africa" "13" "Recruitement" . "Included severity stage 2, %" "South Africa" "60.0"
"17-Total" "14" "Recruitement" . "Included severity stage 3, %" "Total" "40.0"
"17-Ivory Coast" "14" "Recruitement" . "Included severity stage 3, %" "Ivory Coast" "20.0"
"17-Madagascar" "14" "Recruitement" . "Included severity stage 3, %" "Madagascar" "80.0"
"17-South Africa" "14" "Recruitement" . "Included severity stage 3, %" "South Africa" "20.0"
end
compress
replace value="" if value=="nan"
sa data,replace
* 1) reshape wide
use data,clear
drop if missing(id)
destring value,replace
bysort level1 level2 strate id : gen no=_n // make no colums of which value values are natural numbers by['level1', 'level2','strate','id']
reshape wide level3 value index ,i(id) j(no) // pd.pivot, so easy!
* 2) value and level3
gen value=value1/value2
drop value? // so easy!
gen level3="D7 visits: N performed/expected %"
drop level3? // so easy!
order id level* strate value // so easy!(order columns)
* 3) append
drop index*
tostring value,replace force
replace value="" if value=="."
sa temp,replace
use data,clear
keep if missing(id)
append using temp.dta
</code></pre>