在前三列后面附加任何其他列

# Sample frame: six columns labelled "1".."6", five rows each. Columns 4-6
# repeat the layout of columns 1-3 (integers, letters, 1/NaN flags).
id_low, id_high = 900000000, 999999999
# NumPy coerces np.nan to the string "nan" when it is mixed with strings,
# which is why the letter columns print a lowercase `nan`.
letter_pool = ["A", "B", "C", np.nan]
flag_pool = [np.nan, 1]

df = pd.DataFrame({
    "1": np.random.randint(id_low, id_high, size=5),
    "2": np.random.choice(letter_pool, 5),
    "3": np.random.choice(flag_pool, 5),
    "4": np.random.randint(id_low, id_high, size=5),
    "5": np.random.choice(letter_pool, 5),
    "6": np.random.choice(flag_pool, 5),
})

           1    2    3          4    5    6
0  925846412  nan  1.0  994235729  nan  NaN
1  991877917    B  1.0  970766032  nan  NaN
2  931608603    B  NaN  937096948    B  NaN
3  977083128    A  NaN  974190653    B  1.0
4  937344792  nan  NaN  972948910    B  1.0

# Fold the wide frame into three columns: every run of three source columns
# is relabelled A, B, C and the runs are stacked underneath each other.
#
# The frame is built once, at the end. Assigning a growing Series into an
# existing DataFrame column aligns on the frame's index, so any rows beyond
# the first run would be silently dropped; Series.append is also no longer
# available in pandas 2.0+.
target_columns = ["A", "B", "C"]
width = len(target_columns)

chunks = []
for start in range(0, df.shape[1], width):
    # Copy so that relabelling the slice never touches the source frame.
    chunk = df.iloc[:, start:start + width].copy()
    # A trailing run may be narrower than `width`; label only what exists.
    chunk.columns = target_columns[:chunk.shape[1]]
    chunks.append(chunk)

if chunks:
    # reindex guarantees all three target columns exist, in order, even when
    # the source has fewer than three columns.
    df_neu = pd.concat(chunks, ignore_index=True).reindex(columns=target_columns)
else:
    # pd.concat raises on an empty list; fall back to an empty result frame.
    df_neu = pd.DataFrame(columns=target_columns)

           A    B    C
0  925846412  nan  1.0
1  991877917    B  1.0
2  931608603    B  NaN
3  977083128    A  NaN
4  937344792  nan  NaN
5  994235729  nan  NaN
6  970766032  nan  NaN
7  937096948    B  NaN
8  974190653    B  1.0
9  972948910    B  1.0

1条回答

网友

1楼 · 发布于 2024-04-19 16:06:08

您可以用整数数组在列上创建MultiIndex：对按列数生成的数组分别做整除和取模，然后用`stack`和`sort_index`重塑数据，最后用`reset_index`去掉MultiIndex：

# Fixed seed so the sample data below is reproducible.
np.random.seed(2019)

# Build the six columns in label order ("1".."6") so the random draws happen
# in the same sequence as a single dict literal would produce them.
columns = {}
for first_label in (1, 4):
    columns[str(first_label)] = np.random.randint(900000000, 999999999, size=5)
    # np.nan mixed with strings is coerced by NumPy to the string "nan".
    columns[str(first_label + 1)] = np.random.choice(["A", "B", "C", np.nan], 5)
    columns[str(first_label + 2)] = np.random.choice([np.nan, 1], 5)

df = pd.DataFrame(columns)
print(df)
           1    2    3          4  5    6
0  960189042    B  NaN  991581392  A  1.0
1  977655199  nan  1.0  964195250  A  1.0
2  961771966    A  NaN  969007327  B  1.0
3  955308022    C  1.0  973316485  A  NaN
4  933277976    A  1.0  976749175  A  NaN

# Label each column with a two-level key: (index of its group of three,
# position inside that group).
col_positions = np.arange(df.shape[1])
group_ids, slot_ids = np.divmod(col_positions, 3)
df.columns = [group_ids, slot_ids]

# Move the group level into the row index, so each original row yields one
# row per group and the three slot positions remain as columns.
stacked = df.stack(0)
# Order rows by group first, then by original row number, so that the second
# group's rows follow the first group's rows.
ordered = stacked.sort_index(level=[1, 0])
# Discard the two-level row index in favour of a plain 0..n-1 range.
df = ordered.reset_index(drop=True)
df.columns = ['A', 'B', 'C']
print(df)
           A    B    C
0  960189042    B  NaN
1  977655199  nan  1.0
2  961771966    A  NaN
3  955308022    C  1.0
4  933277976    A  1.0
5  991581392    A  1.0
6  964195250    A  1.0
7  969007327    B  1.0
8  973316485    A  NaN
9  976749175    A  NaN

如果先把各列分别追加到Series中，最后再用构造函数创建DataFrame，您原来的思路也是可行的：

# Fold every run of three source columns into the target columns A, B, C:
# the source column at position i contributes to target i % 3.
pieces = ([], [], [])
for position, column in enumerate(df.columns):
    pieces[position % 3].append(df[column])

# Series.append was removed in pandas 2.0, so each group is concatenated once
# with pd.concat instead of growing a Series inside the loop.
# ignore_index=True renumbers the rows 0..n-1. An empty group (fewer than
# three source columns) falls back to an empty Series because pd.concat
# raises on an empty list.
a, b, c = (
    pd.concat(group, ignore_index=True) if group else pd.Series(dtype=object)
    for group in pieces
)

df_neu = pd.DataFrame({"A": a, "B": b, "C": c})
print(df_neu)
           A    B    C
0  960189042    B  NaN
1  977655199  nan  1.0
2  961771966    A  NaN
3  955308022    C  1.0
4  933277976    A  1.0
5  991581392    A  1.0
6  964195250    A  1.0
7  969007327    B  1.0
8  973316485    A  NaN
9  976749175    A  NaN

相关问题更多 >

编程相关推荐

热门问题

热门文章