在数据帧中拆分列后，dropna() 不会删除 NaN

+-----+-------------------------------+-------------+ | | Sourcing Event ID (DTRM ID) | Site | +-----+-------------------------------+-------------+ | 0 | 1035 | ,ABC55, | | 1 | 1067 | ,, | | 2 | 1181 | ,, | | 3 | 1183 | ,, | | 4 | 1184 | ,, | | 5 | 1264 | ,, | | 6 | 1307 | ,DEF2, | | 7 | 1354 | ,, | | 8 | 1369 | ,HIJ150, | | 9 | 1372 | ,DEF64, | | 10 | 1373 | ,KLM9, | | 11 | 1374 | ,DEF1, | | 12 | 1381 | ,, | | 13 | 1385 | ,, | | 14 | 1391 | ,, | | 15 | 1394 | ,, | | 16 | 1395 | ,, | | 17 | 1402 | ,, | | 18 | 1404 | ,, | | 19 | 1405 | ,, | | 20 | 1406 | ,, | | 21 | 1408 | ,, | | 22 | 1410 | ,HIJ116, | | 23 | 1412 | ,, | +-----+-------------------------------+-------------+

df_sourcing_events = pd.read_csv(wf['local_filename']) sourcing_events_melt_col = 'Sourcing Event ID (DTRM ID)' sourcing_events_site_col = 'Site' print(df_sourcing_events[[sourcing_events_melt_col,sourcing_events_site_col]]) df_sourcing_events[sourcing_events_site_col] = df_sourcing_events[sourcing_events_site_col].str.lstrip(',') df_sourcing_events[sourcing_events_site_col] = df_sourcing_events[sourcing_events_site_col].str.rstrip(',') df_sourcing_events_sites = pd.concat([df_sourcing_events[sourcing_events_melt_col], df_sourcing_events[sourcing_events_site_col].str.split(',', expand = True)], axis = 1)\ .melt(id_vars=[sourcing_events_melt_col])\ .sort_values(by = sourcing_events_melt_col)\ .rename(columns = {'value' : sourcing_events_site_col})\ .drop(columns = ['variable'])\ .dropna()

+-----+-------------------------------+-----------+ | | Sourcing Event ID (DTRM ID) | Site | +-----+-------------------------------+-----------+ | 0 | 1035 | ABC55 | | 1 | 1067 | | | 2 | 1181 | | | 3 | 1183 | | | 4 | 1184 | | | 5 | 1264 | | | 6 | 1307 | DEF2 | | 7 | 1354 | | | 8 | 1369 | HIJ150 | | 9 | 1372 | DEF64 | | 10 | 1373 | KLM9 | | 11 | 1374 | DEF1 | | 12 | 1381 | | | 13 | 1385 | | | 14 | 1391 | | | 15 | 1394 | | | 16 | 1395 | | | 17 | 1402 | | | 18 | 1404 | | | 19 | 1405 | | | 20 | 1406 | | | 21 | 1408 | | | 22 | 1410 | HIJ116 | | 23 | 1412 | | +-----+-------------------------------+-----------+

df_sourcing_events_final = df_sourcing_events.drop([sourcing_events_site_col], axis=1) write_dataframe_to_csv_on_s3(df_sourcing_events_sites, s3_bucket, 'sourcing_events_sites.csv') write_dataframe_to_csv_on_s3(df_sourcing_events_final, s3_bucket, file_name)

2条回答

网友

1楼 · 编辑于 2024-06-10 11:06:42

它不会删除，因为它们是空字符串而不是N/A。尝试：

df = df_sourcing_events_sites
df = df[df.Site != '']

网友

2楼 · 编辑于 2024-06-10 11:06:42

`dropna()` 只删除“真正的”NaN。但有时 csv 文件中包含被 pandas 视为字符串的 NA 值。在您的情况下，我认为这些是空字符串 ""。

无论如何，`read_csv()` 方法都有一个 `na_values` 参数，您可以把需要视为缺失值的字符串传给该参数。您可以试试 `na_values=""`，但我无法预测它的输出。

相关问题更多 >

编程相关推荐

热门问题

热门文章