Python：使用Pandas将元组作为键的字典转换为稀疏矩阵

for filename in os.listdir(inv_folder): name, ext = os.path.splitext(filename) if ext == '.xlsx': if filename==inv_file: continue recipe_files.append(filename) #loop through list of files, load each workbook, and send it to the inventory function for file in recipe_files: file_counter += 1 file_path = inv_folder+'\\'+file wb = load_workbook(file_path,data_only=True) sheet=wb.active inventory(sheet,file,file_counter) def inventory(sheet,file,file_counter): print('\n',file) for row in sheet.iter_rows(2,18,1,3): if row[0].value: ing_dict[(row[0].value,file)]=row[2].value

1条回答

网友

1楼 · 发布于 2024-05-14 04:20:30

下面的代码应该执行您想要的操作。我添加了内联注释来解释如何转换数据

import numpy as np
import pandas as pd

# The expected input data
data = {('Alf', '2012.xlsx'): 600, ('Elf', '2012.xlsx'): 400, ('Alf', '2013.xlsx'): 200, ('Tim', '2014.xlsx'): 150}

row_to_pos = {}  # maps a row name to an actual position
data_new = {}    # We need to reformat the data structure

# loop through the data
for key, value in data.items():
    row=key[0] # e.g., 'Alf'
    column=key[1] # e.g., '2012.xlsx'
    
    #  if a row name is new, we add it to our mapper
    if row not in row_to_pos:
        row_to_pos[row] = len(row_to_pos)
        
    # if a column name is new, we add a new entry in `data_new`
    if column not in data_new:
        data_new[column] = [[],[]]
        
    # store our data, key=column_name, value=a list of two lists
    data_new[column][0].append(row_to_pos[key[0]]) # store the position
    data_new[column][1].append(value) # store the actual value

# we did not know in the first place how many unique row names we have so we have to loop once more
for key, value in data_new.items():
    tmp = np.zeros(len(row_to_pos))
    tmp[value[0]] = value[1] # value[0] are the positions, value[1] the corresponding values
    data_new[key] = tmp

# create our dataframe
data_new['Name'] = list(row_to_pos.keys())
df = pd.DataFrame(data_new)
df = df.set_index(['Name'])
print(df)

这将产生以下输出：

      2012.xlsx  2013.xlsx  2014.xlsx
Name
Alf       600.0      200.0        0.0
Elf       400.0        0.0        0.0
Tim         0.0        0.0      150.0

相关问题更多 >

编程相关推荐

热门问题

热门文章