Python 数据映射和追加到 Excel
我正在做一个项目,需要处理一个Excel文件里的数据,并把它整理好。
这里有一个简化的目录结构示例:
1 REVENUE
MOHLTC
ONT
ACCOUNT NUMBER
ACCOUNT NAME
***Important: Do Not Report Header (H) accounts***
COMMENTS
H
1 10
REVENUE - SERVICE RECIPIENT SERVICES
Detailed accounts required
FUNDING
MOHLTC
ON
1 10 04
Funding – Quality Based Procedures (QBP)
1 10 05
Funding – Health Based Allocation Model (HBAM)/ Growth Efficiency Model (GEM)
...
1 10 50
SR Rev. Worker Safety and Insurance Board (WSIB)
H
...
问题是:我有一个Excel文件,里面的数据是按列组织的,每一行代表一组代码,这些代码对应一个层级结构中的目录。
我需要读取这些数据,找到每个代码对应的目录名称,然后在Excel表格中相应地整理这些信息。
我写了Python代码,使用一个目录类来创建目录结构,并用openpyxl库来处理Excel文件。不过,我在把目录名称正确地映射到Excel文件中的代码时遇到了困难。
import os
import openpyxl
import re
class Directory:
def __init__(self, code, name, description=""):
self.code = code
self.name = name
self.description = description
self.subdirectories = []
def add_subdirectory(self, subdirectory):
self.subdirectories.append(subdirectory)
def display(self, indent=0, path=""):
current_path = path + " > " + self.code + ", " + self.name if path else self.code + ", " + self.name
print(" " * indent + current_path)
for subdirectory in self.subdirectories:
subdirectory.display(indent + 1, current_path)
def find_directory(directory, target_code):
if directory.code == target_code:
return directory
for subdirectory in directory.subdirectories:
result = find_directory(subdirectory, target_code)
if result:
return result
return None
def process_excel_file(excel_file):
try:
wb = openpyxl.load_workbook(excel_file)
sheet = wb.active
data_rows = sheet.iter_rows(min_row=2, values_only=True)
processed_data = []
for row in data_rows:
processed_row = [cell.strip() if cell else "" for cell in row]
processed_data.append(processed_row)
return processed_data
except Exception as e:
print(f"Error occurred while processing Excel file: {e}")
return None
if __name__ == "__main__":
# Create the root directory
root_directory = Directory("F", "Financial")
# Create additional directories
directories = [
Directory("F1", "Revenue (including Recoveries)"),
Directory("F3", "Compensation Expenses"),
Directory("F4", "Supplies Expenses"),
Directory("F5", "Service Recipient Specific Supplies"),
Directory("F6", "Sundry Expenses"),
Directory("F7", "Equipment Expenses"),
Directory("F8", "Contracted-Out Services"),
Directory("F9", "Building and Grounds Expenses")
]
# Add additional directories to the root directory
for directory in directories:
root_directory.add_subdirectory(directory)
# Subdirectories for directory_F1
subdirectories_F1 = [
Directory("F110", "Revenue – Service Recipient Services"),
Directory("F112", "Envelope Revenue"),
Directory("F120", "Recoveries-External"),
Directory("F121", "Recoveries-Internal"),
Directory("F122", "Recoveries-Interdepartmental"),
Directory("F130", "Contributed Services"),
Directory("F131", "Amortized Donation Contributed Services"),
Directory("F140", "Donations"),
Directory("F141", "Amortized Donation Revenue"),
Directory("F150", "Grants"),
Directory("F151", "Amortized Grant Revenue"),
Directory("F160", "Investment Revenue"),
Directory("F165", "Unrealized Gain/Loss"),
Directory("F170", "Revenue Transferred From Other Funds"),
Directory("F190", "Other Revenue")
]
# Add subdirectories to directory_F1
for subdir in subdirectories_F1:
directory_F1 = find_directory(root_directory, "F1")
if directory_F1:
directory_F1.add_subdirectory(subdir)
else:
print("Directory F1 not found.")
# Subdirectories for directory_F110
subdirectories_F110 = [
Directory("F11004", "Funding – LHIN Quality Based Procedures (QBP)"),
Directory("F11005", "Funding – LHIN Health Based Allocation Model (HBAM)"),
Directory("F11006", "Funding-Local Health Integration Networks (LHIN)"),
Directory("F11007", "Funding-Cancer Care Ontario"),
Directory("F11008", "Funding-LHINs One Time"),
Directory("F11010", "Funding-Provincial MOHLTC (Allocation)"),
Directory("F11011", "Funding-Provision for Recoveries"),
Directory("F11012", "Funding-MOHLTC One Time"),
Directory("F11013", "Funding-MOHLTC Pay Equity Funding"),
Directory("F11014", "Funding-MOHLTC Other Funding Envelopes"),
Directory("F11015", "Funding-Other MOHLTC (Care & Provider Services)"),
Directory("F11016", "OHIP-Professional Component"),
Directory("F11017", "OHIP-Technical Component"),
Directory("F11019", "Funding – Paymaster / Flow Through"),
Directory("F11020", "Funding-Provincial Government-Other (Usually Fund Type 3)"),
Directory("F11022", "Funding-MC/MCY Fiscal Subsidy"),
Directory("F11030", "Funding-Federal Government (Sum of 1-10-31 to 1-10-34) (Usually Fund Type 3)"),
Directory("F11031", "Funding-Department of Veteran's Affairs"),
Directory("F11034", "Funding-Other Federal Government"),
Directory("F11040", "Funding-Municipal Government (Usually Fund Type 3)"),
Directory("F11045", "Funding-Non Government (Usually Fund Type 3)"),
Directory("F11050", "R Rev. Worker Safety and Insurance Board (WIB)"),
Directory("F11060", "R REV. NON-RESIDENT OF PROVINCE REVENUE"),
Directory("F11061", "R Rev. Quebec or Manitoba Provincial Health Ins. Plan"),
Directory("F11063", "R Rev. Private Health Ins. /Out-of-Pocket (self-pay)"),
Directory("F11064", "R Rev. All other Provincial Health Insurance Plan"),
Directory("F11065", "R Rev. Non-Residents of Canada"),
Directory("F11070", "R Rev. Uninsured Residents of Ontario"),
Directory("F11080", "R Rev. Insured Residents – Private Health Insurance/Out-of-Pocket (self-pay)"),
Directory("F11090", "R Rev. Other Service Recipient Payment Sources"),
Directory("F11091", "R Rev.-Partial Basic Accommodation Fees"),
Directory("F11092", "R Rev.-Full Basic Accommodation Fees"),
Directory("F11093", "R Rev.-Preferred Accommodation Fees"),
Directory("F11099", "R Rev.-Client Fee Waived (CSS Sector)")
]
# Add subdirectories to directory_F110
for subdir in subdirectories_F110:
directory_F110 = find_directory(root_directory, "F110")
if directory_F110:
directory_F110.add_subdirectory(subdir)
else:
print("Directory F110 not found.")
excel_file = "Test 1.xlsx"
processed_data = process_excel_file(excel_file)
if processed_data:
try:
existing_wb = openpyxl.load_workbook(excel_file)
existing_sheet = existing_wb.active
next_column = existing_sheet.max_column + 1
for row_index, row in enumerate(processed_data, start=1):
if any(row):
directory_code = row[0]
directory = find_directory(root_directory, directory_code)
if directory:
existing_sheet.cell(row=row_index, column=next_column, value=directory.name)
else:
print(f"No directory found for row {row_index} with code {directory_code}")
existing_wb.save(excel_file)
except Exception as e:
print(f"Error occurred while processing Excel file: {e}")
else:
print("No data to process or error occurred during data processing.")
Test 1.xlsx文件的初始状态:https://imgur.com/a/CfwOWBp
Test 1.xlsx文件处理后的状态:https://imgur.com/a/QrukBGT
我希望我的程序能够接收用户输入,格式是“F 1 1 0 1 0”,然后在目录中搜索并输出相应的结果。例如,如果用户输入“F11010”,程序应该输出:
Enter the code (or 'exit' to quit): F11010
Directory:
F, Financial >
F1, Revenue (including Recoveries) >
F110, Revenue – Service Recipient Services >
F11010, Funding-Provincial MOHLTC (Allocation)
所以,我想要的最终结果是"F, Financial > F1, Revenue (including Recoveries) > F110, Revenue – Service Recipient Services > F11010, Funding-Provincial MOHLTC (Allocation)"
,这些信息要分别添加到各个列中。
1 个回答
1
为了实现你想要的效果,你需要修改一下find_directory这个函数,让它能够匹配用户输入的完整代码,而不仅仅是检查它是否以目标代码开头。乍一看,这似乎很简单,但其实...