Python 数据映射和追加到 Excel

1 投票
1 回答
37 浏览
提问于 2025-04-14 15:58

我正在做一个项目,需要处理一个Excel文件里的数据,并把它整理好。

这里有一个简化的目录结构示例:

1 REVENUE 
MOHLTC 
    ONT 
        ACCOUNT NUMBER 
            ACCOUNT NAME 
            ***Important: Do Not Report Header (H) accounts*** 
            COMMENTS 
            H
1 10 
    REVENUE - SERVICE RECIPIENT SERVICES 
        Detailed accounts required 
    FUNDING 
        MOHLTC 
            ON 
                1 10 04 
                    Funding – Quality Based Procedures (QBP) 
                1 10 05 
                    Funding – Health Based Allocation Model (HBAM)/ Growth Efficiency Model (GEM) 
                ...
1 10 50 
    SR Rev. Worker Safety and Insurance Board (WSIB) 
    H
...

问题是:我有一个Excel文件,里面的数据是按列组织的,每一行代表一组代码,这些代码对应一个层级结构中的目录。

我需要读取这些数据,找到每个代码对应的目录名称,然后在Excel表格中相应地整理这些信息。

我写了Python代码,使用一个目录类来创建目录结构,并用openpyxl库来处理Excel文件。不过,我在把目录名称正确地映射到Excel文件中的代码时遇到了困难。

import os
import openpyxl 
import re

class Directory:
    def __init__(self, code, name, description=""):
        self.code = code
        self.name = name
        self.description = description
        self.subdirectories = []

    def add_subdirectory(self, subdirectory):
        self.subdirectories.append(subdirectory)

    def display(self, indent=0, path=""):
        current_path = path + " > " + self.code + ", " + self.name if path else self.code + ", " + self.name
        print("  " * indent + current_path)
        for subdirectory in self.subdirectories:
            subdirectory.display(indent + 1, current_path)

def find_directory(directory, target_code):
    if directory.code == target_code:
        return directory
    for subdirectory in directory.subdirectories:
        result = find_directory(subdirectory, target_code)
        if result:
            return result
    return None

def process_excel_file(excel_file):
    try:
        wb = openpyxl.load_workbook(excel_file)
        sheet = wb.active
        data_rows = sheet.iter_rows(min_row=2, values_only=True)
        processed_data = []
        for row in data_rows:
            processed_row = [cell.strip() if cell else "" for cell in row]
            processed_data.append(processed_row)
        return processed_data
    except Exception as e:
        print(f"Error occurred while processing Excel file: {e}")
        return None

if __name__ == "__main__":
    # Create the root directory
    root_directory = Directory("F", "Financial")
    # Create additional directories
    directories = [
        Directory("F1", "Revenue (including Recoveries)"),
        Directory("F3", "Compensation Expenses"),
        Directory("F4", "Supplies Expenses"),
        Directory("F5", "Service Recipient Specific Supplies"),
        Directory("F6", "Sundry Expenses"),
        Directory("F7", "Equipment Expenses"),
        Directory("F8", "Contracted-Out Services"),
        Directory("F9", "Building and Grounds Expenses")
    ]

    # Add additional directories to the root directory
    for directory in directories:
        root_directory.add_subdirectory(directory)

    # Subdirectories for directory_F1
    subdirectories_F1 = [
        Directory("F110", "Revenue – Service Recipient Services"),
        Directory("F112", "Envelope Revenue"),
        Directory("F120", "Recoveries-External"),
        Directory("F121", "Recoveries-Internal"),
        Directory("F122", "Recoveries-Interdepartmental"),
        Directory("F130", "Contributed Services"),
        Directory("F131", "Amortized Donation Contributed Services"),
        Directory("F140", "Donations"),
        Directory("F141", "Amortized Donation Revenue"),
        Directory("F150", "Grants"),
        Directory("F151", "Amortized Grant Revenue"),
        Directory("F160", "Investment Revenue"),
        Directory("F165", "Unrealized Gain/Loss"),
        Directory("F170", "Revenue Transferred From Other Funds"),
        Directory("F190", "Other Revenue")
    ]

    # Add subdirectories to directory_F1
    for subdir in subdirectories_F1:
        directory_F1 = find_directory(root_directory, "F1")
        if directory_F1:
            directory_F1.add_subdirectory(subdir)
        else:
            print("Directory F1 not found.")

    # Subdirectories for directory_F110
    subdirectories_F110 = [
        Directory("F11004", "Funding – LHIN Quality Based Procedures (QBP)"),
        Directory("F11005", "Funding – LHIN Health Based Allocation Model (HBAM)"),
        Directory("F11006", "Funding-Local Health Integration Networks (LHIN)"),
        Directory("F11007", "Funding-Cancer Care Ontario"),
        Directory("F11008", "Funding-LHINs One Time"),
        Directory("F11010", "Funding-Provincial MOHLTC (Allocation)"),
        Directory("F11011", "Funding-Provision for Recoveries"),
        Directory("F11012", "Funding-MOHLTC One Time"),
        Directory("F11013", "Funding-MOHLTC Pay Equity Funding"),
        Directory("F11014", "Funding-MOHLTC Other Funding Envelopes"),
        Directory("F11015", "Funding-Other MOHLTC (Care & Provider Services)"),
        Directory("F11016", "OHIP-Professional Component"),
        Directory("F11017", "OHIP-Technical Component"),
        Directory("F11019", "Funding – Paymaster / Flow Through"),
        Directory("F11020", "Funding-Provincial Government-Other (Usually Fund Type 3)"),
        Directory("F11022", "Funding-MC/MCY Fiscal Subsidy"),
        Directory("F11030", "Funding-Federal Government (Sum of 1-10-31 to 1-10-34) (Usually Fund Type 3)"),
        Directory("F11031", "Funding-Department of Veteran's Affairs"),
        Directory("F11034", "Funding-Other Federal Government"),
        Directory("F11040", "Funding-Municipal Government (Usually Fund Type 3)"),
        Directory("F11045", "Funding-Non Government (Usually Fund Type 3)"),
        Directory("F11050", "R Rev. Worker Safety and Insurance Board (WIB)"),
        Directory("F11060", "R REV. NON-RESIDENT OF PROVINCE REVENUE"),
        Directory("F11061", "R Rev. Quebec or Manitoba Provincial Health Ins. Plan"),
        Directory("F11063", "R Rev. Private Health Ins. /Out-of-Pocket (self-pay)"),
        Directory("F11064", "R Rev. All other Provincial Health Insurance Plan"),
        Directory("F11065", "R Rev. Non-Residents of Canada"),
        Directory("F11070", "R Rev. Uninsured Residents of Ontario"),
        Directory("F11080", "R Rev. Insured Residents – Private Health Insurance/Out-of-Pocket (self-pay)"),
        Directory("F11090", "R Rev. Other Service Recipient Payment Sources"),
        Directory("F11091", "R Rev.-Partial Basic Accommodation Fees"),
        Directory("F11092", "R Rev.-Full Basic Accommodation Fees"),
        Directory("F11093", "R Rev.-Preferred Accommodation Fees"),
        Directory("F11099", "R Rev.-Client Fee Waived (CSS Sector)")
    ]

    # Add subdirectories to directory_F110
    for subdir in subdirectories_F110:
        directory_F110 = find_directory(root_directory, "F110")
        if directory_F110:
            directory_F110.add_subdirectory(subdir)
        else:
            print("Directory F110 not found.")

    excel_file = "Test 1.xlsx"
    processed_data = process_excel_file(excel_file)

    if processed_data:
        try:
            existing_wb = openpyxl.load_workbook(excel_file)
            existing_sheet = existing_wb.active
            next_column = existing_sheet.max_column + 1
            for row_index, row in enumerate(processed_data, start=1):
                if any(row):
                    directory_code = row[0]
                    directory = find_directory(root_directory, directory_code)
                    if directory:
                        existing_sheet.cell(row=row_index, column=next_column, value=directory.name)
                    else:
                        print(f"No directory found for row {row_index} with code {directory_code}")
            existing_wb.save(excel_file)
        except Exception as e:
            print(f"Error occurred while processing Excel file: {e}")
    else:
        print("No data to process or error occurred during data processing.")

Test 1.xlsx文件的初始状态:https://imgur.com/a/CfwOWBp

Test 1.xlsx文件处理后的状态:https://imgur.com/a/QrukBGT

源数据集的示例



我希望我的程序能够接收用户输入,格式是“F 1 1 0 1 0”,然后在目录中搜索并输出相应的结果。例如,如果用户输入“F11010”,程序应该输出:

Enter the code (or 'exit' to quit): F11010
Directory: 
F, Financial > 
F1, Revenue (including Recoveries) > 
F110, Revenue – Service Recipient Services > 
F11010, Funding-Provincial MOHLTC (Allocation)

所以,我想要的最终结果是"F, Financial > F1, Revenue (including Recoveries) > F110, Revenue – Service Recipient Services > F11010, Funding-Provincial MOHLTC (Allocation)",这些信息要分别添加到各个列中。

1 个回答

1

为了实现你想要的效果,你需要修改一下find_directory这个函数,让它能够匹配用户输入的完整代码,而不仅仅是检查它是否以目标代码开头。乍一看,这似乎很简单,但其实...

撰写回答