使用Python生成XML输出文件
我的输入数据保存在一个 CSV 文件里,我需要根据这个文件里的数据生成一个 XML 输出文件。我正在尝试用 Python 来实现这个需求。我试过下面的代码,但没有得到想要的结果。
输入数据(Input.csv文件):
CustID,CardNo
A00001,C000000001
A00001,C000000002
A00002,C000000003
我尝试过下面的代码:
import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict
def generate_xml(input_file, output_file):
    """Convert a CustID/CardNo CSV file into a ReceivableAccounting XML file.

    Rows are grouped by ``CustID``. Each customer becomes one ``<Card>``
    element holding a ``<CustHolderHeader>`` followed by one ``<CardLine>``
    per card number, in the order the cards appear in the CSV. Customers are
    written in ascending ``CustID`` order.

    Args:
        input_file: Path of the CSV file to read. It must have a header row
            containing the ``CustID`` and ``CardNo`` columns.
        output_file: Path of the XML file to write. The file is UTF-8 encoded
            and starts with an XML declaration.

    Raises:
        KeyError: If the CSV header lacks ``CustID`` or ``CardNo``.
    """
    cards_per_custid = defaultdict(list)
    # newline="" lets the csv module do its own line-ending handling, and
    # "utf-8-sig" drops a leading BOM that would otherwise end up inside the
    # first header name and break the "CustID" lookup.
    with open(input_file, "r", newline="", encoding="utf-8-sig") as file:
        for row in csv.DictReader(file):
            cards_per_custid[row["CustID"]].append(row["CardNo"])

    # The namespace is emitted as a plain "xmlns" attribute on the root; the
    # child elements are created without qualified names.
    root = ET.Element(
        "ReceivableAccounting",
        attrib={"xmlns": "http://www.sample.com/testing"},
    )
    for cust_id, cards in sorted(cards_per_custid.items()):
        card_el = ET.SubElement(root, "Card")
        header = ET.SubElement(card_el, "CustHolderHeader")
        ET.SubElement(header, "CustID").text = cust_id
        # This loop must stay nested inside the per-customer loop; otherwise
        # only the last customer receives any <CardLine> elements.
        for card_no in cards:
            card_line = ET.SubElement(card_el, "CardLine")
            ET.SubElement(card_line, "CardDetail").text = card_no

    tree = ET.ElementTree(root)
    ET.indent(tree)
    # Request the declaration explicitly: write() omits it by default for UTF-8.
    tree.write(output_file, encoding="UTF-8", xml_declaration=True)
# Example usage: turn the sample CSV into its XML counterpart.
input_file, output_file = "Input.csv", "output.xml"
generate_xml(input_file, output_file)
生成了以下结果:
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00002</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
</ReceivableAccounting>
期望得到以下结果:
<?xml version="1.0" encoding="UTF-8"?>
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000001</CardDetail>
</CardLine>
<CardLine>
<CardDetail>C000000002</CardDetail>
</CardLine>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00002</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
</ReceivableAccounting>
1 个回答
0
没问题。
我把你的示例数据放进了一个 io.StringIO()
里,不过你也可以直接用一个打开的文件来替代。需要注意的是,我这里处理 xmlns
的方式并不完全规范(只是把它当作根元素上的一个普通属性写入,而没有使用带命名空间的标签名),但对于你的示例来说是有效的。
import csv
import io
import xml.etree.ElementTree as ET
from collections import defaultdict
# In-memory stand-in for the CSV file; an open text file works the same way.
input_file = io.StringIO(
    """
CustID,CardNo
A00001,C000000001
A00001,C000000002
A00002,C000000003
""".strip()
)

# Collect every card number under its customer ID, preserving CSV order.
cards_per_custid = defaultdict(list)
for row in csv.DictReader(input_file):
    customer, card = row["CustID"], row["CardNo"]
    cards_per_custid[customer].append(card)

# The namespace is written as a plain "xmlns" attribute on the root element.
root = ET.Element("ReceivableAccounting", {"xmlns": "http://www.sample.com/testing"})

# One <Card> per customer, visited in ascending customer-ID order.
for cust_id in sorted(cards_per_custid):
    card_el = ET.SubElement(root, "Card")
    header = ET.SubElement(card_el, "CustHolderHeader")
    cust_id_el = ET.SubElement(header, "CustID")
    cust_id_el.text = cust_id
    for card_no in cards_per_custid[cust_id]:
        card_line = ET.SubElement(card_el, "CardLine")
        detail = ET.SubElement(card_line, "CardDetail")
        detail.text = card_no

# Pretty-print in place, then serialise the root as text.
tree = ET.ElementTree(root)
ET.indent(tree)
xml_text = ET.tostring(root, encoding="unicode")
print(xml_text)
这段代码会输出
<ReceivableAccounting xmlns="http://www.sample.com/testing">
<Card>
<CustHolderHeader>
<CustID>A00001</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000001</CardDetail>
</CardLine>
<CardLine>
<CardDetail>C000000002</CardDetail>
</CardLine>
</Card>
<Card>
<CustHolderHeader>
<CustID>A00002</CustID>
</CustHolderHeader>
<CardLine>
<CardDetail>C000000003</CardDetail>
</CardLine>
</Card>
</ReceivableAccounting>