如何将Python列表推导转换为XML

1 投票
2 回答
1965 浏览
提问于 2025-04-16 11:54

我需要一点帮助,想找个教程或者示例,教我怎么把列表推导式和一个csv数据文件结合起来,然后把这些内容转成一个xml文件。我看了很多Python的书和pdf,比如ditp、IYOCGwP、learnpythonthehardway、lxml教程、think python,还有网上的搜索,感觉我已经快搞定了,或者说我觉得是这样。我只需要一点推动力,把所有东西结合起来。基本上,我是把一个Excel电子表格导出成csv文件。这个csv文件里有很多记录的行,我需要把这些记录映射到一个xml文件里。我是Python的新手,想着用这个小项目来学习这门语言。下面的代码虽然不太好看,但能用。我可以读取一个csv文件,把它放到一个列表里。我可以把三个列表合并,然后输出结果列表,还能让我的程序生成一个几乎符合我需要格式的xml框架。接下来我会列出我实际输出的小样本,以及我想用xml实现的目标。抱歉如果这段话太长了,这是我第一次发帖。

import csv, datetime, os  
from lxml import etree  
from ElementTree_pretty import prettify

import sys  # needed by the csv.Error handler below

# Size of the CSV file in bytes; not used further in this snippet.
f = os.path.getsize("SO.csv")
fh = "SO.csv"
rh = open(fh, "rU")  # "U" = universal-newline mode (Python 2)

rows = 0
try:
    rlist = csv.reader(rh)
    # One inner list per CSV row, with the spaces around every cell removed.
    reports = [[x.strip(' ') for x in row] for row in rlist]
    rows = len(reports)
except csv.Error as e:
    # Report the file name and the reader's current line number, then abort.
    sys.exit('file %s, line %d: %s' % (fh, rlist.line_num, e))
finally:
    rh.close()

# Build the empty nested skeleton:
# co_ehs > object > event > facets > categories > instance > property.
# The element variables carry an "_el" suffix so they neither shadow the
# builtins `object` / `property` nor collide with the lists defined below.
root = etree.Element("co_ehs")
object_el = etree.SubElement(root, "object")
event_el = etree.SubElement(object_el, "event")
facets_el = etree.SubElement(event_el, "facets")
categories_el = etree.SubElement(facets_el, "categories")
instance_el = etree.SubElement(categories_el, "instance")
property_el = etree.SubElement(instance_el, "property")

# Three parallel lists; zip() below pairs them up position by position.
facets = ['header','header','header','header','informational','header','informational']

categories = ['processing','processing','processing','processing','short_title','file_num','short_narrative']

properties = ['REPORT ID','NEXT REPORT ID','initial-event-date','number','title','summary-docket-num','description-story']

print('----------Printing Reports from CSV Data----------')
print(reports)
print('---------END OF CSV DATA-------------')
print('')
mappings = zip(facets, categories, properties)
print('----------Printing Mappings from the zip of facets, categories, property ----------')
print(mappings)
print('---------END OF List Comprehension-------------')
print('')
print('----------Printing the xml skeleton that will contain the mappings and the csv data ----------')
print(etree.tostring(root, xml_declaration=True, encoding='UTF-8', pretty_print=True))
print('---------END OF XML Skeleton-------------')


----My OUTPUT---  
----------Printing Reports from CSV Data----------  
[['1', '12-Dec-04', 'Vehicle Collision', '786689', 'No fault collision due to ice', '-1', '545671'], ['3', '15-Dec-04', 'OJT Injury', '87362', 'Paint fumes combusted causing 2nd degree burns', '4', '588456'], ['4', '17-Dec-04', 'OJT Injury', '87362', 'Paint fumes combusted causing 2nd degree burns', '-1', '58871'], ['1000', '12-Nov-05', 'Back Injury', '9854231', 'Lifting without a support device', '-1', '545671'], ['55555', '12-Jan-06', 'Foot Injury', '7936547', 'Office injury - heavy item dropped on foot', '-1', '545671']]  
---------END OF CSV DATA-------------  
----------Printing Mappings from the zip of facets, categories, property ----------  
[('header', 'processing', 'REPORT ID'), ('header', 'processing', 'NEXT REPORT ID'), ('header', 'processing', 'initial-event-date'), ('header', 'processing', 'number'), ('informational', 'short_title', 'title'), ('header', 'file_num', 'summary-docket-num'), ('informational', 'short_narrative', 'description-story')]  
---------END OF List Comprehension-------------  
----------Printing the xml skeleton that will contain the mappings and the csv data ----------  

    <?xml version='1.0' encoding='UTF-8'?>
    <co_ehs>
      <object>
        <event>
          <facets>
            <categories>
              <instance>
                <property/>
              </instance>
            </categories>
          </facets>
        </event>
      </object>
</co_ehs>

---------END OF XML Skeleton-------------  
----------CSV DATA------------------  
C_ID,NEXT_C_ID,C_DATE,C_NUMBER,C_EVENT,C_DOCKETNUM,C_DESCRIPTION  
1,-1,12-Dec-04,545671,Vehicle Collision,786689,"No fault collision due to ice"  
3,4,15-Dec-04,588456,OJT Injury,87362,"Paint fumes combusted causing 2nd degree burns"  
4,-1,17-Dec-04,58871,OJT Injury,87362,"Paint fumes combusted causing 2nd degree burns"  
1000,-1,12-Nov-05,545671,Back Injury,9854231,"Lifting without a support device"  
55555,-1,12-Jan-06,545671,Foot Injury,7936547,"Office injury - heavy item dropped on foot"  

-----------What I want the xml output to look like----------------------  
    <?xml version="1.0" encoding="UTF-8"?>
    <co_ehs xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="co_ehs.xsd">  
      <object id="3" object-type="ehs_report">
        <event event-tag="0">
          <facets name="header">
            <categories name="processing">
              <instance instance-tag="0">
                <property name="REPORT ID" value="1"/>
                <property name="NEXT REPORT ID" value="-1"/>
                <property name="initial-event-date" value="12-Dec-04"/>
                <property name="number" value="545671"/>
              </instance>
            </categories>
          </facets>
          <facets name="informational">
            <categories name="short_title">
              <instance instance-tag="0">
                <property name="title" value="Vehicle Collision"/>
              </instance>
            </categories>
          </facets>
          <facets name="header">
            <categories name="file_num">
              <instance instance-tag="0">
                <property name="summary-docket-num" value="786689"/>
              </instance>
            </categories>
          </facets>
          <facets name="informational">
            <categories name="short_narrative">
              <instance instance-tag="0">
                <property name="description-story" value="No fault collision due to ice"/>
              </instance>
            </categories>
          </facets>
        </event>
      </object>
    </co_ehs>

2 个回答

0

我创建了一个名为'pattern.txt'的文件,里面的内容是这样的(注意缩进)。

请注意里面有8个%s,它们放在了很重要的位置。

        <event event-tag="%s">
          <facets name="header">
            <categories name="processing">
              <instance instance-tag="0">
                <property name="REPORT ID" value="%s"/>
                <property name="NEXT REPORT ID" value="%s"/>
                <property name="initial-event-date" value="%s"/>
                <property name="number" value="%s"/>
              </instance>
            </categories>
          </facets>
          <facets name="informational">
            <categories name="short_title">
              <instance instance-tag="0">
                <property name="title" value="%s"/>
              </instance>
            </categories>
          </facets>
          <facets name="header">
            <categories name="file_num">
              <instance instance-tag="0">
                <property name="summary-docket-num" value="%s"/>
              </instance>
            </categories>
          </facets>
          <facets name="informational">
            <categories name="short_narrative">
              <instance instance-tag="0">
                <property name="description-story" value="%s"/>
              </instance>
            </categories>
          </facets>
        </event>

我还创建了一个名为'SO.csv'的文件,内容如下:

C_ID,NEXT_C_ID,C_DATE,C_NUMBER,C_EVENT,C_DOCKETNUM,C_DESCRIPTION  
1,-1,12-Dec-04,545671,Vehicle Collision,786689,"No fault collision due to ice"  
3,4,15-Dec-04,588456,OJT Injury,87362,"Paint fumes combusted causing 2nd degree burns"  
4,-1,17-Dec-04,58871,OJT Injury,87362,"Paint fumes combusted causing 2nd degree burns"  
1000,-1,12-Nov-05,545671,Back Injury,9854231,"Lifting without a support device"  
55555,-1,12-Jan-06,545671,Foot Injury,7936547,"Office injury - heavy item dropped on foot"

然后我运行了以下代码:

import csv

from xml.sax.saxutils import escape

# pattern.txt holds one <event> block with 8 "%s" placeholders:
# the event index followed by the 7 CSV columns.
with open('pattern.txt') as f:
    pati = f.read()

xmloutput = ['    <?xml version="1.0" encoding="UTF-8"?>',
             '    <co_ehs xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '\
             'xsi:noNamespaceSchemaLocation="co_ehs.xsd">',
             '      <object id="3" object-type="ehs_report">']

# 'rb' is the mode the Python 2 csv module expects; the with-block closes the file.
with open('SO.csv', 'rb') as csvfile:
    rid = csv.reader(csvfile)
    next(rid)  # skip the header row

    for i, row in enumerate(rid):
        # Prepend the index as ONE item. Splicing a string with
        # row[0:0] = str(i) would insert each character separately and
        # break the 8-placeholder pattern from row 10 onwards.
        # Cell values land inside value="..." attributes, so escape
        # &, <, > and the double quote.
        values = [str(i)] + [escape(cell, {'"': '&quot;'}) for cell in row]
        xmloutput.append(pati % tuple(values))

# Close the elements opened in the header lines above.
xmloutput.append('      </object>')
xmloutput.append('    </co_ehs>')

print('\n'.join(xmloutput))

这样对你有帮助吗?

0

这是我的解决方案。我使用lxml库,因为用框架生成XML通常比用字符串或模板文件要好。

不过,co_ehs的属性缺失了,但这可以通过一些set()调用轻松解决。我就不帮你做了,留给你自己去完成。

顺便说一下:你可以通过点击答案左侧的勾选标记来接受最好的答案。

import csv, datetime, os  
from lxml import etree

def makeFacet(event, newheaders, ev, facetname, catname, count, nhposstart, nhposend):
    """Append a facets > categories > instance branch to ``event``.

    The ``facets`` and ``categories`` elements get a ``name`` attribute
    (``facetname`` and ``catname``); the ``instance`` element gets
    ``instance-tag`` set to ``count``.  For every index from ``nhposstart``
    up to, but not including, ``nhposend`` a ``property`` child is added
    whose ``name`` is ``newheaders[index]`` and whose ``value`` is
    ``ev[index]`` with surrounding whitespace stripped.  Returns nothing.
    """
    facet_node = etree.SubElement(event, "facets", {"name": facetname})
    category_node = etree.SubElement(facet_node, "categories", {"name": catname})
    # "instance-tag" contains a hyphen, so it cannot be passed as a keyword.
    instance_node = etree.SubElement(category_node, "instance")
    instance_node.set("instance-tag", count)

    for index in range(nhposstart, nhposend):
        prop = etree.SubElement(instance_node, "property")
        prop.set("name", newheaders[index])
        prop.set("value", ev[index].strip())


# read the csv
import sys  # needed by the csv.Error handler below

fh = "SO.csv"
rh = open(fh, "rU")  # "U" = universal-newline mode (Python 2)

try:
    # Keep a handle on the reader so the handler can report its line number.
    reader = csv.reader(rh)
    rlist = list(reader)
except csv.Error as e:
    sys.exit("file %s, line %d: %s" % (fh, reader.line_num, e))
finally:
    rh.close()

# generate the xml

# newheaders gives, by column position, the XML property name for each csv
# column, because the csv column names don't correspond to the XML names.
# The spellings follow the target XML shown in the question
# ("REPORT ID", not "REPORT_ID").
newheaders = ["REPORT ID", "NEXT REPORT ID", "initial-event-date", "number", "title", "summary-docket-num", "description-story"]

root = etree.Element("co_ehs")

# Named report_object rather than `object` to avoid shadowing the builtin.
report_object = etree.SubElement(root, "object")

report_object.set("id", "3") # Not sure about this one
report_object.set("object-type", "ehs_report")

# rlist[0] is the csv header row, so events start at rlist[1].
for c, ev in enumerate(rlist[1:]):
    tag = "%s" % c  # attribute values must be strings
    event = etree.SubElement(report_object, "event")
    event.set("event-tag", tag)
    # Column ranges: 0-3 processing header, 4 title, 5 docket number, 6 narrative.
    makeFacet(event, newheaders, ev, "header", "processing", tag, 0, 4)
    makeFacet(event, newheaders, ev, "informational", "short_title", tag, 4, 5)
    makeFacet(event, newheaders, ev, "header", "file_num", tag, 5, 6)
    makeFacet(event, newheaders, ev, "informational", "short_narrative", tag, 6, 7)

print(etree.tostring(root, xml_declaration=True, encoding="UTF-8", pretty_print=True))

撰写回答