如何从xml输出列表中创建内部循环?

2024-04-25 19:05:38 发布

您现在位置:Python中文网/ 问答频道 /正文

我有如下的测试数据: [['81303~E8889~81300~7295~71942', 'AR', '61.43463381', '0', '0', '0', '0', '0', '0', '0', '99212~73080~00378415101~00406035705~63304045830~99202~WC101~29105~A4565~73070~73090~99203'], ['7234', 'AR', '54.29158111', '0', '0', '0', '1', '0', '0', '0', 'E0849']] 我正试图将其转换为soap API调用的XML格式,但我遇到的问题是tilde 以下是我的测试csv文件的外观:

playcodes,Benefit State,Age at 

DOL,BEHAVIORAL,CARDIAC,DIABETES,HYPERTENSION,OBESE,SMOKER,SUBSTANCE,CPT_codes
81303~E8889~81300~7295~71942,AR,61.43463381,0,0,0,0,0,0,0,99212~73080~00378415101~00406035705~63304045830~99202~WC101~29105~A4565~73070~73090~99203
7234,AR,54.29158111,0,0,0,1,0,0,0,E0849

我的代码是:

import csv
import xml.etree.ElementTree as ET
import xml

f = open('medical_test.csv')
next(f)
csv_f = csv.reader(f)   
data = []
for row in csv_f: 
   data.append(row)
f.close()

print(data)
[['81303~E8889~81300~7295~71942', 'AR', '61.43463381', '0', '0', '0', '0', '0', '0', '0', '99212~73080~00378415101~00406035705~63304045830~99202~WC101~29105~A4565~73070~73090~99203'], ['7234', 'AR', '54.29158111', '0', '0', '0', '1', '0', '0', '0', 'E0849']]


def convert_row(row):
    return """
<?xml version="1.0" encoding="UTF-8"?>
<cbcalc>
    <icdcodes>
        %s
    </icdcodes>
    <state>%s</state>
    <country>US</country>
    <clientid>Custom field</clientid>
    <medicalonly></medicalonly>
    <bpcode></bpcode>
    <noicode></noicode>
    <age>%s</age>
    <jobclass>1</jobclass>
    <fulloutput>Y</fulloutput>
    <cfactors>
        <depression>%s</depression>
        <cardiac>%s</cardiac>
        <diabetes>%s</diabetes>
        <hypertension>%s</hypertension>
        <legalrep></legalrep>
        <obesity>%s</obesity>
        <smoker>%s</smoker>
        <subabuse>%s</subabuse>
    </cfactors>
    <prosummary>
        <icd>
            <code></code>
        </icd>
        <ndc>
            <code></code>
        <ndc>
        <cpt>
            <code>%s</code>
        </cpt>
        <hcpcs>
            <code></code>
        </hcpcs>
    </prosummary>
</cbcalc>
""" % (row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9], row[10])

print('\n'.join([convert_row(row) for row in data[1:]]))

输出如下所示:

<?xml version="1.0" encoding="UTF-8"?>
<cbcalc>
    <icdcodes>
        <code>7234</code>
    </icdcodes>
    <state>AR</state>
    <country>US</country>
    <clientid>Custom field</clientid>
    <medicalonly></medicalonly>
    <bpcode></bpcode>
    <noicode></noicode>
    <age>54.29158111</age>
    <jobclass>1</jobclass>
    <fulloutput>Y</fulloutput>
    <cfactors>
        <depression>0</depression>
        <cardiac>0</cardiac>
        <diabetes>0</diabetes>
        <hypertension>1</hypertension>
        <legalrep></legalrep>
        <obesity>0</obesity>
        <smoker>0</smoker>
        <subabuse>0</subabuse>
    </cfactors>
    <prosummary>
        <icd>
            <code></code>
        </icd>
        <ndc>
            <code></code>
        <ndc>
        <cpt>
            <code>E0849</code>
        </cpt>
        <hcpcs>
            <code></code>
        </hcpcs>
    </prosummary>
</cbcalc>

我需要这个:

<?xml version="1.0" encoding="UTF-8"?>
<cbcalc>
    <icdcodes>
       <code>81303</code>
       <code>E8889</code>
       <code>81300</code>
       <code>7295</code>
       <code>71942</code>
    </icdcodes>
    <state>AR</state>
    <country>US</country>
    <age>61.43463381</age>
    <jobclass>1</jobclass>
    <fulloutput>Y</fulloutput>
    <cfactors>
       <depression>0</depression>
       <cardiac>0</cardiac>
       <diabetes>0</diabetes>
       <hypertension>0</hypertension>
       <obesity>0</obesity>
       <smoker>0</smoker>
       <subabuse>0</subabuse>
    </cfactors>
    <prosummary>
       <icd>
       </icd>
       <ndc>
       </ndc>
       <cpt>
               <code>99212</code>
               <code>73080</code>
               <code>00378415101</code>
               <code>00406035705</code>
               <code>63304045830</code>
               <code>99202</code>
               <code>WC101</code>
               <code>29105</code>
               <code>A4565</code>
               <code>73070</code>
               <code>73090</code>
               <code>99203</code>
       </cpt>
       <hcpcs>
       </hcpcs>
    </prosummary>
    <icdcodes>
       <code>7234</code>
    </icdcodes>
    <state>AR</state>
    <country>US</country>
    <age>54.29158111</age>
    <jobclass>1</jobclass>
    <fulloutput>Y</fulloutput>
    <cfactors>
       <depression>0</depression>
       <cardiac>0</cardiac>
       <diabetes>0</diabetes>
       <hypertension>1</hypertension>
       <obesity>0</obesity>
       <smoker>0</smoker>
       <subabuse>0</subabuse>
    </cfactors>
    <prosummary>
       <icd>
       </icd>
       <ndc>
       </ndc>
       <cpt>
               <code>E0849</code>
       </cpt>
       <hcpcs>
       </hcpcs>
    </prosummary>
</cbcalc>

我试过了,但没用

codes = '\n'.join([f'       <code>{item}</code>'
 for item in row[0].split('~')])

我正在丢失数据,因为列表中有波浪线。如何提取具有波浪线的数据,以便以XML格式获取所有数据


Tags: agecodecountryrowardiabetesstateobesity
2条回答

考虑使用{{CD1>}的DOM方法,通过{{CD2>}迭代,用字典键引用数据。

假设实际CSV数据包括:

playcodes,Benefit State,Age at DOL,BEHAVIORAL,CARDIAC,DIABETES,HYPERTENSION,OBESE,SMOKER,SUBSTANCE,CPT_codes
81303~E8889~81300~7295~71942,AR,61.43463381,0,0,0,0,0,0,0,99212~73080~00378415101~00406035705~63304045830~99202~WC101~29105~A4565~73070~73090~99203
7234,AR,54.29158111,0,0,0,1,0,0,0,E0849

见代码调整:

import csv
import xml.etree.ElementTree as ET
import xml.dom.minidom                 # FOR PRETTY PRINT

# INITIALIZING XML FILE
root = ET.Element('root') 

# READING CSV FILE
with open("medical_test.csv") as f: 
   reader = csv.DictReader(f) 
      
   # WRITING TO XML NODES 
   for i, row in enumerate(reader, start=1):
       cbNode = ET.SubElement(root, "cbalc")
       icdNode = ET.SubElement(cbNode, "icdcodes") 

       for code in row['playcodes'].split('~'):
           ET.SubElement(icdNode, "code").text = code

       ET.SubElement(cbNode, "state").text = row['Benefit State']
       ET.SubElement(cbNode, "country").text = "US"  
       ET.SubElement(cbNode, "age").text = row['Age at DOL']
       ET.SubElement(cbNode, "jobclass").text = "1" 
       ET.SubElement(cbNode, "fulloutput").text ="Y"

       cfNode = ET.SubElement(cbNode, "cfactors")
       for k in ['BEHAVIORAL', 'CARDIAC', 'DIABETES',
                 'HYPERTENSION', 'OBESE', 'SMOKER', 'SUBSTANCE']:
           ET.SubElement(cfNode, k.lower()).text = str(row[k])

       psNode = ET.SubElement(cbNode, "prosummary")
       ET.SubElement(psNode, "icd")
       ET.SubElement(psNode, "ndc") 

       cptNode = ET.SubElement(psNode, "cpt")
       for code in row['CPT_codes'].split('~'):
           ET.SubElement(cptNode, "code").text = code

       ET.SubElement(psNode, "hcpcs")

# SAVING XML FILE 
doc = ET.tostring(root, method='xml', encoding="UTF-8")
with open(f'Output.xml', 'wb') as f: 
      f.write(doc)

# PRETTY PRINT VERSION   
dom = xml.dom.minidom.parseString(doc)
with open(f'Output.xml', 'wb') as f: 
  f.write(dom.toprettyxml(encoding="UTF-8"))                

输出

<?xml version="1.0" encoding="UTF-8"?>
<root>
    <cbalc>
        <icdcodes>
            <code>81303</code>
            <code>E8889</code>
            <code>81300</code>
            <code>7295</code>
            <code>71942</code>
        </icdcodes>
        <state>AR</state>
        <country>US</country>
        <age>61.43463381</age>
        <jobclass>1</jobclass>
        <fulloutput>Y</fulloutput>
        <cfactors>
            <behavioral>0</behavioral>
            <cardiac>0</cardiac>
            <diabetes>0</diabetes>
            <hypertension>0</hypertension>
            <obese>0</obese>
            <smoker>0</smoker>
            <substance>0</substance>
        </cfactors>
        <prosummary>
            <icd/>
            <ndc/>
            <cpt>
                <code>99212</code>
                <code>73080</code>
                <code>00378415101</code>
                <code>00406035705</code>
                <code>63304045830</code>
                <code>99202</code>
                <code>WC101</code>
                <code>29105</code>
                <code>A4565</code>
                <code>73070</code>
                <code>73090</code>
                <code>99203</code>
            </cpt>
            <hcpcs/>
        </prosummary>
    </cbalc>
    <cbalc>
        <icdcodes>
            <code>7234</code>
        </icdcodes>
        <state>AR</state>
        <country>US</country>
        <age>54.29158111</age>
        <jobclass>1</jobclass>
        <fulloutput>Y</fulloutput>
        <cfactors>
            <behavioral>0</behavioral>
            <cardiac>0</cardiac>
            <diabetes>0</diabetes>
            <hypertension>1</hypertension>
            <obese>0</obese>
            <smoker>0</smoker>
            <substance>0</substance>
        </cfactors>
        <prosummary>
            <icd/>
            <ndc/>
            <cpt>
                <code>E0849</code>
            </cpt>
            <hcpcs/>
        </prosummary>
    </cbalc>
</root>

有两种方法可以解决这个问题,但我更喜欢先使用字符串操作(实际上是一个模板),然后再导入xml(我在下面使用lxml,而不是etree.ElementTree,因为我更喜欢它的xpath支持,但您可以采用任何一种方法

from lxml import etree

#start of xml string:
xml_string= """<?xml version="1.0" encoding="UTF-8"?>
 <root>"""

for datum in data:
    ipcodes_str =''
    icdcodes = datum[0].split('~')
    for icd in icdcodes:
        ipcodes_str+=(f'<code>{icd}</code>\n')
    cpts = datum[-1].split('~')
    cpts_str =''
    for cpt in cpts:
        cpts_str+=(f'<code>{cpt}</code>\n')

    #now for the body of the xml string:

    xml_string+=(f"""<cbcalc><icdcodes>
       {ipcodes_str}       
    </icdcodes>
    <state>{datum[1]}</state>
    <country>US</country>
    <age>{datum[2]}</age>
    <jobclass>1</jobclass>
    <fulloutput>Y</fulloutput>
    <cfactors>
       <depression>{datum[3]}</depression>
       <cardiac>{datum[4]}</cardiac>
       <diabetes>{datum[5]}</diabetes>
       <hypertension>{datum[6]}</hypertension>
       <obesity>{datum[7]}</obesity>
       <smoker>{datum[8]}</smoker>
       <subabuse>{datum[9]}</subabuse>
    </cfactors>
    <prosummary>
       <icd>
       </icd>
       <ndc>
       </ndc>
       <cpt>
        {cpts_str}
       </cpt>
       <hcpcs>
       </hcpcs>
    </prosummary></cbcalc>""")

#close the xml string
xml_string+="""</root>"""

#parse the xml string
doc = etree.XML(xml_string.encode())
print(etree.tostring(doc).decode())

输出应该是一个格式良好的xml文件,如您的问题所述

相关问题 更多 >