Python的流读写器选项

2024-05-16 13:24:46 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个 9GB 的 XML 文件,它太大了,无法一次性加载到内存中处理。有哪些流式读写器可供选择?

以下是我正在使用的当前代码:

def _first(value):
    """Return value[0] when *value* is a list, otherwise *value* itself.

    The fields handled below may arrive either as a single mapping or as a
    list of mappings; this lets the row-building code treat both alike.
    """
    return value[0] if isinstance(value, list) else value


# NOTE(review): fd.read() + xmltodict.parse() materialises the whole document
# in memory. For multi-gigabyte inputs consider xmltodict's streaming mode
# (item_depth / item_callback) or xml.etree.ElementTree.iterparse instead.
print("opening file")
with open('text.xml') as fd:
    doc = xmltodict.parse(fd.read())

print("converting to CSV")
columns = (
    'EntityType',
    'OrganisationName',
    'AddressLine1',
    'AddressLine2',
    'AddressLine3',
    'PostCode',
    'CompanyID',
    'OrganisationType',
    'OrganisationStatus',
    'OrganisationIndustryCode',
    'DirectorRole',
    'DirectorName',
)

# NOTE(review): 'wb' is the mode Python 2's csv module expects; on Python 3
# this would need open('output.csv', 'w', newline='') instead.
with open('output.csv', 'wb') as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()

    for x in doc['N8:EntityList']['N8:Entity']:
        # Only the first organisation name, address, identifier and director
        # of each entity are exported.
        organisation = _first(x['N2:OrganisationName'])
        address = x['N5:Addresses']['N5:Address'][0]
        address_lines = address['N6:FreeTextAddress']['N6:AddressLine']
        identifier = _first(x['N5:Identifiers']['N5:Identifier'])
        org_info = x['N5:OrganisationInfo']
        director = _first(x['N1:Director'])
        name_parts = director['N2:PersonName']['N2:NameElement']

        writer.writerow({
            'EntityType': x['@xsi:type'].split(':')[1],
            'OrganisationName': organisation['N2:NameElement']['#text'],
            'AddressLine1': address_lines[0]['#text'],
            'AddressLine2': address_lines[1]['#text'],
            # The third address line is optional.
            'AddressLine3': (address_lines[2]['#text']
                             if len(address_lines) > 2 else None),
            'PostCode': address['N6:PostCode']['N6:Identifier']['#text'],
            'CompanyID': identifier['N5:IdentifierElement'],
            # .get() yields None for a missing attribute, matching the old
            # has_key() checks while also working on Python 3.
            'OrganisationType': org_info.get('@N5:Type'),
            'OrganisationStatus': org_info.get('@N5:Status'),
            'OrganisationIndustryCode': org_info.get('@N5:IndustryCode'),
            'DirectorRole': director['@xsi:type'].split(':')[1],
            # Join both name elements explicitly. The previous chained
            # conditional expression bound as `a if c else (b + " " + d ...)`,
            # so only one of the two name elements was ever written.
            'DirectorName': (name_parts[0]['#text'] + " "
                             + name_parts[1]['#text']),
        })

xml.etree.cElementTree 是一个可行的选项吗?

XML 树非常庞大,其结构如下所示。我只需要保存其中的一小部分字段。我知道需要逐条迭代记录,并使用计数器之类的东西。有没有处理类似 XML 文档的示例可以参考?

N8:Entity
 |-- N1:Director: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- N2:PersonName: struct (nullable = true)
 |    |    |    |-- N2:NameElement: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- _Type: string (nullable = true)
 |    |    |-- N9:Asic: struct (nullable = true)
 |    |    |    |-- N2:OrganisationName: struct (nullable = true)
 |    |    |    |    |-- N2:NameElement: struct (nullable = true)
 |    |    |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N5:Identifiers: struct (nullable = true)
 |    |    |    |    |-- N5:Identifier: struct (nullable = true)
 |    |    |    |    |    |-- N5:IdentifierElement: long (nullable = true)
 |    |    |    |    |    |-- N5:IssuerName: struct (nullable = true)
 |    |    |    |    |    |    |-- N2:NameElement: string (nullable = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |-- N9:RegisteredOfficeAddress: struct (nullable = true)
 |    |    |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- _Usage: string (nullable = true)
 |    |    |-- N9:Status: string (nullable = true)
 |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |-- _DateValidTo: string (nullable = true)
 |    |    |-- _type: string (nullable = true)
 |-- N2:OrganisationName: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- N2:NameElement: struct (nullable = true)
 |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _Type: string (nullable = true)
 |-- N5:Addresses: struct (nullable = true)
 |    |-- N5:Address: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- _AddressID: long (nullable = true)
 |    |    |    |-- _AddressIDType: string (nullable = true)
 |    |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |    |-- _DateValidTo: string (nullable = true)
 |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |-- _Usage: string (nullable = true)
 |-- N5:ContactNumbers: struct (nullable = true)
 |    |-- N5:ContactNumber: struct (nullable = true)
 |    |    |-- N5:ContactNumberElement: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _CommunicationMediaType: string (nullable = true)
 |    |    |-- _Usage: string (nullable = true)
 |-- N5:ElectronicAddressIdentifiers: struct (nullable = true)
 |    |-- N5:ElectronicAddressIdentifier: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |-- _VALUE: string (nullable = true)
 |-- N5:Events: struct (nullable = true)
 |    |-- N5:Event: struct (nullable = true)
 |    |    |-- _Type: string (nullable = true)
 |    |    |-- _VALUE: string (nullable = true)
 |-- N5:Identifiers: struct (nullable = true)
 |    |-- N5:Identifier: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- N5:IdentifierElement: long (nullable = true)
 |    |    |    |-- N5:IssuerName: struct (nullable = true)
 |    |    |    |    |-- N2:NameElement: string (nullable = true)
 |    |    |    |-- _Type: string (nullable = true)
 |-- N5:OrganisationInfo: struct (nullable = true)
 |    |-- _CountryOfOrigin: string (nullable = true)
 |    |-- _IndustryCode: string (nullable = true)
 |    |-- _Status: string (nullable = true)
 |    |-- _Type: string (nullable = true)
 |    |-- _VALUE: string (nullable = true)
 |-- N9:AddressForRecords: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |-- _DateValidTo: string (nullable = true)
 |-- N9:AnnualReturnFilingMonth: long (nullable = true)
 |-- N9:FinancialReportingFilingMonth: long (nullable = true)
 |-- N9:HasConstitutionFiled: boolean (nullable = true)
 |-- N9:InsolvencyDetails: struct (nullable = true)
 |    |-- N9:Appointee: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- N2:OrganisationName: struct (nullable = true)
 |    |    |    |    |-- N2:NameElement: struct (nullable = true)
 |    |    |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N2:PersonName: struct (nullable = true)
 |    |    |    |    |-- N2:NameElement: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N5:ElectronicAddressIdentifiers: struct (nullable = true)
 |    |    |    |    |-- N5:ElectronicAddressIdentifier: struct (nullable = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N9:PhysicalAddress: struct (nullable = true)
 |    |    |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: long (nullable = true)
 |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- _Usage: string (nullable = true)
 |    |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |    |-- _DateValidTo: string (nullable = true)
 |-- N9:PersonAuthorisedForService: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- N2:PersonName: struct (nullable = true)
 |    |    |    |-- N2:NameElement: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- _Type: string (nullable = true)
 |    |    |-- N9:Address: struct (nullable = true)
 |    |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |-- _Usage: string (nullable = true)
 |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |-- _DateValidTo: string (nullable = true)
 |-- N9:PreviousCompanyName: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |-- _DateValidTo: string (nullable = true)
 |    |    |-- _VALUE: string (nullable = true)
 |-- N9:PreviousCompanyStatus: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |-- _DateValidTo: string (nullable = true)
 |    |    |-- _VALUE: string (nullable = true)
 |-- N9:ShareRegisterAddress: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _DateValidFrom: string (nullable = true)
 |    |    |-- _DateValidTo: string (nullable = true)
 |-- N9:Shareholding: struct (nullable = true)
 |    |-- N9:ExtensiveShareholding: boolean (nullable = true)
 |    |-- N9:ShareAllocation: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- N9:Allocation: long (nullable = true)
 |    |    |    |-- N9:Shareholder: struct (nullable = true)
 |    |    |    |    |-- N2:OrganisationName: struct (nullable = true)
 |    |    |    |    |    |-- N2:NameElement: struct (nullable = true)
 |    |    |    |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- N2:PersonName: struct (nullable = true)
 |    |    |    |    |    |-- N2:NameElement: array (nullable = true)
 |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- N5:Identifiers: struct (nullable = true)
 |    |    |    |    |    |-- N5:Identifier: struct (nullable = true)
 |    |    |    |    |    |    |-- N5:IdentifierElement: long (nullable = true)
 |    |    |    |    |    |    |-- N5:IssuerName: struct (nullable = true)
 |    |    |    |    |    |    |    |-- N2:NameElement: string (nullable = true)
 |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- N9:PhysicalAddress: struct (nullable = true)
 |    |    |    |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |-- _Usage: string (nullable = true)
 |    |    |    |    |-- _DateValidTo: string (nullable = true)
 |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- _type: string (nullable = true)
 |    |-- N9:TotalNumberOfShares: long (nullable = true)
 |-- N9:UltimateHoldingCompany: struct (nullable = true)
 |    |-- N2:OrganisationName: struct (nullable = true)
 |    |    |-- N2:NameElement: struct (nullable = true)
 |    |    |    |-- _ElementType: string (nullable = true)
 |    |    |    |-- _VALUE: string (nullable = true)
 |    |-- N5:Identifiers: struct (nullable = true)
 |    |    |-- N5:Identifier: struct (nullable = true)
 |    |    |    |-- N5:IdentifierElement: string (nullable = true)
 |    |    |    |-- N5:IssuerName: struct (nullable = true)
 |    |    |    |    |-- N2:NameElement: string (nullable = true)
 |    |    |    |-- _Type: string (nullable = true)
 |    |-- N9:Address: struct (nullable = true)
 |    |    |-- N6:Country: struct (nullable = true)
 |    |    |    |-- N6:NameElement: struct (nullable = true)
 |    |    |    |    |-- _Abbreviation: boolean (nullable = true)
 |    |    |    |    |-- _NameCode: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- N6:FreeTextAddress: struct (nullable = true)
 |    |    |    |-- N6:AddressLine: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- N6:PostCode: struct (nullable = true)
 |    |    |    |-- N6:Identifier: struct (nullable = true)
 |    |    |    |    |-- _Type: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _AddressID: long (nullable = true)
 |    |    |-- _AddressIDType: string (nullable = true)
 |    |    |-- _Type: string (nullable = true)
 |    |    |-- _Usage: string (nullable = true)
 |    |-- N9:CountryOfOrigin: string (nullable = true)
 |    |-- _PartyID: long (nullable = true)
 |    |-- _PartyIDType: string (nullable = true)
 |    |-- _Type: string (nullable = true)
 |    |-- _VALUE: string (nullable = true)
 |-- _N8: string (nullable = true)
 |-- _PartyID: long (nullable = true)
 |-- _PartyIDType: string (nullable = true)
 |-- _type: string (nullable = true)
 |-- _xmlns: string (nullable = true)
 |-- _xsi: string (nullable = true)

Tags: true, string, value, type, element, array, struct, identifier