用ElementTree解析非结构化XML

2024-06-01 02:21:43 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试解析一些XML数据,并将其放入嵌套字典中供以后使用。然而,由于XML数据不是结构化的,据我所知,它是以子类型格式或块数据格式存在的,所以我不确定最佳方法。我目前正在尝试使用XPath,并认为这是这个数据结构的最佳选择,但我不确定如何以一种方式解析它,使我能够将它正确地添加到嵌套字典中。我试过下面的方法,但正如你可能猜到的,它是不正确的。有没有人能给我建议一下解析这些数据的最佳方法?

要分析的数据

<address addr="192.168.1.74" addrtype="ipv4"/>
<address addr="FC:75:16:03:D0:2A" addrtype="mac" vendor="D-Link International"/>
<hostnames>
</hostnames>
<ports><extraports state="closed" count="994">
<extrareasons reason="resets" count="994"/>
</extraports>
<port protocol="tcp" portid="80"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="443"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" tunnel="ssl" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="515"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="printer" product="LPRng" extrainfo="Not authorized" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3306"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="mysql" product="MySQL" extrainfo="unauthorized" method="probed" conf="10"><cpe>cpe:/a:mysql:mysql</cpe></service></port>
</ports>
<times srtt="16241" rttvar="1850" to="100000"/>
</host>
<host starttime="1443920156" endtime="1443920210"><status state="up" reason="arp-response" reason_ttl="0"/>
<address addr="192.168.1.126" addrtype="ipv4"/>
<address addr="00:0C:29:30:A1:C9" addrtype="mac" vendor="VMware"/>
<hostnames>
</hostnames>
<ports><extraports state="filtered" count="984">
<extrareasons reason="no-responses" count="984"/>
</extraports>
<port protocol="tcp" portid="53"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="domain" product="Microsoft DNS" version="6.1.7601" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="88"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kerberos-sec" product="Windows 2003 Kerberos" extrainfo="server time: 2015-10-04 00:56:07Z" ostype="Windows" method="probed" conf="10"><cpe>cpe:/a:microsoft:kerberos</cpe><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="135"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="389"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="464"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kpasswd5" method="table" conf="3"/></port>
<port protocol="tcp" portid="593"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="636"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="3268"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3269"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="49154"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49155"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49157"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49158"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49161"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
</ports>
<times srtt="5951" rttvar="5849" to="100000"/>
</host>
<runstats><finished time="1443920210" timestr="Sun Oct  4 01:56:50 2015" elapsed="53.38" summary="Nmap done at Sun Oct  4 01:56:50 2015; 2 IP addresses (2 hosts up) scanned in 53.38 seconds" exit="success"/><hosts up="2" down="0" total="2"/>
</runstats>
</nmaprun>

当前尝试

from xml.etree import ElementTree
import os

# Path of the Nmap XML report to parse.
nmap_file = 'Test.xml'

# Pre-build an empty nested skeleton: host -> port -> service.
dictionary = {}
dictionary['host'] = {}
dictionary['host']['port'] = {}
dictionary['host']['port']['service'] = {}


with open(nmap_file, 'rt') as f:
    tree = ElementTree.parse(f)

# Visit every <address> element anywhere in the document.
for node in tree.findall('.//address'):
    # Only addresses whose addrtype attribute is 'ipv4' are processed.
    if (node.attrib.get('addrtype') == 'ipv4'):
        host = node.attrib.get('addr')
        # NOTE(review): this replaces the nested dict stored under 'host'
        # with the address value itself, discarding the skeleton built above.
        dictionary['host'] = host
        # NOTE(review): the search runs over the whole tree, not just the
        # host this address belongs to, and it rebinds the outer loop's
        # `node` name.
        for node in tree.findall('.//port'):
            port = node.attrib.get('portid')
            # NOTE(review): dictionary['host'] now holds `host` (a string
            # when the addr attribute is present), so this item assignment
            # raises TypeError on the first <port> found.
            dictionary['host']['port'] = port

        # NOTE(review): same whole-tree search as above, here for every
        # <service> element in the document.
        for node in tree.findall('.//service'):
            product = node.attrib.get('product')
            dictionary['host']['port']['service'] = product


# Python 2 print statement.
print dictionary

Tags: nameportconfserviceopenprotocolmethodtcp
1条回答
网友
1楼 · 发布于 2024-06-01 02:21:43

可以考虑一种稍有不同的方法:使用lxml模块和XPath,由查询得到的列表构建嵌套字典。请参阅有关创建嵌套字典(nested dictionaries)的教程

import lxml.etree as et
import os

# Path of the Nmap XML report to parse.
nmap_file = 'Test.xml'
with open(nmap_file, 'rt') as f:
    tree = et.parse(f)

# Maps each host's IPv4 address to a {portid: service product} dict.
dictionary = {}
hosts = tree.xpath('//host')

# Iterate over the <host> elements themselves and query relative to each
# one, instead of re-searching the whole document with a positional
# //host[i] expression on every pass.
for host in hosts:
    ipv4_addrs = host.xpath("address[@addrtype='ipv4']/@addr")
    if not ipv4_addrs:
        # A host without an IPv4 address has no key to be stored under.
        continue

    # Read the port id and the product from the same <port> element so the
    # two values can never get out of step with each other. Only ports whose
    # service has a non-empty product attribute are kept.
    services = {}
    for port in host.xpath(
            "ports/port[@portid][string-length(service/@product)>0]"):
        services[port.get('portid')] = port.xpath("string(service/@product)")

    dictionary[ipv4_addrs[0]] = services

print(dictionary)

输出(注意:只有列出了服务产品的端口才会显示在字典中)

{'192.168.1.74': {'80': 'lighttpd', 
                  '445': 'Samba smbd', 
                  '139': 'Samba smbd', 
                  '443': 'lighttpd', 
                  '515': 'LPRng', 
                  '3306': 'MySQL'}}
{'192.168.1.126': {'49161': 'Microsoft Windows RPC', 
                   '135': 'Microsoft Windows RPC', 
                   '53': 'Microsoft DNS', 
                   '49157': 'Microsoft Windows RPC over HTTP', 
                   '593': 'Microsoft Windows RPC over HTTP', 
                   '49155': 'Microsoft Windows RPC', 
                   '49158': 'Microsoft Windows RPC', 
                   '88': 'Windows 2003 Kerberos', 
                   '49154': 'Microsoft Windows RPC'}}

相关问题 更多 >