我正在尝试解析一些XML数据,并将其放入嵌套字典中供以后使用。然而,由于XML数据不是结构化的,据我所知,它是以子类型格式或块数据格式存在的,所以我不确定最佳方法。我目前正在尝试使用XPath,并认为这是这个数据结构的最佳选择,但我不确定如何以一种方式解析它,使我能够将它正确地添加到嵌套字典中。我试过下面的方法,但正如你可能猜到的,它是不正确的。有没有人能给我们建议一下解析这些数据的最佳方法
要分析的数据
<address addr="192.168.1.74" addrtype="ipv4"/>
<address addr="FC:75:16:03:D0:2A" addrtype="mac" vendor="D-Link International"/>
<hostnames>
</hostnames>
<ports><extraports state="closed" count="994">
<extrareasons reason="resets" count="994"/>
</extraports>
<port protocol="tcp" portid="80"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="443"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" tunnel="ssl" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="515"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="printer" product="LPRng" extrainfo="Not authorized" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3306"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="mysql" product="MySQL" extrainfo="unauthorized" method="probed" conf="10"><cpe>cpe:/a:mysql:mysql</cpe></service></port>
</ports>
<times srtt="16241" rttvar="1850" to="100000"/>
</host>
<host starttime="1443920156" endtime="1443920210"><status state="up" reason="arp-response" reason_ttl="0"/>
<address addr="192.168.1.126" addrtype="ipv4"/>
<address addr="00:0C:29:30:A1:C9" addrtype="mac" vendor="VMware"/>
<hostnames>
</hostnames>
<ports><extraports state="filtered" count="984">
<extrareasons reason="no-responses" count="984"/>
</extraports>
<port protocol="tcp" portid="53"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="domain" product="Microsoft DNS" version="6.1.7601" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="88"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kerberos-sec" product="Windows 2003 Kerberos" extrainfo="server time: 2015-10-04 00:56:07Z" ostype="Windows" method="probed" conf="10"><cpe>cpe:/a:microsoft:kerberos</cpe><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="135"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="389"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="464"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kpasswd5" method="table" conf="3"/></port>
<port protocol="tcp" portid="593"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="636"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="3268"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3269"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="49154"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49155"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49157"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49158"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49161"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
</ports>
<times srtt="5951" rttvar="5849" to="100000"/>
</host>
<runstats><finished time="1443920210" timestr="Sun Oct 4 01:56:50 2015" elapsed="53.38" summary="Nmap done at Sun Oct 4 01:56:50 2015; 2 IP addresses (2 hosts up) scanned in 53.38 seconds" exit="success"/><hosts up="2" down="0" total="2"/>
</runstats>
</nmaprun>
当前尝试
from xml.etree import ElementTree
import os
nmap_file = 'Test.xml'
dictionary = {}
dictionary['host'] = {}
dictionary['host']['port'] = {}
dictionary['host']['port']['service'] = {}
with open(nmap_file, 'rt') as f:
tree = ElementTree.parse(f)
for node in tree.findall('.//address'):
if (node.attrib.get('addrtype') == 'ipv4'):
host = node.attrib.get('addr')
dictionary['host'] = host
for node in tree.findall('.//port'):
port = node.attrib.get('portid')
dictionary['host']['port'] = port
for node in tree.findall('.//service'):
product = node.attrib.get('product')
dictionary['host']['port']['service'] = product
print dictionary
考虑使用lxml模块、xpath和源于嵌套字典的列表的稍微不同的方法。请参阅有关创建nested dictionaries的信息性教程
输出(注意:只有列出了服务产品的端口才会显示在字典中)
相关问题 更多 >
编程相关推荐