from lxml import html
import requests
# Download the embassy-worldwide.com page for the United States and pull two
# lists of text out of its "posts-container" <div> elements.
# The timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(
    'https://www.embassy-worldwide.com/country/united-states/',
    timeout=30,
)
# Fail loudly on an HTTP error rather than silently parsing an error page
# into two empty lists.
page.raise_for_status()
tree = html.fromstring(page.content)
# Text of every <h2> that is a direct child of a posts container.
country = tree.xpath('//div[@class="posts-container col-md-6"]/h2/text()')
# Text of every <a> found at <ul>/<li>/<a> under a posts container.
embassy = tree.xpath('//div[@class="posts-container col-md-6"]/ul/li/a/text()')
print(country)
# The original printed `embessy`, an undefined name, which raised NameError.
print(embassy)
输出:
country:
['Belgium',
'Afghanistan',
'Albania',
'Andorra',
'Algeria',
'Antigua & Barbuda',
'Angola',...]
embassy:
['Honorary Consulate of Belgium in Phoenix',
'Consulate General of Armenia in Los Angeles',
'Permanent Mission of Afghanistan to United Nations',
'Consulate General of Afghanistan in Los',...]
这应该能帮你入门:使用上面的代码可以从该网站的列表中获取大使馆名称和国家名称。请先理解这段代码,然后再尝试提取电子邮件。
输出:
相关问题 更多 >
编程相关推荐