我想网上刮预定义的链接。 我想从https://www.outdooractive.com/de/抓取徒步旅行,在一个特定的区域,所以我用20个链接定义了这个区域。到现在为止,一直都还不错。我得到了一个链接的数据,但是当我试图在页面列表中循环时,它只经过一个链接。我希望这只是我在逻辑思维上的无能。如果有人能帮我,我会很高兴的。在
这是我的密码。只有三个环节不是全部。在
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
import codecs
webliste = []
webliste.append('https://www.outdooractive.com/de/touren/#cat=Wanderung&view=listMap&wt=Nationalpark%20Bayerischer%20Wald%20(94258%20Frauenau%20Germany)&zc=14,13.54301,48.94731')
webliste.append("https://www.outdooractive.com/de/touren/#cat=Wanderung&view=listMap&wt=Nationalpark%20Bayerischer%20Wald%20(94258%20Frauenau%20Germany)&zc=14,13.5443,48.88763")
webliste.append("https://www.outdooractive.com/de/touren/#cat=Wanderung&view=listMap&wt=Nationalpark%20Bayerischer%20Wald%20(94258%20Frauenau%20Germany)&zc=14,13.4589,48.93163")
for Page in webliste:
class Page(QWebEnginePage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebEnginePage.__init__(self)
self.html = ''
self.loadFinished.connect(self._on_load_finished)
self.load(QUrl(url))
self.app.exec_()
def _on_load_finished(self):
self.html = self.toHtml(self.Callable)
print('Load finished')
def Callable(self, html_str):
self.html = html_str
self.app.quit()
page = Page(webliste[0+1])
filename = "WandertourenLinks.csv"
f = codecs.open(filename, "w","utf-8")
headers ="Tour Name" + ";" + "Länge" + ";" + "Zeit" + ";" + "Aufstieg" + ";" + "Abstieg" + ";" + "Link zur Tour"+ ";" + "Anbieter\n"
f.write(headers)
def main():
soup = bs.BeautifulSoup(page.html, 'html.parser')
containers = soup.findAll("div", {"class":"oax_dp_snippet"})
print ("Anzahl der gefundenen touren", len(containers))
#loop
for container in containers:
tour_container = container.findAll("span",{"dir":"auto"})
cont = tour_container[0].text
print("Name der Tour: ", cont)
tour_name = cont
tour_data = container.findAll("div",{"class":"oax_tour_data oax_fl"})
leange = tour_data[0].text.strip()
zeit = tour_data[1].text.strip()
aufstieg = tour_data[2].text.strip()
abstieg = tour_data[3].text.strip()
print("Länge der Tour: ", leange)
print("Länge der Tour: ", zeit)
print("Länge der Tour: ", aufstieg)
print("Länge der Tour: ", abstieg)
link = container.a["href"]
link_a = link
print ("Link zur Tour: ", link)
tour_anbieter = container.findAll("div",{"class":"oax_var_pos oax_var_pos_bottom oax_font_smaller oax_line_height_14 oax_ellipsis"})
anbieter = tour_anbieter[0].text.strip()
print("Tourenanbieter: ", anbieter)
f.write(tour_name + ";" + leange + ";" + zeit + ";" + aufstieg + ";" + abstieg + ";" + link+ ";" + anbieter+ "\n")
f.close()
if __name__ == '__main__': main()
@Steve Haigh谢谢你给我的第二条建议是最好的。知道一切都有用。我知道它不是很性感,但它很管用;)
相关问题 更多 >
编程相关推荐