我如何用 Python 抓取所有日期（赛季）的数据

from time import sleep
from urllib.parse import urlparse

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException


def get_urls_season(url_path):
    """Open the soccerway page at ``url_path`` and return its fixture URLs.

    Prints the number of distinct URLs and asks for confirmation on stdin;
    any answer other than "y" exits the program.
    """
    driver = webdriver.Chrome()
    driver.fullscreen_window()
    driver.get("https://us.soccerway.com" + url_path)
    click_privacy_policy(driver)
    date = date_selector(driver)
    #url_list = cycle_through_game_weeks(driver)
    # NOTE: url_list is not assigned anywhere in this function (the
    # assignment above is commented out and date_selector returns nothing).
    url_list.reverse()
    driver.quit()
    print("=" * 100)
    print(f"{len(set(url_list))} find")
    if input("con? (y/n): ") != "y":
        exit()
    return url_list


def date_selector(driver):
    """Select each season of the hard-coded year range in the season dropdown.

    For the range "2010-2012" the options "2010/2011" and "2011/2012" are
    clicked in turn, and the game weeks are walked after each click.
    """
    inptdate='2010-2012'
    startdate=inptdate.split('-')[0]
    enddate=inptdate.split('-')[1]
    while int(startdate)< int(enddate):
        # Option labels on the page have the form "<year>/<year + 1>".
        textstring=str(startdate) + "/" + str(int(startdate)+1)
        print(textstring)
        driver.find_element_by_xpath("//select[@name='season_id']/option[text()='" + textstring +"']").click()
        startdate=int(startdate)+1
        # NOTE: url_list is rebound on every iteration and never returned.
        url_list = cycle_through_game_weeks(driver)


def click_privacy_policy(driver):
    """Click the consent button if it is present; do nothing otherwise."""
    try:
        driver.find_element_by_class_name("qc-cmp-button").click()
    except NoSuchElementException:
        pass


def cycle_through_game_weeks(driver):
    """Collect fixture URLs from the current page and every previous page.

    Keeps clicking the "previous" button until it is disabled, waiting two
    seconds after each click before reading the page.
    """
    season_urls = get_fixture_urls(innerhtml_soup(driver))

    while is_previous_button_enabled(driver):
        click_previous_button(driver)
        sleep(2)

        urls = get_fixture_urls(innerhtml_soup(driver))
        urls.reverse()
        season_urls += urls

    return season_urls


def is_previous_button_enabled(driver):
    """Return True unless the "previous" button's class is "previous disabled"."""
    return driver.find_element_by_id(
        "page_competition_1_block_competition_matches_summary_5_previous"
    ).get_attribute("class") != "previous disabled"


def click_previous_button(driver):
    """Click the "previous" button of the matches summary block."""
    driver.find_element_by_id(
        "page_competition_1_block_competition_matches_summary_5_previous"
    ).click()


def get_fixture_urls(soup):
    """Return the URL path of every ``.info-button.button > a`` link in ``soup``."""
    urls = []
    for elem in soup.select(".info-button.button > a"):
        urls.append(urlparse(elem.get("href")).path)
    return urls


def innerhtml_soup(driver):
    """Parse the current page's ``<html>`` innerHTML into a BeautifulSoup tree."""
    html = driver.find_element_by_tag_name("html").get_attribute("innerHTML")
    soup = BeautifulSoup(html, "html.parser")
    return soup

1条回答

网友

1楼 · 发布于 2024-04-19 19:57:28

如果我正确理解了代码，问题就出在这里：

url_list = cycle_through_game_weeks(driver)

在每次迭代中，您都在用新的 url_list 覆盖旧的 url_list。最简单的解决方案是：

url_list += cycle_through_game_weeks(driver)

更加优雅有效：

   # Fragment of the date_selector body: accumulate the results of every
   # season instead of rebinding url_list on each iteration.
   url_list = []
   while int(startdate)< int(enddate):
        # Option labels on the page have the form "<year>/<year + 1>".
        textstring=str(startdate) + "/" + str(int(startdate)+1)
        print(textstring)
        driver.find_element_by_xpath("//select[@name='season_id']/option[text()='" + textstring +"']").click()
        startdate=int(startdate)+1
        # append() stores each season's result as one element of url_list.
        url_list.append(cycle_through_game_weeks(driver))
   return url_list

这样，url_list[0] 中是第一年的值，url_list[1] 中是第二年的值，以此类推。

相关问题更多 >

编程相关推荐

热门问题

热门文章