不使用WebDriverWait时我的代码报错：元素点击被拦截（element click intercepted）/使用WebDriverWait时报错：'NoneType'对象不可迭代（not iterable）

# Question's first attempt: no explicit wait is used. According to the
# question title, this version fails with "element click intercepted".
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

GROUP_HEAD_XPATH = "//tr[contains(@class,'group-head clickable')]"
MATCH_LINK_XPATH = "//td[contains(@class,'score-time')]//a"

options = Options()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
driver = webdriver.Chrome(
    r"C:\Users\Computador\Desktop\Python\chromedriver.exe",
    options=options,
)

url = "https://int.soccerway.com/matches/2021/07/28/"
driver.get(url)

# Open the language picker and choose the international site.
driver.find_element_by_xpath("//div[@class='language-picker-trigger']").click()
driver.find_element_by_xpath("//a[@href='https://int.soccerway.com']").click()
time.sleep(10)

# Click every competition header row, one after another.
for btn in driver.find_elements_by_xpath(GROUP_HEAD_XPATH):
    btn.click()
time.sleep(10)

# Print the href of every match link found on the page.
jogos = driver.find_elements_by_xpath(MATCH_LINK_XPATH)
for jogo in jogos:
    print(jogo.get_attribute("href"))

driver.quit()

# Question's second attempt: headless Chrome plus WebDriverWait. According to
# the question title, this version fails with a "not iterable" error; the
# faulty loop is kept exactly as posted.
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

GROUP_HEAD_XPATH = "//tr[contains(@class,'group-head clickable')]"
MATCH_LINK_XPATH = "//td[contains(@class,'score-time')]//a"

options = Options()
options.add_argument("start-maximized")
options.add_argument("headless")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
driver = webdriver.Chrome(
    r"C:\Users\Computador\Desktop\Python\chromedriver.exe",
    options=options,
)

url = "https://int.soccerway.com/matches/2021/07/28/"
driver.get(url)

# Open the language picker and choose the international site.
driver.find_element_by_xpath("//div[@class='language-picker-trigger']").click()
driver.find_element_by_xpath("//a[@href='https://int.soccerway.com']").click()
time.sleep(10)

# NOTE: the loop iterates directly over whatever the wait returns; this is
# the statement the question is asking about.
clickable = EC.element_to_be_clickable((By.XPATH, GROUP_HEAD_XPATH))
for btn in WebDriverWait(driver, 1).until(clickable):
    btn.click()
time.sleep(10)

# Print the href of every match link found on the page.
jogos = driver.find_elements_by_xpath(MATCH_LINK_XPATH)
for jogo in jogos:
    print(jogo.get_attribute("href"))

driver.quit()

https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fc-sheriff-tiraspol/alashkert-fc/3517568/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fk-neftchi/olympiakos-cfp/3517569/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/scs-cfr-1907-cluj-sa/newcastle-fc/3517571/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fc-midtjylland/celtic-fc/3517576/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fk-razgrad-2000/mura/3517574/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/galatasaray-sk/psv-nv/3517577/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/bsc-young-boys-bern/k-slovan-bratislava/3517566/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fk-crvena-zvezda-beograd/fc-kairat-almaty/3517570/ https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/ac-sparta-praha/sk-rapid-wien/3517575/ https://int.soccerway.com/matches/2021/07/28/world/olympics/saudi-arabia-u23/brazil--under-23/3497390/ https://int.soccerway.com/matches/2021/07/28/world/olympics/germany-u23/cote-divoire-u23/3497391/ https://int.soccerway.com/matches/2021/07/28/world/olympics/romania-u23/new-zealand-under-23/3497361/ https://int.soccerway.com/matches/2021/07/28/world/olympics/korea-republic-u23/honduras-u23/3497362/ https://int.soccerway.com/matches/2021/07/28/world/olympics/australia-under-23/egypt-under-23/3497383/ https://int.soccerway.com/matches/2021/07/28/world/olympics/spain-under-23/argentina-under-23/3497384/ https://int.soccerway.com/matches/2021/07/28/world/olympics/france-u23/japan-u23/3497331/ https://int.soccerway.com/matches/2021/07/28/world/olympics/south-africa-u23/mexico-u23/3497332/ https://int.soccerway.com/matches/2021/07/28/africa/cecafa-senior-challenge-cup/uganda-under-23/eritrea-under-23/3567664/

3条回答

网友

1楼 · 编辑于 2024-06-16 10:57:21

尝试添加以下Options参数：

options.add_argument("window-size=1440,900")

O/p

网友

2楼 · 编辑于 2024-06-16 10:57:21

Brondby IF，

我发现你的脚本有两个问题

首先是

# Faulty pattern quoted from the question: the loop iterates over the value
# returned by a wait on element_to_be_clickable, which the answer describes
# as a single WebElement rather than a list.
for btn in WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//tr[contains(@class,'group-head  clickable')]"))):
    btn.click()

基本上，这是错误的，因为element_to_be_clickable只会返回单个WebElement，因此您会得到“对象不可迭代”（not iterable）错误；而visibility_of_all_elements_located会返回一个列表，可以用它来代替

其次，您不能直接click，因为有些元素不在Selenium的视口（viewport）之内，所以我们必须使用ActionChains

见下文：

# Expand every competition header on the soccerway page and print the href of
# each match link. Header rows are clicked through ActionChains, which moves
# to each row before clicking it.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

options = webdriver.ChromeOptions()
# Chrome switches are written with a leading "--"; a leading space is not a
# valid switch prefix.
options.add_argument("--disable-infobars")
options.add_argument("start-maximized")
options.add_argument("--disable-extensions")
options.add_argument("--headless")
# Both preferences go into ONE "prefs" dict: calling
# add_experimental_option("prefs", ...) a second time replaces the value set
# by the first call instead of merging with it.
options.add_experimental_option("prefs", {
    "profile.default_content_setting_values.notifications": 2,
    "profile.default_content_settings.cookies": 2,
})

driver = webdriver.Chrome(options=options)
try:
    driver.implicitly_wait(30)
    driver.get("https://int.soccerway.com/")

    # Open the language picker and choose the international site.
    driver.find_element(By.XPATH, "//div[@class='language-picker-trigger']").click()
    driver.find_element(By.XPATH, "//a[@href='https://int.soccerway.com']").click()
    sleep(10)

    # visibility_of_all_elements_located returns a list, so it can be iterated.
    group_heads = WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, "//tr[contains(@class,'group-head  clickable')]")
        )
    )
    for btn in group_heads:
        ActionChains(driver).move_to_element(btn).click().perform()
    sleep(10)

    jogos = driver.find_elements(By.XPATH, "//td[contains(@class,'score-time')]//a")
    for jogo in jogos:
        resultado = jogo.get_attribute("href")
        print(resultado)
finally:
    # Always shut the browser down, even if a lookup or click above fails.
    driver.quit()

输出：

https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fc-sheriff-tiraspol/alashkert-fc/3517568/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fk-neftchi/olympiakos-cfp/3517569/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/scs-cfr-1907-cluj-sa/newcastle-fc/3517571/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fc-midtjylland/celtic-fc/3517576/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fk-razgrad-2000/mura/3517574/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/galatasaray-sk/psv-nv/3517577/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/bsc-young-boys-bern/k-slovan-bratislava/3517566/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/fk-crvena-zvezda-beograd/fc-kairat-almaty/3517570/
https://int.soccerway.com/matches/2021/07/28/europe/uefa-champions-league/ac-sparta-praha/sk-rapid-wien/3517575/
https://int.soccerway.com/matches/2021/07/28/world/olympics/saudi-arabia-u23/brazil--under-23/3497390/
https://int.soccerway.com/matches/2021/07/28/world/olympics/germany-u23/cote-divoire-u23/3497391/
https://int.soccerway.com/matches/2021/07/28/world/olympics/romania-u23/new-zealand-under-23/3497361/
https://int.soccerway.com/matches/2021/07/28/world/olympics/korea-republic-u23/honduras-u23/3497362/
https://int.soccerway.com/matches/2021/07/28/world/olympics/australia-under-23/egypt-under-23/3497383/
https://int.soccerway.com/matches/2021/07/28/world/olympics/spain-under-23/argentina-under-23/3497384/
https://int.soccerway.com/matches/2021/07/28/world/olympics/france-u23/japan-u23/3497331/
https://int.soccerway.com/matches/2021/07/28/world/olympics/south-africa-u23/mexico-u23/3497332/

网友

3楼 · 编辑于 2024-06-16 10:57:21

你不需要Selenium

Selenium永远不应该成为从网页抓取数据的首选方式。它速度很慢，而且通常比其他替代方案需要更多的代码。只要有可能，就应将requests与lxml解析器结合使用。在这个特定的用例中，您只是用selenium在不同的URL之间切换，而这些URL完全可以直接硬编码，因此从一开始就没有必要使用它

import requests
from lxml import html
import csv
import re
from datetime import datetime
import json

class GameCrawler(object):
    """Collect match URLs from soccerway for one user-supplied date.

    Constructing an instance prompts for a date on stdin and immediately
    fetches the list of competition URLs for that date. ``main()`` then visits
    each competition, gathers the match links whose timestamp falls on the
    requested date, and appends them to ``<date>.csv``.
    """

    def __init__(self):
        """Prompt for the date and prepare the session, URLs and output path.

        Raises:
            ValueError: if the typed date does not match ``%Y/%m/%d``.
        """
        self.input_date = input('Specify a date e.g. 2021/07/28: ')
        self.date_object = datetime.strptime(self.input_date, "%Y/%m/%d")
        # "2021/07/28" -> "2021-07-28.csv"
        self.output_file = '{}.csv'.format(re.sub('/', '-', self.input_date))
        self.ROOT_URL = 'https://int.soccerway.com'
        self.json_request_url = '{}/a/block_competition_matches_summary'.format(self.ROOT_URL)
        self.entry_point = '{}/matches/{}'.format(self.ROOT_URL, self.input_date)
        self.session = requests.Session()
        self.HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        self.all_game_urls = []
        # Performs an HTTP request; must run after session/HEADERS are set.
        self.league_urls = self.get_league_urls()

    def save_to_csv(self):
        """Append every collected match URL to ``self.output_file``, one per row."""
        # newline='' is what the csv module asks for; without it, platforms
        # that translate line endings produce an empty line after every row.
        with open(self.output_file, 'a+', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows([url] for url in self.all_game_urls)

    def request_other_pages(self, page_params):
        """Request another page of a competition's match summary block.

        Args:
            page_params: dict with ``round_id``, ``competition_id`` and
                ``page_count`` as produced by ``get_page_params``.

        Returns:
            The HTML fragment found under ``commands[0].parameters.content``
            of the JSON reply, parsed with lxml, or ``None`` when the server
            does not answer with status 200.
        """
        params = {
            'block_id': 'page_competition_1_block_competition_matches_summary_11',
            'callback_params': json.dumps({
                # NOTE(review): the "+ 2" offset is kept from the original
                # code; its meaning on the server side is not visible here.
                "page": page_params['page_count'] + 2,
                "block_service_id": "competition_summary_block_competitionmatchessummary",
                "round_id": int(page_params['round_id']),
                "outgroup": "",
                "view": 1,
                "competition_id": int(page_params['competition_id'])
            }),
            'action': 'changePage',
            'params': json.dumps({"page": page_params['page_count']}),
        }
        response = self.session.get(self.json_request_url, headers=self.HEADERS, params=params)
        if response.status_code != 200:
            return None
        json_data = json.loads(response.text)["commands"][0]["parameters"]["content"]
        return html.fromstring(json_data)

    def get_page_params(self, tree, response):
        """Extract the paging parameters of a competition page.

        Args:
            tree: parsed competition page (anything exposing ``xpath``).
            response: the response the page came from; only ``url`` is read.

        Returns:
            A dict with ``round_id`` (digits taken from a trailing
            ``r<digits>/`` in the URL), ``competition_id`` and ``page_count``,
            or ``{}`` when the URL carries no round id, the page exposes no
            ``data-competition`` attribute, or there is no page dropdown.
        """
        # The digits are mandatory: an optional group would let URLs that
        # merely end in "r/" through with round_id=None, which later breaks
        # the int() conversion in request_other_pages.
        res = re.search(r'r(\d+)/$', response.url)
        if res is None:
            return {}
        competition_ids = tree.xpath('//*[@data-competition]/@data-competition')
        page_count = len(tree.xpath('//*[@class="page-dropdown"]/option'))
        if not competition_ids or page_count == 0:
            return {}
        return {
            'round_id': res.group(1),
            'competition_id': competition_ids[0],
            'page_count': page_count,
        }

    def match_day_check(self, game):
        """Return True when the match's timestamp falls on the requested date.

        The whole calendar date (year, month, day) is compared, so a match
        played on the same day number of another month is rejected. The
        timestamp is interpreted in the local timezone.
        """
        timestamp = game.xpath('./@data-timestamp')[0]
        match_date = datetime.fromtimestamp(int(timestamp))
        return self.date_object.date() == match_date.date()

    def scrape_page(self, tree):
        """Append to ``self.all_game_urls`` each match link in ``tree`` that is on the requested date."""
        for game in tree.xpath('//*[@data-timestamp]'):
            game_url = game.xpath('./td[@class="score-time "]/a/@href')
            if game_url and self.match_day_check(game):
                self.all_game_urls.append('{}{}'.format(self.ROOT_URL, game_url[0]))

    def get_league_urls(self):
        """Fetch the date's entry page and return the absolute URL of every competition linked on it."""
        page = self.session.get(self.entry_point, headers=self.HEADERS)
        tree = html.fromstring(page.content)
        hrefs = tree.xpath('//th[@class="competition-link"]/a/@href')
        return ['{}{}'.format(self.ROOT_URL, league_url) for league_url in hrefs]

    def main(self):
        """Scrape every competition (including its extra pages) and save the result to CSV."""
        total = len(self.league_urls)
        for position, league_url in enumerate(self.league_urls, start=1):
            response = self.session.get(league_url, headers=self.HEADERS)
            tree = html.fromstring(response.content)
            self.scrape_page(tree)
            page_params = self.get_page_params(tree, response)
            # Count page_count down from N-1 to 1, requesting one extra page
            # per step; a page that fails to load is skipped.
            while page_params.get('page_count', 0) > 1:
                page_params['page_count'] -= 1
                tree = self.request_other_pages(page_params)
                if tree is None:
                    continue
                self.scrape_page(tree)
            print('Retrieved links for {} out of {} competitions'.format(position, total))
        self.save_to_csv()

if __name__ == '__main__':
    # Script entry point: build the crawler, then run the full scrape.
    crawler = GameCrawler()
    crawler.main()

那么什么时候才值得使用Selenium呢

如今，网站通常提供动态内容，因此如果您想要检索的数据不是静态加载的：

1. 检查浏览器的“网络”选项卡，查看是否有专门获取您感兴趣数据的请求；
2. 尝试用requests来模拟该请求。

如果由于网页的设计方式，第1点和第2点是不可能的，那么最好的选择是使用selenium，它将通过模拟用户交互获取所需的内容。对于HTML解析，您仍然可以选择使用lxml，或者您可以坚持使用selenium，它也提供了该功能

第一次编辑：

修正了OP提出的问题
包括对所提供代码的限制
代码重构
添加了日期检查，以确保仅保存在指定日期进行的比赛
添加了允许保存搜索结果的功能

第二次编辑：

添加了使用get_page_params()和request_other_pages()浏览每个列出的竞赛的所有页面的功能
更多代码重构

更新

你不需要Selenium

那么什么时候才值得使用Selenium呢

相关问题更多 >

编程相关推荐

热门问题

热门文章