网页抓取因等待加载评论而超时

0 投票
1 回答
50 浏览
提问于 2025-04-14 15:50

我想写一个 Python 脚本,用来读取谷歌地图上某个地点或商店的所有评论。我多次修改过代码,但总是出现超时异常(TimeoutException)。下面是我的代码:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def scrape_google_reviews(url):
    """Scrape review titles from a Google Maps place page.

    Starts a headless Chrome session, loads ``url``, waits up to 120 seconds
    for an element with class ``ODSEW-ShBeI-content`` to be present, and then
    collects the text of the ``.ODSEW-ShBeI-title`` child of every element
    with that class.

    Args:
        url: Address of the Google Maps place page to load.

    Returns:
        A list with one string per review element found, or ``None`` when
        waiting for the reviews timed out or failed with another error (a
        message is printed in both cases).

    Raises:
        Exception: Errors raised while loading the page or while reading the
            review elements are not caught here; they propagate to the caller
            after the browser has been shut down.
    """
    # Set up Chrome WebDriver
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in headless mode, i.e., without opening browser window
    chromedriver_path = 'C:/Users/Downloads/chromedriver-win64/chromedriver.exe'  # Specify path to chromedriver executable
    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Everything that uses the driver runs inside try/finally so the browser
    # is closed on every exit path, including unexpected exceptions.
    try:
        # Load the Google Maps URL
        driver.get(url)

        # Wait for the reviews to load
        try:
            WebDriverWait(driver, 120).until(EC.presence_of_element_located((By.CLASS_NAME, "ODSEW-ShBeI-content")))
        except TimeoutException as e:
            print("Timeout occurred while waiting for reviews to load:", e)
            return None
        except Exception as e:
            print("An error occurred while waiting for reviews to load:", e)
            return None

        # Extract review elements
        review_elements = driver.find_elements(By.CLASS_NAME, "ODSEW-ShBeI-content")

        # Extract review details
        return [
            review_element.find_element(By.CSS_SELECTOR, ".ODSEW-ShBeI-title").text
            for review_element in review_elements
        ]
    finally:
        # Close the WebDriver
        driver.quit()

# Example usage: scrape one place page and print each review, numbered from 1.
url = "https://www.google.com/maps/place/FASTECH+SOLUTIONS/@18.5165309,73.8457059,18.29z/data=!4m6!3m5!1s0x3bc2c160b5caf2dd:0x6d49235d88bd5d25!8m2!3d18.5161858!4d73.8459712!16s%2Fg%2F11t7drcv4g?entry=ttu"
reviews = scrape_google_reviews(url)
# A None result and an empty list are both reported as a failure.
if not reviews:
    print("Failed to scrape reviews.")
else:
    for i, review in enumerate(reviews, 1):
        print(f"Review {i}: {review}")

我不太确定哪里出了问题。我参考了好几篇博客(包括 GeeksforGeeks 上的一篇),但这些信息似乎都已经过时了。我的 Chrome 版本是 122.0.6261.113,chromedriver.exe 是从这里下载的:https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.128/win64/chromedriver-win64.zip

1 个回答

1

这里是获取评论的代码。

我没有使用动态生成的类名(例如 ODSEW-ShBeI-content)来定位评论,而是改用了一个看起来比较固定的类名 MyEned。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def scrape_google_reviews(url):
    """Scrape review texts from a Google Maps place page.

    Starts a headless Chrome session, loads ``url``, waits up to 120 seconds
    for an element with class ``MyEned`` to be present, and then collects the
    ``textContent`` attribute of every element with that class.

    Args:
        url: Address of the Google Maps place page to load.

    Returns:
        A list with one entry per matching element, or ``None`` when waiting
        for the reviews timed out or failed with another error (a message is
        printed in both cases).

    Raises:
        Exception: Errors raised while loading the page or while reading the
            review elements are not caught here; they propagate to the caller
            after the browser has been shut down.
    """
    # Set up Chrome WebDriver
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in headless mode, i.e., without opening browser window
    chromedriver_path = 'C:/Users/Downloads/chromedriver-win64/chromedriver.exe'  # Specify path to chromedriver executable
    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Everything that uses the driver runs inside try/finally so the browser
    # is closed on every exit path, including unexpected exceptions.
    try:
        # Load the Google Maps URL
        driver.get(url)

        # Wait for the reviews to load
        try:
            WebDriverWait(driver, 120).until(EC.presence_of_element_located((By.CLASS_NAME, "MyEned")))
        except TimeoutException as e:
            print("Timeout occurred while waiting for reviews to load:", e)
            return None
        except Exception as e:
            print("An error occurred while waiting for reviews to load:", e)
            return None

        # Extract review elements
        review_elements = driver.find_elements(By.CLASS_NAME, "MyEned")
        # Debug output listing the matched WebElement objects
        print("review_elemtns", review_elements)

        # Extract review details
        return [
            review_element.get_attribute("textContent")
            for review_element in review_elements
        ]
    finally:
        # Close the WebDriver
        driver.quit()

# Example usage: scrape one place page and print each review, numbered from 1.
url = "https://www.google.com/maps/place/FASTECH+SOLUTIONS/@18.5165309,73.8457059,18.29z/data=!4m6!3m5!1s0x3bc2c160b5caf2dd:0x6d49235d88bd5d25!8m2!3d18.5161858!4d73.8459712!16s%2Fg%2F11t7drcv4g?entry=ttu"
reviews = scrape_google_reviews(url)
# A None result and an empty list are both reported as a failure.
if not reviews:
    print("Failed to scrape reviews.")
else:
    for i, review in enumerate(reviews, 1):
        print(f"Review {i}: {review}")

下面是输出的截图。

在这里输入图片描述

撰写回答