网页抓取因等待加载评论而超时
我想写一个 Python 脚本,用来读取谷歌地图上某个特定地点或商店的所有评论。我多次尝试修改代码,但总是出现超时异常(TimeoutException)。下面是我的代码:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
def scrape_google_reviews(url):
    """Scrape the review texts shown on a Google Maps place page.

    Starts a headless Chrome session, loads ``url``, waits up to 120 seconds
    for at least one element with the class ``ODSEW-ShBeI-content`` to be
    present, and then collects the text of the ``.ODSEW-ShBeI-title``
    element found inside each such element.

    Args:
        url: Address of the Google Maps place page to load.

    Returns:
        A list with one text string per matched review element, or ``None``
        if waiting for the reviews timed out or raised another exception.
    """
    # Set up Chrome WebDriver
    chrome_options = Options()
    # Run in headless mode, i.e., without opening browser window
    chrome_options.add_argument("--headless")
    # Specify path to chromedriver executable
    chromedriver_path = 'C:/Users/Downloads/chromedriver-win64/chromedriver.exe'
    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Everything that uses the driver runs inside try/finally so the browser
    # process is shut down on every exit path, including exceptions raised
    # while loading the page or reading the review elements.
    try:
        # Load the Google Maps URL
        driver.get(url)

        # Wait for the reviews to load
        try:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located((By.CLASS_NAME, "ODSEW-ShBeI-content"))
            )
        except TimeoutException as e:
            print("Timeout occurred while waiting for reviews to load:", e)
            return None
        except Exception as e:
            print("An error occurred while waiting for reviews to load:", e)
            return None

        # Extract review elements
        review_elements = driver.find_elements(By.CLASS_NAME, "ODSEW-ShBeI-content")

        # Extract review details; the texts are read before the driver is
        # closed because WebElements are unusable after quit().
        return [
            review_element.find_element(By.CSS_SELECTOR, ".ODSEW-ShBeI-title").text
            for review_element in review_elements
        ]
    finally:
        # Close the WebDriver
        driver.quit()
# Example usage: scrape one place page and print its reviews, numbered from 1.
url = "https://www.google.com/maps/place/FASTECH+SOLUTIONS/@18.5165309,73.8457059,18.29z/data=!4m6!3m5!1s0x3bc2c160b5caf2dd:0x6d49235d88bd5d25!8m2!3d18.5161858!4d73.8459712!16s%2Fg%2F11t7drcv4g?entry=ttu"
reviews = scrape_google_reviews(url)
# A falsy result (None or an empty list) is reported as a failure.
if not reviews:
    print("Failed to scrape reviews.")
else:
    for i, review in enumerate(reviews, 1):
        print(f"Review {i}: {review}")
我不太确定哪里出了问题,我参考了好几个博客,包括一个来自geeksforgeeks的,但似乎这些信息都过时了。我的Chrome版本是122.0.6261.113,我从这里下载了chromedriver.exe:https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.128/win64/chromedriver-win64.zip
1 个回答
1
这里是获取评论的代码。
我没有使用动态生成的类名来获取评论,而是改用了一个看起来比较固定的类名 MyEned。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
def scrape_google_reviews(url):
    """Scrape the review texts shown on a Google Maps place page.

    Starts a headless Chrome session, loads ``url``, waits up to 120 seconds
    for at least one element with the class ``MyEned`` to be present, and
    then collects the ``textContent`` of every element with that class.

    Args:
        url: Address of the Google Maps place page to load.

    Returns:
        A list with one text string per matched review element, or ``None``
        if waiting for the reviews timed out or raised another exception.
    """
    # Set up Chrome WebDriver
    chrome_options = Options()
    # Run in headless mode, i.e., without opening browser window
    chrome_options.add_argument("--headless")
    # Specify path to chromedriver executable
    chromedriver_path = 'C:/Users/Downloads/chromedriver-win64/chromedriver.exe'
    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Everything that uses the driver runs inside try/finally so the browser
    # process is shut down on every exit path, including exceptions raised
    # while loading the page or reading the review elements.
    try:
        # Load the Google Maps URL
        driver.get(url)

        # Wait for the reviews to load
        try:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located((By.CLASS_NAME, "MyEned"))
            )
        except TimeoutException as e:
            print("Timeout occurred while waiting for reviews to load:", e)
            return None
        except Exception as e:
            print("An error occurred while waiting for reviews to load:", e)
            return None

        # Extract review elements
        review_elements = driver.find_elements(By.CLASS_NAME, "MyEned")
        # Debug output of the matched element list.
        print("review_elemtns", review_elements)

        # Extract review details; the texts are read before the driver is
        # closed because WebElements are unusable after quit().
        return [
            review_element.get_attribute("textContent")
            for review_element in review_elements
        ]
    finally:
        # Close the WebDriver
        driver.quit()
# Example usage: scrape one place page and print its reviews, numbered from 1.
url = "https://www.google.com/maps/place/FASTECH+SOLUTIONS/@18.5165309,73.8457059,18.29z/data=!4m6!3m5!1s0x3bc2c160b5caf2dd:0x6d49235d88bd5d25!8m2!3d18.5161858!4d73.8459712!16s%2Fg%2F11t7drcv4g?entry=ttu"
reviews = scrape_google_reviews(url)
# A falsy result (None or an empty list) is reported as a failure.
if not reviews:
    print("Failed to scrape reviews.")
else:
    for i, review in enumerate(reviews, 1):
        print(f"Review {i}: {review}")
下面是输出的截图。
