Web-scraping a JS table with Python/Selenium without using JSON

Published 2024-04-19 20:22:12


I want to scrape the table at:

https://www2.sgx.com/securities/annual-reports-financial-statements

I understand this is possible by inspecting the request headers and finding an API call like this one: https://api.sgx.com/financialreports/v1.0?pagestart=3&pagesize=250&params=id,companyName,documentDate,securityName,title,url. But I would like to know whether I can get all the data from the table without going that route, since it would mean parsing 16 JSON responses.
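As an aside, the `%2C`-escaped `params` list in that URL does not have to be hand-encoded. A minimal sketch of building the same query string with the standard library (the field list is the one from the URL above; the pagination behavior is assumed to match it):

```python
from urllib.parse import urlencode

API_BASE = 'https://api.sgx.com/financialreports/v1.0'
FIELDS = ['id', 'companyName', 'documentDate', 'securityName', 'title', 'url']

def build_url(page_start, page_size=250):
    """Build the query string for one page of the financialreports endpoint."""
    query = urlencode({
        'pagestart': page_start,
        'pagesize': page_size,
        'params': ','.join(FIELDS),  # urlencode escapes the commas as %2C
    })
    return f'{API_BASE}?{query}'

print(build_url(3))
```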

When I try with Selenium, I can only reach the end of the currently visible table (after clicking "Clear all" on the left, the table grows to include all the data I need).

Any ideas are welcome!

Edit: here is the code; it returns only 144 of the thousands of cells in the table.

from time import sleep  # to wait for page elements to appear

from selenium import webdriver  # to interact with the site
from selenium.common.exceptions import WebDriverException  # raised when the URL is bad
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager  # installs and locates chromedriver


BASE_URL = 'https://www2.sgx.com/securities/annual-reports-financial-statements'
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()

try:
    driver.get(BASE_URL)
except WebDriverException:
    print("The given URL is not working, please try again.")
    exit()

# click away the pop-up
sleep(5)
header = driver.find_element(By.ID, "website-header")
driver.execute_script("arguments[0].click();", header)

# click the "Clear all" button to clear the calendar filter
sleep(2)
clear_field = driver.find_element(By.XPATH, '/html/body/div[1]/main/div[1]/article/template-base/div/div/sgx-widgets-wrapper/widget-filter-listing/widget-filter-listing-financial-reports/section[2]/div[1]/sgx-filter/sgx-form/div[2]/span[2]')
clear_field.click()

# select only Annual Reports
sleep(2)
driver.find_element(By.XPATH, "/html/body/div[1]/main/div[1]/article/template-base/div/div/sgx-widgets-wrapper/widget-filter-listing/widget-filter-listing-financial-reports/section[2]/div[1]/sgx-filter/sgx-form/div[1]/div[1]/sgx-input-select/label/span[2]/input").click()
sleep(1)
driver.find_element(By.XPATH, "//span[text()='Annual Report']").click()

# only the rows currently rendered in the DOM are found
rows = driver.find_elements(By.CLASS_NAME, "sgx-table-cell")
print(len(rows))
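The count stops at 144 because the sgx-table widget appears to virtualize its rows: only the visible slice exists in the DOM at any moment. If you stay with Selenium, one approach is to scroll the table container step by step and merge each newly rendered batch into what you have already collected. Below is a sketch of that bookkeeping; the Selenium loop itself is shown only as comments, since the container selector is a guess and needs a live browser:

```python
def merge_rows(collected, batch):
    """Merge one rendered batch of rows into `collected`, skipping duplicates.

    `collected` maps a stable key (here, the row's text) to a placeholder;
    dict insertion order is preserved, so iterating yields rows top-to-bottom.
    Returns `collected` for convenient chaining.
    """
    for row in batch:
        collected.setdefault(row, None)
    return collected

# Hypothetical Selenium loop (not runnable as-is; the selector is an assumption):
# container = driver.find_element(By.CSS_SELECTOR, 'sgx-table .sgx-table-body')
# collected = {}
# while True:
#     batch = [c.text for c in driver.find_elements(By.CLASS_NAME, 'sgx-table-cell')]
#     before = len(collected)
#     merge_rows(collected, batch)
#     if len(collected) == before:
#         break  # no new rows appeared: we reached the bottom
#     driver.execute_script('arguments[0].scrollTop += arguments[0].clientHeight;', container)
#     sleep(1)

# The pure part can be exercised without a browser; overlapping
# scroll windows produce duplicates, which are dropped:
rows = merge_rows({}, ['a', 'b'])
rows = merge_rows(rows, ['b', 'c'])
print(list(rows))  # ['a', 'b', 'c']
```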

1 Answer

I know you asked not to use the API, but I think using it is the cleaner approach.

(This outputs 3709 documents.)

import requests

# query-string template copied from the API call seen in the browser's network tab
URL_TEMPLATE = 'https://api.sgx.com/financialreports/v1.0?pagestart={}&pagesize=250&params=id%2CcompanyName%2CdocumentDate%2CsecurityName%2Ctitle%2Curl'

NUM_OF_PAGES = 16
data = []
# NOTE: if pagestart is 0-indexed, this skips the first page;
# use range(NUM_OF_PAGES) instead in that case
for page_num in range(1, NUM_OF_PAGES):
    r = requests.get(URL_TEMPLATE.format(page_num))
    if r.status_code == 200:
        data.extend(r.json()['data'])
print('we have {} documents'.format(len(data)))
for doc in data:
    print(doc)
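Once `data` holds the document dicts, writing them out to CSV takes only a few lines. A sketch assuming each record carries exactly the fields requested in the `params` list (the two sample records below are made up, standing in for the API's `data` list):

```python
import csv

FIELDS = ['id', 'companyName', 'documentDate', 'securityName', 'title', 'url']

# Made-up records standing in for the API response's `data` list.
data = [
    {'id': 1, 'companyName': 'ACME LTD', 'documentDate': '2020-04-01',
     'securityName': 'ACME', 'title': 'Annual Report 2019', 'url': 'https://example.com/a.pdf'},
    {'id': 2, 'companyName': 'FOO CORP', 'documentDate': '2020-03-15',
     'securityName': 'FOO', 'title': 'Annual Report 2019', 'url': 'https://example.com/b.pdf'},
]

with open('reports.csv', 'w', newline='') as f:
    # extrasaction='ignore' drops any fields the API adds beyond FIELDS
    writer = csv.DictWriter(f, fieldnames=FIELDS, extrasaction='ignore')
    writer.writeheader()
    writer.writerows(data)

print('wrote {} rows'.format(len(data)))
```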
