如何使用Selenium Python从HTML收集特定数据

import time

import requests
from bs4 import BeautifulSoup
from keyboard import press_and_release
from selenium import webdriver
from selenium.webdriver.common.by import By


def weather_forecast2():
    """Prompt for place names, search each on yr.no and dump the result page.

    For every place entered, the yr.no start page is opened in Microsoft
    Edge, the place is typed into the search box and Enter is pressed. The
    URL the browser ends up on is then fetched with ``requests``, its raw
    HTML is printed, and the HTML is parsed with BeautifulSoup.

    A single browser session is reused for all searches and is closed when
    the loop is left through an exception (including Ctrl-C).
    """
    print('Hello, I can search up the weather for you.')

    # One browser for the whole session; creating it inside the loop would
    # open a new window per search and never close any of them.
    driver = webdriver.Edge()  # Open Microsoft Edge
    try:
        while True:
            inp = input('Where shall I search? Enter a place :').capitalize()
            print('Alright, checking the weather in ' + inp + '...')

            URL = 'https://www.yr.no/nb'

            # --- Search for a place ---
            driver.get(URL)  # Goes to the HTML-page of the given URL
            # find_element(By.ID, ...) replaces find_element_by_id, which
            # newer Selenium releases no longer provide.
            element = driver.find_element(By.ID, "søk")  # Find the search input box
            element.send_keys(inp)  # Enter input
            # OS-level key press: requires the browser window to have focus.
            press_and_release('enter')  # Click enter

            cURL = driver.current_url  # Current URL

            # --- Find data ---
            driver.get(cURL)  # Goes to the HTML-page that appeared after clicking button
            r = requests.get(cURL)  # Get request for contents of the page
            print(r.content)  # Outputs HTML code for the page
            # Parse the data with BeautifulSoup(HTML-string, HTML-parser).
            # The parsed tree is not used any further in the visible code.
            soup = BeautifulSoup(r.content, 'html5lib')
    finally:
        driver.quit()

1条回答

网友

1楼 · 发布于 2024-06-16 14:18:12

假设您已经能够正确获取该 URL，那么可以把它用作 referer 请求头，并从中提取位置 ID，直接调用实际返回天气预报数据的 API。我没有您的 press_and_release 定义，因此测试代码时未包含这一部分。

import requests, re
from selenium import webdriver

# url = 'https://www.yr.no/nb/v%C3%A6rvarsel/daglig-tabell/2-6058560/Canada/Ontario/London'

def get_forecast(url: str) -> object:
    """Fetch the forecast JSON for the location encoded in a yr.no page URL.

    The location id is the path segment that follows ``daglig-tabell/`` in
    ``url``. It is used to build the forecast endpoint URL, which is
    requested with ``url`` sent as the ``referer`` header.

    Args:
        url: URL of a yr.no page containing ``daglig-tabell/<id>/``, e.g.
            ``https://www.yr.no/nb/v%C3%A6rvarsel/daglig-tabell/2-6058560/Canada/Ontario/London``.

    Returns:
        The decoded JSON body of the forecast response.

    Raises:
        ValueError: If ``url`` contains no ``daglig-tabell/<id>/`` segment.
    """
    # Validate before touching the network so a wrong page URL fails with a
    # clear message instead of an AttributeError on ``None.group``.
    match = re.search(r'daglig-tabell/(.*?)/', url)
    if match is None:
        raise ValueError(f'No location id found in URL: {url!r}')
    location_id = match.group(1)

    headers = {'user-agent': 'Mozilla/5.0', 'referer': url}
    response = requests.get(
        f'https://www.yr.no/api/v0/locations/{location_id}/forecast',
        headers=headers,
        timeout=10,  # without a timeout a stalled connection blocks forever
    )
    return response.json()


def get_forecast_url():
    """Prompt for place names, search each on yr.no and print its forecast.

    For every place entered, the yr.no start page is opened, the header
    search is used to look the place up, and the URL the browser is on
    afterwards is passed to ``get_forecast``; the result is printed.

    One browser session is reused for all searches and is always closed on
    exit. Entering an empty line, or sending EOF / Ctrl-C at the prompt,
    ends the loop.
    """
    # Imported locally: the file's top-level imports only bring in
    # ``webdriver`` from selenium.
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys

    print('Hello, I can search up the weather for you.')

    driver = webdriver.Chrome()  # The question used Edge; Chrome is used here.

    try:
        while True:
            try:
                inp = input('Where shall I search? Enter a place :').strip().capitalize()
            except (EOFError, KeyboardInterrupt):
                break
            if not inp:
                # Nothing to search for; a blank query would only lead to a
                # URL without a location id.
                break
            print('Alright, checking the weather in ' + inp + '...')

            URL = 'https://www.yr.no/nb'

            # --- Search for a place ---
            driver.get(URL)  # Goes to the HTML-page of the given URL
            # find_element(By.ID, ...) replaces find_element_by_id, which
            # newer Selenium releases no longer provide.
            driver.find_element(By.ID, "page-header__search-button").click()  # open search
            # Find the search input box
            element = driver.find_element(By.ID, "page-header__search-input")
            element.send_keys(inp)  # Enter input
            # Send Enter through WebDriver to the input itself; no external
            # key-press helper or window focus is needed.
            element.send_keys(Keys.ENTER)

            # NOTE(review): current_url is read immediately after Enter; if
            # the page has not navigated yet this may still be the start
            # page. Consider an explicit WebDriverWait here - confirm
            # against the live site.
            cURL = driver.current_url  # Current URL
            print(get_forecast(cURL))
    finally:
        driver.quit()

相关问题更多 >

编程相关推荐

热门问题

热门文章