在 Python 中抓取无限滚动页面时，添加抓取条件出现问题

# Record the page height before scrolling so later growth can be detected.
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    # Scroll down to bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Wait to load page (random pause of 1-10 seconds)
    time.sleep(randint(1, 10))
    # Condition: report every anchor element that carries an href.
    # NOTE(review): `page` is not reassigned anywhere in this snippet, so this
    # loop presumably sees the same parsed document on every pass -- confirm
    # whether it should be rebuilt from the driver after each scroll.
    for a in page.find_all('a', href=True):
        print("Found the URL:", a['href'])
    # Calculate new scroll height and compare with last scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        # The page stopped growing, so there is nothing more to load.
        break
    last_height = new_height

1条回答

网友

1楼 · 发布于 2024-06-16 10:41:25

def scroll(driver, timeout, max_scrolls=None):
    """Scroll to the bottom of the page until its height stops changing.

    Args:
        driver: Object exposing ``execute_script(js)``; presumably a Selenium
            WebDriver -- confirm against the caller.
        timeout: Seconds to sleep after each scroll so new content can load.
        max_scrolls: Optional upper bound on the number of scroll attempts.
            ``None`` (the default) scrolls until the height stabilises, which
            never happens on a feed that keeps growing.

    Returns:
        None. The function is used for its scrolling side effect only.
    """
    scroll_pause_time = timeout

    # Baseline height to compare against after the first scroll.
    last_height = driver.execute_script("return document.body.scrollHeight")

    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1

        # Wait to load page
        time.sleep(scroll_pause_time)

        # An unchanged height means no new content was appended.
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

# Scroll until the page stops growing, pausing a random 2-5 seconds per step.
scroll(driver, randint(2, 5))

# Parse the rendered page source once scrolling has finished.
page = BeautifulSoup(driver.page_source, 'lxml')

# `count` starts at 0 so it is still defined when no anchor matches.
count = 0
for count, a in enumerate(page.find_all('a', href=True), start=1):
    print(count)
    print("Found the URL:", a['href'])

相关问题更多 >

编程相关推荐

热门问题

热门文章