<p>下面的代码会模拟点击“下一页”按钮。请把它放进一个 <code>scrapy</code> 爬虫(spider)中,具体用法参见 <a href="https://docs.scrapy.org/en/latest/intro/overview.html" rel="nofollow noreferrer">Scrapy 文档</a>。</p>
<pre><code># -*- coding: utf-8 -*-
import scrapy
from scrapy.utils.response import open_in_browser
import pandas as pd
class TestSpider(scrapy.Spider):
    """Scrape the Starlink launch-pass table and keep following the "next" button.

    The target is an ASP.NET WebForms page: moving to the next page means
    re-posting the form's hidden state fields together with the value of
    the "next" button, which is what ``parse`` does after reading the table.
    """

    name = 'test'
    allowed_domains = ['heavens-above.com']

    def start_requests(self):
        """Yield the initial GET request for the pass-prediction page."""
        url = "https://heavens-above.com/StarlinkLaunchPasses.aspx?lat=45.61&amp;lng=15.312&amp;loc=Somewhere&amp;alt=0&amp;tz=CET"
        yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Read the passes table, then post the form to request the next page.

        Pagination stops (with a warning) when the table, the selected
        launch option, or any hidden form field is missing from the page,
        instead of raising while building the next request.
        """
        # Local import keeps this block self-contained.
        from io import StringIO

        # open_in_browser(response)  # uncomment to inspect the response
        table = response.xpath('//table[@class="standardTable"]').extract_first()
        if table is None:
            self.logger.warning("No standardTable found on %s", response.url)
            return

        # read_html returns a list of DataFrames (one per table element);
        # wrapping the markup in StringIO avoids the literal-HTML deprecation.
        tables = pd.read_html(StringIO(table))
        df = tables[0]
        # do what you want with df

        # going to next page
        to_post = response.urljoin(
            response.xpath('//form[@name="aspnetForm"]/@action').extract_first()
        )

        # Several dropdowns carry a selected option; the last one on the
        # page is the value posted for the launch dropdown.
        selected = response.xpath('//option[@selected="selected"]/@value').extract()
        if not selected:
            self.logger.warning("No selected option found on %s", response.url)
            return

        data = {
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            '__LASTFOCUS': '',
            '__VIEWSTATE': response.xpath('//*[@id="__VIEWSTATE"]/@value').extract_first(),
            '__VIEWSTATEGENERATOR': response.xpath('//*[@id="__VIEWSTATEGENERATOR"]/@value').extract_first(),
            'utcOffset': response.xpath('//*[@id="utcOffset"]/@value').extract_first(),
            'ctl00$ddlCulture': 'en',
            'ctl00$cph1$hidStartUtc': response.xpath('//*[@id="ctl00_cph1_hidStartUtc"]/@value').extract_first(),
            # Listed once: the original repeated this key, so its first
            # value was silently overwritten by this one.
            'ctl00$cph1$ddlLaunches': selected[-1],
            'ctl00$cph1$btnNext': '>',
        }

        # FormRequest cannot encode None, so stop cleanly if a field is absent.
        missing = [key for key, value in data.items() if value is None]
        if missing:
            self.logger.warning(
                "Stopping pagination on %s; missing form fields: %s",
                response.url,
                ", ".join(missing),
            )
            return

        yield scrapy.http.FormRequest(to_post, callback=self.parse, formdata=data)
</code></pre>