我的 Scrapy 分页工作正常,但每一页抓到的都只是第一页的数据

2024-04-26 17:09:01 发布

您现在位置:Python中文网/ 问答频道 /正文

我是 Python 新手,我的 Scrapy 爬虫是参考许多示例代码拼凑出来的,但它在每一页上抓到的都只是第一页的数据。问题出在哪里?我的代码如下:

import scrapy
from scrapy.item import Item, Field
class HotelAbbasiItem(Item):
    """Container for the data scraped from one review card.

    Every field is filled by ``HotelabbasiSpider.parse`` with the result of
    a ``.extract()`` call, i.e. a list of strings rather than a single value.
    """

    # Text of the member link inside the card.
    reviewer=Field()
    # Text of every <span> inside the card (the selector is not narrowed
    # further) -- presumably contains the review date; confirm against the page.
    DateOfReview=Field()
    # Text of the ``span.small`` elements in the card -- presumably the
    # reviewer's location; confirm against the page.
    Nationality=Field()
    # Text of the bold member-header statistics spans.
    Contribution=Field()
    # Text of the spans directly under the expandable review <q> element.
    ReviewText=Field()
    # ``class`` attribute of the spans inside the rating-bubbles div; the
    # rating itself is presumably encoded in that class name -- TODO confirm.
    Rating=Field()

class HotelabbasiSpider(scrapy.Spider):
    """Scrape review cards from a TripAdvisor hotel page and follow pagination.

    Crawling starts at the single URL in ``start_urls``. Each response is
    handled by :meth:`parse`, which yields one ``HotelAbbasiItem`` per review
    card and then a request for the next page when a link to it is present.
    """

    name = 'HotelAbbasi'
    allowed_domains = ['tripadvisor.com']
    start_urls = ['https://www.tripadvisor.com/Hotel_Review-g295423-d320767-Reviews-Abbasi_Hotel-Isfahan_Isfahan_Province.html']

    def parse(self, response):
        """Yield one item per review card, then a request for the next page.

        Args:
            response: The downloaded page to extract review cards from.

        Yields:
            ``HotelAbbasiItem`` objects whose fields are lists of extracted
            strings, followed by at most one ``scrapy.Request`` for the next
            page, handled by this same method.
        """
        cards = response.css('div.hotels-community-tab-common-Card__section--4r93H')
        for card in cards:
            # Build a NEW item for every card. Re-using a single item created
            # outside the loop means every yield hands out the same mutable
            # object, so anything that keeps a reference to an earlier item
            # (pipelines, exporters, deferred processing) would see it
            # overwritten with the values of later cards.
            item = HotelAbbasiItem()
            item['reviewer'] = card.css('a.social-member-event-MemberEventOnObjectBlock__member--35-jC::text').extract()
            item['DateOfReview'] = card.css('span::text').extract()
            item['Nationality'] = card.css('span.small::text').extract()
            item['Contribution'] = card.css('span.social-member-MemberHeaderStats__bold--3z3qh::text').extract()
            item['ReviewText'] = card.css('q.location-review-review-list-parts-ExpandableReview__reviewText--gOmRC>span::text').extract()
            item['Rating'] = card.css('div.location-review-review-list-parts-RatingLine__bubbles--GcJvM>span::attr(class)').extract()
            yield item

        # The href may be relative, so resolve it against the current URL
        # before scheduling the request.
        next_page = response.css('div.is-centered>a.primary::attr(href)').extract_first()
        if next_page:
            yield scrapy.Request(url=response.urljoin(next_page), callback=self.parse)

Tags: text div field extract items css scrapy span