如何停止在scrapy中重复循环?

2024-03-28 10:46:44 发布

您现在位置:Python中文网/ 问答频道 /正文

我在这里刮一页,但是每当我执行这个代码时,about_page重复3次。如何结束这种重复。我只想要它给我一次about_page。你知道吗

import scrapy
class DmozSpiderSpider(scrapy.Spider):
    """Spider that scrapes headings, paragraphs and project links from the
    dmoz-odp.org "about" page.

    Bug fixed: the original ``find_items`` yielded *inside* its for-loop, so
    the same (progressively filled) items dict was emitted once per selector —
    three times with the three selectors below.  Yielding once, after the
    loop, emits a single complete item.
    """
    name = 'Dmoz'
    start_urls = ['http://dmoz-odp.org/']
    about_page = 'http://dmoz-odp.org/docs/en/about.html'

    def parse(self, response):
        # Seed the item dict and hand it to the next callback via request meta.
        items = {'About_page': self.about_page}
        # (page, callback) pairs — kept as a list so more pages can be added
        dct = [(self.about_page, self.parse_about)]
        for page, callback in dct:
            yield response.follow(page, callback, meta={'items': items})

    def find_items(self, response, names, finder):
        """Fill the shared items dict (from ``response.meta``) with one
        CSS-extracted list per name, then yield the dict exactly once.

        ``names`` and ``finder`` are paired positionally via their values.
        """
        items = response.meta['items']
        for name, find in zip(names.values(), finder.values()):
            items[name] = response.css(find).extract()
        # yield AFTER the loop: one complete item, not one per field
        yield items

    def parse_about(self, response):
        # Human-readable field names, paired positionally with CSS selectors.
        names = {'name1': 'Headings',
                 'name2': 'Paragraphs',
                 'name3': '3 Projects',
                 }
        finder = {'find1': 'h2::text , #mainContent h1::text',
                  'find2': 'p::text',
                  'find3': 'li~ li+ li b a::text , li:nth-child(1) b a::text',
                  }
        yield from self.find_items(response, names, finder)

Tags: text, name, self, finder, names, parse, response, def
1条回答
网友
1楼 · 发布于 2024-03-28 10:46:44

固定缩进:

def find_items(self, response, names, finder):
    """Populate the item dict carried in ``response.meta['items']`` with one
    extracted list per (name, selector) pair, then yield it a single time."""
    collected = response.meta['items']
    collected.update(
        (label, response.css(selector).extract())
        for label, selector in zip(names.values(), finder.values())
    )
    # single emission of the fully populated dict
    yield collected

相关问题 更多 >