Scrapy 爬虫传递 response 时报错：缺少一个位置参数

# Scrapy automatically provides `response` to `parse()` when coming from `start_requests()`
def parse(self, response):
    """Hand the response to the parser registered for its site.

    The site key is read from ``response.meta['site']`` and looked up in
    ``self.dispatcher``; the matching callable is invoked with the response.
    """
    site = response.meta['site']  # same as "site = thesaurus"
    self.dispatcher[site](response)  # same as "self.dispatcher['thesaurus'](response)"

import scrapy


class WordSpider(scrapy.Spider):
    """Spider that looks up a keyword on thesaurus sites and saves the words.

    Each request carries its site key in ``meta['site']``; ``parse`` uses that
    key to select a site-specific parser from ``dispatcher``.
    """

    name = "words"

    def __init__(self, keyword='apprehensive', **kwargs):
        """Store the keyword to look up.

        Args:
            keyword: Word substituted into each site's URL.
            **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.
        """
        super().__init__(**kwargs)
        self.k = keyword

    # Utilities
    def make_csv(self, words):
        """Return *words* as one string, each word followed by a comma."""
        return ''.join(word + ',' for word in words)

    def save_words(self, words, fp):
        """Overwrite the file at path *fp* with the comma-terminated *words*."""
        # Mode 'w' truncates on open, so no explicit seek/truncate is needed.
        with open(fp, 'w') as f:
            f.write(self.make_csv(words))

    # site specific parsers
    def thesaurus(self, response):
        """Extract the words of the first relevancy list and save them."""
        filename = 'thesaurus.txt'
        words = []
        print("in func self is defined as ", self)
        # Only the first matching <ul> is used.
        for u in response.css('.relevancy-block ul')[:1]:
            words = u.css('.text::text').extract()
            print("words is ", words)
        # save_words takes the words first and the file path second.
        self.save_words(words, filename)

    def oxford(self, response=None):
        """Placeholder parser; not implemented yet."""
        # TODO: extract the words and save them to 'oxford.txt'.

    def collins(self, response=None):
        """Placeholder parser; not implemented yet."""
        # TODO: extract the words and save them to 'collins.txt'.

    # site/function mapping
    dispatcher = {
        'thesaurus': thesaurus,
        'oxford': oxford,
        'collins': collins,
    }

    def parse(self, response):
        """Dispatch the response to the parser registered for its site."""
        site = response.meta['site']
        # The dict stores the plain functions captured while the class body
        # ran, so looking one up does not bind it to the instance. Calling it
        # with only `response` raises "missing 1 required positional
        # argument: 'response'"; pass `self` explicitly.
        self.dispatcher[site](self, response)

    def start_requests(self):
        """Yield one request per enabled site, tagged with its site key."""
        urls = {
            'thesaurus': 'http://www.thesaurus.com/browse/%s?s=t' % self.k,
            # 'collins': 'https://www.collinsdictionary.com/dictionary/english-thesaurus/%s' % self.k,
            # 'oxford': 'https://en.oxforddictionaries.com/thesaurus/%s' % self.k,
        }
        for site, url in urls.items():
            print(site, url)
            yield scrapy.Request(url, meta={'site': site}, callback=self.parse)

1条回答

网友

1楼 · 发布于 2024-05-29 04:43:33

你的代码里有不少小错误。我顺手清理了一下，并遵循了一些常见的 Python/Scrapy 惯用写法 :)

import logging
import scrapy


# Utilities
# should probably use csv module here or `scrapy crawl -o` flag instead
def make_csv(words):
    """Return *words* as a single string, each word followed by a comma.

    Every word, including the last one, is followed by ``,``: a non-empty
    input therefore ends with a trailing comma and an empty input gives ``''``.
    """
    return ''.join(word + ',' for word in words)


def save_words(words, fp):
    """Write *words* to the file at *fp*, replacing any previous content.

    Args:
        words: Iterable of strings; formatted with ``make_csv``.
        fp: Path of the file to create or overwrite.
    """
    # Mode 'w' already truncates the file on open, so no explicit
    # seek/truncate is needed. The encoding is fixed so that non-ASCII words
    # do not depend on the platform's locale default.
    with open(fp, 'w', encoding='utf-8') as f:
        f.write(make_csv(words))


class WordSpider(scrapy.Spider):
    """Spider that looks up a keyword on thesaurus sites and saves the words.

    Each request carries its site key in ``meta['site']``; ``parse`` uses that
    key to pick the method of the same name on this spider.
    """

    name = "words"

    def __init__(self, keyword='apprehensive', **kwargs):
        """Store the keyword to look up.

        Args:
            keyword: Word substituted into each site's URL.
            **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.
        """
        super().__init__(**kwargs)
        self.k = keyword

    def start_requests(self):
        """Yield one request per enabled site, tagged with its site key."""
        urls = {
            'thesaurus': 'http://www.thesaurus.com/browse/%s?s=t' % self.k,
            # 'collins': 'https://www.collinsdictionary.com/dictionary/english-thesaurus/%s' % self.k,
            # 'oxford': 'https://en.oxforddictionaries.com/thesaurus/%s' % self.k,
        }

        for site, url in urls.items():
            yield scrapy.Request(url, meta={'site': site}, callback=self.parse)

    def parse(self, response):
        """Dispatch the response to the parser method named in ``meta['site']``."""
        parser = getattr(self, response.meta['site'])  # retrieve method by name
        logging.info('parsing using: %s', parser)
        parser(response)

    # site specific parsers
    def thesaurus(self, response):
        """Extract the words of the first relevancy list and save them.

        Writes the result to ``thesaurus.txt``; when no list matches, an empty
        word list is saved.
        """
        filename = 'thesaurus.txt'
        words = []
        print("in func self is defined as ", self)
        # Only the first matching <ul> is used.
        for u in response.css('.relevancy-block ul')[:1]:
            words = u.css('.text::text').extract()
            print("words is ", words)
        # save_words takes the words first and the file path second.
        save_words(words, filename)

    def oxford(self, response=None):
        """Placeholder parser; not implemented yet."""
        # TODO: extract the words and save them to 'oxford.txt'.

    def collins(self, response=None):
        """Placeholder parser; not implemented yet."""
        # TODO: extract the words and save them to 'collins.txt'.

相关问题更多 >

编程相关推荐

热门问题

热门文章