Error writing to file

Posted 2024-04-26 07:07:58


import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request
from Erowid.items import ErowidItem
import os

class ExperiencesSpider(CrawlSpider):
    name = "experiences"
    allowed_domains = ["www.erowid.org"]
    start_urls = ['https://www.erowid.org/experiences/exp_list.shtml']

    rules = [
        Rule(LinkExtractor(allow=('subs/exp_[a-zA-Z]+.shtml')), callback='parse_item', follow=True),
        Rule(LinkExtractor(allow=('subs/exp_[a-zA-Z]+.shtml')), follow=True),
    ]

    def parse_item(self, response):
        filename = str(response.url)[44:-6]
        selectors = response.css('table')
        if not os.path.exists('drugs-%s' % (filename)): ##Make the file
            os.makedirs('drugs-%s' % (filename))
        list_of_experience = selectors.xpath('//table[@class="exp-cat-table"]/tr/td/a/@href').extract()

        for item in list_of_experience:
            request_url = str(item)
            Request(url="http://www.erowid.org" + request_url, callback = 'request_experience')
            def request_experience(self, response):
                selectors = response.css('div')
                for selector in selectors:
                    experience = ErowidItem()
                    experience['Author'] = selector.xpath('//div[@class="author"]/a/text()').extract()
                    experience['Title'] = selector.xpath('//div[@class="title"]/text()').extract()
                    experience['Substance'] = selector.xpath('//div[@class="substance"]/text()').extract()
                    experience['Text'] = selector.xpath("//div[@class = 'report-text-surround']/text()").extract()

                    title = str(experience['Substance']) + " "+ str(experience['Title'])
                    with open(os.path.join('drugs-%s' % (filename), title),"a") as fid:
                        fid.write(str(experience) + "\n")

I am trying to use scrapy to extract data from Erowid, and I want to format the data so that for every substance I end up with one file per experience, named in the form "Substance - Title of the experience".

My rules make the spider crawl the list of substance pages such as https://www.erowid.org/experiences/subs/exp_Acacia_confusa.shtml. From each of those pages I collect all the links to individual experiences and follow them with a second request, whose purpose this time is to collect the data from the experience itself.

I intend to store the data in the format described above, "Substance - Title of the experience", and for each substance I want to create a directory that holds all the files coming from that substance's page.
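Roughly, the layout I am after looks like this (a sketch with made-up report titles):

drugs-Acacia_confusa/
    Acacia confusa - First Report
    Acacia confusa - Another Report
drugs-Absinthe/
    Absinthe - A Quiet Evening

One directory per substance, and inside it one plain-text file per experience report.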

However, my code creates the directories but does not write the files I want.

What is causing this error?


1 answer
Forum user
#1 · Posted 2024-04-26 07:07:58

According to the documentation of Request:

callback (callable) – the function that will be called with the response of this request (once its downloaded) as its first parameter.

The callback should be a callable, not a string naming it, and the function also needs to be defined before you try to pass it as the callback of the Request object.
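In short, the difference is the following (a minimal sketch; request_experience is the method from the code above):

# wrong: the callback is a string, so Scrapy has nothing it can call
yield Request(url, callback='request_experience')

# right: pass the bound method itself, defined on the spider class
yield Request(url, callback=self.request_experience)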

Example:

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request
from Erowid.items import ErowidItem
import os

class ExperiencesSpider(CrawlSpider):
    name = "experiences"
    allowed_domains = ["www.erowid.org"]
    start_urls = ['https://www.erowid.org/experiences/exp_list.shtml']

    rules = [
        Rule(LinkExtractor(allow=('subs/exp_[a-zA-Z]+.shtml')), callback='parse_item', follow=True),
        Rule(LinkExtractor(allow=('subs/exp_[a-zA-Z]+.shtml')), follow=True),
    ]

    def request_experience(self, response):
        selectors = response.css('div')
        for selector in selectors:
            experience = ErowidItem()
            experience['Author'] = selector.xpath('//div[@class="author"]/a/text()').extract()
            experience['Title'] = selector.xpath('//div[@class="title"]/text()').extract()
            experience['Substance'] = selector.xpath('//div[@class="substance"]/text()').extract()
            experience['Text'] = selector.xpath("//div[@class = 'report-text-surround']/text()").extract()

            title = str(experience['Substance']) + " "+ str(experience['Title'])
            with open(os.path.join('drugs-%s' % (self.filename), title),"a") as fid:
                fid.write(str(experience) + "\n")

    def parse_item(self, response):
        self.filename = str(response.url)[44:-6]
        selectors = response.css('table')
        if not os.path.exists('drugs-%s' % (self.filename)):  ## make the directory for this substance
            os.makedirs('drugs-%s' % (self.filename))
        list_of_experience = selectors.xpath('//table[@class="exp-cat-table"]/tr/td/a/@href').extract()

        for item in list_of_experience:
            request_url = str(item)
            # the Request has to be yielded (or returned) so that Scrapy actually schedules it
            yield Request(url="http://www.erowid.org" + request_url, callback=self.request_experience)
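
Two more things are worth noting beyond the callback fix. First, the Request built in parse_item has to be yielded (or returned), as above; just constructing it does nothing. Second, Scrapy downloads pages concurrently, so the directory name stored on self.filename can be overwritten by another substance page before request_experience runs. A safer variant (sketched below, assuming the rest of the spider stays the same) passes the directory name along with each request via meta:

    def parse_item(self, response):
        dirname = 'drugs-%s' % str(response.url)[44:-6]
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        links = response.xpath('//table[@class="exp-cat-table"]/tr/td/a/@href').extract()
        for href in links:
            # carry the per-substance directory with the request instead of using self.filename
            yield Request(url="http://www.erowid.org" + str(href),
                          callback=self.request_experience,
                          meta={'dirname': dirname})

    def request_experience(self, response):
        dirname = response.meta['dirname']  # read it back in the callback
        # ... build the ErowidItem and write the file exactly as in the answer above,
        # using dirname instead of 'drugs-%s' % (self.filename)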
