别工作了

2024-04-25 08:51:47 发布

您现在位置:Python中文网/ 问答频道 /正文

我想解析一个网站,并为此写了一个爬虫(spider):

# -*- coding: utf-8 -*-



from quoka.items import QuokaItem
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.loader.processors import TakeFirst
from scrapy.loader import XPathItemLoader
from scrapy.selector import HtmlXPathSelector

class QuokaLoader(XPathItemLoader):
    """Item loader that uses ``TakeFirst()`` as its default output processor."""

    # Used for any field that does not define its own output processor.
    default_output_processor = TakeFirst()


class QuokaSpider(CrawlSpider):
    """Crawl the quoka.de office/commercial-space section and load one item per matched page.

    Run it by spider name (``scrapy crawl quoka``), not by file name.
    """

    name = "quoka"
    allowed_domains = ["quoka.de"]
    start_urls = ["http://www.quoka.de/immobilien/bueros-gewerbeflaechen/"]

    # `rules` has to be a class attribute: CrawlSpider reads it from the
    # spider itself, so a module-level `rules` tuple is silently ignored.
    rules = (
        # Follow pagination links without parsing them.
        Rule(LinkExtractor(allow=('kleinanzeigen/cat_27_2710_ct_0_page_',)), follow=True),
        # Hand pages under the category path to parse_item.
        Rule(LinkExtractor(allow=('immobilien/bueros-gewerbeflaechen/',)), callback='parse_item'),
    )

    def parse_item(self, response):
        """Populate a ``QuokaItem`` from a crawled page.

        Args:
            response: The response for a page matched by the second rule.

        Returns:
            The item produced by ``QuokaLoader.load_item()`` with the
            ``date`` and ``cost`` fields collected from the page.
        """
        # Give the loader the response and let it build its own selector,
        # instead of going through the deprecated HtmlXPathSelector.
        loader = QuokaLoader(item=QuokaItem(), response=response)

        # add_xpath() expects the XPath expression itself; it runs the query
        # and extracts the values. Passing already-extracted text would make
        # the loader evaluate that text as an XPath expression.
        loader.add_xpath('date', "/html/body/div[3]/div[2]/div[1]/main/div[8]/div/div[2]/strong/span/text()")
        loader.add_xpath('cost', "/html/body/div[3]/div[2]/div[1]/main/div[8]/div/div[3]/div[2]/div[2]/text()")
        # loader.add_value('url', response.url)

        return loader.load_item()

输入命令:sudo scrapy crawl quoka_spider.py

但我有个神秘的错误:

^{pr2}$

我用的是 Ubuntu 16.04 和 Python 3.5,Scrapy 是通过 pip3 安装的。我重新安装了 Scrapy,但问题依旧。如何修复?


Tags: from import div add response loader item rule

热门问题