1:我的蜘蛛把所有结果都写进了 CSV 文件的同一行

from scrapy import Spider
from companies.items import CompaniesItem
import re


class companiesSpider(Spider):
    """Collect startup links and names from the startup.miami listing pages.

    Yields one ``CompaniesItem`` per heading anchor found inside the
    ``#datafetch`` container, so that every company ends up on its own
    row when the feed is exported (e.g. to CSV).
    """

    name = "companies"
    # allowed_domains expects bare domain names; a full URL such as
    # 'http://startup.miami' is rejected by Scrapy's offsite filtering.
    allowed_domains = ['startup.miami']
    # Defining the list of pages to scrape
    # NOTE(review): page numbering starts at 0 as in the original code;
    # confirm whether the site's pagination is actually 1-based.
    start_urls = [
        "http://startup.miami/category/startups/page/" + str(i) + "/"
        for i in range(0, 10)
    ]

    def parse(self, response):
        """Yield one item per company heading link on a listing page.

        Args:
            response: the downloaded listing page.

        Yields:
            CompaniesItem with scalar ``link`` and ``name`` fields.
        """
        rows = response.xpath('//*[@id="datafetch"]')
        for row in rows:
            # Walk the individual heading anchors instead of calling
            # .extract() on the whole container: the latter returns lists
            # holding every link/name on the page, which collapses all
            # results into a single item (and a single CSV row).
            # NOTE(review): assumes each company is an <h2><a href=...>
            # inside the container; verify against the page markup.
            for anchor in row.xpath('.//h2/a[@href]'):
                item = CompaniesItem()
                item['link'] = anchor.xpath('./@href').extract_first()
                item['name'] = anchor.xpath('./text()').extract_first()
                yield item

1条回答

网友

1楼 · 发布于 2024-04-25 01:28:08

您的解析方法未生成任何请求或项。在下面的代码中，我们逐页浏览这些页面并获取 URL 和名称。在 parse_detail 中，可以向 item 添加其他数据。我们没有把页数硬编码为 10 页，而是检查是否存在下一页；如果存在，就再次调用 parse 进行解析。

from scrapy import Spider
from ..items import CompaniesItem
import scrapy


class CompaniesSpider(Spider):
    """Crawl the startup.miami startup listing and follow each entry.

    ``parse`` walks a listing page, builds a partially filled
    ``CompaniesItem`` for every article and requests the article's page;
    ``parse_detail`` receives that item through ``response.meta`` and
    yields it. Pagination is followed through the "next" link rather than
    a hard-coded page count.
    """

    name = "companies"
    allowed_domains = ['startup.miami']
    # Defining the list of pages to scrape
    start_urls = ["http://startup.miami/category/startups/"]

    def parse(self, response):
        """Yield a detail request per listed article, then the next page.

        Args:
            response: a downloaded listing page.

        Yields:
            scrapy.Request objects: one per article (carrying the item in
            ``meta['item']``) and, if present, one for the next listing page.
        """
        # get link & name and send item to parse_detail in meta
        rows = response.xpath('//*[@id="datafetch"]/article')
        for row in rows:
            link = row.xpath('.//@href').extract_first()
            if not link:
                # Without a URL there is nothing to request; skipping keeps
                # one malformed article from aborting the whole page.
                self.logger.warning(
                    "Skipping article without a link on %s", response.url)
                continue
            name = row.xpath(
                './/*[@class="textoCoworking"]/text()').extract_first()
            # Resolve relative hrefs; Request rejects URLs without a scheme.
            detail_url = response.urljoin(link)
            item = CompaniesItem()
            item['link'] = detail_url
            # extract_first() returns None when the node is missing.
            item['name'] = name.strip() if name is not None else None
            yield scrapy.Request(detail_url,
                                 callback=self.parse_detail,
                                 meta={'item': item})
        # get the next page
        next_page = response.xpath(
            '//*[@class="next page-numbers"]/@href').extract_first()
        if next_page:
            yield scrapy.Request(response.urljoin(next_page),
                                 callback=self.parse)

    def parse_detail(self, response):
        """Yield the item that ``parse`` attached to this request.

        Args:
            response: the downloaded article page; ``response.meta['item']``
                holds the item created in ``parse``.

        Yields:
            The item taken from ``response.meta``.
        """
        item = response.meta['item']
        # add other details to the item here
        yield item

要将结果放入csv文件中，可以像这样启动scraper：scrapy crawl companies -o test_companies.csv

相关问题更多 >

编程相关推荐

热门问题

热门文章