将 Scrapy 爬虫（spider）打包成 exe

2024-04-25 18:49:20 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个 Scrapy 爬虫，想把它转换成 exe 文件。我尝试了 py2exe 和 PyInstaller，但都没有成功。请问有什么建议可以把它打包成单个可执行文件？另外，起始 URL 也需要能够修改，因此打包时也应该考虑这一点。

# -*- coding: utf-8 -*-
import re

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class YuSpider(CrawlSpider):
    """Crawl farfeshplus.com video pages and emit each video's media link.

    The crawl rule follows the links selected by the CSS expression in
    ``rules`` and hands every followed page to :meth:`parse_item`, which
    requests the embedded player iframe.  :meth:`parse_frame` then pulls the
    media URL out of the player's inline script and yields one item per
    video: ``{'NAME': <list of title strings>, 'Link': <media URL>}``.

    The start URLs can be overridden without editing this file by passing a
    comma-separated ``start_urls`` spider argument, e.g.
    ``scrapy crawl yu -a start_urls=https://a.example/1,https://a.example/2``.
    When the argument is absent the class-level default is used.
    """

    name = 'yu'
    custom_settings = {
        'CONCURRENT_REQUESTS': 1,
    }
    allowed_domains = ['farfeshplus.com',
                       'wintv.live']

    # Default entry point; replaced when a ``start_urls`` argument is given.
    start_urls = ['https://www.farfeshplus.com/Video.asp?ZoneID=1517']

    rules = (
        Rule(LinkExtractor(restrict_css='td td td tr:nth-child(3) a'),
             callback='parse_item', follow=True),
    )

    # Compiled once at class creation instead of on every callback.
    # The mp4 pattern is tried first; the hls pattern is the fallback.
    _MP4_LINK_RE = re.compile(r"(?<=mp4:\s\[\')(.*)\'\]")
    _HLS_LINK_RE = re.compile(r"(?<=hls:\s')(.*)'")

    def __init__(self, *args, **kwargs):
        """Initialise the spider, optionally overriding ``start_urls``.

        Args:
            *args: Positional arguments forwarded to ``CrawlSpider``.
            **kwargs: Keyword arguments forwarded to ``CrawlSpider``.  An
                optional ``start_urls`` entry may be a comma-separated
                string (as supplied by ``-a start_urls=...``) or a
                list/tuple of URLs.
        """
        start_urls = kwargs.pop('start_urls', None)
        super(YuSpider, self).__init__(*args, **kwargs)
        if start_urls:
            if not isinstance(start_urls, (list, tuple)):
                start_urls = start_urls.split(',')
            cleaned = [url.strip() for url in start_urls if url.strip()]
            # Keep the class default if the argument held no usable URL.
            if cleaned:
                self.start_urls = cleaned

    def parse_item(self, response):
        """Request the player iframe embedded in a followed video page.

        Args:
            response: Response for a page matched by the crawl rule.

        Yields:
            A ``scrapy.Request`` for the iframe whose ``width`` attribute is
            ``"750"``, carrying the page title under the ``NAME`` meta key.
            Nothing is yielded when the page has no such iframe.
        """
        title = response.xpath('//h1/div/text()').extract()
        frame_src = response.xpath(
            '//iframe[@width="750"]/@src').extract_first()

        if not frame_src:
            # Without this guard Request(url=None) would raise.
            self.logger.warning('No player iframe found on %s', response.url)
            return

        yield scrapy.Request(
            # urljoin makes relative and protocol-relative src values usable.
            url=response.urljoin(frame_src),
            callback=self.parse_frame,
            # Start from a fresh meta dict rather than forwarding
            # response.meta, so per-request bookkeeping keys of the listing
            # page are not copied onto the player request.
            meta={'NAME': title},
        )

    def parse_frame(self, response):
        """Extract the media link from the player page's inline script.

        Args:
            response: Response for the player iframe requested by
                :meth:`parse_item`.

        Yields:
            A dict with ``NAME`` (title taken from the request meta) and
            ``Link`` (first mp4 match, or first hls match when no mp4 entry
            exists).  Nothing is yielded when the script is missing or
            neither pattern matches.
        """
        script = response.xpath('//body/script').extract_first()
        if script is None:
            self.logger.warning('No inline script found on %s', response.url)
            return

        for pattern in (self._MP4_LINK_RE, self._HLS_LINK_RE):
            matches = pattern.findall(script)
            if matches:
                link = matches[0]
                break
        else:
            self.logger.warning('No media link found on %s', response.url)
            return

        yield {
            'NAME': response.meta.get('NAME'),
            'Link': link,
        }

Tags: name, import, self, re, url, parse, response, urls