在网页表单中选择选项并提取随后出现的表格

import scrapy
from scrapy.spiders import Spider

# Values submitted one by one through the product <select> on the EOL page.
product_names = ['Host Intrusion Prevention','McAfee Agent','Active Response','Database Security']


class McAfee_Spider(scrapy.Spider):
    """Submit the product selection form once per product and read the result table.

    ``parse`` fills the ``selectProductArea`` form of the start page for every
    entry in ``product_names``; each response is handed to ``parse_table``,
    which extracts the first four columns of the
    ``general eoldynamicContent`` table.
    """

    name = 'McAfee'
    # Scrapy documents both attributes as lists, not bare strings.
    allowed_domains = ['mcafee.com']
    start_urls = ['https://www.mcafee.com/enterprise/en-us/support/product-eol.html']

    def parse(self, response):
        """Yield one form submission per product name.

        The loop lives inside the method (not in the class body) so that
        ``product`` is in scope when the request is built, and each request
        is yielded with a callback so Scrapy actually schedules it.
        """
        for product in product_names:
            yield scrapy.FormRequest.from_response(
                response,
                formxpath="//form[@id='selectProductArea']",
                formdata={"SelectProductArea": product},
                # NOTE(review): clickdata is meant to pick the control that is
                # "clicked"; confirm that a type="select" element is accepted
                # here, otherwise drop it or pass dont_click=True.
                clickdata={"type": "select"},
                callback=self.parse_table,
            )

    def parse_table(self, response):
        """Yield the four table columns as lists of extracted ``<td>`` nodes."""
        # Single-quoted Python literal so the XPath can keep its double quotes.
        cell = '//table[@class="general eoldynamicContent"]//tbody//tr//td[%d]'
        product = response.xpath(cell % 1).extract()
        version = response.xpath(cell % 2).extract()
        eos_notif = response.xpath(cell % 3).extract()
        eol_date = response.xpath(cell % 4).extract()
        yield {
            'product': product,
            'version': version,
            'eos_notif': eos_notif,
            'eol_date': eol_date,
        }

import pandas as pd

# Hand-written sample of the desired output: one column per scraped field,
# one list entry per table row.
results = {
    'product': ['McAfee Host Intrusion Prevention', 'McAfee Host Prevention for Linux'],
    'version': ['8.0', '8.0 Patch 6'],
    'eos_notif': ['', ''],
    'eol_date': ['', ''],
}

# Bare expression: displays the frame when run in a notebook/REPL cell.
pd.DataFrame(results)

1条回答

网友

1楼 · 发布于 2024-04-25 05:15:54

你找错地方了。当你在列表中选择任意一项后，上述网站并不会发送任何表单请求（FormRequest）。相反，它会从 https://www.mcafee.com/enterprise/admin/support/eol.xml 加载全部数据，然后只显示其中的一部分：

import scrapy


class McAfee_Spider(scrapy.Spider):
    """Read the McAfee end-of-life XML document and emit one item per element.

    The start URL is fetched directly; ``parse`` then walks every ``<product>``
    node and each ``<element>`` beneath it.
    """

    name = 'McAfee'
    # Scrapy documents ``allowed_domains`` as a list of domain strings.
    allowed_domains = ['mcafee.com']
    start_urls = ['https://www.mcafee.com/enterprise/admin/support/eol.xml']

    def parse(self, response):
        """Yield a dict for every ``<element>`` found under a ``<product>``.

        Each item carries the parent product's ``title`` attribute, the
        element's ``title``, ``version``, ``eos`` and ``eos_notification``
        attributes, and the text of its ``<comment>`` child. ``.get()``
        returns ``None`` for any attribute or node that is absent.
        """
        for product in response.xpath('//product'):
            # Looked up once per product and repeated on each of its items.
            product_title = product.xpath('./@title').get()
            for element in product.xpath('./element'):
                yield {
                    'product_title': product_title,
                    'element_title': element.xpath('./@title').get(),
                    'element_version': element.xpath('./@version').get(),
                    'element_eos': element.xpath('./@eos').get(),
                    'element_eos_notification': element.xpath('./@eos_notification').get(),
                    # NOTE(review): key is spelled with three m's; kept as-is
                    # because it is part of the emitted item schema.
                    'element_commment': element.xpath('./comment/text()').get(),
                }

相关问题更多 >

编程相关推荐

热门问题

热门文章