我用Scrapy刮了一个网站。我的问题是分页使用的是javascript。所以我不能通过一个链接循环。
我试着用硒来解决这个问题,但是我有很多(referer: None)
或Unable to locate element: {"method":"xpath","selector":"//li[@class="btn-next"]/a"
的错误
我的蜘蛛代码:
import json
import scrapy
import re
import pkgutil
from scrapy.loader import ItemLoader
from lp_spider.items import AnnonceItem
from selenium import webdriver
class AnnonceSpider(scrapy.Spider):
name = 'lp_results'
def __init__(self, *args, **kwargs):
data_file = pkgutil.get_data(
"lp_spider", "json/input/db_scrap_url_lp_js_10000_reduced2.json")
self.data = json.loads(data_file)
self.driver = webdriver.Chrome()
def start_requests(self):
for item in self.data:
request = scrapy.Request(item['url_lp'], callback=self.parse)
request.meta['item'] = item
yield request
def parse(self, response):
self.driver.get(response.url)
while True:
next = self.driver.find_element_by_xpath(
"//li[@class='btn-next']/a")
try:
item = response.meta['item']
item['results'] = []
for caritem in response.css("li.li-result"):
data = AnnonceItem()
data["marque"] = caritem.css("span.brand::text").extract_first(
)
item['results'].append(data)
yield item
next.click()
except:
break
self.driver.close()
目前没有回答
相关问题 更多 >
编程相关推荐