我正在用 Scrapy 的 XMLFeedSpider 抓取一个 RSS 服务(Google Trends,url=https://trends.google.cl/trends/trendingsearches/daily/rss?geo=CL),
但是在部分标签上遇到了问题,
尤其是带有 ht: 命名空间前缀的标签(例如 ht:approx_traffic)。
在解析 ht: 标签时我得到了一个错误,
代码如下:
class RssGoogleTrends(XMLFeedSpider):
    """Spider that reads the Google Trends daily trending-searches RSS feed.

    Every ``<item>`` element of the feed is converted into a plain ``dict``
    with the keys ``id``, ``link``, ``description``, ``pubDate`` and
    ``approx_traffic``.
    """

    name = 'Google'
    # Scrapy reads ``allowed_domains`` (plural) and expects bare host names,
    # not full URLs; the misspelled ``allowed_domain`` was silently ignored.
    allowed_domains = ['trends.google.com']
    start_urls = ['https://trends.google.com/trends/trendingsearches/daily/rss?geo=CL']
    itertag = 'item'

    def parse_node(self, response, node):
        """Build one item dict from a single ``<item>`` node of the feed.

        Args:
            response: The response the node was extracted from (unused here).
            node: Selector positioned on one ``<item>`` element.

        Returns:
            dict: The extracted fields; a field is ``None`` when the
            corresponding child element is absent from the node.
        """
        self.logger.info(
            'Hi, this is a <%s> node!: %s', self.itertag, ''.join(node.getall())
        )
        item = {
            # NOTE: the 'id' key holds the entry title; the key name is kept
            # so that existing consumers of the item keep working.
            'id': node.xpath('title/text()').get(),
            'link': node.xpath('link/text()').get(),
            'description': node.xpath('description/text()').get(),
            'pubDate': node.xpath('pubDate/text()').get(),
            # ``ht:approx_traffic`` lives in the feed's ``ht`` XML namespace.
            # Using the ``ht:`` prefix in XPath requires the prefix to be
            # registered (e.g. via the spider's ``namespaces`` attribute with
            # the exact URI declared by the feed); matching on local-name()
            # avoids depending on that URI.
            'approx_traffic': node.xpath(
                '*[local-name()="approx_traffic"]/text()'
            ).get(),
        }
        # Logged instead of printed so the output follows the crawler's
        # logging configuration.
        self.logger.debug('Parsed item: %s', item)
        return item
谢谢你的时间
目前没有回答
相关问题 更多 >
编程相关推荐