如何为通过txsocksx发出socks4请求的scrapy编写下载处理程序

2024-06-16 09:27:41 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在做一个大学项目,需要让下面的代码使用 socks4(而不是 tor/socks5)工作。我尝试将 SOCKS5Agent 修改为 SOCKS4Agent,但随后收到了一个错误:

原始代码:https://stackoverflow.com/a/33944924/11219616

我的代码:

import scrapy.core.downloader.handlers.http11 as handler
from twisted.internet import reactor
from txsocksx.http import SOCKS4Agent
from twisted.internet.endpoints import TCP4ClientEndpoint
from scrapy.core.downloader.webclient import _parse


class TorScrapyAgent(handler.ScrapyAgent):
    """ScrapyAgent variant that routes ``socks4://`` proxied requests via txsocksx.

    Requests whose ``proxy`` meta value does not use the ``socks4`` scheme
    (or that have no proxy at all) are delegated to the parent class.
    """

    # Kept separate from the inherited ``_Agent`` attribute on purpose: the
    # parent's ``_get_agent`` is the fallback for non-SOCKS requests, and
    # SOCKS4Agent cannot be built without a ``proxyEndpoint``.
    # NOTE(review): assumes the parent builds direct requests through
    # ``self._Agent`` -- confirm against the installed Scrapy version.
    _SocksAgent = SOCKS4Agent

    def _get_agent(self, request, timeout):
        """Return the agent to use for ``request``.

        :param request: the request being downloaded; ``request.meta['proxy']``
            is inspected to decide whether a SOCKS4 proxy is wanted.
        :param timeout: connect timeout, forwarded to the proxy endpoint or to
            the parent implementation.
        :returns: a SOCKS4 agent bound to the proxy endpoint when the proxy
            scheme is ``socks4``; otherwise whatever the parent class returns.
        """
        proxy = request.meta.get('proxy')

        if proxy:
            proxy_scheme, _, proxy_host, proxy_port, _ = _parse(proxy)

            if proxy_scheme == 'socks4':
                # Honour the caller's timeout instead of the endpoint default.
                endpoint = TCP4ClientEndpoint(reactor, proxy_host, proxy_port,
                                              timeout=timeout)

                return self._SocksAgent(reactor, proxyEndpoint=endpoint)

        return super(TorScrapyAgent, self)._get_agent(request, timeout)


class TorHTTPDownloadHandler(handler.HTTP11DownloadHandler):
    """HTTP/1.1 download handler that sends every request through TorScrapyAgent."""

    def download_request(self, request, spider):
        """Build a TorScrapyAgent for ``spider`` and let it download ``request``.

        The size limits are read from the spider's ``download_maxsize`` and
        ``download_warnsize`` attributes when present, falling back to this
        handler's defaults otherwise.

        :returns: the result of the agent's ``download_request`` call.
        """
        agent_options = {
            'contextFactory': self._contextFactory,
            'pool': self._pool,
            'maxsize': getattr(spider, 'download_maxsize', self._default_maxsize),
            'warnsize': getattr(spider, 'download_warnsize', self._default_warnsize),
        }
        tor_agent = TorScrapyAgent(**agent_options)
        return tor_agent.download_request(request)

我得到一个错误:

Traceback (most recent call last):
File "C:\Python27\lib\site-packages\twisted\internet\defer.py", line 1416, in _inlineCallbacks
    result = result.throwExceptionIntoGenerator(g)
File "C:\Python27\lib\site-packages\twisted\python\failure.py", line 491, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
File "C:\Python27\lib\site-packages\scrapy\core\downloader\middleware.py", line 43, in process_request
    defer.returnValue((yield download_func(request=request,spider=spider)))
File "C:\Python27\lib\site-packages\ometa\protocol.py", line 53, in dataReceived
    self._parser.receive(data)
File "C:\Python27\lib\site-packages\ometa\tube.py", line 41, in receive
    status = self._interp.receive(data)
File "C:\Python27\lib\site-packages\ometa\interp.py", line 48, in receive
    for x in self.next:
File "C:\Python27\lib\site-packages\ometa\interp.py", line 177, in apply
    for x in self._apply(f, ruleName, argvals):
File "C:\Python27\lib\site-packages\ometa\interp.py", line 110, in _apply
    for x in rule():
File "C:\Python27\lib\site-packages\ometa\interp.py", line 256, in parse_Or
    for x in self._eval(subexpr):
File "C:\Python27\lib\site-packages\ometa\interp.py", line 241, in parse_And
    for x in self._eval(subexpr):
File "C:\Python27\lib\site-packages\ometa\interp.py", line 440, in parse_Action
    val = eval(expr.data, self.globals, self._localsStack[-1])
File "<string>", line 1, in <module>
File "C:\Python27\lib\site-packages\txsocksx\client.py", line 276, in serverResponse
    raise e.socks4ErrorMap.get(status)()
RequestRejectedOrFailed

Tags: in, py, import, self, parse, request, lib, packages