Python 'requests'库 - 如何定义特定DNS?
在我的项目中,我使用 Python 的 requests 库来处理所有的 HTTP 请求。
现在,我需要用特定的 DNS 来查询 HTTP 服务器——有两个环境,每个环境都有自己的 DNS,而且这些环境的更改是独立进行的。
所以,当代码运行时,它应该使用与环境相关的 DNS,而不是我网络连接中指定的 DNS。
有没有人尝试过用 python-requests 来实现这个?我只找到关于 urllib2 的解决方案:
https://stackoverflow.com/questions/4623090/python-set-custom-dns-server-for-urllib-requests
6 个回答
我找到一个很棒的库,叫做 requests-doh,可以用来解决这个问题。为了使用你自己的DNS(更具体来说,是通过HTTPS的DNS),你可以这样做:
from requests_doh import DNSOverHTTPSSession, add_dns_provider

# Register your DNS-over-HTTPS endpoint under a label of your choosing:
#   YOUR_DNS_NAME                -> the label you want to give your DNS
#   YOUR_DNS_OVER_HTTPS_ADDRESS  -> the address of your DNS
add_dns_provider("YOUR_DNS_NAME", "YOUR_DNS_OVER_HTTPS_ADDRESS")

# Create a session bound to the provider registered above.
session = DNSOverHTTPSSession("YOUR_DNS_NAME")

# YOUR_URL is a placeholder: substitute the URL you are trying to GET.
response = session.get(YOUR_URL)
print(response.content)
我知道这是一个老帖子,但这里有一个我用Python 3写的解决方案,使用了tldextract和dnspython这两个库。我还留了一些注释掉的代码,方便你理解如何调试和设置额外的会话参数。
#!/usr/bin/env python3
import sys
from pprint import pprint as pp
import requests
import dns.resolver # NOTE: dnspython package
import tldextract
class CustomAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that resolves the request host via custom nameservers.

    The host name in the URL is looked up with dnspython against the
    nameservers given at construction time, and the connection is then
    obtained for a URL in which the host name is replaced by the resolved
    IPv4 address.
    """

    def __init__(self, nameservers):
        """Remember the nameserver IPs to query and initialise the base adapter.

        Args:
            nameservers: list of DNS server IP address strings.
        """
        self.nameservers = nameservers
        super().__init__()

    def resolve(self, host, nameservers, record_type):
        """Return the first ``record_type`` record of ``host`` as a string.

        Args:
            host: host name to look up.
            nameservers: list of DNS server IP address strings to query.
            record_type: DNS record type, e.g. ``'A'``.

        Returns:
            The first record in the answer converted with ``str()``, or
            ``None`` when the answer contains no records.
        """
        dns_resolver = dns.resolver.Resolver()
        dns_resolver.nameservers = nameservers
        # dnspython 2.x deprecates query() in favour of resolve(); keep
        # working on 1.x where only query() exists.
        lookup = getattr(dns_resolver, "resolve", None) or dns_resolver.query
        answers = lookup(host, record_type)
        for rdata in answers:
            return str(rdata)
        return None

    def get_connection(self, url, proxies=None):
        """Return a connection for ``url`` with its host replaced by its A record.

        NOTE(review): recent requests releases appear to route through
        ``get_connection_with_tls_context`` instead of this hook -- confirm
        the override is still invoked with the installed version.
        """
        ext = tldextract.extract(url)
        # Skip empty components so a URL without a subdomain (or suffix)
        # does not yield a name with a leading/trailing dot.
        fqdn = ".".join(
            part for part in (ext.subdomain, ext.domain, ext.suffix) if part
        )
        print("FQDN: {}".format(fqdn))
        # Use the nameservers this adapter was constructed with rather than
        # relying on a module-level global of the same name.
        a_record = self.resolve(fqdn, self.nameservers, 'A')
        print("A record: {}".format(a_record))
        if a_record is None:
            # Nothing resolved: leave the URL untouched instead of crashing
            # in str.replace().
            resolved_url = url
        else:
            # NOTE: Replace first occurrence only
            resolved_url = url.replace(fqdn, a_record, 1)
        print("Resolved URL: {}".format(resolved_url))
        return super().get_connection(resolved_url, proxies=proxies)
if __name__ == "__main__":
    # Exactly one argument (the URL) is expected. A usage error is reported
    # on stderr and signalled with a non-zero exit status.
    if len(sys.argv) != 2:
        print("Usage: {} <url>".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    url = sys.argv[1]

    nameservers = [
        '208.67.222.222',  # NOTE: OpenDNS
        '8.8.8.8',         # NOTE: Google
    ]

    # Route requests whose URL starts with `url` through the custom adapter.
    session = requests.Session()
    session.mount(url, CustomAdapter(nameservers))

    # Optional keyword arguments for session.get(); the commented entries are
    # kept as examples of what can be enabled.
    parameters = {
        # "headers": {'Content-Type': 'application/json'},
        # "timeout" : 45,
        # "stream" : True,
        # "proxies" : {
        #   "http": "http://your_http_proxy:8080/",
        #   "https": "http://your_https_proxy:8081/"
        # },
        # "auth": (name, password),
        # ...
    }

    response = session.get(url, **parameters)
    # Dump every attribute of the response object for debugging.
    pp(response.__dict__)
这是控制台输出的内容:
$ ./run.py http://www.test.com
FQDN: www.test.com
A record: 69.172.200.235
Resolved URL: http://69.172.200.235/
{'_content': b'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3'
b'.org/TR/html4/strict.dtd">\n<html>\n<head>\n<meta http-equiv="C'
b'ontent-Type" content="text/html; charset=iso-8859-1">\n<meta '
b'http-equiv="Content-Script-Type" content="text/javascript">\n'
b'<script type="text/javascript">\nfunction getCookie(c_name) {'
b' // Local function for getting a cookie value\n if (docume'
b'nt.cookie.length > 0) {\n c_start = document.cookie.in'
b'dexOf(c_name + "=");\n if (c_start!=-1) {\n c_st'
b'art=c_start + c_name.length + 1;\n c_end=document.cook'
b'ie.indexOf(";", c_start);\n\n if (c_end==-1) \n '
b' c_end = document.cookie.length;\n\n return unescape('
b'document.cookie.substring(c_start,c_end));\n }\n }\n '
b' return "";\n}\nfunction setCookie(c_name, value, expiredays'
b') { // Local function for setting a value of a cookie\n va'
b'r exdate = new Date();\n exdate.setDate(exdate.getDate()+e'
b'xpiredays);\n document.cookie = c_name + "=" + escape(valu'
b'e) + ((expiredays==null) ? "" : ";expires=" + exdate.toGMTString'
b'()) + ";path=/";\n}\nfunction getHostUri() {\n var loc = doc'
b"ument.location;\n return loc.toString();\n}\nsetCookie('YPF8"
b"827340282Jdskjhfiw_928937459182JAX666', '171.68.244.56', 10)"
b';\ntry { \n location.reload(true); \n} catch (err1) { \n '
b' try { \n location.reload(); \n } catch (err2) { '
b' \n \tlocation.href = getHostUri(); \n } \n}\n</scrip'
b't>\n</head>\n<body>\n<noscript>This site requires JavaScript an'
b'd Cookies to be enabled. Please change your browser settings or '
b'upgrade your browser.</noscript>\n</body>\n</html>\n',
'_content_consumed': True,
'_next': None,
'connection': <requests.adapters.HTTPAdapter object at 0x109130e48>,
'cookies': <RequestsCookieJar[]>,
'elapsed': datetime.timedelta(microseconds=992676),
'encoding': 'ISO-8859-1',
'headers': {'Server': 'nginx/1.14.2', 'Date': 'Wed, 01 May 2019 18:01:58 GMT', 'Content-Type': 'text/html', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Keep-Alive': 'timeout=20', 'X-DIS-Request-ID': '2a5057a7c7b8a93dd700856c48fda74a', 'P3P': 'CP="NON DSP COR ADMa OUR IND UNI COM NAV INT"', 'Cache-Control': 'no-cache', 'Content-Encoding': 'gzip'},
'history': [<Response [302]>],
'raw': <urllib3.response.HTTPResponse object at 0x1095b90b8>,
'reason': 'OK',
'request': <PreparedRequest [GET]>,
'status_code': 200,
'url': 'https://www.test.com/'}
希望这对你有帮助。
一个定制的 HTTP 适配器可以解决这个问题。
别忘了设置 server_hostname,这样才能启用服务器名称指示(SNI)。
import requests
class HostHeaderSSLAdapter(requests.adapters.HTTPAdapter):
    """HTTPS adapter that connects to a chosen IP while keeping the host name.

    For HTTPS requests whose host has an entry in ``resolve()``, the URL is
    rewritten to point at the resolved IP, while the original host name is
    still used for SNI (``server_hostname``), certificate verification
    (``assert_hostname``) and the ``Host`` header.
    """

    def resolve(self, hostname):
        """Return an IP address for ``hostname``, or ``None`` if unknown.

        This is a dummy resolver: it picks one of two hard-coded IPs at
        random for ``cdn.jsdelivr.net`` and knows no other host.
        """
        # a dummy DNS resolver
        import random
        ips = [
            '104.16.89.20',  # CloudFlare
            '151.101.2.109',  # Fastly
        ]
        resolutions = {
            'cdn.jsdelivr.net': random.choice(ips),
        }
        return resolutions.get(hostname)

    def send(self, request, **kwargs):
        """Rewrite ``request`` to target the resolved IP, then send it.

        Args:
            request: the prepared request; its ``url`` and ``Host`` header
                are modified in place when an IP override applies.
            **kwargs: forwarded unchanged to ``HTTPAdapter.send``.

        Returns:
            Whatever ``HTTPAdapter.send`` returns.
        """
        from urllib.parse import urlparse

        connection_pool_kwargs = self.poolmanager.connection_pool_kw

        result = urlparse(request.url)
        resolved_ip = self.resolve(result.hostname)

        if result.scheme == 'https' and resolved_ip:
            # Only the authority at the start of the URL must change; limit
            # the replacement to the first match so the same text appearing
            # in the path or query is left alone.
            request.url = request.url.replace(
                'https://' + result.hostname,
                'https://' + resolved_ip,
                1,
            )
            connection_pool_kwargs['server_hostname'] = result.hostname  # SNI
            connection_pool_kwargs['assert_hostname'] = result.hostname

            # overwrite the host header; keep an explicit non-default port so
            # the server sees the same authority the caller asked for
            host_header = result.hostname
            if result.port not in (None, 443):
                host_header = '{}:{}'.format(result.hostname, result.port)
            request.headers['Host'] = host_header
        else:
            # these settings from a previous request may have been left
            connection_pool_kwargs.pop('server_hostname', None)
            connection_pool_kwargs.pop('assert_hostname', None)

        return super(HostHeaderSSLAdapter, self).send(request, **kwargs)
url = 'https://cdn.jsdelivr.net/npm/bootstrap/LICENSE'

# Send every https:// request of this session through the custom adapter.
session = requests.Session()
session.mount('https://', HostHeaderSSLAdapter())

# Fetch the same URL twice and show the response headers of each attempt.
for _ in range(2):
    r = session.get(url)
    print(r.headers)
你应该了解一下 TransportAdapters,包括它的源代码。虽然它的文档不是特别好,但它能让你直接使用很多在 RFC 2818 和 RFC 6125 中描述的功能。特别是,这些文档鼓励(或者说要求?)客户端代码支持特定应用的DNS,以便检查证书的CommonName和SubjectAltName。你在这些调用中需要的关键字参数是“assert_hostname”。下面是如何在requests库中设置它:
from requests import Session, HTTPError
from requests.adapters import HTTPAdapter, DEFAULT_POOLSIZE, DEFAULT_RETRIES, DEFAULT_POOLBLOCK
class DNSResolverHTTPSAdapter(HTTPAdapter):
    """Adapter that connects to ``host`` but verifies against ``common_name``.

    ``common_name`` is the name expected on the server certificate (and the
    name used in request URLs); ``host`` is the address actually connected to.
    """

    def __init__(
        self,
        common_name,
        host,
        pool_connections=DEFAULT_POOLSIZE,
        pool_maxsize=DEFAULT_POOLSIZE,
        max_retries=DEFAULT_RETRIES,
        pool_block=DEFAULT_POOLBLOCK,
    ):
        """Store both names, then initialise the base adapter with the pool options."""
        # Stored before the base initialiser runs; presumably it calls
        # init_poolmanager(), which reads the common name -- verify.
        self.__common_name = common_name
        self.__host = host
        base_options = {
            'pool_connections': pool_connections,
            'pool_maxsize': pool_maxsize,
            'max_retries': max_retries,
            'pool_block': pool_block,
        }
        super(DNSResolverHTTPSAdapter, self).__init__(**base_options)

    def get_connection(self, url, proxies=None):
        """Return a connection for ``url`` with the common name swapped for the host."""
        certificate_name = self.__common_name.lower()
        redirected_url = url.replace(certificate_name, self.__host)
        parent = super(DNSResolverHTTPSAdapter, self)
        return parent.get_connection(redirected_url, proxies=proxies)

    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
        """Create the pool manager with ``assert_hostname`` set to the common name."""
        pool_kwargs.update(assert_hostname=self.__common_name)
        parent = super(DNSResolverHTTPSAdapter, self)
        parent.init_poolmanager(connections, maxsize, block=block, **pool_kwargs)
# Name expected on the server certificate, and the address to connect to.
common_name = 'SuperSecretSarahServer'
host = '192.168.33.51'
port = 666
base_url = 'https://{}:{}/api/'.format(common_name, port)

# Mount the adapter on the lower-cased URL prefix. `base_url` is a plain
# module-level variable here, so it is referenced directly (there is no
# `self` at this scope).
my_session = Session()
my_session.mount(base_url.lower(), DNSResolverHTTPSAdapter(common_name, host))

user_name = 'sarah'
url = '{}users/{}'.format(base_url, user_name)

# NAME, PASSWORD and SSL_OPTIONS are placeholders that must be defined by the
# caller: credentials, plus the CA bundle and client certificate/key paths.
default_response_kwargs = {
    'auth': (NAME, PASSWORD),
    'headers': {'Content-Type': 'application/json'},
    'verify': SSL_OPTIONS['ca_certs'],
    'cert': (SSL_OPTIONS['certfile'], SSL_OPTIONS['keyfile'])
}
response = my_session.get(url, **default_response_kwargs)
我用 common_name 来表示证书上预期的名称,也就是你的代码引用目标机器时使用的名称。我用 host 来表示外界能识别的名称,比如完全限定域名(FQDN)、IP 地址、DNS 条目等等。当然,SSL_OPTIONS 字典(在我的例子中)必须列出你机器上合适的证书和密钥文件名。(另外,NAME 和 PASSWORD 也应该是正确的字符串。)
requests 库使用了 urllib3,而 urllib3 又最终使用了 httplib.HTTPConnection,所以之前在一个已经删除的帖子中提到的技巧(链接到另一个帖子,讨论如何让 urllib2 使用自定义的 DNS)在某种程度上仍然适用。
urllib3.connection 模块以相同的名称定义了 httplib.HTTPConnection 的子类,并将 .connect() 方法替换成了一个调用 self._new_conn 的方法。接着,这个方法又委托给了 urllib3.util.connection.create_connection()。也许最简单的方式是对那个函数打补丁:
from urllib3.util import connection

# Keep a reference to the real implementation so the wrapper can delegate to it.
_orig_create_connection = connection.create_connection


def patched_create_connection(address, *args, **kwargs):
    """Resolve the host with a custom resolver, then call urllib3's create_connection.

    ``address`` is a ``(host, port)`` pair; all other arguments are passed
    through unchanged to the original function.
    """
    # Plug in your own resolver here; otherwise the system resolver would be
    # used to turn the host name into an IP address.
    target, port = address
    resolved = your_dns_resolver(target)
    return _orig_create_connection((resolved, port), *args, **kwargs)


# Install the wrapper in place of urllib3's function.
connection.create_connection = patched_create_connection
你可以提供自己的代码来将地址中的 host 部分解析成 IP 地址,而不是依赖于 connection.create_connection()(这个方法又调用了 socket.create_connection())来为你解析主机名。
像所有的猴子补丁一样,要小心代码在后续版本中是否有重大变化;这里的补丁是针对 urllib3 版本 1.21.1 创建的,但应该适用于早到 1.9 的版本。
请注意,这个回答已经重写,以适应更新的 urllib3 版本,这些版本提供了一个更方便的补丁位置。查看编辑历史可以找到适用于版本 < 1.9 的旧方法,该方法是对捆绑(vendored)的 urllib3 版本打补丁,而不是针对独立安装的版本。