# Sample input: a one-element list holding a record whose "name" is
# "subjectAltName" and whose "data" is the raw payload. In this literal the
# \xNN escapes are written with doubled backslashes, so each escape is four
# ordinary ASCII characters of text rather than a single byte.
data = [{
    "data": "0\\x1e\\x82*.extractdomain.com\\x82\\x0ctest.extractdomain.com",
    "name": "subjectAltName",
}]
# `data` is a list, so select the first record before reading its "data"
# field (indexing the list with a string key raises TypeError).
# Every code point >= 128 is replaced with a single space.
text = "".join(ch if ord(ch) < 128 else " " for ch in data[0]["data"])
import re
def extract_url(url):
    """Split a dotted host name into ``(subdomain, domain)``.

    The domain is the last two dot-separated labels joined by ".", and the
    subdomain is everything before them (an empty string when there is
    nothing left over).

    Args:
        url: Dotted host name such as ``"test.example.com"``.

    Returns:
        A ``(subdomain, domain)`` tuple of strings.
    """
    labels = url.split(".")
    # Slicing (rather than indexing [-2]) keeps single-label input such as
    # "localhost" from raising IndexError; it yields ("", "localhost").
    subdomain = ".".join(labels[:-2])
    domain = ".".join(labels[-2:])
    return (subdomain, domain)
# Split the payload immediately after every ".com". The lookbehind is
# zero-width, so ".com" stays attached to the piece before it; the empty
# piece left after the final ".com" is dropped by the `if site` filters.
sites = re.split(r"(?<=\.com)", data[0]["data"])
# Pass 1: remove escapes that appear as literal four-character text (\xNN).
extracted_sites = [re.sub(r'\\x([0-9a-f]){2}', '', site) for site in sites if site]
# Pass 2: remove genuine single-character non-ASCII code points. This pass
# must read from extracted_sites (not sites), otherwise the result of pass 1
# is discarded and the \xNN text leaks into the printed subdomains.
extracted_sites = [
    "".join(c for c in site if ord(c) < 128)
    for site in extracted_sites
    if site
]
# NOTE: printable ASCII that precedes a host name (e.g. the leading "0" in
# the sample payload) survives both passes and ends up in the subdomain.
print([extract_url(url) for url in extracted_sites])
注意：请仔细检查字符串中的 \x1e 是一个字符（真正的控制字符）还是四个字符（字面的反斜杠转义文本），两种情况需要不同的清理方式。
输出(子域、域):
相关问题 更多 >
编程相关推荐