无法在python web中接收证书

2024-05-08 11:43:39 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在开发一个WebCrawler,但是我在从连接到的服务器读取证书时遇到了很多问题,我只需要读取证书并在屏幕上打印它,但是使用getpeercert()函数,证书总是{},我尝试使用get_server_certificate(),它返回一个错误,告诉我值太多,无法解压缩。 我知道在线程中也有一些错误,但我稍后会修复它。 已经提前谢谢了。

问题出在“acesso”函数上

#coding: utf8
import socket
import sys
import re
import ssl
import pprint
from threading import Thread
from urlparse import urlparse


threads = []
vetorLinks = []
linksVisitados = []
nThreads = 4 #numero de threads

def acesso(url):
    print "Acessando: " + url
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    ssl_socket = ssl.wrap_socket(s)

    urlParsed = urlparse(url)

    try:
        ssl_socket.connect((urlParsed[1], 443))
    except  socket.timeout:
        print "Time out"

    ssl.get_server_certificate(urlParsed[1])

    pprint.pprint(ssl_socket.getpeercert())

    request = "GET " + urlParsed[2] + " HTTP/1.1\r\nUser-Agent: Python\r\nHost: " +     urlParsed[1] + "\r\nConnection: persistent\r\n\r\n"

    ssl_socket.sendall(request)
    dados = ''
    try:
        while(True):
            buff = ssl_socket.recv(4096)
            if not len(buff):
                break
            dados += buff
    except socket.timeout:
        print "Time Out"

    ssl_socket.close()
    return dados


def navega(url, profundidade, vetorLinks, visitados):
    if not url in visitados and (url.startswith("http://") or     url.startswith("https://")):
        visitados.append(url) #salva a url no vetor dos visitados

        html = acesso(url) #html da pagina lida

        urls = re.findall(r"""<a href=[\'"]?([^\'" >]+)""", html)

        vetorLinks.extend(urls)     
        if profundidade==0:
            return
        else:
            try:
                for i in range(0, len(vetorLinks)):
                    for j in range(0, nThreads):
                        t = Thread(target = navega, args = (vetorLinks[i], profundidade-    1, vetorLinks, linksVisitados))
                        t.start()
                        threads.append(t)
                        i += 1
                    for t in threads:
                        t.join()
            except RuntimeError:
                print "RuntimeError nas Threads."

navega(sys.argv[2], int(sys.argv[1]), vetorLinks, linksVisitados)

Tags: inimporturlsslsyssocket证书pprint