我的简单HTTP套接字代理脚本有什么问题?

0 投票
1 回答
738 浏览
提问于 2025-04-15 14:08

我写了一个简单的Python脚本,用来实现代理功能。这个脚本运行得不错,不过如果请求的网页上有很多其他的HTTP请求,比如谷歌地图,页面加载就会变得很慢。

有没有什么建议可以帮我找出代码中的瓶颈,以及我该如何改进呢?

#!/usr/bin/python
import socket,select,re
from threading import Thread

class ProxyServer():
    def __init__(self, host, port):
        self.host=host
        self.port=port 
        self.sk1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def startServer(self):
        self.sk1.bind((self.host,self.port))
        self.sk1.listen(256)
        print "proxy is ready for connections..."
        while(1):
            conn,clientAddr = self.sk1.accept()
           # print "new request coming in from " + str(clientAddr)
            handler = RequestHandler(conn)
            handler.start()


class RequestHandler(Thread):

    def __init__(self, sk1):
        Thread.__init__(self)
        self.clientSK = sk1
        self.buffer = ''
        self.header = {}





    def run(self):
        sk1 = self.clientSK 
        sk2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        while 1:
            self.buffer += sk1.recv(8192)
            if self.buffer.find('\n') != -1:
                break;

        self.header = self.processHeader(self.buffer)
        if len(self.header)>0: #header got processed
            hostString = self.header['Host']
            host=port=''
            if hostString.__contains__(':'): # with port number
                host,port = hostString.split(':')
            else:
                host,port = hostString,"80"
            sk2.connect((host,int(port)))

        else:
            sk1.send('bad request')
            sk1.close();
            return
        inputs=[sk1,sk2]
        sk2.send(self.buffer)
        #counter
        count = 0
        while 1:
            count+=1
            rl, wl, xl = select.select(inputs, [], [], 3)
            if xl:
                break
            if rl:
                for x in rl:
                    data = x.recv(8192)
                    if x is sk1:
                        output = sk2
                    else:
                        output = sk1
                    if data:
                        output.send(data)
                        count = 0
            if count == 20:
                 break


        sk1.close()
        sk2.close()



    def processHeader(self,header):
        header = header.replace("\r\n","\n")
        lines = header.split('\n')    
        result = {}
        uLine = lines[0] # url line
        if len(uLine) == 0: return result # if url line empty return empty dict
        vl = uLine.split(' ')
        result['method'] = vl[0]
        result['url'] = vl[1]
        result['protocol'] = vl[2]
        for line in lines[1: - 1]:
            if len(line)>3: # if line is not empty
                exp = re.compile(': ')
                nvp = exp.split(line, 1)
                if(len(nvp)>1):
                    result[nvp[0]] = nvp[1]
        return result




if __name__ == "__main__":
    HOST, PORT = "0.0.0.0", 8088
    proxy = ProxyServer(HOST,PORT)
    proxy.startServer()

1 个回答

0

我不太确定你遇到的速度问题是什么,但我发现了一些其他的小问题:

result['protocal'] = vl[2]

应该是

result['protocol'] = vl[2]

这段代码缩进得太深了一层:

sk2.connect((host,int(port)))

你可以使用这个装饰器来逐行分析你每个方法的性能。

撰写回答