我正在尝试运行多个 PhantomJS 实例,并希望在各个(绿色)线程之间复用同一个驱动程序,而不是每次都销毁它、再一次又一次地创建新进程:
import sys
from datetime import datetime
import eventlet
from helpers import log, make_request
import settings
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import redis
import lxml.html as html
# Green-thread pool whose size is taken from settings.max_threads.
pool = eventlet.GreenPool(settings.max_threads)
# GreenPile built on top of that pool; the crawl functions call pile.spawn() on it.
pile = eventlet.GreenPile(pool)
# NOTE(review): this rebinds the name `redis` from the imported module to a
# client instance, so the module is no longer reachable under that name below.
redis = redis.StrictRedis(host='localhost', port=6379, db=0)
def begin_crawl_phantomJSV2():
    """Start one crawl chain that reuses a single PhantomJS driver.

    Pops one URL from the Redis set "queue". When nothing is popped, no
    driver is started. Otherwise a PhantomJS driver is created and handed,
    together with the URL, to process_urlv2, which takes over the driver
    (including shutting it down once the queue is drained).
    """
    url = redis.spop("queue")
    # Identity comparison is the idiomatic way to test for "nothing popped".
    if url is None:
        return
    driver = webdriver.PhantomJS()
    process_urlv2(driver, url)
def process_urlv2(driver, url):
    """Load *url* with an existing driver, then schedule the next queued URL.

    After the page has been handled, one more URL is popped from the Redis
    set "queue". If nothing is left, the driver is closed and quit. Otherwise
    a new green thread is scheduled on the pile to process the next URL with
    the same driver, so the PhantomJS process is reused.

    Args:
        driver: the Selenium driver to reuse for this and later URLs.
        url: the address to load.
    """
    try:
        driver.get(url)
        ## some work
        next_url = redis.spop("queue")
    except Exception:
        # Do not leave the PhantomJS process running if this link of the
        # chain fails; nobody else holds a reference to the driver.
        driver.quit()
        raise
    if next_url is None:
        driver.close()
        driver.quit()
        return
    # Pass the callable and its arguments separately. Writing
    # pile.spawn(process_urlv2(driver, next_url)) would run the call right
    # here and hand its return value (None) to spawn, which fails with
    # "TypeError: 'NoneType' object is not callable".
    pile.spawn(process_urlv2, driver, next_url)
if __name__ == '__main__':
    # Script entry point: seed the queue, start the crawl chain(s), then
    # block until every green thread in the pool has finished.
    timea = datetime.now()
    log("Beginning crawl at {}".format(timea))
    redis.sadd("queue", "http://linka.com")
    redis.sadd("queue", "http://linkb.com")
    # A plain loop rather than a throwaway list comprehension: spawn() is
    # called only for its side effect. range(1) starts a single chain.
    for _ in range(1):
        pile.spawn(begin_crawl_phantomJSV2)
    pool.waitall()
并得到以下错误:
Traceback (most recent call last):
File "C:\Python27\lib\site-packages\eventlet\hubs\hub.py", line 457, in fire_timers
timer()
File "C:\Python27\lib\site-packages\eventlet\hubs\timer.py", line 58, in __call__
cb(*args, **kw)
File "C:\Python27\lib\site-packages\eventlet\greenthread.py", line 214, in main
result = function(*args, **kwargs)
TypeError: 'NoneType' object is not callable
将方法更改为:
def begin_crawl_phantomJS():
    """Process one queued URL with a fresh PhantomJS driver, then respawn.

    Pops one URL from the Redis set "queue". When nothing is popped the
    function returns without starting a browser. Otherwise it creates a
    driver, processes the URL, shuts the driver down, and schedules itself
    again on the pile to pick up the next URL.
    """
    # Check the queue before launching PhantomJS, so that an empty queue does
    # not start a browser process that is never shut down.
    url = redis.spop("queue")
    if url is None:
        return
    driver = webdriver.PhantomJS()
    try:
        process_url(driver, url)
    finally:
        # Shut the browser down even if processing the page raised.
        driver.close()
        driver.quit()
    # NOTE(review): indentation was lost in the source listing; this respawn
    # is assumed to be the last statement of the function - confirm.
    pile.spawn(begin_crawl_phantomJS)
def process_url(driver, url):
    """Load *url* in the given driver; page-specific work follows the load.

    Args:
        driver: the Selenium driver used to fetch the page.
        url: the address to load.
    """
    driver.get(url)
    ## some work
这样可以正常工作,但每次启动 PhantomJS 进程都会浪费一些时间。你知道我哪里做错了吗?
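必须将 `pile.spawn(process_urlv2(driver, url))` 改为 `pile.spawn(process_urlv2, driver, url)`:前一种写法会立即调用函数,并把它的返回值 None 传给 spawn,于是出现 "'NoneType' object is not callable" 错误。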
相关问题 更多 >
编程相关推荐