使用python的PhantonJS和Green GreenPile崩溃

2024-04-25 10:08:32 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试使用多个PhantomJS实例,并在线程之间使用驱动程序,而不是销毁它,并一次又一次地创建进程:

import sys
from datetime import datetime
import eventlet
from helpers import log, make_request
import settings
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import redis
import lxml.html as html

pool = eventlet.GreenPool(settings.max_threads)
pile = eventlet.GreenPile(pool)
redis = redis.StrictRedis(host='localhost', port=6379, db=0)        

def begin_crawl_phantomJSV2():

    url = redis.spop("queue")
    if None == url:
        return

    driver = webdriver.PhantomJS();

    process_urlv2(driver, url)

def process_urlv2(driver, url):       

    driver.get(url)

    ## some work

    url = redis.spop("queue")
    if None == url:
        driver.close()
        driver.quit()
        return

    pile.spawn(process_urlv2(driver, url))


if __name__ == '__main__':    

    timea = datetime.now()
    log("Beginning crawl at {}".format(timea))
    redis.sadd("queue", "http://linka.com")
    redis.sadd("queue", "http://linkb.com")    

    [pile.spawn(begin_crawl_phantomJSV2) for _ in range(1)]
    pool.waitall()

并得到以下错误:

Traceback (most recent call last):
  File "C:\Python27\lib\site-packages\eventlet\hubs\hub.py", line 457, in fire_timers
    timer()
  File "C:\Python27\lib\site-packages\eventlet\hubs\timer.py", line 58, in __call__
    cb(*args, **kw)
  File "C:\Python27\lib\site-packages\eventlet\greenthread.py", line 214, in main
    result = function(*args, **kwargs)
TypeError: 'NoneType' object is not callable

将方法更改为:

def begin_crawl_phantomJS():

    driver = webdriver.PhantomJS();
    url = redis.spop("queue")
    if None == url:
        return

    process_url(driver, url)
    driver.close()
    driver.quit()
    pile.spawn(begin_crawl_phantomJS)

def process_url(driver, url):

    driver.get(url)
    ## some work

它的工作很好,但我正在浪费一些时间的幻影散文午餐,你知道我在做什么吗?你知道吗


Tags: infromimportredisurlifqueuedef