pyqt4 顺序启动停止应用程序时段错误
我正在尝试使用pyqt读取网页。我需要用不同的网址多次调用一个方法。目前我使用的代码类似于:http://blog.sitescraper.net/2010/06/scraping-javascript-webpages-in-python.html#comment-form
但是当我尝试时,出现了段错误(seg faults)。欢迎任何建议。
import sys
from time import clock
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4.QtNetwork import *
class Render(QWebPage):
def __init__(self):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.networkAccessManager().finished.connect(self.handleEnd)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
def loadURL(self, url):
self.mainFrame().load(QUrl(url))
self.app.exec_()
def savePageImage (self, width, height, Imagefile):
pageSize = self.mainFrame().contentsSize();
if width == 0:
pageWidth = pageSize.width()
else:
pageWidth = width
if height == 0:
pageHeight = pageSize.height()
else:
pageHeight = height
self.setViewportSize(QSize(pageWidth, pageHeight))
Img = QImage(self.viewportSize(), QImage.Format_ARGB32)
painter = QPainter(Img)
self.mainFrame().render(painter)
painter.end()
Img.save(Imagefile)
def _loadFinished(self, result):
print "load finish"
self.frame = self.mainFrame()
self.returnVal = result
self.app.quit()
def handleEnd (self, reply):
# get first http code and disconnect
# could add filter to listen relevant responses
self.httpcode = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
self.networkAccessManager().finished.disconnect(self.handleEnd)
jsrurl = 'http://www.w3resource.com/javascript/document-alert-confirm/four.html'
badurl='something.or.other'
badhttp = 'http://eclecticself.com/test2.html'
testurl = 'http://www.nydailynews.com/entertainment/index.html'
testurl2 = 'http://www.palmbeachpost.com/'
testurl3 = 'http://www.nydailynews.com/news/politics/2011/08/03/2011-08-03_pat_buchanan_downplays_controversy_after_calling_president_obama_your_boy_to_rev.html'
url = testurl
start = clock()
r = Render()
r.loadURL(url)
html = r.frame.toHtml()
elapsed = clock() - start
print elapsed
if (r.returnVal == True):
if (r.httpcode.toInt()[0] != 404):
#print html.toUtf8()
start = clock()
r.savePageImage(1024, 0, "pageSnapshot.png")
elapsed = clock() - start
print elapsed
else:
print 'page not found'
else:
print 'badurl'
s = Render()
s.loadURL(jsrurl)
html = s.frame.toHtml()
elapsed = clock() - start
print elapsed
if (s.returnVal == True):
if (s.httpcode.toInt()[0] != 404):
print html.toUtf8()
start = clock()
s.savePageImage(1024, 0, "pageSnapshot.png")
elapsed = clock() - start
print elapsed
else:
print 'page not found'
else:
print 'badurl'
1 个回答
1
PyQt经常会忘记保持对对象的引用。这里有一些解决办法:
可以试试用PySide代替PyQt,这样做很简单,因为它们的接口几乎完全一样。我建议你先试试PySide,这可能会立即解决你的问题,或者至少让问题变得更可预测和可重复。
尽量保持对你正在使用的所有Qt对象的引用,等用完这些对象后再去掉这些引用。你也可以尝试在切换到下一个网页之前,明确地关闭它们或者导航到“about:blank”。
通常这样做会有帮助。如果没有,那你就需要像utdemir上面提到的那样,进一步缩小问题范围。调试通常没什么用,因为这类问题往往和时间有关。没有输出缓冲的日志记录通常能帮助你更接近问题的根源。
我理解你的感受,这种问题确实很难追踪!