PyQT4 Javascript 警告
我有一个Python脚本,用来从一个网站(www.nowgoal.com)抓取数据。因为这个网页里有JavaScript代码,所以我使用PyQt4来渲染这个页面,然后把它转换成HTML,最后提取需要的数据。一切都运行得很好,但最近他们添加了一个JavaScript的警告信息,这让页面无法正确渲染。
查看源代码时,我发现页面底部有一个处理警告信息的JavaScript函数:
`<script type ="text/javascript" >
// Inline script quoted from the bottom of the page: prompts the visitor to
// bookmark the spare domain when no "enurl_bak" cookie is found.
if(getCookie("enurl_bak")==null)
{
// Written before the prompt; presumably makes the check above fail on
// later page loads (writeCookie is defined elsewhere on the site).
writeCookie("enurl_bak", "1");
// confirm() is the blocking dialog; if accepted, window.external.addFavorite
// is attempted and any exception falls back to an alert().
if(confirm('Nowgoal.net is our spare link\n\n Please add to your favorites')) {try{window.external.addFavorite('http://www.nowgoal.net','LiveScore - NowGoal.com');}catch(e) {alert('Sorry, fail to add favorits. Your browser can\'t finish this operation. Please use Ctrl+D to add.');}}
}
</script>`
现在看来,只要设置一个cookie(名字是“enurl_bak”,值是“null”),就可以跳过这个警告了。问题是我不知道该怎么做。我到处找了找,但没有找到用PyQt4设置cookie的实际例子。
这是我用来渲染网页的代码:
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4 import QtNetwork
class Render(QWebPage):
    """Load a URL in a QWebPage and keep the main frame once loading ends.

    Constructing an instance is blocking: ``__init__`` starts the Qt event
    loop and only returns after ``loadFinished`` has fired. Afterwards the
    loaded frame is available as ``self.frame``.
    """

    def __init__(self, url):
        """Create the application and page, then load *url* and wait.

        url -- address to load, as a string accepted by ``QUrl``.
        """
        # Local import: the snippet's import block does not bring in ``sys``.
        import sys

        # The QApplication has to exist before the QWebPage is constructed.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        # The original passed an undefined name ``networkAccessManager``
        # here. Install an explicit manager, parented to the page, before
        # the first load so a cookie jar can later be attached to it.
        self.setNetworkAccessManager(QtNetwork.QNetworkAccessManager(self))
        self.mainFrame().load(QUrl(url))
        # Blocks until _loadFinished() calls quit().
        self.app.exec_()

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: remember the frame and stop the loop.

        result -- the success flag emitted by the signal; it is not inspected.
        """
        self.frame = self.mainFrame()
        self.app.quit()
# Render the page (Render.__init__ blocks in app.exec_() until the
# loadFinished handler quits the loop), then read the frame back as HTML.
url = 'http://www.nowgoal.com'
r = Render(url)
html = r.frame.toHtml()
我还尝试过不用load(QUrl),而是先用urllib2获取页面的HTML、去掉其中的JavaScript警告函数,再通过setHtml加载,但没有成功。
2 个回答
1
下面的测试脚本成功地设置和读取了cookie,这样就不会显示警告信息了。不过,这个方法只在test.html页面上有效;出于某种未知的原因(可能是webkit的bug?),在www.nowgoal.com网站上就不行。
from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork
class WebPage(QtWebKit.QWebPage):
    """QWebPage whose cookie jar is pre-seeded with ``enurl_bak=1``.

    The jar is attached to the page's network access manager at
    construction time; the application is quit as soon as the main frame
    reports that loading has finished.
    """

    def __init__(self):
        """Build the page, seed the cookie jar and hook up the load signal."""
        super(WebPage, self).__init__()

        # One preset cookie: name 'enurl_bak', value '1'.
        preset = QtNetwork.QNetworkCookie('enurl_bak', '1')
        jar = QtNetwork.QNetworkCookieJar(self)
        jar.setAllCookies([preset])
        self.cookies = jar

        manager = self.networkAccessManager()
        manager.setCookieJar(jar)

        frame = self.mainFrame()
        frame.loadFinished.connect(self.handleLoadFinished)

    def start(self, url):
        """Begin loading *url* (a string passed to ``QUrl``) in the main frame."""
        target = QtCore.QUrl(url)
        self.mainFrame().load(target)

    def handleLoadFinished(self):
        """Slot for the frame's ``loadFinished``: log and quit the application."""
        print('handleLoadFinished')
        QtGui.qApp.quit()
if __name__ == '__main__':
    # Script entry point: load test.html in a WebPage and exit with the
    # status returned by the Qt event loop.
    import sys

    app = QtGui.QApplication(sys.argv)
    window = WebPage()
    window.start('test.html')
    exit_status = app.exec_()
    sys.exit(exit_status)
test.html:
<script type="text/javascript">
// from www.nowgoal.com (public.js)
// Return the text stored after "<name>=" in document.cookie, or null when
// that substring does not occur (or the cookie string is empty).
function getCookie(name){
var cname = name + "=";
var dc = document.cookie;
if (dc.length > 0){
// NOTE: begin/end are assigned without var, so in non-strict code they
// become globals. indexOf matches anywhere in the string, so a cookie
// whose name merely ends with `name` would match too.
begin = dc.indexOf(cname);
if (begin != -1){
// Skip past "name=" to the start of the value.
begin += cname.length;
// The value runs up to the next ";" or to the end of the string.
end = dc.indexOf(";", begin);
if (end == -1) end = dc.length;
return dc.substring(begin, end);
}
}
return null;
}
// Same null check as the site's inline script: alert only when no
// "enurl_bak" cookie is visible to the page.
if (getCookie('enurl_bak') == null) {
alert('"enurl_bak" value is null');
}
</script>
更新:
看起来并没有什么webkit的bug:我只需要设置一下域名,正如SkY3d的回答所说的那样。
1
没错!!
完成了 :)
from PyQt4.QtNetwork import QNetworkCookie, QNetworkCookieJar
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
import sys
class Render(QWebPage):
    """Load a URL in a QWebPage with an ``enurl_bak`` cookie already set.

    The cookie is scoped to the ``.nowgoal.com`` domain and is given a name
    but no value. Constructing an instance is blocking: ``__init__`` runs
    the Qt event loop until ``loadFinished`` fires, after which the loaded
    frame is available as ``self.frame``.
    """

    def __init__(self, url):
        """Create the application and page, seed the cookie, load *url*.

        url -- address to load, as a string accepted by ``QUrl``.
        """
        # The QApplication has to exist before the QWebPage is constructed.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)

        # Only domain and name are set; the value is left empty.
        self.cookie = QNetworkCookie()
        self.cookie.setDomain('.nowgoal.com')
        self.cookie.setName('enurl_bak')
        self.cookiejar = QNetworkCookieJar()
        self.cookiejar.setAllCookies([self.cookie])
        # Install the jar BEFORE load(): the original attached it after the
        # navigation had already been started, so the jar was presumably not
        # yet in place when the first request was created.
        self.networkAccessManager().setCookieJar(self.cookiejar)

        self.mainFrame().load(QUrl(url))
        # Blocks until _loadFinished() calls quit().
        self.app.exec_()

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: remember the frame and stop the loop.

        result -- the success flag emitted by the signal; it is not inspected.
        """
        self.frame = self.mainFrame()
        # Call form prints the same text on Python 2 and also parses on Python 3.
        print("loadfinished")
        self.app.quit()
# Constructing Render performs the whole load: __init__ only returns after
# _loadFinished has quit the event loop. The instance is not kept here.
url = 'http://www.nowgoal.com'
Render(url)
再次感谢 ekhumoro 帮我指明了方向!