奇怪的行为

# Fetch the monthly FII page from SEBI with a mechanize browser and collect
# every element whose rowspan attribute equals "7".
# Written for Python 2 (cookielib and mechanize are Python 2 era modules).
import cookielib
import csv
import time

import mechanize
from bs4 import BeautifulSoup

# Browser
br = mechanize.Browser()

# Cookie Jar
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)

# Browser options
br.set_handle_equiv(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)

# Follow an immediate (0-second) refresh, but do not hang on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

# Want debugging messages? Uncomment any of these:
# br.set_debug_http(True)
# br.set_debug_redirects(True)
# br.set_debug_responses(True)

# Send a desktop Firefox User-agent header with every request.
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

print("Obtaining FII data from SEBI...")
r = br.open('http://www.sebi.gov.in/sebiweb/investment/FIILatestSE.jsp?period=month')
data = r.read()

messages = []
messages.append("FII data obtained from SEBI")

# No parser is named here, so Beautiful Soup picks one from whichever parsers
# are installed; the resulting tree can therefore differ between machines.
soup = BeautifulSoup(data)

# NOTE: the name `list` shadows the builtin; it is kept as originally posted.
list = soup.find_all(rowspan="7")

1条回答

网友

1楼 · 发布于 2024-05-15 21:43:40

我通过添加 xml 参数解决了这个问题：

soup = BeautifulSoup(data, 'xml')

为清楚起见，请确保已安装 lxml。如果安装了任何其他解析器，它将覆盖 lxml，解析结果可能不符合预期！

相关问题更多 >

编程相关推荐

热门问题

热门文章