下面的代码不断地在print (aTweet + '~' + timeSource[x] + '~' + keyWord[i])
行上给出错误IndexError: list index out of range
。这和keyword[i]
术语有关吗?我理解Index out of range
通常意味着提供一个列表元素不存在的索引。这是否意味着错误实际上可能存在于本节中:
if ( len(splitSource) > 20 ):
max_range = 19
else:
max_range = len(splitSource)
参考代码:
import re
from re import sub
import time
import cookielib
from cookielib import CookieJar
import urllib2
from urllib2 import urlopen
import difflib
import sys
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
keyWord = ["Scotch"]
def main():
i=0
while i<len(keyWord):
startingLink = 'https://twitter.com/search/realtime?q='+keyWord[i]
tUrl = startingLink+'&src=hash'
oldTwit = []
newTwit = []
howSimAr = [.5,.5,.5,.5,.5]
sourceCode = opener.open(tUrl).read()
splitSource = re.findall(r'<p class="js-tweet-text tweet-text">(.*?)</p>',sourceCode)
timeSource = re.findall(r'js-nav" title="(.*?)"',sourceCode)
if ( len(splitSource) > 20 ):
max_range = 19
else:
max_range = len(splitSource)
print ''
print ''
print ''
##print 'Keyword: ' + keyWord[i]
print ''
for x in range (0, max_range):
aTweet = re.sub(r'<.*?>','',splitSource[x])
print (aTweet + '~' + timeSource[x] + '~' + keyWord[i])
#print ';'
newTwit.append(aTweet)
## comparison = difflib.SequenceMatcher(None, newTwit, oldTwit)
## howSim = comparison.ratio()
## print ';'
## print 'This selection is',howSim,'similar to the past'
## howSimAr.append(howSim)
## howSimAr.remove(howSimAr[0])
##
## waitMultiplier = reduce(lambda x, y: x+y, howSimAr)/len(howSimAr)
##
## print ''
## print 'The current similarity array:',howSimAr
## print 'Our current Multiplier:', waitMultiplier
oldTwit = [None]
for eachItem in newTwit:
oldTwit.append(eachItem)
newTwit = [None]
time.sleep(2)
x = 0
i = i + 1
## except Exception, e:
## print str(e)
## print 'errored in the main try'
main()
twitter搜索页面的源代码中没有出现
js-nav" title="
,因此第二个regexp将找不到任何内容。事实上在
将显示:
无论您想归档什么,最好使用
HTMLParser
左右的文档来处理HTML,而不是使用re
。这将更容易确保timeSource[x]
和splitSource[x]
在所有x
中都属于彼此。你知道吗相关问题 更多 >
编程相关推荐