import operator
from collections import Counter

import requests
from bs4 import BeautifulSoup
def start(url):
    """Download *url* and run the word-frequency pipeline on its article text.

    The page is parsed with the ``html.parser`` backend and every
    ``<div class="entry-content">`` element contributes its lowercased,
    whitespace-separated words. The collected words are handed to
    ``clean_wordlist``; nothing is returned.

    Args:
        url: Address of the page to fetch.
    """
    wordlist = []
    source_code = requests.get(url).text
    soup = BeautifulSoup(source_code, 'html.parser')
    for each_text in soup.find_all('div', {'class': 'entry-content'}):
        # ``.strings`` is a generator and has no ``.lower()``; ``get_text()``
        # returns the element's text as a single ``str``.
        content = each_text.get_text()
        wordlist.extend(content.lower().split())
    clean_wordlist(wordlist)
def clean_wordlist(wordlist):
    """Strip punctuation symbols from each word and forward the survivors.

    Words that become empty once the symbols are removed are dropped. The
    cleaned words are passed to ``create_dictionary``; nothing is returned.

    Args:
        wordlist: Iterable of word strings.
    """
    # Raw string: the backslash is one of the characters to strip, not the
    # start of an escape sequence.
    symbols = r'!@#$%^&*()_-+={[}]|\;:"<>?/.,'
    # Build the deletion table once; translate() then removes every symbol
    # from a word in a single pass.
    strip_symbols = str.maketrans('', '', symbols)
    stripped = (word.translate(strip_symbols) for word in wordlist)
    clean_list = [word for word in stripped if word]
    create_dictionary(clean_list)
def create_dictionary(clean_list):
    """Print each word's frequency, then the three most common words.

    One ``word : count `` line is printed per distinct word, in ascending
    order of count, followed by the list of the three most frequent
    ``(word, count)`` pairs. Nothing is returned.

    Args:
        clean_list: Iterable of hashable words to count.
    """
    word_count = Counter(clean_list)
    for key, value in sorted(word_count.items(), key=operator.itemgetter(1)):
        print(f"{key} : {value} ")
    print(word_count.most_common(3))
# Run the scraper against the example article.
start("https://www.geeksforgeeks.org/programming-language-choose/")
上面的程序报错:AttributeError: 'generator' object has no attribute 'lower'。我打印了 each_text.strings 的类型,得到的是 <class 'generator'>。现在应该如何继续,才能从给定链接中获取文本部分?
不必创建生成器对象,直接使用 .text 即可;如果确实想使用 .strings,可以对它进行解包(例如 print(*strings_object))。如您所知,解包时要在对象前加星号,这里不再详述,您可以在这里(原文链接 HERE)找到有关它的更多信息。
相关问题 更多 >
编程相关推荐