from bs4 import BeautifulSoup #imports beautifulSoup package
import urllib2
# Scrape the last-name column from the Marshall PhD faculty listing and
# print the names as plain text (without the surrounding <td> markup).
url = 'https://www.marshall.usc.edu/faculty/phd'

# urllib2 responses are not context managers, so close explicitly to avoid
# leaking the underlying socket if parsing raises.
page = urllib2.urlopen(url)
try:
    soup = BeautifulSoup(page.read(), "lxml")
finally:
    page.close()

# Passing the full multi-class string makes BeautifulSoup match the exact
# value of the class attribute, i.e. only cells carrying all three classes
# in this order.
name_cells = soup.find_all(
    'td',
    {'class': 'views-field views-field-field-faculty-name-last-value active'},
)

# find_all returns Tag objects; printing them directly dumps the HTML.
# Take the text content of each cell and trim the whitespace padding that
# the table markup leaves around it.
names = [cell.get_text(strip=True) for cell in name_cells]
print(names)
您可以在对“td”调用 find_all 之后,使用每个结果的“text”属性来解决这个问题。
因此,对于 find_all 返回的结果,只需遍历其中的每个元素,取出它的“text”部分,并将其放入 names 列表中。
下面是用列表推导式实现这一点的方法,例如:names = [td.text for td in names]
运行此操作后,输出将是一个只包含姓名文本、不含 HTML 标签的列表。
相关问题 更多 >
编程相关推荐