<p>已测试，似乎可以正常工作。</p>
<pre><code>from bs4 import BeautifulSoup, SoupStrainer
html = '''<html>
<body>
<p class="fixedfonts">
<a href="A.pdf">LINK1</a>
</p>
<h2>Results</h2>
<p class="fixedfonts">
<a href="B.pdf">LINK2</a>
</p>
<p class="fixedfonts">
<a href="B.pdf">LINK2</a>
</p>
<p class="fixedfonts">
<a href="C.pdf">LINK3</a>
</p>
</body>
</html>'''
# At this point `html` holds the whole page as a string.

# Only links that appear after the "Result" marker are wanted. partition()
# keeps everything after the FIRST marker (split(...)[1] would stop at a
# second occurrence) and yields an empty tail, rather than an IndexError,
# when the marker is missing.
_, _, need = html.partition("Result")

# Parse just that tail, keeping only <a> tags. The markup is passed through
# unmodified: stripping newlines first would glue "<a\nhref=...>" into
# "<ahref=...>". The parser is named explicitly so the result does not depend
# on which optional parsers happen to be installed.
soup = BeautifulSoup(need, "html.parser", parse_only=SoupStrainer("a"))

# Collect hrefs in document order, one entry per <a> occurrence. href=True
# skips anchors that have no href attribute. Reading the attribute from the
# parsed tag (instead of counting substrings in the raw HTML) means a link is
# not counted again when its URL also appears in the link text, and links
# containing "%20" are no longer lost.
n_links = [
    link["href"].replace("%20", " ")
    for link in soup.find_all("a", href=True)
]
print(n_links)
</code></pre>