Python3打印特定href链接

<li><a href="#">Education & Research </a> <ul> <li><a href="caseofthe_month.php">Case of the Month</a></li> <a href="page.php?id=2"> <a href="idontwantthispagetoshowup.php"> <a href="page.php?id=5">Prospectus Fellowship-July-14</a> <a href="thisoneeither.php">

def gethref(ip):
    """Fetch ``http://<ip>`` and print every href that contains ``.php?id=``.

    Relies on ``requests`` and ``html`` being imported elsewhere
    (``html`` is presumably ``lxml.html`` -- confirm against the imports).

    Args:
        ip: Host to scan; ``http://`` is prepended to build the URL.
    """
    url = "http://" + ip
    print("[x] ~ SCAN: " + url + " ~ [x]")
    req = requests.get(url)
    tree = html.fromstring(req.text)
    # The XPath expression selects the href attribute values in the document.
    tree_href = tree.xpath('//@href')
    # `in` on a list tests exact element equality, so the glob-like literal
    # '*.php?id=*' could never match; filter each href by substring instead.
    for href in tree_href:
        if '.php?id=' in href:
            print(href)

2条回答

网友

1楼 · 编辑于 2024-05-15 01:16:02

您可以使用CSS选择器a[href*=".php?id="]：

from bs4 import BeautifulSoup

# Sample markup mirroring the HTML from the question.
html_doc = """
<li><a href="#">Education & Research</a>

<ul>                         
<li>
    <a href="caseofthe_month.php">Case of the Month</a>
</li>
</ul>

<a href="page.php?id=2"></a>
<a href="idontwantthispagetoshowup.php">
<a href="page.php?id=5">Prospectus Fellowship-July-14</a>
<a href="thisoneeither.php"></a>
"""

soup = BeautifulSoup(html_doc, "html.parser")

# `href*=` is the CSS "attribute value contains substring" selector.
php_id_anchors = soup.select('a[href*=".php?id="]')
for anchor in php_id_anchors:
    print(anchor["href"])

输出：

page.php?id=2
page.php?id=5

或：

# Walk every <a> tag and keep only those whose href contains ".php?id=".
for anchor in soup.find_all("a"):
    href = anchor.get("href", "")  # anchors without an href yield ""
    if ".php?id=" not in href:
        continue
    print(href)

或：

def _is_php_id_anchor(tag):
    """Return True for <a> tags whose href contains ".php?id="."""
    return tag.name == "a" and ".php?id=" in tag.get("href", "")


# find_all accepts a callable that is evaluated against each tag.
for anchor in soup.find_all(_is_php_id_anchor):
    print(anchor["href"])

网友

2楼 · 编辑于 2024-05-15 01:16:02

这是查找包含.php?id=的所有HREF所需的代码

from bs4 import BeautifulSoup
import requests
import re

def gethref(ip):
    """Fetch ``http://<ip>`` and print the tags whose href contains ``.php?id=``.

    Args:
        ip: Host to scan; ``http://`` is prepended to build the URL.
    """
    url = "http://" + ip
    print("[x] ~ SCAN: " + url + " ~ [x]")
    req = requests.get(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    # The dot before "php" must be escaped, otherwise it matches any
    # character (e.g. "xphp?id="). The former leading "(.*)" and trailing
    # "\d*" could match the empty string, so they added nothing and are gone.
    h = soup.find_all(href=re.compile(r'\.php\?id='))
    print(h)

我想这就是你需要的

如果它不起作用，请告诉我

相关问题更多 >

编程相关推荐

热门问题

热门文章