Can't scrape a website's pages when the URL does not change

Published 2024-05-31 23:45:18


I am trying to get the names of all the games on this site: "https://slotcatalog.com/en/The-Best-Slots#anchorFltrList". To do so, I use the following code:

import requests
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

url = "https://slotcatalog.com/en/The-Best-Slots#anchorFltrList"

page = requests.get(url, headers=headers)
soup = BeautifulSoup(page.content, 'html.parser')

data = []
table = soup.find_all('div', attrs={'class':'providerCard'})

for game in range(0,len(table)-1):
    print(table[game].find('a')['title'])

and I get what I want. I would like to do the same for all the pages available on the site, but since the URL does not change when I click on another page, I looked at the network (XHR) events fired on the page and tried to send a request with the following code:

for page_no in range(1, 100):
    data = {
            "blck":"fltrGamesBlk",
            "ajax":"1",
            "lang":"end",
            "p":str(page_no),
            "translit":"The-Best-Slots",
            "tag":"TOP",
            "dt1":"",
            "dt2":"",
            "sorting":"SRANK",
            "cISO":"GB",
            "dt_period":"",
            "rtp_1":"50.00",
            "rtp_2":"100.00",
            "max_exp_1":"2.00",
            "max_exp_2":"250000.00",
            "min_bet_1":"0.01",
            "min_bet_2":"5.00",
            "max_bet_1":"3.00",
            "max_bet_2":"10000.00"
        }
    page = requests.post('https://slotcatalog.com/index.php',
                         data=data,
                         headers={'Host': 'slotcatalog.com',
                                  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0'})


    soup = BeautifulSoup(page.content, 'html.parser')
    for row in soup.find_all('div', attrs={'class':'providerCard'}):
        name = row.find('a')['title']
        print(name)
        

The result is `KeyError: 'title'`, meaning that no element with the class "providerCard" was found. Is the request to the website made in the wrong way? If so, where should I change the code? Thanks in advance.


1 Answer

Answered 2024-05-31 23:45:18

Well then, you have a typo. XD It's the `"lang":"end"` in the payload; among other things, it should be `"lang": "en"`.

Anyway, I've cleaned up your code a bit and it works as expected. You can keep the loop over all the games if you want:

import requests
from bs4 import BeautifulSoup

headers = {
    "referer": "https://slotcatalog.com/en/The-Best-Slots",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/50.0.2661.102 Safari/537.36",
    "x-requested-with": "XMLHttpRequest",
}

payload = {
    "blck": "fltrGamesBlk",
    "ajax": "1",
    "lang": "en",
    "p": 1,
    "translit": "The-Best-Slots",
    "tag": "TOP",
    "dt1": "",
    "dt2": "",
    "sorting": "SRANK",
    "cISO": "EN",
    "dt_period": "",
    "rtp_1": "50.00",
    "rtp_2": "100.00",
    "max_exp_1": "2.00",
    "max_exp_2": "250000.00",
    "min_bet_1": "0.01",
    "min_bet_2": "5.00",
    "max_bet_1": "3.00",
    "max_bet_2": "10000.00"
}
page = requests.post(
    "https://slotcatalog.com/index.php",
    data=payload,
    headers=headers,
)
soup = BeautifulSoup(page.content, "html.parser")
print([i.get("title") for i in soup.find_all("a", {"class": "providerName"})])


Output (page 1 only):

['Starburst', 'Bonanza', 'Rainbow Riches', 'Book of Dead', "Fishin' Frenzy", 'Wolf Gold', 'Twin Spin', 'Slingo Rainbow Riches', "Gonzo's Quest", "Gonzo's Quest Megaways", 'Eye of Horus (Reel Time Gaming)', 'Age of the Gods God of Storms', 'Lightning Roulette', 'Buffalo Blitz', "Fishin' Frenzy Megaways", 'Fluffy Favourites', 'Blue Wizard', 'Legacy of Dead', '9 Pots of Gold', 'Buffalo Blitz II', 'Cleopatra (IGT)', 'Quantum Roulette', 'Reel King Mega', 'Mega Moolah', '7s Deluxe', "Rainbow Riches Pick'n'Mix", "Shaman's Dream"]
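To walk every results page rather than just the first, the same payload can be re-posted with an incremented `p` value. Below is a minimal sketch built on the corrected payload above; the `payload_for_page` helper, the `max_pages` cap, and the stop-on-empty-page check are my additions, not part of the site's documented API.

```python
import requests
from bs4 import BeautifulSoup

HEADERS = {
    "referer": "https://slotcatalog.com/en/The-Best-Slots",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/50.0.2661.102 Safari/537.36",
    "x-requested-with": "XMLHttpRequest",
}

# Corrected payload from the answer; "p" is filled in per page.
BASE_PAYLOAD = {
    "blck": "fltrGamesBlk", "ajax": "1", "lang": "en",
    "translit": "The-Best-Slots", "tag": "TOP",
    "dt1": "", "dt2": "", "sorting": "SRANK", "cISO": "EN",
    "dt_period": "", "rtp_1": "50.00", "rtp_2": "100.00",
    "max_exp_1": "2.00", "max_exp_2": "250000.00",
    "min_bet_1": "0.01", "min_bet_2": "5.00",
    "max_bet_1": "3.00", "max_bet_2": "10000.00",
}

def payload_for_page(page_no):
    """Copy the base payload with the page number slotted in as a string."""
    payload = dict(BASE_PAYLOAD)
    payload["p"] = str(page_no)
    return payload

def scrape_all_games(max_pages=100):
    """POST page after page, stopping at the first page with no results."""
    names = []
    for page_no in range(1, max_pages + 1):
        resp = requests.post("https://slotcatalog.com/index.php",
                             data=payload_for_page(page_no),
                             headers=HEADERS)
        cards = BeautifulSoup(resp.content, "html.parser").find_all(
            "a", {"class": "providerName"})
        if not cards:  # assumed signal that we are past the last page
            break
        names.extend(a.get("title") for a in cards)
    return names

# Usage (hits the network): scrape_all_games(max_pages=3)
```

The empty-page check is a guess at how the endpoint signals the end of the list; if the site instead returns a "no results" banner inside the HTML, you would need to test for that marker instead.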
