python3:向 Codeforces 发送 XHR 请求

2024-06-16 15:09:45 发布

您现在位置:Python中文网/ 问答频道 /正文

最初,我使用 Selenium 通过 JavaScript 实现了这一点,但由于出现错误,我决定改用 requests 重写(如果可能的话,再改用 aiohttp)

Codeforces 站点有自己的 API,但用于获取题目题解的接口 https://codeforces.com/data/problemTutorial 并不在文档中(打开比赛的题解页面时,可以看到浏览器发出这个请求)。这些请求具有以下格式:

xhr requests from browser

我尝试自己发送这个请求,但没有成功:

import requests
from lxml.html import HtmlElement, fromstring
from lxml import html

from lxml import html
from lxml.etree import tostring

# Shared session used for every request in this script (a requests session
# keeps cookies between calls).
s = requests.session()


def get_token():
    """Download a Codeforces profile page and return the CSRF token found in it.

    The page is fetched through the module-level session ``s`` with no extra
    headers, and the token is read from the ``value`` attribute of the first
    element matched by a fixed XPath.

    Returns:
        The ``value`` attribute of the matched ``<input>`` element.

    Raises:
        IndexError: if the XPath matches nothing in the downloaded page.
    """
    profile_url = "https://codeforces.com/profile/MiFaFaOvO"
    page_source = s.get(profile_url).text
    matches = fromstring(page_source).xpath(
        '//*[@id="body"]/div[3]/div[5]/form/input[1]'
    )
    return matches[0].get("value")


# Fetch the token first and echo it so it can be inspected by eye.
csrf_token = get_token()
print(csrf_token)

# Endpoint that returns the tutorial for a single problem.
url = "https://codeforces.com/data/problemTutorial"

# Headers imitating a browser XHR call, plus the CSRF token.
headers = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "X-Requested-With": "XMLHttpRequest",
    "X-CSRF-Token": csrf_token,
    "accept": "text/html,application/xhtml+xml,application/xml",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
}

# Form body: the problem code and the same CSRF token.
a = s.post(
    url,
    headers=headers,
    data=dict(problemCode="1371A", csrf_token=csrf_token),
)
print(a.url, a, a.text)

Tags: textfromhttpsimportcomtokenurldata
1条回答
网友
1楼 · 发布于 2024-06-16 15:09:45

解决办法很简单

  1. 必须通过 data={} 参数把表单数据传递给 post
  2. 获取 csrf_token 时,需要带上浏览器的请求头(例如 user-agent)再发出请求(站点似乎对此有防护)
import requests
from lxml.html import HtmlElement, fromstring
from lxml import html

from lxml import html
from lxml.etree import tostring

# Shared session used for every request in this script (a requests session
# keeps cookies between calls).
s = requests.session()

# Browser-like user-agent sent with every request in this script.
headers = {
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
}


def get_token():
    """Download a Codeforces profile page and return the CSRF token found in it.

    The page is requested through the module-level session ``s`` using the
    module-level ``headers`` (which carry a browser-like user-agent). The token
    is the ``value`` attribute of the form input matched by a fixed XPath.

    Returns:
        The ``value`` attribute of the matched ``<input>`` element.

    Raises:
        requests.HTTPError: if the profile page answers with an error status.
        IndexError: if the expected form input is not present in the page.
    """
    url = "https://codeforces.com/profile/MiFaFaOvO"
    response = s.get(url, headers=headers)
    # Fail on HTTP errors here instead of trying to parse an error page.
    response.raise_for_status()

    # Named ``page`` rather than ``html`` so the imported ``lxml.html`` module
    # is not shadowed inside this function.
    page = response.text
    matches = fromstring(page).xpath(
        '//*[@id="body"]/div[3]/div[5]/form/input[1]'
    )
    if not matches:
        # Same exception type as the original ``[0]`` lookup, with a message
        # that says what was actually missing.
        raise IndexError(
            "CSRF token input not found in the profile page; "
            "the page layout may have changed"
        )
    return matches[0].get("value")


# Fetch the token, then add the CSRF and XHR entries to the shared headers.
csrf_token = get_token()
headers.update(
    {
        "x-csrf-token": csrf_token,
        "x-requested-with": "XMLHttpRequest",
    }
)
url = "https://codeforces.com/data/problemTutorial"

# The problem code and the token travel in the form body as well.
a = s.post(
    url,
    headers=headers,
    data=dict(problemCode="1371A", csrf_token=csrf_token),
)
print(a.text)

aiohttp

import aiohttp
import asyncio

from lxml.html import fromstring
from lxml import html


# Default headers for the aiohttp session: a browser-like user-agent.
headers = {
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
}


async def get_token(session):
    """Download a Codeforces profile page and return the CSRF token found in it.

    Args:
        session: Client session used for the GET request; its default headers
            are sent with the request.

    Returns:
        The ``value`` attribute of the form input matched by a fixed XPath.

    Raises:
        aiohttp.ClientResponseError: if the page answers with an error status.
        IndexError: if the expected form input is not present in the page.
    """
    async with session.get("https://codeforces.com/profile/MiFaFaOvO") as resp:
        # Fail on HTTP errors here instead of trying to parse an error page.
        resp.raise_for_status()
        # Named ``page`` rather than ``html`` so the imported ``lxml.html``
        # module is not shadowed inside this function.
        page = await resp.text()

    matches = fromstring(page).xpath(
        '//*[@id="body"]/div[3]/div[5]/form/input[1]'
    )
    if not matches:
        # Same exception type as the original ``[0]`` lookup, with a message
        # that says what was actually missing.
        raise IndexError(
            "CSRF token input not found in the profile page; "
            "the page layout may have changed"
        )
    return matches[0].get("value")


async def problemData(task, session, csrf_token):
    """Request the tutorial for one problem and return its HTML.

    Args:
        task: Problem code sent as the ``problemCode`` form field.
        session: Client session whose ``post`` coroutine performs the request.
        csrf_token: Token sent both as the ``csrf_token`` form field and as
            the ``x-csrf-token`` header.

    Returns:
        The value stored under ``"html"`` in the decoded JSON response.
    """
    form_fields = dict(problemCode=task, csrf_token=csrf_token)
    xhr_headers = {
        "x-requested-with": "XMLHttpRequest",
        "x-csrf-token": csrf_token,
    }
    resp = await session.post(
        "https://codeforces.com/data/problemTutorial",
        data=form_fields,
        headers=xhr_headers,
    )
    async with resp:
        decoded = await resp.json()
        return decoded["html"]


async def main():
    """Fetch a CSRF token, then print the tutorial HTML of several problems.

    The tutorial requests run concurrently and each result is printed as soon
    as its request finishes, so the output order may differ from the order of
    the problem codes.
    """
    tasks = ["1371A", "1360B", "1370A"]
    # ``async with`` closes the session even when a request raises; the
    # original only closed it on the success path.
    async with aiohttp.ClientSession(headers=headers) as session:
        csrf_token = await get_token(session)
        pending = [problemData(task, session, csrf_token) for task in tasks]
        for future in asyncio.as_completed(pending):
            res = await future
            print(res)


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

相关问题 更多 >