如何解决PyTube中的HTTP 400错误:错误请求?
我用pyTube库做了一个简单的三行代码项目。这个项目的功能就是从YouTube上下载视频。我通常用它下载手球比赛的视频,时长大约100分钟。七天前我最后一次使用的时候一切都很好,但现在却出现了“HTTP错误400:错误请求”的问题。
from pytube import YouTube

# Resolve the video page, pick the stream that pytube reports as the highest
# resolution, and save it into the target directory.
video = YouTube('https://www.youtube.com/watch?v=DASMWPUFFP4')
best_stream = video.streams.get_highest_resolution()
best_stream.download('D:\\Utakmice')
它可以下载短一点的视频,但对于其他类似长度(大约100分钟)的影片就不行了。我尝试升级pyTube库和清理浏览器缓存,但都没有用。我还试着深入研究urllib,但也没有找到任何线索。我遇到的错误是:
urllib.error.HTTPError: HTTP Error 400: Bad Request
在网上找不到任何解决方案,所以希望能得到一些帮助。提前谢谢大家。
编辑-问题已解决
我在pytube的GitHub仓库中找到了一个关于这个问题的讨论。解决方法是在innertube.py文件中修改某些客户端的版本号。该问题的链接是: https://github.com/pytube/pytube/issues/1894#issue-2180600881
1 个回答
1
看起来你遇到的错误是因为发送给YouTube服务器的请求不被服务器接受(HTTP 400表示服务器认为请求本身有误)。
Traceback (most recent call last):
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/flask/app.py", line 2213, in __call__
return self.wsgi_app(environ, start_response)
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/flask/app.py", line 2193, in wsgi_app
response = self.handle_exception(e)
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/flask/app.py", line 2190, in wsgi_app
response = self.full_dispatch_request()
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/flask/app.py", line 1486, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/flask/app.py", line 1484, in full_dispatch_request
rv = self.dispatch_request()
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/flask/app.py", line 1469, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
File "/var/www/html/ytd_web_app/Routes/Media.py", line 90, in getMedia
file = getMetaData(file_name)
File "/var/www/html/ytd_web_app/Routes/Media.py", line 43, in getMetaData
response = json.dumps(media.verifyPlatform(), indent=4)
File "/var/www/html/ytd_web_app/Models/Media.py", line 164, in verifyPlatform
"data": self.handleYouTube()
File "/var/www/html/ytd_web_app/Models/Media.py", line 227, in handleYouTube
youtube = self._YouTubeDownloader.search()
File "/var/www/html/ytd_web_app/Models/YouTubeDownloader.py", line 297, in search
"author_channel": self.getVideo().channel_url,
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/pytube/__main__.py", line 432, in channel_url
return f'https://www.youtube.com/channel/{self.channel_id}'
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/pytube/__main__.py", line 424, in channel_id
return self.vid_info.get('videoDetails', {}).get('channelId', None)
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/pytube/__main__.py", line 246, in vid_info
innertube_response = innertube.player(self.video_id)
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/pytube/innertube.py", line 448, in player
return self._call_api(endpoint, query, self.base_data)
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/pytube/innertube.py", line 390, in _call_api
response = request._execute_request(
File "/var/www/html/ytd_web_app/venv/lib/python3.10/site-packages/pytube/request.py", line 37, in _execute_request
return urlopen(request, timeout=timeout) # nosec
File "/usr/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.10/urllib/request.py", line 525, in open
response = meth(req, response)
File "/usr/lib/python3.10/urllib/request.py", line 634, in http_response
response = self.parent.error(
File "/usr/lib/python3.10/urllib/request.py", line 563, in error
return self._call_chain(*args)
File "/usr/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/usr/lib/python3.10/urllib/request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 400: Bad Request
而且,这个错误属于客户端错误(4xx状态码表示请求本身有问题),出错的是PyTube向InnerTube接口发送的POST请求。经过进一步的研究和分析发现,PyTube调用的接口是v1版本(https://www.youtube.com/youtubei/v1),而YouTube的API最新版本是3.0。
def _execute_request(
url: str,
method: str | None = None,
headers: dict[str, str] | None = None,
data: bytes | None = None,
timeout: float = socket._GLOBAL_DEFAULT_TIMEOUT
) -> _UrlopenRet:
"""
Executing the request from the PyTube's API towards the
YouTube API endpoint to retrieve the Stream's data.
Parameters:
url: string: The uniform resource locator of the API endpoint.
method: string: The HTTP request method to be used.
headers: object: The HTTP headers to be used by the API.
data: bytes: The data to be passed into the request.
timeout: float: The timeout delay to close the connection.
Returns:
_UrlopenRet
"""
base_headers = {"User-Agent": "Mozilla/5.0", "accept-language": "en-US,en"}
if headers:
base_headers.update(headers)
if data:
if not isinstance(data, bytes):
data = bytes(json.dumps(data), encoding="utf-8")
if url.lower().startswith("http"):
request = Request(url, headers=base_headers, method=method, data=data)
else:
raise ValueError("Invalid URL")
return urlopen(request, timeout=timeout)
def _call_api(self, endpoint: str, query: dict, data: bytes):
    """
    Generating the request to a given endpoint with the
    provided query parameters and data.

    Parameters:
        endpoint: string: The uniform resource locator of the endpoint to the YouTube's API.
        query: object: The query-string parameters to be sent to the API.
            The mapping passed in by the caller is never modified.
        data: bytes: The data to be passed into the request.
    Returns:
        The JSON document of the response body, decoded with ``json.loads``.
    """
    headers = {
        'Content-Type': 'application/json',
    }
    if self.use_oauth:
        # With OAuth the 'key' parameter is left out of the query string.
        # Build a filtered copy instead of `del query['key']` so the caller's
        # mapping stays intact and a missing 'key' does not raise KeyError.
        query = {name: value for name, value in query.items() if name != 'key'}
        # Refresh the token when one is already held, otherwise fetch one;
        # either way the bearer header is built from the resulting token.
        if self.access_token:
            self.refresh_bearer_token()
        else:
            self.fetch_bearer_token()
        headers['Authorization'] = f'Bearer {self.access_token}'
    endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
    # Instance-level headers are applied last, so they override the defaults.
    headers.update(self.header)
    response = request._execute_request(
        endpoint_url,
        'POST',
        headers=headers,
        data=data
    )
    return json.loads(response.read())
def player(self, video_id: str) -> dict:
    """
    Call the ``player`` endpoint for one video.

    Parameters:
        video_id: string: The identifier of the video to get the player data for.
    Returns:
        object: The value returned by ``self._call_api``.
    """
    player_url = f'{self.base_url}/player'
    # Start from the video identifier, then merge the instance-wide
    # parameters on top of it.
    params = dict(videoId=video_id)
    params.update(self.base_params)
    return self._call_api(player_url, params, self.base_data)
@property
def base_url(self) -> str:
    """
    Base uniform resource locator under which the InnerTube API
    endpoints are addressed.

    Returns:
        string
    """
    api_root = 'https://www.youtube.com/youtubei/v1'
    return api_root