使用Selenium和Python下载由JavaScript加载的音频

# Question snippet: open the Naver conversation page in headless Firefox and
# click the first "listen" button. The original text had every statement
# collapsed onto a single line, which is not valid Python; the statements are
# restored one per line here without changing what they do.
import requests
from time import sleep

from selenium import webdriver
from selenium.webdriver.firefox.options import Options

url = 'https://learn.dict.naver.com/conversation#/korean-en/20190713'

options = Options()
options.headless = True  # run Firefox without opening a window
driver = webdriver.Firefox(options=options, executable_path='geckodriver')

driver.get(url)
# Fixed pause before looking up the button; presumably gives the page's
# JavaScript time to render it (the button is not awaited explicitly).
sleep(3)
# NOTE(review): find_element_by_class_name and executable_path are the
# pre-Selenium-4 spellings; confirm the installed Selenium version supports them.
driver.find_element_by_class_name('btn_listen').click()  # for the first one

1条回答

网友

1楼 · 发布于 2024-05-29 08:31:27

您可以使用requests下载mp3文件，并在页面上以文本格式获取有关句子的其他有用信息。
下面的代码以https://learn.dict.naver.com/conversation#/korean-en/20190713为例。可以查看data变量中的JSON内容，了解有哪些信息可以使用。

import requests
import json

# Download the pronunciation audio of every sentence in one Naver "today's
# conversation" lesson, without driving a browser.
#
# Flow: fetch the lesson as JSONP -> for each sentence ask the pronunciation
# endpoint for the audio URL -> save the audio as ./<sentence id>.mp3.

# Name of the JSONP callback the gateway wraps its JSON answer in.
callback = 'angular.callbacks._0'

headers = {
    'Referer': 'https://learn.dict.naver.com/conversation',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/75.0.3770.100 Safari/537.36',
    'DNT': '1',
}
params = (
    ('callback', callback),
)
# Seconds to wait for each HTTP request; without a timeout requests can
# block forever on a stalled connection.
timeout = 10

with requests.Session() as session:
    response = session.get('https://gateway.dict.naver.com/krdict/kr/koen/today/20190713/conversation.dict',
                           headers=headers, params=params, timeout=timeout)
    response.raise_for_status()

    # The body is JSONP: `<callback>(<json>)`. Slice out what lies between the
    # first "(" and the last ")". str.lstrip/rstrip must not be used for this:
    # they strip a *set of characters*, not a prefix/suffix, and could eat into
    # the JSON payload itself.
    jsonp = response.text
    data = json.loads(jsonp[jsonp.index("(") + 1:jsonp.rindex(")")])["data"]
    sentences = data["sentences"]

    for sentence in sentences:
        audio_id = sentence["id"]
        sentence_pron_file = sentence["sentence_pron_file"]

        # Resolve the pronunciation file path to a downloadable URL; the
        # answer's "url" field is a list and the first entry is used.
        # The session is reused here so the connection pool is shared.
        lookup = session.post(f'https://learn.dict.naver.com/dictPronunciation.dict?filePaths={sentence_pron_file}',
                              timeout=timeout)
        lookup.raise_for_status()
        audio_url = lookup.json()["url"][0]

        audio_file = session.get(audio_url, timeout=timeout)
        # Fail loudly instead of saving an HTTP error page as an .mp3 file.
        audio_file.raise_for_status()

        with open(f'./{audio_id}.mp3', 'wb') as f:
            f.write(audio_file.content)

相关问题更多 >

编程相关推荐

热门问题

热门文章