无法使用Azure语音转文本服务转换语音为文本
我正在使用下面的代码,通过Azure的语音转文本服务把语音转换成文字。我想把我的音频文件变成文本。以下是相关的代码:
import os
import azure.cognitiveservices.speech as speechsdk
def recognize_from_microphone():
    """Run one-shot speech recognition on a fixed media file and print the outcome.

    Despite its name, this function reads audio from the hard-coded file path
    below rather than from a microphone. ``my_key`` and ``my_region`` are not
    defined inside this function; they must be provided by the enclosing
    module.
    """
    # NOTE: the input configured here is an .mp4 file.
    media_path = "C:\\Users\\DELL\\Desktop\\flowlly.com\\demo\\003. Class 3 - Monolith, Microservices, gRPC, Webhooks.mp4"

    config = speechsdk.SpeechConfig(subscription=my_key, region=my_region)
    config.speech_recognition_language = "en-US"

    recognizer = speechsdk.SpeechRecognizer(
        speech_config=config,
        audio_config=speechsdk.audio.AudioConfig(filename=media_path),
    )
    result = recognizer.recognize_once_async().get()

    # Success: print the transcript and stop.
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
        return

    # Audio was processed but nothing was recognized as speech.
    if result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
        return

    # Any reason other than cancellation is not reported.
    if result.reason != speechsdk.ResultReason.Canceled:
        return

    details = result.cancellation_details
    print("Speech Recognition canceled: {}".format(details.reason))
    if details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(details.error_details))
        print("Did you set the speech resource key and region values?")


recognize_from_microphone()
但是在尝试运行转录程序时,我遇到了这个错误:
File "C:\Users\DELL\Desktop\flowlly.com\demo\transcriber.py", line 48, in <module>
recognize_from_microphone()
File "C:\Users\DELL\Desktop\flowlly.com\demo\transcriber.py", line 18, in recognize_from_microphone
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\speech.py", line 1006, in __init__
_call_hr_fn(
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\interop.py", line 62, in _call_hr_fn
_raise_if_failed(hr)
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\interop.py", line 55, in _raise_if_failed
__try_get_error(_spx_handle(hr))
File "C:\Users\DELL\AppData\Local\Programs\Python\Python312\Lib\site-packages\azure\cognitiveservices\speech\interop.py", line 50, in __try_get_error
raise RuntimeError(message)
RuntimeError: Exception with error code:
[CALL STACK BEGIN]
> pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- pal_string_to_wstring
- recognizer_create_speech_recognizer_from_config
- recognizer_create_speech_recognizer_from_config
[CALL STACK END]
Exception with an error code: 0xa (SPXERR_INVALID_HEADER)
我已经安装了相关的SDK,但它还是不工作。我现在该怎么办呢?
1 个回答
1
在Azure语音转文本服务中,Speech SDK通过文件名读取音频时默认只支持WAV格式(16 kHz或8 kHz,16位,单声道PCM)。你传入的是.mp4文件,SDK无法解析它的文件头,所以在创建SpeechRecognizer时会抛出SPXERR_INVALID_HEADER错误。
- 你需要把你的.mp4文件转换成WAV格式。确保转换后的WAV文件符合以下要求:采样率:16 kHz或8 kHz,位深度:16位,声道:单声道(PCM编码)。
- 然后把代码中AudioConfig的filename参数改为转换后的WAV文件路径,例如:filename="path/to/your/converted_file.wav"
import os
import azure.cognitiveservices.speech as speechsdk
def recognize_from_audio_file(audio_file_path: str = r"C:\Users\samplest 3.wav") -> None:
    """Recognize speech from a WAV file with Azure Speech and print the outcome.

    Args:
        audio_file_path: Path to the WAV audio file to transcribe. Defaults to
            the sample path used by the original snippet, so calling the
            function without arguments behaves as before.

    The subscription key and region are read from the ``SPEECH_KEY`` and
    ``SPEECH_REGION`` environment variables when they are set, so real
    credentials do not have to be written into the source file. When they are
    not set, the placeholder values below are used and must be replaced with
    your actual subscription key and region.
    """
    my_key = os.environ.get("SPEECH_KEY", "YourSubscriptionKey")
    my_region = os.environ.get("SPEECH_REGION", "YourRegion")

    speech_config = speechsdk.SpeechConfig(subscription=my_key, region=my_region)
    speech_config.speech_recognition_language = "en-US"

    audio_config = speechsdk.audio.AudioConfig(filename=audio_file_path)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config
    )

    # NOTE(review): per the Speech SDK documentation this is single-shot
    # recognition (one utterance); a long recording would presumably need
    # continuous recognition instead - confirm against the SDK docs.
    speech_recognition_result = speech_recognizer.recognize_once_async().get()

    reason = speech_recognition_result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(speech_recognition_result.text))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(speech_recognition_result.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speech_recognition_result.cancellation_details
        print("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
            print("Did you set the speech resource key and region values?")


recognize_from_audio_file()
输出结果: