我们从 Python 开源项目中提取了以下 7 个代码示例，用于说明如何使用 speech_recognition.AudioFile()。
async def process_listen(self):
    """Transcribe the last captured voice input and answer it.

    Converts the raw PCM capture in ``data/temp/heard.pcm`` to WAV with
    ffmpeg, sends the audio to Google Speech Recognition and posts the
    recognized text to ``self.text_channel``.  A reply is then taken from
    the AIML kernel, falling back to the Games cog's cleverbot reply when
    the kernel returns nothing, and is both posted and spoken through
    ``self.play_tts``.

    This is a coroutine: it awaits the bot's messaging calls, so it has to
    be declared with ``async def``.
    """
    pcm_path = "data/temp/heard.pcm"
    wav_path = "data/temp/heard.wav"

    if not os.path.isfile(pcm_path) or os.stat(pcm_path).st_size == 0:
        await self.bot.send_embed(self.text_channel, ":warning: No input found")
        return

    # Pass the argument list directly.  Combined with shell=True, POSIX would
    # hand only "ffmpeg" to the shell and drop every other argument.
    convert = functools.partial(
        subprocess.call,
        ["ffmpeg", "-f", "s16le", "-y", "-ar", "44.1k", "-ac", "2",
         "-i", pcm_path, wav_path],
    )
    # ffmpeg blocks, so run it in the default executor instead of the loop.
    await self.bot.loop.run_in_executor(None, convert)

    with speech_recognition.AudioFile(wav_path) as source:
        audio = self.recognizer.record(source)

    try:
        text = self.recognizer.recognize_google(audio)
    except speech_recognition.UnknownValueError:
        await self.bot.send_embed(self.text_channel, ":no_entry: I couldn't understand that")
        return
    except speech_recognition.RequestError as e:
        await self.bot.send_embed(
            self.text_channel,
            ":warning: Could not request results from Google Speech Recognition service; {}".format(e),
        )
        return

    await self.bot.send_embed(self.text_channel, "I think you said: `{}`".format(text))

    response = clients.aiml_kernel.respond(text)
    # TODO: Handle brain not loaded?
    if not response:
        games_cog = client.get_cog("Games")
        if not games_cog:
            return
        response = await games_cog.cleverbot_get_reply(text)
    await self.bot.send_embed(self.text_channel, "Responding with: `{}`".format(response))
    await self.play_tts(response, self.bot.user)
    # TODO: decide whether heard.pcm should be truncated or removed here.
def cut_and_send(infile, outfile, length):
    """Transcribe a WAV file chunk by chunk, appending the text to a file.

    The audio in ``infile`` is split into chunks of ``length`` milliseconds.
    Each chunk is exported to a temporary ``chunkN.wav`` in the current
    working directory, sent to Google Speech Recognition, and the recognized
    text (followed by a space) is appended to ``outfile``.  Chunks that are
    not understood, or whose request fails, are reported on stdout and
    skipped.

    Args:
        infile: path of the WAV file to transcribe.
        outfile: path of the text file the transcript is appended to.
        length: chunk duration in milliseconds.
    """
    myaudio = AudioSegment.from_file(infile, "wav")
    chunks = make_chunks(myaudio, length)  # pydub calculates in milliseconds
    recognizer = sr.Recognizer()  # record()/recognize_google() need no per-chunk instance

    for i, chunk in enumerate(chunks):
        chunk_name = "chunk{0}.wav".format(i)
        print("exporting", chunk_name)
        try:
            chunk.export(chunk_name, format="wav")
            with sr.AudioFile(chunk_name) as source:
                audio = recognizer.record(source)

            # recognize speech using Google Speech Recognition
            try:
                # to use another API key, use
                # `recognizer.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                txt = recognizer.recognize_google(audio) + " "
            except sr.UnknownValueError:
                print("Ehm... sorry not understood this one.")
            except sr.RequestError as e:
                print("Request failed; {0}".format(e))
            else:
                with open(outfile, 'a') as f:
                    f.write(txt)
        finally:
            # Always clean up the temporary chunk, even on an unexpected error.
            if os.path.exists(chunk_name):
                os.remove(chunk_name)
def __init__(self, audio_file=None):
    """
    Thread used to capture audio from the microphone and pass it to a callback method.

    :param audio_file: optional path to an audio file. When provided, the whole file is
        read into ``self.audio_stream``; otherwise the recognizer's energy threshold is
        set up using the microphone and the global settings.
    """
    super(SpeechRecognition, self).__init__()
    self.recognizer = sr.Recognizer()
    self.microphone = sr.Microphone()
    self.callback = None
    self.stop_thread = None
    self.kill_yourself = False
    self.audio_stream = None

    # load the global configuration
    self.settings = SettingLoader().settings

    if audio_file is not None:
        # an audio file was provided: read it entirely, no microphone sampling needed
        with sr.AudioFile(audio_file) as source:
            self.audio_stream = self.recognizer.record(source)
        return

    # no audio file: a sample has to be captured from the microphone
    with self.microphone as source:
        options = self.settings.recognition_options
        ambient_seconds = options.adjust_for_ambient_noise_second
        if ambient_seconds > 0:
            # the threshold is derived from the captured ambient sound
            logger.debug("[SpeechRecognition] threshold calculated by "
                         "capturing ambient noise during %s seconds" % ambient_seconds)
            Utils.print_info("[SpeechRecognition] capturing ambient sound during %s seconds"
                             % ambient_seconds)
            self.recognizer.adjust_for_ambient_noise(source, duration=ambient_seconds)
        else:
            # the threshold comes straight from the settings
            manual_threshold = options.energy_threshold
            logger.debug("[SpeechRecognition] threshold defined by settings: %s" % manual_threshold)
            self.recognizer.energy_threshold = manual_threshold

        Utils.print_info("Threshold set to: %s" % self.recognizer.energy_threshold)
def transcribe(inputfile, outputfile='', to_txt=True):
    """Transcribe an audio file with Sphinx and optionally save the text.

    Input that is not a ``.wav`` file is first converted to a temporary WAV
    with ffmpeg; the temporary file is always deleted afterwards.

    Args:
        inputfile: path of the audio file to transcribe.
        outputfile: path of the text file to write.  When empty, the input
            path with its extension replaced by ``.pocketsphinx.txt`` is used.
        to_txt: when true, write the transcript to ``outputfile``.

    Returns:
        The transcript, or an empty string when Sphinx detected no speech or
        reported an error.
    """
    import tempfile  # local import: only needed for the temporary WAV

    wav_source = inputfile.lower().endswith('.wav')
    if wav_source:
        wav_path = inputfile
    else:
        # A unique file in the platform's temp directory avoids the hard-coded
        # /var/tmp path and collisions between concurrent runs.
        fd, wav_path = tempfile.mkstemp(suffix='_temp.wav')
        os.close(fd)
        # '-y' overwrites the (empty) file that mkstemp just created
        subprocess.call(['ffmpeg', '-y', '-i', inputfile, wav_path])

    transcript = ''
    try:
        r = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio = r.record(source)  # read the entire audio file

        try:  # recognize speech using Sphinx
            print('Processing ...')
            transcript = r.recognize_sphinx(audio)
        except sr.UnknownValueError:
            print("Sphinx error: No speech detected.")
        except sr.RequestError as e:
            print("Sphinx error; {0}".format(e))
    finally:
        if not wav_source:
            os.remove(wav_path)  # delete the temp WAV even if reading failed

    if to_txt:
        if outputfile == '':
            # splitext copes with extensions of any length (.flac, .m4a, none)
            outputfile = os.path.splitext(inputfile)[0] + '.pocketsphinx.txt'
        with open(outputfile, 'w') as fo:
            fo.write(transcript)
    return transcript
def _recognize_bing(wav_path, api_key, language='zh-CN'):
    """Transcribe the audio file at ``wav_path`` with Bing speech recognition.

    Returns the recognized text, or ``None`` when the audio is not understood
    or the request fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as wav_file:
        recording = recognizer.record(wav_file)

    try:
        return recognizer.recognize_bing(recording, key=api_key, language=language)
    except (sr.UnknownValueError, sr.RequestError):
        return None
def listen_translate():
    """Endlessly listen on the microphone, save each phrase and transcribe it.

    Every captured phrase overwrites ``microphone-results.wav`` and is then
    passed to Sphinx; the outcome is printed to stdout.  The loop never exits
    on its own.
    """
    while True:
        # a fresh recognizer is created for every phrase
        recognizer = sr.Recognizer()
        with sr.Microphone(sample_rate=8000) as mic:
            print("Say something!")
            # no timeout / phrase_time_limit: block until a phrase is heard
            phrase = recognizer.listen(mic)

        # keep a copy of the captured audio as a WAV file
        with open("microphone-results.wav", "wb") as wav_out:
            wav_out.write(phrase.get_wav_data())

        # recognize speech using Sphinx
        try:
            heard = recognizer.recognize_sphinx(phrase)
            print("Sphinx thinks you said :" + heard)
        except sr.UnknownValueError:
            print("Sphinx could not understand audio")
        except sr.RequestError as err:
            print("Sphinx error; {0}".format(err))
def play_audio():
    """Transcribe ``./english.wav`` with Sphinx and print the result."""
    r = sr.Recognizer()
    with sr.AudioFile('./english.wav') as source:
        audio = r.record(source)  # read the entire audio file

    # Function-call form: the bare `print audio` statement is a SyntaxError
    # on Python 3, and the rest of this function already uses print().
    print(audio)

    # recognize speech using Sphinx
    try:
        print("Sphinx thinks you said :" + r.recognize_sphinx(audio))
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))