我们从Python开源项目中,提取了以下12个代码示例,用于说明如何使用pydub.AudioSegment.from_wav()。
def _words_list_to_file(anim_words):
    """Concatenate the recordings of the given words into one WAV file.

    Each word is looked up as ``<lowercased word>.wav`` inside the ``audio``
    directory located next to this module. The clips are joined in the order
    given and written to ``/tmp/file.wav``, overwriting any previous export.

    :param anim_words: Non-empty sequence of words to join.
    :return: Path of the exported file (always ``"/tmp/file.wav"``).
    :raises IndexError: If ``anim_words`` is empty.
    """
    audio_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio")
    anim_files = [
        os.path.join(audio_dir, word.lower() + ".wav") for word in anim_words
    ]

    out_audio = AudioSegment.from_wav(anim_files[0])
    for word_file in anim_files[1:]:
        out_audio += AudioSegment.from_wav(word_file)

    # export() returns the still-open output file object; close it right away
    # instead of keeping it in an unused local until the function returns.
    out_audio.export("/tmp/file.wav", format="wav").close()
    return "/tmp/file.wav"
def cut_audio(line):
    '''
    Cut one audio file between two time marks and export the excerpt to the
    designated output folder.

    line format: [AUDIO_FILE_NAME, START, STOP]

    The file is read from ``args.in_dir`` and the excerpt is written to
    ``args.out_dir`` in the same format as the input. START and STOP are
    passed through ``cvt_time`` before slicing (presumably converting them to
    milliseconds - confirm against ``cvt_time``). Only names ending in exactly
    '.wav' or '.mp3' are processed; anything else is reported and skipped.
    '''
    audio_file_name = line[0]
    extension = audio_file_name[-4:]

    if extension not in ('.wav', '.mp3'):
        # error, incompatible file type
        print('**** ' + audio_file_name + ' caused an error')
        print('**** ' + extension + ' incompatible file type')
        print('**** skipping file\n')
        return

    source_path = args.in_dir + audio_file_name
    if extension == '.wav':
        audio = AudioSegment.from_wav(source_path)
    else:
        audio = AudioSegment.from_mp3(source_path)

    begin = cvt_time(line[1])
    finish = cvt_time(line[2])
    excerpt = audio[begin:finish]

    # create name for cut audio file
    file_num = get_num(audio_file_name)
    cut_name = audio_file_name[0:-4] + '_cut_%d' % file_num + extension

    # export to output folder, using the extension (minus the dot) as format
    excerpt.export(args.out_dir + cut_name, format=extension[1:])
    print('---> ' + cut_name + '\n')
def test_text_to_speech(self):
    """Synthesizing a short text yields audio as long as the reference WAV.

    The synthesized audio is also exported to ``.test_utils/test.mp3`` before
    the length comparison is made.
    """
    # NOTE(review): catch_requests() presumably stubs outgoing HTTP calls;
    # confirm against its definition.
    catch_requests()
    text = 'hello'
    audio = utils.text_to_speech(
        text=text,
        synthesizer=self.synthesizer,
        synth_args=self.synth_args,
        sentence_break='. ',
    )
    sample = AudioSegment.from_wav('tests/test_files/test.wav')
    audio.export('.test_utils/test.mp3', format='mp3')
    # assertEquals was a deprecated alias and no longer exists in
    # Python 3.12+; assertEqual is the supported spelling.
    self.assertEqual(len(audio), len(sample))
def test_text_to_speech_sentence_break(self):
    """Text of 51 words, split on spaces, must produce audio longer than
    fifty times the reference sample."""
    catch_requests()
    long_text = 'hello ' * 51
    synthesized = utils.text_to_speech(
        text=long_text,
        synthesizer=self.synthesizer,
        synth_args=self.synth_args,
        sentence_break=' ',
    )
    reference = AudioSegment.from_wav('tests/test_files/test.wav')
    minimum_length = len(reference) * 50
    self.assertGreater(len(synthesized), minimum_length)
def tags_to_wav(media_path, out_dir, tag_pairs):
    """Export one mono WAV clip per tagged time range of a media file.

    Inputs whose name does not end in ``.mp3`` or ``.wav`` (case-insensitive)
    are first transcoded with ffmpeg to a temporary WAV under ``/var/tmp``;
    that temporary file is deleted again before the function returns.

    :param media_path: Path of the source media file.
    :param out_dir: Directory that receives the clips.
    :param tag_pairs: Iterable of pairs whose first two items are the start
        and end of a clip, in seconds.

    Clips are named ``<basename>_start_<start>_dur_<duration>.wav``; a clip
    whose file already exists is not regenerated. If the audio cannot be
    loaded, the error is printed and the process exits with status 2.
    """
    basename = os.path.splitext(os.path.basename(media_path))[0]

    needs_temp_wav = media_path.lower()[-4:] not in ('.mp3', '.wav')
    if needs_temp_wav:
        # Creates a temporary WAV (e.g. when the input is an MP4).
        temp_filename = media_path.split('/')[-1] + '_temp.wav'
        audio_path = '/var/tmp/' + temp_filename  # Pathname for temp WAV
        # '-y' option overwrites existing file if present
        subprocess.call(['ffmpeg', '-y', '-i', media_path, audio_path])
    else:
        audio_path = media_path

    try:
        try:
            if audio_path[-4:].lower() == '.mp3':
                song = AudioSegment.from_mp3(audio_path)
            else:
                song = AudioSegment.from_wav(audio_path)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        for pair in tag_pairs:
            start = pair[0]
            duration = pair[1] - pair[0]
            clip_pathname = os.path.join(
                out_dir,
                basename + "_start_" + str(start) + "_dur_" + str(duration) + ".wav",
            )
            # Slice positions are expressed in milliseconds.
            start_msec = float(start) * 1000.0
            duration_msec = float(duration) * 1000
            if not os.path.exists(clip_pathname):
                clip_data = song[start_msec:start_msec + duration_msec]
                clip_data = clip_data.set_channels(1)
                clip_data.export(clip_pathname, format="wav")
    finally:
        # The transcoded copy is only an intermediate; do not leave it behind,
        # whether the export succeeded, failed, or exited early.
        if needs_temp_wav and os.path.exists(audio_path):
            os.remove(audio_path)
def excerpt_segments(segments_df, inputfile, out_dir, mono):
    """Export the span between the ``<START>`` and ``<END>`` markers as WAV.

    :param segments_df: Table with a ``Names`` column holding the marker
        labels and an ``Instants`` column holding their positions; the
        positions are treated as seconds.
    :param inputfile: Path of the source WAV file.
    :param out_dir: Output location; it is concatenated directly with the
        file name, so it must end with a path separator.
    :param mono: When equal to ``True`` the excerpt is downmixed to one
        channel before export.
    :return: An error string if the source file cannot be loaded, otherwise
        ``None``.

    The excerpt is written as ``<input name without its last four
    characters>_reading_excerpt.wav`` at 48 kHz, 24-bit PCM.
    """
    try:
        song = AudioSegment.from_wav(inputfile)
    except Exception:
        # Narrower than a bare except so that KeyboardInterrupt/SystemExit
        # still propagate.
        return "ERROR: " + inputfile + " can't be found."

    start = float(segments_df[segments_df['Names'] == "<START>"]['Instants'])
    end = float(segments_df[segments_df['Names'] == "<END>"]['Instants'])

    # Slice positions are expressed in milliseconds.
    start_msec = start * 1000.0
    end_msec = end * 1000
    clip_data = song[start_msec:end_msec]

    clip_pathname = out_dir + inputfile.split('/')[-1][:-4] + '_reading_excerpt' + '.wav'

    # Equality with True is kept on purpose so callers see unchanged behaviour.
    if mono == True:  # noqa: E712
        clip_data = clip_data.set_channels(1)

    # Each ffmpeg option and its value must be a separate argv item; a fused
    # token such as "-ar 48000" is not recognised as an option.
    clip_data.export(
        clip_pathname,
        format="wav",
        parameters=["-ar", "48000", "-acodec", "pcm_s24le"],
    )
def play(filepath, content_type='audio/wav'):
    """
    Will attempt to play various audio file types (wav, ogg, mp3).

    :param filepath: Path of the audio file to play.
    :param content_type: Content type used to pick the decoder. It is
        matched by substring: 'wav', then 'ogg'/'opus', then 'mp3'/'mpeg'.
    :raises ValueError: If ``content_type`` matches none of the supported
        formats.
    """
    if 'wav' in content_type:
        sound = AudioSegment.from_wav(filepath)
    elif 'ogg' in content_type or 'opus' in content_type:
        sound = AudioSegment.from_ogg(filepath)
    elif 'mp3' in content_type or 'mpeg' in content_type:
        sound = AudioSegment.from_mp3(filepath)
    else:
        # Without this branch the function would fail later with an
        # UnboundLocalError on `sound`, hiding the real cause.
        raise ValueError('Unsupported audio content type: ' + repr(content_type))
    pydub_play(sound)
def synthesize(self, text, src, dst):
    """
    Build a .wav for `text` by overlaying per-syllable recordings.

    src is the folder that contains all syllable .wav files
    dst is the destination folder for the synthesized file

    Both are joined to file names by plain concatenation, so each must end
    with a path separator. The result is written to dst + "generated.wav".
    """
    print("Synthesizing ...")
    step_ms = 355               # offset between consecutive syllables
    punctuation_pause_ms = 500  # length of the silence used for punctuation
    cursor_ms = 0

    syllables = lazy_pinyin(text, style=pypinyin.TONE3)

    # start from pure silence, budgeting ~500 ms per input character
    track = AudioSegment.silent(duration=500 * len(text))

    for syllable in syllables:
        wav_path = src + syllable + ".wav"
        if syllable in TextToSpeech.punctuation:
            # punctuation contributes a stretch of silence
            clip = AudioSegment.silent(duration=punctuation_pause_ms)
        elif Path(wav_path).is_file():
            clip = AudioSegment.from_wav(wav_path)
        else:
            # no recording for this syllable: skip it without advancing
            continue
        track = track.overlay(clip, position=cursor_ms)
        cursor_ms += step_ms

    if not os.path.exists(dst):
        os.makedirs(dst)

    track.export(dst + "generated.wav", format="wav")
    print("Exported.")
def _watson_to_segment(text, synth_args):
    """Request Watson audio for ``text`` and load it as an AudioSegment."""
    import tempfile

    # Fetch first, so a failed request leaves no file behind.
    audio_bytes = watson_request(text=text, synth_args=synth_args).content

    # A uniquely named temp file avoids collisions between concurrent calls
    # and does not depend on the current directory being writable.
    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    try:
        with temp:
            temp.write(audio_bytes)
        return AudioSegment.from_wav(temp.name)
    finally:
        os.remove(temp.name)


def text_to_speech(text, synthesizer, synth_args, sentence_break='. '):
    """
    Converts given text to a pydub AudioSegment using a specified speech
    synthesizer. At the moment, IBM Watson's text-to-speech API is the only
    available synthesizer.

    :param text:
        The text that will be synthesized to audio.
    :param synthesizer:
        The text-to-speech synthesizer to use. At the moment, 'watson' is the
        only available input.
    :param synth_args:
        A dictionary of arguments to pass to the synthesizer. Parameters for
        authorization (username/password) should be passed here.
    :param sentence_break:
        A string that identifies a sentence break or another logical break in
        the text. Text of 50 words or more is split on it and synthesized
        piece by piece. Defaults to '. '.
    :return:
        The synthesized audio as a single AudioSegment.
    :raises ValueError:
        If ``synthesizer`` is not supported, or if long text contains nothing
        but separators and whitespace.
    """
    if synthesizer != 'watson':
        raise ValueError('"' + synthesizer + '" synthesizer not found.')

    if len(text.split()) < 50:
        return _watson_to_segment(text, synth_args)

    # A trailing separator yields an empty last piece; do not send blank
    # text to the synthesizer.
    sentences = [part for part in text.split(sentence_break) if part.strip()]
    if not sentences:
        raise ValueError('text contains nothing to synthesize.')

    response = None
    for sentence in sentences:
        segment = _watson_to_segment(sentence, synth_args)
        response = segment if response is None else response + segment
    return response
def audio_generator(dict_dir, text, output_dest):
    """Join the recorded clips of the words in `text` into one WAV file.

    `dict_dir` must contain `myDict.py`, a Python literal mapping each known
    word to the file name of its clip inside `dict_dir`. `text` is split on
    single spaces and words missing from the mapping are ignored. The result
    is written to `output_dest` as `output<N>.wav`, where N is the first
    number not already taken.

    Raises Exception when none of the words has a clip.
    """
    with open(dict_dir + "/myDict.py") as dict_file:
        word_to_clip = ast.literal_eval(dict_file.read())

    known_words = word_to_clip.keys()
    clips = [
        AudioSegment.from_wav(dict_dir + "/" + word_to_clip[word])
        for word in text.split(" ")
        if word in known_words
    ]

    # Check to see if at least one word was generated
    if clips == []:
        raise Exception('\033[91m' + "None of the words you entered was"
                        + " spoken by your figure." + '\033[0m')

    # If a file with the default name exists, move on to the next suffix
    suffix = 0
    out_path = output_dest + "/output" + str(suffix) + ".wav"
    while os.path.exists(out_path):
        suffix += 1
        out_path = output_dest + "/output" + str(suffix) + ".wav"

    # Concatenate selected audio words
    combined = clips[0]
    for clip in clips[1:]:
        combined = combined + clip

    # Export the joined audio
    combined.export(out_path, format="wav")

    if not os.path.exists(out_path):
        print("Speech-Hacker: " '\033[91m' +
              "Failed to generate your requested audio." + '\033[0m')
        return

    print('\033[94m' + "Speech-Hacker: " +
          "Your audio was generated at: " + out_path + '\033[0m')
def wavconvert(wav, codec):
    """Convert a WAV file to another format next to the original.

    :param wav: Path of the source WAV file.
    :param codec: Target format name; it is also used verbatim as the output
        file extension.
    :return: Path of the converted file.

    'mp3', 'mp2' and 'mpa' are encoded with ``-q:a 6``; every other format
    uses ``-q:a 0``. 'aac', 'mp4' and 'm4a' are all exported through the
    'mp4' format while the output file keeps the requested extension.
    """
    from pydub import AudioSegment

    song = AudioSegment.from_wav(wav)
    out = os.path.splitext(wav)[0] + '.' + codec

    tags = {
        'artist': 'Various Artists',
        'album': 'WeChat Voice',
        'year': time.strftime('%Y-%m-%d'),
        'comments': 'This album is awesome!',
    }

    requested = codec.lower()
    parameters = ['-q:a', '0']
    if requested in ['mp3', 'mp2', 'mpa']:
        parameters = ['-q:a', '6']
    elif requested in ['aac', 'mp4', 'm4a']:
        # Only the export format changes; `out` was already built from the
        # requested extension.
        codec = 'mp4'

    # export() returns the open output file object; close it explicitly.
    song.export(out, format=codec, parameters=parameters, tags=tags).close()
    return out
def audio_to_export(sourcepath, wavepath, start, end):
    """Write the part of a WAV file between `start` and `end` to `wavepath`.

    `start` and `end` are multiplied by 1000 before being used as slice
    positions on the loaded audio.
    """
    source_audio = AudioSegment.from_wav(sourcepath)
    first_ms = start * 1000
    last_ms = end * 1000
    excerpt = source_audio[first_ms:last_ms]
    excerpt.export(wavepath, format="wav")