public byte[] cacheFile(String toSpeak, OutputFormat format) throws IOException { byte[] mp3File = null; // cache it begin ----- String localFileName = getLocalFileName(this, toSpeak, "mp3"); // String filename = AudioFile.globalFileCacheDir + File.separator + // localFileName; if (!audioFile.cacheContains(localFileName)) { log.info("retrieving speech from Amazon - {}", localFileName); AmazonPollyClient polly = getPolly(); SynthesizeSpeechRequest synthReq = new SynthesizeSpeechRequest().withText(toSpeak).withVoiceId(awsVoice.getId()).withOutputFormat(format); SynthesizeSpeechResult synthRes = polly.synthesizeSpeech(synthReq); InputStream data = synthRes.getAudioStream(); mp3File = FileIO.toByteArray(data); audioFile.cache(localFileName, mp3File, toSpeak); } else { log.info("using local cached file"); mp3File = FileIO.toByteArray(new File(AudioFile.globalFileCacheDir + File.separator + getLocalFileName(this, toSpeak, "mp3"))); } // invoke("publishStartSpeaking", toSpeak); // audioFile.playBlocking(filename); // invoke("publishEndSpeaking", toSpeak); // log.info("Finished waiting for completion."); return mp3File; }
/**
 * Asks the Polly client for a presigned URL that synthesizes the given instruction.
 *
 * <p>The request is built as SSML text, voiced by {@code VoiceId.Joanna}, in MP3 format.
 *
 * @param instruction SSML text to synthesize
 * @return the presigned URL as a string, or {@code null} when the client throws an
 *         {@code AmazonClientException} (the listener's {@code onError()} is called first)
 */
private String retrieveSpeechUrl(String instruction) {
    final SynthesizeSpeechPresignRequest presignRequest = new SynthesizeSpeechPresignRequest()
            .withOutputFormat(OutputFormat.Mp3)
            .withVoiceId(VoiceId.Joanna)
            .withTextType(TextType.Ssml)
            .withText(instruction);

    String speechUrl;
    try {
        speechUrl = client.getPresignedSynthesizeSpeechUrl(presignRequest).toString();
    } catch (AmazonClientException exception) {
        // report the failure to the listener and signal "no URL" to the caller
        listener.onError();
        speechUrl = null;
    }
    return speechUrl;
}
/**
 * Makes sure the MP3 for the given text is cached, then hands the cached file to
 * {@code audioFile.playCachedFile(...)} and records the utterance against the returned
 * audio data.
 *
 * @param toSpeak text to speak
 * @return the audio data returned by {@code audioFile.playCachedFile(...)}, or {@code null}
 *         when {@code credentialsError} is set
 * @throws Exception propagated from caching or playback
 */
@Override
public AudioData speak(String toSpeak) throws Exception {
    if (credentialsError) {
        return null;
    }

    // populate the cache first so the file exists when playback is requested
    cacheFile(toSpeak, OutputFormat.Mp3);

    String cachedName = getLocalFileName(this, toSpeak, "mp3");
    AudioData played = audioFile.playCachedFile(cachedName);

    // remember which text belongs to this audio data
    utterances.put(played, toSpeak);
    return played;
}
/**
 * Makes sure the MP3 for the given text is cached and plays it via
 * {@code audioFile.playBlocking(...)}, invoking {@code publishStartSpeaking} before and
 * {@code publishEndSpeaking} after the playback call.
 *
 * <p>Nothing happens when {@code credentialsError} is set.
 *
 * @param toSpeak text to speak
 * @return always {@code false}
 * @throws Exception propagated from caching, invocation or playback
 */
@Override
public boolean speakBlocking(String toSpeak) throws Exception {
    if (credentialsError) {
        return false;
    }

    cacheFile(toSpeak, OutputFormat.Mp3);

    invoke("publishStartSpeaking", toSpeak);
    String cachedPath = AudioFile.globalFileCacheDir + File.separator + getLocalFileName(this, toSpeak, "mp3");
    audioFile.playBlocking(cachedPath);
    invoke("publishEndSpeaking", toSpeak);

    return false;
}
/** * Returns text-to-speech of a translation of a given text. Before translating the text, * requesting speech from AWS Polly and storing the resulting MP3 to S3 this method looks * up previous translation of the same text. Once found it will avoid doing the aforementioned * roundtrip but rather will use the data of the previous translation. * @param text text to translate and convert to speech * @return text to speech information * @throws AlexaStateException error reading or writing state to Dynamo dictionary */ public Optional<TextToSpeech> textToSpeech(final String text) throws AlexaStateException { // remove invalid prefixes that accidently made it into the slots final String textToTranslate = prefixesToRemove.stream() .filter(prefix -> StringUtils.startsWithIgnoreCase(text, prefix)) .findFirst() .map(prefix -> text.replaceFirst(prefix, "")) // if none of these prefixes exist in the text, keep the text as is .orElse(text); // look up previous translation in dictionary Optional<TextToSpeech> tts = dynamoStateHandler.readModel(TextToSpeech.class, getDictionaryId(textToTranslate)); // if there was a previous tts for this text return immediately (exception for the roundtrip-phrase used by the test-client) if (tts.isPresent() && !StringUtils.equalsIgnoreCase(textToTranslate, SkillConfig.getAlwaysRoundTripPhrase())) { // set handler to session to avoid writing back to dynamo (nothing changed) tts.get().setHandler(sessionStateHandler); return tts; } // translate term by leveraging a Translator implementation provided by the factory final Optional<String> translated = translator.translate(textToTranslate, language); if (translated.isPresent()) { // without a voiceId there's not chance to fulfill the translation request Validate.notBlank(voiceId, "No voiceId is associated with given language."); // form the SSML by embedding the translated text final String ssml = String.format("<speak><amazon:effect name='drc'><prosody rate='-15%%' 
volume='x-loud'>%1$s</prosody></amazon:effect><break time='250ms' /></speak>", translated.get()); // build a Polly request to get speech with desired voice and SSML final SynthesizeSpeechRequest synthRequest = new SynthesizeSpeechRequest() .withText(ssml) .withOutputFormat(OutputFormat.Mp3) .withVoiceId(voiceId) .withTextType(TextType.Ssml) .withSampleRate("22050"); // fire request to Polly final SynthesizeSpeechResult synthResult = awsPolly.synthesizeSpeech(synthRequest); try { // store audio stream of Polly to S3 as an MP3 file final PutObjectRequest s3Put = new PutObjectRequest(SkillConfig.getS3BucketName(), getMp3Path(textToTranslate), synthResult.getAudioStream(), new ObjectMetadata()) .withCannedAcl(CannedAccessControlList.PublicRead); awsS3.putObject(s3Put); // as long as Polly output does not comply with Alexa MP3 format restriction we need to convert the MP3 if (!SkillConfig.shouldSkipMp3Conversion()) { // call the REST service that encapsualtes the FFMPEG conversion on a server final String mp3ConvertedUrl = Mp3Converter.convertMp3(getMp3Path(textToTranslate)); // validate this service returned a url (equal to success) Validate.notBlank(mp3ConvertedUrl, "Conversion service did not return proper return value"); } // build the TTS object with all the information needed to return output speech return Optional.of(getTTS(textToTranslate, translated.get())); } catch (final IOException | URISyntaxException e) { log.error("Error while generating mp3. " + e.getMessage()); } } return Optional.empty(); }
void setupPlayButton() { playButton = (Button) findViewById(R.id.readButton); playButton.setEnabled(false); playButton.setOnClickListener(new View.OnClickListener() { @Override public void onClick(View v) { playButton.setEnabled(false); Voice selectedVoice = (Voice) voicesSpinner.getSelectedItem(); String textToRead = sampleTextEditText.getText().toString(); // Use voice's sample text if user hasn't provided any text to read. if (textToRead.trim().isEmpty()) { textToRead = getSampleText(selectedVoice); } // Create speech synthesis request. SynthesizeSpeechPresignRequest synthesizeSpeechPresignRequest = new SynthesizeSpeechPresignRequest() // Set text to synthesize. .withText(textToRead) // Set voice selected by the user. .withVoiceId(selectedVoice.getId()) // Set format to MP3. .withOutputFormat(OutputFormat.Mp3); // Get the presigned URL for synthesized speech audio stream. URL presignedSynthesizeSpeechUrl = client.getPresignedSynthesizeSpeechUrl(synthesizeSpeechPresignRequest); Log.i(TAG, "Playing speech from presigned URL: " + presignedSynthesizeSpeechUrl); // Create a media player to play the synthesized audio stream. if (mediaPlayer.isPlaying()) { setupNewMediaPlayer(); } mediaPlayer.setAudioStreamType(AudioManager.STREAM_MUSIC); try { // Set media player's data source to previously obtained URL. mediaPlayer.setDataSource(presignedSynthesizeSpeechUrl.toString()); } catch (IOException e) { Log.e(TAG, "Unable to set data source for the media player! " + e.getMessage()); } // Start the playback asynchronously (since the data source is a network stream). mediaPlayer.prepareAsync(); } }); }