in PollyTTSEngine/PollyManager.cpp [60:122]
/**
 * Sends the text in item.pItem to Amazon Polly and returns the synthesized audio.
 *
 * The text is always submitted as SSML: any outer <speak> wrapper already present
 * is removed and the remaining content is re-wrapped in a fresh <speak> element,
 * optionally inside an <amazon:domain> element when m_isNews or m_isConversational
 * is set. Audio is requested as PCM with sample rate "16000" for voice m_vVoiceId,
 * using the neural engine when m_isNeural is set.
 *
 * @param item  Item whose pItem wide-character text is synthesized.
 * @return Response with IsSuccess mirroring the Polly outcome. On failure,
 *         ErrorMessage holds the Polly exception name and message. On success,
 *         AudioData holds up to MAX_SIZE bytes read from the audio stream and
 *         Length the number of bytes actually read.
 */
PollySpeechResponse PollyManager::GenerateSpeech(CSentItem& item)
{
    PollySpeechResponse response;

    // NOTE(review): credentials provider and client are rebuilt on every call;
    // consider caching them on the manager if this shows up in profiles.
    auto creds = Aws::MakeShared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>(ALLOCATION_TAG, "polly-windows");
    Aws::Client::ClientConfiguration config = Aws::Client::ClientConfiguration("polly-windows");
    // NOTE(review): "PRODUCTVERSION" is sent literally as part of the user agent;
    // confirm whether a version macro was meant to be substituted here.
    config.userAgent = config.userAgent + " request-source/polly-windows/PRODUCTVERSION";
    Aws::Polly::PollyClient p = Aws::Polly::PollyClient(creds, config);

    SynthesizeSpeechRequest speech_request;
    auto speech_text = Aws::Utils::StringUtils::FromWString(item.pItem);

    // Drop a trailing "</voice>" tag (matched case-insensitively) and open a
    // <speak> element in its place. The position comes from the lower-cased copy
    // so the erase cannot be handed npos when the tag is spelled "</VOICE>".
    const auto voice_close = Aws::Utils::StringUtils::ToLower(speech_text.c_str()).find("</voice>");
    if (voice_close != Aws::String::npos)
    {
        speech_text.erase(voice_close, sizeof("</voice>") - 1);
        speech_text = "<speak>" + speech_text;
    }

    // If the text is already a well-formed document rooted at <speak>, peel the
    // wrapper off so the re-wrapping below does not nest or double-close it.
    tinyxml2::XMLDocument doc;
    const tinyxml2::XMLError res = doc.Parse(speech_text.c_str());
    // RootElement() can be null even after a successful parse, so check it
    // before asking for the element name.
    const tinyxml2::XMLElement* root = (res == tinyxml2::XML_SUCCESS) ? doc.RootElement() : nullptr;
    if (root != nullptr && strcmp(root->Name(), "speak") == 0)
    {
        m_logger->debug("Text type = ssml");

        // Remove the whole opening tag, including any attributes it carries
        // (e.g. <speak version="1.0">), not just the literal "<speak>".
        const auto open_begin = speech_text.find("<speak");
        const auto open_end = (open_begin == Aws::String::npos)
            ? Aws::String::npos
            : speech_text.find('>', open_begin);
        if (open_end != Aws::String::npos)
        {
            speech_text.erase(open_begin, open_end - open_begin + 1);
        }

        // Remove the matching closing tag; a new one is appended below.
        const auto close_begin = speech_text.rfind("</speak>");
        if (close_begin != Aws::String::npos)
        {
            speech_text.erase(close_begin, sizeof("</speak>") - 1);
        }
    }

    // The payload is always wrapped in <speak> below, so it is always SSML.
    // NOTE(review): plain text is not XML-escaped before wrapping, so characters
    // such as '&' or '<' in ordinary text may be rejected by Polly — confirm.
    speech_request.SetTextType(TextType::ssml);
    if (m_isNews)
    {
        speech_text = "<speak><amazon:domain name=\"news\">" + speech_text + "</amazon:domain></speak>";
    }
    else if (m_isConversational)
    {
        speech_text = "<speak><amazon:domain name=\"conversational\">" + speech_text + "</amazon:domain></speak>";
    }
    else
    {
        speech_text = "<speak>" + speech_text + "</speak>";
    }

    m_logger->debug("{}: Asking Polly for '{}'", __FUNCTION__, speech_text.c_str());
    speech_request.SetOutputFormat(OutputFormat::pcm);
    speech_request.SetVoiceId(m_vVoiceId);
    m_logger->debug("Generating speech: {}", speech_text);
    speech_request.SetText(speech_text);
    speech_request.SetSampleRate("16000");
    if (m_isNeural) {
        m_logger->debug("Neural voice? Yes");
        speech_request.SetEngine(Engine::neural);
    }

    auto speech = p.SynthesizeSpeech(speech_request);
    response.IsSuccess = speech.IsSuccess();
    if (!speech.IsSuccess())
    {
        // Report why the request failed instead of returning an empty message.
        // NOTE(review): the previous, commented-out code spelled this GetMessageW,
        // which suggests the Windows GetMessage macro was in play; verify this
        // line against the project's include order.
        std::stringstream error;
        error << speech.GetError().GetExceptionName() << ": " << speech.GetError().GetMessage();
        response.ErrorMessage = error.str();
        return response;
    }

    // Copy at most MAX_SIZE bytes of audio into the response buffer; gcount()
    // reports how many bytes the read actually delivered.
    // NOTE(review): audio longer than MAX_SIZE is silently truncated, and this
    // assumes AudioData already provides MAX_SIZE bytes of storage — confirm.
    auto& result = speech.GetResult();
    auto& stream = result.GetAudioStream();
    stream.read(reinterpret_cast<char*>(&response.AudioData[0]), MAX_SIZE);
    response.Length = stream.gcount();
    return response;
}