in PollyTTSEngine/ttsengobj.cpp [140:246]
//--- CTTSEngObj::Speak
//    Entry point SAPI calls to render a list of text fragments.
//
//    When no voice override is set (m_voiceOveride is empty) the voice id and
//    the IsNeural / IsNews / IsConversational flags are read from the
//    "Attributes" key of the voice token. The AWS SDK is then initialised,
//    the arguments are validated, and a sentence is built with
//    GetNextSentence(), announced with an SPEI_SENTENCE_BOUNDARY event and
//    handed to OutputSentence().
//
//    Parameters:
//      dwSpeakFlags, rguidFormatId, pWaveFormatEx - not used by this method.
//      pTextFragList - head of the text fragment list to speak.
//      pOutputSite   - SAPI site used to poll actions and to queue events.
//
//    Returns:
//      E_INVALIDARG if pOutputSite or pTextFragList is a bad pointer; the
//      failing HRESULT if the token attributes key or the VoiceId value
//      cannot be read; otherwise the result of the processing steps, with
//      S_FALSE (end of text) from the sentence loop mapped to S_OK.
STDMETHODIMP CTTSEngObj::Speak( DWORD dwSpeakFlags,
                                REFGUID rguidFormatId,
                                const WAVEFORMATEX * pWaveFormatEx,
                                const SPVTEXTFRAG* pTextFragList,
                                ISpTTSEngineSite* pOutputSite )
{
    Aws::SDKOptions options;
    m_logger->debug("Starting Speak\n");
    if (wcslen(m_voiceOveride) == 0)
    {
        CComPtr<ISpDataKey> attributesKey;
        m_logger->debug("Reading attributes key to get the voice\n");

        //--- Without the attributes key or the voice id there is nothing to
        //    synthesise with, so report the failure instead of dereferencing
        //    a null key or continuing with an undefined voice.
        // NOTE(review): if m_pPollyVoice is a raw LPWSTR, the string obtained
        // by an earlier Speak call is overwritten here without being freed -
        // confirm against the member's declaration.
        HRESULT hrAttr = m_cpToken->OpenKey(L"Attributes", &attributesKey);
        if (SUCCEEDED(hrAttr))
        {
            hrAttr = attributesKey->GetStringValue(L"VoiceId", &m_pPollyVoice);
        }
        if (FAILED(hrAttr))
        {
            m_logger->debug("ERROR Reading the voice attributes from the token\n");
            return hrAttr;
        }

        //--- Reads one optional "0"/"1" attribute. A missing or unreadable
        //    value counts as false. The returned string is owned by the
        //    caller and is released with CoTaskMemFree (a no-op for NULL).
        auto readFlag = [&attributesKey](const WCHAR* pszName) -> bool
        {
            LPWSTR pszValue = nullptr;
            const HRESULT hrValue = attributesKey->GetStringValue(pszName, &pszValue);
            const bool isSet = SUCCEEDED(hrValue) &&
                               pszValue != nullptr &&
                               wcscmp(L"1", pszValue) == 0;
            ::CoTaskMemFree(pszValue);
            return isSet;
        };

        m_isNeural = readFlag(L"IsNeural");
        m_isNews = readFlag(L"IsNews");
        m_isConversational = readFlag(L"IsConversational");
    }

    // NOTE(review): InitAPI is called on every Speak and no matching
    // ShutdownAPI is visible in this method - confirm it is balanced elsewhere.
    m_logger->debug("Initializing AWS\n");
    InitAPI(options);

    HRESULT hr = S_OK;

    //--- Check args
    if( SP_IS_BAD_INTERFACE_PTR( pOutputSite ) ||
        SP_IS_BAD_READ_PTR( pTextFragList ) )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        //--- Init some vars
        m_pCurrFrag   = pTextFragList;
        m_pNextChar   = m_pCurrFrag->pTextStart;
        m_pEndChar    = m_pNextChar + m_pCurrFrag->ulTextLen;
        m_ullAudioOff = 0;

        CItemList ItemList;
        m_logger->debug("Starting work processing\n");

        while( SUCCEEDED( hr ) && !(pOutputSite->GetActions() & SPVES_ABORT) )
        {
            //--- Do skip?
            if( pOutputSite->GetActions() & SPVES_SKIP )
            {
                m_logger->debug("ACTION: SKIP\n");
                long lSkipCnt;
                SPVSKIPTYPE eType;
                hr = pOutputSite->GetSkipInfo( &eType, &lSkipCnt );
                if( SUCCEEDED( hr ) )
                {
                    //--- Notify SAPI how many items we skipped. We're returning zero
                    //    because this feature isn't implemented.
                    hr = pOutputSite->CompleteSkip( 0 );
                }
                if( FAILED( hr ) )
                {
                    //--- Stop here so the failure is returned as-is; carrying on
                    //    would touch a possibly empty item list and let
                    //    AddEvents overwrite the error code.
                    break;
                }
            }

            //--- Build the text item list. Anything other than S_OK (including
            //    S_FALSE for end of text) ends the loop.
            hr = GetNextSentence( ItemList );
            if( hr != S_OK )
            {
                m_logger->debug("ERROR Getting the next sentence from ItemList\n");
                break;
            }

            //--- We aren't going to do any part of speech determination,
            //    prosody, or pronunciation determination. If you were, one thing
            //    you will need is access to the SAPI lexicon. You can get that with
            //    the following call.
            //    CComPtr<ISpLexicon> cpLexicon;
            //    hr = pUser->GetLexicon( &cpLexicon );

            if( !(pOutputSite->GetActions() & SPVES_ABORT) )
            {
                //--- Fire begin sentence event. lParam is the source offset of
                //    the first item, wParam the source length spanned by the
                //    first through last items.
                CSentItem& FirstItem = ItemList.GetHead();
                CSentItem& LastItem  = ItemList.GetTail();
                CSpEvent Event;
                Event.eEventId             = SPEI_SENTENCE_BOUNDARY;
                Event.elParamType          = SPET_LPARAM_IS_UNDEFINED;
                Event.ullAudioStreamOffset = m_ullAudioOff;
                Event.lParam               = (LPARAM)FirstItem.ulItemSrcOffset;
                Event.wParam               = (WPARAM)LastItem.ulItemSrcOffset +
                                                     LastItem.ulItemSrcLen -
                                                     FirstItem.ulItemSrcOffset;
                hr = pOutputSite->AddEvents( &Event, 1 );

                //--- Output
                if( SUCCEEDED( hr ) )
                {
                    hr = OutputSentence( ItemList, pOutputSite );
                    // NOTE(review): returning here means at most one sentence
                    // list is sent to OutputSentence per Speak call and the
                    // S_FALSE -> S_OK mapping below is bypassed. Kept as in
                    // the original - confirm this is intentional.
                    return hr;
                }
            }
        }

        //--- S_FALSE just says that we hit the end, return okay
        if( hr == S_FALSE )
        {
            hr = S_OK;
        }
    }

    return hr;
} /* CTTSEngObj::Speak */