# dialogflow-cx/detect_intent_stream.py
def detect_intent_stream(
    agent,
    session_id,
    audio_file_path,
    language_code,
    *,
    sample_rate_hertz=24000,
    chunk_size=4096,
):
    """Detects intent from streaming audio and prints the results.

    Streams a local audio file to the Dialogflow CX StreamingDetectIntent
    API, printing each intermediate transcript and then the final query
    text and response messages. Using the same `session_id` between
    requests allows continuation of the conversation.

    Args:
        agent: Full agent resource name, e.g.
            ``projects/<Project>/locations/<Location>/agents/<Agent>``.
        session_id: Identifier of the conversation session.
        audio_file_path: Path to a local LINEAR16-encoded audio file.
        language_code: Language of the input audio, e.g. ``en-US``.
        sample_rate_hertz: Sample rate of the input audio. Defaults to
            24000, matching the original hard-coded value.
        chunk_size: Bytes of audio sent per streaming request. Defaults
            to 4096, matching the original hard-coded value.

    Returns:
        None. Results are written to stdout.
    """
    session_path = f"{agent}/sessions/{session_id}"
    print(f"Session path: {session_path}\n")

    # Non-global agents must be reached through their regional endpoint.
    client_options = None
    agent_components = AgentsClient.parse_agent_path(agent)
    location_id = agent_components["location"]
    if location_id != "global":
        api_endpoint = f"{location_id}-dialogflow.googleapis.com:443"
        print(f"API Endpoint: {api_endpoint}\n")
        client_options = {"api_endpoint": api_endpoint}
    session_client = SessionsClient(client_options=client_options)

    input_audio_config = audio_config.InputAudioConfig(
        audio_encoding=audio_config.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
        sample_rate_hertz=sample_rate_hertz,
    )

    def request_generator():
        """Yields the configuration request, then one request per audio chunk."""
        audio_input = session.AudioInput(config=input_audio_config)
        query_input = session.QueryInput(
            audio=audio_input, language_code=language_code
        )

        voice_selection = audio_config.VoiceSelectionParams()
        synthesize_speech_config = audio_config.SynthesizeSpeechConfig()
        output_audio_config = audio_config.OutputAudioConfig()

        # Sets the voice name and gender for the synthesized response audio.
        voice_selection.name = "en-GB-Standard-A"
        voice_selection.ssml_gender = (
            audio_config.SsmlVoiceGender.SSML_VOICE_GENDER_FEMALE
        )
        synthesize_speech_config.voice = voice_selection

        # Sets the audio encoding of the response audio.
        output_audio_config.audio_encoding = (
            audio_config.OutputAudioEncoding.OUTPUT_AUDIO_ENCODING_UNSPECIFIED
        )
        output_audio_config.synthesize_speech_config = synthesize_speech_config

        # The first request contains the configuration only.
        yield session.StreamingDetectIntentRequest(
            session=session_path,
            query_input=query_input,
            output_audio_config=output_audio_config,
        )

        # Here we are reading small chunks of audio data from a local
        # audio file. In practice these chunks should come from
        # an audio input device.
        with open(audio_file_path, "rb") as audio_file:
            while True:
                chunk = audio_file.read(chunk_size)
                if not chunk:
                    break
                # Subsequent requests contain only audio data.
                audio_input = session.AudioInput(audio=chunk)
                query_input = session.QueryInput(audio=audio_input)
                yield session.StreamingDetectIntentRequest(query_input=query_input)

    responses = session_client.streaming_detect_intent(requests=request_generator())

    print("=" * 20)
    for response in responses:
        print(f'Intermediate transcript: "{response.recognition_result.transcript}".')

    # Note: The result from the last response is the final transcript along
    # with the detected content.
    response = response.detect_intent_response
    print(f"Query text: {response.query_result.transcript}")
    response_messages = [
        " ".join(msg.text.text) for msg in response.query_result.response_messages
    ]
    print(f"Response text: {' '.join(response_messages)}\n")