# Setup

Go through this section before starting the tests.

In [None]:
#@title Install dependency libraries

#@markdown Run this block. You may need to restart the Colab runtime afterwards;
#@markdown if so, restart it and then skip to the next block.

!pip install --upgrade pip
!pip install --upgrade google-cloud-dialogflow
!pip install --upgrade google-cloud-dialogflow-cx
!pip install --upgrade pydub

In [None]:
#@title Common library { display-mode: "form" }

from datetime import datetime
from google.cloud import dialogflow_v2beta1
from google.cloud import dialogflowcx_v3beta1
from math import floor
from pydub import AudioSegment
from time import sleep


def YieldAudioChunks(audio_file, sample_rate, sample_width, chunk_duration):
  """Yields a raw mono audio clip as consecutive chunks, paced by sleeping.

  The clip is loaded with `AudioSegment.from_raw` as single-channel audio and
  sliced into pieces of `chunk_duration` milliseconds. After each chunk is
  yielded, the generator sleeps for `chunk_duration` milliseconds before
  producing the next one, so chunks are emitted at roughly playback speed.

  Args:
    audio_file: Raw audio source handed to `AudioSegment.from_raw` (the
      callers in this notebook pass a file object opened in binary mode).
    sample_rate: Frame rate of the audio, passed as `frame_rate`.
    sample_width: Sample width passed to pydub (the callers in this notebook
      use 1 for MULAW and 2 for LINEAR16).
    chunk_duration: Length of each chunk in milliseconds. Must be positive.

  Yields:
    The raw bytes of each chunk, in order. The final chunk holds whatever
    audio remains and may therefore be shorter than the others.

  Raises:
    ValueError: If `chunk_duration` is not positive. Because this is a
      generator, the error surfaces on the first iteration.
  """
  # A zero or negative step would never advance past the end of the clip and
  # the loop below would spin forever, so reject it before loading any audio.
  if chunk_duration <= 0:
    raise ValueError(
        'chunk_duration must be a positive number of milliseconds, got %r' %
        (chunk_duration,))

  clip = AudioSegment.from_raw(
      audio_file,
      frame_rate=sample_rate,
      sample_width=sample_width,
      channels=1)
  duration = len(clip)
  start_offset = 0
  while start_offset < duration:
    yield clip[start_offset:start_offset + chunk_duration].raw_data
    start_offset += chunk_duration
    # chunk_duration is in milliseconds; sleep() takes seconds.
    sleep(chunk_duration / 1000)


def CallUnaryAPI(method, request):
  """Sends one request through a unary API method and logs both directions.

  A timestamped header followed by the request is printed before the call,
  and a timestamped header followed by the response is printed after it.

  Args:
    method: Callable invoked as `method(request=request)`.
    request: The request object to send.
  """
  print('[%s] Sending request:' % datetime.now(), request, sep='\n')
  reply = method(request=request)
  print('[%s] Received response:' % datetime.now(), reply, sep='\n')


def CallStreamingAPI(method, request_generator):
  """Runs a streaming API call, logging every request and every response.

  Args:
    method: Callable invoked as `method(requests=<iterator>)`. Whatever it
      returns is iterated to obtain the responses.
    request_generator: Zero-argument callable returning an iterable of
      request objects.
  """
  def logged_requests():
    # Logging happens lazily, at the moment the callee pulls each request, so
    # the printed timestamp reflects when the request is actually handed over.
    for outgoing in request_generator():
      print('[%s] Sending request:' % datetime.now(), outgoing, sep='\n')
      yield outgoing

  responses = method(requests=logged_requests())
  for incoming in responses:
    print('[%s] Received response:' % datetime.now(), incoming, sep='\n')



In [None]:
#@title Authentication { display-mode: "form" }

#@markdown Run this block to authenticate yourself.
#
#@markdown These credentials are used to call GCP APIs. Make sure you have permission to access the resources of the project you're going to test.

from google import auth as google_auth
from google.colab import auth

# Authenticate the current Colab user (see the cell description above).
auth.authenticate_user()
# Fetch the default credentials, requesting the cloud-platform scope. The
# `credentials` object is passed to every API client created in later cells.
# NOTE(review): `project_id` is not referenced by the cells visible here, which
# use the PROJECT_ID form field instead.
credentials, project_id = google_auth.default(
    scopes=['https://www.googleapis.com/auth/cloud-platform'])

In [None]:
#@title Mount Google Drive { display-mode: "form" }

#@markdown Mount your Google Drive to `/content/gdrive` folder. When this is done, find
#@markdown your folder in Colab's Files tab to the left.

from google.colab import drive
# The mount point matches the `/content/gdrive/...` paths that later cells use
# as their default INPUT value.
drive.mount('/content/gdrive', force_remount=True)

#@markdown Later, you may refer to files in your Google Drive as test
#@markdown data.

In [None]:
#@title Gather Basic Info { display-mode: "form", run: "auto" }

#@markdown GCP project you're going to test
PROJECT_ID = 'tianzhu-test' #@param {type:"string"}
#@markdown GCP location, e.g. 'global' or 'us-east1'
LOCATION_ID = 'global' #@param {type:"string"}

# Test *DetectIntent

This API does not require setting up any resources for the session: simply specify a unique session ID and start talking to the virtual agent.

In [None]:
#@title DetectIntent { display-mode: "form" }

#@markdown Dialogflow version, ES or CX.
DF_VERSION = 'CX' #@param ["ES", "CX"]
#@markdown Agent ID (CX only)
AGENT_ID = '38c93a4f-37cd-4123-a75d-e0ad78120c97' #@param { type: "string" }
#@markdown Environment ID
ENVIRONMENT_ID = 'draft' #@param { type: "string" }
#@markdown User ID (ES only)
USER_ID = '-' #@param { type: "string" }
#@markdown Session ID; any unique value will do for testing.
SESSION_ID = 'test' #@param { type: "string" }
#@markdown Input type, text or audio.
INPUT_TYPE = 'AUDIO' #@param ["TEXT", "AUDIO"]
#@markdown Language of the input, e.g. 'en-us'
LANGUAGE_CODE = 'en-us' #@param { type: "string" }
#@markdown Input, the text query or the file of audio clip
INPUT = '/content/gdrive/MyDrive/hello.raw' #@param { type: "string" }
#@markdown audio format, MULAW or LINEAR16
AUDIO_FORMAT = 'LINEAR16' #@param ["MULAW", "LINEAR16"]
#@markdown Sample rate, e.g. 16000
SAMPLE_RATE = 8000 #@param { type: "integer" }
#@markdown Speech model, e.g. 'phone_call'
SPEECH_MODEL = 'phone_call' #@param { type: "string" }
#@markdown Whether to use enhanced speech model. (Must enable speech logging in
#@markdown the agent settings.)
USE_ENHANCED_SPEECH_MODEL = True #@param { type: "boolean" }


# Build a DetectIntent request for the selected Dialogflow version (ES or CX)
# and input type (text or audio), then send it as a single unary call.
if DF_VERSION == 'ES':
  client = dialogflow_v2beta1.SessionsClient(
      credentials=credentials,
      client_options={
          'quota_project_id': PROJECT_ID
      })
  # ES session names carry the environment and user segments.
  session = (
      'projects/%s/locations/%s/agent/environments/%s/users/%s/sessions/%s' %
          (PROJECT_ID, LOCATION_ID, ENVIRONMENT_ID, USER_ID, SESSION_ID))

  if INPUT_TYPE == 'TEXT':
    # INPUT is the text query itself.
    request = dialogflow_v2beta1.DetectIntentRequest(
        session=session,
        query_input=dialogflow_v2beta1.QueryInput(
            text=dialogflow_v2beta1.TextInput(
                language_code=LANGUAGE_CODE,
                text=INPUT)))
  else:
    # INPUT is the path of an audio file. Read it through a context manager so
    # the file handle is closed as soon as the bytes are in memory.
    with open(INPUT, 'rb') as audio_file:
      audio_bytes = audio_file.read()
    request = dialogflow_v2beta1.DetectIntentRequest(
        session=session,
        query_input=dialogflow_v2beta1.QueryInput(
            audio_config=dialogflow_v2beta1.InputAudioConfig(
                audio_encoding=(
                    dialogflow_v2beta1.AudioEncoding.AUDIO_ENCODING_MULAW
                    if AUDIO_FORMAT == 'MULAW'
                    else dialogflow_v2beta1.AudioEncoding
                        .AUDIO_ENCODING_LINEAR_16),
                sample_rate_hertz=SAMPLE_RATE,
                language_code=LANGUAGE_CODE,
                model=SPEECH_MODEL,
                model_variant=(
                    dialogflow_v2beta1.SpeechModelVariant.USE_ENHANCED
                    if USE_ENHANCED_SPEECH_MODEL
                    else dialogflow_v2beta1.SpeechModelVariant.USE_STANDARD))),
        input_audio=audio_bytes)

else:
  client = dialogflowcx_v3beta1.SessionsClient(
      credentials=credentials,
      client_options={
          'quota_project_id': PROJECT_ID
      })
  # CX session names carry the agent and environment segments.
  session = 'projects/%s/locations/%s/agents/%s/environments/%s/sessions/%s' % (
            PROJECT_ID, LOCATION_ID, AGENT_ID, ENVIRONMENT_ID, SESSION_ID)

  if INPUT_TYPE == 'TEXT':
    # INPUT is the text query itself. In CX the language code is set on the
    # QueryInput rather than on the TextInput.
    request = dialogflowcx_v3beta1.DetectIntentRequest(
        session=session,
        query_input=dialogflowcx_v3beta1.QueryInput(
            text=dialogflowcx_v3beta1.TextInput(text=INPUT),
            language_code=LANGUAGE_CODE))
  else:
    # INPUT is the path of an audio file. Read it through a context manager so
    # the file handle is closed as soon as the bytes are in memory.
    with open(INPUT, 'rb') as audio_file:
      audio_bytes = audio_file.read()
    request = dialogflowcx_v3beta1.DetectIntentRequest(
        session=session,
        query_input=dialogflowcx_v3beta1.QueryInput(
            audio=dialogflowcx_v3beta1.AudioInput(
                  config=dialogflowcx_v3beta1.InputAudioConfig(
                      audio_encoding=(
                          dialogflowcx_v3beta1.AudioEncoding
                              .AUDIO_ENCODING_MULAW
                          if AUDIO_FORMAT == 'MULAW'
                          else dialogflowcx_v3beta1.AudioEncoding
                              .AUDIO_ENCODING_LINEAR_16),
                      sample_rate_hertz=SAMPLE_RATE,
                      model=SPEECH_MODEL,
                      model_variant=(
                          dialogflowcx_v3beta1.SpeechModelVariant.USE_ENHANCED
                          if USE_ENHANCED_SPEECH_MODEL
                          else dialogflowcx_v3beta1.SpeechModelVariant
                              .USE_STANDARD)),
                audio=audio_bytes),
            language_code=LANGUAGE_CODE))

CallUnaryAPI(client.detect_intent, request)


In [None]:
#@title StreamingDetectIntent { display-mode: "form" }

#@markdown Dialogflow version, ES or CX.
DF_VERSION = 'CX' #@param ["ES", "CX"]
#@markdown Agent ID (CX only)
AGENT_ID = '38c93a4f-37cd-4123-a75d-e0ad78120c97' #@param { type: "string" }
#@markdown Environment ID
ENVIRONMENT_ID = 'draft' #@param { type: "string" }
#@markdown User ID (ES only)
USER_ID = '-' #@param { type: "string" }
#@markdown Session ID; any unique value will do for testing.
SESSION_ID = 'test' #@param { type: "string" }
#@markdown Input type, text or audio.
INPUT_TYPE = 'TEXT' #@param ["TEXT", "AUDIO"]
#@markdown Language of the input, e.g. 'en-us'
LANGUAGE_CODE = 'en-us' #@param { type: "string" }
#@markdown Input, the text query or the file of audio clip
INPUT = '/content/gdrive/MyDrive/hello.raw' #@param { type: "string" }
#@markdown audio format, MULAW or LINEAR16
AUDIO_FORMAT = 'LINEAR16' #@param ["MULAW", "LINEAR16"]
#@markdown Sample rate, e.g. 16000
SAMPLE_RATE = 8000 #@param { type: "integer" }
#@markdown Speech model, e.g. 'phone_call'
SPEECH_MODEL = 'phone_call' #@param { type: "string" }
#@markdown Whether to use enhanced speech model. (Must enable speech logging in
#@markdown the agent settings.)
USE_ENHANCED_SPEECH_MODEL = True #@param { type: "boolean" }
#@markdown Whether to use single utterance mode
USE_SINGLE_UTTERANCE_MODE = True #@param { type: "boolean" }
#@markdown Audio chunk duration in ms.
CHUNK_DURATION = 100 #@param { type: "integer" }


# Define `yield_requests` for the selected Dialogflow version (ES or CX) and
# input type (text or audio), then run it through the streaming API. The first
# request of every stream carries the session and configuration; for audio
# input, the following requests carry the audio chunks.
if DF_VERSION == 'ES':
  client = dialogflow_v2beta1.SessionsClient(
      credentials=credentials,
      client_options={
          'quota_project_id': PROJECT_ID
      })
  # ES session names carry the environment and user segments.
  session = (
      'projects/%s/locations/%s/agent/environments/%s/users/%s/sessions/%s' %
          (PROJECT_ID, LOCATION_ID, ENVIRONMENT_ID, USER_ID, SESSION_ID))

  if INPUT_TYPE == 'TEXT':
    def yield_requests():
      """Yields the single ES request carrying the text query."""
      yield dialogflow_v2beta1.StreamingDetectIntentRequest(
          session=session,
          query_input=dialogflow_v2beta1.QueryInput(
              text=dialogflow_v2beta1.TextInput(
                  language_code=LANGUAGE_CODE,
                  text=INPUT)))
  else:
    def yield_requests():
      """Yields the ES audio config request, then one request per chunk."""
      yield dialogflow_v2beta1.StreamingDetectIntentRequest(
          session=session,
          query_input=dialogflow_v2beta1.QueryInput(
              audio_config=dialogflow_v2beta1.InputAudioConfig(
                  audio_encoding=(
                      dialogflow_v2beta1.AudioEncoding.AUDIO_ENCODING_MULAW
                      if AUDIO_FORMAT == 'MULAW'
                      else dialogflow_v2beta1.AudioEncoding
                          .AUDIO_ENCODING_LINEAR_16),
                  sample_rate_hertz=SAMPLE_RATE,
                  language_code=LANGUAGE_CODE,
                  model=SPEECH_MODEL,
                  model_variant=(
                      dialogflow_v2beta1.SpeechModelVariant.USE_ENHANCED
                      if USE_ENHANCED_SPEECH_MODEL
                      else dialogflow_v2beta1.SpeechModelVariant
                          .USE_STANDARD),
                  single_utterance=USE_SINGLE_UTTERANCE_MODE)))

      # Keep the audio file open only while chunks are being produced, so the
      # handle is closed once the stream has been sent.
      with open(INPUT, 'rb') as audio_file:
        for chunk in YieldAudioChunks(
            audio_file,
            sample_rate=SAMPLE_RATE,
            sample_width=1 if AUDIO_FORMAT == 'MULAW' else 2,
            chunk_duration=CHUNK_DURATION):
          yield dialogflow_v2beta1.StreamingDetectIntentRequest(
              input_audio=chunk)

else:
  client = dialogflowcx_v3beta1.SessionsClient(
      credentials=credentials,
      client_options={
          'quota_project_id': PROJECT_ID
      })
  # CX session names carry the agent and environment segments.
  session = 'projects/%s/locations/%s/agents/%s/environments/%s/sessions/%s' % (
            PROJECT_ID, LOCATION_ID, AGENT_ID, ENVIRONMENT_ID, SESSION_ID)

  if INPUT_TYPE == 'TEXT':
    def yield_requests():
      """Yields the single CX request carrying the text query."""
      yield dialogflowcx_v3beta1.StreamingDetectIntentRequest(
          session=session,
          query_input=dialogflowcx_v3beta1.QueryInput(
              text=dialogflowcx_v3beta1.TextInput(text=INPUT),
              language_code=LANGUAGE_CODE))
  else:
    def yield_requests():
      """Yields the CX audio config request, then one request per chunk."""
      yield dialogflowcx_v3beta1.StreamingDetectIntentRequest(
          session=session,
          query_input=dialogflowcx_v3beta1.QueryInput(
              audio=dialogflowcx_v3beta1.AudioInput(
                    config=dialogflowcx_v3beta1.InputAudioConfig(
                        audio_encoding=(
                            dialogflowcx_v3beta1.AudioEncoding
                                .AUDIO_ENCODING_MULAW
                            if AUDIO_FORMAT == 'MULAW'
                            else dialogflowcx_v3beta1.AudioEncoding
                                .AUDIO_ENCODING_LINEAR_16),
                        sample_rate_hertz=SAMPLE_RATE,
                        model=SPEECH_MODEL,
                        model_variant=(
                            dialogflowcx_v3beta1.SpeechModelVariant.USE_ENHANCED
                            if USE_ENHANCED_SPEECH_MODEL
                            else dialogflowcx_v3beta1.SpeechModelVariant
                                .USE_STANDARD),
                        # Honour the form field for CX as well, the same way
                        # the ES branch does.
                        single_utterance=USE_SINGLE_UTTERANCE_MODE)),
              language_code=LANGUAGE_CODE))

      # Keep the audio file open only while chunks are being produced, so the
      # handle is closed once the stream has been sent.
      with open(INPUT, 'rb') as audio_file:
        for chunk in YieldAudioChunks(
            audio_file,
            sample_rate=SAMPLE_RATE,
            sample_width=1 if AUDIO_FORMAT == 'MULAW' else 2,
            chunk_duration=CHUNK_DURATION):
          yield dialogflowcx_v3beta1.StreamingDetectIntentRequest(
            query_input=dialogflowcx_v3beta1.QueryInput(
                audio=dialogflowcx_v3beta1.AudioInput(audio=chunk),
                language_code=LANGUAGE_CODE))

CallStreamingAPI(client.streaming_detect_intent, yield_requests)

# Test *AnalyzeContent

To use this API, we first have to set up Conversation and Participant resources. Then call the API on the created participant resource.

In [None]:
#@title Set up a new conversation { display-mode: "form" }

#@markdown Dialogflow conversation profile to use to create the conversation
CONVERSATION_PROFILE_ID = '2rjtTcLFRAmqCiNDh5gQzQ' #@param {type:"string"}

# Create a conversation under the chosen conversation profile, then register an
# END_USER participant in it. `participant_name` is what the AnalyzeContent
# cells address their requests to.
location_path = 'projects/%s/locations/%s' % (PROJECT_ID, LOCATION_ID)
profile_path = (
    'projects/%s/locations/%s/conversationProfiles/%s' %
    (PROJECT_ID, LOCATION_ID, CONVERSATION_PROFILE_ID))

conversations_client = dialogflow_v2beta1.ConversationsClient(
    credentials=credentials,
    client_options={'quota_project_id': PROJECT_ID})
created_conversation = conversations_client.create_conversation(
    request=dialogflow_v2beta1.CreateConversationRequest(
        parent=location_path,
        conversation=dialogflow_v2beta1.Conversation(
            conversation_profile=profile_path)))
conversation_name = created_conversation.name

participants_client = dialogflow_v2beta1.ParticipantsClient(
    credentials=credentials,
    client_options={'quota_project_id': PROJECT_ID})
created_participant = participants_client.create_participant(
    request=dialogflow_v2beta1.CreateParticipantRequest(
        parent=conversation_name,
        participant=dialogflow_v2beta1.Participant(
            role=dialogflow_v2beta1.Participant.Role.END_USER)))
participant_name = created_participant.name

print('Created conversation: %s' % conversation_name)
print('Created participant: %s' % participant_name)



In [None]:
#@title AnalyzeContent { display-mode: "form" }

#@markdown Input type, text or audio.
INPUT_TYPE = 'TEXT' #@param ["TEXT", "AUDIO", "EVENT"]
#@markdown Language of the input, e.g. 'en-us'
LANGUAGE_CODE = 'en-us' #@param { type: "string" }
#@markdown Input, the text query or the file of audio clip
INPUT = '/content/gdrive/MyDrive/hello.raw' #@param { type: "string" }
#@markdown audio format, MULAW or LINEAR16
AUDIO_FORMAT = 'LINEAR16' #@param ["MULAW", "LINEAR16"]
#@markdown Sample rate, e.g. 16000
SAMPLE_RATE = 8000 #@param { type: "integer" }
#@markdown Speech model, e.g. 'phone_call'
SPEECH_MODEL = 'phone_call' #@param { type: "string" }
#@markdown Whether to use enhanced speech model.
USE_ENHANCED_SPEECH_MODEL = True #@param { type: "boolean" }

# Build an AnalyzeContent request for the participant created in the setup
# cell, using text, event or audio input, then send it as a unary call.
client = dialogflow_v2beta1.ParticipantsClient(
    credentials=credentials,
    client_options={
        'quota_project_id': PROJECT_ID
    })

if INPUT_TYPE == 'TEXT':
  # INPUT is the text query itself.
  request = dialogflow_v2beta1.AnalyzeContentRequest(
      participant=participant_name,
      text_input=dialogflow_v2beta1.TextInput(
          text=INPUT, language_code=LANGUAGE_CODE))
elif INPUT_TYPE == 'EVENT':
  # INPUT is used as the event name.
  request = dialogflow_v2beta1.AnalyzeContentRequest(
      participant=participant_name,
      event_input=dialogflow_v2beta1.EventInput(
          name=INPUT, language_code=LANGUAGE_CODE))
else:
  # INPUT is the path of an audio file. Read it through a context manager so
  # the file handle is closed as soon as the bytes are in memory.
  with open(INPUT, 'rb') as audio_file:
    audio_bytes = audio_file.read()
  request = dialogflow_v2beta1.AnalyzeContentRequest(
      participant=participant_name,
      audio_input=dialogflow_v2beta1.AudioInput(
          config=dialogflow_v2beta1.InputAudioConfig(
              language_code=LANGUAGE_CODE,
              audio_encoding=(
                  dialogflow_v2beta1.AudioEncoding.AUDIO_ENCODING_MULAW
                  if AUDIO_FORMAT == 'MULAW'
                  else dialogflow_v2beta1.AudioEncoding
                      .AUDIO_ENCODING_LINEAR_16),
              sample_rate_hertz=SAMPLE_RATE,
              model=SPEECH_MODEL,
              model_variant=(
                  dialogflow_v2beta1.SpeechModelVariant.USE_ENHANCED
                  if USE_ENHANCED_SPEECH_MODEL
                  else dialogflow_v2beta1.SpeechModelVariant.USE_STANDARD)),
          audio=audio_bytes))

CallUnaryAPI(client.analyze_content, request)

In [None]:
#@title StreamingAnalyzeContent { display-mode: "form" }

#@markdown Input type, text or audio.
INPUT_TYPE = 'TEXT' #@param ["TEXT", "AUDIO"]
#@markdown Language of the input, e.g. 'en-us'
LANGUAGE_CODE = 'en-us' #@param { type: "string" }
#@markdown Input, the text query or the file of audio clip
INPUT = '/content/gdrive/MyDrive/hello.raw' #@param { type: "string" }
#@markdown audio format, MULAW or LINEAR16
AUDIO_FORMAT = 'LINEAR16' #@param ["MULAW", "LINEAR16"]
#@markdown Sample rate, e.g. 16000
SAMPLE_RATE = 8000 #@param { type: "integer" }
#@markdown Speech model, e.g. 'phone_call'
SPEECH_MODEL = 'phone_call' #@param { type: "string" }
#@markdown Whether to use enhanced speech model.
USE_ENHANCED_SPEECH_MODEL = True #@param { type: "boolean" }
#@markdown Whether to use single utterance mode
USE_SINGLE_UTTERANCE_MODE = True #@param { type: "boolean" }
#@markdown Audio chunk duration in ms.
CHUNK_DURATION = 100 #@param { type: "integer" }

# Define `yield_requests` for text or audio input and run it through the
# streaming AnalyzeContent API for the participant created in the setup cell.
# The first request of the stream carries the participant and configuration;
# the following requests carry the actual input.
client = dialogflow_v2beta1.ParticipantsClient(
    credentials=credentials,
    client_options={
        'quota_project_id': PROJECT_ID
    })

if INPUT_TYPE == 'TEXT':
  def yield_requests():
    """Yields the text config request, then the request with the text."""
    yield dialogflow_v2beta1.StreamingAnalyzeContentRequest(
        participant=participant_name,
        text_config=dialogflow_v2beta1.InputTextConfig(
            language_code=LANGUAGE_CODE))
    yield dialogflow_v2beta1.StreamingAnalyzeContentRequest(input_text=INPUT)
else:
  def yield_requests():
    """Yields the audio config request, then one request per audio chunk."""
    yield dialogflow_v2beta1.StreamingAnalyzeContentRequest(
        participant=participant_name,
        audio_config=dialogflow_v2beta1.InputAudioConfig(
            language_code=LANGUAGE_CODE,
            audio_encoding=(
                dialogflow_v2beta1.AudioEncoding.AUDIO_ENCODING_MULAW
                if AUDIO_FORMAT == 'MULAW'
                else dialogflow_v2beta1.AudioEncoding
                    .AUDIO_ENCODING_LINEAR_16),
            sample_rate_hertz=SAMPLE_RATE,
            model=SPEECH_MODEL,
            model_variant=(
                dialogflow_v2beta1.SpeechModelVariant.USE_ENHANCED
                if USE_ENHANCED_SPEECH_MODEL
                else dialogflow_v2beta1.SpeechModelVariant.USE_STANDARD),
            single_utterance=USE_SINGLE_UTTERANCE_MODE))

    # Keep the audio file open only while chunks are being produced, so the
    # handle is closed once the stream has been sent.
    with open(INPUT, 'rb') as audio_file:
      for chunk in YieldAudioChunks(
          audio_file,
          sample_rate=SAMPLE_RATE,
          sample_width=1 if AUDIO_FORMAT == 'MULAW' else 2,
          chunk_duration=CHUNK_DURATION):
        yield dialogflow_v2beta1.StreamingAnalyzeContentRequest(
            input_audio=chunk)

CallStreamingAPI(client.streaming_analyze_content, yield_requests)