Scripts/Runtime/Wit.cs (582 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the license found in the
* LICENSE file in the root directory of this source tree.
*/
using UnityEngine;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using Facebook.WitAi.Configuration;
using Facebook.WitAi.Data;
using Facebook.WitAi.Events;
using Facebook.WitAi.Interfaces;
using Facebook.WitAi.Lib;
using UnityEngine.Events;
using UnityEngine.Serialization;
namespace Facebook.WitAi
{
/// <summary>
/// Voice service implementation backed by Wit.ai. Streams microphone audio (or text / an
/// external transcription provider's output) to Wit for NLU processing and raises the
/// configured <c>events</c> as results arrive.
/// </summary>
public class Wit : VoiceService, IWitRuntimeConfigProvider
{
    [FormerlySerializedAs("runtimeConfiguration")]
    [Header("Wit Configuration")]
    [FormerlySerializedAs("configuration")]
    [Tooltip("The configuration that will be used when activating wit. This includes api key.")]
    [SerializeField]
    private WitRuntimeConfiguration _runtimeConfiguration = new WitRuntimeConfiguration();

    // Audio source feeding samples to Wit (component on this GameObject, or a Mic added in Awake)
    private IAudioInputSource _micInput;
    // Options captured by Activate() and replayed when sound-wake triggers ActivateImmediately()
    private WitRequestOptions _currentRequestOptions;
    // Last Time.time at which mic level exceeded minKeepAliveVolume (volume-based endpointing)
    private float _lastMinVolumeLevelTime;
    // The in-flight speech request currently receiving mic data (null when none)
    private WitRequest _recordingRequest;
    // True while waiting for mic level to cross soundWakeThreshold before opening a request
    private bool _isSoundWakeActive;
    // Rolling pre-activation audio buffer (only allocated when micBufferLengthInSeconds > 0)
    private RingBuffer<byte> _micDataBuffer;
    // Read cursor into _micDataBuffer for the current request
    private RingBuffer<byte>.Marker _lastSampleMarker;
    // Scratch buffer used when flushing _micDataBuffer into the request stream
    private byte[] _writeBuffer;
    // True once volume crossed minKeepAliveVolume; gates whether the request is transmitted
    private bool _minKeepAliveWasHit;
    // True between activation and deactivation
    private bool _isActive;
    // Reused float->PCM16 conversion buffer (see Convert)
    private byte[] _byteDataBuffer;
    // Optional on-device/custom ASR replacing Wit's built-in transcription
    private ITranscriptionProvider _activeTranscriptionProvider;
    // Coroutine enforcing maxRecordingTime
    private Coroutine _timeLimitCoroutine;

    // Transcription based endpointing
    private bool _receivedTranscription;
    private float _lastWordTime;

    // Parallel Requests
    private HashSet<WitRequest> _transmitRequests = new HashSet<WitRequest>();
    private HashSet<WitRequest> _queuedRequests = new HashSet<WitRequest>();
    private Coroutine _queueHandler;

    #region Interfaces
    private IWitByteDataReadyHandler[] _dataReadyHandlers;
    private IWitByteDataSentHandler[] _dataSentHandlers;
    private Coroutine _micInitCoroutine;
    private IDynamicEntitiesProvider[] _dynamicEntityProviders;
    #endregion

    #if DEBUG_SAMPLE
    private FileStream sampleFile;
    #endif

    /// <summary>
    /// Returns true if wit is currently active and listening with the mic
    /// </summary>
    public override bool Active => _isActive || IsRequestActive;

    /// <summary>
    /// Returns true while a speech request is open and awaiting data or a response
    /// </summary>
    public override bool IsRequestActive => null != _recordingRequest && _recordingRequest.IsActive;

    /// <summary>
    /// The runtime configuration (Wit app token, endpointing thresholds, buffering, etc.).
    /// Setting a new configuration re-runs initialization (e.g. always-record).
    /// </summary>
    public WitRuntimeConfiguration RuntimeConfiguration
    {
        get => _runtimeConfiguration;
        set
        {
            _runtimeConfiguration = value;
            InitializeConfig();
        }
    }

    /// <summary>
    /// Gets/Sets a custom transcription provider. This can be used to replace any built in asr
    /// with an on device model or other provided source
    /// </summary>
    public override ITranscriptionProvider TranscriptionProvider
    {
        get => _activeTranscriptionProvider;
        set
        {
            // Detach from the previous provider before swapping to avoid duplicate callbacks
            if (null != _activeTranscriptionProvider)
            {
                _activeTranscriptionProvider.OnFullTranscription.RemoveListener(
                    OnFullTranscription);
                _activeTranscriptionProvider.OnPartialTranscription.RemoveListener(
                    OnPartialTranscription);
                _activeTranscriptionProvider.OnMicLevelChanged.RemoveListener(
                    OnTranscriptionMicLevelChanged);
                _activeTranscriptionProvider.OnStartListening.RemoveListener(
                    OnMicStartListening);
                _activeTranscriptionProvider.OnStoppedListening.RemoveListener(
                    OnMicStoppedListening);
            }

            _activeTranscriptionProvider = value;

            if (null != _activeTranscriptionProvider)
            {
                _activeTranscriptionProvider.OnFullTranscription.AddListener(
                    OnFullTranscription);
                _activeTranscriptionProvider.OnPartialTranscription.AddListener(
                    OnPartialTranscription);
                _activeTranscriptionProvider.OnMicLevelChanged.AddListener(
                    OnTranscriptionMicLevelChanged);
                _activeTranscriptionProvider.OnStartListening.AddListener(
                    OnMicStartListening);
                _activeTranscriptionProvider.OnStoppedListening.AddListener(
                    OnMicStoppedListening);
            }
        }
    }

    /// <summary>
    /// True while the microphone input source is actively recording
    /// </summary>
    public override bool MicActive => null != _micInput && _micInput.IsRecording;

    // Audio is streamed to Wit unless a custom transcription provider fully replaces it
    protected override bool ShouldSendMicData => _runtimeConfiguration.sendAudioToWit ||
                                                 null == _activeTranscriptionProvider;

    #region LIFECYCLE
    // Find transcription provider & Mic
    protected override void Awake()
    {
        base.Awake();
        if (null == _activeTranscriptionProvider &&
            _runtimeConfiguration.customTranscriptionProvider)
        {
            TranscriptionProvider = _runtimeConfiguration.customTranscriptionProvider;
        }

        _micInput = GetComponent<IAudioInputSource>();
        if (_micInput == null)
        {
            // Fall back to the built-in Mic component when no input source is present
            _micInput = gameObject.AddComponent<Mic>();
        }

        _dataReadyHandlers = GetComponents<IWitByteDataReadyHandler>();
        _dataSentHandlers = GetComponents<IWitByteDataSentHandler>();
    }

    // Add mic delegates
    protected override void OnEnable()
    {
        base.OnEnable();
        #if UNITY_EDITOR
        // Make sure we have a mic input after a script recompile
        if (null == _micInput)
        {
            _micInput = GetComponent<IAudioInputSource>();
        }
        #endif

        _micInput.OnSampleReady += OnMicSampleReady;
        _micInput.OnStartRecording += OnMicStartListening;
        _micInput.OnStopRecording += OnMicStoppedListening;
        _dynamicEntityProviders = GetComponents<IDynamicEntitiesProvider>();

        InitializeConfig();
    }

    // If always recording, begin now
    private void InitializeConfig()
    {
        if (_runtimeConfiguration.alwaysRecord)
        {
            StartRecording();
        }
    }

    // Remove mic delegates
    protected override void OnDisable()
    {
        base.OnDisable();
        // FIX: mirror the OnEnable recompile guard - _micInput can be null after an editor
        // script reload / domain reset; unguarded access threw NullReferenceException here.
        if (null != _micInput)
        {
            _micInput.OnSampleReady -= OnMicSampleReady;
            _micInput.OnStartRecording -= OnMicStartListening;
            _micInput.OnStopRecording -= OnMicStoppedListening;
        }
    }
    #endregion

    #region ACTIVATION
    /// <summary>
    /// Activate the microphone and send data to Wit for NLU processing.
    /// </summary>
    public override void Activate()
    {
        Activate(new WitRequestOptions());
    }

    /// <summary>
    /// Activate the microphone and send data to Wit for NLU processing.
    /// </summary>
    public override void Activate(WitRequestOptions requestOptions)
    {
        if (!IsConfigurationValid())
        {
            Debug.LogError("Cannot activate without valid Wit Configuration.");
            return;
        }
        if (_isActive) return;
        StopRecording();

        if (!_micInput.IsRecording && ShouldSendMicData)
        {
            _minKeepAliveWasHit = false;
            // Defer the actual Wit request until the mic level crosses soundWakeThreshold
            _isSoundWakeActive = true;

            #if DEBUG_SAMPLE
            var file = Application.dataPath + "/test.pcm";
            sampleFile = File.Open(file, FileMode.Create);
            Debug.Log("Writing recording to file: " + file);
            #endif

            StartRecording();
        }

        if (!_isActive)
        {
            _activeTranscriptionProvider?.Activate();
            _isActive = true;

            // Infinity prevents inactivity timeouts from firing before recording starts
            _lastMinVolumeLevelTime = float.PositiveInfinity;
            _currentRequestOptions = requestOptions;
        }
    }

    /// <summary>
    /// Activate and open the Wit request immediately without waiting for sound wake.
    /// </summary>
    public override void ActivateImmediately()
    {
        ActivateImmediately(new WitRequestOptions());
    }

    /// <summary>
    /// Activate and open the Wit request immediately without waiting for sound wake.
    /// </summary>
    public override void ActivateImmediately(WitRequestOptions requestOptions)
    {
        if (!IsConfigurationValid())
        {
            Debug.LogError("Cannot activate without valid Wit Configuration.");
            return;
        }
        // Make sure we aren't checking activation time until
        // the mic starts recording. If we're already recording for a live
        // recording, we just triggered an activation so we will reset the
        // last minvolumetime to ensure a minimum time from activation time
        _lastMinVolumeLevelTime = float.PositiveInfinity;
        _lastWordTime = float.PositiveInfinity;
        _receivedTranscription = false;

        if (ShouldSendMicData)
        {
            _recordingRequest = RuntimeConfiguration.witConfiguration.SpeechRequest(requestOptions, _dynamicEntityProviders);
            _recordingRequest.audioEncoding = _micInput.AudioEncoding;
            _recordingRequest.onPartialTranscription = OnPartialTranscription;
            _recordingRequest.onFullTranscription = OnFullTranscription;
            // Recording only starts once Wit signals its input stream is ready
            _recordingRequest.onInputStreamReady = r => OnWitReadyForData();
            _recordingRequest.onResponse += HandleResult;
            events.OnRequestCreated?.Invoke(_recordingRequest);
            _recordingRequest.Request();
            _timeLimitCoroutine = StartCoroutine(DeactivateDueToTimeLimit());
        }

        if (!_isActive)
        {
            if (_runtimeConfiguration.alwaysRecord && null != _micDataBuffer)
            {
                // Fresh marker so only audio from this activation onward is streamed
                _lastSampleMarker = _micDataBuffer.CreateMarker();
            }
            _activeTranscriptionProvider?.Activate();
            _isActive = true;
        }
    }

    /// <summary>
    /// Send text data to Wit.ai for NLU processing
    /// </summary>
    /// <param name="text">Text to be processed</param>
    public override void Activate(string text)
    {
        Activate(text, new WitRequestOptions());
    }

    /// <summary>
    /// Send text data to Wit.ai for NLU processing
    /// </summary>
    /// <param name="text">Text to be processed</param>
    /// <param name="requestOptions">Additional options</param>
    public override void Activate(string text, WitRequestOptions requestOptions)
    {
        if (!IsConfigurationValid())
        {
            Debug.LogError("Cannot activate without valid Wit Configuration.");
            return;
        }
        SendTranscription(text, requestOptions);
    }

    /// <summary>
    /// Check configuration, client access token & app id
    /// </summary>
    public virtual bool IsConfigurationValid()
    {
        return _runtimeConfiguration.witConfiguration != null &&
               !string.IsNullOrEmpty(_runtimeConfiguration.witConfiguration.clientAccessToken);
    }
    #endregion

    #region RECORDING
    // Stop any recording
    private void StopRecording()
    {
        // Cancel any pending wait-for-mic coroutine
        if (null != _micInitCoroutine)
        {
            StopCoroutine(_micInitCoroutine);
            _micInitCoroutine = null;
        }
        // In always-record mode the mic stays on between activations
        if (_micInput.IsRecording && !_runtimeConfiguration.alwaysRecord)
        {
            _micInput.StopRecording();
            _lastSampleMarker = null;

            #if DEBUG_SAMPLE
            sampleFile.Close();
            #endif
        }
    }

    // When wit is ready, start recording
    private void OnWitReadyForData()
    {
        _lastMinVolumeLevelTime = Time.time;
        if (!_micInput.IsRecording)
        {
            StartRecording();
        }
    }

    // Handle begin recording
    private void StartRecording()
    {
        // Stop any init coroutine
        if (null != _micInitCoroutine)
        {
            StopCoroutine(_micInitCoroutine);
            _micInitCoroutine = null;
        }
        // Wait for input and then try again
        if (!_micInput.IsInputAvailable)
        {
            _micInitCoroutine = StartCoroutine(WaitForMic());
            events.OnError.Invoke("Input Error", "No input source was available. Cannot activate for voice input.");
        }
        // Begin recording
        else
        {
            _micInput.StartRecording(_runtimeConfiguration.sampleLengthInMs);
            InitializeMicDataBuffer();
        }
    }

    // Wait until mic is available
    private IEnumerator WaitForMic()
    {
        yield return new WaitUntil(() => _micInput.IsInputAvailable);
        _micInitCoroutine = null;
        StartRecording();
    }

    // Generate mic data buffer if needed
    private void InitializeMicDataBuffer()
    {
        if (null == _micDataBuffer && _runtimeConfiguration.micBufferLengthInSeconds > 0)
        {
            // NOTE(review): this sizing formula multiplies seconds * 1000 * sampleLengthInMs,
            // which is not an obvious byte count for the configured duration - verify against
            // the mic's actual sample rate before relying on the buffer length.
            _micDataBuffer = new RingBuffer<byte>((int) Mathf.Ceil(2 * _runtimeConfiguration.micBufferLengthInSeconds * 1000 * _runtimeConfiguration.sampleLengthInMs));
            _lastSampleMarker = _micDataBuffer.CreateMarker();
        }
    }

    // Callback for mic start
    private void OnMicStartListening()
    {
        events?.OnStartListening?.Invoke();
    }

    // Callback for mic end
    private void OnMicStoppedListening()
    {
        events?.OnStoppedListening?.Invoke();
    }

    // Callback for mic sample ready. Buffers audio, streams it to an active request, and
    // performs volume/transcription based endpointing and sound-wake activation.
    private void OnMicSampleReady(int sampleCount, float[] sample, float levelMax)
    {
        // Volume-based endpointing unless the transcription provider overrides mic levels
        if (null == TranscriptionProvider || !TranscriptionProvider.OverrideMicLevel)
        {
            OnMicLevelChanged(levelMax);
        }

        if (null != _micDataBuffer)
        {
            if (_isSoundWakeActive && levelMax > _runtimeConfiguration.soundWakeThreshold)
            {
                // Rewind the marker so buffered audio preceding the wake sound is included
                _lastSampleMarker = _micDataBuffer.CreateMarker(
                    (int) (-_runtimeConfiguration.micBufferLengthInSeconds * 1000 *
                           _runtimeConfiguration.sampleLengthInMs));
            }

            byte[] data = Convert(sample);
            _micDataBuffer.Push(data, 0, data.Length);
            if (data.Length > 0)
            {
                events.OnByteDataReady?.Invoke(data, 0, data.Length);
                for (int i = 0; null != _dataReadyHandlers && i < _dataReadyHandlers.Length; i++)
                {
                    _dataReadyHandlers[i].OnWitDataReady(data, 0, data.Length);
                }
            }

            #if DEBUG_SAMPLE
            sampleFile.Write(data, 0, data.Length);
            #endif
        }

        if (IsRequestActive && _recordingRequest.IsRequestStreamActive)
        {
            if (null != _micDataBuffer && _micDataBuffer.Capacity > 0)
            {
                if (null == _writeBuffer)
                {
                    _writeBuffer = new byte[sample.Length * 2];
                }

                // Flush the marker buffer to catch up
                int read;
                while ((read = _lastSampleMarker.Read(_writeBuffer, 0, _writeBuffer.Length, true)) > 0)
                {
                    _recordingRequest.Write(_writeBuffer, 0, read);
                    events.OnByteDataSent?.Invoke(_writeBuffer, 0, read);
                    for (int i = 0; null != _dataSentHandlers && i < _dataSentHandlers.Length; i++)
                    {
                        _dataSentHandlers[i].OnWitDataSent(_writeBuffer, 0, read);
                    }
                }
            }
            else
            {
                // No ring buffer configured: stream the converted sample directly
                byte[] sampleBytes = Convert(sample);
                _recordingRequest.Write(sampleBytes, 0, sampleBytes.Length);
            }

            // Endpointing: prefer transcription-based inactivity once words have arrived,
            // otherwise fall back to volume-based inactivity
            if (_receivedTranscription)
            {
                if (Time.time - _lastWordTime >
                    _runtimeConfiguration.minTranscriptionKeepAliveTimeInSeconds)
                {
                    Debug.Log("Deactivated due to inactivity. No new words detected.");
                    DeactivateRequest(events.OnStoppedListeningDueToInactivity);
                }
            }
            else if (Time.time - _lastMinVolumeLevelTime >
                     _runtimeConfiguration.minKeepAliveTimeInSeconds)
            {
                Debug.Log("Deactivated input due to inactivity.");
                DeactivateRequest(events.OnStoppedListeningDueToInactivity);
            }
        }
        else if (_isSoundWakeActive && levelMax > _runtimeConfiguration.soundWakeThreshold)
        {
            // Sound wake: open the real request now that a loud-enough sample arrived
            events.OnMinimumWakeThresholdHit?.Invoke();
            _isSoundWakeActive = false;
            ActivateImmediately(_currentRequestOptions);
        }
    }

    /// <summary>
    /// Converts float samples [-1, 1] into little-endian 16-bit PCM bytes, reusing a
    /// cached buffer to avoid per-sample allocations.
    /// </summary>
    private byte[] Convert(float[] samples)
    {
        var sampleCount = samples.Length;
        // FIX: the output buffer holds 2 bytes per sample, so compare against
        // sampleCount * 2. The old check (Length != sampleCount) was always true after the
        // first call, re-allocating the buffer on every mic sample.
        if (null == _byteDataBuffer || _byteDataBuffer.Length != sampleCount * 2)
        {
            _byteDataBuffer = new byte[sampleCount * 2];
        }

        int rescaleFactor = 32767; //to convert float to Int16
        for (int i = 0; i < sampleCount; i++)
        {
            short data = (short) (samples[i] * rescaleFactor);
            _byteDataBuffer[i * 2] = (byte) data;
            _byteDataBuffer[i * 2 + 1] = (byte) (data >> 8);
        }

        return _byteDataBuffer;
    }

    // Mic level change
    private void OnMicLevelChanged(float level)
    {
        if (level > _runtimeConfiguration.minKeepAliveVolume)
        {
            _lastMinVolumeLevelTime = Time.time;
            _minKeepAliveWasHit = true;
        }

        events.OnMicLevelChanged?.Invoke(level);
    }

    // Mic level changed in transcription
    private void OnTranscriptionMicLevelChanged(float level)
    {
        if (null != TranscriptionProvider && TranscriptionProvider.OverrideMicLevel)
        {
            OnMicLevelChanged(level);
        }
    }
    #endregion

    #region DEACTIVATION
    /// <summary>
    /// Stop listening and submit the collected microphone data to wit for processing.
    /// </summary>
    public override void Deactivate()
    {
        DeactivateRequest(_micInput.IsRecording ? events.OnStoppedListeningDueToDeactivation : null, false);
    }

    /// <summary>
    /// Stop listening and abort any requests that may be active without waiting for a response.
    /// </summary>
    public override void DeactivateAndAbortRequest()
    {
        events.OnAborting.Invoke();
        DeactivateRequest(_micInput.IsRecording ? events.OnStoppedListeningDueToDeactivation : null, true);
    }

    // Stop listening if time expires
    private IEnumerator DeactivateDueToTimeLimit()
    {
        yield return new WaitForSeconds(_runtimeConfiguration.maxRecordingTime);
        if (IsRequestActive)
        {
            Debug.Log($"Deactivated input due to timeout.\nMax Record Time: {_runtimeConfiguration.maxRecordingTime}");
            DeactivateRequest(events.OnStoppedListeningDueToTimeout, false);
        }
    }

    // Core deactivation: stop recording, close or abort the current request, and either
    // transmit it (normal path) or abort everything (abort path). onComplete fires last.
    private void DeactivateRequest(UnityEvent onComplete = null, bool abort = false)
    {
        // Stop timeout coroutine
        if (null != _timeLimitCoroutine)
        {
            StopCoroutine(_timeLimitCoroutine);
            _timeLimitCoroutine = null;
        }

        // Stop recording
        StopRecording();
        _micDataBuffer?.Clear();
        _writeBuffer = null;

        // Deactivate transcription provider
        _activeTranscriptionProvider?.Deactivate();

        // Deactivate recording request
        bool isRecordingRequestActive = IsRequestActive;
        DeactivateWitRequest(_recordingRequest, abort);

        // Abort transmitting requests
        if (abort)
        {
            AbortQueue();
            foreach (var request in _transmitRequests)
            {
                DeactivateWitRequest(request, true);
            }
            _transmitRequests.Clear();
        }
        // Transmit recording request (only when enough audio was heard to be worth sending)
        else if (isRecordingRequestActive && _minKeepAliveWasHit)
        {
            _transmitRequests.Add(_recordingRequest);
            _recordingRequest = null;
            events.OnMicDataSent?.Invoke();
        }

        // Disable below event
        _minKeepAliveWasHit = false;
        // No longer active
        _isActive = false;
        // Perform on complete event
        onComplete?.Invoke();
    }

    // Deactivate wit request
    private void DeactivateWitRequest(WitRequest request, bool abort)
    {
        if (request != null && request.IsActive)
        {
            if (abort)
            {
                request.AbortRequest();
            }
            else
            {
                // Close the input stream so Wit processes what was already sent
                request.CloseRequestStream();
            }
        }
    }
    #endregion

    #region TRANSCRIPTION
    // Partial transcription received: refresh word-based endpointing and notify listeners
    private void OnPartialTranscription(string transcription)
    {
        // Clear record data
        _receivedTranscription = true;
        _lastWordTime = Time.time;
        // Delegate
        events.OnPartialTranscription.Invoke(transcription);
    }

    // Full transcription received: end the audio request; if a custom transcription
    // provider produced this text, forward it to Wit as a message request
    private void OnFullTranscription(string transcription)
    {
        // End existing request
        DeactivateRequest(null);
        // Delegate
        events.OnFullTranscription?.Invoke(transcription);
        // Send transcription
        if (_runtimeConfiguration.customTranscriptionProvider)
        {
            SendTranscription(transcription, new WitRequestOptions());
        }
    }

    // Build a text message request and queue it for transmission
    private void SendTranscription(string transcription, WitRequestOptions requestOptions)
    {
        // Create request & add response delegate
        WitRequest request = RuntimeConfiguration.witConfiguration.MessageRequest(transcription, requestOptions, _dynamicEntityProviders);
        request.onResponse += HandleResult;
        // Call on create delegate
        events.OnRequestCreated?.Invoke(request);
        // Add to queue
        AddToQueue(request);
    }
    #endregion

    #region QUEUE
    // Add request to wait queue
    private void AddToQueue(WitRequest request)
    {
        // In editor or disabled, do not queue
        if (!Application.isPlaying || _runtimeConfiguration.maxConcurrentRequests <= 0)
        {
            _transmitRequests.Add(request);
            request.Request();
            return;
        }

        // Add to queue
        _queuedRequests.Add(request);

        // If not running, begin
        if (_queueHandler == null)
        {
            _queueHandler = StartCoroutine(PerformDequeue());
        }
    }

    // Abort request
    private void AbortQueue()
    {
        if (_queueHandler != null)
        {
            StopCoroutine(_queueHandler);
            _queueHandler = null;
        }
        foreach (var request in _queuedRequests)
        {
            DeactivateWitRequest(request, true);
        }
        _queuedRequests.Clear();
    }

    // Coroutine used to send transcriptions when possible
    private IEnumerator PerformDequeue()
    {
        // Perform until no requests remain
        while (_queuedRequests.Count > 0)
        {
            // Wait a frame to space out requests
            yield return new WaitForEndOfFrame();

            // If space, dequeue & request
            if (_transmitRequests.Count < _runtimeConfiguration.maxConcurrentRequests)
            {
                // Dequeue
                WitRequest request = _queuedRequests.First();
                _queuedRequests.Remove(request);
                // Transmit
                _transmitRequests.Add(request);
                request.Request();
            }
        }

        // Kill coroutine
        _queueHandler = null;
    }
    #endregion

    #region RESPONSE
    /// <summary>
    /// Main thread call to handle result callbacks
    /// </summary>
    private void HandleResult(WitRequest request)
    {
        // If result is obtained before transcription
        if (request == _recordingRequest)
        {
            DeactivateRequest(null, false);
        }

        // Handle success
        if (request.StatusCode == (int) HttpStatusCode.OK)
        {
            if (null != request.ResponseData)
            {
                events?.OnResponse?.Invoke(request.ResponseData);
            }
            else
            {
                events?.OnError?.Invoke("No Data", "No data was returned from the server.");
            }
        }
        // Handle failure
        else
        {
            if (request.StatusCode != WitRequest.ERROR_CODE_ABORTED)
            {
                events?.OnError?.Invoke("HTTP Error " + request.StatusCode,
                    request.StatusDescription);
            }
            else
            {
                events?.OnAborted?.Invoke();
            }
        }

        // Remove from transmit list, missing if aborted
        if (_transmitRequests.Contains(request))
        {
            _transmitRequests.Remove(request);
        }

        // Complete delegate
        events?.OnRequestCompleted?.Invoke();
    }
    #endregion
}
/// <summary>
/// Provides read access to the active <see cref="WitRuntimeConfiguration"/>. Implemented by
/// <see cref="Wit"/> so other components can query the current Wit runtime settings.
/// </summary>
public interface IWitRuntimeConfigProvider
{
// The configuration currently in use (Wit app credentials, endpointing settings, etc.)
WitRuntimeConfiguration RuntimeConfiguration { get; }
}
}