in src/AIHub/Controllers/AudioTranscriptionController.cs [34:121]
// Transcribes the audio at `audio_url` with the Speech to text v3.1 batch REST API.
//
// Flow: (1) create a transcription job, (2) poll the job until it finishes,
// (3) list the job's result files, (4) download the transcript file and extract its text.
//
// Parameters:
//   audio_url - URL of the audio to transcribe; the query string of `sasUri` is appended to it.
// Returns:
//   Ok(model) with model.Message set to the lexical transcript on success, or the
//   "AudioTranscription" view with ViewBag.Message set to an error text when the job
//   cannot be created, reports a failure, or produces no result file.
// Throws:
//   HttpRequestException when any call returns a non-success HTTP status code.
public async Task<IActionResult> TranscribeAudio(string audio_url)
{
    string audio = audio_url + sasUri.Query;

    // CALL 1: STT 3.1 - create the transcription job.
    var requestBody = new
    {
        contentUrls = new[] { audio },
        locale = "es-es",
        displayName = "My Transcription",
        model = (string?)null,
        properties = new
        {
            wordLevelTimestampsEnabled = true,
            languageIdentification = new
            {
                candidateLocales = new[] { "en-US", "de-DE", "es-ES" }
            }
        }
    };

    // Serialize once and reuse the payload for both the log line and the request.
    // NOTE(review): this log line includes the audio URL with the appended `sasUri` query - confirm that is acceptable.
    string requestJson = JsonSerializer.Serialize(requestBody);
    Console.WriteLine(requestJson);

    string? transcriptionUrl;
    using (var createRequest = new HttpRequestMessage(HttpMethod.Post, "https://" + speechRegion + ".api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions"))
    {
        createRequest.Headers.Add("Ocp-Apim-Subscription-Key", speechSubscriptionKey);
        createRequest.Content = new StringContent(requestJson, Encoding.UTF8, "application/json");

        using var createResponse = await httpClient.SendAsync(createRequest);
        createResponse.EnsureSuccessStatusCode();

        JsonObject created = ParseJson(await createResponse.Content.ReadAsStringAsync());

        // Null-conditional access: a missing "self" must reach the error branch below instead of throwing.
        transcriptionUrl = created["self"]?.ToString();
    }

    if (string.IsNullOrEmpty(transcriptionUrl))
    {
        ViewBag.Message = "Error in the transcription process";
        return View("AudioTranscription", model);
    }

    Console.WriteLine("SELF: " + transcriptionUrl);

    // CALL 2: CHECK FOR FINISH - poll the job every 10 seconds.
    while (true)
    {
        JsonObject job = ParseJson(await GetBodyAsync(transcriptionUrl));
        string? status = job["status"]?.ToString();
        Console.WriteLine(status);

        if (status == "Succeeded")
        {
            break;
        }

        // Stop on a failed job or a response without a status instead of polling forever.
        if (status is null || status == "Failed")
        {
            ViewBag.Message = "Error in the transcription process";
            return View("AudioTranscription", model);
        }

        // Non-blocking wait: does not hold a thread while the job is running.
        await Task.Delay(TimeSpan.FromSeconds(10));
    }

    // CALL 3: GET RESULTS URL
    JsonObject files = ParseJson(await GetBodyAsync(transcriptionUrl + "/files/"));
    Console.WriteLine(files);

    JsonArray? fileEntries = files["values"]?.AsArray();
    if (fileEntries is null || fileEntries.Count == 0)
    {
        ViewBag.Message = "Error in the transcription process";
        return View("AudioTranscription", model);
    }

    // NOTE(review): the files listing is expected to tag each entry with a "kind"; prefer the
    // entry tagged "Transcription" and fall back to the first entry (the previous behavior)
    // when no entry carries that tag - confirm against the Speech to text REST API reference.
    JsonNode? transcriptEntry = null;
    foreach (JsonNode? entry in fileEntries)
    {
        if (entry?["kind"]?.ToString() == "Transcription")
        {
            transcriptEntry = entry;
            break;
        }
    }

    transcriptEntry ??= fileEntries[0];

    // Extract contentUrl field
    string contentUrl = (string)transcriptEntry!["links"]!["contentUrl"]!;
    Console.WriteLine(contentUrl);

    // CALL 4: GET RESULTS (TRANSCRIPTION)
    string transcriptJson = await GetBodyAsync(contentUrl);
    Console.WriteLine(transcriptJson);

    JsonObject transcript = ParseJson(transcriptJson);
    string transcriptText = (string)transcript["combinedRecognizedPhrases"]![0]!["lexical"]!;
    Console.WriteLine(transcriptText);

    // Show transcript results
    model.Message = transcriptText;
    ViewBag.Message = "TRANSCRIPTION RESULTS: \n\n" + transcriptText;
    return Ok(model);

    // Sends a GET carrying the subscription key on the request itself (the client's default
    // headers are left untouched), fails on a non-success status, and returns the body text.
    async Task<string> GetBodyAsync(string url)
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        request.Headers.Add("Ocp-Apim-Subscription-Key", speechSubscriptionKey);

        using var response = await httpClient.SendAsync(request);
        response.EnsureSuccessStatusCode();
        return await response.Content.ReadAsStringAsync();
    }

    // Parses a response body into a mutable JSON object tree.
    static JsonObject ParseJson(string json) => JsonSerializer.Deserialize<JsonObject>(json)!;
}