in src/AIHub/Controllers/VideoAnalyzerController.cs [116:217]
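// Dense-captions a video: ingests it into an Azure AI Vision Video Retrieval index,
// then asks a GPT-4 Turbo with Vision deployment about it through the chat
// completions "extensions" endpoint. Assumes AOAIendpoint, AOAIDeploymentName,
// Visionendpoint, VisionKey, sasUri, VIDEO_DOCUMENT_ID, httpClient and model are
// members defined elsewhere in this controller.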
public async Task<IActionResult> DenseCaptionVideo(string video_url, string prompt)
{
    // GPT-4 with Vision "extensions" chat completions endpoint (alternative api-version: 2024-02-15-preview).
    string GPT4V_ENDPOINT = $"{AOAIendpoint}openai/deployments/{AOAIDeploymentName}/extensions/chat/completions?api-version=2023-07-01-preview";
    string VISION_API_ENDPOINT = $"{Visionendpoint}computervision";
    string VISION_API_KEY = VisionKey;
    // Derive a stable, purely alphanumeric index name from the video file name.
    string VIDEO_INDEX_NAME = Regex.Replace(video_url.Split("/").Last().Split(".").First().GetHashCode().ToString(), "[^a-zA-Z0-9]", "");
    // Append the SAS token so the Vision service can read the blob directly.
    string VIDEO_FILE_SAS_URL = video_url + sasUri.Query;
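    // Video Retrieval ingestion flow: create an index, add the blob by SAS URL,
    // then poll until ingestion finishes. The three helpers below are assumed to
    // be defined elsewhere in this controller.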
    // Step 1: Create an index
    var response = await CreateVideoIndex(VISION_API_ENDPOINT, VISION_API_KEY, VIDEO_INDEX_NAME);
    Console.WriteLine(response.StatusCode);
    Console.WriteLine(await response.Content.ReadAsStringAsync());

    // Step 2: Add the video file to the index
    response = await AddVideoToIndex(VISION_API_ENDPOINT, VISION_API_KEY, VIDEO_INDEX_NAME, VIDEO_FILE_SAS_URL, VIDEO_DOCUMENT_ID);
    Console.WriteLine(response.StatusCode);
    Console.WriteLine(await response.Content.ReadAsStringAsync());

    // Step 3: Wait for ingestion to complete
    if (!await WaitForIngestionCompletion(VISION_API_ENDPOINT, VISION_API_KEY, VIDEO_INDEX_NAME))
    {
        Console.WriteLine("Ingestion did not complete within the expected time.");
    }
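    // Authenticate against Azure OpenAI: keyless (Microsoft Entra ID) when no
    // api-key is configured, otherwise the classic api-key header.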
    if (string.IsNullOrEmpty(AOAIsubscriptionKey))
    {
        var credential = new DefaultAzureCredential();
        // Acquire the token asynchronously instead of blocking on GetToken.
        var token = await credential.GetTokenAsync(new TokenRequestContext(["https://cognitiveservices.azure.com/.default"]));
        httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token.Token);
    }
    else
    {
        httpClient.DefaultRequestHeaders.Add("api-key", AOAIsubscriptionKey);
    }
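    // Wire the index into the request as an AzureComputerVisionVideoIndex data
    // source and enable the video enhancement so the model can ground its answer
    // in the ingested video.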
    var payload = new
    {
        model = "gpt-4-vision-preview",
        dataSources = new[]
        {
            new
            {
                type = "AzureComputerVisionVideoIndex",
                parameters = new
                {
                    computerVisionBaseUrl = VISION_API_ENDPOINT,
                    computerVisionApiKey = VISION_API_KEY,
                    indexName = VIDEO_INDEX_NAME,
                    videoUrls = new[] { VIDEO_FILE_SAS_URL }
                }
            }
        },
        enhancements = new
        {
            video = new { enabled = true }
        },
        messages = new object[]
        {
            new
            {
                role = "system",
                content = new object[]
                {
                    "You are an AI assistant that helps people find information."
                }
            },
            new
            {
                role = "user",
                content = new object[]
                {
                    new
                    {
                        type = "acv_document_id",
                        acv_document_id = VIDEO_DOCUMENT_ID
                    },
                    new
                    {
                        type = "text",
                        text = prompt
                    }
                }
            }
        },
        temperature = 0.7,
        top_p = 0.95,
        max_tokens = 4096
    };
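    // Step 4: Call the chat completions endpoint and surface the first choice,
    // logging the raw body if the request or parsing fails.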
    // Declared outside the try block so the catch handler can log the raw response.
    var content = "";
    try
    {
        var chatResponse = await httpClient.PostAsync(GPT4V_ENDPOINT, new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"));
        chatResponse.EnsureSuccessStatusCode();
        content = await chatResponse.Content.ReadAsStringAsync();
        var responseContent = JsonSerializer.Deserialize<JsonObject>(content);
        Console.WriteLine(responseContent);
        model.Message = responseContent?["choices"]?[0]?["message"]?["content"]?.ToString();
        model.Video = VIDEO_FILE_SAS_URL;
    }
    catch (Exception e)
    {
        Console.WriteLine($"Error after GPT4V: {e.Message} with content: {content}");
    }
    return View("VideoAnalyzer", model);
}