public async Task DenseCaptionVideo()

in src/AIHub/Controllers/VideoAnalyzerController.cs [116:217]


    /// <summary>
    /// Indexes a video with the Azure AI Vision video retrieval service, waits for ingestion,
    /// then asks the GPT-4 Vision deployment to answer <paramref name="prompt"/> against the
    /// indexed video, rendering the result in the "VideoAnalyzer" view.
    /// </summary>
    /// <param name="video_url">Blob URL of the video; the controller's SAS query string is appended for access.</param>
    /// <param name="prompt">User question/instruction sent to the model alongside the video reference.</param>
    /// <returns>The "VideoAnalyzer" view; on success <c>model.Message</c> holds the model reply and <c>model.Video</c> the SAS URL.</returns>
    public async Task<IActionResult> DenseCaptionVideo(string video_url, string prompt)
    {
        string GPT4V_ENDPOINT = $"{AOAIendpoint}openai/deployments/{AOAIDeploymentName}/extensions/chat/completions?api-version=2023-07-01-preview"; //2024-02-15-preview";
        string VISION_API_ENDPOINT = $"{Visionendpoint}computervision";
        string VISION_API_KEY = VisionKey;
        // NOTE(review): string.GetHashCode() is randomized per process on .NET Core, so the same
        // video produces a different index name after every app restart. Harmless within one
        // request (the index is created fresh below), but switch to a stable hash (e.g. SHA-256)
        // if indexes are ever meant to be reused across runs — TODO confirm intent.
        string VIDEO_INDEX_NAME = Regex.Replace(video_url.Split("/").Last().Split(".").First().GetHashCode().ToString(), "[^a-zA-Z0-9]", "");
        string VIDEO_FILE_SAS_URL = video_url + sasUri.Query;

        // Step 1: Create an Index
        var response = await CreateVideoIndex(VISION_API_ENDPOINT, VISION_API_KEY, VIDEO_INDEX_NAME);
        Console.WriteLine(response.StatusCode);
        Console.WriteLine(await response.Content.ReadAsStringAsync());

        // Step 2: Add a video file to the index
        response = await AddVideoToIndex(VISION_API_ENDPOINT, VISION_API_KEY, VIDEO_INDEX_NAME, VIDEO_FILE_SAS_URL, VIDEO_DOCUMENT_ID);
        Console.WriteLine(response.StatusCode);
        Console.WriteLine(await response.Content.ReadAsStringAsync());

        // Step 3: Wait for ingestion to complete.
        // Deliberately continues on timeout (best effort) — the chat call below will surface any
        // resulting failure through its own error handling.
        if (!await WaitForIngestionCompletion(VISION_API_ENDPOINT, VISION_API_KEY, VIDEO_INDEX_NAME))
        {
            Console.WriteLine("Ingestion did not complete within the expected time.");
        }

        if (string.IsNullOrEmpty(AOAIsubscriptionKey))
        {
            // No API key configured: authenticate with managed identity / developer credentials.
            // Await the token rather than blocking the thread with the synchronous GetToken().
            var credential = new DefaultAzureCredential();
            var accessToken = await credential.GetTokenAsync(new TokenRequestContext(["https://cognitiveservices.azure.com/.default"]), default);
            httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", accessToken.Token);
        }
        else
        {
            // DefaultRequestHeaders persists on the shared HttpClient across requests; remove any
            // stale value first so repeated calls don't accumulate duplicate "api-key" headers.
            httpClient.DefaultRequestHeaders.Remove("api-key");
            httpClient.DefaultRequestHeaders.Add("api-key", AOAIsubscriptionKey);
        }

        // Chat-completions payload using the AzureComputerVisionVideoIndex data source plus the
        // video enhancement, pointing the model at the document ingested above.
        var payload = new
        {
            model = "gpt-4-vision-preview",
            dataSources = new[]
            {
                new
                {
                    type = "AzureComputerVisionVideoIndex",
                    parameters = new
                    {
                        computerVisionBaseUrl = VISION_API_ENDPOINT,
                        computerVisionApiKey = VISION_API_KEY,
                        indexName = VIDEO_INDEX_NAME,
                        videoUrls = new[] { VIDEO_FILE_SAS_URL }
                    }
                }
            },
            enhancements = new
            {
                video = new { enabled = true }
            },
            messages = new object[]
            {
                new {
                    role = "system",
                    content = new object[]
                        {
                            "You are an AI assistant that helps people find information."
                        }
                },
                new {
                    role = "user",
                    content = new object[]
                    {
                        // References the ingested video by its document id, then the user's prompt.
                        new {
                            type = "acv_document_id",
                            acv_document_id = VIDEO_DOCUMENT_ID
                        },
                        new {
                            type = "text",
                            text = prompt
                        }
                    },
                }
            },
            temperature = 0.7,
            top_p = 0.95,
            max_tokens = 4096
        };

        var content = "";
        try
        {
            var chatResponse = await httpClient.PostAsync(GPT4V_ENDPOINT, new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"));
            chatResponse.EnsureSuccessStatusCode();
            content = await chatResponse.Content.ReadAsStringAsync();
            var responseContent = JsonSerializer.Deserialize<JsonObject>(content);
            Console.WriteLine(responseContent);

            model.Message = responseContent?["choices"]?[0]?["message"]?["content"]?.ToString();
            model.Video = VIDEO_FILE_SAS_URL;
        }
        catch (Exception e)
        {
            // Best effort: log and fall through so the view still renders (model.Message stays unset).
            Console.WriteLine($"Error after GPT4V: {e.Message} with content: {content}");
        }

        return View("VideoAnalyzer", model);
    }