aiplatform/api/AIPlatform.Samples/AudioInputTranscription.cs

/*
 * Copyright 2024 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// [START generativeaionvertexai_gemini_audio_transcription]

using Google.Cloud.AIPlatform.V1;
using System;
using System.Threading.Tasks;

/// <summary>
/// Sample showing how to ask a generative model to transcribe an audio file
/// that is referenced by a Cloud Storage URI.
/// </summary>
public class AudioInputTranscription
{
    /// <summary>
    /// Sends a transcription prompt together with a reference to an MP3 file
    /// to the model, writes the returned text to the console and returns it.
    /// </summary>
    /// <param name="projectId">Project ID placed in the model resource name.</param>
    /// <param name="location">
    /// Region used both for the service endpoint host name and for the model resource name.
    /// </param>
    /// <param name="publisher">Publisher segment of the model resource name.</param>
    /// <param name="model">Model identifier placed in the model resource name.</param>
    /// <returns>
    /// The text of the first part of the first candidate in the response.
    /// </returns>
    /// <remarks>
    /// The first candidate and its first part are indexed without a guard, so a
    /// response that contains no candidates or no parts results in an exception.
    /// </remarks>
    public async Task<string> TranscribeAudio(
        string projectId = "your-project-id",
        string location = "us-central1",
        string publisher = "google",
        string model = "gemini-2.0-flash-001")
    {
        // The endpoint host name and the model resource name are both derived from the arguments.
        string endpoint = $"{location}-aiplatform.googleapis.com";
        string modelResourceName =
            $"projects/{projectId}/locations/{location}/publishers/{publisher}/models/{model}";

        var clientBuilder = new PredictionServiceClientBuilder { Endpoint = endpoint };
        var client = clientBuilder.Build();

        string instructions = "Can you transcribe this interview, in the format of timecode, speaker, caption. Use speaker A, speaker B, etc. to identify speakers.";

        // A single user turn made of two parts: the text instructions first, then the audio file reference.
        var userTurn = new Content { Role = "USER" };
        userTurn.Parts.Add(new Part { Text = instructions });
        userTurn.Parts.Add(new Part
        {
            FileData = new FileData
            {
                MimeType = "audio/mp3",
                FileUri = "gs://cloud-samples-data/generative-ai/audio/pixel.mp3"
            }
        });

        var request = new GenerateContentRequest { Model = modelResourceName };
        request.Contents.Add(userTurn);

        GenerateContentResponse result = await client.GenerateContentAsync(request);

        // Only the first part of the first candidate is read.
        var firstCandidate = result.Candidates[0];
        string transcript = firstCandidate.Content.Parts[0].Text;

        Console.WriteLine(transcript);
        return transcript;
    }
}

// [END generativeaionvertexai_gemini_audio_transcription]

aiplatform/api/AIPlatform.Samples/AudioInputTranscription.cs (39 lines of code) (raw):