gemini/multimodal-live-api/websocket-demo-app/frontend/gemini-live-api.js (137 lines of code) (raw):
/**
 * Flattened view of one parsed message received from the Gemini Live API.
 *
 * Fields set on every instance:
 *  - `data`:      text or inline payload taken from the FIRST model-turn part
 *                 ("" when the message carries neither).
 *  - `type`:      "SETUP COMPLETE", "TEXT", "AUDIO", or "" when unclassified.
 *  - `endOfTurn`: `serverContent.turnComplete` as received (undefined if absent).
 */
class GeminiLiveResponseMessage {
  /**
   * @param {object} [data] - Parsed JSON message; nullish or partial values
   *   are tolerated and produce an unclassified message.
   */
  constructor(data) {
    const serverContent = data?.serverContent;
    const modelParts = serverContent?.modelTurn?.parts;
    const hasParts = Boolean(modelParts?.length);

    let payload = "";
    let kind = "";

    // A setup acknowledgement wins over any content in the same message.
    if (data?.setupComplete) {
      kind = "SETUP COMPLETE";
    } else if (hasParts && modelParts[0].text) {
      payload = modelParts[0].text;
      kind = "TEXT";
    } else if (hasParts && modelParts[0].inlineData) {
      // Any inline data is labelled AUDIO; its MIME type is not inspected.
      payload = modelParts[0].inlineData.data;
      kind = "AUDIO";
    }

    this.data = payload;
    this.type = kind;
    this.endOfTurn = serverContent?.turnComplete;
  }
}
/**
 * Client for the Gemini Live bidirectional streaming API, reached through a
 * WebSocket proxy.
 *
 * Typical use: construct, optionally override `responseModalities`,
 * `systemInstructions` and the `on*` callbacks, then call `connect(token)`.
 * Once the socket opens, two setup messages are sent (see
 * `sendInitialSetupMessages`) and `onConnectionStarted` is invoked.
 */
class GeminiLiveAPI {
  /**
   * @param {string} proxyUrl - URL of the WebSocket proxy to connect to.
   * @param {string} projectId - Project ID interpolated into the model URI.
   * @param {string} model - Model name interpolated into the model URI.
   * @param {string} apiHost - Host interpolated into the service URL.
   */
  constructor(proxyUrl, projectId, model, apiHost) {
    this.proxyUrl = proxyUrl;
    this.projectId = projectId;
    this.model = model;
    this.modelUri = this.buildModelUri();
    this.responseModalities = ["AUDIO"];
    this.systemInstructions = "";
    this.apiHost = apiHost;
    this.serviceUrl = `wss://${this.apiHost}/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;

    // Overridable callbacks; the defaults only log or alert.
    this.onReceiveResponse = (message) => {
      console.log("Default message received callback", message);
    };
    this.onConnectionStarted = () => {
      console.log("Default onConnectionStarted");
    };
    this.onErrorMessage = (message) => {
      alert(message);
    };

    this.accessToken = "";
    // Same casing as every method uses, so "not connected yet" is a
    // well-defined null rather than an undefined property.
    this.webSocket = null;
    console.log("Created Gemini Live API object: ", this);
  }

  /**
   * Builds the model resource path from the current project ID and model.
   * The location segment is fixed to `us-central1`.
   * @returns {string}
   */
  buildModelUri() {
    return `projects/${this.projectId}/locations/us-central1/publishers/google/models/${this.model}`;
  }

  /**
   * Changes the project ID and refreshes the derived model URI.
   * @param {string} projectId
   */
  setProjectId(projectId) {
    this.projectId = projectId;
    this.modelUri = this.buildModelUri();
  }

  /**
   * Stores the bearer token sent in the first setup message.
   * @param {string} newAccessToken
   */
  setAccessToken(newAccessToken) {
    // The token is a credential: log the event, never the value.
    console.log("setting access token");
    this.accessToken = newAccessToken;
  }

  /**
   * Stores the access token and opens the WebSocket to the proxy.
   * @param {string} accessToken
   */
  connect(accessToken) {
    this.setAccessToken(accessToken);
    this.setupWebSocketToService();
  }

  /** Closes the WebSocket; a no-op when `connect()` has not been called. */
  disconnect() {
    if (!this.webSocket) {
      return;
    }
    this.webSocket.close();
  }

  /**
   * JSON-serialises `message` and sends it over the WebSocket.
   * When no socket exists yet, the message is dropped with a console warning
   * instead of throwing a TypeError.
   * @param {object} message
   */
  sendMessage(message) {
    if (!this.webSocket) {
      console.warn("Cannot send message: WebSocket is not connected");
      return;
    }
    this.webSocket.send(JSON.stringify(message));
  }

  /**
   * WebSocket `message` handler: parses the frame as JSON, wraps it in a
   * `GeminiLiveResponseMessage` and passes it to `onReceiveResponse`.
   * Frames that are not valid JSON are logged and skipped.
   * @param {MessageEvent} messageEvent
   */
  onReceiveMessage(messageEvent) {
    console.log("Message received: ", messageEvent);
    let messageData;
    try {
      messageData = JSON.parse(messageEvent.data);
    } catch (err) {
      console.error("Ignoring message that is not valid JSON: ", err);
      return;
    }
    // `this` is deliberately not logged here: it holds the access token.
    this.onReceiveResponse(new GeminiLiveResponseMessage(messageData));
  }

  /**
   * Opens a WebSocket to `proxyUrl` and wires its lifecycle events:
   * close/error are reported through `onErrorMessage`; open sends the setup
   * messages and then calls `onConnectionStarted`.
   */
  setupWebSocketToService() {
    console.log("connecting: ", this.proxyUrl);
    this.webSocket = new WebSocket(this.proxyUrl);

    this.webSocket.onclose = (event) => {
      console.log("websocket closed: ", event);
      this.onErrorMessage("Connection closed");
    };
    this.webSocket.onerror = (event) => {
      console.log("websocket error: ", event);
      this.onErrorMessage("Connection error");
    };
    this.webSocket.onopen = (event) => {
      console.log("websocket open: ", event);
      this.sendInitialSetupMessages();
      this.onConnectionStarted();
    };
    this.webSocket.onmessage = (event) => this.onReceiveMessage(event);
  }

  /**
   * Sends the two messages that start a session, in order:
   *  1. `{ bearer_token, service_url }` (presumably consumed by the proxy to
   *     reach the upstream service; verify against the proxy implementation);
   *  2. `{ setup: ... }` carrying the model URI, response modalities and
   *     system instructions.
   */
  sendInitialSetupMessages() {
    this.sendMessage({
      bearer_token: this.accessToken,
      service_url: this.serviceUrl,
    });
    this.sendMessage({
      setup: {
        model: this.modelUri,
        generation_config: {
          response_modalities: this.responseModalities,
        },
        system_instruction: {
          parts: [{ text: this.systemInstructions }],
        },
      },
    });
  }

  /**
   * Sends `text` as a single completed user turn.
   * @param {string} text
   */
  sendTextMessage(text) {
    this.sendMessage({
      client_content: {
        turns: [
          {
            role: "user",
            parts: [{ text }],
          },
        ],
        turn_complete: true,
      },
    });
  }

  /**
   * Sends one realtime media chunk.
   * @param {string} data - Chunk payload (callers in this class pass base64).
   * @param {string} mime_type - MIME type reported for the chunk.
   */
  sendRealtimeInputMessage(data, mime_type) {
    this.sendMessage({
      realtime_input: {
        media_chunks: [{ mime_type, data }],
      },
    });
  }

  /**
   * Sends an audio chunk tagged as `audio/pcm`.
   * @param {string} base64PCM
   */
  sendAudioMessage(base64PCM) {
    this.sendRealtimeInputMessage(base64PCM, "audio/pcm");
  }

  /**
   * Sends an image chunk.
   * @param {string} base64Image
   * @param {string} [mime_type="image/jpeg"]
   */
  sendImageMessage(base64Image, mime_type = "image/jpeg") {
    this.sendRealtimeInputMessage(base64Image, mime_type);
  }
}
// Runs once when the script is evaluated: logs a marker showing the file loaded.
console.log("loaded gemini-live-api.js");