components/app.tsx (243 lines of code) (raw):
"use client";
import Controls from "@/components/controls";
import Scene from "@/components/scene";
import Logs from "@/components/logs";
import { useEffect, useRef, useState, useCallback } from "react";
import { INSTRUCTIONS, TOOLS } from "@/lib/config";
import { BASE_URL, MODEL } from "@/lib/constants";
/**
 * Shape of the payload sent back to the model as a function_call_output.
 * Always carries a human-readable `response`; tool-specific handlers may
 * attach extra fields (e.g. `issPosition`). `unknown` instead of `any`
 * keeps reads of those extra fields type-checked.
 */
type ToolCallOutput = {
  response: string;
  [key: string]: unknown;
};
/**
 * Top-level client component for the realtime voice session.
 *
 * Owns the WebRTC peer connection to the realtime API, the data channel
 * used for client/server events, microphone capture, and the log/tool-call
 * state rendered by <Scene/>, <Controls/> and <Logs/>.
 */
export default function App() {
  const [logs, setLogs] = useState<any[]>([]);
  const [toolCall, setToolCall] = useState<any>(null);
  const [isSessionStarted, setIsSessionStarted] = useState(false);
  const [isSessionActive, setIsSessionActive] = useState(false);
  const [isListening, setIsListening] = useState(false);
  const [dataChannel, setDataChannel] = useState<RTCDataChannel | null>(null);
  const peerConnection = useRef<RTCPeerConnection | null>(null);
  const audioElement = useRef<HTMLAudioElement | null>(null);
  const [audioStream, setAudioStream] = useState<MediaStream | null>(null);
  const audioTransceiver = useRef<RTCRtpTransceiver | null>(null);
  // Senders for every audio track we have attached to the peer connection.
  const tracks = useRef<RTCRtpSender[] | null>(null);

  // Start a new realtime session: fetch an ephemeral token, open a peer
  // connection with mic audio + a data channel, and complete the SDP exchange.
  async function startSession() {
    try {
      if (!isSessionStarted) {
        setIsSessionStarted(true);
        // Get an ephemeral session token
        const session = await fetch("/api/session").then((response) =>
          response.json()
        );
        const sessionToken = session.client_secret.value;
        const sessionId = session.id;
        console.log("Session id:", sessionId);

        // Create a peer connection
        const pc = new RTCPeerConnection();

        // Set up to play remote audio from the model
        if (!audioElement.current) {
          audioElement.current = document.createElement("audio");
        }
        audioElement.current.autoplay = true;
        pc.ontrack = (e) => {
          if (audioElement.current) {
            audioElement.current.srcObject = e.streams[0];
          }
        };

        // Capture the mic and attach its tracks, remembering each sender so
        // start/stopRecording can swap tracks later via replaceTrack().
        const stream = await navigator.mediaDevices.getUserMedia({
          audio: true,
        });
        stream.getTracks().forEach((track) => {
          const sender = pc.addTrack(track, stream);
          if (sender) {
            tracks.current = [...(tracks.current || []), sender];
          }
        });

        // Set up data channel for sending and receiving events
        const dc = pc.createDataChannel("oai-events");
        setDataChannel(dc);

        // Start the session using the Session Description Protocol (SDP)
        const offer = await pc.createOffer();
        await pc.setLocalDescription(offer);

        const sdpResponse = await fetch(`${BASE_URL}?model=${MODEL}`, {
          method: "POST",
          body: offer.sdp,
          headers: {
            Authorization: `Bearer ${sessionToken}`,
            "Content-Type": "application/sdp",
          },
        });

        const answer: RTCSessionDescriptionInit = {
          type: "answer",
          sdp: await sdpResponse.text(),
        };
        await pc.setRemoteDescription(answer);
        peerConnection.current = pc;
      }
    } catch (error) {
      console.error("Error starting session:", error);
      // Reset the guard so the user can retry after a failed attempt;
      // otherwise the button would be dead until a full page reload.
      setIsSessionStarted(false);
    }
  }

  // Stop current session, clean up peer connection and data channel
  function stopSession() {
    if (dataChannel) {
      dataChannel.close();
    }
    if (peerConnection.current) {
      peerConnection.current.close();
    }
    setIsSessionStarted(false);
    setIsSessionActive(false);
    setDataChannel(null);
    peerConnection.current = null;
    // Closing the RTCPeerConnection does NOT stop the underlying mic
    // tracks — stop them explicitly so the mic indicator turns off, and
    // drop the now-stale senders so a restarted session doesn't try to
    // replaceTrack() on senders that belong to a closed connection.
    if (tracks.current) {
      tracks.current.forEach((sender) => sender.track?.stop());
      tracks.current = null;
    }
    if (audioStream) {
      audioStream.getTracks().forEach((track) => track.stop());
    }
    setAudioStream(null);
    setIsListening(false);
    audioTransceiver.current = null;
  }

  // Grabs a new mic track and replaces the placeholder track in the transceiver
  async function startRecording() {
    try {
      const newStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });
      setAudioStream(newStream);

      // If we already have audio senders, just replace their track:
      if (tracks.current) {
        const micTrack = newStream.getAudioTracks()[0];
        tracks.current.forEach((sender) => {
          sender.replaceTrack(micTrack);
        });
      } else if (peerConnection.current) {
        // Fallback if the senders somehow didn't get recorded
        newStream.getTracks().forEach((track) => {
          const sender = peerConnection.current?.addTrack(track, newStream);
          if (sender) {
            tracks.current = [...(tracks.current || []), sender];
          }
        });
      }
      setIsListening(true);
      console.log("Microphone started.");
    } catch (error) {
      console.error("Error accessing microphone:", error);
    }
  }

  // Replaces the mic track with a placeholder track
  function stopRecording() {
    setIsListening(false);

    // Stop existing mic tracks so the user's mic is off
    if (audioStream) {
      audioStream.getTracks().forEach((track) => track.stop());
    }
    setAudioStream(null);

    // Replace with a placeholder (silent) track so the connection keeps
    // a live audio sender without transmitting the mic.
    if (tracks.current) {
      const placeholderTrack = createEmptyAudioTrack();
      tracks.current.forEach((sender) => {
        sender.replaceTrack(placeholderTrack);
      });
    }
  }

  // Creates a placeholder track that is silent
  function createEmptyAudioTrack(): MediaStreamTrack {
    const audioContext = new AudioContext();
    const destination = audioContext.createMediaStreamDestination();
    return destination.stream.getAudioTracks()[0];
  }

  // Send a message to the model over the data channel.
  const sendClientEvent = useCallback(
    (message: any) => {
      // send() throws on a channel that is still connecting or already
      // closed, so require an open channel rather than mere existence.
      if (dataChannel && dataChannel.readyState === "open") {
        message.event_id = message.event_id || crypto.randomUUID();
        dataChannel.send(JSON.stringify(message));
      } else {
        console.error(
          "Failed to send message - no data channel available",
          message
        );
      }
    },
    [dataChannel]
  );

  // Attach event listeners to the data channel when a new one is created
  useEffect(() => {
    if (!dataChannel) return;

    // Execute a tool call requested by the model and report its output back.
    async function handleToolCall(output: any) {
      const toolCall = {
        name: output.name,
        arguments: output.arguments,
      };
      console.log("Tool call:", toolCall);
      setToolCall(toolCall);

      // TOOL CALL HANDLING
      // Initialize toolCallOutput with a default response
      const toolCallOutput: ToolCallOutput = {
        response: `Tool call ${toolCall.name} executed successfully.`,
      };

      // Handle special tool calls
      if (toolCall.name === "get_iss_position") {
        const issPosition = await fetch("/api/iss").then((response) =>
          response.json()
        );
        console.log("ISS position:", issPosition);
        toolCallOutput.issPosition = issPosition;
      }

      sendClientEvent({
        type: "conversation.item.create",
        item: {
          type: "function_call_output",
          call_id: output.call_id,
          output: JSON.stringify(toolCallOutput),
        },
      });

      // Force a model response to make sure it responds after certain tool calls
      if (
        toolCall.name === "get_iss_position" ||
        toolCall.name === "display_data"
      ) {
        sendClientEvent({
          type: "response.create",
        });
      }
    }

    // Append new server events to the list
    const handleMessage = (e: MessageEvent) => {
      const event = JSON.parse(e.data);
      if (event.type === "response.done") {
        const output = event.response.output[0];
        setLogs((prev) => [output, ...prev]);
        if (output?.type === "function_call") {
          handleToolCall(output);
        }
      }
    };

    // Set session active when the data channel is opened
    const handleOpen = () => {
      setIsSessionActive(true);
      setIsListening(true);
      setLogs([]);
      // Send session config
      const sessionUpdate = {
        type: "session.update",
        session: {
          tools: TOOLS,
          instructions: INSTRUCTIONS,
        },
      };
      sendClientEvent(sessionUpdate);
      console.log("Session update sent:", sessionUpdate);
    };

    dataChannel.addEventListener("message", handleMessage);
    dataChannel.addEventListener("open", handleOpen);

    // Detach on cleanup so listeners are never registered twice for the
    // same channel (e.g. React strict-mode double-invocation of effects).
    return () => {
      dataChannel.removeEventListener("message", handleMessage);
      dataChannel.removeEventListener("open", handleOpen);
    };
  }, [dataChannel, sendClientEvent]);

  // Toggle the whole session on/off.
  const handleConnectClick = async () => {
    if (isSessionActive) {
      console.log("Stopping session.");
      stopSession();
    } else {
      console.log("Starting session.");
      startSession();
    }
  };

  // Toggle the microphone on/off while the session stays up.
  const handleMicToggleClick = async () => {
    if (isListening) {
      console.log("Stopping microphone.");
      stopRecording();
    } else {
      console.log("Starting microphone.");
      startRecording();
    }
  };

  return (
    <div className="relative size-full">
      <Scene toolCall={toolCall} />
      <Controls
        handleConnectClick={handleConnectClick}
        handleMicToggleClick={handleMicToggleClick}
        isConnected={isSessionActive}
        isListening={isListening}
      />
      <Logs messages={logs} />
    </div>
  );
}