in src/app/App.tsx [28:410]
function App() {
const searchParams = useSearchParams();
const { transcriptItems, addTranscriptMessage, addTranscriptBreadcrumb } =
useTranscript();
const { logClientEvent, logServerEvent } = useEvent();
const [selectedAgentName, setSelectedAgentName] = useState<string>("");
const [selectedAgentConfigSet, setSelectedAgentConfigSet] =
useState<AgentConfig[] | null>(null);
const [dataChannel, setDataChannel] = useState<RTCDataChannel | null>(null);
const pcRef = useRef<RTCPeerConnection | null>(null);
const dcRef = useRef<RTCDataChannel | null>(null);
const audioElementRef = useRef<HTMLAudioElement | null>(null);
const [sessionStatus, setSessionStatus] =
useState<SessionStatus>("DISCONNECTED");
const [isEventsPaneExpanded, setIsEventsPaneExpanded] =
useState<boolean>(true);
const [userText, setUserText] = useState<string>("");
const [isPTTActive, setIsPTTActive] = useState<boolean>(false);
const [isPTTUserSpeaking, setIsPTTUserSpeaking] = useState<boolean>(false);
const [isAudioPlaybackEnabled, setIsAudioPlaybackEnabled] =
useState<boolean>(true);
const sendClientEvent = (eventObj: any, eventNameSuffix = "") => {
if (dcRef.current && dcRef.current.readyState === "open") {
logClientEvent(eventObj, eventNameSuffix);
dcRef.current.send(JSON.stringify(eventObj));
} else {
logClientEvent(
{ attemptedEvent: eventObj.type },
"error.data_channel_not_open"
);
console.error(
"Failed to send message - no data channel available",
eventObj
);
}
};
const handleServerEventRef = useHandleServerEvent({
setSessionStatus,
selectedAgentName,
selectedAgentConfigSet,
sendClientEvent,
setSelectedAgentName,
});
useEffect(() => {
let finalAgentConfig = searchParams.get("agentConfig");
if (!finalAgentConfig || !allAgentSets[finalAgentConfig]) {
finalAgentConfig = defaultAgentSetKey;
const url = new URL(window.location.toString());
url.searchParams.set("agentConfig", finalAgentConfig);
window.location.replace(url.toString());
return;
}
const agents = allAgentSets[finalAgentConfig];
const agentKeyToUse = agents[0]?.name || "";
setSelectedAgentName(agentKeyToUse);
setSelectedAgentConfigSet(agents);
}, [searchParams]);
useEffect(() => {
if (selectedAgentName && sessionStatus === "DISCONNECTED") {
connectToRealtime();
}
}, [selectedAgentName]);
useEffect(() => {
if (
sessionStatus === "CONNECTED" &&
selectedAgentConfigSet &&
selectedAgentName
) {
const currentAgent = selectedAgentConfigSet.find(
(a) => a.name === selectedAgentName
);
addTranscriptBreadcrumb(
`Agent: ${selectedAgentName}`,
currentAgent
);
updateSession(true);
}
}, [selectedAgentConfigSet, selectedAgentName, sessionStatus]);
useEffect(() => {
if (sessionStatus === "CONNECTED") {
console.log(
`updatingSession, isPTTACtive=${isPTTActive} sessionStatus=${sessionStatus}`
);
updateSession();
}
}, [isPTTActive]);
const fetchEphemeralKey = async (): Promise<string | null> => {
logClientEvent({ url: "/session" }, "fetch_session_token_request");
const tokenResponse = await fetch("/api/session");
const data = await tokenResponse.json();
logServerEvent(data, "fetch_session_token_response");
if (!data.client_secret?.value) {
logClientEvent(data, "error.no_ephemeral_key");
console.error("No ephemeral key provided by the server");
setSessionStatus("DISCONNECTED");
return null;
}
return data.client_secret.value;
};
const connectToRealtime = async () => {
if (sessionStatus !== "DISCONNECTED") return;
setSessionStatus("CONNECTING");
try {
const EPHEMERAL_KEY = await fetchEphemeralKey();
if (!EPHEMERAL_KEY) {
return;
}
if (!audioElementRef.current) {
audioElementRef.current = document.createElement("audio");
}
audioElementRef.current.autoplay = isAudioPlaybackEnabled;
const { pc, dc } = await createRealtimeConnection(
EPHEMERAL_KEY,
audioElementRef
);
pcRef.current = pc;
dcRef.current = dc;
dc.addEventListener("open", () => {
logClientEvent({}, "data_channel.open");
});
dc.addEventListener("close", () => {
logClientEvent({}, "data_channel.close");
});
dc.addEventListener("error", (err: any) => {
logClientEvent({ error: err }, "data_channel.error");
});
dc.addEventListener("message", (e: MessageEvent) => {
handleServerEventRef.current(JSON.parse(e.data));
});
setDataChannel(dc);
} catch (err) {
console.error("Error connecting to realtime:", err);
setSessionStatus("DISCONNECTED");
}
};
const disconnectFromRealtime = () => {
if (pcRef.current) {
pcRef.current.getSenders().forEach((sender) => {
if (sender.track) {
sender.track.stop();
}
});
pcRef.current.close();
pcRef.current = null;
}
setDataChannel(null);
setSessionStatus("DISCONNECTED");
setIsPTTUserSpeaking(false);
logClientEvent({}, "disconnected");
};
const sendSimulatedUserMessage = (text: string) => {
const id = uuidv4().slice(0, 32);
addTranscriptMessage(id, "user", text, true);
sendClientEvent(
{
type: "conversation.item.create",
item: {
id,
type: "message",
role: "user",
content: [{ type: "input_text", text }],
},
},
"(simulated user text message)"
);
sendClientEvent(
{ type: "response.create" },
"(trigger response after simulated user text message)"
);
};
const updateSession = (shouldTriggerResponse: boolean = false) => {
sendClientEvent(
{ type: "input_audio_buffer.clear" },
"clear audio buffer on session update"
);
const currentAgent = selectedAgentConfigSet?.find(
(a) => a.name === selectedAgentName
);
const turnDetection = isPTTActive
? null
: {
type: "server_vad",
threshold: 0.5,
prefix_padding_ms: 300,
silence_duration_ms: 200,
create_response: true,
};
const instructions = currentAgent?.instructions || "";
const tools = currentAgent?.tools || [];
const sessionUpdateEvent = {
type: "session.update",
session: {
modalities: ["text", "audio"],
instructions,
voice: "coral",
input_audio_format: "pcm16",
output_audio_format: "pcm16",
input_audio_transcription: { model: "whisper-1" },
turn_detection: turnDetection,
tools,
},
};
sendClientEvent(sessionUpdateEvent);
if (shouldTriggerResponse) {
sendSimulatedUserMessage("hi");
}
};
const cancelAssistantSpeech = async () => {
const mostRecentAssistantMessage = [...transcriptItems]
.reverse()
.find((item) => item.role === "assistant");
if (!mostRecentAssistantMessage) {
console.warn("can't cancel, no recent assistant message found");
return;
}
if (mostRecentAssistantMessage.status === "DONE") {
console.log("No truncation needed, message is DONE");
return;
}
sendClientEvent({
type: "conversation.item.truncate",
item_id: mostRecentAssistantMessage?.itemId,
content_index: 0,
audio_end_ms: Date.now() - mostRecentAssistantMessage.createdAtMs,
});
sendClientEvent(
{ type: "response.cancel" },
"(cancel due to user interruption)"
);
};
const handleSendTextMessage = () => {
if (!userText.trim()) return;
cancelAssistantSpeech();
sendClientEvent(
{
type: "conversation.item.create",
item: {
type: "message",
role: "user",
content: [{ type: "input_text", text: userText.trim() }],
},
},
"(send user text message)"
);
setUserText("");
sendClientEvent({ type: "response.create" }, "trigger response");
};
const handleTalkButtonDown = () => {
if (sessionStatus !== "CONNECTED" || dataChannel?.readyState !== "open")
return;
cancelAssistantSpeech();
setIsPTTUserSpeaking(true);
sendClientEvent({ type: "input_audio_buffer.clear" }, "clear PTT buffer");
};
const handleTalkButtonUp = () => {
if (
sessionStatus !== "CONNECTED" ||
dataChannel?.readyState !== "open" ||
!isPTTUserSpeaking
)
return;
setIsPTTUserSpeaking(false);
sendClientEvent({ type: "input_audio_buffer.commit" }, "commit PTT");
sendClientEvent({ type: "response.create" }, "trigger response PTT");
};
const onToggleConnection = () => {
if (sessionStatus === "CONNECTED" || sessionStatus === "CONNECTING") {
disconnectFromRealtime();
setSessionStatus("DISCONNECTED");
} else {
connectToRealtime();
}
};
const handleAgentChange = (e: React.ChangeEvent<HTMLSelectElement>) => {
const newAgentConfig = e.target.value;
const url = new URL(window.location.toString());
url.searchParams.set("agentConfig", newAgentConfig);
window.location.replace(url.toString());
};
const handleSelectedAgentChange = (
e: React.ChangeEvent<HTMLSelectElement>
) => {
const newAgentName = e.target.value;
setSelectedAgentName(newAgentName);
};
useEffect(() => {
const storedPushToTalkUI = localStorage.getItem("pushToTalkUI");
if (storedPushToTalkUI) {
setIsPTTActive(storedPushToTalkUI === "true");
}
const storedLogsExpanded = localStorage.getItem("logsExpanded");
if (storedLogsExpanded) {
setIsEventsPaneExpanded(storedLogsExpanded === "true");
}
const storedAudioPlaybackEnabled = localStorage.getItem(
"audioPlaybackEnabled"
);
if (storedAudioPlaybackEnabled) {
setIsAudioPlaybackEnabled(storedAudioPlaybackEnabled === "true");
}
}, []);
useEffect(() => {
localStorage.setItem("pushToTalkUI", isPTTActive.toString());
}, [isPTTActive]);
useEffect(() => {
localStorage.setItem("logsExpanded", isEventsPaneExpanded.toString());
}, [isEventsPaneExpanded]);
useEffect(() => {
localStorage.setItem(
"audioPlaybackEnabled",
isAudioPlaybackEnabled.toString()
);
}, [isAudioPlaybackEnabled]);
useEffect(() => {
if (audioElementRef.current) {
if (isAudioPlaybackEnabled) {
audioElementRef.current.play().catch((err) => {
console.warn("Autoplay may be blocked by browser:", err);
});
} else {
audioElementRef.current.pause();
}
}
}, [isAudioPlaybackEnabled]);
const agentSetKey = searchParams.get("agentConfig") || "default";
return (
<div className="text-base flex flex-col h-screen bg-gray-100 text-gray-800 relative">
<div className="p-5 text-lg font-semibold flex justify-between items-center">
<div className="flex items-center">
<div onClick={() => window.location.reload()} style={{ cursor: 'pointer' }}>