9-realtime/openai-realtime-console-example/src/pages/ConsolePage.tsx (682 lines of code) (raw):
/**
* Running a local relay server will allow you to hide your API key
* and run custom logic on the server
*
* Set the local relay server address to:
* REACT_APP_LOCAL_RELAY_SERVER_URL=http://localhost:8081
*
* This will also require you to set OPENAI_API_KEY= in a `.env` file
* You can run it with `npm run relay`, in parallel with `npm start`
*/
// Address of the optional local relay server; an empty string means "no relay".
const LOCAL_RELAY_SERVER_URL: string =
  process.env.REACT_APP_LOCAL_RELAY_SERVER_URL ?? '';
import { useEffect, useRef, useCallback, useState } from 'react';
import { RealtimeClient } from '@openai/realtime-api-beta';
import { ItemType } from '@openai/realtime-api-beta/dist/lib/client.js';
import { WavRecorder, WavStreamPlayer } from '../lib/wavtools/index.js';
import { instructions } from '../utils/conversation_config.js';
import { WavRenderer } from '../utils/wav_renderer';
import { X, Edit, Zap, ArrowUp, ArrowDown } from 'react-feather';
import { Button } from '../components/button/Button';
import { Toggle } from '../components/toggle/Toggle';
import { Map } from '../components/Map';
import './ConsolePage.scss';
import { isJsxOpeningLikeElement } from 'typescript';
/**
* Type for result from get_weather() function call
*/
interface Coordinates {
  /** Latitude of the point. */
  lat: number;
  /** Longitude of the point. */
  lng: number;
  /** Optional human-readable label for the point. */
  location?: string;
  /** Temperature reading, filled in once the weather lookup has returned. */
  temperature?: Measurement;
  /** Wind-speed reading, filled in once the weather lookup has returned. */
  wind_speed?: Measurement;
}
/**
 * A numeric reading together with the unit label it is expressed in
 */
interface Measurement {
  value: number;
  units: string;
}
/**
* Type for all event logs
*/
interface RealtimeEvent {
  /** Timestamp string; parsed with `new Date(...)` when the log row is rendered. */
  time: string;
  /** Which side of the connection produced the event. */
  source: 'client' | 'server';
  /** Set when consecutive events of the same type were collapsed into one row. */
  count?: number;
  /** Raw event payload; `type` and `event_id` are the fields the UI reads. */
  event: Record<string, any>;
}
export function ConsolePage() {
/**
* Ask user for API Key
* If we're using the local relay server, we don't need this
*/
const loadApiKey = (): string => {
  // The relay server holds the key, so the browser never needs one.
  if (LOCAL_RELAY_SERVER_URL) {
    return '';
  }
  // Prefer the key saved by an earlier visit; only prompt when none is stored.
  const savedKey = localStorage.getItem('tmp::voice_api_key');
  if (savedKey) {
    return savedKey;
  }
  return prompt('OpenAI API Key') || '';
};
const apiKey = loadApiKey();
// Persist whatever non-empty key we ended up with for the next page load.
if (apiKey.length > 0) {
  localStorage.setItem('tmp::voice_api_key', apiKey);
}
/**
* Instantiate:
* - WavRecorder (speech input)
* - WavStreamPlayer (speech output)
* - RealtimeClient (API client)
*/
// Capture and playback are both configured with the same sample rate.
const sampleRate = 24000;
const wavRecorderRef = useRef<WavRecorder>(new WavRecorder({ sampleRate }));
const wavStreamPlayerRef = useRef<WavStreamPlayer>(
  new WavStreamPlayer({ sampleRate })
);
// With a relay configured the client is only given its URL; otherwise it is
// handed the API key and explicitly allowed to use it from the browser.
const clientOptions = LOCAL_RELAY_SERVER_URL
  ? { url: LOCAL_RELAY_SERVER_URL }
  : { apiKey, dangerouslyAllowAPIKeyInBrowser: true };
const clientRef = useRef<RealtimeClient>(new RealtimeClient(clientOptions));
/**
* References for
* - Rendering audio visualization (canvas)
* - Autoscrolling event logs
* - Timing delta for event log displays
*/
// Canvas elements for the two audio visualizations; attached through `ref` in the JSX.
const clientCanvasRef = useRef<HTMLCanvasElement>(null);
const serverCanvasRef = useRef<HTMLCanvasElement>(null);
// Last scrollHeight observed on the event log, used to detect that it has changed.
const eventsScrollHeightRef = useRef(0);
// Scrollable container of the event log.
const eventsScrollRef = useRef<HTMLDivElement>(null);
// ISO timestamp that formatTime() measures from; overwritten when a conversation connects.
const startTimeRef = useRef<string>(new Date().toISOString());
/**
* All of our variables for displaying application state
* - items are all conversation items (dialog)
* - realtimeEvents are event logs, which can be expanded
* - memoryKv is for set_memory() function
* - coords, marker are for get_weather() function
*/
// Conversation items as last read from the client.
const [items, setItems] = useState<ItemType[]>([]);
// Event log rows (consecutive events of one type are collapsed into one row).
const [realtimeEvents, setRealtimeEvents] = useState<RealtimeEvent[]>([]);
// Keyed by event_id; a key is present only while that row's payload is expanded.
const [expandedEvents, setExpandedEvents] = useState<Record<string, boolean>>(
  {}
);
const [isConnected, setIsConnected] = useState(false);
// True in manual (push-to-talk) mode, false while server VAD is selected.
const [canPushToTalk, setCanPushToTalk] = useState(true);
const [isRecording, setIsRecording] = useState(false);
// Values written by the set_memory tool.
const [memoryKv, setMemoryKv] = useState<Record<string, any>>({});
// Map centre and marker written by the get_weather tool.
const [coords, setCoords] = useState<Coordinates | null>({
  lat: 37.775593,
  lng: -122.418137,
});
const [marker, setMarker] = useState<Coordinates | null>(null);
/**
* Utility for formatting the timing of logs
*/
const formatTime = useCallback((timestamp: string) => {
  // Milliseconds elapsed between the recorded start time and this timestamp.
  const elapsedMs =
    new Date(timestamp).valueOf() - new Date(startTimeRef.current).valueOf();
  // Left-pad to two characters so the columns line up (e.g. 7 -> "07").
  const twoDigits = (n: number) => String(n).padStart(2, '0');
  const minutes = Math.floor(elapsedMs / 60_000) % 60;
  const seconds = Math.floor(elapsedMs / 1000) % 60;
  const hundredths = Math.floor(elapsedMs / 10) % 100;
  // Rendered as mm:ss.hh
  return `${twoDigits(minutes)}:${twoDigits(seconds)}.${twoDigits(hundredths)}`;
}, []);
/**
* When you click the API key
*/
const resetAPIKey = useCallback(() => {
  const enteredKey = prompt('OpenAI API Key');
  // prompt() yields null when the dialog is dismissed; keep the current key then.
  if (enteredKey === null) {
    return;
  }
  // Wipe all stored values, save the new key and reload so it takes effect.
  localStorage.clear();
  localStorage.setItem('tmp::voice_api_key', enteredKey);
  window.location.reload();
}, []);
/**
* Connect to conversation:
* WavRecorder takes speech input, WavStreamPlayer plays speech output, client is the API client
*/
const connectConversation = useCallback(async () => {
  const { current: realtimeClient } = clientRef;
  const { current: recorder } = wavRecorderRef;
  const { current: player } = wavStreamPlayerRef;

  // Restart the log clock and flip the UI into its connected state.
  startTimeRef.current = new Date().toISOString();
  setIsConnected(true);
  setRealtimeEvents([]);
  setItems(realtimeClient.conversation.getItems());

  // Bring up audio input, then audio output, then the API connection.
  await recorder.begin();
  await player.connect();
  await realtimeClient.connect();

  // Open the conversation with a text greeting.
  realtimeClient.sendUserMessageContent([
    {
      type: 'input_text',
      text: 'Hello!',
      // text: `For testing purposes, I want you to list ten car brands. Number each item, e.g. "one (or whatever number you are one): the item name".`
    },
  ]);

  // With server-side turn detection, start forwarding recorded audio right away.
  if (realtimeClient.getTurnDetectionType() === 'server_vad') {
    await recorder.record((chunk) =>
      realtimeClient.appendInputAudio(chunk.mono)
    );
  }
}, []);
/**
* Disconnect and reset conversation state
*/
const disconnectConversation = useCallback(async () => {
  // Put every piece of UI state back to its initial value.
  setIsConnected(false);
  setRealtimeEvents([]);
  setItems([]);
  setMemoryKv({});
  setCoords({ lat: 37.775593, lng: -122.418137 });
  setMarker(null);

  // Then drop the API connection, end the recorder and interrupt playback.
  clientRef.current.disconnect();
  await wavRecorderRef.current.end();
  await wavStreamPlayerRef.current.interrupt();
}, []);
const deleteConversationItem = useCallback(async (id: string) => {
  // Ask the client to delete the conversation item with the given id.
  clientRef.current.deleteItem(id);
}, []);
/**
* In push-to-talk mode, start recording
* .appendInputAudio() for each sample
*/
const startRecording = async () => {
  setIsRecording(true);
  const { current: realtimeClient } = clientRef;
  const { current: recorder } = wavRecorderRef;
  const { current: player } = wavStreamPlayerRef;

  // Interrupt playback; if a track was playing, cancel that response at the
  // reported offset.
  const interrupted = await player.interrupt();
  if (interrupted?.trackId) {
    await realtimeClient.cancelResponse(
      interrupted.trackId,
      interrupted.offset
    );
  }

  // Forward every recorded chunk to the client as input audio.
  await recorder.record((chunk) => realtimeClient.appendInputAudio(chunk.mono));
};
/**
* In push-to-talk mode, stop recording
*/
const stopRecording = async () => {
  setIsRecording(false);
  // Pause capture first, then ask the client to create a response.
  await wavRecorderRef.current.pause();
  clientRef.current.createResponse();
};
/**
* Switch between Manual <> VAD mode for communication
*/
const changeTurnEndType = async (value: string) => {
  const { current: realtimeClient } = clientRef;
  const { current: recorder } = wavRecorderRef;
  const manualMode = value === 'none';

  // Leaving VAD mode: pause a recording that is still in progress.
  if (manualMode && recorder.getStatus() === 'recording') {
    await recorder.pause();
  }

  realtimeClient.updateSession({
    turn_detection: manualMode ? null : { type: 'server_vad' },
  });

  // Entering VAD mode while connected: start forwarding recorded audio.
  if (value === 'server_vad' && realtimeClient.isConnected()) {
    await recorder.record((chunk) =>
      realtimeClient.appendInputAudio(chunk.mono)
    );
  }

  // The push-to-talk button is only offered in manual mode.
  setCanPushToTalk(manualMode);
};
/**
* Auto-scroll the event logs
*/
useEffect(() => {
  const logEl = eventsScrollRef.current;
  if (!logEl) {
    return;
  }
  const currentHeight = logEl.scrollHeight;
  // Nothing to do unless the content height differs from the last one we saw.
  if (currentHeight === eventsScrollHeightRef.current) {
    return;
  }
  logEl.scrollTop = currentHeight;
  eventsScrollHeightRef.current = currentHeight;
}, [realtimeEvents]);
/**
* Auto-scroll the conversation logs
*/
useEffect(() => {
  // Pin every conversation container to its bottom whenever the items change.
  document.body
    .querySelectorAll<HTMLDivElement>('[data-conversation-content]')
    .forEach((conversationEl) => {
      conversationEl.scrollTop = conversationEl.scrollHeight;
    });
}, [items]);
/**
* Set up render loops for the visualization canvas
*/
useEffect(() => {
  // Cleared by the cleanup function so the animation loop stops rescheduling.
  let active = true;

  const recorder = wavRecorderRef.current;
  const player = wavStreamPlayerRef.current;
  const inputCanvas = clientCanvasRef.current;
  const outputCanvas = serverCanvasRef.current;
  // 2D contexts are obtained on first use and then reused across frames.
  let inputCtx: CanvasRenderingContext2D | null = null;
  let outputCtx: CanvasRenderingContext2D | null = null;

  // Sizes the canvas to its layout box if it has no size yet, resolves its
  // 2D context (reusing `cached` when available) and clears it for drawing.
  const prepareCanvas = (
    canvas: HTMLCanvasElement,
    cached: CanvasRenderingContext2D | null
  ): CanvasRenderingContext2D | null => {
    if (!canvas.width || !canvas.height) {
      canvas.width = canvas.offsetWidth;
      canvas.height = canvas.offsetHeight;
    }
    const ctx = cached || canvas.getContext('2d');
    if (ctx) {
      ctx.clearRect(0, 0, canvas.width, canvas.height);
    }
    return ctx;
  };

  const drawFrame = () => {
    if (!active) {
      return;
    }

    if (inputCanvas) {
      inputCtx = prepareCanvas(inputCanvas, inputCtx);
      if (inputCtx) {
        // Draw a single zero value while nothing is being recorded.
        const spectrum = recorder.recording
          ? recorder.getFrequencies('voice')
          : { values: new Float32Array([0]) };
        WavRenderer.drawBars(
          inputCanvas,
          inputCtx,
          spectrum.values,
          '#0099ff',
          10,
          0,
          8
        );
      }
    }

    if (outputCanvas) {
      outputCtx = prepareCanvas(outputCanvas, outputCtx);
      if (outputCtx) {
        // Draw a single zero value while the player has no analyser.
        const spectrum = player.analyser
          ? player.getFrequencies('voice')
          : { values: new Float32Array([0]) };
        WavRenderer.drawBars(
          outputCanvas,
          outputCtx,
          spectrum.values,
          '#009900',
          10,
          0,
          8
        );
      }
    }

    window.requestAnimationFrame(drawFrame);
  };

  drawFrame();

  return () => {
    active = false;
  };
}, []);
/**
* Core RealtimeClient and audio capture setup
* Set all of our instructions, tools, events and more
*/
useEffect(() => {
  // Runs once on mount: configures the session, registers the tools and wires
  // the client's events to component state. The cleanup resets the client.
  const wavStreamPlayer = wavStreamPlayerRef.current;
  const client = clientRef.current;

  // Set instructions
  client.updateSession({ instructions: instructions });
  // Set transcription, otherwise we don't get user transcriptions back
  client.updateSession({ input_audio_transcription: { model: 'whisper-1' } });

  // Tool: store a key/value pair in the memoryKv state.
  client.addTool(
    {
      name: 'set_memory',
      description: 'Saves important data about the user into memory.',
      parameters: {
        type: 'object',
        properties: {
          key: {
            type: 'string',
            description:
              'The key of the memory value. Always use lowercase and underscores, no other characters.',
          },
          value: {
            type: 'string',
            description: 'Value can be anything represented as a string',
          },
        },
        required: ['key', 'value'],
      },
    },
    async ({ key, value }: { [key: string]: any }) => {
      setMemoryKv((memoryKv) => {
        const newKv = { ...memoryKv };
        newKv[key] = value;
        return newKv;
      });
      return { ok: true };
    }
  );

  // Tool: look up current weather for a coordinate pair and update the marker.
  client.addTool(
    {
      name: 'get_weather',
      description:
        'Retrieves the weather for a given lat, lng coordinate pair. Specify a label for the location.',
      parameters: {
        type: 'object',
        properties: {
          lat: {
            type: 'number',
            description: 'Latitude',
          },
          lng: {
            type: 'number',
            description: 'Longitude',
          },
          location: {
            type: 'string',
            description: 'Name of the location',
          },
        },
        required: ['lat', 'lng', 'location'],
      },
    },
    async ({ lat, lng, location }: { [key: string]: any }) => {
      // Move the marker and map centre immediately; readings are added below.
      setMarker({ lat, lng, location });
      setCoords({ lat, lng, location });
      // The `current` query parameter selects which current-conditions
      // variables are returned; `json.current` / `json.current_units` below
      // depend on it being present.
      const result = await fetch(
        `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lng}&current=temperature_2m,wind_speed_10m`
      );
      const json = await result.json();
      const temperature = {
        value: json.current.temperature_2m as number,
        units: json.current_units.temperature_2m as string,
      };
      const wind_speed = {
        value: json.current.wind_speed_10m as number,
        units: json.current_units.wind_speed_10m as string,
      };
      setMarker({ lat, lng, location, temperature, wind_speed });
      return json;
    }
  );

  // Tool: placeholder email query. Reports "pending" right away and sends a
  // canned result as a user message ten seconds later.
  client.addTool(
    {
      name: 'submit_email_query',
      description:
        'Use this when the user asks a question about their email, or simply a question about personal information.',
      parameters: {
        type: 'object',
        properties: {
          input_query: {
            type: 'string',
            description: 'The query to search.',
          },
        },
      },
    },
    async ({ input_query }: { [key: string]: any }) => {
      setTimeout(() => {
        client.sendUserMessageContent([
          {
            type: `input_text`,
            text: `{'result':'july 2, 2024'}`,
          },
        ]);
      }, 10000);
      return "{'status':'submitted - pending'}";
    }
  );

  // Tool: placeholder status check that always reports "still pending".
  client.addTool(
    {
      name: 'check_last_query_status',
      description:
        'Use this function to check the status of the last query, if the user asks about it.',
      parameters: {
        type: 'object',
        properties: {},
      },
    },
    async ({}: { [key: string]: any }) => {
      return "{'status':'still pending'}";
    }
  );

  // handle realtime events from client + server for event logging
  client.on('realtime.event', (realtimeEvent: RealtimeEvent) => {
    setRealtimeEvents((previousEvents) => {
      const lastEvent = previousEvents[previousEvents.length - 1];
      if (lastEvent?.event.type === realtimeEvent.event.type) {
        // Consecutive events of the same type are aggregated into one row.
        // Build a fresh entry instead of mutating the one held in state:
        // React may call an updater more than once (e.g. in Strict Mode), and
        // an in-place increment would then be applied repeatedly.
        const aggregated: RealtimeEvent = {
          ...lastEvent,
          count: (lastEvent.count || 0) + 1,
        };
        return [...previousEvents.slice(0, -1), aggregated];
      }
      return previousEvents.concat(realtimeEvent);
    });
  });

  client.on('error', (event: any) => console.error(event));

  // On interruption, interrupt playback and cancel the response that was
  // playing at the reported offset.
  client.on('conversation.interrupted', async () => {
    const trackSampleOffset = await wavStreamPlayer.interrupt();
    if (trackSampleOffset?.trackId) {
      const { trackId, offset } = trackSampleOffset;
      await client.cancelResponse(trackId, offset);
    }
  });

  client.on('conversation.updated', async ({ item, delta }: any) => {
    const items = client.conversation.getItems();
    // Pass incoming audio deltas to the stream player as they arrive.
    if (delta?.audio) {
      wavStreamPlayer.add16BitPCM(delta.audio, item.id);
    }
    // Once an item with audio has completed, decode it and attach the result
    // so the conversation view can render an <audio> element for it.
    if (item.status === 'completed' && item.formatted.audio?.length) {
      const wavFile = await WavRecorder.decode(
        item.formatted.audio,
        24000,
        24000
      );
      item.formatted.file = wavFile;
    }
    setItems(items);
  });

  setItems(client.conversation.getItems());

  return () => {
    // cleanup; resets to defaults
    client.reset();
  };
}, []);
/**
* Render the application
*/
return (
  <div data-component="ConsolePage">
    {/* Header: title and, when no relay server is configured, the API key button */}
    <div className="content-top">
      <div className="content-title">
        <img src="/openai-logomark.svg" alt="OpenAI logo" />
        <span>realtime console</span>
      </div>
      <div className="content-api-key">
        {!LOCAL_RELAY_SERVER_URL && (
          <Button
            icon={Edit}
            iconPosition="end"
            buttonStyle="flush"
            label={`api key: ${apiKey.slice(0, 3)}...`}
            onClick={() => resetAPIKey()}
          />
        )}
      </div>
    </div>
    <div className="content-main">
      <div className="content-logs">
        {/* Audio visualizations and the event log */}
        <div className="content-block events">
          <div className="visualization">
            <div className="visualization-entry client">
              <canvas ref={clientCanvasRef} />
            </div>
            <div className="visualization-entry server">
              <canvas ref={serverCanvasRef} />
            </div>
          </div>
          <div className="content-block-title">events</div>
          <div className="content-block-body" ref={eventsScrollRef}>
            {!realtimeEvents.length && `awaiting connection...`}
            {realtimeEvents.map((realtimeEvent) => {
              const count = realtimeEvent.count;
              // Work on a shallow copy so trimming does not touch the logged event.
              const event = { ...realtimeEvent.event };
              // Replace bulky audio payloads with a short size summary.
              if (event.type === 'input_audio_buffer.append') {
                event.audio = `[trimmed: ${event.audio.length} bytes]`;
              } else if (event.type === 'response.audio.delta') {
                event.delta = `[trimmed: ${event.delta.length} bytes]`;
              }
              return (
                <div className="event" key={event.event_id}>
                  <div className="event-timestamp">
                    {formatTime(realtimeEvent.time)}
                  </div>
                  <div className="event-details">
                    <div
                      className="event-summary"
                      onClick={() => {
                        // Toggle event details. A functional update derives
                        // the next map from the latest state rather than the
                        // value captured when this row was rendered.
                        const id = event.event_id;
                        setExpandedEvents((previous) => {
                          const next = { ...previous };
                          if (next[id]) {
                            delete next[id];
                          } else {
                            next[id] = true;
                          }
                          return next;
                        });
                      }}
                    >
                      <div
                        className={`event-source ${
                          event.type === 'error'
                            ? 'error'
                            : realtimeEvent.source
                        }`}
                      >
                        {realtimeEvent.source === 'client' ? (
                          <ArrowUp />
                        ) : (
                          <ArrowDown />
                        )}
                        <span>
                          {event.type === 'error'
                            ? 'error!'
                            : realtimeEvent.source}
                        </span>
                      </div>
                      <div className="event-type">
                        {event.type}
                        {/* Ternary so a falsy count renders nothing, never a literal 0 */}
                        {count ? ` (${count})` : null}
                      </div>
                    </div>
                    {!!expandedEvents[event.event_id] && (
                      <div className="event-payload">
                        {JSON.stringify(event, null, 2)}
                      </div>
                    )}
                  </div>
                </div>
              );
            })}
          </div>
        </div>
        {/* Conversation transcript */}
        <div className="content-block conversation">
          <div className="content-block-title">conversation</div>
          <div className="content-block-body" data-conversation-content>
            {!items.length && `awaiting connection...`}
            {items.map((conversationItem) => {
              return (
                <div className="conversation-item" key={conversationItem.id}>
                  <div className={`speaker ${conversationItem.role || ''}`}>
                    <div>
                      {(
                        conversationItem.role || conversationItem.type
                      ).replaceAll('_', ' ')}
                    </div>
                    <div
                      className="close"
                      onClick={() =>
                        deleteConversationItem(conversationItem.id)
                      }
                    >
                      <X />
                    </div>
                  </div>
                  <div className={`speaker-content`}>
                    {/* tool response */}
                    {conversationItem.type === 'function_call_output' && (
                      <div>{conversationItem.formatted.output}</div>
                    )}
                    {/* tool call */}
                    {!!conversationItem.formatted.tool && (
                      <div>
                        {conversationItem.formatted.tool.name}(
                        {conversationItem.formatted.tool.arguments})
                      </div>
                    )}
                    {/* user message: transcript, else a placeholder while audio awaits transcription, else the text */}
                    {!conversationItem.formatted.tool &&
                      conversationItem.role === 'user' && (
                        <div>
                          {conversationItem.formatted.transcript ||
                            (conversationItem.formatted.audio?.length
                              ? '(awaiting transcript)'
                              : conversationItem.formatted.text ||
                                '(item sent)')}
                        </div>
                      )}
                    {/* assistant message */}
                    {!conversationItem.formatted.tool &&
                      conversationItem.role === 'assistant' && (
                        <div>
                          {conversationItem.formatted.transcript ||
                            conversationItem.formatted.text ||
                            '(truncated)'}
                        </div>
                      )}
                    {/* decoded audio attached by the conversation.updated handler */}
                    {conversationItem.formatted.file && (
                      <audio
                        src={conversationItem.formatted.file.url}
                        controls
                      />
                    )}
                  </div>
                </div>
              );
            })}
          </div>
        </div>
        {/* Turn-detection toggle, push-to-talk button and connect/disconnect button */}
        <div className="content-actions">
          <Toggle
            defaultValue={false}
            labels={['manual', 'vad']}
            values={['none', 'server_vad']}
            onChange={(_, value) => changeTurnEndType(value)}
          />
          <div className="spacer" />
          {isConnected && canPushToTalk && (
            <Button
              label={isRecording ? 'release to send' : 'push to talk'}
              buttonStyle={isRecording ? 'alert' : 'regular'}
              disabled={!isConnected || !canPushToTalk}
              onMouseDown={startRecording}
              onMouseUp={stopRecording}
            />
          )}
          <div className="spacer" />
          <Button
            label={isConnected ? 'disconnect' : 'connect'}
            iconPosition={isConnected ? 'end' : 'start'}
            icon={isConnected ? X : Zap}
            buttonStyle={isConnected ? 'regular' : 'action'}
            onClick={
              isConnected ? disconnectConversation : connectConversation
            }
          />
        </div>
      </div>
      {/* <div className="content-right">
        <div className="content-block map">
          <div className="content-block-title">get_weather()</div>
          <div className="content-block-title bottom">
            {marker?.location || 'not yet retrieved'}
            {!!marker?.temperature && (
              <>
                <br />
                🌡️ {marker.temperature.value} {marker.temperature.units}
              </>
            )}
            {!!marker?.wind_speed && (
              <>
                {' '}
                🍃 {marker.wind_speed.value} {marker.wind_speed.units}
              </>
            )}
          </div>
          <div className="content-block-body full">
            {coords && (
              <Map
                center={[coords.lat, coords.lng]}
                location={coords.location}
              />
            )}
          </div>
        </div>
        <div className="content-block kv">
          <div className="content-block-title">set_memory()</div>
          <div className="content-block-body content-kv">
            {JSON.stringify(memoryKv, null, 2)}
          </div>
        </div>
      </div> */}
    </div>
  </div>
);
}