in src/spatial_understanding/Prompt.tsx [74:241]
/**
 * Captures the current input, sends it to the model with the prompt for the
 * active detection type, and stores the parsed detections in component state.
 *
 * Steps:
 *  1. Draw the screenshare video frame (when `stream` is set) or the loaded
 *     `imageSrc` onto an off-screen canvas, scaled by
 *     `min(640 / width, 640 / height)` so the aspect ratio is preserved.
 *  2. Fill every user-drawn stroke in `lines` on top of that frame. Stroke
 *     points are multiplied by the canvas width/height before drawing.
 *  3. Send the canvas as a base64 PNG plus the text prompt to
 *     `ai.models.generateContent`.
 *  4. Parse the reply as a JSON array (optionally wrapped in a ```json fence)
 *     and hand the converted entries to the state setter matching
 *     `detectType`.
 *
 * @throws {Error} If the 2D context or the video element is unavailable, the
 *   source reports a zero width/height, the model returns no text, or the
 *   parsed reply is not an array.
 * @throws {SyntaxError} If the reply text is not valid JSON.
 */
async function handleSend() {
  const maxSize = 640;
  const copyCanvas = document.createElement('canvas');
  const ctx = copyCanvas.getContext('2d');
  if (!ctx) {
    throw new Error('Unable to acquire a 2D canvas context.');
  }

  // Resizes the canvas to the scaled source size and draws the source into it.
  const drawScaled = (
    source: CanvasImageSource,
    sourceWidth: number,
    sourceHeight: number,
  ) => {
    // A zero-sized source would yield an Infinity scale and NaN canvas
    // dimensions, so fail early instead of sending an unusable image.
    if (!(sourceWidth > 0 && sourceHeight > 0)) {
      throw new Error('The image source has no dimensions to capture.');
    }
    const scale = Math.min(maxSize / sourceWidth, maxSize / sourceHeight);
    copyCanvas.width = sourceWidth * scale;
    copyCanvas.height = sourceHeight * scale;
    ctx.drawImage(source, 0, 0, sourceWidth * scale, sourceHeight * scale);
  };

  if (stream) {
    // screenshare
    const video = videoRef.current;
    if (!video) {
      throw new Error('The screenshare video element is not available.');
    }
    drawScaled(video, video.videoWidth, video.videoHeight);
  } else if (imageSrc) {
    const image = await loadImage(imageSrc);
    drawScaled(image, image.width, image.height);
  }

  // Overlay the user's strokes; each line is [points, fillStyle].
  for (const line of lines) {
    const strokePath = new Path2D(
      getSvgPathFromStroke(
        getStroke(
          line[0].map(([x, y]) => [
            x * copyCanvas.width,
            y * copyCanvas.height,
            0.5,
          ]),
          lineOptions,
        ),
      ),
    );
    ctx.fillStyle = line[1];
    ctx.fill(strokePath);
  }

  // Encode once, after all drawing is done.
  const activeDataURL = copyCanvas.toDataURL('image/png');

  const prompt = prompts[detectType];
  setHoverEntered(false);

  const result = await ai.models.generateContent({
    model:
      detectType === 'Segmentation masks'
        ? 'models/gemini-2.5-pro-exp-03-25'
        : modelSelected,
    contents: [
      {
        role: 'user',
        parts: [
          {
            inlineData: {
              data: activeDataURL.replace('data:image/png;base64,', ''),
              mimeType: 'image/png',
            },
          },
          {text: is2d ? get2dPrompt() : prompt.join(' ')},
        ],
      },
    ],
    config: {temperature},
  });

  const rawText = result.text;
  if (typeof rawText !== 'string' || rawText.trim() === '') {
    throw new Error('The model response did not contain any text.');
  }

  // The reply may wrap its JSON in a ```json fence; keep only the fenced body.
  const jsonText = rawText.includes('```json')
    ? rawText.split('```json')[1].split('```')[0]
    : rawText;

  const parsedResponse: unknown = JSON.parse(jsonText);
  if (!Array.isArray(parsedResponse)) {
    throw new Error('Expected the model response to be a JSON array.');
  }

  // Converts a [ymin, xmin, ymax, xmax] box into x/y/width/height, each
  // divided by 1000.
  const toNormalizedRect = ([ymin, xmin, ymax, xmax]: [
    number,
    number,
    number,
    number,
  ]) => ({
    x: xmin / 1000,
    y: ymin / 1000,
    width: (xmax - xmin) / 1000,
    height: (ymax - ymin) / 1000,
  });

  if (detectType === '2D bounding boxes') {
    const formattedBoxes = parsedResponse.map(
      (box: {box_2d: [number, number, number, number]; label: string}) => ({
        ...toNormalizedRect(box.box_2d),
        label: box.label,
      }),
    );
    // Reset again: hover state may have changed while the request was pending.
    setHoverEntered(false);
    setBoundingBoxes2D(formattedBoxes);
  } else if (detectType === 'Points') {
    const formattedPoints = parsedResponse.map(
      (point: {point: [number, number]; label: string}) => ({
        // The reply stores each point as [y, x].
        point: {
          x: point.point[1] / 1000,
          y: point.point[0] / 1000,
        },
        label: point.label,
      }),
    );
    setPoints(formattedPoints);
  } else if (detectType === 'Segmentation masks') {
    const formattedBoxes = parsedResponse.map(
      (box: {
        box_2d: [number, number, number, number];
        label: string;
        mask: ImageData;
      }) => ({
        ...toNormalizedRect(box.box_2d),
        label: box.label,
        imageData: box.mask,
      }),
    );
    setHoverEntered(false);
    // sort largest to smallest (by box area)
    const sortedBoxes = formattedBoxes.sort(
      (a, b) => b.width * b.height - a.width * a.height,
    );
    setBoundingBoxMasks(sortedBoxes);
  } else {
    // Any other detect type is handled as 3D boxes: the first three values
    // are the center, the next three the size, and the remaining values are
    // multiplied by PI / 180 (degrees to radians).
    const formattedBoxes = parsedResponse.map(
      (box: {
        box_3d: [
          number,
          number,
          number,
          number,
          number,
          number,
          number,
          number,
          number,
        ];
        label: string;
      }) => {
        const center = box.box_3d.slice(0, 3);
        const size = box.box_3d.slice(3, 6);
        const rpy = box.box_3d
          .slice(6)
          .map((x: number) => (x * Math.PI) / 180);
        return {
          center,
          size,
          rpy,
          label: box.label,
        };
      },
    );
    setBoundingBoxes3D(formattedBoxes);
  }
}