in firestore-semantic-search/functions/src/functions/backfill_embeddings_task.ts [27:113]
/**
 * Handles a single backfill task: loads the listed documents, turns them into
 * datapoints, uploads the datapoints file to Cloud Storage, records the result
 * on the task document and advances the overall backfill progress.
 *
 * @param data Task payload. The handler reads `id` (task id, used for the task
 *   document path and the uploaded file name), `collectionName` (collection the
 *   documents are read from) and `documentIds` (ids of the documents to embed).
 * @returns A promise that resolves once the task has been processed or skipped.
 * @throws Error if the tasks document at `config.tasksDoc` does not exist when
 *   progress is recorded; errors from Firestore, the embedding step or the
 *   upload are propagated unchanged.
 */
export async function backfillEmbeddingsTaskHandler(data: any): Promise<void> {
  const {id, collectionName, documentIds} = data;

  if (!documentIds || documentIds.length === 0) {
    functions.logger.info('No document ids found, skipping...');
    return;
  }

  const db = admin.firestore();
  const taskRef = db.doc(`${config.tasksDoc}/enqueues/${id}`);
  const tasksDocRef = db.doc(config.tasksDoc);

  await taskRef.update({
    status: 'PROCESSING',
  });

  // The transaction callback may be re-run by the SDK, so it builds and returns
  // its own array instead of appending to state that outlives a single attempt.
  const documents = await db.runTransaction(async transaction => {
    const refs = documentIds.map((documentId: string) =>
      db.collection(collectionName).doc(documentId)
    );
    const snapshots = await transaction.getAll(...refs);

    const found: {id: string; data: any}[] = [];
    for (const snapshot of snapshots) {
      const docData = snapshot.data();
      if (!docData) {
        functions.logger.error(`Document ${snapshot.ref.path} has no data`);
        continue;
      }
      found.push({id: snapshot.ref.id, data: docData});
    }
    return found;
  });

  const datapoints = await getDatapointsList(documents);

  if (datapoints.length === 0) {
    // NOTE(review): this returns with the task still marked PROCESSING and
    // without updating progress or calling _createNextTask — confirm that
    // stopping here is intended.
    functions.logger.info('No datapoints found, skipping...');
    return;
  }

  const destinationPath = `datapoints/${id}.json`;

  try {
    const localFilePath = await utils.saveEmbeddingsToTmpFile(datapoints);

    functions.logger.info(
      `Embeddings will be saved to ${destinationPath} 📝, uploading to the bucket...`
    );

    await utils.uploadToCloudStorage(localFilePath, destinationPath);

    functions.logger.info(
      `Embeddings uploaded to the bucket ${config.bucketName} in ${destinationPath} 🚀`
    );

    await taskRef.update({
      status: 'DONE',
      filePath: `gs://${config.bucketName}/${destinationPath}`,
    });
  } finally {
    // Clean up even when the upload or the task update fails, so temp files
    // are not left behind on error.
    await utils.deleteTempFiles();
  }

  // Read, increment and (if finished) mark the backfill as done in a single
  // transaction, so the completion check is made against the value actually
  // written rather than a stale read taken before the increment.
  const backfillComplete = await db.runTransaction(async transaction => {
    const snapshot = await transaction.get(tasksDocRef);
    const progress = snapshot.data();
    if (!progress) {
      throw new Error(`Tasks document ${config.tasksDoc} does not exist`);
    }

    const previouslyProcessed =
      typeof progress.processedLength === 'number'
        ? progress.processedLength
        : 0;
    const processedLength = previouslyProcessed + documentIds.length;

    // `>=` rather than `===` so an overshooting counter still completes the
    // backfill instead of scheduling tasks indefinitely.
    const isComplete = processedLength >= progress.totalLength;

    transaction.update(
      tasksDocRef,
      isComplete
        ? {processedLength, status: BackfillStatus.DONE}
        : {processedLength}
    );

    return isComplete;
  });

  if (!backfillComplete) {
    // Presumably enqueues the task that follows `id` — see _createNextTask.
    await _createNextTask(id);
  }
}