export async function backfillEmbeddingsTaskHandler()

in firestore-semantic-search/functions/src/functions/backfill_embeddings_task.ts [27:113]


/**
 * Handles a single backfill task: loads the listed documents, turns them into
 * datapoints, uploads the datapoints file to the bucket and records progress
 * on the shared tasks document.
 *
 * @param data Task payload. Reads `id` (the task id, also used for the
 *   `enqueues/{id}` task document and the uploaded file name),
 *   `collectionName` (collection the documents live in) and `documentIds`
 *   (ids of the documents to process).
 * @returns Resolves once the task has been recorded and either the whole
 *   backfill was marked done or the next task was requested. Resolves early,
 *   without touching Firestore, when `documentIds` is missing or empty.
 * @throws Error when the tasks document at `config.tasksDoc` has no data.
 *   Errors from Firestore, the upload or the helpers propagate unchanged.
 */
export async function backfillEmbeddingsTaskHandler(data: any): Promise<void> {
  const {id, collectionName, documentIds} = data;

  if (!documentIds || documentIds.length === 0) {
    functions.logger.info('No document ids found, skipping...');
    return;
  }

  const db = admin.firestore();
  const taskRef = db.doc(`${config.tasksDoc}/enqueues/${id}`);
  const tasksDocRef = db.doc(config.tasksDoc);

  await taskRef.update({
    status: 'PROCESSING',
  });

  // The list is built *inside* the callback and returned from it: Firestore
  // may run a transaction callback more than once, and accumulating into an
  // outer array would then duplicate every document.
  const documents: {id: string; data: any}[] = await db.runTransaction(
    async transaction => {
      const refs = documentIds.map((documentId: string) =>
        db.collection(collectionName).doc(documentId)
      );
      const snapshots = await transaction.getAll(...refs);

      const found: {id: string; data: any}[] = [];
      for (const snapshot of snapshots) {
        const snapshotData = snapshot.data();
        if (!snapshotData) {
          // Missing documents are logged and skipped, not treated as fatal.
          functions.logger.error(`Document ${snapshot.ref.path} has no data`);
          continue;
        }
        found.push({id: snapshot.ref.id, data: snapshotData});
      }
      return found;
    }
  );

  const datapoints = await getDatapointsList(documents);

  if (datapoints.length === 0) {
    // NOTE(review): returning here leaves the task document at 'PROCESSING'
    // and neither advances `processedLength` nor requests the next task.
    // Confirm that stopping the chain at this point is intended.
    functions.logger.info('No datapoints found, skipping...');
    return;
  }

  const localFilePath = await utils.saveEmbeddingsToTmpFile(datapoints);
  const destinationPath = `datapoints/${id}.json`;

  try {
    functions.logger.info(
      `Embeddings will be saved to ${destinationPath} 📝, uploading to the bucket...`
    );

    await utils.uploadToCloudStorage(localFilePath, destinationPath);

    functions.logger.info(
      `Embeddings uploaded to the bucket ${config.bucketName} in ${destinationPath} 🚀`
    );

    await taskRef.update({
      status: 'DONE',
      filePath: `gs://${config.bucketName}/${destinationPath}`,
    });
  } finally {
    // Clean up even when the upload or the status update fails, so a failed
    // run does not leave its temp file behind.
    await utils.deleteTempFiles();
  }

  const tasksDoc = await tasksDocRef.get();
  const progress = tasksDoc.data();
  if (!progress) {
    throw new Error(`Tasks document ${config.tasksDoc} has no data`);
  }
  const {totalLength, processedLength} = progress;

  // The stored counter is bumped with an increment; the local sum is only
  // used to decide whether this task was the last one.
  const processedAfterTask = processedLength + documentIds.length;
  await tasksDocRef.update({
    processedLength: admin.firestore.FieldValue.increment(documentIds.length),
  });

  if (processedAfterTask === totalLength) {
    await tasksDocRef.update({
      status: BackfillStatus.DONE,
    });
  } else {
    await _createNextTask(id);
  }
}