// Excerpt: handler() from src/backend/catalog-builder/catalog-builder.lambda.ts (original lines 29-170)

/**
 * Builds or updates the catalog object in the data bucket.
 *
 * Behavior depends on the input event:
 * - `event.package` is set: validates the key suffix and registers that single
 *   package into the catalog loaded from S3.
 * - no `event.package`, no pre-existing catalog body, or `event.startAfter` is
 *   set: walks the relevant objects in the bucket and (re)builds the catalog,
 *   resuming from `startAfter` when provided. When the invocation is about to
 *   run out of time, the progress so far is written to S3 and the function
 *   re-invokes itself asynchronously to continue from the last processed key.
 *
 * Deny-listed packages are always dropped, including ones already present in
 * a previously stored catalog.
 *
 * @param event the trigger payload: optionally a single package to register,
 *              and/or a continuation marker (`startAfter`).
 * @param context the Lambda execution context, used for remaining-time checks,
 *                result metadata, and asynchronous self re-invocation.
 *
 * @returns the result of the S3 `PutObject` call that stored the new catalog.
 */
export async function handler(event: CatalogBuilderInput, context: Context) {
  console.log(JSON.stringify(event, null, 2));

  const BUCKET_NAME = requireEnv('BUCKET_NAME');

  // package name -> (major version -> PackageInfo registered for that major line)
  const packages = new Map<string, Map<number, PackageInfo>>();
  const denyList = await DenyListClient.newClient();

  console.log('Loading existing catalog (if present)...');

  // A missing catalog object (NoSuchKey) is the expected "from scratch" case
  // and yields an empty GetObjectOutput; any other S3 error is propagated.
  const data = await aws.s3().getObject({ Bucket: BUCKET_NAME, Key: constants.CATALOG_KEY }).promise()
    .catch((err: AWSError) => err.code !== 'NoSuchKey'
      ? Promise.reject(err)
      : Promise.resolve({ /* no data */ } as S3.GetObjectOutput));

  if (data.Body) {
    console.log('Catalog found. Loading...');
    const catalog: CatalogModel = JSON.parse(data.Body.toString('utf-8'));
    for (const info of catalog.packages) {
      // Drop entries that have been deny-listed since the catalog was written.
      const denyRule = denyList.lookup(info.name, info.version);
      if (denyRule != null) {
        console.log(`Dropping ${info.name}@${info.version} from catalog: ${denyRule.reason}`);
        continue;
      }
      if (!packages.has(info.name)) {
        packages.set(info.name, new Map());
      }
      packages.get(info.name)!.set(info.major, info);
    }
  }

  // If defined, the function will invoke itself again to resume the work from that key (this
  // happens only in "from scratch" or "rebuild" cases).
  let nextStartAfter: string | undefined;

  if (event.package) {
    if (!event.package.key.endsWith(constants.PACKAGE_KEY_SUFFIX)) {
      throw new Error(`The provided package key is invalid: ${event.package.key} does not end in ${constants.PACKAGE_KEY_SUFFIX}`);
    }

    console.log('Registering new packages...');
    // note that we intentionally don't catch errors here to let these
    // event go to the DLQ for manual inspection.
    await appendPackage(packages, event.package.key, BUCKET_NAME, denyList);
  }

  // If we don't have a package in event, then we're refreshing the catalog. This is also true if we
  // don't have a catalog body (from scratch) or if "startAfter" is set (continuation of from
  // scratch).
  if (!event.package || !data.Body || event.startAfter) {

    console.log('Recreating or refreshing catalog...');
    // Per-key failures are collected instead of aborting, so one broken
    // package cannot prevent the rest of the catalog from being rebuilt.
    const failures: Record<string, unknown> = {};
    for await (const { Key: pkgKey } of relevantObjects(BUCKET_NAME, event.startAfter)) {
      try {
        await appendPackage(packages, pkgKey!, BUCKET_NAME, denyList);
      } catch (e) {
        failures[pkgKey!] = e;
      }
      // If we're getting short on time (1 minute out of 15 left), we'll be continuing in a new
      // invocation after writing what we've done so far to S3...
      if (context.getRemainingTimeInMillis() <= 60_000) {
        nextStartAfter = pkgKey;
        break;
      }
    }
    for (const [key, error] of Object.entries(failures)) {
      console.log(`Failed processing ${key}: ${error}`);
    }

    await metricScope((metrics) => async () => {
      metrics.setDimensions();
      const failedCount = Object.keys(failures).length;
      console.log(`Marking ${failedCount} failed packages`);
      metrics.putMetric(MetricName.FAILED_PACKAGES_ON_RECREATION, failedCount, Unit.Count);
    })();
  }

  // Build the final data package...
  console.log('Consolidating catalog...');
  const catalog: CatalogModel = { packages: new Array<PackageInfo>(), updated: new Date().toISOString() };
  for (const majors of packages.values()) {
    for (const pkg of majors.values()) {
      catalog.packages.push(pkg);
    }
  }

  console.log(`There are now ${catalog.packages.length} registered package major versions`);
  await metricScope((metrics) => async () => {
    metrics.setDimensions();
    metrics.putMetric(MetricName.REGISTERED_PACKAGES_MAJOR_VERSION, catalog.packages.length, Unit.Count);
    metrics.putMetric(
      MetricName.MISSING_CONSTRUCT_FRAMEWORK_COUNT,
      catalog.packages.filter((pkg) => pkg.constructFramework == null).length,
      Unit.Count,
    );
    metrics.putMetric(
      MetricName.MISSING_CONSTRUCT_FRAMEWORK_VERSION_COUNT,
      catalog.packages.filter(
        (pkg) => pkg.constructFramework && pkg.constructFramework.majorVersion == null,
      ).length,
      Unit.Count,
    );
  })();


  // Clean up existing entries if necessary. In particular, remove the license texts as they make
  // the catalog unnecessarily large, and may hinder some search queries' result quality.
  for (const entry of catalog.packages) {
    if (entry.metadata) {
      delete (entry.metadata as any).licenseText;
    }
  }

  // Upload the result to S3 and exit.
  const result = await aws.s3().putObject({
    Bucket: BUCKET_NAME,
    Key: constants.CATALOG_KEY,
    Body: JSON.stringify(catalog, null, 2),
    ContentType: 'application/json',
    CacheControl: CacheStrategy.default().toString(),
    Metadata: {
      'Lambda-Log-Group': context.logGroupName,
      'Lambda-Log-Stream': context.logStreamName,
      'Lambda-Run-Id': context.awsRequestId,
      'Package-Count': `${catalog.packages.length}`,
    },
  }).promise();

  if (nextStartAfter != null) {
    console.log(`Will continue from ${nextStartAfter} in new invocation...`);
    const nextEvent: CatalogBuilderInput = { ...event, startAfter: nextStartAfter };
    // We start it asynchronously, as this function has a provisionned
    // concurrency of 1 (so a synchronous attempt would always be throttled).
    // NOTE(review): invokeAsync is deprecated in AWS SDK v2 — consider
    // invoke({ InvocationType: 'Event' }); verify IAM permissions first.
    await aws.lambda().invokeAsync({
      FunctionName: context.functionName,
      InvokeArgs: JSON.stringify(nextEvent, null, 2),
    }).promise();
  }

  return result;
}