in src/backend/catalog-builder/catalog-builder.lambda.ts [29:170]
/**
 * Builds or incrementally updates the `catalog.json` object in the package
 * data bucket.
 *
 * Behavior depends on the input event:
 * - `event.package` set: registers that single package into the existing
 *   catalog (incremental update). Errors are intentionally NOT caught so the
 *   event lands in the DLQ for manual inspection.
 * - no `event.package`, no existing catalog body, or `event.startAfter` set:
 *   performs a full scan of the bucket ("from scratch" rebuild, or a
 *   continuation thereof). If the Lambda runs low on time, it persists
 *   partial progress and re-invokes itself with `startAfter` set.
 *
 * Deny-listed packages are dropped from the catalog in all cases.
 *
 * @param event   the catalog builder input (optional `package` and/or `startAfter`)
 * @param context the Lambda execution context (used for time budget and self-invocation)
 *
 * @returns the result of the final `PutObject` call that wrote the catalog.
 */
export async function handler(event: CatalogBuilderInput, context: Context) {
  console.log(JSON.stringify(event, null, 2));

  const BUCKET_NAME = requireEnv('BUCKET_NAME');

  // package name -> (major version -> package info); keeps exactly one entry
  // per (name, major) pair, so re-appending a package overwrites in place.
  const packages = new Map<string, Map<number, PackageInfo>>();
  const denyList = await DenyListClient.newClient();

  console.log('Loading existing catalog (if present)...');
  // A missing catalog (NoSuchKey) is a normal "from scratch" situation and is
  // mapped to an empty GetObjectOutput; any other S3 error is re-thrown.
  const data = await aws.s3().getObject({ Bucket: BUCKET_NAME, Key: constants.CATALOG_KEY }).promise()
    .catch((err: AWSError) => err.code !== 'NoSuchKey'
      ? Promise.reject(err)
      : Promise.resolve({ /* no data */ } as S3.GetObjectOutput));

  if (data.Body) {
    console.log('Catalog found. Loading...');
    const catalog: CatalogModel = JSON.parse(data.Body.toString('utf-8'));
    for (const info of catalog.packages) {
      // Drop entries that have been deny-listed since the catalog was written.
      const denyRule = denyList.lookup(info.name, info.version);
      if (denyRule != null) {
        console.log(`Dropping ${info.name}@${info.version} from catalog: ${denyRule.reason}`);
        continue;
      }
      if (!packages.has(info.name)) {
        packages.set(info.name, new Map());
      }
      packages.get(info.name)!.set(info.major, info);
    }
  }

  // If defined, the function will invoke itself again to resume the work from that key (this
  // happens only in "from scratch" or "rebuild" cases).
  let nextStartAfter: string | undefined;

  if (event.package) {
    if (!event.package.key.endsWith(constants.PACKAGE_KEY_SUFFIX)) {
      throw new Error(`The provided package key is invalid: ${event.package.key} does not end in ${constants.PACKAGE_KEY_SUFFIX}`);
    }

    console.log('Registering new packages...');
    // note that we intentionally don't catch errors here to let these
    // event go to the DLQ for manual inspection.
    await appendPackage(packages, event.package.key, BUCKET_NAME, denyList);
  }

  // If we don't have a package in event, then we're refreshing the catalog. This is also true if we
  // don't have a catalog body (from scratch) or if "startAfter" is set (continuation of from
  // scratch).
  if (!event.package || !data.Body || event.startAfter) {
    console.log('Recreating or refreshing catalog...');
    // Keyed by the S3 object key of the package that failed to process.
    const failures: Record<string, unknown> = {};
    for await (const { Key: pkgKey } of relevantObjects(BUCKET_NAME, event.startAfter)) {
      try {
        await appendPackage(packages, pkgKey!, BUCKET_NAME, denyList);
      } catch (e) {
        failures[pkgKey!] = e;
      }
      // If we're getting short on time (1 minute out of 15 left), we'll be continuing in a new
      // invocation after writing what we've done so far to S3...
      if (context.getRemainingTimeInMillis() <= 60_000) {
        nextStartAfter = pkgKey;
        break;
      }
    }
    for (const [key, error] of Object.entries(failures)) {
      console.log(`Failed processing ${key}: ${error}`);
    }

    await metricScope((metrics) => async () => {
      // Avoid attaching default dimensions so the metric aggregates globally.
      metrics.setDimensions();
      const failedCount = Object.keys(failures).length;
      console.log(`Marking ${failedCount} failed packages`);
      metrics.putMetric(MetricName.FAILED_PACKAGES_ON_RECREATION, failedCount, Unit.Count);
    })();
  }

  // Build the final data package...
  console.log('Consolidating catalog...');
  const catalog: CatalogModel = { packages: new Array<PackageInfo>(), updated: new Date().toISOString() };
  for (const majors of packages.values()) {
    for (const pkg of majors.values()) {
      catalog.packages.push(pkg);
    }
  }

  console.log(`There are now ${catalog.packages.length} registered package major versions`);
  await metricScope((metrics) => async () => {
    metrics.setDimensions();
    metrics.putMetric(MetricName.REGISTERED_PACKAGES_MAJOR_VERSION, catalog.packages.length, Unit.Count);
    metrics.putMetric(
      MetricName.MISSING_CONSTRUCT_FRAMEWORK_COUNT,
      catalog.packages.filter((pkg) => pkg.constructFramework == null).length,
      Unit.Count,
    );
    metrics.putMetric(
      MetricName.MISSING_CONSTRUCT_FRAMEWORK_VERSION_COUNT,
      catalog.packages.filter(
        (pkg) => pkg.constructFramework && pkg.constructFramework.majorVersion == null,
      ).length,
      Unit.Count,
    );
  })();

  // Clean up existing entries if necessary. In particular, remove the license texts as they make
  // the catalog unnecessarily large, and may hinder some search queries' result quality.
  for (const entry of catalog.packages) {
    if (entry.metadata) {
      // Cast needed: licenseText may be absent from the declared metadata type.
      delete (entry.metadata as any).licenseText;
    }
  }

  // Upload the result to S3 and exit.
  const result = await aws.s3().putObject({
    Bucket: BUCKET_NAME,
    Key: constants.CATALOG_KEY,
    Body: JSON.stringify(catalog, null, 2),
    ContentType: 'application/json',
    CacheControl: CacheStrategy.default().toString(),
    Metadata: {
      'Lambda-Log-Group': context.logGroupName,
      'Lambda-Log-Stream': context.logStreamName,
      'Lambda-Run-Id': context.awsRequestId,
      'Package-Count': `${catalog.packages.length}`,
    },
  }).promise();

  if (nextStartAfter != null) {
    console.log(`Will continue from ${nextStartAfter} in new invocation...`);
    const nextEvent: CatalogBuilderInput = { ...event, startAfter: nextStartAfter };
    // We start it asynchronously, as this function has a provisionned
    // concurrency of 1 (so a synchronous attempt would always be throttled).
    // NOTE(review): `invokeAsync` is deprecated in aws-sdk v2; consider
    // `invoke` with `InvocationType: 'Event'` when next touching this code.
    await aws.lambda().invokeAsync({
      FunctionName: context.functionName,
      InvokeArgs: JSON.stringify(nextEvent, null, 2),
    }).promise();
  }

  return result;
}