function getRelevantVersionInfos()

in src/package-sources/npmjs/npm-js-follower.lambda.ts [293:423]


/**
 * Selects, from a batch of registry changes, the package versions that are
 * worth processing further.
 *
 * A version is returned when ALL of the following hold:
 * - its document has a `name`, survives `normalizeNPMMetadata`, and has both
 *   a `versions` and a `time` entry;
 * - its timestamp in `time` is more recent than the one recorded in
 *   `knownVersions` (or no timestamp is recorded at all);
 * - it is a construct library (see `isConstructLibrary` below);
 * - it is not matched by the deny list;
 * - its license (or `'UNLICENSED'` when absent) is found in the license list.
 *
 * Side effects:
 * - `normalizeNPMMetadata` is invoked on every named document, and modifies
 *   that document in place.
 * - `knownVersions` is updated for versions that are rejected by the deny list
 *   or by the license list (presumably so they are not re-evaluated until they
 *   change again -- confirm against the caller). It is NOT updated for the
 *   versions that are returned.
 * - Several metrics are emitted through `metrics` along the way.
 *
 * @param changes       the registry changes to inspect.
 * @param metrics       the logger through which metrics are emitted.
 * @param denyList      the client used to look up denied packages.
 * @param licenseList   the client used to look up allow-listed licenses.
 * @param knownVersions last known modification date, keyed by `name@version`.
 *                      This map is mutated (see above).
 *
 * @returns the relevant versions, in the order in which they were encountered.
 */
function getRelevantVersionInfos(
  changes: readonly Change[],
  metrics: MetricsLogger,
  denyList: DenyListClient,
  licenseList: LicenseListClient,
  knownVersions: Map<string, Date>,
): readonly UpdatedVersion[] {

  const result = new Array<UpdatedVersion>();

  for (const change of changes) {
    // Filter out all elements that don't have a "name" in the document, as
    // these are schemas, which are not relevant to our business here.
    if (change.doc.name === undefined) {
      console.error(`[${change.seq}] Changed document contains no 'name': ${change.id}`);
      metrics.putMetric(MetricName.UNPROCESSABLE_ENTITY, 1, Unit.Count);
      continue;
    }

    // The normalize function changes the object in place. If the doc object is
    // invalid, it returns undefined.
    if (normalizeNPMMetadata(change.doc) === undefined) {
      console.error(`[${change.seq}] Changed document invalid, npm normalize returned undefined: ${change.id}`);
      metrics.putMetric(MetricName.UNPROCESSABLE_ENTITY, 1, Unit.Count);
      continue;
    }

    // Sometimes, there are no versions in the document. We skip those.
    if (change.doc.versions == null) {
      console.error(`[${change.seq}] Changed document contains no 'versions': ${change.id}`);
      metrics.putMetric(MetricName.UNPROCESSABLE_ENTITY, 1, Unit.Count);
      continue;
    }

    // Sometimes, there is no 'time' entry in the document. We skip those.
    if (change.doc.time == null) {
      console.error(`[${change.seq}] Changed document contains no 'time': ${change.id}`);
      metrics.putMetric(MetricName.UNPROCESSABLE_ENTITY, 1, Unit.Count);
      continue;
    }

    // Collect the per-version timestamps from the document's 'time' entry
    const packageVersionUpdates = Object.entries(change.doc.time)
      // Ignore the "created" and "modified" keys here
      .filter(([key]) => key !== 'created' && key !== 'modified')
      // Parse all the dates to ensure they are comparable
      .map(([version, isoDate]) => [version, new Date(isoDate)] as const);
    metrics.putMetric(MetricName.PACKAGE_VERSION_COUNT, packageVersionUpdates.length, Unit.Count);

    for (const [version, modified] of packageVersionUpdates) {
      const knownKey = `${change.doc.name}@${version}`;
      const known = knownVersions.get(knownKey);
      // Only consider versions we have never seen, or that changed since we last saw them
      if (known == null || known < modified) {
        const infos = change.doc.versions[version];
        if (infos == null) {
          // Could be the version in question was un-published.
          console.log(`[${change.seq}] Could not find info for "${change.doc.name}@${version}". Was it un-published?`);
        } else if (isConstructLibrary(infos)) {

          // skip if this package is denied
          const denied = denyList.lookup(infos.name, infos.version);
          if (denied) {
            console.log(`[${change.seq}] Package denied: ${JSON.stringify(denied)}`);
            // Record the version as known even though it is not returned
            knownVersions.set(knownKey, modified);
            metrics.putMetric(MetricName.DENY_LISTED_COUNT, 1, Unit.Count);
            continue;
          }

          metrics.putMetric(MetricName.PACKAGE_VERSION_AGE, Date.now() - modified.getTime(), Unit.Milliseconds);
          // Packages that declare no license are looked up as 'UNLICENSED'
          const license = infos.license ?? 'UNLICENSED';
          const isEligible = licenseList.lookup(license) != null;
          metrics.putMetric(MetricName.INELIGIBLE_LICENSE, isEligible ? 0 : 1, Unit.Count);
          if (isEligible) {
            result.push({ infos, modified, seq: change.seq });
          } else {
            console.log(`[${change.seq}] Package "${change.doc.name}@${version}" does not use allow-listed license: ${license}`);
            // Record the version as known even though it is not returned
            knownVersions.set(knownKey, modified);
          }
        }
        // Else this is not a construct library, so we'll just ignore it...
      }
    }
  }
  return result;

  /**
   * This determines whether a package is "interesting" to ConstructHub or not. This is related but
   * not necessarily identical to the logic in the ingestion process that annotates package metadata
   * with a construct framework name + version (those could ultimately be re-factored to share more
   * of the logic/heuristics, though).
   *
   * Concretely, it requires the package to carry a `jsii` entry, and then checks for a list of
   * known "official" packages for various construct frameworks, and packages that have a direct
   * dependency on such a package. It also has a keywords allow-list as a fall-back (the current
   * dependency-based logic does not consider transitive dependencies and might hence miss certain
   * rare use-cases, which keywords would rescue).
   *
   * @param infos the version information to inspect.
   *
   * @returns `true` if the version is a construct library, `false` otherwise.
   */
  function isConstructLibrary(infos: VersionInfo): boolean {
    if (infos.jsii == null) {
      return false;
    }
    // The package itself may be one of the construct framework packages
    return isConstructFrameworkPackage(infos.name)
      // Check the names of the direct dependencies (this is NOT transitive)
      || Object.keys(infos.dependencies ?? {}).some(isConstructFrameworkPackage)
      || Object.keys(infos.devDependencies ?? {}).some(isConstructFrameworkPackage)
      || Object.keys(infos.peerDependencies ?? {}).some(isConstructFrameworkPackage)
      // Keyword-based fallback. The optional chain yields `undefined` when there are no keywords,
      // so it is coalesced to `false` in order to always return an actual boolean.
      || (infos.keywords?.some((kw) => CONSTRUCT_KEYWORDS.has(kw)) ?? false);
  }

  /**
   * Package is one of the known construct framework's first party packages:
   * - constructs
   * - aws-cdk-lib, monocdk or @aws-cdk/*
   * - cdk8s, cdk8s-plus or cdk8s-plus-NN (where NN is one of 17, 20, 21, 22)
   * - cdktf or @cdktf/*
   *
   * @param name the name of the package to check.
   *
   * @returns `true` if the name matches one of the above, `false` otherwise.
   */
  function isConstructFrameworkPackage(name: string): boolean {
    // IMPORTANT NOTE: Prefix matching should only be used for @scope/ names.

    // The low-level constructs package
    return name === 'constructs'
      // AWS CDK Packages
      || name === 'aws-cdk-lib'
      || name === 'monocdk'
      || name.startsWith('@aws-cdk/')
      // CDK8s packages
      || name === 'cdk8s'
      || /^cdk8s-plus(?:-(?:17|20|21|22))?$/.test(name)
      // CDKTf packages
      || name === 'cdktf'
      || name.startsWith('@cdktf/');
  }
}