async function parseSnippetDefinitions()

in common/tools/dev-tool/src/commands/run/update-snippets.ts [182:490]


/**
 * Parses the snippet file of a project and extracts every snippet definition found in it.
 *
 * A snippet is any call to the identifier `it` whose first argument is a string literal (the snippet name) and whose
 * second argument is a function expression, or an arrow function with a block body. For each snippet, the statements
 * of that body are printed back to TypeScript text and prefixed with import declarations for every symbol the body
 * references that was imported in the snippet file. Imports of the package's own `src` index are rewritten so that
 * they import from `project.name` instead.
 *
 * Once a snippet has been found, its body is not searched for nested snippets. If two snippets share a name, the
 * later one replaces the earlier one in the returned map.
 *
 * @param project - the project whose snippet file (`SNIPPET_PATH` under `project.path`) should be parsed
 * @returns a map from snippet name to snippet definition; empty if the program has no source file for the snippet file
 * @throws Error if a referenced symbol is merged between multiple imports, or if a single snippet references two
 *         different default imports of the same module
 */
async function parseSnippetDefinitions(
  project: ProjectInfo,
): Promise<Map<string, SnippetDefinition>> {
  /** Where a symbol was imported from, and whether it is the default import of that module. */
  interface ImportLocation {
    isDefault: boolean;
    moduleSpecifier: string;
  }

  /** The symbols that a single snippet imports from one module. */
  interface ImportedSymbols {
    default?: string;
    named?: Set<string>;
  }

  const results = new Map<string, SnippetDefinition>();

  const snippetFile = path.join(project.path, ...SNIPPET_PATH);

  // Module specifiers always use forward slashes, but `path.relative` produces platform separators. Normalize to
  // POSIX separators so that the comparison below also works on Windows (this is a no-op on POSIX systems).
  const relativeIndexPath = path
    .relative(path.dirname(snippetFile), path.join(project.path, "src"))
    .split(path.sep)
    .join(path.posix.sep);

  // Every spelling of "the package's own index module" that we recognize in the snippet file.
  const packageIndexSpecifiers = new Set<string>([
    relativeIndexPath,
    path.posix.join(relativeIndexPath, "index.js"),
    path.posix.join(relativeIndexPath, "index"),
  ]);

  const program = ts.createProgram({
    rootNames: [snippetFile],
    options: {},
  });

  const sourceFile = program.getSourceFile(snippetFile);
  if (sourceFile === undefined) {
    // The program has no source file for the snippet file, so there is nothing to extract.
    return results;
  }

  const checker = program.getTypeChecker();

  const printer = ts.createPrinter({
    newLine: EOL === "\r\n" ? ts.NewLineKind.CarriageReturnLineFeed : ts.NewLineKind.LineFeed,
    removeComments: false,
    noEmitHelpers: true,
  });

  /**
   * Reads the text of an import's module specifier, replacing imports of the package's own index with the package
   * name so that the snippet imports from the published package.
   */
  const toSnippetModuleSpecifier = (specifier: ts.Expression): string => {
    // It is a grammar error for moduleSpecifier to be anything other than a string literal. In future versions of
    // ES, that might become untrue, but it seems unlikely.
    const text = (specifier as ts.StringLiteral).text;
    return packageIndexSpecifiers.has(text) ? project.name : text;
  };

  /**
   * A helper function to extract imported symbols from TypeScript nodes.
   *
   * If the node has a symbol (for example, an identifier), then the symbol is resolved. Symbol declarations may be
   * merged, so we extract all declarations that come from import clauses _in the same file_ and extract the module
   * specifier from their parent. The symbol is considered "defined" by a combined import from those locations.
   *
   * @param node - the node to check for imports
   * @returns the import locations that form the definition of the node's symbol (empty if it was not imported)
   */
  const extractImportLocations = (node: ts.Node): ImportLocation[] => {
    const locations: ImportLocation[] = [];

    for (const decl of checker.getSymbolAtLocation(node)?.declarations ?? []) {
      if (decl.getSourceFile() !== sourceFile) {
        continue;
      }

      if (ts.isImportClause(decl)) {
        // Default import: the import clause itself is the declaration and carries the local name.
        if (decl.name && ts.isImportDeclaration(decl.parent)) {
          locations.push({
            moduleSpecifier: toSnippetModuleSpecifier(decl.parent.moduleSpecifier),
            isDefault: true,
          });
        }
        continue;
      }

      // Non-default import: the declaration's grandparent is the import clause.
      const grandparent = decl.parent?.parent;
      if (grandparent && ts.isImportClause(grandparent)) {
        locations.push({
          moduleSpecifier: toSnippetModuleSpecifier(grandparent.parent.moduleSpecifier),
          isDefault: false,
        });
      }
    }

    return locations;
  };

  /**
   * Walks the body of a snippet and groups every imported symbol it references by the module it was imported from
   * (one entry per module, holding the whole set of symbols imported from it).
   */
  const collectImports = (body: ts.Block): Map<string, ImportedSymbols> => {
    const importMap = new Map<string, ImportedSymbols>();

    const recordImport = (name: string, { moduleSpecifier, isDefault }: ImportLocation): void => {
      let moduleImports = importMap.get(moduleSpecifier);
      if (!moduleImports) {
        moduleImports = {};
        importMap.set(moduleSpecifier, moduleImports);
      }

      if (!isDefault) {
        if (!moduleImports.named) {
          moduleImports.named = new Set();
        }
        moduleImports.named.add(name);
        return;
      }

      // The same default import may be referenced many times within one snippet; that is only an error if the
      // references use different local names for the default export of the same module.
      if (moduleImports.default !== undefined && moduleImports.default !== name) {
        throw new Error(
          `unrecoverable error: multiple default imports from the same module '${moduleSpecifier}'`,
        );
      }
      moduleImports.default = name;
    };

    const visit = (node: ts.Node): void => {
      if (ts.isIdentifier(node)) {
        const importLocations = extractImportLocations(node);
        if (importLocations.length > 1) {
          // We can probably handle this, but it's an obscure case and it's probably better to let it error out and
          // then observe whether or not we actually need (or even _want_) snippets with merged imports.
          throw new Error(
            `unrecoverable error: the type definition of '${node.text}' in the snippet file is merged between multiple imports, so we cannot extract it`,
          );
        }

        const location = importLocations[0];
        if (location) {
          // The symbol was imported, so we need to track the import to add it to the snippet later.
          log.debug(`symbol ${node.text} was imported from ${location.moduleSpecifier}`);
          recordImport(node.text, location);
        }
        // else the symbol was not imported within this file, so it must be defined in the ambient context of the
        // module, so we don't need to generate any code for it.
      }

      ts.forEachChild(node, visit);
    };

    body.statements.forEach((statement) => visit(statement));

    return importMap;
  };

  /** Renders one import declaration per module, separated by the platform EOL. */
  const renderImports = (importMap: Map<string, ImportedSymbols>, snippetName: string): string =>
    [...importMap.entries()]
      .map(([moduleSpecifier, symbols]) => {
        const importParts: string[] = [];
        if (symbols.default) {
          importParts.push(symbols.default);
        }
        if (symbols.named) {
          importParts.push(`{ ${[...symbols.named].join(", ")} }`);
        }

        if (importParts.length === 0) {
          throw new Error(
            `unrecoverable error: no imports were generated for the snippet '${snippetName}'`,
          );
        }

        return `import ${importParts.join(", ")} from "${moduleSpecifier}";`;
      })
      .join(EOL);

  // Run the same syntax validation pass that we run on samples when we convert to JS. This will prevent you from
  // using any syntax that isn't supported by our min node in snippets!
  const checkSyntax: ts.TransformerFactory<ts.SourceFile> = (context) => (file) => {
    const emitError = createDiagnosticEmitter(file);

    const visitor: ts.Visitor = (node) => {
      const syntaxError = testSyntax(node);

      if (syntaxError) {
        emitError(syntaxError.message, node, syntaxError.suggest);
      }

      return ts.visitEachChild(node, visitor, context);
    };

    // This pass only reports diagnostics, so the visited result is intentionally discarded.
    ts.visitNode(file, visitor);

    return file;
  };

  // TODO: how can we run this on the TS source without emitting to JS?
  // We'll also simplify the snippets a bit by getting rid of any expressions of the form
  //
  // EnvLookup ::= 'process' . 'env' . $_:ident ElseOp $e:expr
  // ElseOp ::= '??' | '||'
  //
  // We'll just replace them with the expr $e, simplifying the snippets a bit.
  const replaceEnvLookup: ts.TransformerFactory<ts.SourceFile> = (context) => (file) => {
    const visitor: ts.Visitor = (node) => {
      if (
        ts.isBinaryExpression(node) &&
        (node.operatorToken.kind === ts.SyntaxKind.QuestionQuestionToken ||
          node.operatorToken.kind === ts.SyntaxKind.BarBarToken) &&
        ts.isPropertyAccessExpression(node.left) &&
        // Won't bother checking the AST any further than this. It's sufficient I think to just see if the text of
        // the expression is exactly of the form `'process' '.' 'env'`
        /^\s*process\s*\.\s*env\s*$/.test(node.left.expression.getText(file))
      ) {
        return node.right;
      }
      return ts.visitEachChild(node, visitor, context);
    };

    // The transformed tree must be returned; returning `file` itself would silently drop every replacement.
    return ts.visitEachChild(file, visitor, context);
  };

  /** Builds the definition of a single snippet from its name and the block that forms its body. */
  const createSnippetDefinition = (snippetName: string, body: ts.Block): SnippetDefinition => {
    // Print the statements out as they are. We're going to recompile them later.
    const contents = printer.printList(
      ts.ListFormat.MultiLineBlockStatements,
      body.statements,
      sourceFile,
    );

    const importMap = collectImports(body);

    // We'll take the body of this snippet and transpile it as a file using `convert`.
    log.debug(`found a snippet named ${snippetName}: \n${contents}`);

    // Form import declarations and prepend them to the rest of the contents.
    const fullSnippetTypeScriptText = (
      renderImports(importMap, snippetName) +
      EOL +
      EOL +
      contents
    )
      .replace(
        // Need to get rid of any ts-ignores that were added because of unused symbols
        TS_IGNORE,
        UNIX_EOL,
      )
      .replace(
        // NOTE(review): presumably a marker used to keep blank lines in the snippet; each match is replaced with
        // two line feeds. Confirm against the definition of TS_PRESERVE_WHITESPACE.
        TS_PRESERVE_WHITESPACE,
        UNIX_EOL + UNIX_EOL,
      )
      .trim();

    return {
      name: snippetName,
      typescriptSourceText: format(fullSnippetTypeScriptText, "typescript").then((res) =>
        res.split(/\r?\n/),
      ),
      async convert() {
        const res = await convert(fullSnippetTypeScriptText, {
          transformers: {
            before: [replaceEnvLookup, checkSyntax],
            after: [],
          },
        });
        return res.trim().split(/\r?\n/);
      },
    };
  };

  /** Returns the block body of a function expression or block-bodied arrow function, if `fn` is one. */
  const getBlockBody = (fn: ts.Expression | undefined): ts.Block | undefined => {
    if (fn === undefined) {
      return undefined;
    }
    if (ts.isFunctionExpression(fn)) {
      return fn.body;
    }
    if (ts.isArrowFunction(fn) && ts.isBlock(fn.body)) {
      return fn.body;
    }
    return undefined;
  };

  const visitSnippetDefinition = (node: ts.Node): void => {
    // We accept any test definition that calls the exact symbol 'it' with a
    // string literal and a function expression where the body of the function
    // is a block. We don't care whether the function is named or async or what
    // its type annotations are. Those may be used freely to ensure correctness.
    //
    // Snippet ::= 'it' ( $name:litstr , BlockFn )
    // BlockFn ::=
    //   | 'async'? ( $_:params ) => { $body:statements }
    //   | 'async'? function $_:ident? ( ) { $body:statements }
    if (
      ts.isCallExpression(node) &&
      ts.isIdentifier(node.expression) &&
      node.expression.text === "it"
    ) {
      const nameArg = node.arguments[0];
      const body = getBlockBody(node.arguments[1]);

      if (nameArg !== undefined && ts.isStringLiteral(nameArg) && body !== undefined) {
        results.set(nameArg.text, createSnippetDefinition(nameArg.text, body));

        // We've found a snippet. No need to recur any farther.
        return;
      }
    }

    ts.forEachChild(node, visitSnippetDefinition);
  };

  visitSnippetDefinition(sourceFile);

  return results;
}