tools/@aws-cdk/lazify/lib/index.ts (198 lines of code) (raw):

/** * Transform a set of .js files, making all module imports lazy * * That is: * * - Find all top-level require() assignments, and replace them with a function that performs * the require(). That way, the require() is only done if any of the objects from its scope * are actually used. * - Find all (transpiled) `export * from 'xyz';` statements (by searching for an invocation * of `__exportStar()`): load the actual module, enumerate the entries, and create a getter * for each entry. */ import { promises as fs } from 'fs'; import * as path from 'path'; import * as ts from 'typescript'; type LogFn = (...x: string[]) => void; export async function transformFile(filename: string, verbose: boolean) { progress(filename, '... '); const contents = await fs.readFile(filename, { encoding: 'utf-8' }); const transformed = transformFileContents(filename, contents, progress); await fs.writeFile(filename, transformed, { encoding: 'utf-8' }); progress(' Done!\n'); function progress(...x: string[]) { if (verbose) { process.stderr.write(x.join(' ')); } } } export function transformFileContents(filename: string, contents: string, progress?: LogFn) { const sourceFile = ts.createSourceFile( filename, contents, ts.ScriptTarget.Latest, true // setParentNodes, need this for tree analysis ); // Find all top-level requires and turn them into a function const topLevelAssignments = sourceFile.statements .filter(ts.isVariableStatement) .filter((stmt) => stmt.declarationList.declarations.length === 1) .map((stmt) => [stmt, stmt.declarationList.declarations[0]] as const); progress?.(`${topLevelAssignments.length} declarations`, '... '); const topLevelRequires = topLevelAssignments .flatMap(([stmt, a]) => a.initializer && ts.isCallExpression(a.initializer) && ts.isIdentifier(a.initializer.expression) && a.initializer.expression.text === 'require' && ts.isStringLiteral(a.initializer.arguments[0]) && ts.isIdentifier(a.name) ? [[stmt, a.name, a.initializer.arguments[0].text] as const] : []); progress?.(`${topLevelRequires.length} requires`, '... '); let file = sourceFile; for (const [stmt, binding, moduleName] of topLevelRequires) { const result = ts.transform(file, [(ctx: ts.TransformationContext): ts.Transformer<ts.SourceFile> => { const factory = ctx.factory; const gen = new ExpressionGenerator(factory); const visit: ts.Visitor = node => { // If this is the statement, replace it with a function definition // We replace it with a function that will replace itself after the first invocation. // This is memoizing on steroids. Instead of: // // function mod() { return require('mod'); } // // We do: // // let mod = () => { const tmp = require('mod'); mod = () => tmp; return tmp; } // // This is about 100x faster at call time (~20ns per call instead of ~2us). if (node === stmt) { return createVariable(factory, binding, factory.createArrowFunction(undefined, undefined, [], undefined, undefined, factory.createBlock([ // tmp = require(...) createVariable(factory, 'tmp', factory.createCallExpression(factory.createIdentifier('require'), [], [factory.createStringLiteral(moduleName)])), // <this_fn> = () => tmp gen.assignmentStatement(binding.text, factory.createArrowFunction(undefined, undefined, [], undefined, undefined, factory.createIdentifier('tmp'))), // return tmp factory.createReturnStatement(factory.createIdentifier('tmp')), ]), ), ); } // If this is a shorthand property assignment and we we are the identifier in it, split it into two if (ts.isShorthandPropertyAssignment(node) && ts.isIdentifier(node.name) && node.name.text === binding.text) { return factory.createPropertyAssignment(node.name.text, factory.createCallExpression(factory.createIdentifier(binding.text), [], [])); } // If this was an identifier referencing the original required module, turn it into a function call if (ts.isIdentifier(node) && node.text === binding.text) { // Ignore this identifier if it is not in RHS position const ignore = node.parent && ( (ts.isPropertyAssignment(node.parent) && node.parent.name === node) // { ident: value } || (ts.isPropertyAccessExpression(node.parent) && node.parent.name === node) // obj.ident = 3; || ts.isMethodDeclaration(node.parent) // public ident() { ... } || ts.isMethodSignature(node.parent) // interface X { ident(); } || ts.isPropertyDeclaration(node.parent) // class X { ident: string } || ts.isPropertySignature(node.parent) // interface X { ident: string } || ts.isGetAccessor(node.parent) // class X { get ident() { ... } } || ts.isGetAccessorDeclaration(node.parent) // interface X { get ident: string } || ts.isSetAccessor(node.parent) // class X { set ident() { ... } } || ts.isSetAccessorDeclaration(node.parent) // interface X { set ident: string } ); // Another concern is shadowing: we're not checking for that right now because // I don't know how to and in our code base it won't pose a problem, as we have // linter rules that forbid identifier shadowing (this is an // assumption that makes this tool non-portable for now). // More places are also not RHS but if we leave those, it'll blow up syntactically and that's good if (!ignore) { return factory.createCallExpression(factory.createIdentifier(binding.text), [], []); } } return ts.visitEachChild(node, child => visit(child), ctx); }; return (sf: ts.SourceFile) => ts.visitNode(sf, visit, ts.isSourceFile) ?? sf; }]); file = result.transformed[0]; progress?.('X'); } // Replace __exportStar file = ts.transform(file, [(ctx: ts.TransformationContext): ts.Transformer<ts.SourceFile> => { const factory = ctx.factory; const gen = new ExpressionGenerator(factory); const visit: ts.Visitor = node => { if (node.parent && ts.isSourceFile(node.parent) && ts.isExpressionStatement(node) && ts.isCallExpression(node.expression) && ts.isIdentifier(node.expression.expression) && node.expression.expression.text === '__exportStar' && node.expression.arguments.length === 2 && ts.isCallExpression(node.expression.arguments[0]) && ts.isIdentifier(node.expression.arguments[0].expression) && node.expression.arguments[0].expression.text === 'require' && ts.isStringLiteral(node.expression.arguments[0].arguments[0])) { // __exportStar(require('something'), exports); const requiredModule = node.expression.arguments[0].arguments[0].text; const file = require.resolve(requiredModule, { paths: [path.dirname(filename)] }); // FIXME: Should probably do this in a subprocess const module = require(file); const entries = Object.keys(module); return entries.flatMap((entry) => gen.moduleGetterOnce(entry, requiredModule, (mod) => factory.createPropertyAccessExpression(mod, entry)) ); } if (node.parent && ts.isSourceFile(node.parent) && ts.isExpressionStatement(node) && ts.isBinaryExpression(node.expression) && node.expression.operatorToken.kind === ts.SyntaxKind.EqualsToken && ts.isPropertyAccessExpression(node.expression.left) && ts.isIdentifier(node.expression.left.expression) && node.expression.left.expression.text === 'exports' && ts.isCallExpression(node.expression.right) && ts.isIdentifier(node.expression.right.expression) && node.expression.right.expression.text === 'require' && ts.isStringLiteral(node.expression.right.arguments[0])) { // exports.module = require('./module'); const exportName = node.expression.left.name.text; const moduleName = node.expression.right.arguments[0].text; return gen.moduleGetterOnce(exportName, moduleName, (x) => x); } return ts.visitEachChild(node, child => visit(child), ctx); }; return (sf: ts.SourceFile) => ts.visitNode(sf, visit, ts.isSourceFile) ?? sf; }]).transformed[0]; // To print the AST, we'll use TypeScript's printer const printer = ts.createPrinter({ newLine: ts.NewLineKind.LineFeed }); return printer.printFile(file); } function createVariable(factory: ts.NodeFactory, name: string | ts.BindingName, expression: ts.Expression) { return factory.createVariableStatement([], factory.createVariableDeclarationList([ factory.createVariableDeclaration(name, undefined, undefined, expression), ])); } class ExpressionGenerator { private alreadyEmittedExports = new Set<string>(); private emittedNoFold = false; constructor(private readonly factory: ts.NodeFactory) { } /** * Create an lazy getter for a particular value at the module level * * Since Node statically analyzes CommonJS modules to determine its exports * (using the `cjs-module-lexer` module), we need to trick it into recognizing * these exports as legitimate. * * We do that by generating one form it will recognize that doesn't do anything, * in combination with a form that actually works, that doesn't disqualify the * export name, and that doesn't get collapsed by esbuild. * * If we do: * * ``` * exports.myExport = void 0; * Object.defineProperty(exports, 'myExport', { ... }); * ``` * * Then the lexer detects conflicting definitions of `myExport`, one of which is * not supported, and it disqualifies the name for being exported. * * If we do: * * ``` * exports.myExport = void 0; * Object.defineProperty(exports', 'm' + 'yExport', { ... }); * ``` * * Then the code passes the lexer: it detects `myExport` as an export, and it * doesn't detect the disqualifying export. * * However, that last syntax is detected and constant-folded by `esbuild` (which * we run to minify all files)! So esbuild turns `'m' + 'yExport'` back into * `'myExport'`, and then the lexer detects it again as a disqualifying export! * * So we need to find an expression that won't be constant-folded by esbuild, and * won't be detected by the lexer. * * This is what we'll be generating: * * ``` * let _noFold; * exports.myExport = void 0; * Object.defineProperty(exports', _noFold = 'myExport', { ... }); * ``` * * This takes advantage of the fact that the return value of an `<x> = <y>` expression * returns `<y>`, but has a side effect so cannot be safely optimized away. */ public moduleGetter( exportName: string, moduleName: string, moduleFormatter: (x: ts.Expression) => ts.Expression, ) { const factory = this.factory; const ret = []; if (!this.emittedNoFold) { ret.push( factory.createVariableStatement([], factory.createVariableDeclarationList([ factory.createVariableDeclaration('_noFold'), ]))); this.emittedNoFold = true; } ret.push( // exports.<name> = void 0; factory.createExpressionStatement(factory.createBinaryExpression( factory.createPropertyAccessExpression( factory.createIdentifier('exports'), factory.createIdentifier(exportName)), ts.SyntaxKind.EqualsToken, factory.createVoidZero())), // Object.defineProperty(exports, _noFold = "<name>", { get: () => ... }); factory.createExpressionStatement(factory.createCallExpression( factory.createPropertyAccessExpression(factory.createIdentifier('Object'), factory.createIdentifier('defineProperty')), undefined, [ factory.createIdentifier('exports'), this.assignment('_noFold', factory.createStringLiteral(exportName)), factory.createObjectLiteralExpression([ factory.createPropertyAssignment('enumerable', factory.createTrue()), factory.createPropertyAssignment('configurable', factory.createTrue()), factory.createPropertyAssignment('get', factory.createArrowFunction(undefined, undefined, [], undefined, undefined, moduleFormatter( factory.createCallExpression(factory.createIdentifier('require'), undefined, [factory.createStringLiteral(moduleName)])))), ]), ] ) )); return ret; } /** * Prevent emitting an export if it has already been emitted before * * This assumes that the symbols have the same definition, and are only duplicated because of * accidental multiple `export *`s. */ public moduleGetterOnce( exportName: string, moduleName: string, moduleFormatter: (x: ts.Expression) => ts.Expression, ): ReturnType<ExpressionGenerator['moduleGetter']> { if (this.alreadyEmittedExports.has(exportName)) { return []; } this.alreadyEmittedExports.add(exportName); return this.moduleGetter(exportName, moduleName, moduleFormatter); } public assignment(name: string, expression: ts.Expression) { return this.factory.createBinaryExpression( this.factory.createIdentifier(name), ts.SyntaxKind.EqualsToken, expression); } public assignmentStatement(name: string, expression: ts.Expression) { return this.factory.createExpressionStatement(this.assignment(name, expression)); } }