codex-cli/src/utils/singlepass/context_files.ts (302 lines of code) (raw):

/* eslint-disable no-await-in-loop */ import * as fsSync from "fs"; import fs from "fs/promises"; import path from "path"; /** Represents file contents with absolute path. */ export interface FileContent { path: string; content: string; } /** A simple LRU cache entry structure. */ interface CacheEntry { /** Last modification time of the file (epoch ms). */ mtime: number; /** Size of the file in bytes. */ size: number; /** Entire file content. */ content: string; } /** * A minimal LRU-based file cache to store file contents keyed by absolute path. * We store (mtime, size, content). If a file's mtime or size changes, we consider * the cache invalid and re-read. */ class LRUFileCache { private maxSize: number; private cache: Map<string, CacheEntry>; constructor(maxSize: number) { this.maxSize = maxSize; this.cache = new Map(); } /** * Retrieves the cached entry for the given path, if it exists. * If found, we re-insert it in the map to mark it as recently used. */ get(key: string): CacheEntry | undefined { const entry = this.cache.get(key); if (entry) { // Re-insert to maintain recency this.cache.delete(key); this.cache.set(key, entry); } return entry; } /** * Insert or update an entry in the cache. */ set(key: string, entry: CacheEntry): void { // if key already in map, delete it so that insertion below sets recency. if (this.cache.has(key)) { this.cache.delete(key); } this.cache.set(key, entry); // If over capacity, evict the least recently used entry. if (this.cache.size > this.maxSize) { const firstKey = this.cache.keys().next(); if (!firstKey.done) { this.cache.delete(firstKey.value); } } } /** * Remove an entry from the cache. */ delete(key: string): void { this.cache.delete(key); } /** * Returns all keys in the cache (for pruning old files, etc.). */ keys(): IterableIterator<string> { return this.cache.keys(); } } // Environment-based defaults const MAX_CACHE_ENTRIES = parseInt( process.env["TENX_FILE_CACHE_MAX_ENTRIES"] || "1000", 10, ); // Global LRU file cache instance. const FILE_CONTENTS_CACHE = new LRUFileCache(MAX_CACHE_ENTRIES); // Default list of glob patterns to ignore if the user doesn't provide a custom ignore file. const DEFAULT_IGNORE_PATTERNS = ` # Binaries and large media *.woff *.exe *.dll *.bin *.dat *.pdf *.png *.jpg *.jpeg *.gif *.bmp *.tiff *.ico *.zip *.tar *.gz *.rar *.7z *.mp3 *.mp4 *.avi *.mov *.wmv # Build and distribution build/* dist/* # Logs and temporary files *.log *.tmp *.swp *.swo *.bak *.old # Python artifacts *.egg-info/* __pycache__/* *.pyc *.pyo *.pyd .pytest_cache/* .ruff_cache/* venv/* .venv/* env/* # Rust artifacts target/* Cargo.lock # Node.js artifacts *.tsbuildinfo node_modules/* package-lock.json # Environment files .env/* # Git .git/* # OS specific files .DS_Store Thumbs.db # Hidden files .*/* .* `; function _read_default_patterns_file(filePath?: string): string { if (!filePath) { return DEFAULT_IGNORE_PATTERNS; } return fsSync.readFileSync(filePath, "utf-8"); } /** Loads ignore patterns from a file (or a default list) and returns a list of RegExp patterns. */ export function loadIgnorePatterns(filePath?: string): Array<RegExp> { try { const raw = _read_default_patterns_file(filePath); const lines = raw.split(/\r?\n/); const cleaned = lines .map((l: string) => l.trim()) .filter((l: string) => l && !l.startsWith("#")); // Convert each pattern to a RegExp with a leading '*/'. const regs = cleaned.map((pattern: string) => { const escaped = pattern .replace(/[.+^${}()|[\]\\]/g, "\\$&") .replace(/\*/g, ".*") .replace(/\?/g, "."); const finalRe = `^(?:(?:(?:.*/)?)(?:${escaped}))$`; return new RegExp(finalRe, "i"); }); return regs; } catch { return []; } } /** Checks if a given path is ignored by any of the compiled patterns. */ export function shouldIgnorePath( p: string, compiledPatterns: Array<RegExp>, ): boolean { const normalized = path.resolve(p); for (const regex of compiledPatterns) { if (regex.test(normalized)) { return true; } } return false; } /** * Recursively builds an ASCII representation of a directory structure, given a list * of file paths. */ export function makeAsciiDirectoryStructure( rootPath: string, filePaths: Array<string>, ): string { const root = path.resolve(rootPath); // We'll store a nested object. Directories => sub-tree or null if it's a file. interface DirTree { [key: string]: DirTree | null; } const tree: DirTree = {}; for (const file of filePaths) { const resolved = path.resolve(file); let relPath: string; try { const rp = path.relative(root, resolved); // If it's outside of root, skip. if (rp.startsWith("..")) { continue; } relPath = rp; } catch { continue; } const parts = relPath.split(path.sep); let current: DirTree = tree; for (let i = 0; i < parts.length; i++) { const part = parts[i]; if (!part) { continue; } if (i === parts.length - 1) { // file current[part] = null; } else { if (!current[part]) { current[part] = {}; } current = current[part] as DirTree; } } } const lines: Array<string> = [root]; function recurse(node: DirTree, prefix: string): void { const entries = Object.keys(node).sort((a, b) => { // Directories first, then files const aIsDir = node[a] != null; const bIsDir = node[b] != null; if (aIsDir && !bIsDir) { return -1; } if (!aIsDir && bIsDir) { return 1; } return a.localeCompare(b); }); for (let i = 0; i < entries.length; i++) { const entry = entries[i]; if (!entry) { continue; } const isLast = i === entries.length - 1; const connector = isLast ? "└──" : "├──"; const isDir = node[entry] != null; lines.push(`${prefix}${connector} ${entry}`); if (isDir) { const newPrefix = prefix + (isLast ? " " : "│ "); recurse(node[entry] as DirTree, newPrefix); } } } recurse(tree, ""); return lines.join("\n"); } /** * Recursively collects all files under rootPath that are not ignored, skipping symlinks. * Then for each file, we check if it's in the LRU cache. If not or changed, we read it. * Returns an array of FileContent. * * After collecting, we remove from the cache any file that no longer exists in the BFS. */ export async function getFileContents( rootPath: string, compiledPatterns: Array<RegExp>, ): Promise<Array<FileContent>> { const root = path.resolve(rootPath); const candidateFiles: Array<string> = []; // BFS queue of directories const queue: Array<string> = [root]; while (queue.length > 0) { const currentDir = queue.pop()!; let dirents: Array<fsSync.Dirent> = []; try { dirents = await fs.readdir(currentDir, { withFileTypes: true }); } catch { continue; } for (const dirent of dirents) { try { const resolved = path.resolve(currentDir, dirent.name); // skip symlinks const lstat = await fs.lstat(resolved); if (lstat.isSymbolicLink()) { continue; } if (dirent.isDirectory()) { // check if ignored if (!shouldIgnorePath(resolved, compiledPatterns)) { queue.push(resolved); } } else if (dirent.isFile()) { // check if ignored if (!shouldIgnorePath(resolved, compiledPatterns)) { candidateFiles.push(resolved); } } } catch { // skip } } } // We'll read the stat for each candidate file, see if we can skip reading from cache. const results: Array<FileContent> = []; // We'll keep track of which files we actually see. const seenPaths = new Set<string>(); await Promise.all( candidateFiles.map(async (filePath) => { seenPaths.add(filePath); let st: fsSync.Stats | null = null; try { st = await fs.stat(filePath); } catch { return; } if (!st) { return; } const cEntry = FILE_CONTENTS_CACHE.get(filePath); if ( cEntry && Math.abs(cEntry.mtime - st.mtime.getTime()) < 1 && cEntry.size === st.size ) { // same mtime, same size => use cache results.push({ path: filePath, content: cEntry.content }); } else { // read file try { const buf = await fs.readFile(filePath); const content = buf.toString("utf-8"); // store in cache FILE_CONTENTS_CACHE.set(filePath, { mtime: st.mtime.getTime(), size: st.size, content, }); results.push({ path: filePath, content }); } catch { // skip } } }), ); // Now remove from cache any file that wasn't encountered. const currentKeys = [...FILE_CONTENTS_CACHE.keys()]; for (const key of currentKeys) { if (!seenPaths.has(key)) { FILE_CONTENTS_CACHE.delete(key); } } // sort results by path results.sort((a, b) => a.path.localeCompare(b.path)); return results; }