private async processWorkspaceFolders()

in server/aws-lsp-codewhisperer/src/shared/localProjectContextController.ts [376:485]


    /**
     * Crawls every workspace folder and collects the absolute paths of the files to index.
     *
     * All folders are crawled concurrently and share a single size budget, so the order in which
     * files from different folders consume that budget is not fixed.
     *
     * @param workspaceFolders Folders to crawl. When missing or empty, nothing is crawled and `[]` is returned.
     * @param ignoreFilePatterns Patterns added to an `ignore()` matcher; directories and files whose path
     *   relative to their workspace folder matches are skipped.
     * @param respectUserGitIgnores When truthy, every `.gitignore` found during the crawl is read and applied
     *   to the files located under its directory, and the size limits are enforced only after that filtering.
     *   When falsy, size limits are enforced while crawling.
     * @param includeSymLinks Negated and passed to fdir as `resolvePaths`. Presumably: when falsy, symlinks are
     *   resolved to their real paths so that targets outside the folder can be rejected (confirm against fdir).
     * @param fileExtensions Each entry is turned into the glob `**\/*<ext>`. When omitted, only `.gitignore`
     *   files match the glob, and those are never returned as source files.
     * @param maxFileSizeMB Per-file size limit in MB, capped at `DEFAULT_MAX_FILE_SIZE_MB`.
     * @param maxIndexSizeMB Total size limit in MB, capped at `DEFAULT_MAX_INDEX_SIZE_MB`.
     * @returns Paths of all accepted files, flattened across workspace folders.
     */
    private async processWorkspaceFolders(
        workspaceFolders?: WorkspaceFolder[] | null,
        ignoreFilePatterns?: string[],
        respectUserGitIgnores?: boolean,
        includeSymLinks?: boolean,
        fileExtensions?: string[],
        maxFileSizeMB?: number,
        maxIndexSizeMB?: number
    ): Promise<string[]> {
        if (!workspaceFolders?.length) {
            this.log.info(`Skipping indexing: no workspace folders available`)
            return []
        }

        this.log.info(`Indexing ${workspaceFolders.length} workspace folders...`)

        const filter = ignore().add(ignoreFilePatterns ?? [])

        // Caller-provided limits may only lower the defaults, never raise them.
        const fileSizeLimitMB = Math.min(maxFileSizeMB ?? Infinity, this.DEFAULT_MAX_FILE_SIZE_MB)
        const indexSizeLimitMB = Math.min(maxIndexSizeMB ?? Infinity, this.DEFAULT_MAX_INDEX_SIZE_MB)

        // Shared by every folder's crawl. NOTE(review): the budget checks below only make sense if
        // fileMeetsFileSizeConstraints deducts accepted files from remainingIndexSize — confirm.
        const sizeConstraints: SizeConstraints = {
            maxFileSize: fileSizeLimitMB * this.MB_TO_BYTES,
            remainingIndexSize: indexSizeLimitMB * this.MB_TO_BYTES,
        }

        // True when a `path.relative()` result points outside its base directory. A bare `startsWith('..')`
        // would also reject in-tree names such as `..cache`, and an absolute result (Windows, different
        // drive) must not be handed to `ignores()`, which only accepts relative paths.
        const isOutside = (relativePath: string): boolean =>
            relativePath === '..' || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath)

        const controller = new AbortController()

        const nestedFilePaths = await Promise.all(
            workspaceFolders.map(async (folder: WorkspaceFolder) => {
                const absolutePath = path.resolve(URI.parse(folder.uri).fsPath)
                const localGitIgnoreFiles: string[] = []

                const crawler = new fdir()
                    .withSymlinks({ resolvePaths: !includeSymLinks })
                    .withAbortSignal(controller.signal)
                    .exclude((dirName: string, dirPath: string) => {
                        const relativePath = path.relative(absolutePath, dirPath)
                        return isOutside(relativePath) || filter.ignores(relativePath)
                    })
                    .glob(...(fileExtensions?.map(ext => `**/*${ext}`) ?? []), '**/.gitignore')
                    .filter((filePath: string, isDirectory: boolean) => {
                        const relativePath = path.relative(absolutePath, filePath)

                        if (isDirectory || isOutside(relativePath) || filter.ignores(relativePath)) {
                            return false
                        }

                        // Without .gitignore post-filtering the budget is consumed during the crawl, so once
                        // it is exhausted every crawl (they share the controller) can stop.
                        if (!respectUserGitIgnores && sizeConstraints.remainingIndexSize <= 0) {
                            controller.abort()
                            return false
                        }

                        // .gitignore files are collected for later use but never indexed themselves.
                        if (path.basename(filePath) === '.gitignore') {
                            localGitIgnoreFiles.push(filePath)
                            return false
                        }

                        // With .gitignore filtering enabled, size checks are deferred so that files which end
                        // up ignored do not consume the budget.
                        return respectUserGitIgnores || this.fileMeetsFileSizeConstraints(filePath, sizeConstraints)
                    })

                const sourceFiles: string[] = await crawler.crawl(absolutePath).withPromise()

                if (!respectUserGitIgnores) {
                    return sourceFiles
                }

                // Load each discovered .gitignore once, paired with the directory its rules are relative to.
                const userGitIgnores = await Promise.all(
                    localGitIgnoreFiles.map(async gitIgnorePath => {
                        const matcher = ignore()
                        try {
                            matcher.add((await fs.promises.readFile(gitIgnorePath)).toString())
                        } catch (error) {
                            // Best effort: an unreadable .gitignore contributes no rules.
                            this.log.error(`Error reading .gitignore file ${gitIgnorePath}: ${error}`)
                        }
                        return { directory: path.dirname(path.resolve(gitIgnorePath)), matcher }
                    })
                )

                const filteredSourceFiles: string[] = []
                for (const filePath of sourceFiles) {
                    if (sizeConstraints.remainingIndexSize <= 0) {
                        break
                    }

                    // A .gitignore only governs files located under its own directory.
                    const isIgnored = userGitIgnores.some(({ directory, matcher }) => {
                        const relativePath = path.relative(directory, filePath)
                        return !isOutside(relativePath) && matcher.ignores(relativePath)
                    })

                    if (!isIgnored && this.fileMeetsFileSizeConstraints(filePath, sizeConstraints)) {
                        filteredSourceFiles.push(filePath)
                    }
                }
                return filteredSourceFiles
            })
        )

        const workspaceSourceFiles = nestedFilePaths.flat()

        this.log.info(`Indexing complete: found ${workspaceSourceFiles.length} files.`)
        return workspaceSourceFiles
    }