codex-cli/src/approvals.ts (440 lines of code) (raw):
import type { ParseEntry, ControlOperator } from "shell-quote";
import {
identify_files_added,
identify_files_needed,
} from "./utils/agent/apply-patch";
import * as path from "path";
import { parse } from "shell-quote";
export type SafetyAssessment = {
/**
* If set, this approval is for an apply_patch call and these are the
* arguments.
*/
applyPatch?: ApplyPatchCommand;
} & (
| {
type: "auto-approve";
/**
* This must be true if the command is not on the "known safe" list, but
* was auto-approved due to `full-auto` mode.
*/
runInSandbox: boolean;
reason: string;
group: string;
}
| {
type: "ask-user";
}
/**
* Reserved for a case where we are certain the command is unsafe and should
* not be presented as an option to the user.
*/
| {
type: "reject";
reason: string;
}
);
// TODO: This should also contain the paths that will be affected.
export type ApplyPatchCommand = {
patch: string;
};
export type ApprovalPolicy =
/**
* Under this policy, only "known safe" commands as defined by
* `isSafeCommand()` that only read files will be auto-approved.
*/
| "suggest"
/**
* In addition to commands that are auto-approved according to the rules for
* "suggest", commands that write files within the user's approved list of
* writable paths will also be auto-approved.
*/
| "auto-edit"
/**
* All commands are auto-approved, but are expected to be run in a sandbox
* where network access is disabled and writes are limited to a specific set
* of paths.
*/
| "full-auto";
/**
* Tries to assess whether a command is safe to run, though may defer to the
* user for approval.
*
* Note `env` must be the same `env` that will be used to spawn the process.
*/
export function canAutoApprove(
command: ReadonlyArray<string>,
workdir: string | undefined,
policy: ApprovalPolicy,
writableRoots: ReadonlyArray<string>,
env: NodeJS.ProcessEnv = process.env,
): SafetyAssessment {
if (command[0] === "apply_patch") {
return command.length === 2 && typeof command[1] === "string"
? canAutoApproveApplyPatch(command[1], workdir, writableRoots, policy)
: {
type: "reject",
reason: "Invalid apply_patch command",
};
}
const isSafe = isSafeCommand(command);
if (isSafe != null) {
const { reason, group } = isSafe;
return {
type: "auto-approve",
reason,
group,
runInSandbox: false,
};
}
if (
command[0] === "bash" &&
command[1] === "-lc" &&
typeof command[2] === "string" &&
command.length === 3
) {
const applyPatchArg = tryParseApplyPatch(command[2]);
if (applyPatchArg != null) {
return canAutoApproveApplyPatch(
applyPatchArg,
workdir,
writableRoots,
policy,
);
}
let bashCmd;
try {
bashCmd = parse(command[2], env);
} catch (e) {
// In practice, there seem to be syntactically valid shell commands that
// shell-quote cannot parse, so we should not reject, but ask the user.
switch (policy) {
case "full-auto":
// In full-auto, we still run the command automatically, but must
// restrict it to the sandbox.
return {
type: "auto-approve",
reason: "Full auto mode",
group: "Running commands",
runInSandbox: true,
};
case "suggest":
case "auto-edit":
// In all other modes, since we cannot reason about the command, we
// should ask the user.
return {
type: "ask-user",
};
}
}
// bashCmd could be a mix of strings and operators, e.g.:
// "ls || (true && pwd)" => [ 'ls', { op: '||' }, '(', 'true', { op: '&&' }, 'pwd', ')' ]
// We try to ensure that *every* command segment is deemed safe and that
// all operators belong to an allow-list. If so, the entire expression is
// considered auto-approvable.
const shellSafe = isEntireShellExpressionSafe(bashCmd);
if (shellSafe != null) {
const { reason, group } = shellSafe;
return {
type: "auto-approve",
reason,
group,
runInSandbox: false,
};
}
}
return policy === "full-auto"
? {
type: "auto-approve",
reason: "Full auto mode",
group: "Running commands",
runInSandbox: true,
}
: { type: "ask-user" };
}
function canAutoApproveApplyPatch(
applyPatchArg: string,
workdir: string | undefined,
writableRoots: ReadonlyArray<string>,
policy: ApprovalPolicy,
): SafetyAssessment {
switch (policy) {
case "full-auto":
// Continue to see if this can be auto-approved.
break;
case "suggest":
return {
type: "ask-user",
applyPatch: { patch: applyPatchArg },
};
case "auto-edit":
// Continue to see if this can be auto-approved.
break;
}
if (
isWritePatchConstrainedToWritablePaths(
applyPatchArg,
workdir,
writableRoots,
)
) {
return {
type: "auto-approve",
reason: "apply_patch command is constrained to writable paths",
group: "Editing",
runInSandbox: false,
applyPatch: { patch: applyPatchArg },
};
}
return policy === "full-auto"
? {
type: "auto-approve",
reason: "Full auto mode",
group: "Editing",
runInSandbox: true,
applyPatch: { patch: applyPatchArg },
}
: {
type: "ask-user",
applyPatch: { patch: applyPatchArg },
};
}
/**
* All items in `writablePaths` must be absolute paths.
*/
function isWritePatchConstrainedToWritablePaths(
applyPatchArg: string,
workdir: string | undefined,
writableRoots: ReadonlyArray<string>,
): boolean {
// `identify_files_needed()` returns a list of files that will be modified or
// deleted by the patch, so all of them should already exist on disk. These
// candidate paths could be further canonicalized via fs.realpath(), though
// that does seem necessary and may even cause false negatives (assuming we
// allow writes in other directories that are symlinked from a writable path)
//
// By comparison, `identify_files_added()` returns a list of files that will
// be added by the patch, so they should NOT exist on disk yet and therefore
// using one with fs.realpath() should return an error.
return (
allPathsConstrainedTowritablePaths(
identify_files_needed(applyPatchArg),
workdir,
writableRoots,
) &&
allPathsConstrainedTowritablePaths(
identify_files_added(applyPatchArg),
workdir,
writableRoots,
)
);
}
function allPathsConstrainedTowritablePaths(
candidatePaths: ReadonlyArray<string>,
workdir: string | undefined,
writableRoots: ReadonlyArray<string>,
): boolean {
return candidatePaths.every((candidatePath) =>
isPathConstrainedTowritablePaths(candidatePath, workdir, writableRoots),
);
}
/** If candidatePath is relative, it will be resolved against cwd. */
function isPathConstrainedTowritablePaths(
candidatePath: string,
workdir: string | undefined,
writableRoots: ReadonlyArray<string>,
): boolean {
const candidateAbsolutePath = resolvePathAgainstWorkdir(
candidatePath,
workdir,
);
return writableRoots.some((writablePath) =>
pathContains(writablePath, candidateAbsolutePath),
);
}
/**
* If not already an absolute path, resolves `candidatePath` against `workdir`
* if specified; otherwise, against `process.cwd()`.
*/
export function resolvePathAgainstWorkdir(
candidatePath: string,
workdir: string | undefined,
): string {
if (path.isAbsolute(candidatePath)) {
return candidatePath;
} else if (workdir != null) {
return path.resolve(workdir, candidatePath);
} else {
return path.resolve(candidatePath);
}
}
/** Both `parent` and `child` must be absolute paths. */
function pathContains(parent: string, child: string): boolean {
const relative = path.relative(parent, child);
return (
// relative path doesn't go outside parent
!!relative && !relative.startsWith("..") && !path.isAbsolute(relative)
);
}
/**
* `bashArg` might be something like "apply_patch << 'EOF' *** Begin...".
* If this function returns a string, then it is the content the arg to
* apply_patch with the heredoc removed.
*/
function tryParseApplyPatch(bashArg: string): string | null {
const prefix = "apply_patch";
if (!bashArg.startsWith(prefix)) {
return null;
}
const heredoc = bashArg.slice(prefix.length);
const heredocMatch = heredoc.match(
/^\s*<<\s*['"]?(\w+)['"]?\n([\s\S]*?)\n\1/,
);
if (heredocMatch != null && typeof heredocMatch[2] === "string") {
return heredocMatch[2].trim();
} else {
return heredoc.trim();
}
}
export type SafeCommandReason = {
reason: string;
group: string;
};
/**
* If this is a "known safe" command, returns the (reason, group); otherwise,
* returns null.
*/
export function isSafeCommand(
command: ReadonlyArray<string>,
): SafeCommandReason | null {
const [cmd0, cmd1, cmd2, cmd3] = command;
switch (cmd0) {
case "cd":
return {
reason: "Change directory",
group: "Navigating",
};
case "ls":
return {
reason: "List directory",
group: "Searching",
};
case "pwd":
return {
reason: "Print working directory",
group: "Navigating",
};
case "true":
return {
reason: "No-op (true)",
group: "Utility",
};
case "echo":
return { reason: "Echo string", group: "Printing" };
case "cat":
return {
reason: "View file contents",
group: "Reading files",
};
case "rg":
return {
reason: "Ripgrep search",
group: "Searching",
};
case "find": {
// Certain options to `find` allow executing arbitrary processes, so we
// cannot auto-approve them.
if (
command.some((arg: string) => UNSAFE_OPTIONS_FOR_FIND_COMMAND.has(arg))
) {
break;
} else {
return {
reason: "Find files or directories",
group: "Searching",
};
}
}
case "grep":
return {
reason: "Text search (grep)",
group: "Searching",
};
case "head":
return {
reason: "Show file head",
group: "Reading files",
};
case "tail":
return {
reason: "Show file tail",
group: "Reading files",
};
case "wc":
return {
reason: "Word count",
group: "Reading files",
};
case "which":
return {
reason: "Locate command",
group: "Searching",
};
case "git":
switch (cmd1) {
case "status":
return {
reason: "Git status",
group: "Versioning",
};
case "branch":
return {
reason: "List Git branches",
group: "Versioning",
};
case "log":
return {
reason: "Git log",
group: "Using git",
};
case "diff":
return {
reason: "Git diff",
group: "Using git",
};
case "show":
return {
reason: "Git show",
group: "Using git",
};
default:
return null;
}
case "cargo":
if (cmd1 === "check") {
return {
reason: "Cargo check",
group: "Running command",
};
}
break;
case "sed":
if (
cmd1 === "-n" &&
isValidSedNArg(cmd2) &&
typeof cmd3 === "string" &&
command.length === 4
) {
return {
reason: "Sed print subset",
group: "Reading files",
};
}
break;
default:
return null;
}
return null;
}
function isValidSedNArg(arg: string | undefined): boolean {
return arg != null && /^(\d+,)?\d+p$/.test(arg);
}
const UNSAFE_OPTIONS_FOR_FIND_COMMAND: ReadonlySet<string> = new Set([
// Options that can execute arbitrary commands.
"-exec",
"-execdir",
"-ok",
"-okdir",
// Option that deletes matching files.
"-delete",
// Options that write pathnames to a file.
"-fls",
"-fprint",
"-fprint0",
"-fprintf",
]);
// ---------------- Helper utilities for complex shell expressions -----------------
// A conservative allow-list of bash operators that do not, on their own, cause
// side effects. Redirections (>, >>, <, etc.) and command substitution `$()`
// are intentionally excluded. Parentheses used for grouping are treated as
// strings by `shell-quote`, so we do not add them here. Reference:
// https://github.com/substack/node-shell-quote#parsecmd-opts
const SAFE_SHELL_OPERATORS: ReadonlySet<string> = new Set([
"&&", // logical AND
"||", // logical OR
"|", // pipe
";", // command separator
]);
/**
* Determines whether a parsed shell expression consists solely of safe
* commands (as per `isSafeCommand`) combined using only operators in
* `SAFE_SHELL_OPERATORS`.
*
* If entirely safe, returns the reason/group from the *first* command
* segment so callers can surface a meaningful description. Otherwise returns
* null.
*/
function isEntireShellExpressionSafe(
parts: ReadonlyArray<ParseEntry>,
): SafeCommandReason | null {
if (parts.length === 0) {
return null;
}
try {
// Collect command segments delimited by operators. `shell-quote` represents
// subshell grouping parentheses as literal strings "(" and ")"; treat them
// as unsafe to keep the logic simple (since subshells could introduce
// unexpected scope changes).
let currentSegment: Array<string> = [];
let firstReason: SafeCommandReason | null = null;
const flushSegment = (): boolean => {
if (currentSegment.length === 0) {
return true; // nothing to validate (possible leading operator)
}
const assessment = isSafeCommand(currentSegment);
if (assessment == null) {
return false;
}
if (firstReason == null) {
firstReason = assessment;
}
currentSegment = [];
return true;
};
for (const part of parts) {
if (typeof part === "string") {
// If this string looks like an open/close parenthesis or brace, treat as
// unsafe to avoid parsing complexity.
if (part === "(" || part === ")" || part === "{" || part === "}") {
return null;
}
currentSegment.push(part);
} else if (isParseEntryWithOp(part)) {
// Validate the segment accumulated so far.
if (!flushSegment()) {
return null;
}
// Validate the operator itself.
if (!SAFE_SHELL_OPERATORS.has(part.op)) {
return null;
}
} else {
// Unknown token type
return null;
}
}
// Validate any trailing command segment.
if (!flushSegment()) {
return null;
}
return firstReason;
} catch (_err) {
// If there's any kind of failure, just bail out and return null.
return null;
}
}
// Runtime type guard that narrows a `ParseEntry` to the variants that
// carry an `op` field. Using a dedicated function avoids the need for
// inline type assertions and makes the narrowing reusable and explicit.
function isParseEntryWithOp(
entry: ParseEntry,
): entry is { op: ControlOperator } | { op: "glob"; pattern: string } {
return (
typeof entry === "object" &&
entry != null &&
// Using the safe `in` operator keeps the check property-safe even when
// `entry` is a `string`.
"op" in entry &&
typeof (entry as { op?: unknown }).op === "string"
);
}