codex-rs/core/src/is_safe_command.rs (261 lines of code) (raw):

use tree_sitter::Parser; use tree_sitter::Tree; use tree_sitter_bash::LANGUAGE as BASH; pub fn is_known_safe_command(command: &[String]) -> bool { if is_safe_to_call_with_exec(command) { return true; } // TODO(mbolin): Also support safe commands that are piped together such // as `cat foo | wc -l`. matches!( command, [bash, flag, script] if bash == "bash" && flag == "-lc" && try_parse_bash(script).and_then(|tree| try_parse_single_word_only_command(&tree, script)).is_some_and(|parsed_bash_command| is_safe_to_call_with_exec(&parsed_bash_command)) ) } fn is_safe_to_call_with_exec(command: &[String]) -> bool { let cmd0 = command.first().map(String::as_str); match cmd0 { Some( "cat" | "cd" | "echo" | "grep" | "head" | "ls" | "pwd" | "rg" | "tail" | "wc" | "which", ) => true, Some("find") => { // Certain options to `find` can delete files, write to files, or // execute arbitrary commands, so we cannot auto-approve the // invocation of `find` in such cases. #[rustfmt::skip] const UNSAFE_FIND_OPTIONS: &[&str] = &[ // Options that can execute arbitrary commands. "-exec", "-execdir", "-ok", "-okdir", // Option that deletes matching files. "-delete", // Options that write pathnames to a file. "-fls", "-fprint", "-fprint0", "-fprintf", ]; !command .iter() .any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str())) } // Git Some("git") => matches!( command.get(1).map(String::as_str), Some("branch" | "status" | "log" | "diff" | "show") ), // Rust Some("cargo") if command.get(1).map(String::as_str) == Some("check") => true, // Special-case `sed -n {N|M,N}p FILE` Some("sed") if { command.len() == 4 && command.get(1).map(String::as_str) == Some("-n") && is_valid_sed_n_arg(command.get(2).map(String::as_str)) && command.get(3).map(String::is_empty) == Some(false) } => { true } // ── anything else ───────────────────────────────────────────────── _ => false, } } fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> { let lang = BASH.into(); let mut parser = Parser::new(); parser.set_language(&lang).expect("load bash grammar"); let old_tree: Option<&Tree> = None; parser.parse(bash_lc_arg, old_tree) } /// If `tree` represents a single Bash command whose name and every argument is /// an ordinary `word`, return those words in order; otherwise, return `None`. /// /// `src` must be the exact source string that was parsed into `tree`, so we can /// extract the text for every node. pub fn try_parse_single_word_only_command(tree: &Tree, src: &str) -> Option<Vec<String>> { // Any parse error is an immediate rejection. if tree.root_node().has_error() { return None; } // (program …) with exactly one statement let root = tree.root_node(); if root.kind() != "program" || root.named_child_count() != 1 { return None; } let cmd = root.named_child(0)?; // (command …) if cmd.kind() != "command" { return None; } let mut words = Vec::new(); let mut cursor = cmd.walk(); for child in cmd.named_children(&mut cursor) { match child.kind() { // The command name node wraps one `word` child. "command_name" => { let word_node = child.named_child(0)?; // make sure it's only a word if word_node.kind() != "word" { return None; } words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned()); } // Positional‑argument word (allowed). "word" | "number" => { words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned()); } "string" => { if child.child_count() == 3 && child.child(0)?.kind() == "\"" && child.child(1)?.kind() == "string_content" && child.child(2)?.kind() == "\"" { words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned()); } else { // Anything else means the command is *not* plain words. return None; } } "concatenation" => { // TODO: Consider things like `'ab\'a'`. return None; } "raw_string" => { // Raw string is a single word, but we need to strip the quotes. let raw_string = child.utf8_text(src.as_bytes()).ok()?; let stripped = raw_string .strip_prefix('\'') .and_then(|s| s.strip_suffix('\'')); if let Some(stripped) = stripped { words.push(stripped.to_owned()); } else { return None; } } // Anything else means the command is *not* plain words. _ => return None, } } Some(words) } /* ---------------------------------------------------------- Example ---------------------------------------------------------- */ /// Returns true if `arg` matches /^(\d+,)?\d+p$/ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool { // unwrap or bail let s = match arg { Some(s) => s, None => return false, }; // must end with 'p', strip it let core = match s.strip_suffix('p') { Some(rest) => rest, None => return false, }; // split on ',' and ensure 1 or 2 numeric parts let parts: Vec<&str> = core.split(',').collect(); match parts.as_slice() { // single number, e.g. "10" [num] => !num.is_empty() && num.chars().all(|c| c.is_ascii_digit()), // two numbers, e.g. "1,5" [a, b] => { !a.is_empty() && !b.is_empty() && a.chars().all(|c| c.is_ascii_digit()) && b.chars().all(|c| c.is_ascii_digit()) } // anything else (more than one comma) is invalid _ => false, } } #[cfg(test)] mod tests { use super::*; fn vec_str(args: &[&str]) -> Vec<String> { args.iter().map(|s| s.to_string()).collect() } #[test] fn known_safe_examples() { assert!(is_safe_to_call_with_exec(&vec_str(&["ls"]))); assert!(is_safe_to_call_with_exec(&vec_str(&["git", "status"]))); assert!(is_safe_to_call_with_exec(&vec_str(&[ "sed", "-n", "1,5p", "file.txt" ]))); // Safe `find` command (no unsafe options). assert!(is_safe_to_call_with_exec(&vec_str(&[ "find", ".", "-name", "file.txt" ]))); } #[test] fn unknown_or_partial() { assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"]))); assert!(!is_safe_to_call_with_exec(&vec_str(&["git", "fetch"]))); assert!(!is_safe_to_call_with_exec(&vec_str(&[ "sed", "-n", "xp", "file.txt" ]))); // Unsafe `find` commands. for args in [ vec_str(&["find", ".", "-name", "file.txt", "-exec", "rm", "{}", ";"]), vec_str(&[ "find", ".", "-name", "*.py", "-execdir", "python3", "{}", ";", ]), vec_str(&["find", ".", "-name", "file.txt", "-ok", "rm", "{}", ";"]), vec_str(&["find", ".", "-name", "*.py", "-okdir", "python3", "{}", ";"]), vec_str(&["find", ".", "-delete", "-name", "file.txt"]), vec_str(&["find", ".", "-fls", "/etc/passwd"]), vec_str(&["find", ".", "-fprint", "/etc/passwd"]), vec_str(&["find", ".", "-fprint0", "/etc/passwd"]), vec_str(&["find", ".", "-fprintf", "/root/suid.txt", "%#m %u %p\n"]), ] { assert!( !is_safe_to_call_with_exec(&args), "expected {:?} to be unsafe", args ); } } #[test] fn bash_lc_safe_examples() { assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls"]))); assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls -1"]))); assert!(is_known_safe_command(&vec_str(&[ "bash", "-lc", "git status" ]))); assert!(is_known_safe_command(&vec_str(&[ "bash", "-lc", "grep -R \"Cargo.toml\" -n" ]))); assert!(is_known_safe_command(&vec_str(&[ "bash", "-lc", "sed -n 1,5p file.txt" ]))); assert!(is_known_safe_command(&vec_str(&[ "bash", "-lc", "sed -n '1,5p' file.txt" ]))); assert!(is_known_safe_command(&vec_str(&[ "bash", "-lc", "find . -name file.txt" ]))); } #[test] fn bash_lc_unsafe_examples() { assert!( !is_known_safe_command(&vec_str(&["bash", "-lc", "git", "status"])), "Four arg version is not known to be safe." ); assert!( !is_known_safe_command(&vec_str(&["bash", "-lc", "'git status'"])), "The extra quoting around 'git status' makes it a program named 'git status' and is therefore unsafe." ); assert!( !is_known_safe_command(&vec_str(&["bash", "-lc", "find . -name file.txt -delete"])), "Unsafe find option should not be auto‑approved." ); } #[test] fn test_try_parse_single_word_only_command() { let script_with_single_quoted_string = "sed -n '1,5p' file.txt"; let parsed_words = try_parse_bash(script_with_single_quoted_string) .and_then(|tree| { try_parse_single_word_only_command(&tree, script_with_single_quoted_string) }) .unwrap(); assert_eq!( vec![ "sed".to_string(), "-n".to_string(), // Ensure the single quotes are properly removed. "1,5p".to_string(), "file.txt".to_string() ], parsed_words, ); let script_with_number_arg = "ls -1"; let parsed_words = try_parse_bash(script_with_number_arg) .and_then(|tree| try_parse_single_word_only_command(&tree, script_with_number_arg)) .unwrap(); assert_eq!(vec!["ls", "-1"], parsed_words,); let script_with_double_quoted_string_with_no_funny_stuff_arg = "grep -R \"Cargo.toml\" -n"; let parsed_words = try_parse_bash(script_with_double_quoted_string_with_no_funny_stuff_arg) .and_then(|tree| { try_parse_single_word_only_command( &tree, script_with_double_quoted_string_with_no_funny_stuff_arg, ) }) .unwrap(); assert_eq!(vec!["grep", "-R", "Cargo.toml", "-n"], parsed_words); } }