reverie-process/src/seccomp/mod.rs (183 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
//! Provides helpers for constructing a [`seccomp`][seccomp] filter. This is a
//! pure Rust implementation and does not require libseccomp.
//!
//! # Seccomp Background
//!
//! [`seccomp(2)`][seccomp] is a powerful tool for changing how a process tree
//! behaves when a syscall happens. Seccomp can be used to install a filter that
//! applies to every child process in a process tree. Since filters cannot be
//! removed, they can only get more restrictive. The language used for filters is
//! called `seccomp-bpf`. It is a subset of the BPF byte code language.
//!
//! Some of the restrictions include:
//! - Only being able to JMP forward and never backward. This prevents loops and
//! ensures seccomp-bpf filters always terminate. This is also true of BPF.
//! - Cannot call libbpf functions.
//! - Cannot operate on 64-bit integers, only 32-bit integers.
//!
//! [seccomp]: https://man7.org/linux/man-pages/man2/seccomp.2.html
//!
//! You can think of a seccomp-bpf program as a little function that gets
//! executed for every syscall:
//!
//! ```ignore
//! // NOTE: seccomp-bpf programs are actually written in byte code, but if a
//! // high-level language could be compiled to BPF byte code, this is what it'd
//! // look like.
//! fn my_program(data: seccomp_data) -> Action {
//! if data.nr == 2 {
//! return Action::Trace;
//! }
//!
//! if data.nr == 3 {
//! return Action::KillProcess;
//! }
//!
//! // Allow the syscall by default.
//! Action::Allow
//! }
//! ```
//!
//! where `seccomp_data` is defined as:
//!
//! ```ignore
//! struct seccomp_data {
//! // The syscall number.
//! nr: i32,
//! // The architecture.
//! arch: u32,
//! // Instruction pointer.
//! ip: u64,
//! // The 6 syscall arguments.
//! args: [u64; 6],
//! }
//! ```
//!
//! This is the only input available to the seccomp filter and is the only bit of
//! data available to make a decision about a syscall (i.e., an "action"). An
//! action might be nothing (i.e., allow the syscall through), kill the
//! process/thread with `SIGSYS`, forward the syscall to ptrace, or return an
//! error code.
#[macro_use]
mod bpf;
#[allow(unused)]
mod notif;
use bpf::*;
use syscalls::Errno;
use syscalls::Sysno;
pub use bpf::Filter;
pub use notif::*;
use std::collections::BTreeMap;
/// Builder for creating seccomp filters.
///
/// A builder collects a target architecture, a default action, per-syscall
/// actions, and instruction pointer ranges. Calling `build` turns that
/// configuration into the filter byte code.
#[derive(Debug, Clone)]
pub struct FilterBuilder {
    /// The target architecture.
    target_arch: TargetArch,

    /// The action to take if there are no matches.
    default_action: Action,

    /// The action to take for each syscall. Because this is a map keyed by
    /// syscall, each syscall has at most one action; setting it again replaces
    /// the previous action.
    syscalls: BTreeMap<Sysno, Action>,

    /// Ranges of instruction pointer values, stored as `(begin, end, action)`
    /// in the order they were added.
    ip_ranges: Vec<(u64, u64, Action)>,
}
/// The target architecture.
///
/// Each variant's discriminant is the corresponding `AUDIT_ARCH_*` constant,
/// so `arch as u32` yields the value the filter compares against.
// The variant names deliberately mirror the lowercase values of Rust's
// `target_arch` cfg, hence the `non_camel_case_types` allowance.
#[allow(non_camel_case_types, missing_docs)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[repr(u32)]
pub enum TargetArch {
    /// `AUDIT_ARCH_X86`.
    x86 = AUDIT_ARCH_X86,
    /// `AUDIT_ARCH_X86_64`.
    x86_64 = AUDIT_ARCH_X86_64,
    /// `AUDIT_ARCH_MIPS`.
    mips = AUDIT_ARCH_MIPS,
    /// `AUDIT_ARCH_PPC`.
    powerpc = AUDIT_ARCH_PPC,
    /// `AUDIT_ARCH_PPC64`.
    powerpc64 = AUDIT_ARCH_PPC64,
    /// `AUDIT_ARCH_ARM`.
    arm = AUDIT_ARCH_ARM,
    /// `AUDIT_ARCH_AARCH64`.
    aarch64 = AUDIT_ARCH_AARCH64,
}
/// The action to take if the conditions of a rule all match.
///
/// Each variant is converted to a raw seccomp return value by the
/// `From<Action> for u32` implementation in this module.
#[derive(Debug, Copy, Clone)]
pub enum Action {
/// Allows the syscall to be executed.
Allow,
/// Returns the specified error instead of executing the syscall. The raw
/// errno value is masked with `SECCOMP_RET_DATA` when it is encoded.
Errno(Errno),
/// Prevents the syscall from being executed and the kernel will kill the
/// calling thread with `SIGSYS`.
KillThread,
/// Prevents the syscall from being executed and the kernel will kill the
/// calling process with `SIGSYS`.
KillProcess,
/// Same as [`Action::Allow`] but logs the call.
Log,
/// If the thread is being ptraced and the tracing process specified
/// `PTRACE_O_SECCOMP`, the tracing process will be notified via
/// `PTRACE_EVENT_SECCOMP` and the value provided can be retrieved using
/// `PTRACE_GETEVENTMSG`.
Trace(u16),
/// Prevents the syscall from being executed and raises `SIGSYS` in the
/// calling thread (`SECCOMP_RET_TRAP`).
Trap,
/// Notifies userspace. Encoded as the raw value `0x7fc00000`, which is the
/// kernel's `SECCOMP_RET_USER_NOTIF`.
Notify,
}
impl From<Action> for u32 {
    /// Encodes an [`Action`] as the raw 32-bit value a seccomp filter returns.
    ///
    /// Actions that carry a payload (`Errno`, `Trace`) store it in the data
    /// bits selected by `SECCOMP_RET_DATA`; any payload bits outside that mask
    /// are dropped.
    fn from(action: Action) -> u32 {
        // Raw value for the user-notification action, spelled out here
        // instead of being taken from `libc`.
        const SECCOMP_RET_USER_NOTIF: u32 = 0x7fc0_0000;

        // Combines an action code with its payload, keeping only the payload
        // bits that fit in the data field.
        let with_data = |ret: u32, data: u32| ret | (data & libc::SECCOMP_RET_DATA);

        match action {
            Action::Allow => libc::SECCOMP_RET_ALLOW,
            Action::Log => libc::SECCOMP_RET_LOG,
            Action::Trap => libc::SECCOMP_RET_TRAP,
            Action::KillThread => libc::SECCOMP_RET_KILL_THREAD,
            Action::KillProcess => libc::SECCOMP_RET_KILL_PROCESS,
            Action::Notify => SECCOMP_RET_USER_NOTIF,
            Action::Errno(errno) => with_data(libc::SECCOMP_RET_ERRNO, errno.into_raw() as u32),
            Action::Trace(message) => with_data(libc::SECCOMP_RET_TRACE, u32::from(message)),
        }
    }
}
impl From<Action> for sock_filter {
    /// Wraps an [`Action`] in a BPF return statement (`BPF_RET + BPF_K`) whose
    /// operand is the action's raw `u32` encoding.
    fn from(action: Action) -> sock_filter {
        let return_value = u32::from(action);
        BPF_STMT(BPF_RET + BPF_K, return_value)
    }
}
impl TargetArch {
#![allow(missing_docs)]
#[cfg(target_arch = "x86")]
pub const CURRENT: TargetArch = Self::x86;
#[cfg(target_arch = "x86_64")]
pub const CURRENT: TargetArch = Self::x86_64;
#[cfg(target_arch = "mips")]
pub const CURRENT: TargetArch = Self::mips;
#[cfg(target_arch = "powerpc")]
pub const CURRENT: TargetArch = Self::powerpc;
#[cfg(target_arch = "powerpc64")]
pub const CURRENT: TargetArch = Self::powerpc64;
#[cfg(target_arch = "arm")]
pub const CURRENT: TargetArch = Self::arm;
#[cfg(target_arch = "aarch64")]
pub const CURRENT: TargetArch = Self::aarch64;
}
impl Default for TargetArch {
fn default() -> Self {
Self::CURRENT
}
}
impl Default for FilterBuilder {
fn default() -> Self {
Self::new()
}
}
impl FilterBuilder {
/// Creates the seccomp filter builder.
pub fn new() -> Self {
Self {
target_arch: TargetArch::default(),
default_action: Action::KillThread,
syscalls: Default::default(),
ip_ranges: Default::default(),
}
}
/// Sets the target architecture. If this doesn't match the architecture of
/// the process, then the process is killed. This is the first step in the
/// seccomp filter and ensures that we're working with the right syscall
/// table. Each architecture has a slightly different syscall table and we
/// need to make sure the syscall numbers we're using are the right ones for
/// the architecture.
///
/// By default, the target architecture is set to the architecture of the
/// current program (i.e., `TargetArch::CURRENT`).
pub fn target_arch(&mut self, target_arch: TargetArch) -> &mut Self {
self.target_arch = target_arch;
self
}
/// The default action to take if there are no matches. By default, the
/// default action is to kill the current thread (i.e., the filter becomes an
/// allowlist).
///
/// When using an allowlist of syscalls, this should be set to
/// `Action::KillThread` or `Action::KillProcess`.
///
/// When using a blocklist of syscalls, this should be set to
/// `Action::Allow`.
pub fn default_action(&mut self, action: Action) -> &mut Self {
self.default_action = action;
self
}
/// Sets the action to take for the given syscall.
pub fn syscall(&mut self, syscall: Sysno, action: Action) -> &mut Self {
self.syscalls.insert(syscall, action);
self
}
/// Sets the action to take for a set of syscalls.
pub fn syscalls<I>(&mut self, table: I) -> &mut Self
where
I: IntoIterator<Item = (Sysno, Action)>,
{
self.syscalls.extend(table);
self
}
/// Take an action if the instruction pointer `ip >= begin && ip < end`.
///
/// This is useful in conjunction with `mmap`. For example, we can use this
/// to deny any syscalls made outside of `ld.so` or `libc.so`. It can also be
/// used to avoid tracing syscalls injected with ptrace.
///
/// Multiple ranges can be added and are checked in sequence.
pub fn ip_range(&mut self, begin: u64, end: u64, action: Action) -> &mut Self {
self.ip_ranges.push((begin, end, action));
self
}
/// Adds multiple IP ranges. This is equivalent to calling
/// [`FilterBuilder::ip_range`] multiple times.
pub fn ip_ranges<I>(&mut self, ranges: I) -> &mut Self
where
I: IntoIterator<Item = (u64, u64, Action)>,
{
self.ip_ranges.extend(ranges);
self
}
/// Generates the byte code for the filter.
pub fn build(&self) -> Filter {
let mut filter = Filter::new();
// This should be the first step for every seccomp-bpf filter.
VALIDATE_ARCH(self.target_arch as u32).into_bpf(&mut filter);
if !self.ip_ranges.is_empty() {
LOAD_SYSCALL_IP().into_bpf(&mut filter);
for (begin, end, action) in &self.ip_ranges {
IP_RANGE(*begin, *end, (*action).into()).into_bpf(&mut filter);
}
}
if !self.syscalls.is_empty() {
// Load the syscall number.
LOAD_SYSCALL_NR.into_bpf(&mut filter);
for (syscall, action) in &self.syscalls {
SYSCALL(*syscall, (*action).into()).into_bpf(&mut filter);
}
}
// The default action is always performed last.
sock_filter::from(self.default_action).into_bpf(&mut filter);
filter
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a small blocklist filter and compares the generated program with
    /// the same program written out by hand.
    #[test]
    fn smoke() {
        // `write` appears twice in the input. The builder keeps its rules in a
        // map keyed by syscall, so only one `write` rule is expected in the
        // output.
        let actual = FilterBuilder::new()
            .default_action(Action::Allow)
            .target_arch(TargetArch::x86_64)
            .syscalls([
                (Sysno::read, Action::KillThread),
                (Sysno::write, Action::KillThread),
                (Sysno::open, Action::KillThread),
                (Sysno::close, Action::KillThread),
                (Sysno::write, Action::KillThread),
            ])
            .build();

        // NOTE(review): the expected rule order relies on the ordering of
        // `Sysno` values (read, write, open, close) — confirm it matches the
        // map's iteration order on the platform running the test.
        let expected = seccomp_bpf![
            VALIDATE_ARCH(AUDIT_ARCH_X86_64),
            LOAD_SYSCALL_NR,
            SYSCALL(Sysno::read, DENY),
            SYSCALL(Sysno::write, DENY),
            SYSCALL(Sysno::open, DENY),
            SYSCALL(Sysno::close, DENY),
            ALLOW,
        ];

        assert_eq!(actual, expected);
    }
}