antlir/nspawn_in_subvol/clonecaps/clonecaps.c (252 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** Usage: clonecaps /proc/PID/status -- cmd argv1 ... First, set current capabilities to match those in the specified `procfs`-formatted process status file, and exit with a non-zero code if that is not possible. Note that we attempt to clone all 5 classes of capabilities: inheritable, permitted, effective, bounding_set, ambient. Ambient caps will not be clonable if this is compiled without `CAPNG_SUPPORTS_AMBIENT` (i.e. against `libcap-ng` older than 0.8). We will fail unless `/proc/MY_PID/status` exactly matches the specified capability settings, so older `libcap-ng` is only usable in situations where the current process's ambient caps already match the target's. If capabilities match the target, this will `execv` a new process, using arguments 3 onwards. Compiling with `libcap-ng` older than 0.8: gcc -std=c99 nspawn_in_subvol/clonecaps/clonecaps.c -o clonecaps -lcap-ng Compiling with 0.8 and newer: gcc -std=c99 -DCAPNG_SUPPORTS_AMBIENT=1 \ nspawn_in_subvol/clonecaps/clonecaps.c -o clonecaps -lcap-ng **/ #include <cap-ng.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> // This is in service of OSS compilation, where getting a modern `libcap-ng` // may be a hassle. #ifndef CAPNG_SUPPORTS_AMBIENT // Our `libcap-ng` cannot yet set ambient caps, so ask for the best it can. #define CAPNG_SELECT_ALL CAPNG_SELECT_BOTH // We use this as a key in our parsing, but never pass it to `capng_` code. #define CAPNG_AMBIENT 16 #endif // Returns the last valid capability for the current kernel, or -1 on error.
int find_last_cap() { int last_cap = -1; // Negative so we fail if `sscanf` fails FILE* last_cap_file = fopen("/proc/sys/kernel/cap_last_cap", "re"); if (last_cap_file == NULL) { perror("open /proc/sys/kernel/cap_last_cap"); return -1; } // No error-checking since `last_cap` will stay at -1 on failure to match. fscanf(last_cap_file, "%d", &last_cap); fclose(last_cap_file); if (last_cap < 0 || last_cap >= 64) { fprintf(stderr, "Got %d in /proc/sys/kernel/cap_last_cap\n", last_cap); return -1; } return last_cap; } // Returns `true` iff all `bits` were successfully added to `libcap-ng` state. bool add_all_caps(int last_cap, capng_type_t cap_type, __u64 bits) { for (int cap = 0; cap <= last_cap; ++cap) { if (bits & ((__u64)1 << cap)) { if (capng_update(CAPNG_ADD, cap_type, cap) != 0) { fprintf( stderr, "Failed to add capability %d of capability type %d\n", cap, cap_type); return false; } } } return true; } // Returns `true` iff `expected_bits` matches `libcap-ng` state. // // NB: This is kind of useless as of Oct 2020, because of // https://github.com/stevegrubb/libcap-ng/issues/19 // Hopefully, once the bug is fixed, it won't even be necessary? bool check_all_caps(int last_cap, capng_type_t cap_type, __u64 expected_bits) { __u64 actual_bits = 0; for (int cap = 0; cap <= last_cap; ++cap) { actual_bits |= (__u64)capng_have_capability(cap_type, cap) << cap; } if (actual_bits != expected_bits) { fprintf( stderr, "Unexpected caps of type %d: actual %llx != expected %llx\n", cap_type, actual_bits, expected_bits); return false; } return true; } typedef struct { __u64 inheritable; __u64 permitted; __u64 effective; __u64 bounding_set; __u64 ambient; } cap_bits_t; // Detail for `read_procfs_cap_bits`. 
bool _match( char* buf, const char* pref, int* cap_type, int match_type, int* pref_len) { *pref_len = strlen(pref); if (strncmp(buf, pref, *pref_len) == 0) { *cap_type = match_type; return true; } return false; } // Parses the `Cap...:` lines from `/proc/PID/status` and populates `cap_bits`. // Returns `true` on success. On error, logs to stderr and returns `false`. bool read_procfs_cap_bits(const char* status_filename, cap_bits_t* cap_bits) { memset(cap_bits, 0, sizeof(*cap_bits)); // We'll compare these to make sure we saw all the expected procfs lines. int expected_cap_types = CAPNG_INHERITABLE | CAPNG_PERMITTED | CAPNG_EFFECTIVE // NB: Antlir doesn't really like kernels older than 4.3, so // I did not bother to conditionalize the availability of CapAmb. | CAPNG_AMBIENT | CAPNG_BOUNDING_SET; int actual_cap_types = 0; FILE* status_file = fopen(status_filename, "re"); if (status_file == NULL) { perror(status_filename); return false; } // Not all lines are under 64 bytes (the max length is ~unbounded thanks // to groups), but `Cap*:` lines will be, for the foreseeable future. // As of capability API v3, they are at 25 bytes including newline. char buf[64]; bool continuing_line = false; // Previous `buf` lacked `\n`. while (fgets(buf, sizeof(buf), status_file)) { bool skip_buf = continuing_line; continuing_line = (buf[strlen(buf) - 1] != '\n'); if (skip_buf) { continue; // Nothing to see here, this is not a `Cap*:` line. 
} // Both values are populated by `match` int cap_type = 0; int pref_len = 0; if (!(_match(buf, "CapInh:\t", &cap_type, CAPNG_INHERITABLE, &pref_len) || _match(buf, "CapPrm:\t", &cap_type, CAPNG_PERMITTED, &pref_len) || _match(buf, "CapEff:\t", &cap_type, CAPNG_EFFECTIVE, &pref_len) || _match(buf, "CapBnd:\t", &cap_type, CAPNG_BOUNDING_SET, &pref_len) || _match(buf, "CapAmb:\t", &cap_type, CAPNG_AMBIENT, &pref_len))) { continue; } // Fail on duplicate cap types in the input if (actual_cap_types & cap_type) { fprintf( stderr, "%s: Capability type %d occurred more than once\n", status_filename, cap_type); return false; } actual_cap_types |= cap_type; // Read out the bits for this capability, we'll apply them later char* end_of_bits = NULL; __u64 bits = strtoull(buf + pref_len, &end_of_bits, 16); // We should have read 16 hex bytes, terminated by a newline. if ((end_of_bits - (buf + pref_len)) != 16 || end_of_bits[0] != '\n') { fprintf( stderr, "%s: Failed to parse value %s for capability type %d\n", status_filename, buf + pref_len, cap_type); return false; } if (cap_type == CAPNG_INHERITABLE) { cap_bits->inheritable = bits; } else if (cap_type == CAPNG_PERMITTED) { cap_bits->permitted = bits; } else if (cap_type == CAPNG_EFFECTIVE) { cap_bits->effective = bits; } else if (cap_type == CAPNG_BOUNDING_SET) { cap_bits->bounding_set = bits; } else if (cap_type == CAPNG_AMBIENT) { cap_bits->ambient = bits; } } fclose(status_file); if (actual_cap_types != expected_cap_types) { fprintf( stderr, "%s: Missing capability types: %d vs %d\n", status_filename, actual_cap_types, expected_cap_types); return false; } return true; } bool is_debug() { static int debug = -1; if (debug == -1) { const char* debug_env = getenv("ANTLIR_DEBUG"); debug = debug_env && debug_env[0]; } return debug; } void fprint_cap_bits(FILE* file, const char* msg, cap_bits_t cap_bits) { fprintf( file, "%s: i %llx, p %llx, e %llx, bs %llx, a %llx\n", msg, cap_bits.inheritable, cap_bits.permitted, 
cap_bits.effective, cap_bits.bounding_set, cap_bits.ambient); } int main(int argc, char** argv) { if (argc < 3 || strcmp("--", argv[2]) != 0) { fprintf(stderr, "Usage: clonecaps /proc/PID/status -- cmd argv1 ...\n"); return 1; } char* target_procfs_path = argv[1]; argv += 3; // Skip "our" args, this is now ready to `execv`. argc -= 3; // The running kernel may not match our compile-time headers. int last_cap = find_last_cap(); if (last_cap == -1) { // The function already printed the error return 1; } // We read this to check that `libcap-ng` worked correctly, since // `check_all_caps` cannot. char my_procfs_path[64]; // `/proc/PID/status` fits even with 64-bit PIDs if (snprintf( my_procfs_path, sizeof(my_procfs_path), "/proc/%d/status", getpid()) >= sizeof(my_procfs_path)) { fprintf(stderr, "PID too long??? %d\n", getpid()); return 1; } if (is_debug()) { cap_bits_t cur_bits; if (!read_procfs_cap_bits(my_procfs_path, &cur_bits)) { return 1; // An error was already printed } fprint_cap_bits(stderr, "Initial procfs for getpid()", cur_bits); } cap_bits_t target_bits; if (!read_procfs_cap_bits(target_procfs_path, &target_bits)) { return 1; // An error was already printed } if (is_debug()) { fprint_cap_bits(stderr, "Procfs for target PID", target_bits); } capng_clear(CAPNG_SELECT_ALL); // Clear traditional, bounding, ambient // Clone the target's values if (!(add_all_caps(last_cap, CAPNG_INHERITABLE, target_bits.inheritable) && add_all_caps(last_cap, CAPNG_PERMITTED, target_bits.permitted) && add_all_caps(last_cap, CAPNG_EFFECTIVE, target_bits.effective) && add_all_caps(last_cap, CAPNG_BOUNDING_SET, target_bits.bounding_set) #ifdef CAPNG_SUPPORTS_AMBIENT && add_all_caps(last_cap, CAPNG_AMBIENT, target_bits.ambient) #endif )) { return 1; // `add_all_caps` already printed an error } // Apply traditional & bounding if (capng_apply(CAPNG_SELECT_ALL) != 0) { fprint_cap_bits(stderr, "Failed to apply capabilities", target_bits); return 1; } #ifdef CAPNG_SUPPORTS_AMBIENT // Due 
to the following bug, ambient capabilities only get applied the // second time around: https://github.com/stevegrubb/libcap-ng/issues/18 // // This can be removed once both the OSS and FB versions of `libcap-ng` // are guaranteed to include b6ff250a71a1f0a11b2917186155d2426080293d // from https://github.com/stevegrubb/libcap-ng if (capng_apply(CAPNG_SELECT_ALL) != 0) { fprint_cap_bits(stderr, "Failed to re-apply capabilities", target_bits); return 1; } #endif if (!(check_all_caps(last_cap, CAPNG_INHERITABLE, target_bits.inheritable) && check_all_caps(last_cap, CAPNG_PERMITTED, target_bits.permitted) && check_all_caps(last_cap, CAPNG_EFFECTIVE, target_bits.effective) && check_all_caps(last_cap, CAPNG_BOUNDING_SET, target_bits.bounding_set) #ifdef CAPNG_SUPPORTS_AMBIENT && check_all_caps(last_cap, CAPNG_AMBIENT, target_bits.ambient) #endif )) { return 1; // `check_all_caps` already printed an error } cap_bits_t final_bits; if (!read_procfs_cap_bits(my_procfs_path, &final_bits)) { return 1; // An error was already printed } // Note that this will fail if the target proc has ambient caps that do // not match ours, and our `libcap-ng` is old. // // This also detects an `libcap-ng` bug: // https://github.com/stevegrubb/libcap-ng/issues/19 if (final_bits.inheritable != target_bits.inheritable || final_bits.permitted != target_bits.permitted || final_bits.effective != target_bits.effective || final_bits.bounding_set != target_bits.bounding_set || final_bits.ambient != target_bits.ambient) { fprint_cap_bits(stderr, "After applying new capabilities", target_bits); fprint_cap_bits(stderr, "Aborting, procfs does not match", final_bits); return 1; } else if (is_debug()) { fprint_cap_bits(stderr, "Final procfs for getpid()", final_bits); } execv(argv[0], argv); perror("execv"); return 1; }