in xar/XarExecFuse.cpp [226:543]
int main(int argc, char** argv) {
XAR_CHECK_SIMPLE(getuid() == geteuid());
// Set our umask to a good default for the files we create. Save
// the old value to restore before executing the XAR bootstrap
// script.
auto old_umask = umask(0022);
if (argc < 2) {
usage();
return 1;
}
// Skip past our executable name, the optional -m flag, and, after
// stashing a copy of it, the path to the xar file. This leaves
// argv[0:argc-1] as the parameters to pass to the process we exec.
argv++;
argc--;
bool mount_only = false;
bool print_only = false;
while (argv[0] && argv[0][0] == '-') {
if (strcmp(argv[0], "-m") == 0) {
mount_only = true;
} else if (strcmp(argv[0], "-n") == 0) {
print_only = true;
} else if (strcmp(argv[0], "-h") == 0) {
usage();
return 0;
} else {
usage();
return 1;
}
argv++;
argc--;
}
if (!argv[0]) {
usage();
return 1;
}
char* xar_path = argv[0];
argv++;
argc--;
// Extract our required fields from the XAR header. XAREXEC_TARGET
// is required unless the -m flag was used.
auto header = tools::xar::read_xar_header(xar_path);
size_t offset;
try {
size_t end;
offset = std::stoull(header[tools::xar::kOffsetName], &end);
if (end != header[tools::xar::kOffsetName].size()) {
throw std::invalid_argument("Offset not entirely an integer");
}
} catch (const std::exception& ex) {
cerr << "Header offset is non-integral: " << header[tools::xar::kOffsetName]
<< endl;
XAR_FATAL << "Exact error: " << ex.what();
}
std::string uuid = header[tools::xar::kUuidName];
std::string execpath;
auto it = header.find(tools::xar::kXarexecTarget);
if (it != header.end()) {
execpath = it->second;
}
if (!mount_only && execpath.empty()) {
XAR_FATAL << "No XAREXEC_TARGET in XAR header of " << xar_path;
}
if (!std::all_of(uuid.begin(), uuid.end(), isxdigit)) {
XAR_FATAL << "uuid must only contain hex digits";
}
if (uuid.empty()) {
XAR_FATAL << "uuid must be non-empty";
}
// If provided, use a non-default mount root from the header.
std::string mountroot;
it = header.find(tools::xar::kMountRoot);
if (it != header.end()) {
mountroot = it->second;
} else {
// Otherwise find the first proper mount root from our list of
// defaults.
for (const auto& candidate : tools::xar::default_mount_roots()) {
struct stat st;
if (stat(candidate.c_str(), &st) == 0 && (st.st_mode & 07777) == 01777) {
mountroot = candidate;
break;
}
}
if (mountroot.empty()) {
tools::xar::no_mount_roots_help_message(XAR_FATAL);
}
}
struct stat st;
if (stat(mountroot.c_str(), &st) != 0) {
XAR_FATAL << "Failed to stat mount root '" << mountroot
<< "': " << strerror(errno);
}
if ((st.st_mode & 07777) != 01777) {
XAR_FATAL << "Mount root '" << mountroot << "' permissions should be 01777";
}
// Path is /mnt/xarfuse/uid-N/UUID-ns-Y; we make directories under
// /mnt/xarfuse as needed. Replace /mnt/xarfuse with custom values
// as specified.
std::string user_basedir = get_user_basedir(mountroot);
// mtab sucks. In some environments, particularly centos6, when
// mtab is shared between different mount namespaces, we want to
// disambiguate by more than just the XAR's uuid and user's uid. We
// use the mount namespace id, but optionally also take a
// user-specified "seed" from the environment. We cannot rely
// purely on mount namespace as the kernel will aggressively re-use
// namespace IDs, so while namespace helps with concurrent jobs, it
// can fail with jobs run after other jobs.
auto env_seed = getenv("XAR_MOUNT_SEED");
std::string mount_directory = uuid;
if (env_seed && *env_seed && strchr(env_seed, '/') == nullptr) {
mount_directory += "-seed-";
mount_directory += env_seed;
} else if (stat("/proc/self/ns/pid", &st) == 0) {
mount_directory += "-seed-nspid" + std::to_string(st.st_ino);
// Sometimes we are in the same namespace but different cgroups
// (e.g. systemd using cgroups to control process lifetime but not
// putting processes into mount namespaces). This can cause
// cgroup termination to destroy a shared squashfuse_ll process.
//
// Use the inode of the cgroup we are running in. Note this is
// best effort in kernel 5.2 as inode numbers can be reused
// sequentially (but not concurrently); kernel 5.6 makes this
// truly unique across boots.
auto maybe_cgroup_inode =
tools::xar::read_sysfs_cgroup_inode("/proc/self/cgroup");
if (maybe_cgroup_inode) {
mount_directory += "_cgpid" + std::to_string(*maybe_cgroup_inode);
}
}
// Try to determine our mount namespace id (via the inode on
// /proc/self/ns/mnt); if we can, make that part of our mountpoint's
// name. This ensures that /etc/mtab on centos6 has unique entries
// for processes in different namespaces, even if /etc itself is
// shared among them. See t12007704 for details.
// Note: will fail on macOS.
if (stat("/proc/self/ns/mnt", &st) == 0) {
mount_directory += "-ns-" + std::to_string(st.st_ino);
}
const size_t squashfuse_idle_timeout = get_squashfuse_timeout();
auto mount_path = user_basedir + "/" + mount_directory;
if (print_only) {
cout << mount_path << endl;
return 0;
}
// Our lockfile for directory /mnt/xarfuse/uid-N/UUID-ns-Y is
// /mnt/xarfuse/uid-N/lockfile.UUID-ns-Y.
auto lockfile = user_basedir + "/lockfile." + mount_directory;
int lock_fd = grab_lock(lockfile);
if (mkdir(mount_path.c_str(), 0755) == 0) {
// On macOS, mkdir sets the new directory's group to the enclosing directory
// which is not necessarily owned by the euid executing the xar. Instead,
// chown() the new directory to the euid and egid.
if (kIsDarwin) {
XAR_PCHECK_SIMPLE(chown(mount_path.c_str(), geteuid(), getegid()) == 0);
}
} else if (errno != EEXIST) {
XAR_FATAL << "mkdir failed:" << strerror(errno);
}
// Construct our exec path; if it already exists, we're done and can
// simply execute it.
const std::string exec_path = mount_path + "/" + execpath;
if (tools::xar::debugging) {
cerr << "exec: " << exec_path << " as " << getuid() << " " << getgid()
<< endl;
}
// Hold a file descriptor open to one of the files in the XAR; this
// will prevent unmounting as we exec the bootstrap and it execs
// anything. Intentionally not O_CLOEXEC. This is necessary
// because the exec call typically targets a shell script inside the
// XAR and so the script won't remain open while the exec happens --
// the kernel will examine it, run a bash process, and that will
// open the shell script. Between the parsing and bash opening it,
// the mount point could disappear. Also, that script itself often
// exec's the python interpreter living on local disk, which will
// open a py file in the XAR... again a brief moment where the
// unmount can occur. We open now, very early to signal to
// squashfuse_ll before the statfs call, but will try to re-open
// later if this one fails. So ignore the return code for now.
int bootstrap_fd = open(exec_path.c_str(), O_RDONLY);
bool newMount = false;
// TODO(chip): also mount DEPENDENCIES
if (!is_squashfuse_mounted(mount_path, true)) {
// This should never happen. And yet, just in case, let's make
// sure we will hold the right file open.
if (bootstrap_fd != -1) {
// TODO: one day we should find a way to log or bump a key; this
// would be a good case.
close(bootstrap_fd);
bootstrap_fd = -1;
}
// Check mount_path sanity before mounting; once mounted, though,
// the permissions may change, so we have to do the check after we
// grab the lock but know we need to perform a mount.
check_file_sanity(mount_path, Expectation::Directory, 0755);
pid_t pid = fork();
XAR_PCHECK_SIMPLE(pid >= 0);
if (pid == 0) {
sanitize_file_descriptors();
std::string opts = "-ooffset=" + std::to_string(offset);
if (squashfuse_idle_timeout > 0) {
opts += ",timeout=" + std::to_string(squashfuse_idle_timeout);
}
if (tools::xar::fuse_allows_visible_mounts("/etc/fuse.conf")) {
opts += ",allow_root";
}
auto const ret = execlp(
kSquashFuseExecutable,
kSquashFuseExecutable,
opts.c_str(),
xar_path,
mount_path.c_str(),
nullptr);
if (ret != 0) {
XAR_FATAL << "Failed to exec squashfuse_ll: " << strerror(errno)
<< ". Try installing squashfuse from "
"https://github.com/vasi/squashfuse/releases.";
}
} else {
int status = 0;
XAR_PCHECK_SIMPLE(waitpid(pid, &status, 0) == pid);
// We only make it out of this block if we have an exit status of 0.
if (WIFEXITED(status)) {
if (WEXITSTATUS(status) != 0) {
XAR_FATAL << "squashfuse_ll failed with exit status "
<< WEXITSTATUS(status);
}
} else if (WIFSIGNALED(status)) {
XAR_FATAL << "squashfuse_ll failed with signal " << WTERMSIG(status);
} else {
XAR_FATAL << "squashfuse_ll failed with unknown exit status " << status;
}
}
newMount = true;
}
// Wait for up to 9 seconds for mount to be available
auto start = std::chrono::steady_clock::now();
auto timeout = std::chrono::seconds(9);
while (!is_squashfuse_mounted(mount_path, false)) {
if (std::chrono::steady_clock::now() - start > timeout) {
XAR_FATAL << "timed out waiting for squashfs mount";
}
/* sleep override */
std::this_thread::sleep_for(std::chrono::microseconds(100));
}
// Touch the lockfile; our unmount script will use it as a proxy for
// unmounting "stale" mounts.
XAR_PCHECK_SIMPLE(futimes(lock_fd, nullptr) == 0);
if (mount_only) {
cout << mount_path << endl;
return 0;
}
// Retry opening our fd if we failed before.
if (bootstrap_fd == -1) {
bootstrap_fd = open(exec_path.c_str(), O_RDONLY);
}
// Still no success? Bail.
if (bootstrap_fd == -1) {
XAR_FATAL << "Unable to open " << exec_path << ": " << strerror(errno);
}
// cmd line is:
// newArgs[0] = "/bin/sh"
// newArgs[1] = "-e"
// newArgs[2] = mounted path inside squash file to run
// newArgs[3] = path to the squash file itself
// newArgs[4], newArgs[5], ... = args passed on our command line
// Why argc + 5? The 4 new params and the trailing nullptr entry.
char* newArgs[argc + 5];
newArgs[0] = strdup("/bin/sh");
newArgs[1] = strdup("-e");
newArgs[2] = strdup(exec_path.c_str());
if (!newArgs[0] || !newArgs[1] || !newArgs[2]) {
XAR_FATAL << "strdup failed, call the cops"
<< ": " << strerror(errno);
}
newArgs[3] = xar_path;
for (int i = 0; i < argc; ++i) {
newArgs[i + 4] = argv[i];
}
newArgs[argc + 4] = nullptr;
for (int i = 0; newArgs[i]; ++i) {
if (tools::xar::debugging) {
cerr << " exec arg: " << newArgs[i] << endl;
}
}
if (newMount) {
setenv("XARFUSE_NEW_MOUNT", "1", 1);
}
umask(old_umask);
if (execv(newArgs[0], newArgs) != 0) {
XAR_FATAL << "execv: " << strerror(errno) << "cmd: " << newArgs[0];
}
return 0;
}