void Subprocess::spawnInternal()

in folly/Subprocess.cpp [340:517]


// Creates the pipes requested in `options`, then forks (or vforks / clones)
// and runs the child up to exec().
//
// Parameters:
//   argv       - argument vector handed to runChild() (cast to char** for the
//                exec() family; presumably NULL-terminated -- confirm with the
//                caller that builds it).
//   executable - program path/name handed to runChild().
//   options    - spawn options.  Taken by non-const reference because every
//                PIPE_IN / PIPE_OUT entry in options.fdActions_ is rewritten
//                in place to the child-side pipe fd so that it is later
//                dup2()ed in the child.
//   env        - if non-null, the strings are cloned and used as the child's
//                environment; if null, the parent's `environ` is used.
//   errFd      - file descriptor the child uses (via childError()) to report
//                a failure before or during exec() back to the parent.
//
// Side effects in the parent: appends one entry to pipes_ per created pipe,
// and on success sets pid_ and returnCode_.  When options.detach_ is set,
// pid_ is the pid of the short-lived intermediate process, not of the
// grandchild that actually execs.
//
// Failures in the parent before the child exists are reported through
// checkUnixError() / checkPosixError().
void Subprocess::spawnInternal(
    std::unique_ptr<const char*[]> argv,
    const char* executable,
    Options& options,
    const std::vector<std::string>* env,
    int errFd) {
  // Parent work, pre-fork: create pipes
  std::vector<int> childFds;
  // Close all of the childFds as we leave this scope
  SCOPE_EXIT {
    // These are only pipes, closing them shouldn't fail
    for (int cfd : childFds) {
      CHECK_ERR(::close(cfd));
    }
  };

  int r;
  for (auto& p : options.fdActions_) {
    if (p.second == PIPE_IN || p.second == PIPE_OUT) {
      int fds[2];
      // We're setting both ends of the pipe as close-on-exec. The child
      // doesn't need to reset the flag on its end, as we always dup2() the fd,
      // and dup2() fds don't share the close-on-exec flag.
#if FOLLY_HAVE_PIPE2
      // If possible, set close-on-exec atomically. Otherwise, a concurrent
      // Subprocess invocation can fork() between "pipe" and "fcntl",
      // causing FDs to leak.
      r = ::pipe2(fds, O_CLOEXEC);
      checkUnixError(r, "pipe2");
#else
      r = ::pipe(fds);
      checkUnixError(r, "pipe");
      r = fcntl(fds[0], F_SETFD, FD_CLOEXEC);
      checkUnixError(r, "set FD_CLOEXEC");
      r = fcntl(fds[1], F_SETFD, FD_CLOEXEC);
      checkUnixError(r, "set FD_CLOEXEC");
#endif
      pipes_.emplace_back();
      Pipe& pipe = pipes_.back();
      pipe.direction = p.second;
      int cfd;
      if (p.second == PIPE_IN) {
        // Child gets reading end; the parent keeps (and owns) the writing end
        pipe.pipe = folly::File(fds[1], /*ownsFd=*/true);
        cfd = fds[0];
      } else {
        // Child gets writing end; the parent keeps (and owns) the reading end
        pipe.pipe = folly::File(fds[0], /*ownsFd=*/true);
        cfd = fds[1];
      }
      // Replace the PIPE_IN / PIPE_OUT marker with the real child-side fd.
      p.second = cfd; // ensure it gets dup2()ed
      pipe.childFd = p.first;
      // The parent's copy of the child-side fd is closed by the SCOPE_EXIT
      // above once this function returns or throws.
      childFds.push_back(cfd);
    }
  }

  // This should already be sorted, as options.fdActions_ is
  DCHECK(std::is_sorted(pipes_.begin(), pipes_.end()));

  // Note that the const casts below are legit, per
  // http://pubs.opengroup.org/onlinepubs/009695399/functions/exec.html

  auto argVec = const_cast<char**>(argv.get());

  // Set up environment
  std::unique_ptr<const char*[]> envHolder;
  char** envVec;
  if (env) {
    // envHolder keeps the cloned array alive until this function returns.
    envHolder = cloneStrings(*env);
    envVec = const_cast<char**>(envHolder.get());
  } else {
    envVec = environ;
  }

  // Block all signals around vfork; see http://ewontfix.com/7/.
  //
  // As the child may run in the same address space as the parent until
  // the actual execve() system call, any (custom) signal handlers that
  // the parent has might alter parent's memory if invoked in the child,
  // with undefined results.  So we block all signals in the parent before
  // vfork(), which will cause them to be blocked in the child as well (we
  // rely on the fact that Linux, just like all sane implementations, only
  // clones the calling thread).  Then, in the child, we reset all signals
  // to their default dispositions (while still blocked), and unblock them
  // (so the exec()ed process inherits the parent's signal mask)
  //
  // The parent also unblocks all signals as soon as vfork() returns.
  sigset_t allBlocked;
  r = sigfillset(&allBlocked);
  checkUnixError(r, "sigfillset");
  sigset_t oldSignals;

  r = pthread_sigmask(SIG_SETMASK, &allBlocked, &oldSignals);
  checkPosixError(r, "pthread_sigmask");
  SCOPE_EXIT {
    // Restore signal mask
    r = pthread_sigmask(SIG_SETMASK, &oldSignals, nullptr);
    CHECK_EQ(r, 0) << "pthread_sigmask: " << errnoStr(r); // shouldn't fail
  };

  // Call c_str() here, as it's not necessarily safe after fork.
  const char* childDir =
      options.childDir_.empty() ? nullptr : options.childDir_.c_str();

  // NOTE: the vfork() calls below must stay inline in this function.  A
  // vfork()ed child must not return from the function that called vfork(),
  // so the fork-selection logic cannot be moved into a helper.
  pid_t pid;
#ifdef __linux__
  if (options.cloneFlags_) {
    pid = syscall(SYS_clone, *options.cloneFlags_, 0, nullptr, nullptr);
  } else {
#endif
    if (options.detach_) {
      // If we are detaching we must use fork() instead of vfork() for the first
      // fork, since we aren't going to simply call exec() in the child.
      pid = AtFork::forkInstrumented(fork);
    } else {
      if (kIsSanitizeThread) {
        // TSAN treats vfork as fork, so use the instrumented version
        // instead
        pid = AtFork::forkInstrumented(fork);
      } else {
        pid = vfork();
      }
    }
#ifdef __linux__
  }
#endif
  // Pass only the message, like every other checkUnixError() call in this
  // function: the helper picks up errno itself, and any extra argument is
  // formatted into the message text (an explicit errno here would show up
  // as a stray number in front of "failed to fork").
  checkUnixError(pid, "failed to fork");
  if (pid == 0) {
    // Fork a second time if detach_ was requested.
    // This must be done before signals are restored in prepareChild()
    if (options.detach_) {
#ifdef __linux__
      if (options.cloneFlags_) {
        pid = syscall(SYS_clone, *options.cloneFlags_, 0, nullptr, nullptr);
      } else {
#endif
        if (kIsSanitizeThread) {
          // TSAN treats vfork as fork, so use the instrumented version
          // instead
          pid = AtFork::forkInstrumented(fork);
        } else {
          pid = vfork();
        }
#ifdef __linux__
      }
#endif
      if (pid == -1) {
        // Inform our parent process of the error so it can throw in the parent.
        // NOTE(review): the code below assumes childError() does not return.
        childError(errFd, kChildFailure, errno);
      } else if (pid != 0) {
        // We are the intermediate process.  Exit immediately.
        // Our child will still inform the original parent of success/failure
        // through errFd.  The pid of the grandchild process never gets
        // propagated back up to the original parent.  In the future we could
        // potentially send it back using errFd if we needed to.
        _exit(0);
      }
    }

    // From here on we are the process that will exec().
    int errnoValue = prepareChild(options, &oldSignals, childDir);
    if (errnoValue != 0) {
      childError(errFd, kChildFailure, errnoValue);
    }

    errnoValue = runChild(executable, argVec, envVec, options);
    // If we get here, exec() failed.
    childError(errFd, kExecFailure, errnoValue);
  }

  // Child is alive.  We have to be very careful about throwing after this
  // point.  We are inside the constructor, so if we throw the Subprocess
  // object will have never existed, and the destructor will never be called.
  //
  // We should only throw if we got an error via the errFd, and we know the
  // child has exited and can be immediately waited for.  In all other cases,
  // we have no way of cleaning up the child.
  pid_ = pid;
  returnCode_ = ProcessReturnCode::makeRunning();
}