utils/s2n_fork_detection.c (189 lines of code) (raw):
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
/* force the internal header to be included first, since it modifies _GNU_SOURCE/_POSIX_C_SOURCE */
/* clang-format off */
#include "utils/s2n_fork_detection_features.h"
/* clang-format on */
#include "utils/s2n_fork_detection.h"
#include "error/s2n_errno.h"
#include "utils/s2n_safety.h"
/* Make sure MADV_WIPEONFORK is always available as the value 18:
 *  - if madvise() is supported and the system headers already define
 *    MADV_WIPEONFORK, refuse to compile unless it has that value;
 *  - otherwise define it ourselves, so that the advice can still be attempted
 *    at run-time on systems whose headers do not define it.
 */
#if defined(S2N_MADVISE_SUPPORTED) && defined(MADV_WIPEONFORK)
#if (MADV_WIPEONFORK != 18)
#error "MADV_WIPEONFORK is not 18"
#endif
#else /* defined(S2N_MADVISE_SUPPORTED) && defined(MADV_WIPEONFORK) */
#define MADV_WIPEONFORK 18
#endif
/* Sometimes (for example, on FreeBSD) MAP_INHERIT_ZERO is called INHERIT_ZERO.
 * Alias it so the rest of this file only has to test for MAP_INHERIT_ZERO.
 */
#if !defined(MAP_INHERIT_ZERO) && defined(INHERIT_ZERO)
#define MAP_INHERIT_ZERO INHERIT_ZERO
#endif
/* Test-only switches. The first two let unit tests disable an individual fork
 * detection mechanism; they are set through the s2n_ignore_*_for_testing()
 * functions at the end of this file.
 */
static bool ignore_wipeonfork_or_inherit_zero_method_for_testing = false;
static bool ignore_pthread_atfork_method_for_testing = false;
/* Set by s2n_initialise_fork_detection_methods() when both switches above are
 * set, i.e. when fork detection is disabled altogether.
 */
static bool ignore_fork_detection_for_testing = false;
/* Values of the sentinel byte: a zeroed byte signals that a fork event has
 * occurred, any other value means no fork event is pending.
 */
#define S2N_FORK_EVENT 0
#define S2N_NO_FORK_EVENT 1
/* Fork generation number (FGN) state shared by all threads of the process. */
struct FGN_STATE {
/* The current cached fork generation number for this process */
uint64_t current_fork_generation_number;
/* Flag signaling whether fork detection was initialised successfully. It is
 * checked before the sentinel is dereferenced. We could use
 * zero_on_fork_addr for this, but avoid overloading by using an explicit
 * variable.
 */
bool is_fork_detection_enabled;
/* Sentinel that signals a fork event has occurred */
volatile char *zero_on_fork_addr;
/* Makes sure the fork detection methods are initialised only once */
pthread_once_t fork_detection_once;
/* Guards the sentinel and the cached fork generation number */
pthread_rwlock_t fork_detection_rw_lock;
};
/* We only need a single statically initialised state. Note, the state is
 * inherited by child processes.
 *
 * The state starts out with fork detection disabled and without a sentinel.
 * Both are filled in by s2n_initialise_fork_detection_methods_try() once the
 * fork detection methods have been set up.
 */
static struct FGN_STATE fgn_state = {
.current_fork_generation_number = 0,
.is_fork_detection_enabled = false,
.zero_on_fork_addr = NULL,
.fork_detection_once = PTHREAD_ONCE_INIT,
.fork_detection_rw_lock = PTHREAD_RWLOCK_INITIALIZER,
};
/* Best-effort attempt to enable the MADV_WIPEONFORK fork detection mechanism
 * for the page_size bytes starting at addr.
 *
 * Can currently never fail: the outcome of madvise() is deliberately
 * discarded. See s2n_initialise_fork_detection_methods_try() for the
 * motivation.
 */
static inline S2N_RESULT s2n_initialise_wipeonfork_best_effort(void *addr, long page_size)
{
#if defined(S2N_MADVISE_SUPPORTED)
    const size_t advised_length = (size_t) page_size;

    /* A failing madvise() is tolerated on purpose, so its status is dropped. */
    (void) madvise(addr, advised_length, MADV_WIPEONFORK);
#endif

    return S2N_RESULT_OK;
}
/* Enables the MAP_INHERIT_ZERO fork detection mechanism for the page_size
 * bytes starting at addr, on platforms where minherit() and MAP_INHERIT_ZERO
 * are available. Elsewhere this is a no-op that reports success.
 *
 * Unlike the MADV_WIPEONFORK variant, a failing minherit() is reported as
 * S2N_ERR_FORK_DETECTION_INIT.
 */
static inline S2N_RESULT s2n_initialise_inherit_zero(void *addr, long page_size)
{
#if defined(S2N_MINHERIT_SUPPORTED) && defined(MAP_INHERIT_ZERO)
    const int minherit_status = minherit(addr, (size_t) page_size, MAP_INHERIT_ZERO);
    RESULT_ENSURE(minherit_status == 0, S2N_ERR_FORK_DETECTION_INIT);
#endif

    return S2N_RESULT_OK;
}
static void s2n_pthread_atfork_on_fork(void)
{
/* This zeroises the first byte of the memory page pointed to by
* *zero_on_fork_addr. This is the same byte used as fork event detection
* sentinel in s2n_get_fork_generation_number(). The same memory page, and in
* turn, the byte, is also the memory zeroised by the MADV_WIPEONFORK fork
* detection mechanism.
*
* Aquire locks to be on the safe side. We want to avoid the checks in
* s2n_get_fork_generation_number() getting executed before setting the sentinel
* flag. The write lock prevents any other thread from owning any other type
* of lock.
*
* pthread_atfork_on_fork() cannot return errors. Hence, there is no way to
* gracefully recover if [un]locking fails.
*/
if (pthread_rwlock_wrlock(&fgn_state.fork_detection_rw_lock) != 0) {
printf("pthread_rwlock_wrlock() failed. Aborting.\n");
abort();
}
if (fgn_state.zero_on_fork_addr == NULL) {
printf("fgn_state.zero_on_fork_addr is NULL. Aborting.\n");
abort();
}
*fgn_state.zero_on_fork_addr = 0;
if (pthread_rwlock_unlock(&fgn_state.fork_detection_rw_lock) != 0) {
printf("pthread_rwlock_unlock() failed. Aborting.\n");
abort();
}
}
/* Registers s2n_pthread_atfork_on_fork() as the fork handler that is executed
 * in the child process after a fork. Nothing is registered, and success is
 * reported, on platforms where s2n_is_pthread_atfork_supported() is false.
 */
static S2N_RESULT s2n_inititalise_pthread_atfork(void)
{
    if (!s2n_is_pthread_atfork_supported()) {
        return S2N_RESULT_OK;
    }

    /* No prepare handler and no parent handler; only the child handler. */
    const int atfork_status = pthread_atfork(NULL, NULL, s2n_pthread_atfork_on_fork);
    RESULT_ENSURE(atfork_status == 0, S2N_ERR_FORK_DETECTION_INIT);

    return S2N_RESULT_OK;
}
/* Sets up every fork detection method that has not been disabled for testing
 * on the page at addr, then publishes the page as the fork event sentinel and
 * marks fork detection as enabled. The state is only updated once all methods
 * have been set up; on error it is left untouched.
 */
static S2N_RESULT s2n_initialise_fork_detection_methods_try(void *addr, long page_size)
{
    RESULT_GUARD_PTR(addr);

    /* Some systems don't define MADV_WIPEONFORK in sys/mman.h although the
     * kernel supports the mechanism (AL2 being a prime example), likely
     * because glibc on the system is old. We might be able to include kernel
     * header files that define MADV_WIPEONFORK directly, conditioning on
     * specific OS's, but that is a mess. A more reliable method is to probe
     * the system at run-time for madvise() support of the MADV_WIPEONFORK
     * advice. However, probing for the feature is equivalent to actually
     * attempting to initialise the MADV_WIPEONFORK fork detection. Compare
     * with s2n_probe_madv_wipeonfork_support() (used for testing).
     *
     * Instead, MADV_WIPEONFORK is initialised on a best-effort basis and
     * pthread_atfork is otherwise always required to be initialised. We also
     * currently always apply prediction resistance. So, this should be a safe
     * default.
     */
    if (!ignore_wipeonfork_or_inherit_zero_method_for_testing) {
        RESULT_GUARD(s2n_initialise_wipeonfork_best_effort(addr, page_size));
        RESULT_GUARD(s2n_initialise_inherit_zero(addr, page_size));
    }

    if (!ignore_pthread_atfork_method_for_testing) {
        RESULT_GUARD(s2n_inititalise_pthread_atfork());
    }

    /* Publish the sentinel first, then arm it through the published pointer. */
    fgn_state.zero_on_fork_addr = addr;
    *fgn_state.zero_on_fork_addr = S2N_NO_FORK_EVENT;
    fgn_state.is_fork_detection_enabled = true;

    return S2N_RESULT_OK;
}
/* Maps one private, anonymous, readable and writable page.
 *
 * The system page size is written to *page_size and the mmap() result to
 * *addr; both are written before the respective value is validated. Fails if
 * the page size is not positive or if mmap() returns MAP_FAILED.
 */
static S2N_RESULT s2n_setup_mapping(void **addr, long *page_size)
{
    const long system_page_size = sysconf(_SC_PAGESIZE);
    *page_size = system_page_size;
    RESULT_ENSURE_GT(system_page_size, 0);

    const int protection = PROT_READ | PROT_WRITE;
    const int mapping_flags = MAP_PRIVATE | MAP_ANONYMOUS;
    void *const mapping = mmap(NULL, (size_t) system_page_size, protection, mapping_flags, -1, 0);
    *addr = mapping;
    RESULT_ENSURE_NE(mapping, MAP_FAILED);

    return S2N_RESULT_OK;
}
/* One-time initialisation routine, run through pthread_once() by
 * s2n_get_fork_generation_number(). It cannot report errors; a failure leaves
 * fork detection disabled in fgn_state.
 */
static void s2n_initialise_fork_detection_methods(void)
{
    /* Only used to disable fork detection mechanisms during testing. */
    if (ignore_wipeonfork_or_inherit_zero_method_for_testing && ignore_pthread_atfork_method_for_testing) {
        ignore_fork_detection_for_testing = true;
        return;
    }

    void *sentinel_page = MAP_FAILED;
    long sentinel_page_size = 0;
    if (s2n_result_is_error(s2n_setup_mapping(&sentinel_page, &sentinel_page_size))) {
        return;
    }

    /* Now we know that we have some memory mapped. Try to initialise the fork
     * detection methods; nothing more to do if that works out.
     */
    if (!s2n_result_is_error(s2n_initialise_fork_detection_methods_try(sentinel_page, sentinel_page_size))) {
        return;
    }

    /* Initialisation failed for some reason: unmap the memory and make sure
     * the state says so. No reason to verify the return value of munmap()
     * since we can't use that information for anything anyway.
     */
    munmap(sentinel_page, (size_t) sentinel_page_size);
    fgn_state.zero_on_fork_addr = NULL;
    fgn_state.is_fork_detection_enabled = false;
}
/* s2n_get_fork_generation_number returns S2N_RESULT_OK on success and
 * S2N_RESULT_ERROR otherwise.
 *
 * On success, returns the current fork generation number in
 * return_fork_generation_number. Caller must synchronise access to
 * return_fork_generation_number.
 *
 * return_fork_generation_number must not be NULL; a NULL pointer is rejected
 * with an error instead of being dereferenced.
 *
 * When fork detection has been disabled for testing, the function succeeds
 * without writing to return_fork_generation_number.
 */
S2N_RESULT s2n_get_fork_generation_number(uint64_t *return_fork_generation_number)
{
    /* Every path below that reports a number writes through this pointer. */
    RESULT_ENSURE(return_fork_generation_number != NULL, S2N_ERR_RETRIEVE_FORK_GENERATION_NUMBER);

    RESULT_ENSURE(pthread_once(&fgn_state.fork_detection_once, s2n_initialise_fork_detection_methods) == 0, S2N_ERR_FORK_DETECTION_INIT);

    if (ignore_fork_detection_for_testing == true) {
        /* Fork detection is meant to be disabled. Hence, return success.
         * This should only happen during testing.
         */
        RESULT_ENSURE(s2n_in_unit_test(), S2N_ERR_NOT_IN_UNIT_TEST);
        return S2N_RESULT_OK;
    }

    /* Initialisation cannot report errors itself; a failed initialisation
     * shows up here as fork detection not being enabled.
     */
    RESULT_ENSURE(fgn_state.is_fork_detection_enabled == true, S2N_ERR_FORK_DETECTION_INIT);

    /* In most cases, we would not need to increment the fork generation number.
     * So, it is cheaper, in the expected case, to take an optimistic read lock
     * and later acquire a write lock if needed.
     * Note that we set the returned fgn before checking for a fork event. We
     * need to do this because thread execution might change between releasing
     * the read lock and taking the write lock. In that time span, another
     * thread can reset the fork event detection sentinel and we return from
     * s2n_get_fork_generation_number() without setting the returned fgn
     * appropriately.
     */
    RESULT_ENSURE(pthread_rwlock_rdlock(&fgn_state.fork_detection_rw_lock) == 0, S2N_ERR_RETRIEVE_FORK_GENERATION_NUMBER);
    *return_fork_generation_number = fgn_state.current_fork_generation_number;
    if (*fgn_state.zero_on_fork_addr != S2N_FORK_EVENT) {
        /* No fork event detected. */
        RESULT_ENSURE(pthread_rwlock_unlock(&fgn_state.fork_detection_rw_lock) == 0, S2N_ERR_RETRIEVE_FORK_GENERATION_NUMBER);
        return S2N_RESULT_OK;
    }
    RESULT_ENSURE(pthread_rwlock_unlock(&fgn_state.fork_detection_rw_lock) == 0, S2N_ERR_RETRIEVE_FORK_GENERATION_NUMBER);

    /* We are mutating the process-global, cached fork generation number. Need
     * to acquire the write lock for that. Set returned fgn before checking the
     * if condition with the same reasons as above.
     */
    RESULT_ENSURE(pthread_rwlock_wrlock(&fgn_state.fork_detection_rw_lock) == 0, S2N_ERR_RETRIEVE_FORK_GENERATION_NUMBER);
    *return_fork_generation_number = fgn_state.current_fork_generation_number;
    if (*fgn_state.zero_on_fork_addr == S2N_FORK_EVENT) {
        /* Fork event has been detected; reset sentinel, increment cached fork
         * generation number (which is now "current" in this child process), and
         * write incremented fork generation number to the output parameter.
         */
        *fgn_state.zero_on_fork_addr = S2N_NO_FORK_EVENT;
        fgn_state.current_fork_generation_number = fgn_state.current_fork_generation_number + 1;
        *return_fork_generation_number = fgn_state.current_fork_generation_number;
    }
    RESULT_ENSURE(pthread_rwlock_unlock(&fgn_state.fork_detection_rw_lock) == 0, S2N_ERR_RETRIEVE_FORK_GENERATION_NUMBER);

    return S2N_RESULT_OK;
}
/* Cleanup callback that releases the one-page probe mapping stored in
 * *probe_addr.
 *
 * Does nothing when there is no mapping to release, i.e. when probe_addr or
 * *probe_addr is NULL, or when *probe_addr still holds MAP_FAILED because the
 * mapping was never established. MAP_FAILED is not a page-aligned address, so
 * handing it to munmap() would only produce a failing system call that
 * clobbers errno.
 */
static void s2n_cleanup_cb_munmap(void **probe_addr)
{
    if (probe_addr == NULL || *probe_addr == NULL || *probe_addr == MAP_FAILED) {
        return;
    }

    /* sysconf() reports failure as -1; never turn that into a huge length. */
    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        return;
    }

    /* Nothing useful can be done if unmapping fails, so the status is dropped. */
    (void) munmap(*probe_addr, (size_t) page_size);

    /* Mark the mapping as gone so that it cannot be released twice. */
    *probe_addr = MAP_FAILED;
}
/* Run-time probe checking whether the system supports the MADV_WIPEONFORK fork
 * detection mechanism. Succeeds only if a freshly mapped probe page accepts
 * the MADV_WIPEONFORK advice; always fails when madvise() is not supported.
 */
static S2N_RESULT s2n_probe_madv_wipeonfork_support(void)
{
    /* The probe page is released by the cleanup callback on every exit path,
     * including the one where no page was ever mapped.
     */
    DEFER_CLEANUP(void *probe_addr = MAP_FAILED, s2n_cleanup_cb_munmap);
    long page_size = 0;
    bool is_supported = false;

    RESULT_GUARD(s2n_setup_mapping(&probe_addr, &page_size));

#if defined(S2N_MADVISE_SUPPORTED)
    const size_t probe_length = (size_t) page_size;

    /* Some versions of qemu (up to at least 5.0.0-rc4, see
     * linux-user/syscall.c) ignore invalid advice arguments. Hence, we first
     * verify that madvise() rejects advice arguments it doesn't know about.
     */
    const int invalid_advice_status = madvise(probe_addr, probe_length, -1);
    RESULT_ENSURE_NE(invalid_advice_status, 0);

    const int wipeonfork_status = madvise(probe_addr, probe_length, MADV_WIPEONFORK);
    RESULT_ENSURE_EQ(wipeonfork_status, 0);

    is_supported = true;
#endif

    RESULT_ENSURE_EQ(is_supported, true);
    return S2N_RESULT_OK;
}
/* Returns true if the run-time probe for the MADV_WIPEONFORK fork detection
 * mechanism succeeds, and false otherwise.
 */
bool s2n_is_madv_wipeonfork_supported(void)
{
    if (s2n_result_is_ok(s2n_probe_madv_wipeonfork_support())) {
        return true;
    }
    return false;
}
/* Returns true if this build can use the MAP_INHERIT_ZERO fork detection
 * mechanism, i.e. if both minherit() support and MAP_INHERIT_ZERO were
 * available at compile-time, and false otherwise.
 */
bool s2n_is_map_inherit_zero_supported(void)
{
    bool is_supported = false;

#if defined(S2N_MINHERIT_SUPPORTED) && defined(MAP_INHERIT_ZERO)
    is_supported = true;
#endif

    return is_supported;
}
/* Returns true if the pthread_atfork fork detection mechanism may be used on
 * this platform, which is the case everywhere except on OpenBSD.
 */
bool s2n_is_pthread_atfork_supported(void)
{
    /*
     * There is a bug in OpenBSD's libc which is triggered by
     * multi-generational forking of multi-threaded processes which call
     * pthread_atfork(3). Under these conditions, a grandchild process will
     * deadlock when trying to fork a great-grandchild.
     * https://marc.info/?l=openbsd-tech&m=167047636422884&w=2
     */
#if !defined(__OpenBSD__)
    return true;
#else
    return false;
#endif
}
/* Use for testing only.
 *
 * Disables the MADV_WIPEONFORK and MAP_INHERIT_ZERO fork detection mechanisms.
 * Fails with S2N_ERR_NOT_IN_UNIT_TEST when not called from a unit test.
 */
S2N_RESULT s2n_ignore_wipeonfork_and_inherit_zero_for_testing(void)
{
    const bool called_from_unit_test = s2n_in_unit_test();
    RESULT_ENSURE(called_from_unit_test, S2N_ERR_NOT_IN_UNIT_TEST);

    ignore_wipeonfork_or_inherit_zero_method_for_testing = true;

    return S2N_RESULT_OK;
}
/* Use for testing only.
 *
 * Disables the pthread_atfork fork detection mechanism. Fails with
 * S2N_ERR_NOT_IN_UNIT_TEST when not called from a unit test.
 */
S2N_RESULT s2n_ignore_pthread_atfork_for_testing(void)
{
    const bool called_from_unit_test = s2n_in_unit_test();
    RESULT_ENSURE(called_from_unit_test, S2N_ERR_NOT_IN_UNIT_TEST);

    ignore_pthread_atfork_method_for_testing = true;

    return S2N_RESULT_OK;
}