Source/PLCrashSysctl.c (74 lines of code) (raw):
/*
* Author: Landon Fuller <landonf@plausiblelabs.com>
*
* Copyright (c) 2008-2013 Plausible Labs Cooperative, Inc.
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "PLCrashSysctl.h"
#include <string.h>
#include <errno.h>
/**
* @internal
* @defgroup plcrash_host Host and Process Info
* @ingroup plcrash_internal
*
* Implements general utility functions for gathering host/process statistics.
* @{
*/
/**
 * @internal
 *
 * Wrap sysctlbyname(), automatically allocating a sufficiently large buffer for the returned data.
 *
 * The required size is queried first, a buffer of that size is allocated, and the value is then fetched. Because
 * the size of the value may change between the two calls, the sequence is repeated for as long as the fetch fails
 * with ENOMEM.
 *
 * @param name The sysctl MIB name.
 * @param length On success, will be populated with the length of the result, in bytes. If NULL, length will not
 * be supplied.
 *
 * @return Returns a malloc-allocated buffer containing the sysctl result on success. The returned pointer is never
 * NULL on success, even if the result is zero bytes long. On failure (including allocation failure), NULL is
 * returned and the global variable errno is set to indicate the error. The caller is responsible for free()'ing
 * the returned buffer.
 */
static void *plcrash_sysctl_malloc (const char *name, size_t *length) {
    void *result = NULL;
    size_t result_len = 0;
    int ret;

    /* If our buffer is too small after allocation, loop until it succeeds -- the requested destination size
     * may change after each iteration. */
    do {
        /* Discard the undersized buffer left over from a previous iteration (no-op on the first pass). */
        free(result);
        result = NULL;

        /* Fetch the expected length */
        if ((ret = sysctlbyname(name, NULL, &result_len, NULL, 0)) == -1)
            break;

        /* Allocate the destination buffer. At least one byte is always requested, as malloc(0) is permitted to
         * return NULL, which would make a zero-length result indistinguishable from a failure. */
        result = malloc(result_len > 0 ? result_len : 1);
        if (result == NULL) {
            /* Passing a NULL buffer on to sysctlbyname() would merely re-query the length and report success;
             * fail explicitly instead. */
            errno = ENOMEM;
            return NULL;
        }

        /* Fetch the value */
        ret = sysctlbyname(name, result, &result_len, NULL, 0);
    } while (ret == -1 && errno == ENOMEM);

    /* Handle failure, preserving the errno reported by sysctlbyname() across the call to free(). */
    if (ret == -1) {
        int saved_errno = errno;
        free(result);
        errno = saved_errno;
        return NULL;
    }

    /* Provide the length */
    if (length != NULL)
        *length = result_len;

    return result;
}
/**
 * Wrap sysctlbyname() and fetch a C string, automatically allocating a sufficiently large buffer for the
 * returned data.
 *
 * The fetched bytes are checked for a NUL terminator; if none is present within the reported length (including
 * the case of a zero-length result), the buffer is grown by one byte and terminated, so that the returned
 * pointer can always be used as a C string.
 *
 * @param name The sysctl MIB name.
 *
 * @return Returns a malloc-allocated NUL-terminated C string containing the sysctl result on success. On failure,
 * NULL is returned and the global variable errno is set to indicate the error. The caller is responsible for
 * free()'ing the returned buffer.
 */
char *plcrash_sysctl_string (const char *name) {
    size_t len = 0;
    char *value = plcrash_sysctl_malloc(name, &len);
    if (value == NULL)
        return NULL;

    /* The common case: a terminator is already present within the fetched bytes. */
    if (memchr(value, '\0', len) != NULL)
        return value;

    /* No terminator was found; append one rather than handing back an unterminated buffer. */
    char *terminated = realloc(value, len + 1);
    if (terminated == NULL) {
        /* realloc() leaves the original allocation untouched on failure; release it ourselves. */
        free(value);
        errno = ENOMEM;
        return NULL;
    }

    terminated[len] = '\0';
    return terminated;
}
/**
 * Fetch an integer-valued sysctl by name.
 *
 * The value is written directly into @a result by sysctlbyname(), using a destination size of
 * sizeof(int).
 *
 * @param name The sysctl MIB name.
 * @param result Destination for the integer value; written by sysctlbyname().
 *
 * @return Returns true if sysctlbyname() reported success (a return value of 0), false otherwise. On failure,
 * the global variable errno is left as set by sysctlbyname().
 */
bool plcrash_sysctl_int (const char *name, int *result) {
    /* In/out size argument: the capacity of the destination on entry. */
    size_t value_size = sizeof(*result);

    return sysctlbyname(name, result, &value_size, NULL, 0) == 0;
}
/**
 * Find the byte length of @a s, minus any invalid trailing multibyte sequences.
 *
 * This function is primarily useful for extracting a valid UTF-8 string from a fixed buffer length returned by the
 * kernel; the kernel will copy the UTF-8 string directly into the target buffer, resulting in dangling multi-byte
 * characters that prevent decoding by strict UTF-8 decoders.
 *
 * Scanning stops at the first of: @a maxlen bytes, a NUL byte, or the first byte sequence that is not well-formed
 * UTF-8. Overlong encodings, encoded surrogates (U+D800 to U+DFFF) and values above U+10FFFF are treated as
 * ill-formed. A leading byte order mark (EF BB BF) is accepted and is included in the returned length.
 *
 * @param s The string buffer to scan.
 * @param maxlen The maximum number of bytes that will be scanned in @a s.
 * @return Returns the number of valid utf-8 bytes that precede maxlen, or should maxlen exceed the string length, the
 * string's terminating NUL character.
 *
 * @warning This function returns the byte length, not the code point length, of the valid UTF-8 encoded string data.
 */
size_t plcrash_sysctl_valid_utf8_bytes_max (const uint8_t *s, size_t maxlen) {
    /*
     * For the official specification documenting the multibyte encoding, refer to:
     *   The Unicode Standard, Version 6.2 - Core Specification
     *   Chapter 3, Section 9 - Unicode Encoding Forms
     *
     * UTF-8 uses a variable-width encoding, with each code point corresponding to
     * a 1, 2, 3, or 4 byte sequence.
     *
     * +---------------------+----------+----------+----------+----------+
     * | Code Point Bit Size | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
     * +---------------------+----------+----------+----------+----------+
     * | 7                   | 0xxxxxxx |          |          |          |
     * | 11                  | 110xxxxx | 10xxxxxx |          |          |
     * | 16                  | 1110xxxx | 10xxxxxx | 10xxxxxx |          |
     * | 21                  | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx |
     * +---------------------+----------+----------+----------+----------+
     *
     * Not every bit pattern matching the table above is well-formed. The permitted byte ranges are:
     *
     * +------------+----------+----------+----------+----------+
     * | Code Points| 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
     * +------------+----------+----------+----------+----------+
     * | 0080-07FF  | C2..DF   | 80..BF   |          |          |
     * | 0800-0FFF  | E0       | A0..BF   | 80..BF   |          |
     * | 1000-CFFF  | E1..EC   | 80..BF   | 80..BF   |          |
     * | D000-D7FF  | ED       | 80..9F   | 80..BF   |          |
     * | E000-FFFF  | EE..EF   | 80..BF   | 80..BF   |          |
     * | 10000-3FFFF| F0       | 90..BF   | 80..BF   | 80..BF   |
     * | 40000-FFFFF| F1..F3   | 80..BF   | 80..BF   | 80..BF   |
     * | 100000-    | F4       | 80..8F   | 80..BF   | 80..BF   |
     * |   10FFFF   |          |          |          |          |
     * +------------+----------+----------+----------+----------+
     */

    /* The current string byte position; everything before it has been validated. */
    size_t len = 0;

    /* Handle (and skip) an initial BOM */
    if (maxlen >= 3 && s[0] == 0xEF && s[1] == 0xBB && s[2] == 0xBF)
        len += 3;

    /* Work forwards, validating one complete sequence per iteration. */
    while (len < maxlen && s[len] != '\0') {
        const uint8_t lead = s[len];

        /* 1 byte sequence. Code point value range is 0 to 127. */
        if (lead < 0x80) {
            len++;
            continue;
        }

        /* Determine the number of trailing bytes, and the range permitted for the first of them. Only the
         * second byte of a sequence is ever restricted beyond the generic 80..BF trailing byte range. */
        size_t trailing;
        uint8_t second_min = 0x80;
        uint8_t second_max = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            /* 2 byte sequence. C0 and C1 could only encode overlong forms and are rejected below. */
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            /* 3 byte sequence. */
            trailing = 2;
            if (lead == 0xE0)
                second_min = 0xA0; /* Exclude overlong encodings of values below U+0800 */
            else if (lead == 0xED)
                second_max = 0x9F; /* Exclude the surrogate range U+D800 to U+DFFF */
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            /* 4 byte sequence. */
            trailing = 3;
            if (lead == 0xF0)
                second_min = 0x90; /* Exclude overlong encodings of values below U+10000 */
            else if (lead == 0xF4)
                second_max = 0x8F; /* Exclude values above U+10FFFF */
        } else {
            /* A stray trailing byte (80..BF), or a byte that can never start a well-formed sequence. */
            return len;
        }

        /* Verify that the trailing bytes fit within maxlen. len < maxlen holds here, so the subtraction
         * cannot wrap. */
        if (maxlen - (len + 1) < trailing)
            return len;

        /* Validate the second byte against its lead-specific range. A NUL byte (premature end of string) falls
         * below every permitted range and is rejected here as well. */
        if (s[len + 1] < second_min || s[len + 1] > second_max)
            return len;

        /* Validate any remaining trailing bytes. Returning at the first mismatch guarantees that no byte
         * beyond a NUL terminator is ever read. */
        for (size_t i = 2; i <= trailing; i++) {
            if ((s[len + i] & 0xC0) != 0x80)
                return len;
        }

        /* Fully validated; step over the lead byte and its trailing bytes. */
        len += trailing + 1;
    }

    return len;
}
/**
 * Find the byte length of the NUL-terminated buffer @a s, minus any invalid truncated multibyte sequences.
 *
 * This is a convenience wrapper around plcrash_sysctl_valid_utf8_bytes_max() that uses the position of the
 * terminating NUL character as the scan limit. It is primarily useful for extracting a valid UTF-8 string from a
 * fixed buffer length returned by the kernel; the kernel will copy the UTF-8 string directly into the target
 * buffer, resulting in dangling multi-byte characters that prevent decoding by strict UTF-8 decoders.
 *
 * @param s The NUL-terminated string buffer to scan.
 * @return Returns the number of valid utf-8 bytes that precede the string's terminating NUL character.
 *
 * @warning This function returns the byte length, not the code point length, of the valid UTF-8 encoded string data.
 */
size_t plcrash_sysctl_valid_utf8_bytes (const uint8_t *s) {
    /* Locating the terminator up front costs an extra pass over the buffer, but keeps all of the validation
     * logic in one place; performance is not the primary aim here. */
    const size_t nul_offset = strlen((const char *) s);

    return plcrash_sysctl_valid_utf8_bytes_max(s, nul_offset);
}
/**
 * @}
 */