glean/rts/binary.h (299 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include <folly/FBString.h>
#include <folly/Range.h>
#include <folly/Varint.h>

#include "common/hs/util/cpp/memory.h"
#include "glean/rts/id.h"
#include "glean/rts/error.h"
#include "glean/rts/nat.h"
#include "glean/rts/string.h"
namespace facebook {
namespace glean {
namespace binary {
/// View the contents of a byte vector as a ByteRange. The returned range
/// refers to the vector's own storage; nothing is copied.
inline folly::ByteRange byteRange(const std::vector<unsigned char>& s) {
  const unsigned char* const first = s.data();
  return folly::ByteRange(first, first + s.size());
}
/// View the characters of a std::string as a ByteRange. The returned range
/// refers to the string's own storage; nothing is copied.
inline folly::ByteRange byteRange(const std::string& s) {
  const auto* const first = reinterpret_cast<const unsigned char*>(s.data());
  return folly::ByteRange(first, s.size());
}
/// View the characters of a folly::fbstring as a ByteRange. The returned
/// range refers to the string's own storage; nothing is copied.
inline folly::ByteRange byteRange(const folly::fbstring& s) {
  const auto* const first = reinterpret_cast<const unsigned char*>(s.data());
  return folly::ByteRange(first, s.size());
}
/// View a NUL-terminated C string as a ByteRange; the terminator is not part
/// of the range. The range points into 's' rather than copying it.
///
/// 's' must not be null: its length is measured with std::strlen.
inline folly::ByteRange stringRange(const char* s) {
  const auto* const first = reinterpret_cast<const unsigned char*>(s);
  // Use the std:: spelling like the rest of this header (std::memcmp,
  // std::memcpy) instead of relying on the global ::strlen being visible.
  return folly::ByteRange(first, std::strlen(s));
}
/// Copy the bytes of 'r' into a freshly constructed std::string.
inline std::string mkString(folly::ByteRange r) {
  const auto* const chars = reinterpret_cast<const char*>(r.data());
  return std::string(chars, r.size());
}
/// Copy the bytes of 'r' into a freshly constructed folly::fbstring.
inline folly::fbstring mkFbstring(folly::ByteRange r) {
  const auto* const chars = reinterpret_cast<const char*>(r.data());
  return folly::fbstring(chars, r.size());
}
/// Return the smallest sequence of bytes that is lexicographically larger than
/// any sequence with the prefix 'range' or the empty sequence if no such
/// sequence exists. Example:
///
/// lexicographicallyNext({0x25, 0x42, 0xFF, 0xFF}) == {0x25, 0x43}
/// lexicographicallyNext({0xFF}) == {}
std::vector<unsigned char> lexicographicallyNext(folly::ByteRange range);
namespace detail {
/// Describes how a value of type T is converted to and from the word that
/// represents it in a binary buffer. Only the specialisations are defined.
template <typename T, typename Enable = void>
struct word_traits;

/// Integral and enumeration types (ignoring a top-level const) are their own
/// word representation, so both conversions are the identity.
template <typename T>
struct word_traits<
    T,
    std::enable_if_t<
        std::is_integral<std::remove_const_t<T>>::value ||
        std::is_enum<std::remove_const_t<T>>::value>> {
  using word_type = T;

  static T fromWord(word_type word) {
    return word;
  }

  static word_type toWord(T value) {
    return value;
  }
};
/// Typed ids are converted through the id type's own fromWord/toWord, and use
/// the id's word_type as their word representation.
template <typename T>
struct word_traits<rts::WordId<T>> {
  using word_type = typename rts::WordId<T>::word_type;

  static rts::WordId<T> fromWord(word_type word) {
    return rts::WordId<T>::fromWord(word);
  }

  static word_type toWord(rts::WordId<T> id) {
    return id.toWord();
  }
};
}
struct Output;
/**
* A binary buffer which can be read from.
*
* NOTE: It does not own the memory!
*
*/
struct Input {
folly::ByteRange buf;
Input() {}
explicit Input(const folly::ByteRange& b) : buf(b) {}
explicit Input(const std::string* s)
: buf(reinterpret_cast<const unsigned char*>(s->data()), s->size()) {}
explicit Input(const folly::fbstring* s)
: buf(reinterpret_cast<const unsigned char*>(s->data()), s->size()) {}
Input(const void* p, size_t n)
: buf(static_cast<const unsigned char*>(p), n) {}
Input(const void* start, const void* finish)
: buf(static_cast<const unsigned char*>(start),
static_cast<const unsigned char*>(finish)) {}
void wantError(size_t n) const {
rts::error("truncated input: expected {} bytes, got {}", n, buf.size());
}
/// Ensure that there are at least n bytes left
inline void want(size_t n) const {
if (buf.size() < n) {
wantError(n);
}
}
bool empty() const {
return buf.empty();
}
/// Read a fixed width number.
template <typename T>
T fixed() {
using word_type = typename detail::word_traits<T>::word_type;
const auto n = sizeof(word_type);
want(n);
const void* p = buf.data();
buf.uncheckedAdvance(n);
return detail::word_traits<T>::fromWord(folly::loadUnaligned<word_type>(p));
}
/// Read a packed unsigned number
template <typename T>
inline T packed() {
auto temp = buf;
if (auto r = folly::tryDecodeVarint(temp)) {
buf = temp;
return detail::word_traits<T>::fromWord(
static_cast<typename detail::word_traits<T>::word_type>(r.value()));
} else {
rts::error("invalid packed value");
}
}
/// Validate and read an encoded nat
inline uint64_t untrustedNat() {
auto r = rts::loadUntrustedNat(buf.begin(), buf.end());
if (r.second != nullptr) {
buf = {r.second, buf.end()};
return r.first;
} else {
rts::error("invalid nat");
}
}
/// Read an encoded nat without any checks
inline uint64_t trustedNat() {
auto r = rts::loadTrustedNat(buf.begin());
assert(r.second <= buf.end());
buf = {r.second, buf.end()};
return r.first;
}
/// Validate and skip over an encoded nat
inline void skipUntrustedNat() {
auto p = rts::skipUntrustedNat(buf.begin(), buf.end());
if (p != nullptr) {
buf = {p, buf.end()};
} else {
rts::error("invalid nat");
}
}
/// Skip over an encoded nat without any checks
inline void skipTrustedNat() {
auto p = rts::skipTrustedNat(buf.begin());
buf = {p, buf.end()};
}
uint8_t byte() {
want(1);
auto c = *buf.data();
buf.uncheckedAdvance(1);
return c;
}
/// Read n bytes
folly::ByteRange bytes(size_t n) {
want(n);
auto p = buf.data();
buf.uncheckedAdvance(n);
return folly::ByteRange(p, n);
}
/// Read the rest of the input
folly::ByteRange bytes() {
return bytes(buf.size());
}
const unsigned char* data() const {
return buf.data();
}
const unsigned char* end() const {
return buf.end();
}
size_t size() const {
return buf.size();
}
/// Validate and skip over a mangled UTF-8 string.
void skipUntrustedString() {
buf.uncheckedAdvance(rts::validateUntrustedString(buf));
}
/// Validate and skip over a mangled string, writing its demangled
/// representation into the Output
void demangleUntrustedString(Output& output) {
buf.uncheckedAdvance(rts::demangleUntrustedString(buf, output));
}
/// Skip over a trusted mangled string and return its *demangled* size.
size_t skipTrustedString() {
auto r = rts::skipTrustedString(buf);
buf.uncheckedAdvance(r.first);
return r.second;
}
template <typename T>
T generic_string(size_t n) {
folly::ByteRange r = bytes(n);
return T(reinterpret_cast<const char*>(r.data()), r.size());
}
folly::fbstring fbstring(size_t n) {
return generic_string<folly::fbstring>(n);
}
folly::fbstring fbstring() {
return fbstring(buf.size());
}
std::string string(size_t n) {
return generic_string<std::string>(n);
}
std::string string() {
return string(buf.size());
}
bool shift(folly::ByteRange pat) {
auto n = pat.size();
if (buf.size() >= n && !std::memcmp(buf.data(), pat.data(), n)) {
buf.uncheckedAdvance(n);
return true;
} else {
return false;
}
}
}; // namespace binary
/**
*
* A binary buffer which can be written to.
*
*/
struct Output {
Output() {}
Output(Output&&) = default;
Output& operator=(Output&&) = default;
Output(const Output&) = delete;
void operator=(const Output&) = delete;
size_t size() const {
return buf.size();
}
const unsigned char* data() const {
return buf.data();
}
// Write a packed unsigned number
template <typename T>
void packed(T x) {
auto p = buf.buffer(folly::kMaxVarintLength64);
auto n = folly::encodeVarint(
static_cast<uint64_t>(detail::word_traits<T>::toWord(x)), p);
buf.use(n);
}
/// Write an encoded nat
inline void nat(uint64_t x) {
auto p = buf.buffer(rts::MAX_NAT_SIZE);
auto n = rts::storeNat(p, x);
buf.use(n);
}
// Write a fixed width number
template <typename T>
void fixed(T x) {
const auto w = detail::word_traits<T>::toWord(x);
bytes(&w, sizeof(w));
}
void put(folly::ByteRange bytes) {
this->bytes(bytes.begin(), bytes.size());
}
void bytes(const void* data, size_t size) {
if (size > 0) {
auto b = buf.grab(size);
std::memcpy(b, data, size);
}
}
void expect(size_t n) {
(void)buf.buffer(n);
}
/// Store the mangled representation of a UTF-8 string. The validity of the
/// string isn't checked.
void mangleString(folly::ByteRange r) {
rts::mangleString(r, *this);
}
folly::ByteRange bytes() & {
return buf.to<folly::ByteRange>();
}
folly::fbstring fbstring() const {
return buf.to<folly::fbstring>();
}
std::string string() const {
return buf.to<std::string>();
}
hs::ffi::malloced_array<uint8_t> moveBytes() {
return buf.moveBytes();
}
folly::fbstring moveToFbString() {
return buf.moveToFbString();
}
private:
/// Simple implementation of a growable buffer. We need to be able to get
/// ownership of the underlying memory which only folly::fbstring and
/// folly::IOBuf seem to provide but both seem to be significantly slower
/// (the latter more so than the former).
class Buf {
public:
Buf() {
len = 0;
}
Buf(Buf&&) = default;
Buf& operator=(Buf&&) = default;
Buf(const Buf&) = delete;
Buf& operator=(const Buf&) = delete;
size_t size() const {
return len;
}
size_t capacity() const {
return buf.size();
}
const unsigned char* data() const {
return buf.get();
}
/// Return a point to enough space for n bytes. This reserves memory but
/// doesn't increase the size - this can be done via 'use' afterwards.
unsigned char* buffer(size_t n) {
if (n > capacity() - size()) {
realloc(n);
}
return buf.get() + size();
}
/// Increase the size of the buffer. This doesn't reserve memory so the
/// new size must be <= capacity.
void use(size_t n) {
assert(capacity() - size() >= n);
len += n;
}
/// Increase the buffer size by n and return a pointer to the new memory.
unsigned char* grab(size_t n) {
auto p = buffer(n);
use(n);
return p;
}
hs::ffi::malloced_array<uint8_t> moveBytes() {
buf.prune(len);
return std::move(buf);
}
/// Transfer ownership of the underlying memory to a folly::fbstring.
folly::fbstring moveToFbString();
/// Create a container
template<typename C> C to() const {
return C(buf.get(), buf.get() + size());
}
private:
void realloc(size_t n);
hs::ffi::malloced_array<uint8_t> buf;
size_t len;
};
Buf buf;
};
} // namespace binary
} // namespace glean
} // namespace facebook