glean/rts/ffi.cpp (889 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include "common/hs/util/cpp/ffi.h" #include "common/hs/util/cpp/memory.h" #include "common/hs/util/cpp/wrap.h" #include "glean/if/gen-cpp2/glean_types.h" #include "glean/rts/bytecode/subroutine.h" #include "glean/rts/cache.h" #include "glean/rts/ffi.h" #include "glean/rts/id.h" #include "glean/rts/lookup.h" #include "glean/rts/ownership.h" #include "glean/rts/ownership/slice.h" #include "glean/rts/query.h" #include "glean/rts/sanity.h" #include "glean/rts/stacked.h" #include "glean/rts/string.h" #include "glean/rts/substitution.h" #include "glean/rts/validate.h" #include <folly/Exception.h> #include <vector> #include <algorithm> #ifdef OSS #include <cpp/HsStruct.h> #else #include <common/hs/util/cpp/HsStruct.h> #endif using namespace facebook::hs; namespace facebook { namespace glean { namespace rts { namespace c { struct SharedLookupCacheStats { std::shared_ptr<facebook::glean::rts::LookupCache::Stats> value; }; struct SharedSubroutine { std::shared_ptr<facebook::glean::rts::Subroutine> value; }; namespace { template<typename F> const char *pop_value(const void **start, const void *end, F f) { return ffi::wrap([=]() { binary::Input input(*start, end); f(input); *start = input.data(); }); } } extern "C" { const char *glean_inventory_new( size_t count, const int64_t *ids, const void * const *name_ptrs, const size_t *name_sizes, const int32_t *versions, SharedSubroutine * const *typecheckers, SharedSubroutine * const *traversals, Inventory **inventory) { return ffi::wrap([=]{ std::vector<rts::Predicate> predicates; predicates.reserve(count); for (size_t i = 0; i < count; ++i) { predicates.push_back(rts::Predicate{ Pid::fromThrift(ids[i]), std::string(static_cast<const char *>(name_ptrs[i]), name_sizes[i]), versions[i], typecheckers[i]->value, traversals[i]->value }); } *inventory = new Inventory(std::move(predicates)); }); } void glean_inventory_free(Inventory *inventory) { ffi::free_(inventory); } const char *glean_inventory_predicates( Inventory *inventory, size_t *count, const Predicate ***predicates) { return ffi::wrap([=] { const auto preds = inventory->predicates(); const auto n = preds.size(); *count = n; *predicates = ffi::clone_array(preds.data(), preds.size()).release(); }); } const char *glean_inventory_serialize( Inventory *inventory, const void **data, size_t *size) { return ffi::wrap([=] { ffi::clone_bytes(inventory->serialize()).release_to(data, size); }); } const char *glean_inventory_deserialize( const void *data, size_t size, Inventory **inventory) { return ffi::wrap([=] { *inventory = new Inventory(Inventory::deserialize( {reinterpret_cast<const unsigned char *>(data), size})); }); } const char *glean_predicate_unpack( Predicate *predicate, int64_t *id, const void **name, size_t *name_size, int32_t *version) { return ffi::wrap([=] { *id = predicate->id.toThrift(); *name = predicate->name.data(); *name_size = predicate->name.size(); *version = predicate->version; }); } const char *glean_inventory_equal( const Inventory *first, const Inventory *second, bool *result) { return ffi::wrap([=] { *result = *first == *second; }); } const char *glean_snapshot_new( Lookup *base, int64_t boundary, Lookup **snapshot) { return ffi::wrap([=] { *snapshot = new Snapshot(base, Id::fromThrift(boundary)); }); } const char *glean_lookupcache_stats_new(SharedLookupCacheStats **stats) { return ffi::wrap([=] { *stats = new SharedLookupCacheStats{ std::make_shared<LookupCache::Stats>() }; }); } void glean_lookupcache_stats_free(SharedLookupCacheStats *stats) { ffi::free_(stats); } void glean_lookupcache_stats_read_and_reset_counters( SharedLookupCacheStats *stats, uint64_t *values, size_t size) { auto buffer = stats->value->readAndResetCounters(); if (size == buffer.size()) { std::copy(buffer.begin(), buffer.end(), values); } else { LOG(ERROR) << "glean_lookupcache_stats_read_and_reset_counters: invalid size"; std::fill(values, values+size, 0); } } const char *glean_lookupcache_new( size_t capacity, size_t shards, SharedLookupCacheStats *stats, LookupCache **cache) { return ffi::wrap([=] { *cache = new LookupCache( LookupCache::Options{capacity, shards}, stats->value); }); } void glean_lookupcache_free(LookupCache *cache) { ffi::free_(cache); } const char *glean_lookupcache_clear(LookupCache *cache) { return ffi::wrap([=] { cache->clear(); }); } const char *glean_lookupcache_anchor_new( Lookup *base, LookupCache *cache, Lookup **anchor) { return ffi::wrap([=] { *anchor = new LookupCache::Anchor(cache->anchor(base)); }); } void glean_lookupcache_anchor_free(Lookup *anchor) { ffi::free_(anchor); } void glean_interrupt_running_queries() { interruptRunningQueries(); } const char *glean_query_execute_compiled( Inventory *inventory, Define *facts, DefineOwnership *ownership, SharedSubroutine *sub, uint64_t pid, SharedSubroutine *traverse, uint64_t max_results, uint64_t max_bytes, uint64_t max_time_ms, uint64_t depth, uint64_t *expand_pids, uint64_t num_expand_pids, uint64_t want_stats, QueryResults **presults ) { return ffi::wrap([=]() { std::unordered_set<Pid, folly::hasher<Pid>> expandPids; if (expand_pids) { expandPids = std::unordered_set<Pid, folly::hasher<Pid>>( reinterpret_cast<Pid*>(expand_pids), reinterpret_cast<Pid*>(expand_pids) + num_expand_pids); } *presults = executeQuery( *inventory, *facts, ownership, *(sub->value), Pid::fromWord(pid), traverse ? traverse->value : nullptr, max_results == 0 ? folly::none : folly::Optional<uint64_t>(max_results), max_bytes == 0 ? folly::none : folly::Optional<uint64_t>(max_bytes), max_time_ms == 0 ? folly::none : folly::Optional<uint64_t>(max_time_ms), static_cast<Depth>(depth), expandPids, want_stats, folly::none ).release(); }); } const char *glean_query_restart_compiled( Inventory *inventory, Define *facts, DefineOwnership *ownership, void *cont, int64_t cont_size, uint64_t max_results, uint64_t max_bytes, uint64_t max_time_ms, uint64_t depth, uint64_t *expand_pids, uint64_t num_expand_pids, uint64_t want_stats, QueryResults **presults ) { return ffi::wrap([=]() { std::unordered_set<Pid, folly::hasher<Pid>> expandPids; if (expand_pids) { expandPids = std::unordered_set<Pid, folly::hasher<Pid>>( reinterpret_cast<Pid*>(expand_pids), reinterpret_cast<Pid*>(expand_pids) + num_expand_pids); } *presults = restartQuery( *inventory, *facts, ownership, max_results == 0 ? folly::none : folly::Optional<uint64_t>(max_results), max_bytes == 0 ? folly::none : folly::Optional<uint64_t>(max_bytes), max_time_ms == 0 ? folly::none : folly::Optional<uint64_t>(max_time_ms), static_cast<Depth>(depth), expandPids, want_stats, cont, cont_size ).release(); }); } void glean_lookup_free(Lookup *lookup) { ffi::free_(lookup); } const char *glean_lookup_empty(Lookup** lookup) { return ffi::wrap([=] { *lookup = new EmptyLookup(); }); } const char *glean_lookup_starting_id(Lookup *lookup, int64_t *id) { return ffi::wrap([=]{ *id = lookup->startingId().toThrift(); }); } const char *glean_lookup_first_free_id(Lookup *lookup, int64_t *id) { return ffi::wrap([=]{ *id = lookup->firstFreeId().toThrift(); }); } const char *glean_lookup_fact( Lookup *lookup, int64_t id, int64_t *type, void **key, size_t *key_size, void **value, size_t *value_size) { return ffi::wrap([=]() { ffi::malloced_array<uint8_t> key_bytes; ffi::malloced_array<uint8_t> value_bytes; auto found = lookup->factById( Id::fromThrift(id), [&](auto ty, auto clause) { *type = ty.toThrift(); key_bytes = ffi::clone_bytes(clause.key()); value_bytes = ffi::clone_bytes(clause.value()); return true; } ); if (!found) { *type = 0; } key_bytes.release_to(key, key_size); value_bytes.release_to(value, value_size); }); } const char *glean_define_fact( Define *facts, glean_predicate_id_t predicate, Output *clause, size_t key_size, glean_fact_id_t *id) { return ffi::wrap([=]{ assert(key_size <= clause->size()); *id = facts->define( Pid::fromThrift(predicate), Fact::Clause::from(clause->bytes(), key_size)).toThrift(); }); } const char *glean_define_untrusted_batch( Define *facts, Inventory *inventory, int64_t batch_first_id, const int64_t *ids, size_t batch_count, const void *batch_facts_data, size_t batch_facts_size, Substitution **subst) { return ffi::wrap([=] { *subst = new Substitution( defineUntrustedBatch( *facts, *inventory, Id::fromThrift(batch_first_id), reinterpret_cast<const Id*>(ids), batch_count, folly::ByteRange( static_cast<const unsigned char *>(batch_facts_data), batch_facts_size))); }); } const char *glean_new_subst( int64_t first, size_t size, Substitution **subst) { return ffi::wrap([=]() { *subst = new Substitution(Id::fromThrift(first), size); }); } void glean_free_subst(Substitution *subst) { ffi::free_(subst); } const char *glean_subst_compose( const Substitution *first, const Substitution *second, Substitution **result) { return ffi::wrap([=] { *result = new Substitution(Substitution::compose(*first, *second)); }); } const char *glean_serialize_subst( const Substitution *subst, int64_t *firstId, size_t *count, int64_t **ids) { return ffi::wrap([=]() { thrift::Subst s = subst->serialize(); *firstId = s.get_firstId(); *count = s.get_ids().size(); *ids = ffi::clone_array(s.get_ids().data(), *count).release(); }); } const char *glean_subst_intervals( const Substitution *subst, const glean_fact_id_t *ins, size_t ins_size, glean_fact_id_t **outs, size_t *outs_size) { return ffi::wrap([=] { std::vector<Id> ids; ids.reserve(ins_size); std::transform( ins, ins+ins_size, std::back_inserter(ids), Id::fromThrift); auto res = subst->substIntervals(ids); auto fres = ffi::malloc_array<glean_fact_id_t>(res.size()); std::transform(res.begin(), res.end(), fres.get(), [](auto id) { return id.toThrift(); }); fres.release_to(outs, outs_size); }); } const char *glean_factset_new( int64_t first_id, FactSet **facts) { return ffi::wrap([=] { *facts = new FactSet(Id::fromThrift(first_id)); }); } void glean_factset_free(FactSet *facts) { ffi::free_(facts); } size_t glean_factset_fact_memory(FactSet *facts) { return facts->factMemory(); } int64_t glean_factset_first_free_id(FactSet *facts) { return facts->firstFreeId().toThrift(); } Lookup *glean_factset_lookup(FactSet *facts) { return facts; } Define *glean_factset_define(FactSet *facts) { return facts; } const char *glean_factset_serialize( FactSet *facts, int64_t *first_id, size_t *count, void **facts_data, size_t *facts_size) { return ffi::wrap([=] { auto batch = facts->serialize(); *first_id = batch.get_firstId(); *count = batch.get_count(); ffi::clone_bytes(batch.get_facts()).release_to(facts_data, facts_size); }); } const char *glean_factset_serializeReorder( FactSet *facts, uint64_t *order, size_t order_size, int64_t *first_id, size_t *count, void **facts_data, size_t *facts_size) { return ffi::wrap([=] { auto batch = facts->serializeReorder( folly::Range<const uint64_t*>(order,order_size)); *first_id = batch.get_firstId(); *count = batch.get_count(); ffi::clone_bytes(batch.get_facts()).release_to(facts_data, facts_size); }); } const char* glean_factset_rebase( FactSet* facts, const Inventory* inventory, int64_t firstId, size_t count, int64_t* ids, LookupCache* cache, FactSet** result) { return ffi::wrap([=] { thrift::Subst thrift_subst; auto subst_vec = std::vector<int64_t>(); // TODO: Remove this copy subst_vec.insert(subst_vec.end(), &ids[0], &ids[count]); thrift_subst.firstId() = firstId; thrift_subst.ids() = subst_vec; Substitution subst = Substitution::deserialize(thrift_subst); GLEAN_SANITY_CHECK(subst.sanityCheck(false)); *result = nullptr; cache->withBulkStore([&](auto& store) { GLEAN_SANITY_CHECK(facts->sanityCheck()); *result = new FactSet(facts->rebase(*inventory, subst, store)); GLEAN_SANITY_CHECK((*result)->sanityCheck()); }); }); } const char *glean_factset_append( FactSet *target, FactSet *source) { return ffi::wrap([=] { target->append(std::move(*source)); }); } const char *glean_stacked_lookup_new( Lookup *base, Lookup *added, Lookup **stacked) { return ffi::wrap([=] { *stacked = new Stacked<Lookup>(base, added); }); } const char *glean_stacked_define_new( Lookup *base, Define *added, Define **stacked) { return ffi::wrap([=] { *stacked = new Stacked<Define>(base, added); }); } void glean_stacked_define_free(Define *stacked) { return ffi::free_(stacked); } const char *glean_new_builder(Output **builder) { return ffi::wrap([=]() { *builder = new Output; }); } void glean_free_builder(Output *builder) { ffi::free_(builder); } size_t glean_builder_size(Output *builder) { return builder->size(); } const char *glean_finish_builder( Output *builder, void **data, size_t *size) { return ffi::wrap([=]() { ffi::clone_bytes(builder->bytes()).release_to(data, size); }); } const char *glean_reset_builder(Output *builder) { return ffi::wrap([=]() { *builder = binary::Output(); }); } const char *glean_push_value_byte(Output *builder, unsigned char val) { return ffi::wrap([=]() { builder->fixed<unsigned char>(val); }); } const char *glean_push_value_bytes( Output *builder, const void *data, size_t size) { return ffi::wrap([=]() { builder->bytes(data,size); }); } const char *glean_push_value_nat(Output *builder, uint64_t val) { return ffi::wrap([=]() { builder->packed(val); }); } const char *glean_push_value_array(Output *builder, size_t size) { return ffi::wrap([=]() { builder->packed(size); }); } const char *glean_push_value_selector( Output *builder, size_t selector) { return ffi::wrap([=]() { builder->packed(selector); }); } const char *glean_push_value_string( Output *builder, const void *data, size_t size) { return ffi::wrap([=]() { builder->mangleString(folly::ByteRange( static_cast<const unsigned char *>(data),size)); }); } const char *glean_push_value_fact( Output *builder, glean_fact_id_t fact) { return ffi::wrap([=]() { builder->packed(Id::fromThrift(fact)); }); } const char *glean_pop_value_byte( const void **start, const void *end, uint8_t *byte) { return pop_value(start, end, [=](binary::Input& input) { *byte = input.fixed<uint8_t>(); }); } const char *glean_pop_value_nat( const void **start, const void *end, uint64_t *nat) { return pop_value(start, end, [=](binary::Input& input) { *nat = input.packed<uint64_t>(); }); } const char *glean_pop_value_array( const void **start, const void *end, size_t *size) { return pop_value(start, end, [=](binary::Input& input) { *size = input.packed<size_t>(); }); } const char *glean_pop_value_bytes_ref( const void **start, const void *end, size_t size, const void **bytes) { return pop_value(start, end, [=](binary::Input& input) { *bytes = input.bytes(size).data(); }); } const char *glean_pop_value_bytes( const void **start, const void *end, size_t size, void **bytes) { return pop_value(start, end, [=](binary::Input& input) { *bytes = ffi::clone_bytes(input.bytes(size).data(), size).release(); }); } const char *glean_pop_value_selector( const void **start, const void *end, size_t *selector) { return pop_value(start, end, [=](binary::Input& input) { *selector = input.packed<size_t>(); }); } const char *glean_pop_value_string( const void **start, const void *end, void **bytes, size_t *size) { return pop_value(start, end, [=](binary::Input& input) { binary::Output output; input.demangleUntrustedString(output); output.moveBytes().release_to(bytes,size); }); } size_t glean_pop_value_trusted_string_ref( const void **start, const void *end) { const auto p = static_cast<const unsigned char *>(*start); auto r = skipTrustedString({p, static_cast<const unsigned char *>(end)}); *start = p + r.first; return r.second; } const char *glean_pop_value_fact( const void **start, const void *end, glean_fact_id_t *fact) { return pop_value(start, end, [=](binary::Input& input) { *fact = input.packed<Id>().toThrift(); }); } const char *glean_push_fact( Output *builder, int64_t pid, Output *clause, size_t key_size) { return ffi::wrap([=] { CHECK_GE(clause->size(), key_size); Fact::serialize( *builder, Pid::fromThrift(pid), Fact::Clause::from(clause->bytes(), key_size)); }); } size_t glean_string_demangle_trusted( const uint8_t *start, size_t size, uint8_t *buffer) { return demangleTrustedString({start, size}, buffer); } void glean_free_query_results(QueryResults *results) { ffi::free_(results); } const char *glean_subroutine_new( const uint64_t *code, size_t code_size, size_t inputs, size_t outputs, size_t locals, const uint64_t *constants_ptr, size_t constants_size, const void * const *literal_ptrs, const size_t *literal_sizes, size_t literal_count, SharedSubroutine **sub) { return ffi::wrap([=] { std::vector<uint64_t> constants( constants_ptr, constants_ptr + constants_size); std::vector<std::string> literals; literals.reserve(literal_count); for (size_t i = 0; i < literal_count; ++i) { literals.push_back(std::string( static_cast<const char *>(literal_ptrs[i]), literal_sizes[i])); } *sub = new SharedSubroutine{std::make_shared<Subroutine>(Subroutine{ std::vector<uint64_t>(code, code + code_size), inputs, outputs, locals, std::move(constants), std::move(literals) })}; }); } void glean_subroutine_free(SharedSubroutine *sub) { ffi::free_(sub); } void glean_subroutine_inspect( SharedSubroutine *sub, const uint64_t **code, size_t *code_size, size_t *inputs, size_t *outputs, size_t *locals, const uint64_t **constants, size_t *constants_size, size_t *lit_count) { *code = sub->value->code.data(); *code_size = sub->value->code.size(); *inputs = sub->value->inputs; *outputs = sub->value->outputs; *locals = sub->value->locals; *constants = sub->value->constants.data(); *constants_size = sub->value->constants.size(); *lit_count = sub->value->literals.size(); } size_t glean_subroutine_size( SharedSubroutine *sub) { return sub->value->size(); } void glean_subroutine_literal( SharedSubroutine *sub, size_t index, const void **ptr, size_t *size) { if (index < sub->value->literals.size()) { *ptr = sub->value->literals[index].data(); *size = sub->value->literals[index].size(); } else { *ptr = nullptr; *size = 0; } } const char *glean_invoke_typechecker( const SharedSubroutine *typechecker, const void *input, size_t input_size, void **output, size_t *output_size) { return ffi::wrap([=] { assert(typechecker->value->inputs == 4); const std::function<uint64_t(uint64_t,uint64_t)> rename = [](uint64_t id, uint64_t) { return id; }; binary::Output out; const uint64_t args[] = { reinterpret_cast<uint64_t>(&rename), reinterpret_cast<uint64_t>(input), reinterpret_cast<uint64_t>(input) + input_size, reinterpret_cast<uint64_t>(&out) }; typechecker->value->execute(args); ffi::clone_bytes(out.bytes()).release_to(output, output_size); }); } const char *glean_validate( const Inventory *inventory, char typecheck, char keys, size_t limit, Lookup *lookup) { return ffi::wrap([=] { Validate v; v.typecheck = typecheck != 0; v.keys = keys != 0; v.limit = limit; validate(*inventory, v, *lookup); }); } void glean_ownership_unit_iterator_free(OwnershipUnitIterator *iter) { ffi::free_(iter); } void glean_derived_fact_ownership_iterator_free( DerivedFactOwnershipIterator *iter) { ffi::free_(iter); } const char *glean_ownership_compute( Inventory *inventory, Lookup *lookup, OwnershipUnitIterator *iter, ComputedOwnership **result ) { return ffi::wrap([=] { *result = computeOwnership(*inventory, *lookup, iter).release(); }); } void glean_ownership_free(Ownership *own) { ffi::free_(own); } void glean_computed_ownership_free(ComputedOwnership *own) { ffi::free_(own); } const char *glean_get_fact_owner( Ownership *ownership, glean_fact_id_t fact, uint32_t *uset_id) { return ffi::wrap([=] { *uset_id = ownership->getOwner(Id::fromWord(fact)); }); } const char *glean_get_ownership_set( Ownership *ownership, uint32_t uset_id, int *op, OwnershipSet **result) { return ffi::wrap([=] { auto exp = ownership->getUset(uset_id); if (!exp.hasValue()) { *result = nullptr; } else { std::vector<uint32_t> elts; exp->set.foreach([&](UsetId setid) { elts.push_back(setid); }); *op = exp->op; *result = new HsArray(std::move(elts)); }; }); } const char *glean_slice_compute( Ownership *ownership, uint32_t *unit_ids, size_t unit_ids_size, int exclude, Slice **result) { return ffi::wrap([=] { auto vec = std::vector<uint32_t>(unit_ids, unit_ids + unit_ids_size); std::sort(vec.begin(), vec.end()); *result = slice(*ownership, vec, exclude != 0).release(); }); } void glean_slice_free(Slice *slice) { ffi::free_(slice); } const char *glean_make_sliced( Lookup *lookup, Ownership *ownership, Slice *slice, Sliced **sliced) { return ffi::wrap([=] { *sliced = new Sliced(lookup, ownership, slice); }); } void glean_sliced_free(Sliced *sliced) { ffi::free_(sliced); } const char *glean_new_define_ownership( Ownership *own, int64_t pid, int64_t first_id, DefineOwnership **result ) { return ffi::wrap([=] { *result = new DefineOwnership(own,Pid::fromWord(pid), Id::fromWord(first_id)); }); } const char *glean_define_ownership_subst( DefineOwnership *define, const Substitution *subst) { return ffi::wrap([=] { define->subst(*subst); }); } const char *glean_define_ownership_sort_by_owner( DefineOwnership *define, uint64_t facts, HsArray<int64_t> *result) { return ffi::wrap([=] { *result = define->sortByOwner(facts); }); } void glean_define_ownership_free(DefineOwnership *def) { ffi::free_(def); } const char *glean_derived_ownership_compute( Ownership *own, DerivedFactOwnershipIterator *iter, ComputedOwnership **result) { return ffi::wrap([=] { *result = computeDerivedOwnership(*own, iter).release(); }); } } } } } }