glean/rts/ownership/derived.cpp (133 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include <numeric> #include <folly/container/F14Map.h> #include "glean/rts/ownership/derived.h" #include "glean/rts/timer.h" namespace facebook { namespace glean { namespace rts { void DefineOwnership::derivedFrom(Id id, const std::set<UsetId>& deps) { if (deps.size() == 0) { LOG(ERROR) << "DefineOwnership::derivedFrom: empty deps"; return; } SetU32 set = SetU32::from(deps); UsetId usetid; if (set.size() == 1) { usetid = *deps.begin(); } else { auto uset = std::make_unique<Uset>(set, And, 0); size_t size = set.size(); usetid = ownership_->lookupSet(uset.get()); if (usetid == INVALID_USET) { auto q = uset.get(); auto p = usets_.add(std::move(uset)); if (p == q) { usets_.promote(p); usetid = p->id; newSets_.push_back(p); VLOG(2) << "new set: " << usetid << ", size = " << size; } else { usetid = p->id; VLOG(2) << "existing set in batch: " << usetid; } } else { VLOG(2) << "existing set in DB: " << usetid; } } if (id >= first_id_) { new_ids_.push_back(id); new_owners_.push_back(usetid); } else { ids_.push_back(id); owners_.push_back(usetid); } } std::vector<int64_t> DefineOwnership::sortByOwner(uint64_t facts) { // We have a set of facts in the batch with Ids [first_id, first_id+1, ..] // We want to group these facts by owner, so that facts with the same // owner are adjacent. // // We're going to rearrange the facts and give them new IDs. // // 0. we start with // ids = [ 0, 1, 2, 3, 0 ] // owners = [ C, B, A, B, A ] // Note that we may have recorded multiple owners for some facts. // // 1. make a vector of owners, indexed by fact ID: // [ X, B, A, B ] // for a fact that has multiple owners, we'll make up a temporary // new set Id. These are only for sorting purposes, they're not real // set Ids - the real ones are created later in // computeDerivedOwnership(). // // e.g. we need a new set ID (let's use X) to represent the owner of // fact 0, because it has multiple owners assigned (C and A). std::vector<UsetId> owners(facts, INVALID_USET); auto tmpset = usets_.getNextId(); for (size_t i = 0; i < new_ids_.size(); i++) { auto ix = new_ids_[i] - first_id_; if (owners[ix] == INVALID_USET) { owners[ix] = new_owners_[i]; } else { // this fact has multiple owners, use a new temporary UsetId. owners[ix] = tmpset++; } } // 2. make a vector order = [0..facts-1] // this will become the ordering that we return after we sort it. std::vector<int64_t> order(facts); std::iota(order.begin(), order.end(), first_id_.toWord()); // 2. sort order by owner = [ 2, 1, 3, 0 ] // // This is the order in which we will serialize the facts, which // essentially renumbers them. Hence the facts will be renumbered // 2 -> 0 // 1 -> 1 // 3 -> 2 // 0 -> 3 sort(order.begin(), order.end(), [&](uint64_t a, uint64_t b) { return owners[a - first_id_.toWord()] < owners[b - first_id_.toWord()]; }); // 3. produce a mapping from old fact Ids to new fact Ids std::vector<Id> idmap(facts); for (size_t i = 0; i < facts; i++) { idmap[order[i] - first_id_.toWord()] = Id::fromWord(i + first_id_.toWord()); } // 4. substitute fact IDs in new_ids for (size_t i = 0; i < new_ids_.size(); i++) { new_ids_[i] = idmap[new_ids_[i] - first_id_]; } return order; // At this point we must serialize the batch using // serializeReorder(order) so that the facts agree with the IDs used // in this DefineOwnership. } void DefineOwnership::subst(const Substitution& subst) { for (auto& id : ids_) { id = subst.subst(id); } for (auto& id : new_ids_) { id = subst.subst(id); } } std::unique_ptr<ComputedOwnership> computeDerivedOwnership( Ownership& ownership, DerivedFactOwnershipIterator *iter) { auto t = makeAutoTimer("computeDerivedOwnership"); VLOG(1) << "computing derived ownership"; // Here we are identifying facts that were derived multiple // different ways. e.g. if a fact was derived twice with owners A // and B, then its final ownership set will be A || B. That is, the // fact will be visibile (derivable) if either A or B are visible. std::map<uint64_t,UsetId> factOwners; folly::F14FastMap<int64_t,std::set<UsetId>> factOwnerSets; while (const auto owners = iter->get()) { for (uint32_t i = 0; i < owners->ids.size(); i++) { auto id = owners->ids[i].toWord(); auto owner = owners->owners[i]; const auto [it, inserted] = factOwners.insert({id, owner}); if (!inserted) { const auto [it2, _] = factOwnerSets.insert({id, {it->second}}); it2->second.insert(owner); } } } // Create all the new sets // we are under the write lock here, so we can create canonical // sets, no need to rebase them later. Usets usets(ownership.nextSetId()); for (auto& pair : factOwnerSets) { if (pair.second.size() > 1) { SetU32 set = SetU32::from(pair.second); auto uset = std::make_unique<Uset>(std::move(set), Or, 0); auto usetid = ownership.lookupSet(uset.get()); if (usetid == INVALID_USET) { auto p = usets.add(std::move(uset)); usets.promote(p); usetid = p->id; VLOG(1) << "new set: " << usetid; } else { VLOG(1) << "existing set: " << usetid; } factOwners[pair.first] = usetid; } } auto sets = usets.toEliasFano(); // convert factOwners into a vector of intervals std::vector<std::pair<Id,UsetId>> intervals; intervals.reserve(factOwners.size()); UsetId current = INVALID_USET; for (auto& pair : factOwners) { auto usetid = pair.second; if (usetid != current) { intervals.push_back(std::make_pair(Id::fromWord(pair.first), usetid)); current = usetid; } } VLOG(1) << "computing derived ownership: " << intervals.size() << " intervals"; // Now build a ComputedOwnership that we can return return std::make_unique<ComputedOwnership>( usets.getFirstId(), std::move(sets), std::move(intervals)); } } } }