sql_utils/base/map_util.h (342 lines of code) (raw):

// // Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #ifndef THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_BASE_MAP_UTIL_H_ #define THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_BASE_MAP_UTIL_H_ // This file provides utility functions for use with STL map-like data // structures, such as std::map and hash_map. Some functions will also work with // sets, such as ContainsKey(). // // The main functions in this file fall into the following categories: // // - Find*() // - Contains*() // - Insert*() // - Lookup*() // // These functions often have "...OrDie" or "...OrDieNoPrint" variants. These // variants will crash the process with a SQL_CHECK() failure on error, // including the offending key/data in the log message. The NoPrint variants // will not include the key/data in the log output under the assumption that // it's not a printable type. // // Most functions are fairly self explanatory from their names, with the // exception of Find*() vs Lookup*(). The Find functions typically use the map's // .find() member function to locate and return the map's value type. The // Lookup*() functions typically use the map's .insert() (yes, insert) member // function to insert the given value if necessary and returns (usually a // reference to) the map's value type for the found item. // // See the per-function comments for specifics. // // There are also a handful of functions for doing other miscellaneous things. // // A note on terminology: // // In this file, `m` and `M` represent a map and its type. // // Map-like containers are collections of pairs. Like all STL containers they // contain a few standard typedefs identifying the types of data they contain. // Given the following map declaration: // // std::map<string, int> my_map; // // the notable typedefs would be as follows: // // - key_type -- string // - value_type -- std::pair<const string, int> // - mapped_type -- int // // Note that the map above contains two types of "values": the key-value pairs // themselves (value_type) and the values within the key-value pairs // (mapped_type). A value_type consists of a key_type and a mapped_type. // // The documentation below is written for programmers thinking in terms of keys // and the (mapped_type) values associated with a given key. For example, the // statement // // my_map["foo"] = 3; // // has a key of "foo" (type: string) with a value of 3 (type: int). // #include <stddef.h> #include <iterator> #include <memory> #include <string> #include <type_traits> #include <utility> #include <vector> #include "absl/meta/type_traits.h" #include "sql_utils/base/logging.h" #include "sql_utils/base/map_traits.h" #include "sql_utils/base/no_destructor.h" namespace bigquery_ml_utils_base { // These helper template aliases are implementation details of map_util, // provided for notational convenience. Despite the file-level documentation // about map typedefs, map_util doesn't actually use them. // It uses the Map's value_type, and the value_type's first_type and // second_type. This can matter for nonconformant containers. template <typename M> using MapUtilValueT = typename M::value_type; template <typename M> using MapUtilKeyT = typename MapUtilValueT<M>::first_type; template <typename M> using MapUtilMappedT = typename MapUtilValueT<M>::second_type; namespace internal_map_util { template <typename M, typename = void> struct HasTryEmplace : std::false_type {}; template <typename M> struct HasTryEmplace<M, absl::void_t<decltype(std::declval<M>().try_emplace( std::declval<const MapUtilKeyT<M>&>()))>> : std::true_type {}; template <typename M, typename = void> struct InitType { using type = MapUtilValueT<M>; }; template <typename M> struct InitType<M, typename std::enable_if<!std::is_convertible< typename M::init_type, MapUtilValueT<M>>::value>::type> { using type = typename M::init_type; }; template <typename V> const V& ValueInitializedDefault() { static const bigquery_ml_utils_base::NoDestructor<V> value_initialized_default{}; return *value_initialized_default; } } // namespace internal_map_util template <typename M> using MapUtilInitT = typename internal_map_util::InitType<M>::type; // // Find*() // // Returns a const reference to the value associated with the given key if it // exists. Crashes otherwise. // // This is intended as a replacement for operator[] as an rvalue (for reading) // when the key is guaranteed to exist. // // operator[] for lookup is discouraged for several reasons (note that these // reasons may apply to only some map types): // * It has a side-effect of inserting missing keys // * It is not thread-safe (even when it is not inserting, it can still // choose to resize the underlying storage) // * It invalidates iterators (when it chooses to resize) // * It default constructs a value object even if it doesn't need to // // This version assumes the key is printable, and includes it in the fatal log // message. template <typename M, typename KeyType = MapUtilKeyT<M>> const MapUtilMappedT<M>& FindOrDie(const M& m, const KeyType& key) { auto it = m.find(key); SQL_CHECK(it != m.end()) << "Map key not found: " << key; return bigquery_ml_utils_base::subtle::GetMapped(*it); } // Same as above, but returns a non-const reference. template <typename M, typename KeyType = MapUtilKeyT<M>> MapUtilMappedT<M>& FindOrDie(M& m, // NOLINT const KeyType& key) { auto it = m.find(key); SQL_CHECK(it != m.end()) << "Map key not found: " << key; return bigquery_ml_utils_base::subtle::GetMapped(*it); } // Same as FindOrDie above, but doesn't log the key on failure. template <typename M, typename KeyType = MapUtilKeyT<M>> const MapUtilMappedT<M>& FindOrDieNoPrint(const M& m, const KeyType& key) { auto it = m.find(key); SQL_CHECK(it != m.end()) << "Map key not found"; return bigquery_ml_utils_base::subtle::GetMapped(*it); } // Same as above, but returns a non-const reference. template <typename M, typename KeyType = MapUtilKeyT<M>> MapUtilMappedT<M>& FindOrDieNoPrint(M& m, // NOLINT const KeyType& key) { auto it = m.find(key); SQL_CHECK(it != m.end()) << "Map key not found"; return bigquery_ml_utils_base::subtle::GetMapped(*it); } // Returns a const reference to the value associated with the given key if it // exists, otherwise returns a const reference to a value-initialized object // that is never destroyed. template <typename M, typename KeyType = MapUtilKeyT<M>> const MapUtilMappedT<M>& FindWithDefault(const M& m, const KeyType& key) { auto it = m.find(key); if (it != m.end()) return bigquery_ml_utils_base::subtle::GetMapped(*it); return internal_map_util::ValueInitializedDefault<MapUtilMappedT<M>>(); } // Returns a const reference to the value associated with the given key if it // exists, otherwise returns a const reference to the provided default value. // // Prefer the two-argument form unless you need to specify a custom default // value (i.e., one that is not equal to a value-initialized instance). // // WARNING: If a temporary object is passed as the default "value," // this function will return a reference to that temporary object, // which will be destroyed at the end of the statement. A common // example: if you have a map with string values, and you pass a char* // as the default "value," either use the returned value immediately // or store it in a string (not string&). // template <typename M> const MapUtilMappedT<M>& FindWithDefault(const M& m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& value) { auto it = m.find(key); if (it != m.end()) return bigquery_ml_utils_base::subtle::GetMapped(*it); return value; } // Returns a pointer to the const value associated with the given key if it // exists, or null otherwise. template <typename M, typename KeyType = MapUtilKeyT<M>> const MapUtilMappedT<M>* FindOrNull(const M& m, const KeyType& key) { auto it = m.find(key); if (it == m.end()) return nullptr; return &bigquery_ml_utils_base::subtle::GetMapped(*it); } // Returns a pointer to the non-const value associated with the given key if it // exists, or null otherwise. template <typename M, typename KeyType = MapUtilKeyT<M>> MapUtilMappedT<M>* FindOrNull(M& m, // NOLINT const KeyType& key) { auto it = m.find(key); if (it == m.end()) return nullptr; return &bigquery_ml_utils_base::subtle::GetMapped(*it); } // Returns the pointer value associated with the given key. If none is found, // null is returned. The function is designed to be used with a map of keys // to pointers. // // This function does not distinguish between a missing key and a key mapped // to a null value. template <typename M, typename KeyType = MapUtilKeyT<M>> MapUtilMappedT<M> FindPtrOrNull(const M& m, const KeyType& key) { auto it = m.find(key); if (it == m.end()) return MapUtilMappedT<M>(); return bigquery_ml_utils_base::subtle::GetMapped(*it); } // Same as above, except takes non-const reference to m. // // This function is needed for containers that propagate constness to the // pointee, such as boost::ptr_map. template <typename M, typename KeyType = MapUtilKeyT<M>> MapUtilMappedT<M> FindPtrOrNull(M& m, // NOLINT const KeyType& key) { auto it = m.find(key); if (it == m.end()) return MapUtilMappedT<M>(); return bigquery_ml_utils_base::subtle::GetMapped(*it); } // Finds the value associated with the given key and copies it to *value (if // non-null). Returns false if the key was not found, true otherwise. template <typename M, typename Key, typename Value> bool FindCopy(const M& m, const Key& key, Value* value) { auto it = m.find(key); if (it == m.end()) return false; if (value) *value = bigquery_ml_utils_base::subtle::GetMapped(*it); return true; } // // Contains*() // // Returns true if and only if the given m contains the given key. template <typename M, typename Key> bool ContainsKey(const M& m, const Key& key) { return m.find(key) != m.end(); } // Returns true if and only if the given m contains the given key-value // pair. template <typename M, typename Key, typename Value> bool ContainsKeyValuePair(const M& m, const Key& key, const Value& value) { auto range = m.equal_range(key); for (auto it = range.first; it != range.second; ++it) { if (bigquery_ml_utils_base::subtle::GetMapped(*it) == value) { return true; } } return false; } // // Insert*() // // Inserts the given key-value pair into the m. Returns true if and // only if the key from the given pair didn't previously exist. Otherwise, the // value in the map is replaced with the value from the given pair. template <typename M> bool InsertOrUpdate(M* m, const MapUtilInitT<M>& vt) { auto ret = m->insert(vt); if (ret.second) return true; subtle::GetMapped(*ret.first) = subtle::GetMapped(vt); // update return false; } // Same as above, except that the key and value are passed separately. template <typename M> bool InsertOrUpdate(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& value) { return InsertOrUpdate(m, {key, value}); } // Inserts/updates all the key-value pairs from the range defined by the // iterators "first" and "last" into the given m. template <typename M, typename InputIterator> void InsertOrUpdateMany(M* m, InputIterator first, InputIterator last) { for (; first != last; ++first) { InsertOrUpdate(m, *first); } } // Change the value associated with a particular key in a map or hash_map // of the form std::map<Key, Value*> which owns the objects pointed to by the // value pointers. If there was an existing value for the key, it is deleted. // True indicates an insert took place, false indicates an update + delete. template <typename M> bool InsertAndDeleteExisting(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& value) { auto ret = m->insert(MapUtilValueT<M>(key, value)); if (ret.second) return true; delete ret.first->second; ret.first->second = value; return false; } // Inserts the given key and value into the given m if and only if the // given key did NOT already exist in the m. If the key previously // existed in the m, the value is not changed. Returns true if the // key-value pair was inserted; returns false if the key was already present. template <typename M> bool InsertIfNotPresent(M* m, const MapUtilInitT<M>& vt) { return m->insert(vt).second; } // Same as above except the key and value are passed separately. template <typename M> bool InsertIfNotPresent(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& value) { return InsertIfNotPresent(m, {key, value}); } // Same as above except dies if the key already exists in the m. template <typename M> void InsertOrDie(M* m, const MapUtilInitT<M>& value) { SQL_CHECK(InsertIfNotPresent(m, value)) << "duplicate value: " << value; } // Same as above except doesn't log the value on error. template <typename M> void InsertOrDieNoPrint(M* m, const MapUtilInitT<M>& value) { SQL_CHECK(InsertIfNotPresent(m, value)) << "duplicate value."; } // Inserts the key-value pair into the m. Dies if key was already // present. template <typename M> void InsertOrDie(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& data) { SQL_CHECK(InsertIfNotPresent(m, key, data)) << "duplicate key: " << key; } // Same as above except doesn't log the key on error. template <typename M> void InsertOrDieNoPrint(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& data) { SQL_CHECK(InsertIfNotPresent(m, key, data)) << "duplicate key."; } // Inserts a new key and default-initialized value. Dies if the key was already // present. Returns a reference to the value. Example usage: // // std::map<int, SomeProto> m; // SomeProto& proto = InsertKeyOrDie(&m, 3); // proto.set_field("foo"); template <typename M> auto InsertKeyOrDie(M* m, const MapUtilKeyT<M>& key) -> typename std::enable_if<internal_map_util::HasTryEmplace<M>::value, MapUtilMappedT<M>&>::type { auto res = m->try_emplace(key); SQL_CHECK(res.second) << "duplicate key: " << key; return bigquery_ml_utils_base::subtle::GetMapped(*res.first); } // Anything without try_emplace, we support with the legacy code path. template <typename M> auto InsertKeyOrDie(M* m, const MapUtilKeyT<M>& key) -> typename std::enable_if<!internal_map_util::HasTryEmplace<M>::value, MapUtilMappedT<M>&>::type { auto res = m->insert(MapUtilValueT<M>(key, MapUtilMappedT<M>())); SQL_CHECK(res.second) << "duplicate key: " << key; return res.first->second; } // // Lookup*() // // Looks up a given key and value pair in m and inserts the key-value pair if // it's not already present. Returns a reference to the value associated with // the key. template <typename M> MapUtilMappedT<M>& LookupOrInsert(M* m, const MapUtilInitT<M>& vt) { return subtle::GetMapped(*m->insert(vt).first); } // Same as above except the key-value are passed separately. template <typename M> MapUtilMappedT<M>& LookupOrInsert(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& value) { return LookupOrInsert(m, {key, value}); } // Returns a reference to the pointer associated with key. If not found, a // pointee is constructed and added to the map. In that case, the new pointee is // forwarded constructor arguments; when no arguments are provided the default // constructor is used. // // Useful for containers of the form Map<Key, Ptr>, where Ptr is pointer-like. template <typename M, class... Args> MapUtilMappedT<M>& LookupOrInsertNew(M* m, const MapUtilKeyT<M>& key, Args&&... args) { using Mapped = MapUtilMappedT<M>; using MappedDeref = decltype(*std::declval<Mapped>()); using Element = typename std::decay<MappedDeref>::type; auto ret = m->insert(MapUtilValueT<M>(key, Mapped())); if (ret.second) { ret.first->second = Mapped(new Element(std::forward<Args>(args)...)); } return ret.first->second; } // // Misc Utility Functions // // Updates the value associated with the given key. If the key was not already // present, then the key-value pair are inserted and "previous" is unchanged. If // the key was already present, the value is updated and "*previous" will // contain a copy of the old value. // // Returns true if and only if there was an already existing value. // // InsertOrReturnExisting has complementary behavior that returns the // address of an already existing value, rather than updating it. template <typename M> bool UpdateReturnCopy(M* m, const MapUtilValueT<M>& vt, MapUtilMappedT<M>* previous) { auto ret = m->insert(vt); if (ret.second) return false; if (previous) *previous = ret.first->second; ret.first->second = vt.second; // update return true; } // Same as above except that the key and mapped value are passed separately. template <typename M> bool UpdateReturnCopy(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& value, MapUtilMappedT<M>* previous) { return UpdateReturnCopy(m, MapUtilValueT<M>(key, value), previous); } // Tries to insert the given key-value pair into the m. Returns null // if the insert succeeds. Otherwise, returns a pointer to the existing value. // // This complements UpdateReturnCopy in that it allows to update only after // verifying the old value and still insert quickly without having to look up // twice. Unlike UpdateReturnCopy this also does not come with the issue of an // undefined previous* in case new data was inserted. template <typename M> MapUtilMappedT<M>* InsertOrReturnExisting(M* m, const MapUtilValueT<M>& vt) { auto ret = m->insert(vt); if (ret.second) return nullptr; // Inserted, no previous value. return &ret.first->second; // Return address of previous value. } // Same as above, except for explicit key and data. template <typename M> MapUtilMappedT<M>* InsertOrReturnExisting(M* m, const MapUtilKeyT<M>& key, const MapUtilMappedT<M>& data) { return InsertOrReturnExisting(m, MapUtilValueT<M>(key, data)); } // Saves the reverse mapping into reverse. Returns true if values could all be // inserted. template <typename M, typename ReverseM> bool ReverseMap(const M& m, ReverseM* reverse) { SQL_CHECK(reverse != nullptr); bool all_unique = true; for (const auto& kv : m) { if (!InsertOrUpdate(reverse, kv.second, kv.first)) { all_unique = false; } } return all_unique; } // Like ReverseMap above, but returns its output m. Return type has to // be specified explicitly. Example: // M::M(...) : m_(...), r_(ReverseMap<decltype(r_)>(m_)) {} template <typename ReverseM, typename M> ReverseM ReverseMap(const M& m) { typename std::remove_const<ReverseM>::type reverse; ReverseMap(m, &reverse); return reverse; } // Erases the m item identified by the given key, and returns the value // associated with that key. It is assumed that the value (i.e., the // mapped_type) is a pointer. Returns null if the key was not found in the // m. // // Examples: // std::map<string, MyType*> my_map; // // One line cleanup: // delete EraseKeyReturnValuePtr(&my_map, "abc"); // // Use returned value: // std::unique_ptr<MyType> value_ptr( // EraseKeyReturnValuePtr(&my_map, "abc")); // if (value_ptr.get()) // value_ptr->DoSomething(); // template <typename M> MapUtilMappedT<M> EraseKeyReturnValuePtr(M* m, const MapUtilKeyT<M>& key) { auto it = m->find(key); if (it == m->end()) return nullptr; MapUtilMappedT<M> v = std::move(bigquery_ml_utils_base::subtle::GetMapped(*it)); m->erase(it); return v; } // Inserts all the keys from m into key_container, which must // support insert(M::key_type). // // Note: any initial contents of the key_container are not cleared. template <typename M, typename KeyContainer> void InsertKeysFromMap(const M& m, KeyContainer* key_container) { SQL_CHECK(key_container != nullptr); for (const auto& kv : m) { key_container->insert(kv.first); } } // Appends all the keys from m into key_container, which must // support push_back(M::key_type). // // Note: any initial contents of the key_container are not cleared. template <typename M, typename KeyContainer> void AppendKeysFromMap(const M& m, KeyContainer* key_container) { SQL_CHECK(key_container != nullptr); for (const auto& kv : m) { key_container->push_back(kv.first); } } // A more specialized overload of AppendKeysFromMap to optimize reallocations // for the common case in which we're appending keys to a vector and hence can // (and sometimes should) call reserve() first. // // (It would be possible to play SFINAE games to call reserve() for any // m that supports it, but this seems to get us 99% of what we need // without the complexity of a SFINAE-based solution.) template <typename M, typename KeyType> void AppendKeysFromMap(const M& m, std::vector<KeyType>* key_container) { SQL_CHECK(key_container != nullptr); // We now have the opportunity to call reserve(). Calling reserve() every // time is a bad idea for some use cases: libstdc++'s implementation of // std::vector<>::reserve() resizes the vector's backing store to exactly the // given size (unless it's already at least that big). Because of this, // the use case that involves appending a lot of small maps (total size // N) one by one to a vector would be O(N^2). But never calling reserve() // loses the opportunity to improve the use case of adding from a large // map to an empty vector (this improves performance by up to 33%). A // number of heuristics are possible. Here we use the simplest one. if (key_container->empty()) { key_container->reserve(m.size()); } for (const auto& kv : m) { key_container->push_back(kv.first); } } // Inserts all the values from m into value_container, which must // support push_back(M::mapped_type). // // Note: any initial contents of the value_container are not cleared. template <typename M, typename ValueContainer> void AppendValuesFromMap(const M& m, ValueContainer* value_container) { SQL_CHECK(value_container != nullptr); for (const auto& kv : m) { value_container->push_back(kv.second); } } // A more specialized overload of AppendValuesFromMap to optimize reallocations // for the common case in which we're appending values to a vector and hence // can (and sometimes should) call reserve() first. // // (It would be possible to play SFINAE games to call reserve() for any // m that supports it, but this seems to get us 99% of what we need // without the complexity of a SFINAE-based solution.) template <typename M, typename ValueType> void AppendValuesFromMap(const M& m, std::vector<ValueType>* value_container) { SQL_CHECK(value_container != nullptr); // See AppendKeysFromMap for why this is done. if (value_container->empty()) { value_container->reserve(m.size()); } for (const auto& kv : m) { value_container->push_back(kv.second); } } // Erases all elements of m where predicate evaluates to true. // Note: To avoid unnecessary temporary copies of map elements passed to the // predicate, the predicate must accept 'const M::value_type&'. // In particular, the value type for a map is 'std::pair<const K, V>', and so a // predicate accepting 'std::pair<K, V>' will result in temporary copies. template <typename M, typename Predicate> auto AssociativeEraseIf(M* m, Predicate predicate) -> typename std::enable_if< std::is_same<void, decltype(m->erase(m->begin()))>::value>::type { SQL_CHECK(m != nullptr); for (auto it = m->begin(); it != m->end();) { if (predicate(*it)) { m->erase(it++); } else { ++it; } } } template <typename M, typename Predicate> auto AssociativeEraseIf(M* m, Predicate predicate) -> typename std::enable_if<std::is_same< decltype(m->begin()), decltype(m->erase(m->begin()))>::value>::type { SQL_CHECK(m != nullptr); for (auto it = m->begin(); it != m->end();) { if (predicate(*it)) { it = m->erase(it); } else { ++it; } } } } // namespace bigquery_ml_utils_base #endif // THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_BASE_MAP_UTIL_H_