bench/Bigram.h (38 lines of code) (raw):

/** * Copyright (c) 2014-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ /* * Bigram.h * * Created on: Jun 24, 2015 * Author: ldemailly */ #pragma once #include <cassert> #include <iostream> #include <unordered_map> /** * A structure holding 2 bytes/characters */ class Bigram { public: Bigram(char c1, char c2); // Allow initialization from string constants // Only allow "xy" as input in that case if we used directly // std::array of 2 it complains of mismatch (size 3 because of the NUL) // e.g. Bigram b{"XY"}; but not "X" or "XYZ" at compile time /* implicit */ Bigram(const char (&in)[3]); Bigram(); // ~Bigram(); // Inlining [] gains about 5% or 20 Mbytes/sec (385->405) inline char operator[](int idx) const { assert(idx >= 0 && idx < 2); return b_[idx]; } bool operator==(const Bigram &o) const; bool operator!=(const Bigram &o) const { return !(operator==(o)); } bool operator<(const Bigram &o) const; void toPrintableString(std::string &result) const; void toBinary(std::string &result) const; void binarySerialize(std::ostream &os) const; /// Reads from the stream the 2 bytes for this bigram - returns true if /// no error/eof triggered when reading bool binaryDeserialize(std::istream &is); std::string toPrintableString() const; /// Utility only to be used for logging to escape 1 character static std::string toPrintableString(char c); private: char b_[2]; }; /// Dumps a bigram in human readable format std::ostream &operator<<(std::ostream &os, const Bigram &b); namespace std { /// Hash function for Bigrams template <> struct hash<Bigram> { std::size_t operator()(const Bigram &k) const { return ((k[0] << 1) | k[1]); } }; } /// Handy types typedef std::unordered_map<Bigram, uint32_t> MapOfBigramToCount; typedef std::pair<Bigram, uint32_t> PairBigramCount;