include/core/CStateMachine.h (87 lines of code) (raw):
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#ifndef INCLUDED_ml_core_CStateMachine_h
#define INCLUDED_ml_core_CStateMachine_h
#include <core/CoreTypes.h>
#include <core/ImportExport.h>
#include <boost/operators.hpp>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <string>
#include <vector>
namespace ml {
namespace core {
class CStatePersistInserter;
class CStateRestoreTraverser;
//! \brief A simple state machine implementation.
//!
//! DESCRIPTION:\n
//! This defines an alphabet of symbols, a set of states and a transition
//! table.
//!
//! A state machine is the tuple \f$\left(\Sigma, S, \delta, s_0\right)\f$.
//! Here,
//! -# \f$\Sigma\f$ is an alphabet of symbols which are passed to the
//! machine.
//! -# \f$S\f$ are the collection of state.
//! -# \f$\delta\f$ is the transition function \f$\delta : S \cross \Sigma \leftarrow S\f$.
//! -# \f$s_0\f$ Is the initial state.
//!
//! IMPLEMENTATION DECISIONS:\n
//! Most of the state is stored statically and unique machines are identified
//! by their alphabet, states, and transition table. This state can be shared
//! by many instances of the machine and so the only state that needs to be
//! stored is the machine identifier, which is managed by the create function,
//! and the current state.
//!
//! Note I purposely haven't bothered to allow the user to register actions to
//! perform when a symbol is applied. This is because these typically need
//! additional information to be supplied and there is no way of doing that
//! at the level of this interface without using void* or equivalent. Instead
//! it is intended that the user of this class holds an instance of the state
//! machine and uses it to implement an apply function which receives additional
//! state.
//!
//! This implementation is mostly lock free since after it is created the shared
//! state of a machine is effectively constant and we can use the double locking
//! pattern in create with an atomic to pass a message to other threads that a
//! new machine is ready to use.
//!
class CORE_EXPORT CStateMachine {
public:
using TSizeVec = std::vector<std::size_t>;
using TSizeVecVec = std::vector<TSizeVec>;
using TStrVec = std::vector<std::string>;
using TSizeSizeMap = std::map<std::size_t, std::size_t>;
public:
//! Set the number of machines we expect the program to use.
static void expectedNumberMachines(std::size_t number);
//! Create a machine with a specified alphabet, set of states and
//! transition function and initialize its state to \p state.
//!
//! \note This can fail if the supplied data are inconsistent in
//! which case the state is set to bad.
static CStateMachine create(const TStrVec& alphabet,
const TStrVec& states,
const TSizeVecVec& transitionFunction,
std::size_t state);
//! \name Persistence
//@{
//! Initialize by reading state from \p traverser.
bool acceptRestoreTraverser(CStateRestoreTraverser& traverser,
const TSizeSizeMap& mapping = TSizeSizeMap());
//! Persist state by passing information to the supplied inserter.
void acceptPersistInserter(CStatePersistInserter& inserter) const;
//@}
//! Check if the machine is bad, i.e. not a valid state machine.
bool bad() const;
//! Apply \p symbol to the machine.
bool apply(std::size_t symbol);
//! Get the current state of the machine.
std::size_t state() const;
//! Print \p state.
std::string printState(std::size_t state) const;
//! Print \p symbol.
std::string printSymbol(std::size_t symbol) const;
//! Get a checksum of this object.
std::uint64_t checksum() const;
//! Print all the state machines.
static std::size_t numberMachines();
protected:
//! Clear all machines (for test only).
static void clear();
private:
//! \brief The state of a single machine.
struct CORE_EXPORT SMachine {
SMachine(const TStrVec& alphabet, const TStrVec& states, const TSizeVecVec& transitionFunction);
SMachine(const SMachine& other);
//! The alphabet of action symbols \f$\Sigma\f$.
TStrVec s_Alphabet;
//! The possible states \f$S\f$.
TStrVec s_States;
//! The transition table \f$\delta : \Sigma \times S \rightarrow S\f$.
TSizeVecVec s_TransitionFunction;
};
//! \brief A lightweight object to lookup a single machine.
struct CORE_EXPORT SLookupMachine
: boost::equality_comparable2<SLookupMachine, SMachine> {
SLookupMachine(const TStrVec& alphabet,
const TStrVec& states,
const TSizeVecVec& transitionFunction);
//! Test if two machines are equal.
bool operator==(const SMachine& rhs) const;
//! The alphabet of action symbols \f$\Sigma\f$.
const TStrVec& s_Alphabet;
//! The possible states \f$S\f$.
const TStrVec& s_States;
//! The transition table \f$\delta : \Sigma \times S \rightarrow S\f$.
const TSizeVecVec& s_TransitionFunction;
};
//! \brief A custom paired down std::deque like container.
//!
//! DESCRIPTION:\n
//! We have rather specific requirements for this container:
//! -# It must be (as) random access (as possible),
//! -# It must be possible for push_back to occur concurrently with
//! lookup of an existing item in the container.
//!
//! IMPLEMENTATION DECISIONS:\n
//! With std::deque implementations any invocation of operator[] can
//! fail if there is a concurrent push_back. The code using this class
//! ensures that it only ever asks for an element which already exists
//! in the container at the start of push_back. This is possible safely
//! with a std::list. However, since this also needs to be random access,
//! using a vanilla std::list, which has \f$O(N)\f$ complexity for
//! accessing the \f$N^{th}\f$ item and poor locality of reference,
//! is not suitable. Instead we prefer to use a list of preallocated
//! std::vectors, on which it is also safe to call push_back, for our
//! use case, provided doing so doesn't cause them to reallocate.
class CORE_EXPORT CMachineDeque {
private:
//! The default vector capacity.
static const std::size_t DEFAULT_CAPACITY = 20;
public:
CMachineDeque();
//! Set the vector capacity to \p capacity.
void capacity(std::size_t capacity);
//! Access the element at \p pos.
const SMachine& operator[](std::size_t pos) const;
//! Get the number of elements in this container.
std::size_t size() const;
//! Add a new element to the back of the collection.
void push_back(const SMachine& machine);
//! Remove all elements.
void clear();
private:
using TMachineVec = std::vector<SMachine>;
using TMachineVecList = std::list<TMachineVec>;
private:
//! The vector capacity.
//!
//! \note This should be set to slightly more than the number
//! of distinct machines which are created by the program
//! which uses this class.
std::size_t m_Capacity;
//! Get the number of available machines.
std::atomic<std::size_t> m_NumberMachines;
//! The actual machines.
TMachineVecList m_Machines;
};
private:
CStateMachine();
//! Try to find \p machine in the range [\p begin, \p end).
static std::size_t find(std::size_t begin, std::size_t end, const SLookupMachine& machine);
private:
//! The machine identifier.
std::size_t m_Machine;
//! The current state of the machine.
std::size_t m_State;
//! A complete list of available machines.
static CMachineDeque ms_Machines;
};
}
}
#endif // INCLUDE_CORE_CSTATEMACHINE_H_