glean/db/Glean/Query/Codegen.hs (1,420 lines of code) (raw):

{- Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. -} {-# LANGUAGE RecursiveDo, DeriveFunctor, DeriveTraversable #-} module Glean.Query.Codegen ( CodegenQuery , QueryWithInfo(..) , compileQuery , compileQueryFacts , CgQuery(..) , CgStatement_(..) , CgStatement , Generator_(..) , Generator , Var(..) , Match(..) , matchVar , Pat , Expr , PrimOp(..) ) where import Control.Exception import Control.Monad.Extra (whenJust) import Control.Monad.State import Data.Bifunctor import Data.Bifoldable import Data.ByteString (ByteString) import qualified Data.ByteString as ByteString import Data.Coerce import Data.IntSet (IntSet) import qualified Data.IntSet as IntSet import Data.Maybe import Data.Text (Text) import qualified Data.Text as Text import Data.Text.Prettyprint.Doc import qualified Data.Vector as Vector import Data.Vector (Vector, (!), (//)) import Data.Word import Foreign.Ptr hiding (WordPtr) import System.IO.Unsafe import qualified Util.FFI as FFI import Util.Log import qualified Util.Log.Text as TextLog import qualified Glean.Angle.Types as Angle import Glean.Bytecode.Types import qualified Glean.FFI as FFI import Glean.Query.Types (IsWild(..)) import Glean.RTS import Glean.RTS.Builder import Glean.RTS.Bytecode.Code import Glean.RTS.Bytecode.Disassemble import Glean.RTS.Bytecode.Gen.Issue import Glean.RTS.Bytecode.Gen.Instruction (Insn(..)) import Glean.RTS.Foreign.Bytecode import Glean.RTS.Foreign.Query import Glean.RTS.Traverse import Glean.RTS.Types import Glean.RTS.Term hiding (Match) import Glean.Typed.Binary (buildRtsValue) import Glean.Types hiding (Nat, Byte) {- Debugging the bytecode query backend: Use glog verbosity levels to enable logging. For example, to enable verbosity level 2, you can do GLOG_v=2 <program> <args> (The environment variable GLOG_v is often more reliable than passing -v 3 on the command line, since it bypasses the program's own command-line parser). Verbosity levels are as follows: 2: log the query at different stages (typechecking, expansion, ...) 3: disassembles the generated bytecode 5: tracing during bytecode execution (see rts/query.cpp) -} -- ----------------------------------------------------------------------------- -- Flattened Query types -- | A query produces a finite series of terms data CgQuery = CgQuery { -- | For each result produced by the query body, build this -- term. It cannot contain 'MatchWild' or 'MatchBind'. flatQueryHead :: Expr -- | A sequence of statements, like a list -- comprehension. Variables bound by each statement scope over the -- statements that follow it. , flatQueryBody :: [CgStatement] } deriving Show -- | A statement is either a single generator: -- -- > pat = gen -- -- or a disjunction: -- -- > (stmt; ...) | ... | (stmt; ...) -- data CgStatement_ var = CgStatement (Pat_ var) (Generator_ var) | CgNegation [CgStatement_ var] | CgDisjunction [[CgStatement_ var]] deriving (Show, Functor, Foldable, Traversable) -- For rationale, see Note [why do we have sequential composition?] type CgStatement = CgStatement_ Var -- | A generator produces a finite series of terms data Generator_ var -- | Enumerate facts of a predicate matching a pattern -- -- > v = pred pat -- > ... -- -- The left-hand side can be any pattern that matches the fact ID, -- so for example to look up a specific fact we can do -- -- > 1234 = pred _ -- = FactGenerator { -- | Predicate to search generatorPid :: PidRef -- | match the fact key , generatorPat :: Pat_ var -- | match the fact value , generatorValue :: Pat_ var } -- | Generate values | TermGenerator (Expr_ var) -- | Produces facts of a derived predicate. The values of -- this generator are fact IDs that refer to facts in the -- temporary Environment produced by a query. | DerivedFactGenerator { generatorPid :: PidRef , generatorKey :: Expr_ var , generatorValue :: Expr_ var } -- | Produces all elements of an array | ArrayElementGenerator { generatorEltTy :: Type -- type of the elements , generatorArray :: Expr_ var } | PrimCall { primOp :: PrimOp , primOpArgs :: [Expr_ var] } deriving (Show, Functor, Foldable, Traversable) type Generator = Generator_ Var -- | Primitive operations data PrimOp = PrimOpToLower | PrimOpLength | PrimOpRelToAbsByteSpans | PrimOpGtNat | PrimOpGeNat | PrimOpLtNat | PrimOpLeNat | PrimOpNeNat | PrimOpAddNat | PrimOpNeExpr deriving (Eq, Show) data Var = Var { varType :: Type , varId :: {-# UNPACK #-}!Int , varOrigName :: Maybe Text } deriving Show instance Eq Var where Var _ x _ == Var _ y _ = x == y data Match ext var -- | Always matches = MatchWild Type -- | Never matches | MatchNever Type -- | Match a literal Fact ID | MatchFid Fid -- | Match a value and bind it to this variable | MatchBind var -- | Match a value bound earlier. Note that this means we can have -- patterns like pred(X,X), where the second X matches the value -- bound by the first X. | MatchVar var -- | Match two patterns simultaneously. Fails if either of them -- fails to match, and binds the MatchBinds from both sides if -- both patterns match. Not valid in expressions, only patterns. | MatchAnd (Term (Match ext var)) (Term (Match ext var)) -- | Match a prefix of a string. | MatchPrefix ByteString -- the prefix of the string (utf-8 encoded) (Term (Match ext var)) -- the rest of the string -- | placeholder for extending this type | MatchExt ext deriving (Functor, Foldable, Traversable, Show) instance Bifunctor Match where bimap f g = \case MatchWild x -> MatchWild x MatchNever x -> MatchNever x MatchFid x -> MatchFid x MatchExt ext -> MatchExt (f ext) MatchVar var -> MatchVar (g var) MatchBind var -> MatchBind (g var) MatchAnd a b -> MatchAnd (fmap (bimap f g) a) (fmap (bimap f g) b) MatchPrefix b term -> MatchPrefix b $ fmap (bimap f g) term instance Bifoldable Match where bifoldMap f g = \case MatchWild _ -> mempty MatchNever _ -> mempty MatchFid _ -> mempty MatchExt ext -> f ext MatchVar var -> g var MatchBind var -> g var MatchAnd a b -> foldMap (bifoldMap f g) a <> foldMap (bifoldMap f g) b MatchPrefix _ term -> foldMap (bifoldMap f g) term matchVar :: Match ext var -> Maybe var matchVar (MatchVar v) = Just v matchVar (MatchBind v ) = Just v matchVar _ = Nothing instance Pretty Var where pretty (Var _ v nm) = pretty (fromMaybe "" nm) <> "_" <> pretty v instance Pretty ext => Pretty (Match ext Var) where pretty (MatchAnd l r) = pretty l <+> "@" <+> pretty r pretty (MatchBind v@(Var ty _ _)) = pretty v <> ":" <> pretty ty pretty (MatchPrefix str rest) = pretty (show str) <> ".." <> pretty rest pretty (MatchExt ext) = pretty ext pretty other = prettyMatchAtom other instance IsWild (Term (Match ext v)) where isWild (Ref MatchWild{}) = True isWild _ = False prettyMatchAtom :: Pretty ext => Match ext Var -> Doc ann prettyMatchAtom (MatchWild _) = "_" prettyMatchAtom (MatchNever _) = "()" prettyMatchAtom (MatchFid fid) = pretty fid prettyMatchAtom (MatchVar v) = pretty v prettyMatchAtom (MatchPrefix str rest) = pretty (show str) <> ".." <> pretty rest prettyMatchAtom other = "(" <> pretty other <> ")" type Pat = Pat_ Var type Expr = Expr_ Var type Pat_ var = Term (Match () var) type Expr_ var = Term (Match () var) {- Note [why do we have sequential composition?] The issue is that queries for sum types can't necessarily be handled by nested generators. Consider v = cxx1.FunctionName (name(cxx1.Name "xyz" ) | operator(cxx1.Name "+")) If we flattened this into nested generators we would get x = cxx1.Name "xyz" y = cxx1.Name "+" z = cxx1.FunctionName (name x | operator y) Now suppose there is no name xyz. This query will match nothing, because the generator for cxx1.Name "xyz" would be empty. (even if the generator matched, flattening out the generators like this will test too many combinations and do too much work). With sequential composition of queries we can do it like this: n = (name x where x = cxx1.Name "xyz") | (operator x where x = cxx1.Name "+") v = cxx1.FunctionName n (Note that this query won't work if you write it because we can't typecheck the sub-query "name x where ...", but we can generate the AST for it in the JSON query compiler.) -} {- Note [pausing/resuming queries] To support paging queries we need to be able to pause and resume a query in progress. This is achieved by capturing the state of the bytecode evaluator during the query, and serializing it so that it can be returned to the user. Later the user sends back the serialized query state, and we deserialize it and continue executing it. Roughly speaking this is achieved as follows: * The saved state of the evaluator is described by the Thrift type QueryCont in glean/if/internal.thrift. We use Thrift for serializing and deserializing the state. * The state includes: - the code, instruction pointer and local registers - the state of any fact iterators generated by 'seek' - the contents of binary::Output registers created by 'output' * The bytecode instruction Suspend allows the subroutine to be suspending by passing current state to an FFI call, which is used to capture the state and record it, and then returning from the subroutine. To make this work we have to obey a rule in the bytecode program: Don't keep any pointers in local registers across Suspend because they won't be valid when we resume. Pointers in input registers are fine; the contents of these will be saved and restored as necessary (this includes binary::Output registers). -} -- | A Query with flat generators, ready for compilation type CodegenQuery = QueryWithInfo CgQuery data QueryWithInfo q = QueryWithInfo { qiQuery :: q , qiNumVars :: Int -- ^ maximum index of any Var + 1 , qiReturnType :: Type } -- | Find all variables that need to be bound to a binary::Output findOutputs :: CgQuery -> IntSet findOutputs q = findOutputsQuery q IntSet.empty where findOutputsQuery :: CgQuery -> IntSet -> IntSet findOutputsQuery (CgQuery _ stmts) r = foldr findOutputsStmt r stmts findOutputsStmt :: CgStatement -> IntSet -> IntSet findOutputsStmt (CgStatement lhs gen) r = findOutputsGen gen (foldr findOutputsMatch r lhs) findOutputsStmt (CgNegation stmts) r = foldr findOutputsStmt r stmts findOutputsStmt (CgDisjunction stmtss) r = foldr (flip (foldr findOutputsStmt)) r stmtss findOutputsGen :: Generator -> IntSet -> IntSet findOutputsGen (FactGenerator _ kpat vpat) r = foldr findOutputsMatch (foldr findOutputsMatch r vpat) kpat findOutputsGen (TermGenerator t) r = findOutputsPat t r findOutputsGen (DerivedFactGenerator _ k v) r = findOutputsPat k (findOutputsPat v r) findOutputsGen (ArrayElementGenerator _ _) r = r findOutputsGen (PrimCall _ args) r = foldr findOutputsPat r args findOutputsPat pat r = foldr findOutputsMatch r pat findOutputsMatch :: Match () Var -> IntSet -> IntSet findOutputsMatch (MatchPrefix _ rest) r = foldr findOutputsMatch r rest findOutputsMatch (MatchBind (Var ty var _)) r | not (isWordTy ty) = IntSet.insert var r findOutputsMatch (MatchAnd a b) r = findOutputsPat a (findOutputsPat b r) findOutputsMatch _ r = r compileQuery :: CodegenQuery -- ^ The query to compile. NB. no type checking or validation is -- done on this; we assume that earlier phases have done this. A -- malformed query can cause a crash. -> IO CompiledQuery compileQuery (QueryWithInfo query numVars ty) = do vlog 2 $ show (pretty query) (idTerm, resultKey, resultValue, stmts) <- case query of (CgQuery (Tuple [idTerm, resultKey, resultValue]) stmts) -> return (idTerm, resultKey, resultValue, stmts) _other -> throwIO $ BadQuery "unsupported query" sub <- generateQueryCode $ \ regs@QueryRegs{..} -> do let outputVars = IntSet.toList $ findOutputs query outputs (length outputVars) $ \outputRegs -> do let outputRegAssocs :: [(Int, Register 'BinaryOutputPtr)] outputRegAssocs = zip outputVars outputRegs locals (numVars - length outputVars) $ \localRegs -> do let localRegAssocs :: [(Int, Register 'Word)] localRegAssocs = zip (filter (`notElem` outputVars) [0..]) localRegs -- vector of variables corresponding to the vars in the original query let vars = Vector.replicate numVars (error "compileQuery") // (localRegAssocs ++ coerce outputRegAssocs) -- resultKeyReg/resultValueReg is where we build up result values output $ \resultKeyOutput -> output $ \resultValueOutput -> compileStatements regs stmts vars $ mdo -- If the result term is a variable, avoid unnecessarily -- copying it into resultOutput and just use it directly. resultKeyReg <- case resultKey of Ref (MatchVar (Var ty v _)) | not (isWordTy ty) -> return (castRegister (vars ! v)) _other -> do resetOutput resultKeyOutput buildTerm resultKeyOutput vars resultKey return resultKeyOutput resultValReg <- case resultValue of Ref (MatchVar (Var ty v _)) | not (isWordTy ty) -> return (Just (castRegister (vars ! v))) Tuple [] -> return Nothing _other -> do resetOutput resultValueOutput buildTerm resultValueOutput vars resultValue return (Just resultValueOutput) idReg <- case idTerm of Ref (MatchVar (Var ty idVar _)) | isWordTy ty -> return (vars ! idVar) Ref (MatchFid fid) -> constant (fromIntegral (fromFid fid)) _ -> error "unsupported result type" local $ \ptr -> local $ \end -> local $ \len -> mdo getOutput resultKeyReg ptr end ptrDiff ptr end len val <- case resultValReg of Nothing -> castRegister <$> constant 0 Just reg -> do getOutput reg ptr end local $ \tmp -> do ptrDiff ptr end tmp add tmp len return reg result idReg resultKeyReg val len jumpIf0 len continue decrAndJumpIf0 maxResults pause -- check whether we have exceeded maxBytes. Note that we -- will return more than max_bytes, but this way we don't -- have to backtrack and regenerate the most recent result -- again in the continuation. jumpIfGt len maxBytes pause sub len maxBytes jump continue pause <- label suspend saveState continue -- see Note [pausing/resuming queries] continue <- label return () ret TextLog.vlog 3 $ Text.unlines $ disassemble "Query" sub -- Tell the query engine how to traverse results for expanding -- nested facts. (pid, traverse) <- case derefType ty of Angle.Record [ pidfield, key, val ] | Angle.Predicate (PidRef pid ref) <- derefType (Angle.fieldDefType pidfield) -> do traverse <- case ref of PredicateRef "_tmp_" _ -> Just <$> -- detect temporary predicates, see Glean.Query.Flatten.captureKey -- TODO: matching like this is a bit janky. genTraversal (Angle.fieldDefType key) (Angle.fieldDefType val) _otherwise -> return Nothing return (Just pid, traverse) _other -> throwIO $ ErrorCall "unrecognised query return type" return (CompiledQuery sub pid traverse) -- | A 'ResultTerm' is represented in two ways depending on the type -- of the value being returned: -- -- * 'PredicateTy' and 'NatTy' results are stored directly in a 'Register Word' -- * Other types are built in a 'binary::Output' and are represented by a -- 'Register BinaryOutputPtr'. -- -- 'isWordTy' returns 'True' for the first kind. -- isWordTy :: Type -> Bool isWordTy = isWordRep . repType where isWordRep PredicateRep{} = True isWordRep ByteRep = True isWordRep NatRep = True isWordRep _ = False isEmptyTy :: Type -> Bool isEmptyTy ty | TupleRep [] <- repType ty = True | otherwise = False patIsExactFid :: Vector (Register 'Word) -> Term (Match () Var) -> Maybe (Label -> Code (Register 'Word)) patIsExactFid vars pat = case pat of Ref (MatchFid fid) -> Just $ \_ -> constant (fromIntegral (fromFid fid)) Ref (MatchVar (Var ty v _)) | PredicateRep{} <- repType ty -> Just $ \_ -> return (vars ! v) Nat n -> Just $ \_ -> constant n -- if the typechecker allows it Ref (MatchAnd p q) -> case (patIsExactFid vars p, patIsExactFid vars q) of -- the MatchAnd cases are a bit silly, but who knows what earlier -- transformations might generate. (Just l, Just r) -> Just $ \fail -> do r1 <- l fail r2 <- r fail jumpIfNe r1 r2 fail -- patterns better be equal return r1 (Just l, Nothing) | Just cmpR <- cmpWordPat vars q -> Just $ \fail -> do reg <- l fail cmpR reg fail return reg (Nothing, Just r) | Just cmpR <- cmpWordPat vars p -> Just $ \fail -> do reg <- r fail cmpR reg fail return reg _other -> Nothing _other -> Nothing -- | if the given pattern is represented by a word register, then -- return the code to compare a value in the given register against -- the pattern. -- -- A @Term (Match Var)@ doesn't have enough information to reconstruct -- its 'Type', because in particular we can't reconstruct a 'SumTy' -- from an 'Alt' value. But it does have enough information to derive -- its representation, so we can tell by looking at a pattern whether -- it is represented as a word. cmpWordPat :: Vector (Register 'Word) -> Term (Match () Var) -> Maybe (Register 'Word -> Label -> Code ()) cmpWordPat vars pat = case pat of Byte b -> Just $ \reg fail -> do r <- constant (fromIntegral b) jumpIfNe reg r fail Nat n -> Just $ \reg fail -> do r <- constant n jumpIfNe reg r fail Ref (MatchFid fid) -> Just $ \reg fail -> do r <- constant (fromIntegral (fromFid fid)) jumpIfNe reg r fail Ref (MatchWild ty) | isWordTy ty -> Just $ \_ _ -> return () Ref (MatchNever ty) | isWordTy ty -> Just $ \_ fail -> jump fail Ref (MatchVar (Var ty v _)) | isWordTy ty -> Just $ \reg fail -> jumpIfNe reg (vars ! v) fail Ref (MatchAnd a b) -> case (cmpWordPat vars a, cmpWordPat vars b) of (Nothing, Nothing) -> Nothing (Just f, Nothing) -> Just f (Nothing, Just f) -> Just f (Just f, Just g) -> Just $ \reg fail -> f reg fail >> g reg fail Ref (MatchBind (Var ty v _)) | isWordTy ty -> Just $ \reg _ -> loadReg reg (vars ! v) _otherwise -> Nothing -- | Compare a value in an output register with a pattern. If the -- pattern matches, fall through, otherwise jump to the given label. cmpOutputPat :: Vector (Register 'Word) -> Register 'BinaryOutputPtr -- ^ register containing the value -> [QueryChunk Var] -- ^ pattern to match against -> Label -- ^ jump here on match failure -> Code () cmpOutputPat vars reg pat fail = local $ \ptr -> local $ \begin -> do getOutput reg begin ptr matchPat vars begin ptr fail pat compileTermGen :: Expr -> Vector (Register 'Word) -- ^ registers for variables -> Maybe (Register 'BinaryOutputPtr) -- ^ where to put the output -> Code a -- ^ @andThen@: code to insert after -- the result is constructed. -> Code a compileTermGen term vars maybeReg andThen = do forM_ maybeReg $ \resultOutput -> case term of Nat n -> loadConst (fromIntegral n) (castRegister resultOutput) Byte n -> loadConst (fromIntegral n) (castRegister resultOutput) Ref (MatchFid f) -> loadConst (fromIntegral (fromFid f)) (castRegister resultOutput) Ref (MatchVar (Var ty v _)) | isWordTy ty -> loadReg (vars ! v) (castRegister resultOutput) _other -> do resetOutput resultOutput buildTerm resultOutput vars term andThen compileStatements :: forall a s. QueryRegs s -> [CgStatement] -> Vector (Register 'Word) -- ^ registers for variables -> Code a -- ^ @andThen@: code to insert after -- the result is constructed. -> Code a compileStatements regs@(QueryRegs{..} :: QueryRegs s) stmts vars andThen = compile stmts where compile [] = andThen compile (CgStatement (Ref (MatchWild _)) gen : rest) = compileGen gen Nothing $ compile rest compile (CgStatement (Ref (MatchBind (Var _ var _))) gen : rest) = compileGen gen (Just (vars ! var)) $ compile rest -- <pat> = <fact gen> is a lookup if <pat> is known compile (CgStatement pat (FactGenerator (PidRef pid _) kpat vpat) : rest) | Just load <- patIsExactFid vars pat = mdo let patOutput :: forall a . [QueryChunk Var] -> (Register 'BinaryOutputPtr -> (Label -> Code ()) -> Code a) -> Code a patOutput chunks cont = case chunks of [QueryBind (Var ty v _)] | not (isWordTy ty) -> cont (castRegister (vars ! v)) (\_ -> return ()) _ | all isWild chunks -> do reg <- constant 0 -- null means "don't copy the key/value" cont (castRegister reg) (\_ -> return ()) | otherwise -> output $ \reg -> cont reg (\fail -> cmpOutputPat vars reg chunks fail) patOutput (preProcessPat kpat) $ \kout kcmp -> patOutput (preProcessPat vpat) $ \vout vcmp -> do reg <- load fail local $ \pidReg -> mdo lookupKeyValue reg kout vout pidReg -- TODO: if this is a trusted fact ID (i.e. not supplied by -- the user) then we could skip this test. expected <- constant (fromIntegral (fromPid pid)) jumpIfEq pidReg expected ok raise "fact has the wrong type" ok <- label return () kcmp fail vcmp fail a <- compile rest fail <- label return a -- ToDO: push the pat into compileGen and match it eagerly, save -- some copying. compile (CgStatement pat gen : rest) = outReg $ \reg -> compileGen gen (Just reg) $ mdo filterPat reg pat fail a <- compile rest fail <- label return a where outReg | Just{} <- maybeWordFilter = local | otherwise = \f -> output (f . castRegister) maybeWordFilter = cmpWordPat vars pat filterPat reg pat fail | Just cmp <- maybeWordFilter = cmp reg fail | otherwise = cmpOutputPat vars (castRegister reg) chunks fail where chunks = preProcessPat pat compile (CgNegation stmts : rest) = mdo local $ \seekLevel -> do currentSeek seekLevel compileStatements regs stmts vars $ do endSeek seekLevel jump fail a <- compile rest fail <- label return a -- an empty list of generators should fall through without -- executing inner, but we have to compile inner because we need -- its result. compile (CgDisjunction [] : rest) = mdo jump fail a <- compile rest fail <- label return a compile (CgDisjunction [one] : rest) = compile (one ++ rest) -- To compile sequential generators, we simply compile each generator, -- and when it produces a result we "call" the continuation to -- handle the rest of the query. As each generator completes, -- it falls through to the next one. -- -- "calling" the continuation is done by explicitly passing the -- return address, because we don't have a stack (yet). Some -- minor shenanigans are involved to ensure that the "called" -- routine's local registers don't clash with its "call site". compile (CgDisjunction stmtss : rest) = local $ \innerRet -> mdo sites <- forM stmtss $ \stmts -> do compileStatements regs stmts vars $ mdo site <- callSite loadLabel ret_ innerRet jump doInner ret_ <- label return site jump done doInner <- label a <- calledFrom sites $ compile rest jumpReg innerRet done <- label return a -- Helper function for processing numeric primitive operations compileGenNumericPrim :: forall a . Expr -> Expr -> Maybe (Register 'Word) -> Code a -> (Register 'Word -> Register 'Word -> Label -> Code ()) -> Code a compileGenNumericPrim p q maybeReg inner failOp = withNatTerm vars p $ \a -> withNatTerm vars q $ \b -> mdo failOp (castRegister a) (castRegister b) fail whenJust maybeReg (resetOutput . castRegister) r <- inner fail <- label return r compileGen :: forall a . Generator -> Maybe (Register 'Word) -> Code a -> Code a compileGen (TermGenerator term) maybeReg inner = do compileTermGen term vars (coerce maybeReg) inner compileGen (PrimCall PrimOpNeNat [p, q]) maybeReg inner = compileGenNumericPrim p q maybeReg inner $ \a b fail -> jumpIfEq a b fail compileGen (PrimCall PrimOpGtNat [p, q]) maybeReg inner = compileGenNumericPrim p q maybeReg inner $ \a b fail -> jumpIfLe a b fail compileGen (PrimCall PrimOpGeNat [p, q]) maybeReg inner = compileGenNumericPrim p q maybeReg inner $ \a b fail -> jumpIfLt a b fail compileGen (PrimCall PrimOpLtNat [p, q]) maybeReg inner = compileGenNumericPrim p q maybeReg inner $ \a b fail -> jumpIfGe a b fail compileGen (PrimCall PrimOpLeNat [p, q]) maybeReg inner = compileGenNumericPrim p q maybeReg inner $ \a b fail -> jumpIfGt a b fail compileGen (PrimCall PrimOpNeExpr [p, q]) maybeReg inner = mdo case cmpWordPat vars p of Just cmp -> local $ \q' -> compileTermGen q vars (Just q') $ cmp (castRegister q') ok Nothing -> withTerm vars q $ \q' -> cmpOutputPat vars q' (preProcessPat p) ok jump fail ok <- label whenJust maybeReg (resetOutput . castRegister) r <- inner fail <- label return r compileGen (PrimCall _prim _args) Nothing inner = inner compileGen (PrimCall PrimOpAddNat [p, q]) (Just reg) inner = withNatTerm vars p $ \a -> withNatTerm vars q $ \b -> do loadReg (castRegister a) (castRegister reg) add (castRegister b) (castRegister reg) inner compileGen (PrimCall PrimOpToLower [arg]) (Just reg) inner = withTerm vars arg $ \str -> do local $ \ptr -> local $ \end -> do getOutput str ptr end resetOutput (castRegister reg) outputStringToLower ptr end (castRegister reg) inner compileGen (PrimCall PrimOpLength [arg]) (Just reg) inner = withTerm vars arg $ \array -> do local $ \ptr -> local $ \end -> do getOutput array ptr end inputNat ptr end reg inner compileGen (PrimCall PrimOpRelToAbsByteSpans [arg]) (Just reg) inner = withTerm vars arg $ \array -> do local $ \ptr -> local $ \end -> do getOutput array ptr end resetOutput (castRegister reg) outputRelToAbsByteSpans ptr end (castRegister reg) inner compileGen PrimCall{} _ _ = error "compileGen: unknown PrimCall" compileGen (ArrayElementGenerator _ _) Nothing inner = inner compileGen (ArrayElementGenerator eltTy term) (Just reg) inner = withTerm vars term $ \array -> do local $ \len -> do local $ \off -> mdo local $ \ptr -> local $ \end -> do getOutput array ptr end ptrDiff ptr end len local $ \start -> do move ptr start -- the number of elements is useless to us, since we need -- to traverse the array linearly. We'll use the length -- to know when we've reached the end. inputSkipNat ptr end ptrDiff start ptr off jumpIfEq off len done loop <- label -- Note that ptr/len are not valid here, because we might -- have suspended and resumed during the previous -- iteration of the loop, so we must getOutput again. -- We're maintaining the current offset into the array -- value in the 'off' register. local $ \ptr -> local $ \end -> do getOutput array ptr end ptrDiff ptr end len advancePtr ptr off local $ \start -> local $ \size -> do -- really want to just matchPat here move ptr start if isWordTy eltTy then do inputNat ptr end reg else do skipTrusted ptr end eltTy resetOutput (castRegister reg) outputBytes start ptr (castRegister reg) ptrDiff start ptr size add size off a <- inner jumpIfLt off len loop done <- label return a -- derived fact where we don't bind the Fid: no need to store it compileGen DerivedFactGenerator{} Nothing inner = inner -- derived fact where we bind the Fid: generate the key and -- store the fact in the environment. compileGen (DerivedFactGenerator (PidRef pid _) key val) (Just resultReg) inner = do rpid <- constant (fromIntegral (fromPid pid)) let isEmpty (Ref (MatchVar (Var ty _ _))) = isEmptyTy ty isEmpty (Tuple []) = True isEmpty _ = False local $ \size -> do if | isEmpty val -> withTerm vars key $ \out -> do getOutputSize out size newDerivedFact rpid out size resultReg | isEmpty key -> withTerm vars val $ \out -> do getOutputSize out size newDerivedFact rpid out size resultReg | otherwise -> output $ \out -> do resetOutput out buildTerm out vars key getOutputSize out size buildTerm out vars val newDerivedFact rpid out size resultReg inner compileGen (FactGenerator (PidRef pid _) kpat vpat) maybeReg inner = do compileFactGenerator regs vars pid kpat vpat maybeReg inner compileFactGenerator :: forall a s. QueryRegs s -> Vector (Register 'Word) -- ^ registers for variables -> Pid -> Pat -> Pat -> Maybe (Register 'Word) -> Code a -> Code a compileFactGenerator (QueryRegs{..} :: QueryRegs s) vars pid kpat vpat maybeReg inner = local $ \seekTok -> local $ \prefix_size -> do let noCapture _ _ = return () withPrefix :: [QueryChunk Var] -> ( -- load the prefix into registers (Register 'DataPtr -> Register 'DataPtr -> Code ()) -- remaining non-prefix chunks -> [QueryChunk Var] -- capture the key if necessary -> Maybe (Register 'DataPtr -> Register 'DataPtr -> Code ()) -> Code a) -> Code a withPrefix chunks fun = case chunks of -- Just a fixed ByteString: use it directly (QueryPrefix bs : rest) | emptyPrefix rest -> do lit_i <- literal bs fun (\ptr end -> issue $ LoadLiteral lit_i ptr end) rest Nothing -- A variable: use it directly (QueryVar (Var ty v _) : rest) | emptyPrefix rest, not (isWordTy ty) -> fun (getOutput (castRegister (vars ! v))) rest Nothing -- Special case for a QueryBind that covers the whole -- pattern. This is used to capture the key of a fact so -- that we can return it, but we don't want it to interfere -- with prefix lookup. [QueryAnd [QueryBind (Var ty var _)] pats] -> withPrefix pats $ \load remaining capture -> do let capture' input inputend | isWordTy ty = inputNat input inputend (vars ! var) -- word-typed variables must be represented by -- the word itself (this is assumed elsewhere) -- so we can't just copy the bytes in that case. | otherwise = do let reg = castRegister (vars ! var) resetOutput reg outputBytes input inputend reg fromMaybe noCapture capture input inputend fun load remaining (Just capture') -- Otherwise: build up the prefix in a binary::Output -- (register 'prefixOut') _otherwise -> do output $ \prefixOut -> do resetOutput prefixOut remaining <- buildPrefix prefixOut vars chunks fun (getOutput prefixOut) remaining Nothing let kchunks = preProcessPat kpat let vchunks = preProcessPat vpat withPrefix kchunks $ \loadPrefix remaining captureKey -> mdo -- hasPrefix is True if there is a prefix pattern *and* there is a -- non-empty pattern to match after the prefix. let hasPrefix = not (emptyPrefix kchunks) && not (all isWild remaining) typ <- constant (fromIntegral (fromPid pid)) local $ \ptr -> local $ \ptrend -> do loadPrefix ptr ptrend when hasPrefix $ ptrDiff ptr ptrend prefix_size seek typ ptr ptrend seekTok -- for each fact... loop <- label local $ \clause -> local $ \keyend -> local $ \clauseend -> local $ \saveclause -> mdo let need_value = not $ all isWild vchunks local $ \ignore -> local $ \ok -> do next seekTok need_value ok clause keyend clauseend (fromMaybe ignore maybeReg) jumpIf0 ok end -- 0 -> no match decrAndJumpIf0 ok continue -- 1 -> match suspend saveState loop -- 2 -> timeout / interrupted continue <- label return () when (isJust captureKey) $ move clause saveclause -- skip the prefix when hasPrefix $ add prefix_size $ castRegister clause -- check that the rest of the key matches the pattern matchPat vars clause keyend loop remaining -- match the value when need_value $ do move keyend clause matchPat vars clause clauseend loop vchunks -- matched; let's capture the key if necessary fromMaybe noCapture captureKey saveclause keyend a <- inner -- loop to find more facts, unless this is a point query which could -- only have a single result. unless (null remaining) $ jump loop end <- label endSeek seekTok return a -- ---------------------------------------------------------------------------- -- | A pre-processed pattern, ready for compilation to bytecode data QueryChunk var = -- | A literal sequence of bytes to match, corresponding to a -- fragment of the serialized pattern QueryPrefix ByteString -- | A wildcard, represented as the type of the fragment to skip over | QueryWild Type -- | Never matches | QueryNever -- | A binder: like a wildcard, but we capture the fragment and bind -- it to a variable | QueryBind var -- | A variable: match this fragment against the specified variable | QueryVar var -- | Match two patterns simultaneously. This is the preprocessed -- equivalent of 'MatchAnd'. | QueryAnd [QueryChunk var] [QueryChunk var] -- | A sum type, with different matchers for each of the alternatives. -- If an alternative is Nothing, then the pattern does not match -- that alternative. | QuerySum [Maybe [QueryChunk var]] deriving Show type M a = StateT (Builder, Bool, [QueryChunk Var]) IO a -- @Bool@ is whether the builder is empty (True for empty, False for non-empty) instance IsWild (QueryChunk var) where isWild QueryWild{} = True isWild _ = False -- -- | Process a query into a [QueryChunk], which enables the compiler to -- generate efficient code for a pattern match. -- preProcessPat :: Term (Match () Var) -> [QueryChunk Var] preProcessPat pat = unsafePerformIO $ withBuilder $ \builder -> do (_, (_, _, chunks)) <- flip runStateT (builder, True, []) $ build pat >> endOfChunk return (reverse chunks) where builder :: M Builder builder = do (b, empty, chunks) <- get if empty then do put (b, False, chunks) return b else return b endOfChunk :: M () endOfChunk= do (b,empty,chunks) <- get if empty then return () else do bs <- lift $ finishBuilder b lift $ resetBuilder b put (b, True, QueryPrefix bs : chunks) getChunks :: M () -> M [QueryChunk Var] getChunks m = do (b, empty, saveChunks) <- get put (b, empty, []) m endOfChunk (b, empty, chunks) <- get put (b, empty, saveChunks) return (reverse chunks) chunk c = do endOfChunk modify $ \(b, empty, chunks) -> (b, empty, c : chunks) prefixString s = do b <- builder lift $ FFI.unsafeWithBytes s $ \p n -> FFI.call $ glean_push_value_bytes b (castPtr p) n -- NB. Use glean_push_value_bytes to omit the NUL NUL suffix. -- Thus the remaining input is a valid string, and we can continue -- by matching it as a StringTy with QueryWild or QueryBind. build :: Term (Match () Var) -> M () build pat = case pat of Array terms -> do b <- builder lift $ FFI.call $ glean_push_value_array b $ fromIntegral $ length terms mapM_ build terms Tuple terms -> mapM_ build terms (Alt ix term) -> do b <- builder lift $ FFI.call $ glean_push_value_selector b $ fromIntegral ix build term Ref (MatchVar v) -> chunk (QueryVar v) Ref (MatchBind v) -> chunk (QueryBind v) Ref (MatchWild ty) -> chunk (QueryWild ty) Ref (MatchNever _) -> chunk QueryNever Ref (MatchFid fid) -> do b <- builder lift $ FFI.call $ glean_push_value_fact b fid -- optimise MatchAnd where one side is a wildcard: Ref (MatchAnd (Ref MatchWild{}) b) -> build b Ref (MatchAnd a (Ref MatchWild{})) -> build a Ref (MatchAnd a b) -> do endOfChunk chunk =<< QueryAnd <$> getChunks (build a) <*> getChunks (build b) Ref (MatchPrefix txt rest) -> do prefixString txt build rest Ref MatchExt{} -> error "preProcessPat" Byte w -> do b <- builder lift $ FFI.call $ glean_push_value_byte b w Nat w -> do b <- builder lift $ FFI.call $ glean_push_value_nat b w String s -> do b <- builder lift $ FFI.unsafeWithBytes s $ \p n -> FFI.call $ glean_push_value_string b (castPtr p) n ByteArray bs -> do b <- builder lift $ encodeByteArray b bs -- | True if the prefix of this query is empty emptyPrefix :: [QueryChunk Var] -> Bool emptyPrefix (QueryPrefix{} : _) = False emptyPrefix (QueryVar{} : _) = False emptyPrefix [QueryAnd [QueryBind{}] x] = emptyPrefix x emptyPrefix _ = True -- | Serialize the prefix of a query into an output register, and return -- the remaining non-prefix part of the query. buildPrefix :: Register 'BinaryOutputPtr -> Vector (Register 'Word) -- ^ registers for variables -> [QueryChunk Var] -> Code [QueryChunk Var] buildPrefix out vars chunks = go chunks where go (QueryPrefix bs : rest) = do local $ \ptr -> local $ \end -> do loadLiteral bs ptr end outputBytes ptr end out go rest go (QueryVar (Var ty v _) : rest) | isWordTy ty = do outputNat (vars ! v) out go rest -- every other type is currently represented as a binary::Output go (QueryVar (Var _other v _) : rest) = do local $ \ptr -> local $ \end -> do getOutput (castRegister (vars ! v)) ptr end outputBytes ptr end out go rest go other = return other -- We might want to consider handling QuerySum here: if an -- alt can be serialized to a prefix, it might be better -- to seek to it directly. But then we would possibly have -- to do multiple seeks. -- -- | Generate code to skip over a value of the given type in the input -- skipTrusted :: Register 'DataPtr -> Register 'DataPtr -> Type -> Code () skipTrusted input inputend ty = skip (repType ty) where skip ty = case ty of ByteRep -> do size <- constant 1; inputBytes input inputend size NatRep -> inputSkipNat input inputend ArrayRep eltTy -> local $ \size -> do inputNat input inputend size case eltTy of ByteRep -> inputBytes input inputend size _ -> mdo jumpIf0 size end loop2 <- label skip eltTy decrAndJumpIfNot0 size loop2 end <- label return () TupleRep tys -> mapM_ skip tys SumRep tys -> mdo local $ \sel -> do inputNat input inputend sel select sel alts raise "selector out of range" alts <- forM tys $ \ty -> do alt <- label skip ty jump end return alt end <- label return () StringRep -> inputSkipTrustedString input inputend PredicateRep _ -> inputSkipNat input inputend -- | Load a term that is expected to be word-typed into a register. withNatTerm :: Vector (Register 'Word) -- ^ registers for variables -> Expr -> (Register 'Word -> Code a) -> Code a withNatTerm vars term andThen = do case term of Nat n -> local $ \r -> do loadConst (fromIntegral n) r; andThen r Byte n -> local $ \r -> do loadConst (fromIntegral n) r; andThen r Ref (MatchFid f) -> local $ \r -> do loadConst (fromIntegral (fromFid f)) r; andThen r Ref (MatchVar (Var ty v _)) | isWordTy ty -> andThen (vars ! v) _other -> error "withNatTerm: shouldn't happen" -- | Serialize a term into an output register. A copy is avoided if -- the term is already represented by an output register. withTerm :: Vector (Register 'Word) -> Term (Match () Var) -> (Register 'BinaryOutputPtr -> Code a) -> Code a withTerm vars (Ref (MatchVar (Var ty v _))) action | not (isWordTy ty) = action (castRegister (vars ! v)) withTerm vars term action = do output $ \reg -> do resetOutput reg buildTerm reg vars term action reg -- | Serialize a term into the given output register. buildTerm :: Register 'BinaryOutputPtr -> Vector (Register 'Word) -> Term (Match () Var) -> Code () buildTerm output vars term = go term where go term = case term of Byte b -> outputByteImm (fromIntegral b) output Nat n -> outputNatImm n output String s -> local $ \ptr -> local $ \end -> do -- NOTE: We assume that the string has been validated during parsing. loadLiteral (mangleString s) ptr end outputBytes ptr end output Array vs -> do outputNatImm (fromIntegral (length vs)) output mapM_ go vs Tuple fields -> mapM_ go fields Alt n term -> do outputNatImm n output; go term Ref (MatchFid f) -> outputNatImm (fromIntegral (fromFid f)) output Ref (MatchPrefix str rest) -> do local $ \ptr -> local $ \end -> do let mangled = fromValue (String str) withoutTerminator = ByteString.take (ByteString.length mangled - 2) mangled loadLiteral withoutTerminator ptr end outputBytes ptr end output go rest Ref (MatchVar (Var ty var _)) | isWordTy ty -> outputNat (vars ! var) output | otherwise -> local $ \ptr -> local $ \end -> do getOutput (castRegister (vars ! var)) ptr end outputBytes ptr end output other -> error $ "buildTerm: " <> show other -- | check that a value matches a pattern, and bind variables as -- necessary. The pattern is assumed to cover the *whole* of the -- input. matchPat :: Vector (Register 'Word) -- ^ registers for variables -> Register 'DataPtr -- ^ the input -> Register 'DataPtr -- ^ the input end -> Label -- ^ jump to here on mismatch -> [QueryChunk Var] -- ^ pattern to match -> Code () -- special case for binding to a single variable: we don't need to -- traverse the data, just copy the bytes. matchPat vars input inputend _ [QueryBind (Var ty var _)] | not (isWordTy ty) = do let reg = castRegister (vars ! var) resetOutput reg outputBytes input inputend reg -- general case matchPat vars input inputend fail chunks = mapM_ match (reverse (dropWhile isWild (reverse chunks))) -- there's no point in traversing data at the end of the key -- if we're just ignoring it, so drop trailing wildcards. where match (QueryPrefix bs) = do local $ \ok -> do inputShiftLit input inputend bs ok jumpIf0 ok fail -- chunk didn't match match (QueryWild ty) = skipTrusted input inputend ty match QueryNever = jump fail match (QueryVar (Var ty var _)) | isWordTy ty = local $ \id -> do inputNat input inputend id jumpIfNe id (vars ! var) fail match (QueryVar (Var ty var _)) | isEmptyTy ty = return () -- the empty tuple could be represented by a null pointer, so it's -- not safe to do inputShiftBytes anyway. | otherwise = local $ \ptr -> local $ \end -> local $ \ok -> do getOutput (castRegister (vars ! var)) ptr end inputShiftBytes input inputend ptr end ok jumpIf0 ok fail match (QueryAnd a b) = do local $ \start -> do move input start mapM_ match a move start input mapM_ match b match (QuerySum alts) = mdo local $ \sel -> do inputNat input inputend sel select sel lbls raise "selector out of range" lbls <- forM alts $ \mb -> do case mb of Nothing -> return fail Just chunks -> do lbl <- label mapM_ match chunks jump end return lbl end <- label return () match (QueryBind (Var ty var _)) | isWordTy ty = inputNat input inputend (vars ! var) | otherwise = local $ \start -> do let outReg = castRegister (vars ! var) resetOutput outReg move input start skipTrusted input inputend ty outputBytes start input outReg return () ----------------------------------------------------------------------------- -- | Compile a query for some facts, possibly with recursive expansion. compileQueryFacts :: [FactQuery] -> IO CompiledQuery compileQueryFacts facts = do input <- withBuilder $ \builder -> do buildRtsValue builder [ (fromIntegral factQuery_id :: Word64, if factQuery_recursive then 1 else 0 :: Word64) | FactQuery{..} <- facts ] finishBuilder builder sub <- generateQueryCode $ \ QueryRegs{..} -> output $ \kout -> output $ \vout -> local $ \fid -> local $ \pid -> local $ \rec_ -> local $ \ptr -> local $ \end -> do loadLiteral input ptr end local $ inputNat ptr end -- ignore the size loop <- label inputNat ptr end fid inputNat ptr end rec_ lookupKeyValue fid kout vout pid resultWithPid fid kout vout pid rec_ jumpIfLt (castRegister ptr) (castRegister end) loop ret return (CompiledQuery sub Nothing Nothing) -- ----------------------------------------------------------------------------- -- The FFI layer for query bytecode subroutines -- -- IF YOU BREAK BACKWARD COMPATIBILITY HERE, BUMP version IN -- Glean.Bytecode.Generate.Instruction -- -- IF YOU ALSO BREAK FORWARD COMPATIBILITY, BUMP latestSupportedVersion AS WELL -- data QueryRegs s = QueryRegs { -- | Start a new traversal of facts beginning with a given prefix seek :: Register 'Word -> Register 'DataPtr -> Register 'DataPtr -> Register 'Word -> Code () -- | Fetch the current seek token , currentSeek :: Register 'Word -> Code () -- | Release the state associated with an iterator token , endSeek :: Register 'Word -> Code () -- | Grab the next fact in a traversal , next :: Register 'Word -- token -> Bool -- do we need the value? -> Register 'Word -- result -> Register 'DataPtr -- clause begin -> Register 'DataPtr -- key end -> Register 'DataPtr -- clause end -> Register 'Word -- id -> Code () -- | Fact lookup , lookupKeyValue :: Register 'Word -> Register 'BinaryOutputPtr -> Register 'BinaryOutputPtr -> Register 'Word -> Code () -- | Record a result , result :: Register 'Word -> Register 'BinaryOutputPtr -> Register 'BinaryOutputPtr -> Register 'Word -> Code () -- | Record a result, with a given pid and optional recursive expansion , resultWithPid :: Register 'Word -> Register 'BinaryOutputPtr -> Register 'BinaryOutputPtr -> Register 'Word -> Register 'Word -> Code () -- | Record a new derived fact , newDerivedFact :: Register 'Word -> Register 'BinaryOutputPtr -> Register 'Word -> Register 'Word -> Code () -- | Save the current state, resume at the given label later , saveState :: Register ('Fun ['WordPtr,'WordPtr] 'Void) -- | Maximum number of results to return , maxResults :: Register 'Word -- | Maximum number of bytes to return , maxBytes :: Register 'Word } generateQueryCode :: (forall s . QueryRegs s -> Code ()) -> IO (Subroutine CompiledQuery) generateQueryCode f = generate Optimised $ \ ((seek_, currentSeek_, endSeek_, next_, lookupKey_), (result_, resultWithPid_, newDerivedFact_), saveState, maxResults, maxBytes) -> let seek typ ptr end tok = callFun_3_1 seek_ typ (castRegister ptr) (castRegister end) tok currentSeek tok = callFun_0_1 currentSeek_ tok endSeek tok = callFun_1_0 endSeek_ tok next tok need_value ok clause keyend clauseend id = do demand <- constant $ if need_value then 1 else 0 callFun_2_5 next_ tok demand ok (castRegister clause) (castRegister keyend) (castRegister clauseend) id lookupKeyValue id kout vout pid = callFun_3_1 lookupKey_ id (castRegister kout) (castRegister vout) pid result id key val new = callFun_3_1 result_ id (castRegister key) (castRegister val) new resultWithPid id key val pid recexp = callFun_5_0 resultWithPid_ id (castRegister key) (castRegister val) pid recexp newDerivedFact ty key val id = callFun_3_1 newDerivedFact_ ty (castRegister key) (castRegister val) id in f QueryRegs{..} -- ----------------------------------------------------------------------------- -- Pretty-printing instance Pretty CgQuery where pretty (CgQuery expr []) = pretty expr pretty (CgQuery expr stmts) = hang 2 $ sep (pretty expr <+> "where" : punctuate ";" (map pretty stmts)) instance Pretty CgStatement where pretty = \case CgStatement pat gen -> hang 2 $ sep [pretty pat <+> "=", pretty gen] CgNegation stmts -> "!" <> doStmts stmts CgDisjunction stmtss -> sep (punctuate " |" (map doStmts stmtss)) where doStmts stmts = hang 2 (sep [sep ("(" : punctuate ";" (map pretty stmts)), ")"]) instance Pretty Generator where pretty (FactGenerator pred kpat vpat) | isWild vpat || isUnit vpat = pretty pred <+> pretty kpat | otherwise = pretty pred <+> pretty kpat <+> "->" <+> pretty vpat where isUnit (Tuple []) = True isUnit _ = False pretty (TermGenerator q) = pretty q pretty (DerivedFactGenerator pid k (Tuple [])) = pretty pid <> "<- (" <> pretty k <> ")" pretty (DerivedFactGenerator pid k v) = pretty pid <> "<- (" <> pretty k <> " -> " <> pretty v <> ")" pretty (ArrayElementGenerator _ arr) = pretty arr <> "[..]" pretty (PrimCall op args) = hsep (pretty op : map pretty args) instance Pretty PrimOp where pretty PrimOpToLower = "prim.toLower" pretty PrimOpLength = "prim.length" pretty PrimOpRelToAbsByteSpans = "prim.relToAbsByteSpans" pretty PrimOpGtNat = "prim.gtNat" pretty PrimOpGeNat = "prim.geNat" pretty PrimOpLtNat = "prim.ltNat" pretty PrimOpLeNat = "prim.leNat" pretty PrimOpNeNat = "prim.neNat" pretty PrimOpAddNat = "prim.addNat" pretty PrimOpNeExpr = "prim.neExpr"