glean/lang/lsif/Data/LSIF/Angle.hs (302 lines of code) (raw):

{- Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. -} {- Convert Data.LSIF into glean/schema/lsif.angle-compatible data via JSON. Note: no dependency on the schema, to make iteration a bit quicker. Longer term we might want this typed for maintenance reasons. -} {-# LANGUAGE OverloadedStrings #-} module Data.LSIF.Angle (toAngle) where import Control.Monad.State.Strict import Data.Aeson ( object, Value(String, Array), KeyValue(..) ) import Data.Aeson.Types ( Pair ) import Data.IntMap ( IntMap ) import qualified Data.IntMap.Strict as IMap import Data.IntSet ( IntSet ) import qualified Data.IntSet as Set import Data.Maybe ( fromMaybe ) import Data.Text ( Text ) import qualified Data.Text as Text import qualified Data.Vector as V import Data.LSIF.Types import Data.LSIF.JSON ({- instances -}) -- Converts from AESON-parsed standard LSIF into Glean-compatible types -- | Parser env, to track command statements in LSIF that change scope -- So much bookeeping to recover the types data Env = Env { root :: !Text, -- ^ used to clean up file paths to be repo-relative openProjects :: IntSet, -- open project identifiers openDocuments :: IntSet, -- stack of open document identifiers -- track some of the types associated with ids idTypeMap :: IntMap VertexType, -- resultset id -> defnition id resultSetToDefMap :: IntMap Id, -- resultset id -> fileid of definition range resultSetToFileMap :: IntMap Id, -- each range (ref or def or decl) is a `next` edge member of a result set rangeToResultSetMap :: IntMap Id, -- when we see an textDocument/definition edge, record its resultset definitionResultToResultSetMap :: IntMap Id } -- Track the implied type of some of the graph nodes identifiers data VertexType = DefinitionType | DeclarationType | ReferenceType | ResultSetType deriving (Eq, Ord, Bounded, Enum) -- Build up some lookup tables for the assoc lists between refs/defs/decls/files -- to aid in generaing flatter angle facts empty :: Env empty = Env "" Set.empty Set.empty IMap.empty IMap.empty IMap.empty IMap.empty IMap.empty -- Drop projectRoot prefix from URI to yield repo-relative paths for Glean filterRoot :: Text -> State Env Text filterRoot path = do prefix <- root <$> get pure $ fromMaybe path (Text.stripPrefix (prefix <> "/") path) setRoot :: Text -> State Env () setRoot path = modify' (\e -> e { root = path }) insertProject, insertDocument :: Id -> State Env () insertProject (Id n) = modify' $ \e -> e { openProjects = Set.insert (fromIntegral n) (openProjects e) } insertDocument (Id n) = modify' $ \e -> e { openDocuments = Set.insert (fromIntegral n) (openDocuments e) } insertType :: Id -> VertexType -> State Env () insertType (Id n) ty = modify' $ \e -> e { idTypeMap = IMap.insert (fromIntegral n) ty (idTypeMap e) } resultSetToDef :: Id -> Id -> State Env () resultSetToDef (Id n) defId = modify' $ \e -> e { resultSetToDefMap = IMap.insert (fromIntegral n) defId (resultSetToDefMap e) } insertInResultSet :: Id -> Id -> State Env () insertInResultSet (Id n) resultSetId = modify' $ \e -> e { rangeToResultSetMap = IMap.insert (fromIntegral n) resultSetId (rangeToResultSetMap e) } insertDefinitionResult :: Id -> Id -> State Env () insertDefinitionResult (Id n) resultMapId = modify' $ \e -> e { definitionResultToResultSetMap = IMap.insert (fromIntegral n) resultMapId (definitionResultToResultSetMap e) } insertResultSetFile :: Id -> Id -> State Env () insertResultSetFile (Id n) fileId = modify' $ \e -> e { resultSetToFileMap = IMap.insert (fromIntegral n) fileId (resultSetToFileMap e) } getResultSetOfDefinitionResult :: Id -> State Env (Maybe Id) getResultSetOfDefinitionResult = lookupIMapEnv definitionResultToResultSetMap getTypeOf :: Id -> State Env (Maybe VertexType) getTypeOf = lookupIMapEnv idTypeMap lookupIMapEnv :: (Env -> IntMap a) -> Id -> State Env (Maybe a) lookupIMapEnv f (Id n) = do imap <- f <$> get pure (IMap.lookup (fromIntegral n) imap) lookupIn :: t -> (t -> IntMap a) -> Id -> Maybe a lookupIn env f (Id n) = IMap.lookup (fromIntegral n) (f env) -- -- | Convert LSIF json to Glean json -- -- We map over the statements, in order, generating 0 or more facts for Glean -- LSIF guarantees things are defined before references to them are used, and -- uses unique ids for facts. We can use that to simplify processing -- -- vertexes correspond to facts holding new data -- edges relate existnig vertex facts to each other (with new facts) -- -- Currently we emit one distinct predicate/fact pair per LSIF row, which is -- verbose. We should group by lsif.angle predicates first, before generating -- the final value, and likely improves load time. -- toAngle :: LSIF -> Value toAngle (LSIF facts) = Array $ V.concatMap V.fromList $ evalState (V.mapM factToAngle facts) empty -- | Process each key/fact pair factToAngle :: KeyFact -> State Env [Value] factToAngle (KeyFact _ MetaData{..}) = do setRoot projectRoot predicate "lsif.Metadata.1" ([ "lsifVersion" .= version, "projectRoot" .= string projectRoot, -- src.File "positionEncoding" .= positionEncoding ] ++ (case toolInfo of -- optional Nothing -> [] Just ToolInfo{..} -> ["toolInfo" .= object [ "toolName" .= toolName, "toolArgs" .= toolArgs, "version" .= version ]] )) factToAngle (KeyFact _ Project{..}) = predicate "lsif.Project.1" [ "kind" .= fromEnum kind ] factToAngle (KeyFact _ PackageInformation{..}) = predicate "lsif.PackageInformation.1" [ "name" .= name, "manager" .= manager, "version" .= version ] -- LSIF Documents are uri/language pairs. factToAngle (KeyFact n Document{..}) = do path <- filterRoot uri predicate "lsif.Document.1" [ "file" .= object [ factId n, -- generates a new src.File fact with this id "key" .= path ], "language" .= fromEnum language ] -- Push document or project identifier onto open stack. This is list of -- documents or projects for which facts may still be emitted. factToAngle (KeyFact _ (Event Begin scope n)) = [] <$ case scope of ProjectScope -> insertProject n DocumentScope -> insertDocument n -- should be src.File id of previous $event -- Declares a resultset, which is used to group definitions, uses, symbol ids -- (monikers), projects, hovertext and more factToAngle (KeyFact n ResultSet) = [] <$ insertType n ResultSetType -- Associate a range fact with a result set. -- We use this to track chains of ref -> resultset -> def for later generation factToAngle (KeyFact _ (Edge EdgeNext outV inV)) = [] <$ do outFactType <- getTypeOf outV inFactType <- getTypeOf inV case (outFactType, inFactType ) of (Just DefinitionType, Just ResultSetType) -> do outV `insertInResultSet` inV resultSetToDef inV outV -- record inverse map as well (Just ReferenceType, Just ResultSetType) -> outV `insertInResultSet` inV _ -> return () -- Range facts. These are range spans, 0-indexed, and may have optional -- tag/labels indicating what kind of span they are. They may have no text -- or no tag attached, generally. -- -- Later event:next facts will associate ranges with defs/decl/ref/ other facts -- factToAngle (KeyFact n (SymbolRange range mtag)) = do -- record the type if we have the tag handy case mtag of Just Definition{} -> insertType n DefinitionType Just Declaration{} -> insertType n DeclarationType Just Reference{} -> insertType n ReferenceType _ -> return () -- emit a range fact for this id predicateId "lsif.Range.1" n $ [ "range" .= toRange range ] ++ mFullRange ++ mText where mFullRange = case mtag of Just Definition{..} -> ["fullRange" .= toRange fullRange] Just Declaration{..} -> ["fullRange" .= toRange fullRange] _ -> [] mText = case mtag of Just Definition{..} -> ["text" .= string tagText] Just Declaration{..} -> ["text" .= string tagText] Just Reference{..} -> ["text" .= string tagText] _ -> [] -- These declare some types factToAngle (KeyFact n DefinitionResult) = [] <$ insertType n DefinitionType factToAngle (KeyFact n DeclarationResult) = [] <$ insertType n DeclarationType factToAngle (KeyFact n ReferenceResult) = [] <$ insertType n ReferenceType -- if we see a text/Document/Definition edge, track the resultmap it points to -- we need this later to look up files -> to their definition resultmaps factToAngle (KeyFact _ (Edge EdgeTextDocumentDefinition outV inV)) = do insertDefinitionResult inV outV pure [] -- Item edges link ranges to semantic elements like definitions and references -- They are tied to documents. factToAngle (KeyFact _ (Item outV inVs fileId prop)) = do ty <- getTypeOf outV case ty of Just DefinitionType -> do -- we can now map fileIds to their definition result sets mResultSet <- getResultSetOfDefinitionResult outV case mResultSet of -- record the definition resultSet points to its fileId Just resultSetId -> insertResultSetFile resultSetId fileId _ -> return () -- and emit the file definition fact predicate "lsif.FileDefinitions.1" [ "file" .= fileId , "range" .= inVs -- vector ] Just DeclarationType -> predicate "lsif.FileDeclarations.1" [ "file" .= fileId , "range" .= inVs -- vector ] -- this is an item set of references in this file -- we should be able to emit the target def if it has already been -- seen, for each ref id Just ReferenceType | prop == Just References -> do env <- get let xrefFacts :: [[Pair]] xrefFacts = [ [ "file" .= fileId , "range" .= inV , "target" .= object [ "file" .= targetFileId , "range" .= targetDefId ] ] | inV <- V.toList inVs , Just resultSetId <- pure (lookupIn env rangeToResultSetMap inV) , Just targetFileId <- pure (lookupIn env resultSetToFileMap resultSetId) , Just targetDefId <- pure (lookupIn env resultSetToDefMap resultSetId) ] if null xrefFacts then pure [] else pure [ object [ "predicate" .= text "lsif.FileReferences.1", "facts" .= [ object (map key xrefFacts) ] ] ] _ -> pure [] -- warning? a definition for something we haven't seen declared factToAngle _ = pure [] -- -- JSON-generating utilities -- predicate :: Applicative f => Text -> [Pair] -> f [Value] predicate name facts = pure $ pure $ object [ "predicate" .= text name, "facts" .= [ object [ key facts ] ] ] predicateId :: Applicative f => Text -> Id -> [Pair] -> f [Value] predicateId name id_ facts = pure $ pure $ object [ "predicate" .= text name, "facts" .= [ object [ factId id_, key facts ] ] ] toRange :: Range -> Value toRange Range{..} = object [ -- lsif.Range data type "lineBegin" .= line start, "columnBegin" .= character start, "lineEnd" .= line end, "columnEnd" .= character end ] key :: KeyValue kv => [Pair] -> kv key xs = "key" .= object xs factId :: KeyValue kv => Id -> kv factId (Id id_) = "id" .= id_ string :: Text -> Value string s = object [ "key" .= s ] text :: Text -> Value text = String