src/scip-lib/partialloader/index.go (359 lines of code) (raw):
package partialloader
import (
"errors"
"math"
"os"
"path/filepath"
"sort"
"sync"
"sync/atomic"
"github.com/sourcegraph/scip/bindings/go/scip"
"github.com/uber/scip-lsp/src/scip-lib/mapper"
"github.com/uber/scip-lsp/src/scip-lib/model"
scanner "github.com/uber/scip-lsp/src/scip-lib/scanner"
)
// PartialIndex is a partial index of a SCIP index
type PartialIndex interface {
SetDocumentLoadedCallback(func(*model.Document))
LoadIndex(indexPath string, indexReader scanner.ScipReader) error
LoadIndexFile(file string) error
LoadDocument(relativeDocPath string) (*model.Document, error)
GetSymbolInformation(symbol string) (*model.SymbolInformation, string, error)
GetSymbolInformationFromDescriptors(descriptors []model.Descriptor, version string) (*model.SymbolInformation, string, error)
References(symbol string) (map[string][]*model.Occurrence, error)
Implementations(symbol string) ([]string, error)
Tidy() error
}
type docNodes struct {
nodes []*SymbolPrefixTreeNode
revision int64
}
// PartialLoadedIndex is a partial index of a SCIP index
type PartialLoadedIndex struct {
// PrefixTreeRoot is the root of the symbol prefix tree
PrefixTreeRoot *SymbolPrefixTree
prefixTreeMu sync.RWMutex
// LoadedDocuments maps a document path to the SCIP Document
LoadedDocuments map[string]*model.Document
loadedDocsMu sync.RWMutex
// DocTreeNodes maps a document path to each of the symbol prefix tree nodes that the doc refers to
DocTreeNodes map[string]*docNodes
docTreeNodesMu sync.RWMutex
// Current revision number for tracking updates
revision atomic.Int64
// Documents that were updated in the current revision, mapped to their latest revision number
updatedDocs map[string]int64
updatedDocsMu sync.RWMutex
// Document to index map
docToIndex map[string]string
docToIndexMu sync.RWMutex
// Mutex to protect index modifications
modificationMu sync.Mutex
indexFolder string
pool *scanner.BufferPool
onDocumentLoaded func(*model.Document)
// ImplementorsBySymbol maps abstract/interface symbol -> set of implementing symbols
implementorsMu sync.RWMutex
ImplementorsBySymbol map[string]map[string]struct{}
}
// NewPartialLoadedIndex creates a new PartialLoadedIndex
func NewPartialLoadedIndex(indexFolder string) PartialIndex {
return &PartialLoadedIndex{
PrefixTreeRoot: NewSymbolPrefixTree(),
DocTreeNodes: make(map[string]*docNodes),
LoadedDocuments: make(map[string]*model.Document),
updatedDocs: make(map[string]int64),
docToIndex: make(map[string]string, 0),
indexFolder: indexFolder,
pool: scanner.NewBufferPool(1024, 12),
onDocumentLoaded: func(*model.Document) {},
ImplementorsBySymbol: make(map[string]map[string]struct{}),
}
}
// SetDocumentLoadedCallback sets the callback for when a document is loaded
func (p *PartialLoadedIndex) SetDocumentLoadedCallback(callback func(*model.Document)) {
p.onDocumentLoaded = callback
}
// LoadIndexFile loads a SCIP index file into the PartialLoadedIndex
func (p *PartialLoadedIndex) LoadIndexFile(file string) error {
indexReader, err := os.Open(file)
if err != nil {
return err
}
defer indexReader.Close()
return p.LoadIndex(file, indexReader)
}
// References returns the occurrences of a symbol in the index
func (p *PartialLoadedIndex) References(symbol string) (map[string][]*model.Occurrence, error) {
if scip.IsLocalSymbol(symbol) {
return nil, nil
}
occMutex := sync.Mutex{}
occurrences := make(map[string][]*model.Occurrence)
docScanner := &scanner.IndexScannerImpl{
Pool: p.pool,
MatchOccurrence: func(occSymbol []byte) bool {
return string(occSymbol) == symbol
},
VisitOccurrence: func(docPath string, occ *scip.Occurrence) {
occMutex.Lock()
defer occMutex.Unlock()
modelOcc := mapper.ScipOccurrenceToModelOccurrence(occ)
if occurrences[docPath] == nil {
occurrences[docPath] = make([]*model.Occurrence, 0)
}
occurrences[docPath] = append(occurrences[docPath], modelOcc)
},
}
docScanner.InitBuffers()
err := docScanner.ScanIndexFolder(p.indexFolder, true)
if err != nil {
return nil, err
}
return occurrences, nil
}
// LoadIndex loads a SCIP index into the PartialLoadedIndex
func (p *PartialLoadedIndex) LoadIndex(indexPath string, indexReader scanner.ScipReader) error {
localPrefixTree := &SymbolPrefixTreeNode{
Children: make(map[model.Descriptor]*SymbolPrefixTreeNode),
}
localDocTreeNodes := make(map[string]*docNodes)
localUpdatedDocs := make(map[string]int64)
localDocToIndex := make(map[string]string)
localImplementorsBySymbol := make(map[string]map[string]struct{})
loadScanner := &scanner.IndexScannerImpl{
Pool: p.pool,
MatchSymbol: func(symbol []byte) bool {
return !scip.IsLocalSymbol(string(symbol))
},
MatchDocumentPath: func(indexDocPath string) bool {
docPath := filepath.Clean(indexDocPath)
localDocToIndex[docPath] = indexPath
if localDocTreeNodes[docPath] == nil {
localDocTreeNodes[docPath] = &docNodes{
nodes: make([]*SymbolPrefixTreeNode, 0),
revision: p.revision.Add(1),
}
}
localUpdatedDocs[docPath] = localDocTreeNodes[docPath].revision
p.loadedDocsMu.RLock()
_, docLoaded := p.LoadedDocuments[docPath]
p.loadedDocsMu.RUnlock()
return docLoaded // We should only load the document if it had already been loaded
},
VisitSymbol: func(indexDocPath string, info *scip.SymbolInformation) {
docPath := filepath.Clean(indexDocPath)
modelInfo := mapper.ScipSymbolInformationToModelSymbolInformation(info)
leafNode, isNew := localPrefixTree.AddSymbol(docPath, modelInfo, p.revision.Load())
// Populate reverse implementors for quick lookup (impl -> abs)
for _, rel := range modelInfo.Relationships {
if rel != nil && rel.IsImplementation {
abs := rel.Symbol
if localImplementorsBySymbol[abs] == nil {
localImplementorsBySymbol[abs] = make(map[string]struct{})
}
localImplementorsBySymbol[abs][modelInfo.Symbol] = struct{}{}
}
}
if isNew {
localDocTreeNodes[docPath].nodes = append(localDocTreeNodes[docPath].nodes, leafNode)
}
},
VisitDocument: func(doc *scip.Document) {
docPath := filepath.Clean(doc.RelativePath)
modelDoc := mapper.ScipDocumentToModelDocument(doc)
p.loadedDocsMu.Lock()
p.LoadedDocuments[docPath] = modelDoc
p.loadedDocsMu.Unlock()
p.onDocumentLoaded(modelDoc)
},
}
loadScanner.InitBuffers()
defer func() {
p.modificationMu.Lock()
defer p.modificationMu.Unlock()
p.mergePrefixTree(localPrefixTree)
p.mergeDocTreeNodes(localDocTreeNodes)
p.mergeUpdatedDocs(localUpdatedDocs)
p.mergeDocToIndex(localDocToIndex)
p.mergeImplementors(localImplementorsBySymbol)
}()
return loadScanner.ScanIndexReader(indexReader)
}
// GetSymbolInformationFromDescriptors accepts a slice of descriptors and returns the matching symbol information
// If version is specified, it will return the symbol information for the specified version
// If version is not specified, it will return the symbol information for the first version it finds
func (p *PartialLoadedIndex) GetSymbolInformationFromDescriptors(descriptors []model.Descriptor, version string) (*model.SymbolInformation, string, error) {
p.prefixTreeMu.RLock()
defer p.prefixTreeMu.RUnlock()
if len(descriptors) == 0 {
return nil, "", errors.New("no descriptors provided")
}
node := &p.PrefixTreeRoot.SymbolPrefixTreeNode
for _, part := range descriptors {
node = node.Children[part]
if node == nil {
return nil, "", nil
}
}
if node.SymbolVersions == nil || len(node.SymbolVersions) == 0 || version == "" {
return nil, "", nil
}
// If version is specified, try to get that specific version
if versionInfo, exists := node.SymbolVersions[version]; exists {
return versionInfo.Info, versionInfo.DocumentPath, nil
}
// If version is not specified or not found, return the first sorted version we find
versions := make([]string, 0, len(node.SymbolVersions))
for v := range node.SymbolVersions {
versions = append(versions, v)
}
sort.Strings(versions)
firstVersionInfo := node.SymbolVersions[versions[0]]
return firstVersionInfo.Info, firstVersionInfo.DocumentPath, nil
}
// GetSymbolInformation returns the symbol information for a given symbol as well as the document path where it was found
func (p *PartialLoadedIndex) GetSymbolInformation(symbol string) (*model.SymbolInformation, string, error) {
if scip.IsLocalSymbol(symbol) {
return nil, "", nil
}
// Parse symbol and walk the tree to find the symbol information
sy, err := model.ParseScipSymbol(symbol)
if err != nil {
return nil, "", err
}
return p.GetSymbolInformationFromDescriptors(mapper.ScipDescriptorsToModelDescriptors(sy.Descriptors), sy.Package.Version)
}
// mergePrefixTree merges the local prefix tree into the main index
func (p *PartialLoadedIndex) mergePrefixTree(localPrefixTree *SymbolPrefixTreeNode) {
p.prefixTreeMu.Lock()
defer p.prefixTreeMu.Unlock()
p.PrefixTreeRoot.Merge(localPrefixTree)
}
// mergeDocTreeNodes merges the local doc tree nodes into the main index
func (p *PartialLoadedIndex) mergeDocTreeNodes(localDocTreeNodes map[string]*docNodes) {
p.docTreeNodesMu.Lock()
defer p.docTreeNodesMu.Unlock()
for docPath, nodes := range localDocTreeNodes {
if p.DocTreeNodes[docPath] == nil {
p.DocTreeNodes[docPath] = nodes
} else {
p.DocTreeNodes[docPath].nodes = append(p.DocTreeNodes[docPath].nodes, nodes.nodes...)
p.DocTreeNodes[docPath].revision = int64(math.Max(float64(p.DocTreeNodes[docPath].revision), float64(nodes.revision)))
}
}
}
// mergeUpdatedDocs merges the local updated docs into the main index
func (p *PartialLoadedIndex) mergeUpdatedDocs(localUpdatedDocs map[string]int64) {
p.updatedDocsMu.Lock()
defer p.updatedDocsMu.Unlock()
for docPath, revision := range localUpdatedDocs {
p.updatedDocs[docPath] = int64(math.Max(float64(p.updatedDocs[docPath]), float64(revision)))
}
}
// mergeDocToIndex merges the local docToIndex into the main index
func (p *PartialLoadedIndex) mergeDocToIndex(localDocToIndex map[string]string) {
p.docToIndexMu.Lock()
defer p.docToIndexMu.Unlock()
for docPath, indexPath := range localDocToIndex {
p.docToIndex[docPath] = indexPath
}
}
// mergeImplementors merges a local reverse implementors map into the main index
func (p *PartialLoadedIndex) mergeImplementors(local map[string]map[string]struct{}) {
if len(local) == 0 {
return
}
p.implementorsMu.Lock()
defer p.implementorsMu.Unlock()
for abs, impls := range local {
if p.ImplementorsBySymbol[abs] == nil {
p.ImplementorsBySymbol[abs] = make(map[string]struct{})
}
for impl := range impls {
p.ImplementorsBySymbol[abs][impl] = struct{}{}
}
}
}
// LoadDocument loads a document into the PartialLoadedIndex
func (p *PartialLoadedIndex) LoadDocument(relativeDocPath string) (*model.Document, error) {
p.loadedDocsMu.RLock()
if p.LoadedDocuments[relativeDocPath] != nil {
doc := p.LoadedDocuments[relativeDocPath]
p.loadedDocsMu.RUnlock()
return doc, nil
}
p.loadedDocsMu.RUnlock()
p.docToIndexMu.RLock()
index, ok := p.docToIndex[relativeDocPath]
p.docToIndexMu.RUnlock()
var (
doc *model.Document
err error
)
if ok {
doc, err = p.loadDocumentFromIndex(index, relativeDocPath)
} else {
doc, err = p.loadDocumentFromIndexFolder(relativeDocPath)
}
if doc != nil {
p.onDocumentLoaded(doc)
}
return doc, err
}
func (p *PartialLoadedIndex) loadDocumentFromIndex(indexPath, docPath string) (*model.Document, error) {
docScanner := &scanner.IndexScannerImpl{
Pool: p.pool,
MatchDocumentPath: func(indexRelativeDocPath string) bool {
indexRelativeDocPath = filepath.Clean(indexRelativeDocPath)
return indexRelativeDocPath == docPath
},
VisitDocument: func(doc *scip.Document) {
modelDoc := mapper.ScipDocumentToModelDocument(doc)
p.loadedDocsMu.Lock()
p.LoadedDocuments[docPath] = modelDoc
p.loadedDocsMu.Unlock()
},
}
docScanner.InitBuffers()
err := docScanner.ScanIndexFile(indexPath)
if err != nil {
return nil, err
}
p.loadedDocsMu.RLock()
doc := p.LoadedDocuments[docPath]
p.loadedDocsMu.RUnlock()
return doc, nil
}
// loadDocumentFromIndexFolder loads a document from the index folder into the PartialLoadedIndex
func (p *PartialLoadedIndex) loadDocumentFromIndexFolder(relativeDocPath string) (*model.Document, error) {
docScanner := &scanner.IndexScannerImpl{
Pool: p.pool,
MatchDocumentPath: func(s string) bool {
s = filepath.Clean(s)
return s == relativeDocPath
},
VisitDocument: func(doc *scip.Document) {
modelDoc := mapper.ScipDocumentToModelDocument(doc)
p.loadedDocsMu.Lock()
p.LoadedDocuments[relativeDocPath] = modelDoc
p.loadedDocsMu.Unlock()
},
}
docScanner.InitBuffers()
err := docScanner.ScanIndexFolder(p.indexFolder, true)
if err != nil {
return nil, err
}
p.loadedDocsMu.RLock()
doc := p.LoadedDocuments[relativeDocPath]
p.loadedDocsMu.RUnlock()
return doc, nil
}
// Implementations returns the list of implementing symbols for a given abstract/interface symbol
func (p *PartialLoadedIndex) Implementations(symbol string) ([]string, error) {
p.implementorsMu.RLock()
defer p.implementorsMu.RUnlock()
set := p.ImplementorsBySymbol[symbol]
if set == nil {
return []string{}, nil
}
res := make([]string, 0, len(set))
for s := range set {
res = append(res, s)
}
return res, nil
}
// Tidy prunes nodes for documents that were updated in the current revision
func (p *PartialLoadedIndex) Tidy() error {
// Acquire the modification mutex to prevent new index loads during cleanup
p.modificationMu.Lock()
defer p.modificationMu.Unlock()
p.updatedDocsMu.RLock()
for docPath, nodes := range p.DocTreeNodes {
p.updatedDocsMu.RUnlock()
p.docTreeNodesMu.RLock()
if nodes != nil {
// Get the common parent of all nodes for this document
// and prune from there to avoid affecting other documents
parent := nodes.nodes[0].Parent
if parent != nil {
p.prefixTreeMu.Lock()
parent.PruneNodes(docPath, nodes.revision)
p.prefixTreeMu.Unlock()
}
}
p.docTreeNodesMu.RUnlock()
p.updatedDocsMu.RLock()
}
p.updatedDocsMu.RUnlock()
// Clear the updated docs map after cleanup
p.updatedDocsMu.Lock()
p.updatedDocs = make(map[string]int64)
p.updatedDocsMu.Unlock()
return nil
}