internal/zoekt/zoekt.go (111 lines of code) (raw):

// Package zoekt provides a client for interacting with the Zoekt code search indexing system. // It offers functionality to create and manage search indexes, handle repository metadata, // and process files for indexing with support for incremental updates. // // The package wraps the underlying Zoekt library, providing simplified interfaces for // repository branch management, file addition with size limits, and index building operations. // It maintains state about the current repository version and supports delta indexing // to optimize performance when only portions of a repository have changed. package zoekt import ( "fmt" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/index" ) type RepositoryBranch struct { Name string Version string } type Options struct { IndexDir string ID uint32 IsDelta bool RepoSource string SizeMax int Branches []RepositoryBranch Parallelism int } type Client struct { opts *Options builderOptions *index.Options } func (rb *RepositoryBranch) castToZoekt() zoekt.RepositoryBranch { return zoekt.RepositoryBranch{ Name: rb.Name, Version: rb.Version, } } func NewZoektClient(opts *Options) *Client { return &Client{ opts: opts, builderOptions: defaultBuilderOptions(opts), } } func defaultBuilderOptions(opts *Options) *index.Options { branches := make([]zoekt.RepositoryBranch, 0, len(opts.Branches)) for _, b := range opts.Branches { branches = append(branches, b.castToZoekt()) } buildOpts := index.Options{ IndexDir: opts.IndexDir, SizeMax: opts.SizeMax, IsDelta: opts.IsDelta, Parallelism: opts.Parallelism, RepositoryDescription: zoekt.Repository{ ID: opts.ID, Name: fmt.Sprint(opts.ID), Source: opts.RepoSource, Branches: branches, }} buildOpts.SetDefaults() return &buildOpts } func (c *Client) AddFile(builder *index.Builder, path string, content []byte, size int64, tooLarge bool, branches []string) error { if tooLarge && !c.builderOptions.IgnoreSizeMax(path) { if err := builder.Add(index.Document{ SkipReason: index.SkipReasonTooLarge, Name: path, Branches: branches, }); err != nil { return err } return nil } if err := builder.Add(index.Document{ Name: path, Content: content, Branches: branches, }); err != nil { return fmt.Errorf("error adding document with name %s: %w", path, err) } return nil } func (c *Client) IncrementalSkipIndexing() bool { opts := c.builderOptions return opts.IncrementalSkipIndexing() } func (c *Client) NewBuilder() (*index.Builder, error) { builder, err := index.NewBuilder(*c.builderOptions) if err != nil { return nil, fmt.Errorf("build.NewBuilder: %w", err) } return builder, nil } func (c *Client) GetCurrentSHA() (string, bool, error) { existingRepository, ok, err := c.findRepositoryMetadata() if err != nil { return "", ok, err } if !ok { return "", ok, nil } for _, branch := range existingRepository.Branches { if branch.Name == "HEAD" { return branch.Version, true, nil } } return "", true, nil } func (c *Client) findRepositoryMetadata() (*zoekt.Repository, bool, error) { opts := c.builderOptions existingRepository, _, ok, err := opts.FindRepositoryMetadata() if err != nil { return nil, ok, fmt.Errorf("failed to get repository metadata: %w", err) } // todo - is this explicit check needed or can the repository be returned if !ok { return nil, ok, nil } return existingRepository, ok, nil }