internal/zoekt/zoekt.go (111 lines of code) (raw):
// Package zoekt provides a client for interacting with the Zoekt code search indexing system.
// It offers functionality to create and manage search indexes, handle repository metadata,
// and process files for indexing with support for incremental updates.
//
// The package wraps the underlying Zoekt library, providing simplified interfaces for
// repository branch management, file addition with size limits, and index building operations.
// It maintains state about the current repository version and supports delta indexing
// to optimize performance when only portions of a repository have changed.
package zoekt
import (
"fmt"
"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/index"
)
// RepositoryBranch pairs a branch name with the version recorded for it.
//
// It mirrors zoekt.RepositoryBranch; castToZoekt copies both fields across
// unchanged.
type RepositoryBranch struct {
	// Name is the name of the branch.
	Name string

	// Version is the version recorded for the branch.
	// NOTE(review): presumably a commit SHA, since GetCurrentSHA returns the
	// Version of the "HEAD" branch — confirm with callers.
	Version string
}
// Options carries the caller-supplied settings from which a Client derives
// its zoekt index.Options (see defaultBuilderOptions).
type Options struct {
	// IndexDir is forwarded to index.Options.IndexDir.
	IndexDir string

	// ID becomes the zoekt repository ID; its decimal string form is also
	// used as the repository name.
	ID uint32

	// IsDelta is forwarded to index.Options.IsDelta.
	IsDelta bool

	// RepoSource becomes the Source of the zoekt repository description.
	RepoSource string

	// SizeMax is forwarded to index.Options.SizeMax.
	SizeMax int

	// Branches are converted to zoekt.RepositoryBranch values and stored in
	// the zoekt repository description.
	Branches []RepositoryBranch

	// Parallelism is forwarded to index.Options.Parallelism.
	Parallelism int
}
// Client wraps the zoekt index builder configuration for one repository.
// Create it with NewZoektClient.
type Client struct {
	// opts is the Options value the client was constructed with.
	opts *Options

	// builderOptions is the zoekt configuration derived from opts by
	// defaultBuilderOptions; every method of Client operates on it.
	builderOptions *index.Options
}
// castToZoekt returns the zoekt.RepositoryBranch carrying the same Name and
// Version as rb.
func (rb *RepositoryBranch) castToZoekt() zoekt.RepositoryBranch {
	var converted zoekt.RepositoryBranch
	converted.Name = rb.Name
	converted.Version = rb.Version
	return converted
}
// NewZoektClient returns a Client that keeps opts and the zoekt builder
// options derived from it by defaultBuilderOptions.
//
// opts is dereferenced while deriving the builder options, so it must not be
// nil.
func NewZoektClient(opts *Options) *Client {
	client := new(Client)
	client.opts = opts
	client.builderOptions = defaultBuilderOptions(opts)
	return client
}
// defaultBuilderOptions translates opts into zoekt index.Options.
//
// The repository description uses opts.ID both as the ID and, formatted with
// fmt.Sprint, as the name. Every entry of opts.Branches is converted with
// castToZoekt. SetDefaults is invoked on the result before it is returned, so
// fields not set here are left to zoekt to populate.
func defaultBuilderOptions(opts *Options) *index.Options {
	// Sized up front and filled by index; the slice is non-nil even when
	// opts.Branches is empty.
	zoektBranches := make([]zoekt.RepositoryBranch, len(opts.Branches))
	for i := range opts.Branches {
		zoektBranches[i] = opts.Branches[i].castToZoekt()
	}

	repo := zoekt.Repository{
		ID:       opts.ID,
		Name:     fmt.Sprint(opts.ID),
		Source:   opts.RepoSource,
		Branches: zoektBranches,
	}

	buildOpts := &index.Options{
		IndexDir:              opts.IndexDir,
		SizeMax:               opts.SizeMax,
		IsDelta:               opts.IsDelta,
		Parallelism:           opts.Parallelism,
		RepositoryDescription: repo,
	}
	buildOpts.SetDefaults()
	return buildOpts
}
// AddFile adds the file at path to builder as an index document associated
// with branches.
//
// When tooLarge is true and c.builderOptions.IgnoreSizeMax(path) is false, the
// content is not indexed: a document without content, marked with
// index.SkipReasonTooLarge, is added instead. In every other case the document
// is added with content attached.
//
// size is part of the signature but is not consulted by this method.
//
// An error from builder.Add is returned wrapped (with %w) together with the
// document name on both paths, so callers can still inspect the underlying
// error with errors.Is / errors.As.
func (c *Client) AddFile(builder *index.Builder, path string, content []byte, size int64, tooLarge bool, branches []string) error {
	doc := index.Document{
		Name:     path,
		Branches: branches,
	}

	if tooLarge && !c.builderOptions.IgnoreSizeMax(path) {
		// Record the file as skipped rather than dropping it silently.
		doc.SkipReason = index.SkipReasonTooLarge
		if err := builder.Add(doc); err != nil {
			return fmt.Errorf("error adding skipped document with name %s: %w", path, err)
		}
		return nil
	}

	doc.Content = content
	if err := builder.Add(doc); err != nil {
		return fmt.Errorf("error adding document with name %s: %w", path, err)
	}
	return nil
}
// IncrementalSkipIndexing returns the result of calling
// IncrementalSkipIndexing on the client's zoekt builder options.
// NOTE(review): presumably true means the existing index is already current
// and indexing can be skipped — confirm against the zoekt documentation.
func (c *Client) IncrementalSkipIndexing() bool {
	return c.builderOptions.IncrementalSkipIndexing()
}
// NewBuilder creates a zoekt index.Builder from a copy of the client's builder
// options. On failure it returns a nil builder and the wrapped error.
func (c *Client) NewBuilder() (*index.Builder, error) {
	b, err := index.NewBuilder(*c.builderOptions)
	if err == nil {
		return b, nil
	}
	// NOTE(review): the message prefix says "build.NewBuilder" although the
	// call above is index.NewBuilder; kept verbatim to preserve behavior.
	return nil, fmt.Errorf("build.NewBuilder: %w", err)
}
// GetCurrentSHA returns the Version recorded for the branch named "HEAD" in
// the repository metadata located by findRepositoryMetadata.
//
// Results:
//   - lookup error:                  "", the lookup's found flag, the error
//   - no metadata found:             "", false, nil
//   - metadata found, "HEAD" present: its Version, true, nil
//   - metadata found, no "HEAD":      "", true, nil
func (c *Client) GetCurrentSHA() (string, bool, error) {
	repo, found, err := c.findRepositoryMetadata()
	switch {
	case err != nil:
		return "", found, err
	case !found:
		return "", false, nil
	}

	for i := range repo.Branches {
		if b := repo.Branches[i]; b.Name == "HEAD" {
			return b.Version, true, nil
		}
	}

	// Metadata exists but carries no "HEAD" entry.
	return "", true, nil
}
// findRepositoryMetadata asks the client's builder options for existing
// repository metadata via FindRepositoryMetadata.
//
// It returns the repository, whether it was found, and any error. A failed
// lookup is reported with a nil repository, the found flag as returned by
// zoekt, and the wrapped error. When nothing is found the repository is
// always nil. The second value returned by FindRepositoryMetadata is
// discarded.
func (c *Client) findRepositoryMetadata() (*zoekt.Repository, bool, error) {
	repo, _, found, err := c.builderOptions.FindRepositoryMetadata()
	if err != nil {
		return nil, found, fmt.Errorf("failed to get repository metadata: %w", err)
	}

	if found {
		return repo, true, nil
	}

	// TODO: decide whether this explicit nil is needed or whether repo could
	// be returned as-is when nothing was found.
	return nil, false, nil
}