in cmd/zoekt-repo-index/main.go [122:287]
// main indexes the projects referenced by one or more manifest branches
// (given as BRANCH:XML-FILE command-line arguments) into a single zoekt
// index, treating each manifest project as a sub-repository.
//
// It exits through log.Fatal on any configuration, manifest or blob-reading
// error. A failure while adding a document is only logged; the documents
// added so far are still flushed by Finish.
func main() {
	sizeMax := flag.Int("file_limit", 128<<10, "maximum file size")
	shardLimit := flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard")
	parallelism := flag.Int("parallelism", 1, "maximum number of parallel indexing processes")

	revPrefix := flag.String("rev_prefix", "refs/remotes/origin/", "prefix for references")
	baseURLStr := flag.String("base_url", "", "base url to interpret repository names")
	repoCacheDir := flag.String("repo_cache", "", "root for repository cache")
	indexDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files")
	manifestRepoURL := flag.String("manifest_repo_url", "", "set a URL for a git repository holding manifest XML file. Provide the BRANCH:XML-FILE as further command-line arguments")
	manifestRevPrefix := flag.String("manifest_rev_prefix", "refs/remotes/origin/", "prefixes for branches in manifest repository")
	repoName := flag.String("name", "", "set repository name")
	repoURL := flag.String("url", "", "set repository URL")
	maxSubProjects := flag.Int("max_sub_projects", 0, "trim number of projects in manifest, for debugging.")
	incremental := flag.Bool("incremental", true, "only index if the repository has changed.")
	flag.Parse()

	// Tune GOMAXPROCS to match Linux container CPU quota.
	maxprocs.Set()

	if *repoCacheDir == "" {
		log.Fatal("must set --repo_cache")
	}
	repoCache := gitindex.NewRepoCache(*repoCacheDir)

	// Parse the base URL exactly once. On failure the returned *url.URL is
	// nil, so the error message must report the raw flag value instead.
	baseURL, err := url.Parse(*baseURLStr)
	if err != nil {
		log.Fatalf("Parse(%q): %v", *baseURLStr, err)
	}
	if *repoName == "" {
		// Default the repository name to the host and path of the base URL.
		*repoName = filepath.Join(baseURL.Host, baseURL.Path)
	}

	opts := build.Options{
		Parallelism: *parallelism,
		SizeMax:     *sizeMax,
		ShardMax:    *shardLimit,
		IndexDir:    *indexDir,
		RepositoryDescription: zoekt.Repository{
			Name: *repoName,
			URL:  *repoURL,
		},
	}
	opts.SetDefaults()

	branches, err := parseBranches(*manifestRepoURL, *manifestRevPrefix, repoCache, flag.Args())
	if err != nil {
		log.Fatalf("parseBranches(%s, %s): %v", *manifestRepoURL, *manifestRevPrefix, err)
	}
	if len(branches) == 0 {
		log.Fatal("must specify at least one branch")
	}

	// Debugging aid: cap the number of projects taken from each manifest.
	if *maxSubProjects > 0 {
		for _, b := range branches {
			if *maxSubProjects < len(b.mf.Project) {
				b.mf.Project = b.mf.Project[:*maxSubProjects]
			}
		}
	}

	// branch => file => blob location
	perBranch := map[string]map[fileKey]gitindex.BlobLocation{}
	opts.SubRepositories = map[string]*zoekt.Repository{}

	// branch => repo => version
	versionMap := map[string]map[string]plumbing.Hash{}
	for _, br := range branches {
		br.mf.Filter()
		files, versions, err := iterateManifest(br.mf, *baseURL, *revPrefix, repoCache)
		if err != nil {
			log.Fatalf("iterateManifest: %v", err)
		}

		perBranch[br.branch] = files
		for key, loc := range files {
			_, ok := opts.SubRepositories[key.SubRepoPath]
			if ok {
				// This can be incorrect: if the layout of manifests
				// changes across branches, then the same file could
				// be in different subRepos. We'll pretend this is not
				// a problem.
				continue
			}

			desc := &zoekt.Repository{}
			if err := gitindex.SetTemplatesFromOrigin(desc, loc.URL); err != nil {
				log.Fatalf("SetTemplatesFromOrigin(%s): %v", loc.URL, err)
			}

			opts.SubRepositories[key.SubRepoPath] = desc
		}
		versionMap[br.branch] = versions
	}

	// The set of sub-repository paths is fixed from here on, so collect and
	// sort it once rather than once per branch. Sorting makes the aggregate
	// hash below independent of map iteration order.
	paths := make([]string, 0, len(opts.SubRepositories))
	for p := range opts.SubRepositories {
		paths = append(paths, p)
	}
	sort.Strings(paths)

	for _, br := range branches {
		// Compute a version of the aggregate. This version
		// has nothing to do with git, but will let us do
		// incrementality correctly.
		hasher := sha1.New()
		for _, p := range paths {
			repo := opts.SubRepositories[p]
			id := versionMap[br.branch][p]

			// it is possible that 'id' is zero, if this
			// branch of the manifest doesn't have this
			// particular subrepository.
			hasher.Write([]byte(p))
			hasher.Write([]byte(id.String()))
			repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{
				Name:    br.branch,
				Version: id.String(),
			})
		}

		opts.RepositoryDescription.Branches = append(opts.RepositoryDescription.Branches, zoekt.RepositoryBranch{
			Name:    br.branch,
			Version: fmt.Sprintf("%x", hasher.Sum(nil)),
		})
	}

	// key => branch
	all := map[fileKey][]string{}
	for br, files := range perBranch {
		for k := range files {
			all[k] = append(all[k], br)
		}
	}

	if *incremental && opts.IncrementalSkipIndexing() {
		return
	}

	builder, err := build.NewBuilder(opts)
	if err != nil {
		log.Fatal(err)
	}
	for k, branches := range all {
		// The blob is looked up through the first branch that lists this
		// file; the blob ID itself comes from the key.
		loc := perBranch[branches[0]][k]
		data, err := loc.Blob(&k.ID)
		if err != nil {
			log.Fatal(err)
		}

		doc := zoekt.Document{
			Name:              k.FullPath(),
			Content:           data,
			SubRepositoryPath: k.SubRepoPath,
		}

		doc.Branches = append(doc.Branches, branches...)
		if err := builder.Add(doc); err != nil {
			// Stop adding further documents, but still fall through to
			// Finish below.
			log.Printf("Add(%s): %v", doc.Name, err)
			break
		}
	}
	if err := builder.Finish(); err != nil {
		log.Fatalf("Finish: %v", err)
	}
}