func main()

in cmd/zoekt-repo-index/main.go [122:287]


// main parses the command-line flags, resolves the manifest for every
// requested branch (the BRANCH:XML-FILE positional arguments), collects the
// files of all sub-repositories referenced by those manifests, and feeds them
// to a build.Builder writing into the configured index directory.
//
// Configuration problems and most failures terminate the process through
// log.Fatal; a failing builder.Add only stops adding further documents.
func main() {
	sizeMax := flag.Int("file_limit", 128<<10, "maximum file size")
	shardLimit := flag.Int("shard_limit", 100<<20, "maximum corpus size for a shard")
	parallelism := flag.Int("parallelism", 1, "maximum number of parallel indexing processes")

	revPrefix := flag.String("rev_prefix", "refs/remotes/origin/", "prefix for references")
	baseURLStr := flag.String("base_url", "", "base url to interpret repository names")
	repoCacheDir := flag.String("repo_cache", "", "root for repository cache")
	indexDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files")
	manifestRepoURL := flag.String("manifest_repo_url", "", "set a URL for a git repository holding manifest XML file. Provide the BRANCH:XML-FILE as further command-line arguments")
	manifestRevPrefix := flag.String("manifest_rev_prefix", "refs/remotes/origin/", "prefixes for branches in manifest repository")
	repoName := flag.String("name", "", "set repository name")
	repoURL := flag.String("url", "", "set repository URL")
	maxSubProjects := flag.Int("max_sub_projects", 0, "trim number of projects in manifest, for debugging.")
	incremental := flag.Bool("incremental", true, "only index if the repository has changed.")
	flag.Parse()

	// Tune GOMAXPROCS to match Linux container CPU quota.
	maxprocs.Set()

	if *repoCacheDir == "" {
		log.Fatal("must set --repo_cache")
	}
	repoCache := gitindex.NewRepoCache(*repoCacheDir)

	// Parse the base URL once. On failure report the raw flag value: the
	// *url.URL returned alongside a parse error is nil and carries no
	// information about the offending input.
	baseURL, err := url.Parse(*baseURLStr)
	if err != nil {
		log.Fatalf("Parse baseURL %q: %v", *baseURLStr, err)
	}
	if *repoName == "" {
		// Default the repository name to the host and path of the base URL.
		*repoName = filepath.Join(baseURL.Host, baseURL.Path)
	}

	opts := build.Options{
		Parallelism: *parallelism,
		SizeMax:     *sizeMax,
		ShardMax:    *shardLimit,
		IndexDir:    *indexDir,
		RepositoryDescription: zoekt.Repository{
			Name: *repoName,
			URL:  *repoURL,
		},
	}
	opts.SetDefaults()

	branches, err := parseBranches(*manifestRepoURL, *manifestRevPrefix, repoCache, flag.Args())
	if err != nil {
		log.Fatalf("parseBranches(%s, %s): %v", *manifestRepoURL, *manifestRevPrefix, err)
	}
	if len(branches) == 0 {
		log.Fatal("must specify at least one branch")
	}
	if *maxSubProjects > 0 {
		// Debugging aid: keep only the first max_sub_projects projects of
		// each manifest.
		for _, b := range branches {
			if *maxSubProjects < len(b.mf.Project) {
				b.mf.Project = b.mf.Project[:*maxSubProjects]
			}
		}
	}

	// branch => file key => blob location
	perBranch := map[string]map[fileKey]gitindex.BlobLocation{}
	opts.SubRepositories = map[string]*zoekt.Repository{}

	// branch => repo => version
	versionMap := map[string]map[string]plumbing.Hash{}
	for _, br := range branches {
		br.mf.Filter()
		files, versions, err := iterateManifest(br.mf, *baseURL, *revPrefix, repoCache)
		if err != nil {
			log.Fatalf("iterateManifest: %v", err)
		}

		perBranch[br.branch] = files
		for key, loc := range files {
			if _, ok := opts.SubRepositories[key.SubRepoPath]; ok {
				// This can be incorrect: if the layout of manifests
				// changes across branches, then the same file could
				// be in different subRepos. We'll pretend this is not
				// a problem.
				continue
			}

			desc := &zoekt.Repository{}
			if err := gitindex.SetTemplatesFromOrigin(desc, loc.URL); err != nil {
				log.Fatalf("SetTemplatesFromOrigin(%s): %v", loc.URL, err)
			}

			opts.SubRepositories[key.SubRepoPath] = desc
		}
		versionMap[br.branch] = versions
	}

	// The set of sub-repositories is complete at this point, so the sorted
	// path list is the same for every branch; compute it once. Sorting makes
	// the aggregate hash below independent of map iteration order.
	paths := make([]string, 0, len(opts.SubRepositories))
	for p := range opts.SubRepositories {
		paths = append(paths, p)
	}
	sort.Strings(paths)

	for _, br := range branches {
		// Compute a version of the aggregate. This version
		// has nothing to do with git, but will let us do
		// incrementality correctly.
		hasher := sha1.New()
		for _, p := range paths {
			repo := opts.SubRepositories[p]
			id := versionMap[br.branch][p]

			// it is possible that 'id' is zero, if this
			// branch of the manifest doesn't have this
			// particular subrepository.
			hasher.Write([]byte(p))
			hasher.Write([]byte(id.String()))
			repo.Branches = append(repo.Branches, zoekt.RepositoryBranch{
				Name:    br.branch,
				Version: id.String(),
			})
		}

		opts.RepositoryDescription.Branches = append(opts.RepositoryDescription.Branches, zoekt.RepositoryBranch{
			Name:    br.branch,
			Version: fmt.Sprintf("%x", hasher.Sum(nil)),
		})
	}

	// key => branch
	all := map[fileKey][]string{}
	for br, files := range perBranch {
		for k := range files {
			all[k] = append(all[k], br)
		}
	}

	if *incremental && opts.IncrementalSkipIndexing() {
		return
	}

	builder, err := build.NewBuilder(opts)
	if err != nil {
		log.Fatal(err)
	}
	for k, fileBranches := range all {
		// Read the blob through the location recorded for the first branch
		// listed for this key.
		loc := perBranch[fileBranches[0]][k]
		data, err := loc.Blob(&k.ID)
		if err != nil {
			log.Fatal(err)
		}

		doc := zoekt.Document{
			Name:              k.FullPath(),
			Content:           data,
			SubRepositoryPath: k.SubRepoPath,
		}

		doc.Branches = append(doc.Branches, fileBranches...)
		if err := builder.Add(doc); err != nil {
			// Stop adding documents on the first failure; Finish is still
			// called below.
			log.Printf("Add(%s): %v", doc.Name, err)
			break
		}
	}
	if err := builder.Finish(); err != nil {
		log.Fatalf("Finish: %v", err)
	}
}