func (mgr *TransactionManager) prepareRepacking()

in internal/gitaly/storage/storagemgr/partition/transaction_manager_housekeeping.go [186:370]


func (mgr *TransactionManager) prepareRepacking(ctx context.Context, transaction *Transaction) error {
	defer trace.StartRegion(ctx, "prepareRepacking").End()

	if transaction.runHousekeeping.repack == nil {
		return nil
	}

	span, ctx := tracing.StartSpanIfHasParent(ctx, "transaction.prepareRepacking", nil)
	defer span.Finish()

	finishTimer := mgr.metrics.housekeeping.ReportTaskLatency("repack", "prepare")
	defer finishTimer()

	var err error
	repack := transaction.runHousekeeping.repack

	// Build a working repository pointing to the snapshot repository. The housekeeping task can access
	// the repository without the need for quarantine.
	workingRepository := mgr.repositoryFactory.Build(transaction.snapshot.RelativePath(transaction.relativePath))
	repoPath := mgr.getAbsolutePath(workingRepository.GetRelativePath())

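	// ValidateRepacking checks the repack configuration and reports whether the configured strategy
	// amounts to a full repack. The boolean is used further below to decide whether the full-repack
	// timestamp file needs to be refreshed.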
	isFullRepack, err := housekeeping.ValidateRepacking(repack.config)
	if err != nil {
		return fmt.Errorf("validating repacking: %w", err)
	}

	if repack.config.Strategy == housekeepingcfg.RepackObjectsStrategyIncrementalWithUnreachable {
		// Once the transaction manager has been applied and at least one complete repack has occurred, there
		// should be no loose unreachable objects remaining in the repository. When the transaction manager
		// processes a change, it consolidates all unreachable objects and objects about to become reachable
		// into a new packfile, which is then placed in the repository. As a result, unreachable objects may
		// still exist but are confined to packfiles. These will eventually be cleaned up during a full repack.
		// In the interim, geometric repacking is utilized to optimize the structure of packfiles for faster
		// access. Under the transaction manager this strategy is therefore effectively a no-op and is rejected
		// here; it only remains for backward compatibility with the existing housekeeping scheduler.
		return errRepackNotSupportedStrategy
	}

	// Capture the list of packfiles and their baggage before repacking.
	beforeFiles, err := mgr.collectPackFiles(ctx, repoPath)
	if err != nil {
		return fmt.Errorf("collecting existing packfiles: %w", err)
	}

	// The midx file is different from other pack files in that its name stays the same after repacking
	// while its content changes. Save the midx file's inode before repacking so we can tell afterwards
	// whether it was rewritten.
	midxFileName := "multi-pack-index"
	midxPath := filepath.Join(repoPath, "objects", "pack", midxFileName)
	oldMidxInode, err := wal.GetInode(midxPath)
	if err != nil {
		return fmt.Errorf("get midx inode before repacking: %w", err)
	}

	// All of the repacking operations pack or remove all loose objects, and new ones are no longer
	// written now that transactions are in use. As we're packing them away now, log their removal.
	objectsDirRelativePath := filepath.Join(transaction.relativePath, "objects")
	objectsDirEntries, err := os.ReadDir(filepath.Join(transaction.snapshot.Root(), objectsDirRelativePath))
	if err != nil {
		return fmt.Errorf("read objects dir: %w", err)
	}

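	// Loose objects live in fan-out directories named after the first two hex digits of the object ID
	// (e.g. "objects/2b"). Record the removal of each such directory; the repack below packs or drops
	// their contents.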
	for _, entry := range objectsDirEntries {
		if entry.IsDir() && regexpLooseObjectDir.MatchString(entry.Name()) {
			if err := storage.RecordDirectoryRemoval(transaction.FS(), transaction.FS().Root(), filepath.Join(objectsDirRelativePath, entry.Name())); err != nil {
				return fmt.Errorf("record loose object dir removal: %w", err)
			}
		}
	}

	switch repack.config.Strategy {
	case housekeepingcfg.RepackObjectsStrategyGeometric:
		// Geometric repacking rearranges the list of packfiles according to a geometric progression. This process
		// does not consider object reachability. Since all unreachable objects remain within small packfiles,
		// they become included in the newly created packfiles. Geometric repacking does not prune any objects.
		if err := housekeeping.PerformGeometricRepacking(ctx, workingRepository, repack.config); err != nil {
			return fmt.Errorf("perform geometric repacking: %w", err)
		}
	case housekeepingcfg.RepackObjectsStrategyFullWithUnreachable:
		// Git does not pack loose unreachable objects if there are no existing packs in the repository.
		// Perform an incremental repack first. This ensures all loose objects are part of a pack and will be
		// included in the full pack we're about to build. This allows us to remove the loose objects from the
		// repository when applying the pack without losing any objects.
		//
		// Issue: https://gitlab.com/gitlab-org/git/-/issues/336
		if err := housekeeping.PerformIncrementalRepackingWithUnreachable(ctx, workingRepository); err != nil {
			return fmt.Errorf("perform geometric repacking: %w", err)
		}

		// This strategy merges all packfiles into a single packfile, simultaneously removing any loose objects
		// if present. Unreachable objects are then appended to the end of this unified packfile. Although the
		// `git-repack(1)` command does not offer an option to specifically pack loose unreachable objects, this
		// is not an issue because the transaction manager already ensures that unreachable objects are
		// contained within packfiles. Therefore, this strategy effectively consolidates all packfiles into a
		// single one. Adopting this strategy is crucial for alternates, as it ensures that we can manage
		// objects within an object pool without the capability to prune them.
		if err := housekeeping.PerformFullRepackingWithUnreachable(ctx, workingRepository, repack.config); err != nil {
			return err
		}
	case housekeepingcfg.RepackObjectsStrategyFullWithCruft:
		// Neither of the above strategies prunes unreachable objects; they only reorganize objects between
		// packfiles. In traditional housekeeping, the manager gets rid of unreachable objects via a full
		// repack with cruft packs: all unreachable objects are pushed into a cruft packfile and each
		// object's mtime is tracked. Unreachable objects exceeding a grace period are then cleaned up. The
		// grace period ensures housekeeping doesn't accidentally delete an about-to-be-reachable object,
		// for example when GC runs while a concurrent push is being processed.
		// The transaction manager handles concurrent requests very differently from vanilla Git. Each
		// request runs on a snapshot repository and the results are collected in the form of packfiles
		// containing the resulting reachable and unreachable objects. As a result, we don't need to take
		// object expiry or the cruft pack into account. This operation triggers a normal full repack
		// without cruft packing.
		// Afterward, packed unreachable objects are removed. During the migration to the transaction
		// system there might still be some loose unreachable objects; they will eventually be packed via
		// either of the above tasks.
		if err := housekeeping.PerformRepack(ctx, workingRepository, repack.config,
			// Do a full repack. By using `-a` instead of `-A` we will immediately discard unreachable
			// objects instead of exploding them into loose objects.
			gitcmd.Flag{Name: "-a"},
			// Don't include objects that are part of an alternate object directory.
			gitcmd.Flag{Name: "-l"},
			// Delete loose objects made redundant by this repack and redundant packfiles.
			gitcmd.Flag{Name: "-d"},
		); err != nil {
			return err
		}
	}

	// Re-capture the list of packfiles and their baggage after repacking.
	afterFiles, err := mgr.collectPackFiles(ctx, repoPath)
	if err != nil {
		return fmt.Errorf("collecting new packfiles: %w", err)
	}

	newMidxInode, err := wal.GetInode(midxPath)
	if err != nil {
		return fmt.Errorf("get midx inode after repacking: %w", err)
	}

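	// Stage the difference between the two snapshots in the WAL entry: files that disappeared are
	// logged as removals, files that appeared are logged as creations. The midx file is special-cased
	// via its inode because its name is identical before and after repacking.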
	for file := range beforeFiles {
		// Remove a file only if it's missing from the after set, or if it's the midx file and its
		// inode changed, meaning the repack rewrote it.
		if _, exist := afterFiles[file]; !exist || (file == midxFileName && newMidxInode != oldMidxInode) {
			transaction.walEntry.RemoveDirectoryEntry(filepath.Join(
				objectsDirRelativePath, "pack", file,
			))
		}
	}

	for file := range afterFiles {
		// Similarly, only stage files that didn't exist before repacking, plus the midx file if it
		// was rewritten.
		if _, exist := beforeFiles[file]; !exist || (file == midxFileName && newMidxInode != oldMidxInode) {
			fileRelativePath := filepath.Join(objectsDirRelativePath, "pack", file)

			if err := transaction.walEntry.CreateFile(
				filepath.Join(transaction.snapshot.Root(), fileRelativePath),
				fileRelativePath,
			); err != nil {
				return fmt.Errorf("record pack file creations: %q: %w", file, err)
			}
		}
	}

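	// A full repack also refreshes the timestamp file that records when the repository was last fully
	// repacked, so housekeeping heuristics don't schedule another full repack prematurely. The file is
	// staged in the WAL like any other change.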
	if isFullRepack {
		timestampRelativePath := filepath.Join(transaction.relativePath, stats.FullRepackTimestampFilename)
		timestampAbsolutePath := filepath.Join(transaction.snapshot.Root(), timestampRelativePath)

		info, err := os.Stat(timestampAbsolutePath)
		if err != nil && !errors.Is(err, fs.ErrNotExist) {
			return fmt.Errorf("stat repack timestamp file: %w", err)
		}

		if err := stats.UpdateFullRepackTimestamp(filepath.Join(transaction.snapshot.Root(), transaction.relativePath), time.Now()); err != nil {
			return fmt.Errorf("updating repack timestamp: %w", err)
		}

		if info != nil {
			// The file existed and needs to be removed first.
			transaction.walEntry.RemoveDirectoryEntry(timestampRelativePath)
		}

		if err := transaction.walEntry.CreateFile(timestampAbsolutePath, timestampRelativePath); err != nil {
			return fmt.Errorf("stage repacking timestamp: %w", err)
		}
	}

	return nil
}
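
collectPackFiles is defined elsewhere in this file and is not shown in this excerpt. Judging by its use above, it returns a set keyed by file base name. A minimal sketch of such a helper, assuming it gathers everything under objects/pack that a repack may add or remove (packfiles plus their accompanying .idx/.rev/.bitmap files and the multi-pack-index), could look like the following; the real implementation may filter differently, and the names used here are illustrative only.

// Illustrative sketch only; the filtering rules below are assumptions, not Gitaly's actual code.
package packfiles

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// collectPackFilesSketch returns the base names of pack-related files in the repository's
// objects/pack directory as a set, mirroring how beforeFiles/afterFiles are used above.
func collectPackFilesSketch(repoPath string) (map[string]struct{}, error) {
	entries, err := os.ReadDir(filepath.Join(repoPath, "objects", "pack"))
	if err != nil {
		return nil, fmt.Errorf("read pack directory: %w", err)
	}

	files := map[string]struct{}{}
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}

		name := entry.Name()
		// Packfiles and their baggage share the "pack-" prefix (.pack, .idx, .rev, .bitmap);
		// the multi-pack-index is included because repacking rewrites it under the same name.
		if strings.HasPrefix(name, "pack-") || name == "multi-pack-index" {
			files[name] = struct{}{}
		}
	}

	return files, nil
}

Comparing two such sets taken before and after the repack yields exactly the removals and creations that need to be recorded in the WAL entry, which is what the two loops over beforeFiles and afterFiles above do.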