in src/shipit/phase/ShipItCreateNewRepoPhase.php [177:363]
/**
 * Export the source repository, pass the exported content through
 * `$gen_filter`, and commit the filtered result into a freshly initialized
 * git repository at `$output_dir`, squashed down to a single commit.
 *
 * @param $output_dir     Directory in which the new git repo is initialized.
 * @param $manifest       Supplies the source path/branch/roots, the
 *                        destination branch and the verbosity flag.
 * @param $gen_filter     Applied to every intermediate changeset before it is
 *                        committed to the new repo.
 * @param $committer      Identity handed to `genInitGitRepo` for both the
 *                        temporary export repo and the output repo.
 * @param $do_submodules  Forwarded to `genExport` and `genCommitPatch`.
 * @param $revision       Forwarded to `genExport`; presumably `null` means
 *                        "whatever the source branch currently points at"
 *                        (TODO confirm against `genExport`).
 */
private static async function genCreateNewGitRepoImpl(
  string $output_dir,
  ShipItManifest $manifest,
  (function(ShipItChangeset): Awaitable<ShipItChangeset>) $gen_filter,
  shape('name' => string, 'email' => string) $committer,
  bool $do_submodules,
  ?string $revision = null,
): Awaitable<void> {
  $logger = new ShipItVerboseLogger($manifest->isVerboseEnabled());
  $source = await ShipItRepo::genTypedOpen<ShipItSourceRepo>(
    $manifest->getSourceSharedLock(),
    $manifest->getSourcePath(),
    $manifest->getSourceBranch(),
  );

  $logger->out(" Exporting...");
  $export = await $source->genExport(
    $manifest->getSourceRoots(),
    $do_submodules,
    $revision,
  );
  $export_dir = $export['tempDir'];
  $rev = $export['revision'];

  $logger->out(" Creating unfiltered commit...");
  await self::genInitGitRepo($export_dir->getPath(), $committer);

  // The following code is necessarily convoluted. In order to support
  // creating/verifying repos that are greater than 2 GB we need to break the
  // unfiltered initial commit into a series of chunks that are small enough
  // to be processed by ShipIt (max Hack string size is 2GB). After ShipIt
  // has processed each chunked commit we use git commands to directly squash
  // everything, dodging the Hack string size limit.
  //
  // `git ls-files` is used to get a list of all files, which is then split
  // into chunks
  //
  // For each chunk, `git add` the files and then `git commit`
  //
  // To filter, find the initial commit SHA with `git rev-parse` and then
  // read all commits into ShipItChangesets, apply filtering, and commit.
  //
  // After everything, squash to a single commit (with ShipIt tracking info).
  $all_filenames_chunked = (
    await (
      new ShipItShellCommand(
        $export_dir->getPath(),
        'git',
        'ls-files',
        '--others',
        // With `-z` git emits every path verbatim, terminated by a NUL byte.
        // Without it, "unusual" paths (non-ASCII bytes, tabs, quotes,
        // backslashes, ...) are wrapped in double quotes AND C-escaped, and
        // merely stripping the quotes does not give back the on-disk name.
        '-z',
      )
    )->genRun()
  )->getStdOut()
    |> Str\split($$, "\0")
    // The output ends with a terminator, so the split yields a trailing
    // empty entry (and a single empty entry when there are no files).
    |> Vec\filter($$, ($path) ==> !Str\is_empty($path))
    |> Vec\chunk($$, self::FILE_CHUNK_SIZE);
  $chunk_count = C\count($all_filenames_chunked);

  // Each chunk is staged and committed in the same working tree, so the
  // chunks are processed strictly one after another; overlapping
  // `git add`/`git commit` invocations would contend for the same index.
  foreach ($all_filenames_chunked as $i => $chunk_filenames) {
    if ($manifest->isVerboseEnabled()) {
      $logger->out(" Processing chunk %d/%d", $i + 1, $chunk_count);
    }
    // @lint-ignore AWAIT_IN_LOOP Chunks must be committed one at a time
    await self::genExecSteps(
      $export_dir->getPath(),
      vec[
        // `--` ends option parsing so that a path starting with `-` is
        // treated as a file name rather than as a flag.
        Vec\concat(vec['git', 'add', '--force', '--'], $chunk_filenames),
        vec[
          'git',
          'commit',
          '--message',
          Str\format('unfiltered commit chunk #%d', $i),
        ],
      ],
    );
  }

  $logger->out(" Filtering...");
  $export_lock = ShipItScopedFlock::createShared(
    ShipItScopedFlock::getLockFilePathForRepoPath($export_dir->getPath()),
  );
  try {
    $exported_repo = await ShipItRepo::genTypedOpen<ShipItSourceRepo>(
      $export_lock,
      $export_dir->getPath(),
      'master',
    );
    // Start from the root (parentless) commit of the export repo and walk
    // forward one commit at a time.
    $current_commit = (
      await (
        new ShipItShellCommand(
          $export_dir->getPath(),
          'git',
          'rev-list',
          '--max-parents=0',
          'HEAD',
        )
      )->genRun()
    )->getStdOut()
      |> Str\trim($$);
    $changesets = vec[];
    while ($current_commit !== null) {
      if ($manifest->isVerboseEnabled()) {
        $logger->out(" Processing %s", $current_commit);
      }
      // Every chunk changeset is re-labelled with the exported source
      // revision instead of the throwaway export-repo commit ID.
      $changesets[] = (
        // @lint-ignore AWAIT_IN_LOOP We need to do this serially
        await $exported_repo
          ->genChangesetFromID($current_commit)
      )
        ?->withID($rev);
      // @lint-ignore AWAIT_IN_LOOP We need to do this serially
      $current_commit = await $exported_repo->genFindNextCommit(
        $current_commit,
        keyset[],
      );
    }
  } finally {
    $export_lock->release();
  }

  // Commits for which no changeset could be built are dropped; at least one
  // changeset must remain for there to be anything to ship.
  $changesets = Vec\filter_nulls($changesets);
  invariant(!C\is_empty($changesets), 'got a null changeset :/');
  $changesets = await Vec\map_async($changesets, async ($changeset) ==> {
    $changeset = await $gen_filter($changeset);
    if ($manifest->isVerboseEnabled()) {
      $changeset->dumpDebugMessages();
    }
    return $changeset;
  });
  // The first changeset becomes the commit that everything is squashed into
  // below, so it is the one that carries the final subject and tracking data.
  $changesets[0] = $changesets[0]
    |> $$->withSubject('Initial commit')
    |> ShipItSync::addTrackingData($manifest, $$, $rev);

  $logger->out(" Creating new repo...");
  await self::genInitGitRepo($output_dir, $committer);
  $output_lock = ShipItScopedFlock::createShared(
    ShipItScopedFlock::getLockFilePathForRepoPath($output_dir),
  );
  try {
    $filtered_repo = await ShipItRepo::genTypedOpen<ShipItDestinationRepo>(
      $output_lock,
      $output_dir,
      '--orphan='.$manifest->getDestinationBranch(),
    );
    foreach ($changesets as $changeset) {
      // @lint-ignore AWAIT_IN_LOOP These need to be committed one at a time
      await $filtered_repo->genCommitPatch($changeset, $do_submodules);
    }
    // Now that we've filtered and committed all files into disparate chunks,
    // we need to squash the chunks into a single commit. Fortunately, the
    // following commands work just fine if HEAD == initial commit
    $initial_commit_sha = (
      await (
        new ShipItShellCommand(
          $output_dir,
          'git',
          'rev-list',
          '--max-parents=0',
          'HEAD',
        )
      )->genRun()
    )->getStdOut()
      |> Str\trim($$);
    await self::genExecSteps(
      $output_dir,
      vec[
        // Rewind HEAD (but NOT checked out file contents) to initial commit:
        vec['git', 'reset', '--soft', $initial_commit_sha],
        // Amend initial commit with content from all chunks
        // (this preserves initial commit's message w/ ShipIt tracking details)
        vec['git', 'commit', '--amend', '--no-edit'],
      ],
    );
  } finally {
    $output_lock->release();
  }
}