src/shipit/phase/ShipItCreateNewRepoPhase.php (304 lines of code) (raw):
<?hh
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
* @format
*/
/**
* This file was moved from fbsource to www. View old history in diffusion:
* https://fburl.com/4zrm06z0
*/
namespace Facebook\ShipIt;
use namespace HH\Lib\{C, Dict, Str, Vec}; // @oss-enable
final class ShipItCreateNewRepoPhase extends ShipItPhase {
private ?string $sourceCommit = null;
private ?string $outputPath = null;
private bool $shouldDoSubmodules = true;
// In an ideal world, we could chunk based on file size. But that's
// non-trivial so the next best thing is to hope that average file size
// is less than or equal to 20MB (aka 2GB / 100), fingers crossed:
const int FILE_CHUNK_SIZE = 100;
public function __construct(
private (function(ShipItChangeset): Awaitable<ShipItChangeset>) $genFilter,
private shape('name' => string, 'email' => string) $committer,
) {
$this->skip();
}
<<__Override>>
public function getReadableName(): string {
return 'Create a new git repo with an initial commit';
}
<<__Override>>
public function getCLIArguments(): vec<ShipItCLIArgument> {
return vec[
shape(
'long_name' => 'create-new-repo',
'description' =>
'Create a new git repository with a single commit, then exit',
'write' => $_ ==> $this->unskip(),
),
shape(
'long_name' => 'create-new-repo-from-commit::',
'description' =>
'Like --create-new-repo, but at a specified source commit',
'write' => $rev ==> {
$this->sourceCommit = $rev;
$this->unskip();
return true;
},
),
shape(
'long_name' => 'create-new-repo-output-path::',
'description' =>
'When using --create-new-repo or --create-new-repo-from-commit, '.
'create the new repository in this directory',
'write' => $path ==> {
$this->outputPath = $path;
return $this->outputPath;
},
),
shape(
'long_name' => 'skip-submodules',
'description' => 'Don\'t sync submodules',
'write' => $_ ==> {
$this->shouldDoSubmodules = false;
return $this->shouldDoSubmodules;
},
),
];
}
<<__Override>>
protected async function genRunImpl(
ShipItManifest $manifest,
): Awaitable<void> {
$output = $this->outputPath;
try {
if ($output === null) {
$temp_dir = await self::genCreateNewGitRepo(
$manifest,
$this->genFilter,
$this->committer,
$this->shouldDoSubmodules,
$this->sourceCommit,
);
// Do not delete the output directory.
$temp_dir->keep();
$output = $temp_dir->getPath();
} else {
await self::genCreateNewGitRepoAt(
$manifest,
$output,
$this->genFilter,
$this->committer,
$this->shouldDoSubmodules,
$this->sourceCommit,
);
}
} catch (\Exception $e) {
ShipItLogger::err(" Error: %s\n", $e->getMessage());
throw new ShipItExitException(1);
}
ShipItLogger::out(" New repository created at %s\n", $output);
throw new ShipItExitException(0);
}
private static async function genInitGitRepo(
string $path,
shape('name' => string, 'email' => string) $committer,
): Awaitable<void> {
await self::genExecSteps(
$path,
vec[
vec['git', 'init'],
vec['git', 'config', 'user.name', $committer['name']],
vec['git', 'config', 'user.email', $committer['email']],
],
);
}
public static async function genCreateNewGitRepo(
ShipItManifest $manifest,
(function(ShipItChangeset): Awaitable<ShipItChangeset>) $gen_filter,
shape('name' => string, 'email' => string) $committer,
bool $do_submodules = true,
?string $revision = null,
): Awaitable<ShipItTempDir> {
$temp_dir = new ShipItTempDir('git-with-initial-commit');
await self::genCreateNewGitRepoImpl(
$temp_dir->getPath(),
$manifest,
$gen_filter,
$committer,
$do_submodules,
$revision,
);
return $temp_dir;
}
public static async function genCreateNewGitRepoAt(
ShipItManifest $manifest,
string $output_dir,
(function(ShipItChangeset): Awaitable<ShipItChangeset>) $gen_filter,
shape('name' => string, 'email' => string) $committer,
bool $do_submodules = true,
?string $revision = null,
): Awaitable<void> {
if (PHP\file_exists($output_dir)) {
throw new ShipItException("path '$output_dir' already exists");
}
PHP\mkdir($output_dir, 0755, /* recursive = */ true);
try {
await self::genCreateNewGitRepoImpl(
$output_dir,
$manifest,
$gen_filter,
$committer,
$do_submodules,
$revision,
);
} catch (\Exception $e) {
await (new ShipItShellCommand(null, 'rm', '-rf', $output_dir))->genRun();
throw $e;
}
}
private static async function genCreateNewGitRepoImpl(
string $output_dir,
ShipItManifest $manifest,
(function(ShipItChangeset): Awaitable<ShipItChangeset>) $gen_filter,
shape('name' => string, 'email' => string) $committer,
bool $do_submodules,
?string $revision = null,
): Awaitable<void> {
$logger = new ShipItVerboseLogger($manifest->isVerboseEnabled());
$source = await ShipItRepo::genTypedOpen<ShipItSourceRepo>(
$manifest->getSourceSharedLock(),
$manifest->getSourcePath(),
$manifest->getSourceBranch(),
);
$logger->out(" Exporting...");
$export = await $source->genExport(
$manifest->getSourceRoots(),
$do_submodules,
$revision,
);
$export_dir = $export['tempDir'];
$rev = $export['revision'];
$logger->out(" Creating unfiltered commit...");
await self::genInitGitRepo($export_dir->getPath(), $committer);
// The following code is necessarily convoluted. In order to support
// creating/verifying repos that are greater than 2 GB we need to break the
// unfiltered initial commit into a series of chunks that are small enough
// to be processed by ShipIt (max Hack string size is 2GB). After ShipIt
// has processed each chunked commit we use git commands to directly squash
// everything, dodging the Hack string size limit.
//
// `git ls-files` is used to get a list of all files, which is then split
// into chunks
//
// For each chunk, `git add` the files and then `git commit`
//
// To filter, find the initial commit SHA with `git rev-parse` and then
// read all commits into ShipItChangesets, apply filtering, and commit.
//
// After everything, squash to a single commit (with ShipIt tracking info).
$all_filenames_chunked = (
await (
new ShipItShellCommand(
$export_dir->getPath(),
'git',
'ls-files',
'--others',
)
)->genRun()
)->getStdOut()
|> Str\split($$, "\n")
|> Vec\filter($$, ($line) ==> !Str\is_empty($line))
// `git ls-files` returns files with escaping, if necessary. Since we
// already escape arguments in ShipItShellCommand, we need to remove
// the escaping from any files that have it:
|> Vec\map($$, ($line) ==> Str\trim($line, '"'))
|> Vec\chunk($$, self::FILE_CHUNK_SIZE);
$chunk_count = C\count($all_filenames_chunked);
// @lint-ignore UNUSED_RESULT
await Dict\map_with_key_async($all_filenames_chunked, async (
$i,
$chunk_filenames,
) ==> {
if ($manifest->isVerboseEnabled()) {
$logger->out(" Processing chunk %d/%d", $i + 1, $chunk_count);
}
await self::genExecSteps(
$export_dir->getPath(),
vec[
Vec\concat(vec['git', 'add', '--force'], $chunk_filenames),
vec[
'git',
'commit',
'--message',
Str\format('unfiltered commit chunk #%d', $i),
],
],
);
});
$logger->out(" Filtering...");
$export_lock = ShipItScopedFlock::createShared(
ShipItScopedFlock::getLockFilePathForRepoPath($export_dir->getPath()),
);
try {
$exported_repo = await ShipItRepo::genTypedOpen<ShipItSourceRepo>(
$export_lock,
$export_dir->getPath(),
'master',
);
$current_commit = (
await (
new ShipItShellCommand(
$export_dir->getPath(),
'git',
'rev-list',
'--max-parents=0',
'HEAD',
)
)->genRun()
)->getStdOut()
|> Str\trim($$);
$changesets = vec[];
while ($current_commit !== null) {
if ($manifest->isVerboseEnabled()) {
$logger->out(" Processing %s", $current_commit);
}
$changesets[] = (
// @lint-ignore AWAIT_IN_LOOP We need to do this serially
await $exported_repo
->genChangesetFromID($current_commit)
)
?->withID($rev);
// @lint-ignore AWAIT_IN_LOOP We need to do this serially
$current_commit = await $exported_repo->genFindNextCommit(
$current_commit,
keyset[],
);
}
} finally {
$export_lock->release();
}
$changesets = Vec\filter_nulls($changesets);
invariant(!C\is_empty($changesets), 'got a null changeset :/');
$changesets = await Vec\map_async($changesets, async ($changeset) ==> {
$changeset = await $gen_filter($changeset);
if ($manifest->isVerboseEnabled()) {
$changeset->dumpDebugMessages();
}
return $changeset;
});
$changesets[0] = $changesets[0]
|> $$->withSubject('Initial commit')
|> ShipItSync::addTrackingData($manifest, $$, $rev);
$logger->out(" Creating new repo...");
await self::genInitGitRepo($output_dir, $committer);
$output_lock = ShipItScopedFlock::createShared(
ShipItScopedFlock::getLockFilePathForRepoPath($output_dir),
);
try {
$filtered_repo = await ShipItRepo::genTypedOpen<ShipItDestinationRepo>(
$output_lock,
$output_dir,
'--orphan='.$manifest->getDestinationBranch(),
);
foreach ($changesets as $changeset) {
// @lint-ignore AWAIT_IN_LOOP These need to be committed one at a time
await $filtered_repo->genCommitPatch($changeset, $do_submodules);
}
// Now that we've filtered and committed all files into disparate chunks,
// we need to squash the chunks into a single commit. Fortunately, the
// following commands work just fine if HEAD == initial commit
$initial_commit_sha = (
await (
new ShipItShellCommand(
$output_dir,
'git',
'rev-list',
'--max-parents=0',
'HEAD',
)
)->genRun()
)->getStdOut()
|> Str\trim($$);
await self::genExecSteps(
$output_dir,
vec[
// Rewind HEAD (but NOT checked out file contents) to initial commit:
vec['git', 'reset', '--soft', $initial_commit_sha],
// Amend initial commit with content from all chunks
// (this preserves initial commit's message w/ ShipIt tracking details)
vec['git', 'commit', '--amend', '--no-edit'],
],
);
} finally {
$output_lock->release();
}
}
private static async function genExecSteps(
string $path,
vec<vec<string>> $steps,
): Awaitable<void> {
foreach ($steps as $step) {
// @lint-ignore AWAIT_IN_LOOP This needs to be done serially
await (new ShipItShellCommand($path, ...$step))->genRun();
}
}
}