private static async function genCreateNewGitRepoImpl()

in src/shipit/phase/ShipItCreateNewRepoPhase.php [177:363]


  /**
   * Create a new git repository in `$output_dir` that contains one filtered
   * "Initial commit" built from an export of the source repository.
   *
   * Outline:
   *  1. Open the source repo described by `$manifest` and export its roots.
   *  2. Turn the export into a git repo and commit its files in chunks of
   *     `self::FILE_CHUNK_SIZE` filenames.
   *  3. Read every chunk commit back as a `ShipItChangeset`, stamp it with the
   *     exported revision, and run it through `$gen_filter`.
   *  4. Initialise `$output_dir`, commit each filtered changeset there, then
   *     squash everything into the first commit.
   *
   * @param $output_dir    Directory in which the new repository is created.
   * @param $manifest      Supplies source lock/path/branch/roots, the
   *                       destination branch and the verbosity flag.
   * @param $gen_filter    Async filter applied to each chunk changeset.
   * @param $committer     Name/email forwarded to `genInitGitRepo`.
   * @param $do_submodules Forwarded to `genExport` and `genCommitPatch`.
   * @param $revision      Forwarded to `genExport`; how null is interpreted is
   *                       decided there.
   */
  private static async function genCreateNewGitRepoImpl(
    string $output_dir,
    ShipItManifest $manifest,
    (function(ShipItChangeset): Awaitable<ShipItChangeset>) $gen_filter,
    shape('name' => string, 'email' => string) $committer,
    bool $do_submodules,
    ?string $revision = null,
  ): Awaitable<void> {
    $logger = new ShipItVerboseLogger($manifest->isVerboseEnabled());

    $source = await ShipItRepo::genTypedOpen<ShipItSourceRepo>(
      $manifest->getSourceSharedLock(),
      $manifest->getSourcePath(),
      $manifest->getSourceBranch(),
    );

    $logger->out("  Exporting...");
    $export = await $source->genExport(
      $manifest->getSourceRoots(),
      $do_submodules,
      $revision,
    );
    $export_dir = $export['tempDir'];
    $rev = $export['revision'];

    $logger->out("  Creating unfiltered commit...");
    await self::genInitGitRepo($export_dir->getPath(), $committer);

    // The following code is necessarily convoluted. In order to support
    // creating/verifying repos that are greater than 2 GB we need to break the
    // unfiltered initial commit into a series of chunks that are small enough
    // to be processed by ShipIt (max Hack string size is 2GB). After ShipIt
    // has processed each chunked commit we use git commands to directly squash
    // everything, dodging the Hack string size limit.
    //
    // `git ls-files` is used to get a list of all files, which is then split
    // into chunks
    //
    // For each chunk, `git add` the files and then `git commit`
    //
    // To filter, find the initial commit SHA with `git rev-parse` and then
    // read all commits into ShipItChangesets, apply filtering, and commit.
    //
    // After everything, squash to a single commit (with ShipIt tracking info).

    $all_filenames_chunked = (
      await (
        new ShipItShellCommand(
          $export_dir->getPath(),
          'git',
          'ls-files',
          '--others',
          // NUL-terminated output: git emits every path verbatim instead of
          // C-quoting names that contain quotes, backslashes, control bytes
          // or non-ASCII bytes. Stripping only the surrounding quotes from
          // the quoted form would leave the inner escapes behind and produce
          // paths that do not exist on disk.
          '-z',
        )
      )->genRun()
    )->getStdOut()
      |> Str\split($$, "\0")
      // The output ends with a terminator, so the last piece is empty.
      |> Vec\filter($$, ($path) ==> !Str\is_empty($path))
      |> Vec\chunk($$, self::FILE_CHUNK_SIZE);

    $chunk_count = C\count($all_filenames_chunked);

    // Every chunk stages into and commits from the same git index, so the
    // chunks are processed strictly one after another.
    foreach ($all_filenames_chunked as $i => $chunk_filenames) {
      if ($manifest->isVerboseEnabled()) {
        $logger->out("    Processing chunk %d/%d", $i + 1, $chunk_count);
      }
      // @lint-ignore AWAIT_IN_LOOP Chunks share one index; must be serial
      await self::genExecSteps(
        $export_dir->getPath(),
        vec[
          // `--` ends option parsing so a filename starting with `-` is
          // treated as a path, not as a flag.
          Vec\concat(vec['git', 'add', '--force', '--'], $chunk_filenames),
          vec[
            'git',
            'commit',
            '--message',
            Str\format('unfiltered commit chunk #%d', $i),
          ],
        ],
      );
    }

    $logger->out("  Filtering...");
    $export_lock = ShipItScopedFlock::createShared(
      ShipItScopedFlock::getLockFilePathForRepoPath($export_dir->getPath()),
    );
    try {
      $exported_repo = await ShipItRepo::genTypedOpen<ShipItSourceRepo>(
        $export_lock,
        $export_dir->getPath(),
        'master',
      );
      // Start at the root commit (the one without parents) and walk forward.
      $current_commit = (
        await (
          new ShipItShellCommand(
            $export_dir->getPath(),
            'git',
            'rev-list',
            '--max-parents=0',
            'HEAD',
          )
        )->genRun()
      )->getStdOut()
        |> Str\trim($$);
      $changesets = vec[];
      while ($current_commit !== null) {
        if ($manifest->isVerboseEnabled()) {
          $logger->out("    Processing %s", $current_commit);
        }
        // Each chunk changeset is re-labelled with the exported source
        // revision rather than the throwaway chunk commit's own ID.
        $changesets[] = (
          // @lint-ignore AWAIT_IN_LOOP We need to do this serially
          await $exported_repo
            ->genChangesetFromID($current_commit)
        )
          ?->withID($rev);
        // @lint-ignore AWAIT_IN_LOOP We need to do this serially
        $current_commit = await $exported_repo->genFindNextCommit(
          $current_commit,
          keyset[],
        );
      }
    } finally {
      $export_lock->release();
    }
    // NOTE(review): null changesets are dropped here and only a completely
    // empty result trips the invariant below; confirm that silently skipping
    // an individual chunk is intended.
    $changesets = Vec\filter_nulls($changesets);
    invariant(!C\is_empty($changesets), 'got a null changeset :/');
    $changesets = await Vec\map_async($changesets, async ($changeset) ==> {
      $changeset = await $gen_filter($changeset);
      if ($manifest->isVerboseEnabled()) {
        $changeset->dumpDebugMessages();
      }
      return $changeset;
    });
    // Only the first changeset carries the final subject and tracking data;
    // the squash at the end keeps that commit's message.
    $changesets[0] = $changesets[0]
      |> $$->withSubject('Initial commit')
      |> ShipItSync::addTrackingData($manifest, $$, $rev);

    $logger->out("  Creating new repo...");
    await self::genInitGitRepo($output_dir, $committer);
    $output_lock = ShipItScopedFlock::createShared(
      ShipItScopedFlock::getLockFilePathForRepoPath($output_dir),
    );
    try {
      $filtered_repo = await ShipItRepo::genTypedOpen<ShipItDestinationRepo>(
        $output_lock,
        $output_dir,
        '--orphan='.$manifest->getDestinationBranch(),
      );
      foreach ($changesets as $changeset) {
        // @lint-ignore AWAIT_IN_LOOP These need to be committed one at a time
        await $filtered_repo->genCommitPatch($changeset, $do_submodules);
      }

      // Now that we've filtered and committed all files into disparate chunks,
      // we need to squash the chunks into a single commit. Fortunately, the
      // following commands work just fine if HEAD == initial commit
      $initial_commit_sha = (
        await (
          new ShipItShellCommand(
            $output_dir,
            'git',
            'rev-list',
            '--max-parents=0',
            'HEAD',
          )
        )->genRun()
      )->getStdOut()
        |> Str\trim($$);
      await self::genExecSteps(
        $output_dir,
        vec[
          // Rewind HEAD (but NOT checked out file contents) to initial commit:
          vec['git', 'reset', '--soft', $initial_commit_sha],
          // Amend initial commit with content from all chunks
          // (this preserves initial commit's message w/ ShipIt tracking details)
          vec['git', 'commit', '--amend', '--no-edit'],
        ],
      );
    } finally {
      $output_lock->release();
    }
  }