protected function parseIndexHunk()

in src/parser/ArcanistDiffParser.php [556:809]


  /**
   * Parse the per-file header lines at the current parser position, then the
   * body of the change, recording what is found on the given change object.
   *
   * For git diffs this first consumes the extended header lines (file mode
   * changes, renames and copies). It then consumes the SVN divider line or
   * the git "index" line, detects changes which have no body or which are
   * binary, and otherwise parses the two hunk targets and hands off to
   * parseChangeset().
   *
   * @param ArcanistDiffChange $change Change to populate from the headers.
   * @return void
   */
  protected function parseIndexHunk(ArcanistDiffChange $change) {
    $is_git = $this->getIsGit();
    $is_mercurial = $this->getIsMercurial();
    $is_svn = (!$is_git && !$is_mercurial);

    // Source change of a "rename", remembered so it can also be flagged as
    // needing synthetic hunks if the move turns out to have no body.
    $move_source = null;

    $line = $this->getLine();
    if ($is_git) {
      // The pattern list does not depend on the line, so build it once
      // instead of on every iteration of the header loop.
      $patterns = array(
        '(?P<new>new) file mode (?P<newmode>\d+)',
        '(?P<deleted>deleted) file mode (?P<oldmode>\d+)',
        // These occur when someone uses `chmod` on a file.
        'old mode (?P<oldmode>\d+)',
        'new mode (?P<newmode>\d+)',
        // These occur when you `mv` a file and git figures it out.
        'similarity index ',
        'rename from (?P<old>.*)',
        '(?P<move>rename) to (?P<cur>.*)',
        'copy from (?P<old>.*)',
        '(?P<copy>copy) to (?P<cur>.*)',
      );

      do {
        $ok = false;
        $match = null;

        // The line is null once the input is exhausted. Never hand null to
        // preg_match(): that is deprecated as of PHP 8.1, and a null line
        // can not match any header pattern anyway.
        if ($line !== null) {
          foreach ($patterns as $pattern) {
            $ok = preg_match('@^'.$pattern.'@', $line, $match);
            if ($ok) {
              break;
            }
          }
        }

        if (!$ok) {
          if ($line === null ||
              preg_match('/^(diff --git|commit) /', $line)) {
            // In this case, there are ONLY file mode changes, or this is a
            // pure move. If it's a move, flag these changesets so we can build
            // synthetic changes later, enabling us to show file contents in
            // Differential -- git only gives us a block like this:
            //
            //   diff --git a/README b/READYOU
            //   similarity index 100%
            //   rename from README
            //   rename to READYOU
            //
            // ...i.e., there is no associated diff.

            // This allows us to distinguish between property changes only
            // and actual moves. For property changes only, we can't currently
            // build a synthetic diff correctly, so just skip it.
            // TODO: Build synthetic diffs for property changes, too.
            if ($change->getType() != ArcanistDiffChangeType::TYPE_CHANGE) {
              $change->setNeedsSyntheticGitHunks(true);
              if ($move_source) {
                $move_source->setNeedsSyntheticGitHunks(true);
              }
            }
            return;
          }

          // Not an extended header line: fall through to the "index" line
          // handling below.
          break;
        }

        if (!empty($match['oldmode'])) {
          $change->setOldProperty('unix:filemode', $match['oldmode']);
        }
        if (!empty($match['newmode'])) {
          $change->setNewProperty('unix:filemode', $match['newmode']);
        }

        if (!empty($match['deleted'])) {
          $change->setType(ArcanistDiffChangeType::TYPE_DELETE);
        }

        if (!empty($match['new'])) {
          // If you replace a symlink with a normal file, git renders the change
          // as a "delete" of the symlink plus an "add" of the new file. We
          // prefer to represent this as a change.
          if ($change->getType() == ArcanistDiffChangeType::TYPE_DELETE) {
            $change->setType(ArcanistDiffChangeType::TYPE_CHANGE);
          } else {
            $change->setType(ArcanistDiffChangeType::TYPE_ADD);
          }
        }

        // NOTE: The path captures are tested with isset() and strlen() rather
        // than empty(), because empty('0') is true and a file may legitimately
        // be named "0".
        if (isset($match['old']) && strlen($match['old'])) {
          $match['old'] = self::unescapeFilename($match['old']);
          $change->setOldPath($match['old']);
        }

        if (isset($match['cur']) && strlen($match['cur'])) {
          $match['cur'] = self::unescapeFilename($match['cur']);
          $change->setCurrentPath($match['cur']);
        }

        if (!empty($match['copy'])) {
          $change->setType(ArcanistDiffChangeType::TYPE_COPY_HERE);
          $old = $this->buildChange($change->getOldPath());
          $type = $old->getType();

          // A source which was already moved away and is now also copied
          // becomes a multicopy; otherwise it is a plain copy source.
          if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) {
            $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY);
          } else {
            $old->setType(ArcanistDiffChangeType::TYPE_COPY_AWAY);
          }

          $old->addAwayPath($change->getCurrentPath());
        }

        if (!empty($match['move'])) {
          $change->setType(ArcanistDiffChangeType::TYPE_MOVE_HERE);
          $old = $this->buildChange($change->getOldPath());
          $type = $old->getType();

          if ($type == ArcanistDiffChangeType::TYPE_MULTICOPY) {
            // Great, no change.
          } else if ($type == ArcanistDiffChangeType::TYPE_MOVE_AWAY) {
            $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY);
          } else if ($type == ArcanistDiffChangeType::TYPE_COPY_AWAY) {
            $old->setType(ArcanistDiffChangeType::TYPE_MULTICOPY);
          } else {
            $old->setType(ArcanistDiffChangeType::TYPE_MOVE_AWAY);
          }

          // We'll reference this above.
          $move_source = $old;

          $old->addAwayPath($change->getCurrentPath());
        }

        $line = $this->nextNonemptyLine();
      } while (true);
    }

    $line = $this->getLine();

    if ($is_svn) {
      // Guard against a null line (end of input) so that null is never passed
      // to preg_match(); a missing line is still reported as a parse failure.
      $ok = ($line !== null) && preg_match('/^=+\s*$/', $line);
      if (!$ok) {
        $this->didFailParse(pht(
          "Expected '%s' divider line.",
          '======================='));
      } else {
        // Adding an empty file in SVN can produce an empty line here.
        $line = $this->nextNonemptyLine();
      }
    } else if ($is_git) {
      $ok = ($line !== null) && preg_match('/^index .*$/', $line);
      if (!$ok) {
        // TODO: "hg diff -g" diffs ("mercurial git-style diffs") do not include
        // this line, so we can't parse them if we fail on it. Maybe introduce
        // a flag saying "parse this diff using relaxed git-style diff rules"?

        // $this->didFailParse("Expected 'index af23f...a98bc' header line.");
      } else {
        // NOTE: In the git case, where this patch is the last change in the
        // file, we may have a final terminal newline. Skip over it so that
        // we'll hit the '$line === null' block below. This is covered by the
        // 'git-empty-file.gitdiff' test case.
        $line = $this->nextNonemptyLine();
      }
    }

    // If there are files with only whitespace changes and -b or -w are
    // supplied as command-line flags to `diff', svn and git both produce
    // changes without any body.
    if ($line === null ||
        preg_match(
          '/^(Index:|Property changes on:|diff --git|commit) /',
          $line)) {
      return;
    }

    $is_binary_add = preg_match(
      '/^Cannot display: file marked as a binary type\.$/',
      rtrim($line));
    if ($is_binary_add) {
      $this->nextLine(); // Cannot display: file marked as a binary type.
      $this->nextNonemptyLine(); // svn:mime-type = application/octet-stream
      $this->markBinary($change);
      return;
    }

    // We can get this in git, or in SVN when a file exists in the repository
    // WITHOUT a binary mime-type and is changed and given a binary mime-type.
    $is_binary_diff = preg_match(
      '/^(Binary files|Files) .* and .* differ$/',
      rtrim($line));
    if ($is_binary_diff) {
      $this->nextNonemptyLine(); // Binary files x and y differ
      $this->markBinary($change);
      return;
    }

    // This occurs under "hg diff --git" when a binary file is removed. See
    // test case "hg-binary-delete.hgdiff". (I believe it never occurs under
    // git, which reports the "files X and /dev/null differ" string above. Git
    // can not apply these patches.)
    $is_hg_binary_delete = preg_match(
      '/^Binary file .* has changed$/',
      rtrim($line));
    if ($is_hg_binary_delete) {
      $this->nextNonemptyLine();
      $this->markBinary($change);
      return;
    }

    // With "git diff --binary" (not a normal mode, but one users may explicitly
    // invoke and then, e.g., copy-paste into the web console) or "hg diff
    // --git" (normal under hg workflows), we may encounter a literal binary
    // patch.
    $is_git_binary_patch = preg_match(
      '/^GIT binary patch$/',
      rtrim($line));
    if ($is_git_binary_patch) {
      $this->nextLine();
      $this->parseGitBinaryPatch();
      $line = $this->getLine();
      // The binary patch may be the last thing in the input, in which case
      // there is no following line to inspect.
      if ($line !== null && preg_match('/^literal/', $line)) {
        // We may have old/new binaries (change) or just a new binary (hg add).
        // If there are two blocks, parse both.
        $this->parseGitBinaryPatch();
      }
      $this->markBinary($change);
      return;
    }

    if ($is_git) {
      // "git diff -b" ignores whitespace, but has an empty hunk target
      // NOTE(review): this looks unreachable, because the bodyless-change
      // check above already returns for any line starting with
      // "diff --git ". Confirm before removing.
      if (preg_match('@^diff --git .*$@', $line)) {
        $this->nextLine();
        return null;
      }
    }

    if ($this->isRCS) {
      // Skip the RCS headers.
      $this->nextLine();
      $this->nextLine();
      $this->nextLine();
    }

    $old_file = $this->parseHunkTarget();
    $new_file = $this->parseHunkTarget();

    if ($this->isRCS) {
      $change->setCurrentPath($new_file);
    }

    $change->setOldPath($old_file);

    $this->parseChangeset($change);
  }