public function parseDiff()

in src/parser/ArcanistDiffParser.php [189:367]


  /**
   * Parse raw diff text into the parser's list of changes.
   *
   * The text is scanned section by section. Each section must begin with one
   * of the header styles listed in the pattern table inside this method (SVN,
   * svnlook, SVN property changes, `git show`, `git diff`, RCS, plain unified
   * diff, "Binary files ... differ" notices, or Mercurial); the header type
   * selects which hunk parser is invoked for that section.
   *
   * If the input looks like `git-format-patch` output, the leading commit
   * message is split off first and recorded as a separate message-type
   * change.
   *
   * @param string $diff Raw diff text. Null is treated like an empty string.
   * @return mixed The value of `$this->changes` once parsing has finished.
   * @throws Exception If the diff is empty or contains only whitespace.
   *   Other parse problems are reported through `didFailParse()`.
   */
  public function parseDiff($diff) {
    // Check for null explicitly: passing null to trim()/strlen() is
    // deprecated as of PHP 8.1.
    if ($diff === null || !strlen(trim($diff))) {
      throw new Exception(pht("Can't parse an empty diff!"));
    }

    // Detect `git-format-patch`, by looking for a "---" line somewhere in
    // the file and then a footer with Git version number, which looks like
    // this:
    //
    //   --
    //   1.8.4.2
    //
    // Note that `git-format-patch` adds a space after the "--", but we don't
    // require it when detecting patches, as trailing whitespace can easily be
    // lost in transit.
    $detect_patch = '/^---$.*^-- ?[\s\d.]+\z/ms';
    $message = null;
    if (preg_match($detect_patch, $diff)) {
      list($message, $diff) = $this->stripGitFormatPatch($diff);
    }

    $this->didStartParse($diff);

    // Strip off header comments. While `patch` allows comments anywhere in the
    // file, `git apply` is more strict. We get these comments in `hg export`
    // diffs, and Eclipse can also produce them.
    $line = $this->getLineTrimmed();
    while (preg_match('/^#/', $line)) {
      $line = $this->nextLine();
    }

    // $message stays null unless the format-patch branch above ran, so guard
    // against null before calling strlen().
    if ($message !== null && strlen($message)) {
      // If we found a message during pre-parse steps, add it to the resulting
      // changes here. The returned object is not kept: the $change local is
      // always reassigned by the first iteration of the loop below.
      $this->buildChange(null)
        ->setType(ArcanistDiffChangeType::TYPE_MESSAGE)
        ->setMetadata('message', $message);
    }

    // The header patterns never change between sections, so build the table
    // once rather than on every loop iteration.
    $patterns = array(
      // This is a normal SVN text change, probably from "svn diff".
      '(?P<type>Index): (?P<cur>.+)',
      // This is an SVN text change, probably from "svnlook diff".
      '(?P<type>Modified|Added|Deleted|Copied): (?P<cur>.+)',
      // This is an SVN property change, probably from "svn diff".
      '(?P<type>Property changes on): (?P<cur>.+)',
      // This is a git commit message, probably from "git show".
      '(?P<type>commit) (?P<hash>[a-f0-9]+)(?: \(.*\))?',
      // This is a git diff, probably from "git show" or "git diff".
      // Note that the filenames may appear quoted.
      '(?P<type>diff --git) (?P<oldnew>.*)',
      // RCS Diff
      '(?P<type>rcsdiff -u) (?P<oldnew>.*)',
      // This is a unified diff, probably from "diff -u" or synthetic diffing.
      '(?P<type>---) (?P<old>.+)\s+\d{4}-\d{2}-\d{2}.*',
      '(?P<binary>Binary files|Files) '.
        '(?P<old>.+)\s+\d{4}-\d{2}-\d{2} and '.
        '(?P<new>.+)\s+\d{4}-\d{2}-\d{2} differ.*',
      // This is a normal Mercurial text change, probably from "hg diff". It
      // may have two "-r" blocks if it came from "hg diff -r x:y".
      '(?P<type>diff -r) (?P<hgrev>[a-f0-9]+) (?:-r [a-f0-9]+ )?(?P<cur>.+)',
    );

    // Each iteration consumes one section: a header line followed by whatever
    // the matching hunk parser reads.
    do {
      $line = $this->getLineTrimmed();
      $match = null;
      $ok = $this->tryMatchHeader($patterns, $line, $match);

      $failed_parse = false;
      if (!$ok && $this->isFirstNonEmptyLine()) {
        // 'hg export' command creates so called "extended diff" that
        // contains some meta information and comment at the beginning
        // (isFirstNonEmptyLine() to check for beginning). Actual mercurial
        // code detects where comment ends and unified diff starts by
        // searching for "diff -r" or "diff --git" in the text.
        $this->saveLine();
        $line = $this->nextLineThatLooksLikeDiffStart();
        if (!$this->tryMatchHeader($patterns, $line, $match)) {
          // Restore line before guessing to display correct error.
          $this->restoreLine();
          $failed_parse = true;
        }
      } else if (!$ok) {
        $failed_parse = true;
      }

      if ($failed_parse) {
        $this->didFailParse(
          pht(
            "Expected a hunk header, like '%s' (svn), '%s' (svn properties), ".
            "'%s' (git show), '%s' (git diff), '%s' (unified diff), or ".
            "'%s' (hg diff or patch).",
            'Index: /path/to/file.ext',
            'Property changes on: /path/to/file.ext',
            'commit 59bcc3ad6775562f845953cf01624225',
            'diff --git',
            '--- filename',
            'diff -r'));
      }

      if (isset($match['type'])) {
        if ($match['type'] == 'diff --git') {
          // The git header carries both paths in a single capture; split it
          // into the old and current paths used below.
          list($old, $new) = self::splitGitDiffPaths($match['oldnew']);
          $match['old'] = $old;
          $match['cur'] = $new;
        }
      }

      $change = $this->buildChange(idx($match, 'cur'));

      if (isset($match['old'])) {
        $change->setOldPath($match['old']);
      }

      if (isset($match['hash'])) {
        $change->setCommitHash($match['hash']);
      }

      if (isset($match['binary'])) {
        // A "Binary files ... differ" notice has no hunks to parse, so mark
        // the change as binary and move straight on to the next section.
        $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
        $line = $this->nextNonemptyLine();
        continue;
      }

      $line = $this->nextLine();

      switch ($match['type']) {
        case 'Index':
        case 'Modified':
        case 'Added':
        case 'Deleted':
        case 'Copied':
          $this->parseIndexHunk($change);
          break;
        case 'Property changes on':
          $this->parsePropertyHunk($change);
          break;
        case 'diff --git':
          $this->setIsGit(true);
          $this->parseIndexHunk($change);
          break;
        case 'commit':
          $this->setIsGit(true);
          $this->parseCommitMessage($change);
          break;
        case '---':
          // A plain unified diff header is a "---" line followed immediately
          // by a "+++" line naming the current path.
          $ok = preg_match(
            '@^(?:\+\+\+) (.*)\s+\d{4}-\d{2}-\d{2}.*$@',
            $line,
            $match);
          if (!$ok) {
            $this->didFailParse(pht(
              "Expected '%s' in unified diff.",
              '+++ filename'));
          }
          $change->setCurrentPath($match[1]);
          $line = $this->nextLine();
          $this->parseChangeset($change);
          break;
        case 'diff -r':
          $this->setIsMercurial(true);
          $this->parseIndexHunk($change);
          break;
        case 'rcsdiff -u':
          $this->isRCS = true;
          $this->parseIndexHunk($change);
          break;
        default:
          $this->didFailParse(pht('Unknown diff type.'));
          break;
      }
    } while ($this->getLine() !== null);

    $this->didFinishParse();

    $this->loadSyntheticData();

    return $this->changes;
  }