in src/parser/ArcanistDiffParser.php [189:367]
/**
 * Parse raw diff text into the parser's collection of changes.
 *
 * The input is scanned header by header. Each recognized header (SVN
 * "Index:"/"Modified:"/"Property changes on:", git "commit"/"diff --git",
 * "rcsdiff -u", unified "---", "Binary files ... differ", or Mercurial
 * "diff -r") starts a new change, which is then handed to the matching
 * hunk parser. A `git-format-patch` wrapper is detected and stripped up
 * front; any message it yields is recorded as a separate message change.
 *
 * @param string $diff Raw diff text to parse.
 * @return mixed The value of `$this->changes` once parsing has finished
 *               (presumably the list of changes created via
 *               `buildChange()` -- confirm against the class definition).
 * @throws Exception If `$diff` is empty or contains only whitespace.
 */
public function parseDiff($diff) {
  if (!strlen(trim($diff))) {
    throw new Exception(pht("Can't parse an empty diff!"));
  }

  // Detect `git-format-patch`, by looking for a "---" line somewhere in
  // the file and then a footer with Git version number, which looks like
  // this:
  //
  //   --
  //   1.8.4.2
  //
  // Note that `git-format-patch` adds a space after the "--", but we don't
  // require it when detecting patches, as trailing whitespace can easily be
  // lost in transit.
  $detect_patch = '/^---$.*^-- ?[\s\d.]+\z/ms';
  $message = null;
  if (preg_match($detect_patch, $diff)) {
    list($message, $diff) = $this->stripGitFormatPatch($diff);
  }

  $this->didStartParse($diff);

  // Strip off header comments. While `patch` allows comments anywhere in the
  // file, `git apply` is more strict. We get these comments in `hg export`
  // diffs, and Eclipse can also produce them.
  $line = $this->getLineTrimmed();
  while (preg_match('/^#/', $line)) {
    $line = $this->nextLine();
  }

  // If we found a message during pre-parse steps, add it to the resulting
  // changes here. `$message` is still null when the input was not detected
  // as a `git-format-patch`, so check for null explicitly: passing null to
  // strlen() is deprecated as of PHP 8.1. The returned change object is not
  // kept, because the loop below assigns its own `$change` before reading it.
  if ($message !== null && strlen($message)) {
    $this->buildChange(null)
      ->setType(ArcanistDiffChangeType::TYPE_MESSAGE)
      ->setMetadata('message', $message);
  }

  // Header patterns are constant, so build the list once rather than on
  // every loop iteration.
  $patterns = array(
    // This is a normal SVN text change, probably from "svn diff".
    '(?P<type>Index): (?P<cur>.+)',
    // This is an SVN text change, probably from "svnlook diff".
    '(?P<type>Modified|Added|Deleted|Copied): (?P<cur>.+)',
    // This is an SVN property change, probably from "svn diff".
    '(?P<type>Property changes on): (?P<cur>.+)',
    // This is a git commit message, probably from "git show".
    '(?P<type>commit) (?P<hash>[a-f0-9]+)(?: \(.*\))?',
    // This is a git diff, probably from "git show" or "git diff".
    // Note that the filenames may appear quoted.
    '(?P<type>diff --git) (?P<oldnew>.*)',
    // RCS Diff
    '(?P<type>rcsdiff -u) (?P<oldnew>.*)',
    // This is a unified diff, probably from "diff -u" or synthetic diffing.
    '(?P<type>---) (?P<old>.+)\s+\d{4}-\d{2}-\d{2}.*',
    '(?P<binary>Binary files|Files) '.
      '(?P<old>.+)\s+\d{4}-\d{2}-\d{2} and '.
      '(?P<new>.+)\s+\d{4}-\d{2}-\d{2} differ.*',
    // This is a normal Mercurial text change, probably from "hg diff". It
    // may have two "-r" blocks if it came from "hg diff -r x:y".
    '(?P<type>diff -r) (?P<hgrev>[a-f0-9]+) (?:-r [a-f0-9]+ )?(?P<cur>.+)',
  );

  do {
    $line = $this->getLineTrimmed();
    $match = null;
    $ok = $this->tryMatchHeader($patterns, $line, $match);

    $failed_parse = false;
    if (!$ok && $this->isFirstNonEmptyLine()) {
      // 'hg export' command creates so called "extended diff" that
      // contains some meta information and comment at the beginning
      // (isFirstNonEmptyLine() to check for beginning). Actual mercurial
      // code detects where comment ends and unified diff starts by
      // searching for "diff -r" or "diff --git" in the text.
      $this->saveLine();
      $line = $this->nextLineThatLooksLikeDiffStart();
      if (!$this->tryMatchHeader($patterns, $line, $match)) {
        // Restore line before guessing to display correct error.
        $this->restoreLine();
        $failed_parse = true;
      }
    } else if (!$ok) {
      $failed_parse = true;
    }

    if ($failed_parse) {
      // NOTE(review): the code after this block reads `$match`, so it
      // appears to rely on didFailParse() not returning -- confirm.
      $this->didFailParse(
        pht(
          "Expected a hunk header, like '%s' (svn), '%s' (svn properties), ".
          "'%s' (git show), '%s' (git diff), '%s' (unified diff), or ".
          "'%s' (hg diff or patch).",
          'Index: /path/to/file.ext',
          'Property changes on: /path/to/file.ext',
          'commit 59bcc3ad6775562f845953cf01624225',
          'diff --git',
          '--- filename',
          'diff -r'));
    }

    // A "diff --git" header carries both paths in a single capture; split
    // it into the old and current paths used below.
    if (isset($match['type']) && $match['type'] === 'diff --git') {
      list($old, $new) = self::splitGitDiffPaths($match['oldnew']);
      $match['old'] = $old;
      $match['cur'] = $new;
    }

    $change = $this->buildChange(idx($match, 'cur'));

    if (isset($match['old'])) {
      $change->setOldPath($match['old']);
    }

    if (isset($match['hash'])) {
      $change->setCommitHash($match['hash']);
    }

    if (isset($match['binary'])) {
      // The "Binary files ... differ" pattern has no `type` capture and no
      // hunk body to parse: mark the change as binary, advance the cursor,
      // and go straight to the loop condition.
      $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
      $line = $this->nextNonemptyLine();
      continue;
    }

    // Step past the header line; `$line` is inspected by the "---" case.
    $line = $this->nextLine();

    switch ($match['type']) {
      case 'Index':
      case 'Modified':
      case 'Added':
      case 'Deleted':
      case 'Copied':
        $this->parseIndexHunk($change);
        break;
      case 'Property changes on':
        $this->parsePropertyHunk($change);
        break;
      case 'diff --git':
        $this->setIsGit(true);
        $this->parseIndexHunk($change);
        break;
      case 'commit':
        $this->setIsGit(true);
        $this->parseCommitMessage($change);
        break;
      case '---':
        // A unified diff header is a "---" line immediately followed by a
        // "+++" line naming the current path.
        $ok = preg_match(
          '@^(?:\+\+\+) (.*)\s+\d{4}-\d{2}-\d{2}.*$@',
          $line,
          $match);
        if (!$ok) {
          $this->didFailParse(pht(
            "Expected '%s' in unified diff.",
            '+++ filename'));
        }
        $change->setCurrentPath($match[1]);
        $line = $this->nextLine();
        $this->parseChangeset($change);
        break;
      case 'diff -r':
        $this->setIsMercurial(true);
        $this->parseIndexHunk($change);
        break;
      case 'rcsdiff -u':
        $this->isRCS = true;
        $this->parseIndexHunk($change);
        break;
      default:
        $this->didFailParse(pht('Unknown diff type.'));
        break;
    }
  } while ($this->getLine() !== null);

  $this->didFinishParse();

  $this->loadSyntheticData();

  return $this->changes;
}