in src/parser/ArcanistDiffParser.php [875:1043]
/**
 * Parse one or more consecutive unified-diff hunks ("@@ ... @@" sections),
 * starting at the parser's current line, and attach them to a change.
 *
 * For each hunk the header is read for its offsets and lengths, then body
 * lines are consumed until the old and new line counts are used up. The
 * collected lines become the hunk corpus. When binary detection is enabled
 * and the corpus is judged binary, the change is marked as a binary file and
 * the hunk is discarded instead of being added.
 *
 * If the current line is empty rather than a hunk header, the input is
 * treated as an svn 1.7 style property change and handed off to
 * parsePropertyHunk().
 *
 * NOTE: This method continues past didFailParse() calls as though they never
 * return; presumably didFailParse() throws. Verify against its definition.
 *
 * @param ArcanistDiffChange $change Change which receives the parsed hunks.
 *   Any hunks already attached to it are dropped first.
 * @return mixed Result of parsePropertyHunk() when a property change is
 *   encountered; otherwise nothing is returned.
 * @throws Exception If a non-UTF-8 hunk can not be converted to UTF-8 using
 *   the configured fallback encoding.
 */
protected function parseChangeset(ArcanistDiffChange $change) {
  // If a diff includes two sets of changes to the same file, let the
  // second one win. In particular, this occurs when adding subdirectories
  // in Subversion that contain files: the file text will be present in
  // both the directory diff and the file diff. See T5555. Dropping the
  // hunks lets whichever one shows up later win instead of showing changes
  // twice.
  $change->dropHunks();

  do {
    $hunk = new ArcanistDiffHunk();
    $line = $this->getLineTrimmed();
    $real = array();

    // In the case where only one line is changed, the length is omitted.
    // The final group is for git, which appends a guess at the function
    // context to the diff.
    $matches = null;
    $ok = preg_match(
      '/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?: .*?)?$/U',
      $line,
      $matches);

    if (!$ok) {
      // It's possible we hit the style of an svn1.7 property change.
      // This is a 4-line Index block, followed by an empty line, followed
      // by a "Property changes on:" section similar to svn1.6.
      if ($line == '') {
        $line = $this->nextNonemptyLine();
        $ok = preg_match('/^Property changes on:/', $line);
        if (!$ok) {
          $this->didFailParse(pht('Confused by empty line'));
        }
        $line = $this->nextLine();
        return $this->parsePropertyHunk($change);
      }
      $this->didFailParse(pht(
        "Expected hunk header '%s'.",
        '@@ -NN,NN +NN,NN @@'));
    }

    $hunk->setOldOffset($matches[1]);
    $hunk->setNewOffset($matches[3]);

    // Cover for the cases where length wasn't present (implying one line).
    // preg_match() omits trailing groups which did not participate in the
    // match, so the lookup can yield null rather than an empty string. Test
    // for null explicitly: strlen(null) is deprecated as of PHP 8.1.
    $old_len = idx($matches, 2);
    if ($old_len === null || !strlen($old_len)) {
      $old_len = 1;
    }
    $new_len = idx($matches, 4);
    if ($new_len === null || !strlen($new_len)) {
      $new_len = 1;
    }

    $hunk->setOldLength($old_len);
    $hunk->setNewLength($new_len);

    // Counts of added and removed lines seen in this hunk's body.
    $add = 0;
    $del = 0;

    // Set when the body loop stops on a line which belongs to whatever comes
    // next, so that line must not be skipped after the loop.
    $hit_next_hunk = false;
    while ((($line = $this->nextLine()) !== null)) {
      if (strlen(rtrim($line, "\r\n"))) {
        $char = $line[0];
      } else {
        // Normally, we do not encounter empty lines in diffs, because
        // unchanged lines have an initial space. However, in Git, with
        // the option `diff.suppress-blank-empty` set, unchanged blank lines
        // emit as completely empty. If we encounter a completely empty line,
        // treat it as a ' ' (i.e., unchanged empty line) line.
        $char = ' ';
      }

      switch ($char) {
        case '\\':
          // The pattern needs four backslashes in a single-quoted PHP string
          // to make PCRE match one literal backslash; with only two, PCRE
          // sees an escaped space and never checks for the backslash.
          if (!preg_match('@\\\\ No newline at end of file@', $line)) {
            $this->didFailParse(
              pht("Expected '\ No newline at end of file'."));
          }
          $real[] = $line;
          if ($new_len) {
            // New-side lines are still outstanding, so the marker is
            // attributed to the old side of the hunk.
            $hunk->setIsMissingOldNewline(true);
            break;
          }
          // No new-side lines remain: attribute the marker to the new side
          // and stop reading this hunk's body.
          $hunk->setIsMissingNewNewline(true);
          break 2;
        case '+':
          ++$add;
          --$new_len;
          $real[] = $line;
          break;
        case '-':
          if (!$old_len) {
            // In this case, we've hit "---" from a new file. So don't
            // advance the line cursor.
            $hit_next_hunk = true;
            break 2;
          }
          ++$del;
          --$old_len;
          $real[] = $line;
          break;
        case ' ':
          // A context (or blank) line after both counts are exhausted is
          // not part of this hunk.
          if (!$old_len && !$new_len) {
            break 2;
          }
          --$old_len;
          --$new_len;
          $real[] = $line;
          break;
        default:
          // We hit something, likely another hunk.
          $hit_next_hunk = true;
          break 2;
      }
    }

    // Both counters must reach exactly zero, otherwise the body did not
    // match the lengths promised by the header.
    if ($old_len || $new_len) {
      $this->didFailParse(pht('Found the wrong number of hunk lines.'));
    }

    $corpus = implode('', $real);

    $is_binary = false;
    if ($this->detectBinaryFiles) {
      $is_binary = !phutil_is_utf8($corpus);
      $try_encoding = $this->tryEncoding;

      // Not valid UTF-8, but a fallback encoding is configured: only keep
      // treating the hunk as binary if the heuristic agrees; otherwise try
      // to convert the text to UTF-8.
      if ($is_binary && $try_encoding) {
        $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
        if (!$is_binary) {
          $corpus = phutil_utf8_convert($corpus, 'UTF-8', $try_encoding);
          if (!phutil_is_utf8($corpus)) {
            throw new Exception(
              pht(
                "Failed to convert a hunk from '%s' to UTF-8. ".
                "Check that the specified encoding is correct.",
                $try_encoding));
          }
        }
      }
    }

    if ($is_binary) {
      // SVN happily treats binary files which aren't marked with the right
      // mime type as text files. Detect that junk here and mark the file
      // binary. We'll catch stuff with unicode too, but that's verboten
      // anyway. If there are too many false positives with this we might
      // need to make it threshold-triggered instead of triggering on any
      // unprintable byte.
      $change->setFileType(ArcanistDiffChangeType::FILE_BINARY);
    } else {
      $hunk->setCorpus($corpus);
      $hunk->setAddLines($add);
      $hunk->setDelLines($del);
      $change->addHunk($hunk);
    }

    if (!$hit_next_hunk) {
      $line = $this->nextNonemptyLine();
    }

    // Keep going while the current line opens another hunk. The line may be
    // null once input is exhausted (nextLine() signals that with null;
    // nextNonemptyLine() presumably does the same), and passing null to
    // preg_match() is deprecated as of PHP 8.1.
  } while ($line !== null && preg_match('/^@@ /', $line));
}