in src/workflow/ArcanistDiffWorkflow.php [948:1182]
/**
 * Build the parsed change list for the diff being created.
 *
 * Raw sources (the "raw" and "raw-command" arguments) are read, parsed and
 * returned right away with TYPE_MESSAGE changes filtered out; none of the
 * later checks run on that path.
 *
 * For every other source the diff text comes from the repository API
 * (Subversion, Git or Mercurial). The parsed changes are then:
 *
 *   - checked against a 4 MiB per-change limit on total hunk corpus size;
 *   - scanned for hunks which are not valid UTF-8, converting them from the
 *     configured encoding where that yields valid UTF-8;
 *   - passed to uploadFilesForChanges().
 *
 * Oversized or non-UTF-8 changes are converted to binary changes, normally
 * after prompting the user.
 *
 * @return array Parsed change objects, as produced by the diff parser.
 * @throws ArcanistUsageException When Subversion base revisions are
 *   mismatched, no changes are found, a Subversion change is oversized, or
 *   the user declines one of the prompts.
 * @throws Exception When the raw diff source or the repository API is not
 *   recognized.
 */
protected function generateChanges() {
  $diff_parser = $this->newDiffParser();

  if ($this->isRawDiffSource()) {
    if ($this->getArgument('raw')) {
      fwrite(STDERR, pht('Reading diff from stdin...')."\n");
      $raw_text = file_get_contents('php://stdin');
    } else if ($this->getArgument('raw-command')) {
      list($raw_text) = execx('%C', $this->getArgument('raw-command'));
    } else {
      throw new Exception(pht('Unknown raw diff source.'));
    }

    // Keep everything except "message" changes (e.g. from "git show"),
    // preserving the parser's original keys.
    $kept = array();
    foreach ($diff_parser->parseDiff($raw_text) as $key => $change) {
      if ($change->getType() == ArcanistDiffChangeType::TYPE_MESSAGE) {
        continue;
      }
      $kept[$key] = $change;
    }

    return $kept;
  }

  $api = $this->getRepositoryAPI();

  if ($api instanceof ArcanistSubversionAPI) {
    $paths = $this->generateAffectedPaths();
    $this->primeSubversionWorkingCopyData($paths);

    // All changed paths must share one base revision. Mixed bases make the
    // change harder to review and produce patches which are not portable.
    // Paths at base revision "0" are new files, so they are left out of the
    // comparison.
    $bases = array();
    foreach ($api->getSVNBaseRevisions() as $path => $revision) {
      if ($revision <= 0) {
        continue;
      }
      $bases[$path] = $revision;
    }

    if ($bases) {
      $expected_rev = reset($bases);

      $lines = array();
      foreach ($bases as $path => $revision) {
        $lines[] = ' '.pht('Revision %s, %s', $revision, $path);
      }
      $listing = implode("\n", $lines);

      foreach ($bases as $revision) {
        if ($revision !== $expected_rev) {
          throw new ArcanistUsageException(
            pht(
              "Base revisions of changed paths are mismatched. Update all ".
              "paths to the same base revision before creating a diff: ".
              "\n\n%s",
              $listing));
        }
      }

      // Every changed path agrees on a single base revision, so use it as
      // the effective base for the whole diff. This lets a user keep editing
      // a file which was updated on its own, without first updating the rest
      // of the working copy; the resulting patches still apply cleanly.
      $api->overrideSVNBaseRevisionNumber($expected_rev);
    }

    $changes = $diff_parser->parseSubversionDiff($api, $paths);
  } else {
    if ($api instanceof ArcanistGitAPI) {
      $base_commit = $api->getBaseCommit();
      $head_commit = $api->getHeadCommit();
      $diff_text = $api->getFullGitDiff($base_commit, $head_commit);
    } else if ($api instanceof ArcanistMercurialAPI) {
      $diff_text = $api->getFullMercurialDiff();
    } else {
      throw new Exception(pht('Repository API is not supported.'));
    }

    if (!strlen($diff_text)) {
      throw new ArcanistUsageException(
        pht('No changes found. (Did you specify the wrong commit range?)'));
    }

    $changes = $diff_parser->parseDiff($diff_text);
  }

  // Per-change ceiling on the combined hunk corpus size: 4 MiB.
  $max_bytes = 1024 * 1024 * 4;

  foreach ($changes as $change) {
    $total_bytes = 0;
    foreach ($change->getHunks() as $hunk) {
      $total_bytes += strlen($hunk->getCorpus());
    }

    if ($total_bytes <= $max_bytes) {
      continue;
    }

    $byte_warning = pht(
      "Diff for '%s' with context is %s bytes in length. ".
      "Generally, source changes should not be this large.",
      $change->getCurrentPath(),
      new PhutilNumber($total_bytes));

    if (!$this->getArgument('less-context')) {
      $byte_warning .= ' '.pht(
        "If this file is a huge text file, try using the '%s' flag.",
        '--less-context');
    }

    // Under Subversion the user is told how to flag the file as binary
    // instead of having the change converted here.
    if ($api instanceof ArcanistSubversionAPI) {
      throw new ArcanistUsageException(
        $byte_warning.' '.
        pht(
          "If the file is not a text file, mark it as binary with:".
          "\n\n $ %s\n",
          'svn propset svn:mime-type application/octet-stream <filename>'));
    }

    // The prompt is bypassed only when there is no TTY and the
    // "nointeractive" argument was given.
    $skip_confirmation =
      !$this->isTTY() && $this->getArgument('nointeractive');

    $prompt = $byte_warning.' '.pht(
      "If the file is not a text file, you can mark it 'binary'. ".
      "Mark this file as 'binary' and continue?");

    if (!$skip_confirmation && !phutil_console_confirm($prompt)) {
      throw new ArcanistUsageException(
        pht('Aborted generation of gigantic diff.'));
    }

    $change->convertToBinaryChange($api);
  }

  $try_encoding = nonempty($this->getArgument('encoding'), null);

  $utf8_problems = array();
  foreach ($changes as $change) {
    foreach ($change->getHunks() as $hunk) {
      $corpus = $hunk->getCorpus();
      if (phutil_is_utf8($corpus)) {
        continue;
      }

      // Heuristically binary content is never converted: mb_check_encoding()
      // and mb_convert_encoding() will happily process almost anything.
      if (!ArcanistDiffUtils::isHeuristicBinaryFile($corpus)) {
        if (!$try_encoding) {
          try {
            $try_encoding = $this->getRepositoryEncoding();
          } catch (ConduitClientException $ex) {
            if ($ex->getErrorCode() == 'ERR-BAD-ARCANIST-PROJECT') {
              echo phutil_console_wrap(
                pht(
                  'Lookup of encoding in arcanist project failed: %s',
                  $ex->getMessage())."\n");
            } else {
              throw $ex;
            }
          }
        }

        if ($try_encoding) {
          $corpus = phutil_utf8_convert($corpus, 'UTF-8', $try_encoding);
          $path_name = $change->getCurrentPath();
          if (phutil_is_utf8($corpus)) {
            $this->writeStatusMessage(
              pht(
                "Converted a '%s' hunk from '%s' to UTF-8.\n",
                $path_name,
                $try_encoding));
            $hunk->setCorpus($corpus);
            continue;
          }
        }
      }

      // One unconvertible hunk is enough to flag the whole change, so the
      // remaining hunks of this change are not examined.
      $utf8_problems[] = $change;
      break;
    }
  }

  // Changes still carrying invalid UTF-8 are reported to the user and, if
  // the workflow continues, treated as binary changes. See D327 for the
  // reasoning behind this behavior.
  if ($utf8_problems) {
    $summary = pht(
      'This diff includes %s file(s) which are not valid UTF-8 (they '.
      'contain invalid byte sequences). You can either stop this '.
      'workflow and fix these files, or continue. If you continue, '.
      'these files will be marked as binary.',
      phutil_count($utf8_problems));

    $guidance = pht(
      "You can learn more about how Phabricator handles character ".
      "encodings (and how to configure encoding settings and detect and ".
      "correct encoding problems) by reading 'User Guide: UTF-8 and ".
      "Character Encoding' in the Phabricator documentation.");

    $heading = pht(
      '%s AFFECTED FILE(S)',
      phutil_count($utf8_problems));

    $utf8_warning = sprintf(
      "%s\n\n%s\n\n %s\n",
      $summary,
      $guidance,
      $heading);

    $prompt = pht(
      'Do you want to mark these %s file(s) as binary and continue?',
      phutil_count($utf8_problems));

    echo phutil_console_format(
      "**%s**\n",
      pht('Invalid Content Encoding (Non-UTF8)'));
    echo phutil_console_wrap($utf8_warning);

    $affected_paths = mpull($utf8_problems, 'getCurrentPath');
    echo ' '.implode("\n ", $affected_paths);

    // The "automatically-convert-non-utf" argument answers the prompt in
    // advance; otherwise the user has to confirm.
    $auto_convert = $this->getArgument('automatically-convert-non-utf');
    $default_no = false;
    if (!$auto_convert && !phutil_console_confirm($prompt, $default_no)) {
      throw new ArcanistUsageException(pht('Aborted workflow to fix UTF-8.'));
    }

    foreach ($utf8_problems as $change) {
      $change->convertToBinaryChange($api);
    }
  }

  $this->uploadFilesForChanges($changes);

  return $changes;
}