in src/parser/PhutilURI.php [30:142]
/**
 * Build a URI from a string (or string-castable value), or copy an existing
 * PhutilURI.
 *
 * When `$uri` is a PhutilURI, each of its components is copied onto this
 * object. Otherwise the value is cast to a string and parsed as one of:
 *
 *   - a `scheme://host?query` URI with no path (parsed with a temporary
 *     slash inserted so the query is not mistaken for part of the host);
 *   - a Git/SCP-style URI, `user@domain:relative/path`, which is given the
 *     `ssh` protocol and the `TYPE_GIT` type; or
 *   - anything else, which is passed directly to `parse_url()`.
 *
 * If parsing fails, the input has leading whitespace, or the host does not
 * look like a plausible host name, every component is left empty rather
 * than raising an exception.
 *
 * @param wild $uri     PhutilURI to copy, or a value to cast to a string and
 *                      parse.
 * @param map  $params  Extra parameters, handed to `initializeQueryParams()`
 *                      together with the string form of the URI.
 *
 * @throws Exception  If the URI is ambiguous (a "?" or "#" appears in the
 *                    authority and is later followed by ":"), or if it
 *                    matches the Git URI pattern but can not be split into
 *                    user, host and path.
 */
public function __construct($uri, $params = array()) {
  if ($uri instanceof PhutilURI) {
    // Copy construction: take every component from the other URI as-is.
    $this->protocol = $uri->protocol;
    $this->user = $uri->user;
    $this->pass = $uri->pass;
    $this->domain = $uri->domain;
    $this->port = $uri->port;
    $this->path = $uri->path;
    $this->query = $uri->query;
    $this->fragment = $uri->fragment;
    $this->type = $uri->type;

    $this->initializeQueryParams(phutil_string_cast($uri), $params);

    return;
  }

  $uri = phutil_string_cast($uri);

  $type = self::TYPE_URI;

  // Reject ambiguous URIs outright. Different versions of different clients
  // parse these in different ways. See T12526 for discussion.
  if (preg_match('(^[^/:]*://[^/]*[#?].*:)', $uri)) {
    throw new Exception(
      pht(
        'Rejecting ambiguous URI "%s". This URI is not formatted or '.
        'encoded properly.',
        $uri));
  }

  $matches = null;
  if (preg_match('(^([^/:]*://[^/]*)(\\?.*)\z)', $uri, $matches)) {
    // If the URI is something like `idea://open?file=/path/to/file`, the
    // `parse_url()` function will parse `open?file=` as the host. This is
    // not the expected result. Break the URI into two pieces, stick a slash
    // in between them, parse that, then remove the path. See T6106.
    $parts = parse_url($matches[1].'/'.$matches[2]);

    // The `parse_url()` function returns `false` for seriously malformed
    // input (for example, an out-of-range port). Only strip the synthetic
    // path when we actually got an array back, so we never apply an offset
    // operation to `false`.
    if ($parts) {
      unset($parts['path']);
    }
  } else if ($this->isGitURIPattern($uri)) {
    // Handle Git/SCP URIs in the form "user@domain:relative/path".

    $user = '(?:(?P<user>[^/@]+)@)?';
    $host = '(?P<host>[^/:]+)';
    $path = ':(?P<path>.*)';

    $ok = preg_match('(^'.$user.$host.$path.'\z)', $uri, $matches);
    if (!$ok) {
      throw new Exception(
        pht(
          'Failed to parse URI "%s" as a Git URI.',
          $uri));
    }

    // The named capture groups ("user", "host", "path") line up with the
    // keys `parse_url()` produces, so the match can be used as the part map.
    $parts = $matches;
    $parts['scheme'] = 'ssh';

    $type = self::TYPE_GIT;
  } else {
    $parts = parse_url($uri);
  }

  // The parse_url() call will accept URIs with leading whitespace, but many
  // other tools (like git) will not. See T4913 for a specific example. If
  // the input string has leading whitespace, fail the parse.
  if ($parts) {
    // This comparison must be strict. With a loose comparison, PHP compares
    // two numeric strings as numbers, so "123" and " 123" are considered
    // equal and the leading whitespace goes undetected.
    if (ltrim($uri) !== $uri) {
      $parts = false;
    }
  }

  // NOTE: `parse_url()` is very liberal about host names; fail the parse if
  // the host looks like garbage. In particular, we do not allow hosts which
  // begin with "." or "-". See T12961 for a specific attack which relied on
  // hosts beginning with "-".
  if ($parts) {
    $host = idx($parts, 'host', '');
    if (strlen($host)) {
      if (!preg_match('/^[a-zA-Z0-9]+[a-zA-Z0-9\\.\\-]*\z/', $host)) {
        $parts = false;
      }
    }
  }

  // A failed parse leaves every component at its empty default below.
  if (!$parts) {
    $parts = array();
  }

  // stringyness is to preserve API compatibility and
  // allow the tests to continue passing
  $this->protocol = idx($parts, 'scheme', '');
  $this->user = rawurldecode(idx($parts, 'user', ''));
  $this->pass = rawurldecode(idx($parts, 'pass', ''));
  $this->domain = idx($parts, 'host', '');
  $this->port = (string)idx($parts, 'port', '');
  $this->path = idx($parts, 'path', '');

  // Test the query by length rather than truthiness: the query string "0"
  // is falsey in PHP, but it is still a real query and must not be dropped.
  $query = idx($parts, 'query', '');
  if (strlen($query)) {
    $pairs = id(new PhutilQueryStringParser())
      ->parseQueryStringToPairList($query);

    foreach ($pairs as $pair) {
      list($key, $value) = $pair;
      $this->appendQueryParam($key, $value);
    }
  }

  $this->fragment = idx($parts, 'fragment', '');

  $this->type = $type;

  $this->initializeQueryParams($uri, $params);
}