in src/parser/html/PhutilHTMLParser.php [237:432]
/**
 * Parse the raw attribute portion of a tag into a map of attributes.
 *
 * The input is scanned one byte at a time by a small state machine with
 * these states:
 *
 *   - `key`: looking for the first character of an attribute name.
 *   - `name`: inside an attribute name, looking for its end.
 *   - `equals`: saw a name followed by whitespace, looking for "=".
 *   - `value`: saw "=", looking for the first character of a value.
 *   - `quoted`: inside a double-quoted value, looking for the closing quote.
 *   - `unquoted`: inside a bare value, looking for terminating whitespace.
 *
 * Attribute names are lowercased with phutil_utf8_strtolower(). Values are
 * returned exactly as they appear in the input. Only the double quote (`"`)
 * is treated as a quoting character. An attribute with no value (for
 * example, "disabled" in "<input disabled>") maps to `true`.
 *
 * @param string Raw attribute text to parse.
 * @return map<string, string|true>|null Map from lowercased attribute name
 *   to value, or `null` if the text is not valid (a leading "=" or `"`, a
 *   doubled "=", or a repeated attribute name). Callers are expected to
 *   treat the tag as content in that case.
 */
private function parseAttributes($attributes) {
  $state = 'key';

  $whitespace = array(
    ' ' => true,
    "\n" => true,
    "\t" => true,
    "\r" => true,
  );

  $map = array();
  $len = strlen($attributes);

  // Offset of the attribute name currently being parsed, its normalized
  // (lowercased) text, and the offset of the value currently being parsed.
  $name_pos = null;
  $name_value = null;
  $value_pos = null;

  for ($ii = 0; $ii < $len; $ii++) {
    $c = $attributes[$ii];
    $is_space = isset($whitespace[$c]);

    switch ($state) {
      case 'key':
        // We're looking for the start of an attribute name.

        // Skip over any whitespace.
        if ($is_space) {
          break;
        }

        // If we see "<tag =...", that isn't valid. Treat this tag as
        // content.
        if ($c === '=') {
          return null;
        }

        // If we see a quotation mark with no attribute name, that isn't
        // valid. Treat this tag as content.
        if ($c === '"') {
          return null;
        }

        // Any other character marks the beginning of an attribute name.
        // Switch the parser state to "name" to parse the name.
        $name_pos = $ii;
        $state = 'name';
        break;

      case 'name':
        // We're looking for the end of an attribute name.

        // Finding a "=" or a space character ends the attribute name.
        // Save it, then figure out what to do with the parser state.
        if ($c === '=' || $is_space) {
          $name_value = substr($attributes, $name_pos, $ii - $name_pos);
          $name_value = phutil_utf8_strtolower($name_value);

          // If this attribute already exists, the tag is invalid. This
          // means the input is something like "<tag a=1 a=2>".
          if (isset($map[$name_value])) {
            return null;
          }
        }

        // If we find an "=", that's the end of the name. Next, we're going
        // to parse a value.
        if ($c === '=') {
          $state = 'value';
          break;
        }

        // If we find whitespace, that's the end of the name. We're going
        // to look for an "=".
        if ($is_space) {
          $state = 'equals';
          break;
        }

        // Anything else is more text in the attribute name.
        break;

      case 'equals':
        // We've parsed the name of an attribute and are looking for an
        // "=" character.

        // Skip over any whitespace.
        if ($is_space) {
          break;
        }

        // This is the "=" we're looking for, so we're good to go.
        if ($c === '=') {
          $state = 'value';
          break;
        }

        // If this is anything else, this is an attribute name with no
        // value. Treat it as "true" and move on. This corresponds to an
        // input like "<input disabled>". The current character is the
        // first character of the next attribute name.
        $map[$name_value] = true;
        $name_pos = $ii;
        $state = 'name';
        break;

      case 'value':
        // We've parsed an "=" and are looking for the start of a value.

        // Skip over any whitespace.
        if ($is_space) {
          break;
        }

        // Don't accept "<tag a==" to mean that key "a" has a value of
        // "=", since this is silly. To specify a value beginning with "=",
        // you have to quote it.
        if ($c === '=') {
          return null;
        }

        // Anything else is a value.
        if ($c === '"') {
          // This is a quotation mark, so parse a quoted value. The value
          // itself begins after the opening quote.
          $value_pos = $ii + 1;
          $state = 'quoted';
        } else {
          $value_pos = $ii;
          $state = 'unquoted';
        }
        break;

      case 'quoted':
        // We've started parsing a quoted value, so look for the closing
        // quote.

        // We found the closing quote, so pull out the actual value.
        if ($c === '"') {
          $attr_value = substr($attributes, $value_pos, $ii - $value_pos);
          $map[$name_value] = $attr_value;
          $state = 'key';
          break;
        }

        // Anything else is more text in the quoted value.
        break;

      case 'unquoted':
        // We've started parsing an unquoted value, so look for terminating
        // whitespace.

        // We've found some whitespace, so pull out the actual value.
        if ($is_space) {
          $attr_value = substr($attributes, $value_pos, $ii - $value_pos);
          $map[$name_value] = $attr_value;
          $state = 'key';
          break;
        }

        // Anything else is more text in the unquoted value.
        break;
    }
  }

  // We've consumed all of the input. Depending on which state the parser
  // stopped in, there may be a partially parsed attribute to clean up.
  switch ($state) {
    case 'key':
      // We were looking for the start of an attribute name, so there's
      // nothing to clean up.
      break;

    case 'name':
      // We were looking for the end of an attribute name. Treat whatever
      // we found as a name. Normalize it the same way as names which are
      // terminated inside the loop, so that "<tag A>" and "<tag A >"
      // produce the same key and "<tag a=1 A>" is detected as a duplicate.
      $name_value = substr($attributes, $name_pos, $len - $name_pos);
      $name_value = phutil_utf8_strtolower($name_value);

      if (isset($map[$name_value])) {
        return null;
      }

      $map[$name_value] = true;
      break;

    case 'equals':
    case 'value':
      // We found an attribute name followed by whitespace or an "=". Treat
      // whatever we found as a valid attribute name with no value.
      if (isset($map[$name_value])) {
        return null;
      }

      $map[$name_value] = true;
      break;

    case 'quoted':
    case 'unquoted':
      // We were parsing a value but ran out of characters before we found
      // the delimiter or closing quote. Treat everything from the start of
      // the value to the end of the input as the value.
      $attr_value = substr($attributes, $value_pos, $len - $value_pos);
      $map[$name_value] = $attr_value;
      break;
  }

  return $map;
}