src/unparsed-blocks/HTMLBlock.php (90 lines of code) (raw):
<?hh // strict
/*
* Copyright (c) 2004-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
namespace Facebook\Markdown\UnparsedBlocks;
use type Facebook\Markdown\Blocks\HTMLBlock as ASTNode;
use namespace Facebook\Markdown\Inlines;
use namespace HH\Lib\{C, Dict, Str, Vec};
/**
 * Unparsed representation of a raw HTML block.
 *
 * The first line of a candidate block is tested against a set of "start"
 * regular expressions; each start expression is paired with the "end"
 * expression that is returned from `getEndPatternForFirstLine()`. Locating the
 * actual end of the block is left to the parent `FencedBlock` (not visible in
 * this file).
 */
<<__ConsistentConstruct>>
class HTMLBlock extends FencedBlock {
  // Regex fragments (no delimiters) used to build the tag patterns below.
  // They are written in lower case; the patterns that embed them rely on the
  // `i` modifier to accept upper-case tag and attribute names.
  const string TAG_NAME = '[a-z][a-z0-9-]*';
  const string ATTRIBUTE_NAME = '[a-z_:][a-z0-9_.:-]*';
  const string UNQUOTED_ATTRIBUTE_VALUE = '[^"\'=<>` ]+';
  const string SINGLE_QUOTED_ATTRIBUTE_VALUE = "'[^']*'";
  const string DOUBLE_QUOTED_ATTRIBUTE_VALUE = '"[^"]*"';
  const string ATTRIBUTE_VALUE =
    '('.
    self::UNQUOTED_ATTRIBUTE_VALUE.'|'.
    self::SINGLE_QUOTED_ATTRIBUTE_VALUE.'|'.
    self::DOUBLE_QUOTED_ATTRIBUTE_VALUE.
    ')';
  const string ATTRIBUTE_VALUE_SPECIFICATION =
    "\\s*=\\s*".self::ATTRIBUTE_VALUE;
  const string ATTRIBUTE =
    "\\s+".self::ATTRIBUTE_NAME.'('.self::ATTRIBUTE_VALUE_SPECIFICATION.')?';

  /**
   * Start pattern => end pattern, for the HTML block kinds that are tried
   * even while the context reports a paragraph continuation.
   */
  const dict<string, string> PARAGRAPH_INTERRUPTING_PATTERNS = dict[
    // GFM spec states that closing tag doesn't need to match opening tag.
    // The tag name may be followed by a space or a tab, like the allowlist
    // pattern below.
    '/^(<script|<pre|<style)([ \\t]|>|$)/i' => ',</script>|</pre>|</style>,i',
    '/^<!--/' => '/-->/',
    '/^<\\?/' => '/\\?>/',
    '/^<![A-Z]/' => '/>/',
    '/^<!\\[CDATA\\[/' => '/\\]\\]>/',
    // This very large allowlist is in the spec
    '/^<\\/?(address|article|aside|base|basefont|blockquote|body|caption|'.
    'center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|'.
    'figcaption|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|'.
    'html|iframe|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|'.
    'optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|'.
    'thead|title|tr|track|ul)([ \\t]+|$|>|\\/>)/i' => '/^$/',
  ];

  /**
   * Start pattern => end pattern, for the HTML block kinds that are only
   * tried when the context does not report a paragraph continuation: a
   * single complete open or closing tag alone on its line.
   */
  const dict<string, string> NON_INTERRUPTING_PATTERNS = dict[
    // Open tag
    '/^<'.self::TAG_NAME.'('.self::ATTRIBUTE.')* *\\/?> *$/i' => '/^$/',
    // Closing tag. Case-insensitive like the open-tag pattern: TAG_NAME is
    // written in lower case, so without `i` a line such as `</INS>` would
    // never be recognised.
    '/^<\\/'.self::TAG_NAME.' *> *$/i' => '/^$/',
  ];

  /**
   * @param $content the raw text of the block, lines joined with "\n"
   */
  public function __construct(
    private string $content,
  ) {
  }

  /**
   * Build a block from the lines that make it up.
   *
   * If the last line is the empty string it is dropped; the remaining lines
   * are joined with "\n". The indentation and EOF arguments are not used.
   */
  <<__Override>>
  protected static function createFromLines(
    vec<string> $lines,
    int $_indentation_of_first,
    bool $_eof,
  ): this {
    if (C\last($lines) === '') {
      $lines = Vec\take($lines, C\count($lines) - 1);
    }
    return new static(Str\join($lines, "\n"));
  }

  /**
   * Try to consume an HTML block from `$lines`.
   *
   * Returns null without consulting the parent when HTML is disabled in the
   * context; otherwise defers entirely to `FencedBlock::consume()`.
   */
  <<__Override>>
  public static function consume(
    Context $context,
    Lines $lines,
  ): ?(this, Lines) {
    if (!$context->isHTMLEnabled()) {
      return null;
    }
    return parent::consume($context, $lines);
  }

  /**
   * Decide whether `$line` starts an HTML block and, if so, which pattern
   * ends it.
   *
   * In a paragraph continuation only PARAGRAPH_INTERRUPTING_PATTERNS are
   * tried; otherwise NON_INTERRUPTING_PATTERNS are tried as well, after the
   * interrupting ones.
   *
   * @return the end pattern paired with the first start pattern that matches,
   *   or null if none matches
   */
  <<__Override>>
  public static function getEndPatternForFirstLine(
    Context $context,
    int $column,
    string $line,
  ): ?string {
    if ($context->isInParagraphContinuation()) {
      $patterns = self::PARAGRAPH_INTERRUPTING_PATTERNS;
    } else {
      $patterns = Dict\merge(
        self::PARAGRAPH_INTERRUPTING_PATTERNS,
        self::NON_INTERRUPTING_PATTERNS,
      );
    }

    // The start patterns are anchored with `^`, so leading indentation has to
    // be removed first (presumably at most 3 columns' worth, going by the
    // helper's name and arguments).
    list($_, $line, $_) =
      Lines::stripUpToNLeadingWhitespace($line, 3, $column);

    foreach ($patterns as $start => $end) {
      if (\preg_match($start, $line) === 1) {
        return $end;
      }
    }
    return null;
  }

  /**
   * Produce the AST node for this block. The content is passed through
   * unchanged; the inline context is ignored.
   */
  <<__Override>>
  public function withParsedInlines(Inlines\Context $_): ASTNode {
    return new ASTNode($this->content);
  }
}