in source/com.microsoft.tfs.util/src/com/microsoft/tfs/util/htmlfilter/HTMLFilter.java [132:348]
/**
 * Walks {@code html} from start to end and reports the pieces that survive
 * filtering to {@code writer}.
 * <p>
 * What is reported:
 * <ul>
 * <li>runs of characters between tags, via {@code writeText};</li>
 * <li>the opening part of every tag accepted by
 * {@code AllowedHTMLTags.isAllowedTag}, via {@code writeTag};</li>
 * <li>each attribute of such a tag accepted by
 * {@code AllowedHTMLTags.isAllowedAttribute}, via {@code writeAttribute};</li>
 * <li>the closing {@code '>'} (or {@code '/>'}) of such a tag, via
 * {@code writeEndOfTag}.</li>
 * </ul>
 * What is skipped without calling the writer: {@code <!-- -->} comments,
 * {@code <![CDATA[ ]]>} sections, other {@code <! >} and {@code <? >}
 * sections, tags that are not allowed, and tags for which
 * {@code AllowedHTMLTags.isSpecialTag} is true together with everything up
 * to their matching end tag.
 * <p>
 * NOTE(review): this method relies on {@code skipUntil} returning the index
 * just past the terminator it found (the character checks made directly
 * after each call only make sense under that contract) — confirm against
 * the helper.
 *
 * @param html
 *        the markup to scan
 * @param writer
 *        receives the accepted text, tags and attributes as offsets into
 *        {@code html}
 */
private static void parse(final String html, final IHTMLFilterWriter writer) {
    // iterate characters in the string
    final int n = html.length(); // end index
    int i = 0; // current index
    while (i < n) {
        // scan text until the tag
        int i0 = i;
        while (i < n && html.charAt(i) != '<') {
            i++;
        }
        // copy text to output
        if (i > i0) {
            writer.writeText(html, i0, i - i0);
            continue; // next item
        }
        Check.isTrue(i < n && html.charAt(i) == '<', "i < n && html.charAt(i) == '<'"); //$NON-NLS-1$
        // scan the tag
        i0 = i++; // mark and skip '<'
        // check for '<!' section
        if (i < n && html.charAt(i) == '!') {
            i++; // skip '!'
            // check for HTML comment syntax: '<!-- ... -->'
            if (i + 1 < n && html.charAt(i) == '-' && html.charAt(i + 1) == '-') {
                i += 2; // skip '--'
                i = skipUntil(html, i, '-', '-', '>');
                continue; // next syntax element
            }
            // check for CDATA section: '<![CDATA[ ... ]]>'
            if (i + 6 < n
                && html.charAt(i) == '['
                && html.charAt(i + 1) == 'C'
                && html.charAt(i + 2) == 'D'
                && html.charAt(i + 3) == 'A'
                && html.charAt(i + 4) == 'T'
                && html.charAt(i + 5) == 'A'
                && html.charAt(i + 6) == '[') {
                i += 7; // skip '[CDATA['
                i = skipUntil(html, i, ']', ']', '>');
                continue; // next syntax element
            }
            // skip other DTD sections: '<! ... >'
            i = skipUntil(html, i, '>');
            continue; // next syntax element
        }
        // check for '<?' section
        if (i < n && html.charAt(i) == '?') {
            i++; // skip '?'
            i = skipUntil(html, i, '>');
            continue; // next syntax element
        }
        // check for '/' character
        boolean endTag = false;
        if (i < n && html.charAt(i) == '/') {
            endTag = true;
            i++; // skip '/'
        }
        i = skipWhiteSpaces(html, i);
        // scan tag name
        final ScanResult scanResult = scanName(html, i);
        final String tag = scanResult.tag;
        i = scanResult.offset;
        // special handling of special tags (e.g. script): drop everything
        // up to and including the matching end tag
        if (AllowedHTMLTags.isSpecialTag(tag)) {
            // skip until end of tag
            i = skipUntil(html, i, '>');
            while (i < n) {
                // wait for tag start
                i = skipUntil(html, i, '<');
                // check for comment
                if (i + 2 < n && html.charAt(i) == '!' && html.charAt(i + 1) == '-' && html.charAt(i + 2) == '-') {
                    i = skipUntil(html, i, '-', '-', '>');
                }
                // check for end tag
                else if (i < n && html.charAt(i) == '/') {
                    i++; // skip '/'
                    i = skipWhiteSpaces(html, i);
                    String etag;
                    final ScanResult innerScanResult = scanName(html, i);
                    i = innerScanResult.offset;
                    etag = innerScanResult.tag;
                    if (AllowedHTMLTags.areTagsEqual(tag, etag)) {
                        i = skipUntil(html, i, '>');
                        break;
                    }
                }
                // something else, skip it - but never consume a '<' here:
                // it may open the real end tag (as in "a<</script>"), and
                // the next skipUntil(.., '<') must still see it
                else if (i < n && html.charAt(i) != '<') {
                    i++;
                }
            }
            continue; // next syntax element
        }
        // skip entire tag
        if (!AllowedHTMLTags.isAllowedTag(tag)) {
            i = skipUntil(html, i, '>');
            continue;
        }
        // allowed tag: write down the processed part
        writer.writeTag(html, i0, i - i0, tag, endTag);
        // loop attributes
        while (i < n) {
            i0 = i; // new starting point
            i = skipWhiteSpaces(html, i);
            // end of tag?
            if (i < n && html.charAt(i) == '/') {
                i++; // skip '/'
            }
            // end tag?
            if (i < n && html.charAt(i) == '>') {
                i++; // skip '>'
                writer.writeEndOfTag(html, i0, i - i0, tag);
                break;
            }
            // attribute?
            if (i < n && Character.isLetterOrDigit(html.charAt(i))) {
                // scan attribute name
                String attr;
                final ScanResult innerScanResult = scanName(html, i);
                i = innerScanResult.offset;
                attr = innerScanResult.tag;
                i = skipWhiteSpaces(html, i);
                int i1 = 0; // start of attribute value
                int i2 = 0; // end of attribute value
                // check value part
                if (i < n && html.charAt(i) == '=') {
                    i++; // skip '='
                    i = skipWhiteSpaces(html, i);
                    i1 = i; // attribute value starts here
                    if (i < n && (html.charAt(i) == '\'' || html.charAt(i) == '"')) {
                        final char term = html.charAt(i++); // save and skip
                                                            // terminator
                        i1 = i; // the attribute value starts just here
                        // A quoted value ends only at its matching quote;
                        // '>' is an ordinary character inside quotes.
                        // Stopping at '>' would close the tag early and
                        // hand the remainder of the tag to writeText.
                        while (i < n && html.charAt(i) != term) {
                            i++;
                        }
                        i2 = i; // attribute value ends here
                        // skip terminator
                        if (i < n && html.charAt(i) == term) {
                            i++;
                        }
                    } else {
                        // skip while not whitespace or end
                        while (i < n
                            && html.charAt(i) != '>'
                            && !Character.isWhitespace(html.charAt(i))
                            && !Character.isISOControl(html.charAt(i))) {
                            i++;
                        }
                        i2 = i; // attribute value ends here
                    }
                    // ignore "javascript:..." type of attributes; the
                    // literal prefix test is complemented by a tolerant
                    // one that sees through leading blanks and embedded
                    // tab/newline characters
                    final boolean literalScheme = i >= i1 + 11
                        && AllowedHTMLTags.areTagsEqual(html.substring(i1, i1 + 11), "javascript:"); //$NON-NLS-1$
                    if (literalScheme || hasJavaScriptScheme(html, i1, i2)) {
                        attr = ""; // prohibited //$NON-NLS-1$
                    }
                }
                // end of attribute: check it
                if (AllowedHTMLTags.isAllowedAttribute(tag, attr)) {
                    writer.writeAttribute(html, i0, i - i0, tag, attr, i1, i2);
                }
                continue; // next attribute
            }
            // unknown character - skip it
            if (i < n && html.charAt(i) != '>') {
                i++;
            }
        }
    }
    // end of loop: i==n
    Check.isTrue(i == n, "i == n"); //$NON-NLS-1$
}

/**
 * Tells whether the attribute value {@code html[start, end)} begins with
 * the {@code javascript:} scheme once the characters a URL parser discards
 * are ignored: leading characters at or below U+0020, and tab, line feed
 * and carriage return anywhere. Letter case is ignored.
 *
 * @param html
 *        the markup containing the value
 * @param start
 *        index of the first character of the value
 * @param end
 *        index just past the last character of the value
 * @return {@code true} if the value starts with the scheme
 */
private static boolean hasJavaScriptScheme(final String html, final int start, final int end) {
    final String scheme = "javascript:"; //$NON-NLS-1$
    int pos = start;
    // leading control characters and spaces are stripped from URLs
    while (pos < end && html.charAt(pos) <= ' ') {
        pos++;
    }
    int matched = 0;
    while (pos < end && matched < scheme.length()) {
        final char c = html.charAt(pos++);
        if (c == '\t' || c == '\n' || c == '\r') {
            continue; // removed from URLs wherever they appear
        }
        if (Character.toLowerCase(c) != scheme.charAt(matched)) {
            return false;
        }
        matched++;
    }
    return matched == scheme.length();
}