in Documents/Fixed Documents/DocumentSerialization/HtmlXmlConverter/htmltoxamlconverter.cs [167:288]
/// <summary>
/// Dispatches a single html node to the conversion routine matching its kind:
/// comments, text nodes and elements are each handled differently, and elements
/// are further dispatched by their (case-insensitive) local name.
/// </summary>
/// <param name="xamlParentElement">
/// Xaml element handed to the conversion helpers as the parent for generated content.
/// </param>
/// <param name="htmlNode">
/// The html node to process (an XmlComment, XmlText or XmlElement; other node kinds are returned untouched).
/// </param>
/// <param name="inheritedProperties">
/// Property table forwarded unchanged to the conversion helpers.
/// </param>
/// <param name="stylesheet">
/// Stylesheet forwarded unchanged to the conversion helpers.
/// </param>
/// <param name="sourceContext">
/// Stack of html elements currently being processed. The element is pushed before
/// dispatching and popped afterwards; elements outside the xhtml namespace are never pushed.
/// </param>
/// <returns>
/// The last html node processed. This is <paramref name="htmlNode"/> itself unless
/// AddImplicitParagraph or AddOrphanListItems returned a different node.
/// </returns>
private static XmlNode AddBlock(XmlElement xamlParentElement, XmlNode htmlNode, Hashtable inheritedProperties, CssStylesheet stylesheet, List<XmlElement> sourceContext)
{
    if (htmlNode is XmlComment)
    {
        // Note: null is passed deliberately in place of xamlParentElement
        DefineInlineFragmentParent((XmlComment)htmlNode, /*xamlParentElement:*/null);
    }
    else if (htmlNode is XmlText)
    {
        // Bare text at block level: the helper returns the node to continue from
        htmlNode = AddImplicitParagraph(xamlParentElement, htmlNode, inheritedProperties, stylesheet, sourceContext);
    }
    else if (htmlNode is XmlElement)
    {
        // Identify element name
        XmlElement htmlElement = (XmlElement)htmlNode;
        string htmlElementName = htmlElement.LocalName; // Keep the name case-sensitive to check xml names
        string htmlElementNamespace = htmlElement.NamespaceURI;

        if (htmlElementNamespace != HtmlParser.XhtmlNamespace)
        {
            // Non-html element. Skip it.
            // Isn't it too aggressive? What if this is just an error in html tag name?
            // TODO: Consider skipping just a wrapper in recursing into the element tree,
            // which may produce some garbage though coming from xml fragments.
            return htmlElement;
        }

        // Put source element to the stack
        sourceContext.Add(htmlElement);

        // Convert the name to lowercase, because html elements are case-insensitive.
        // The invariant culture must be used: a culture-sensitive ToLower() maps 'I' to a
        // dotless 'ı' under Turkish/Azeri cultures, so tags such as "LI", "IMG" or "DIV"
        // would no longer match the ASCII case labels below.
        htmlElementName = htmlElementName.ToLowerInvariant();

        // Switch to an appropriate kind of processing depending on html element name
        switch (htmlElementName)
        {
            // Sections:
            case "html":
            case "body":
            case "div":
            case "form": // not a block according to xhtml spec
            case "pre": // Renders text in a fixed-width font
            case "blockquote":
            case "caption":
            case "center":
            case "cite":
                AddSection(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            // Paragraphs:
            case "p":
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
            case "textarea":
            case "dd": // ???
            case "dl": // ???
            case "dt": // ???
            case "tt": // ???
                AddParagraph(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "ol":
            case "ul":
            case "dir": // TODO: treat as UL element
            case "menu": // TODO: treat as UL element
                // List element conversion
                AddList(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "li":
                // LI outside of OL/UL
                // Collect all sibling LIs, wrap them into a List and then proceed with the element following the last of LIs
                htmlNode = AddOrphanListItems(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "img":
                // TODO: Add image processing
                AddImage(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "table":
                // hand off to table parsing function which will perform special table syntax checks
                AddTable(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;

            case "tbody":
            case "tfoot":
            case "thead":
            case "tr":
            case "td":
            case "th":
                // Table stuff without table wrapper
                // TODO: add special-case processing here for elements that should be within tables when the
                // parent element is NOT a table. If the parent element is a table they can be processed normally.
                // we need to compare against the parent element here, we can't just break on a switch
                goto default; // Thus we will skip this element as unknown, but still recurse into it.

            case "style": // We already pre-processed all style elements. Ignore it now
            case "meta":
            case "head":
            case "title":
            case "script":
                // Ignore these elements
                break;

            default:
                // Wrap a sequence of inlines into an implicit paragraph
                htmlNode = AddImplicitParagraph(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
                break;
        }

        // Remove the element from the stack; it must still be the top entry at this point
        Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlElement);
        sourceContext.RemoveAt(sourceContext.Count - 1);
    }

    // Return last processed node
    return htmlNode;
}