in 2LCS/HttpClientHelper.cs [732:760]
/// <summary>
/// Flattens an HTML fragment by replacing every element with its child elements and text,
/// then normalizes non-breaking spaces, tabs, line breaks and <c>&amp;quot;</c> entities.
/// </summary>
/// <param name="data">HTML markup to clean; may be null or empty.</param>
/// <returns>
/// The cleaned markup, or <see cref="string.Empty"/> when <paramref name="data"/> is null or empty.
/// </returns>
private static string RemoveUnwantedTags(string data)
{
    if (string.IsNullOrEmpty(data))
    {
        return string.Empty;
    }

    var document = new HtmlDocument();
    document.LoadHtml(data);

    // Elements named here are left untouched (together with their descendants).
    // The list is currently empty, so every element is unwrapped.
    // Candidates that were considered earlier: "strong", "em", "u".
    var acceptableTags = new string[] { };

    // SelectNodes can return null instead of an empty collection when nothing matches
    // (e.g. markup that contains only a comment); Queue<T>'s constructor rejects null.
    var topLevelNodes = document.DocumentNode.SelectNodes("./*|./text()");
    var nodes = topLevelNodes == null
        ? new Queue<HtmlNode>()
        : new Queue<HtmlNode>(topLevelNodes);

    // Breadth-first walk: each unwanted element is replaced in place by its children,
    // which are queued so that nested elements get unwrapped as well.
    while (nodes.Count > 0)
    {
        var node = nodes.Dequeue();
        if (node.Name == "#text" || acceptableTags.Contains(node.Name))
        {
            continue;
        }

        var parentNode = node.ParentNode;
        var childNodes = node.SelectNodes("./*|./text()");
        if (childNodes != null)
        {
            foreach (var child in childNodes)
            {
                nodes.Enqueue(child);
                // Inserting before the element keeps the children in document order.
                parentNode.InsertBefore(child, node);
            }
        }

        parentNode.RemoveChild(node);
    }

    var result = document.DocumentNode.InnerHtml;

    // Collapse non-breaking spaces (as an entity or as the literal U+00A0 character),
    // tabs and line breaks into single spaces; "\r\n" is listed first so it yields one space.
    result = Regex.Replace(result, @"&nbsp;|\u00A0|\r\n|\t|\n|\r", " ");

    // Turn the quote entity back into a plain double quote.
    result = result.Replace("&quot;", "\"");
    return result;
}