in readability/readability.py [0:0]
def _parse(self, input):
    """Turn ``input`` into a document tree with its links resolved.

    ``input`` is either an already-parsed tree (``_ElementTree`` or
    ``HtmlElement``), which is taken as the document directly, or anything
    else, which is handed to ``build_doc`` and then passed through
    ``html_cleaner.clean_html``.

    Sets ``self.encoding`` as a side effect: ``'utf-8'`` for pre-parsed
    trees, otherwise the encoding reported by ``build_doc``.

    When ``self.url`` is truthy, links are made absolute against it;
    otherwise only ``resolve_base_href`` is called on the document.

    Returns the resulting document.
    """
    if isinstance(input, (_ElementTree, HtmlElement)):
        doc = input
        self.encoding = 'utf-8'
    else:
        doc, self.encoding = build_doc(input)
        # NOTE(review): cleaning is applied only to freshly parsed input;
        # pre-parsed trees are assumed to be passed through untouched --
        # confirm against the intended behaviour.
        doc = html_cleaner.clean_html(doc)

    base_href = self.url
    if not base_href:
        doc.resolve_base_href(handle_failures=self.handle_failures)
        return doc

    # trying to guard against bad links like <a href="http://[http://...">
    try:
        # such support is added in lxml 3.3.0
        doc.make_links_absolute(
            base_href,
            resolve_base_href=True,
            handle_failures=self.handle_failures,
        )
    except TypeError:  # make_links_absolute() got an unexpected keyword argument 'handle_failures'
        # then we have lxml < 3.3.0
        # please upgrade to lxml >= 3.3.0 if you're failing here!
        # Retry WITHOUT the unsupported keyword; repeating the identical
        # call would just raise the same TypeError again.
        doc.make_links_absolute(base_href, resolve_base_href=True)
    return doc