in bleach/linkifier.py
def handle_links(self, src_iter):
    """Handle links in character tokens"""
    in_a = False  # happens if parse_email=True and an email link was found
    for token in src_iter:
        if in_a:
            # Pass tokens through untouched until the enclosing "a" closes;
            # text already inside a link is never linkified again.
            if token["type"] == "EndTag" and token["name"] == "a":
                in_a = False
            yield token
            continue
        elif token["type"] == "StartTag" and token["name"] == "a":
            in_a = True
            yield token
            continue

        if token["type"] == "Characters":
            text = token["data"]
            new_tokens = []
            end = 0

            for match in self.url_re.finditer(text):
                if match.start() > end:
                    # Emit the plain text between the previous match (or the
                    # start of the string) and this URL
                    new_tokens.append(
                        {"type": "Characters", "data": text[end : match.start()]}
                    )

                url = match.group(0)
                prefix = suffix = ""

                # Sometimes we pick up too much in the url match, so look for
                # bits we should drop and remove them from the match
                url, prefix, suffix = self.strip_non_url_bits(url)

                # If there's no protocol, add one
                if PROTO_RE.search(url):
                    href = url
                else:
                    href = "http://%s" % url

                attrs = {(None, "href"): href, "_text": url}
                attrs = self.apply_callbacks(attrs, True)

                if attrs is None:
                    # A callback vetoed the link, so just add the text
                    new_tokens.append(
                        {"type": "Characters", "data": prefix + url + suffix}
                    )
                else:
                    # Add the "a" tag!
                    if prefix:
                        new_tokens.append({"type": "Characters", "data": prefix})

                    _text = attrs.pop("_text", "")
                    new_tokens.extend(
                        [
                            {"type": "StartTag", "name": "a", "data": attrs},
                            {"type": "Characters", "data": str(_text)},
                            {"type": "EndTag", "name": "a"},
                        ]
                    )

                    if suffix:
                        new_tokens.append({"type": "Characters", "data": suffix})

                end = match.end()

            if new_tokens:
                # Yield the adjusted set of tokens and then continue
                # through the loop
                if end < len(text):
                    # Trailing text after the last URL match
                    new_tokens.append({"type": "Characters", "data": text[end:]})
                yield from new_tokens
                continue

        yield token
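
For context, this filter is normally driven through bleach.linkify() or bleach.linkifier.Linker, and the apply_callbacks() step above is where user callbacks can adjust or veto each link (returning None triggers the attrs-is-None branch). A minimal sketch of that round trip follows; dont_linkify_ftp is a hypothetical callback written for this example, and the outputs shown in comments assume bleach's default URL regex and default callbacks.

import bleach

# Default behavior: a bare domain gets http:// prepended (the PROTO_RE
# branch above) and the default nofollow callback adds rel="nofollow".
print(bleach.linkify("docs live at example.com"))
# -> 'docs live at <a href="http://example.com" rel="nofollow">example.com</a>'

# A callback receives the attrs dict built above -- keys are
# (namespace, name) tuples plus the special "_text" key -- and may
# return None to suppress the link entirely.
def dont_linkify_ftp(attrs, new=False):  # hypothetical callback
    if attrs["_text"].startswith("ftp."):
        return None
    return attrs

# Passing callbacks= replaces the default list, so no rel="nofollow" here.
print(bleach.linkify("ftp.example.com mirrors example.com",
                     callbacks=[dont_linkify_ftp]))
# -> 'ftp.example.com mirrors <a href="http://example.com">example.com</a>'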