in python/markdown2.py [0:0]
def _do_links(self, text):
"""Turn Markdown link shortcuts into XHTML <a> and <img> tags.
This is a combination of Markdown.pl's _DoAnchors() and
_DoImages(). They are done together because that simplified the
approach. It was necessary to use a different approach than
Markdown.pl because of the lack of atomic matching support in
Python's regex engine used in $g_nested_brackets.
"""
MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24
# `anchor_allowed_pos` is used to support img links inside
# anchors, but not anchors inside anchors. An anchor's start
# pos must be `>= anchor_allowed_pos`.
anchor_allowed_pos = 0
curr_pos = 0
while True: # Handle the next link.
# The next '[' is the start of:
# - an inline anchor: [text](url "title")
# - a reference anchor: [text][id]
# - an inline img: 
# - a reference img: ![text][id]
# - a footnote ref: [^id]
# (Only if 'footnotes' extra enabled)
# - a footnote defn: [^id]: ...
# (Only if 'footnotes' extra enabled) These have already
# been stripped in _strip_footnote_definitions() so no
# need to watch for them.
# - a link definition: [id]: url "title"
# These have already been stripped in
# _strip_link_definitions() so no need to watch for them.
# - not markup: [...anything else...
try:
start_idx = text.index('[', curr_pos)
except ValueError:
break
text_length = len(text)
# Find the matching closing ']'.
# Markdown.pl allows *matching* brackets in link text so we
# will here too. Markdown.pl *doesn't* currently allow
# matching brackets in img alt text -- we'll differ in that
# regard.
bracket_depth = 0
for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
text_length)):
ch = text[p]
if ch == ']':
bracket_depth -= 1
if bracket_depth < 0:
break
elif ch == '[':
bracket_depth += 1
else:
# Closing bracket not found within sentinel length.
# This isn't markup.
curr_pos = start_idx + 1
continue
link_text = text[start_idx+1:p]
# Possibly a footnote ref?
if "footnotes" in self.extras and link_text.startswith("^"):
normed_id = re.sub(r'\W', '-', link_text[1:])
if normed_id in self.footnotes:
self.footnote_ids.append(normed_id)
result = '<sup class="footnote-ref" id="fnref-%s">' \
'<a href="#fn-%s">%s</a></sup>' \
% (normed_id, normed_id, len(self.footnote_ids))
text = text[:start_idx] + result + text[p+1:]
else:
# This id isn't defined, leave the markup alone.
curr_pos = p+1
continue
# Now determine what this is by the remainder.
p += 1
if p == text_length:
return text
# Inline anchor or img?
if text[p] == '(': # attempt at perf improvement
url, title, url_end_idx = self._extract_url_and_title(text, p)
if url is not None:
# Handle an inline anchor or img.
is_img = start_idx > 0 and text[start_idx-1] == "!"
if is_img:
start_idx -= 1
# We've got to encode these to avoid conflicting
# with italics/bold.
url = url.replace('*', self._escape_table['*']) \
.replace('_', self._escape_table['_'])
if title:
title_str = ' title="%s"' % (
_xml_escape_attr(title)
.replace('*', self._escape_table['*'])
.replace('_', self._escape_table['_']))
else:
title_str = ''
if is_img:
img_class_str = self._html_class_str_from_tag("img")
result = '<img src="%s" alt="%s"%s%s%s' \
% (url.replace('"', '"'),
_xml_escape_attr(link_text),
title_str, img_class_str, self.empty_element_suffix)
if "smarty-pants" in self.extras:
result = result.replace('"', self._escape_table['"'])
curr_pos = start_idx + len(result)
text = text[:start_idx] + result + text[url_end_idx:]
elif start_idx >= anchor_allowed_pos:
result_head = '<a href="%s"%s>' % (url, title_str)
result = '%s%s</a>' % (result_head, link_text)
if "smarty-pants" in self.extras:
result = result.replace('"', self._escape_table['"'])
# <img> allowed from curr_pos on, <a> from
# anchor_allowed_pos on.
curr_pos = start_idx + len(result_head)
anchor_allowed_pos = start_idx + len(result)
text = text[:start_idx] + result + text[url_end_idx:]
else:
# Anchor not allowed here.
curr_pos = start_idx + 1
continue
# Reference anchor or img?
else:
match = self._tail_of_reference_link_re.match(text, p)
if match:
# Handle a reference-style anchor or img.
is_img = start_idx > 0 and text[start_idx-1] == "!"
if is_img:
start_idx -= 1
link_id = match.group("id").lower()
if not link_id:
link_id = link_text.lower() # for links like [this][]
if link_id in self.urls:
url = self.urls[link_id]
# We've got to encode these to avoid conflicting
# with italics/bold.
url = url.replace('*', self._escape_table['*']) \
.replace('_', self._escape_table['_'])
title = self.titles.get(link_id)
if title:
before = title
title = _xml_escape_attr(title) \
.replace('*', self._escape_table['*']) \
.replace('_', self._escape_table['_'])
title_str = ' title="%s"' % title
else:
title_str = ''
if is_img:
img_class_str = self._html_class_str_from_tag("img")
result = '<img src="%s" alt="%s"%s%s%s' \
% (url.replace('"', '"'),
link_text.replace('"', '"'),
title_str, img_class_str, self.empty_element_suffix)
if "smarty-pants" in self.extras:
result = result.replace('"', self._escape_table['"'])
curr_pos = start_idx + len(result)
text = text[:start_idx] + result + text[match.end():]
elif start_idx >= anchor_allowed_pos:
result = '<a href="%s"%s>%s</a>' \
% (url, title_str, link_text)
result_head = '<a href="%s"%s>' % (url, title_str)
result = '%s%s</a>' % (result_head, link_text)
if "smarty-pants" in self.extras:
result = result.replace('"', self._escape_table['"'])
# <img> allowed from curr_pos on, <a> from
# anchor_allowed_pos on.
curr_pos = start_idx + len(result_head)
anchor_allowed_pos = start_idx + len(result)
text = text[:start_idx] + result + text[match.end():]
else:
# Anchor not allowed here.
curr_pos = start_idx + 1
else:
# This id isn't defined, leave the markup alone.
curr_pos = match.end()
continue
# Otherwise, it isn't markup.
curr_pos = start_idx + 1
return text