def process_quotation()

in data_parsing.py [0:0]


def process_quotation(line):
	"""Extract a quotation and its accompanying tags from one markup line.

	The line is examined with the following strategies, first match wins:

	1. A ``|| QUOTE=`` separator: the text before the first separator is the
	   tags (a string); everything after it is the quote (several parts are
	   joined with ``'/ '``). If the quote contains a ``{{...}}`` template,
	   only its last ``|`` field is kept, with ``passage=``/``text=`` removed.
	2. A ``<ref>...</ref>`` element: the content of the first ref is the tags
	   (a string); the line with every ref removed is the quote.
	3. A ``{{...}}`` template: the fields are split on ``|``. The first field
	   starting with ``passage=`` or ``text=`` (any letter case) is the quote;
	   failing that, the last field that does not look like ``key=value`` is
	   used. The remaining fields are the tags (a list).
	4. Otherwise: pairs of apostrophes are turned into double quotes and the
	   first ``"..."`` span is the quote, with the rest of the line as tags
	   (a string). Without such a span the whole line is the quote and the
	   tags are an empty list.

	Args:
		line: a single line of markup text.

	Returns:
		``(quote, tags)`` when the quote is longer than ``CHAR_THRESHOLD``
		characters and contains a space; ``(-1, -1)`` otherwise, and also for
		lines containing ``seemorecites`` or templates with no usable field.
		Note that ``tags`` is a string or a list depending on which strategy
		matched.
	"""
	quote_flags = ('passage=', 'text=')
	flag_pattern = r'passage=|text='

	# Drop list markers such as "#*", "#*:" or "*" (wherever they occur).
	line = re.sub(r'#*?\*:?', '', line)

	if 'seemorecites' in line.lower():
		return -1, -1

	if '|| QUOTE=' in line:
		parts = line.split('|| QUOTE=')
		q_tags = parts[0]
		# With a single quote part the join simply yields that part.
		q = '/ '.join(parts[1:])

		if re.search(r'{{.*?}}', q):
			q = re.sub(r'{{|}}', '', q)
			q = q.split('|')[-1]
			# Case-insensitive so "Passage=" / "Text=" are stripped as well.
			q = re.sub(flag_pattern, '', q, flags=re.IGNORECASE)

	elif re.search(r'<ref>.*?</ref>', line):
		q_tags = re.search(r'<ref>(.*?)</ref>', line).group(1)
		q = re.sub(r'<ref>.*?</ref>', '', line)

	elif re.search(r'{{.*?}}', line):
		stripped = re.sub(r'{{|}}', '', line)
		fields = [t.strip() for t in stripped.strip().split('|')]

		flagged = [t for t in fields if t.lower().startswith(quote_flags)]
		if flagged:
			# Detection above ignores case, so removal must ignore case too;
			# otherwise "Passage=..." keeps its parameter name in the quote.
			q = re.sub(flag_pattern, '', flagged[0], flags=re.IGNORECASE)
			q_tags = [t for t in fields if not t.lower().startswith(quote_flags)]
		else:
			# Fall back to the last field that is not of the form key=value.
			positional = [t for t in fields if not re.match(r'^.*?=', t)]
			if not positional:
				return -1, -1
			q = positional[-1]
			q_tags = [t for t in fields if t != q]

	# assuming the quote is here in quotes
	else:
		# Turn exactly-two-apostrophe runs into double quotes so that the
		# quoted example can be located.
		line = re.sub(r'(?<!\')\'{2}(?!\')', '"', line)
		quoted = re.search(r'".*?"', line)
		if quoted:
			q = re.sub(r'"', '', quoted.group(0))
			q_tags = re.sub(r'".*?"', '', line)
		else:
			q = line
			q_tags = []

	# Keep only quotes that are long enough and contain more than one word.
	if len(q) > CHAR_THRESHOLD and ' ' in q:
		return q, q_tags
	return -1, -1