in src/alpaca_eval/completion_parsers.py [0:0]
def regex_parser(completion: str, outputs_to_match: dict[Any, Any]) -> list[Any]:
    r"""Parse a single batch of completions, by returning a sequence of keys in the order in which outputs_to_match
    was matched.

    The completion is scanned repeatedly: each call to ``utils._find_first_match`` yields a match and the key of the
    pattern that produced it, the key is recorded, and scanning resumes right after the end of that match.

    Parameters
    ----------
    completion : str
        Completion to parse.

    outputs_to_match : dict[Any, Any]
        Dictionary of compiled regex to match. Keys are the keys to return in the order in which they are matched.
        The values can be either a compiled regex or a string. If a string, it will be compiled to a regex and that
        will be modified inplace.

    Returns
    -------
    list[Any]
        Keys of ``outputs_to_match``, one per match found, in the order the matches occur in ``completion``. Empty
        if nothing matches.

    Raises
    ------
    ValueError
        If ``completion`` is the empty string.

    Notes
    -----
    Patterns that can match the empty string (e.g. ``a*``) are supported: after a zero-width match the scan skips one
    character, and it stops once the end of the completion is reached, so the function always terminates.

    Examples
    --------
    >>> completion = ('\n(b)\n\n### Best output for example 8:\n(a)\n\n### Best output for example 9:\n(b)\n\n### Best'\
    ...               ' output for example 10:\n(a)\n\n### Best output for example 11:\n(a)')
    >>> regex_parser(completion, {1: r"\n\(a\)", 2: r"\n\(b\)"})
    [2, 1, 2, 1, 1]
    >>> regex_parser(' (a)', {1: r" \(a\)", 2: r" \(b\)"})
    [1]
    >>> completion = ('### Preferred output in JSON format for example 4:\r\n{{\r\n"Concise explanation": "Both'\
    ... ' outputs are incorrect, but Output (a) is less confusing and more concise.",\r\n"Output (a) is better than'\
    ... ' Output (b)": true\r\n}}\r\n\r\n### Preferred output in JSON format for example 5:\r\n{{\r\n"Concise'\
    ... ' explanation": "Both outputs are incomplete, but Output (b) seems to start with a more relevant source."'\
    ... ',\r\n"Output (a) is better than Output (b)": false\r\n}}\r\n\r\n### Preferred output in JSON format for'\
    ... ' example 6:\r\n{{\r\n"Concise explanation": "Both outputs are incorrect, but Output (a) is less confusing and'\
    ... ' more concise.",\r\n"Output (a) is better than Output (b)": true\r\n}}\r\n\r\n### Preferred output in JSON' \
    ... ' format for example 7:\r\n{{\r\n"Concise explanation": "Both outputs are incomplete, but Output (b) seems to'\
    ... ' start with a more relevant source.", \r\n"Output (a) is better than Output (b)": false\r\n}}')
    >>> regex_parser(completion, {1: ' true', 2: ' false'})
    [1, 2, 1, 2]
    """
    if completion == "":
        raise ValueError("The completion is empty.")

    for key, pattern in outputs_to_match.items():
        if not isinstance(pattern, re.Pattern):
            # Deliberate in-place modification: the caller's dict keeps the compiled pattern so that later calls
            # skip recompilation. Only existing keys are reassigned, which is safe while iterating.
            outputs_to_match[key] = re.compile(pattern)

    responses = []
    # Strings are immutable, so rebinding this local never affects the caller; no defensive copy is needed.
    remaining = completion
    while True:
        match, key = utils._find_first_match(remaining, outputs_to_match)
        if not match:
            break
        responses.append(key)

        # Resume after this match to avoid matching the same output twice.
        next_start = match.end()
        if next_start == match.start():
            # Zero-width match: slicing at its end would leave the text to scan unchanged from the match position
            # onwards and the same empty match would be found forever. Stop at end of input, else skip one char.
            if next_start >= len(remaining):
                break
            next_start += 1
        remaining = remaining[next_start:]
    return responses