in model/mm_dst/gpt2_dst/utils/convert.py [0:0]
def parse_flattened_result(to_parse):
    """
    Parse out the belief state from the raw text.
    Return an empty list if the belief state can't be parsed.

    The text must contain START_BELIEF_STATE exactly once, and the part
    after it must contain END_OF_BELIEF exactly once; otherwise nothing
    is parsed and an empty list is returned.

    Input:
        - A single <str> of flattened result
          e.g. 'User: Show me something else => Belief State : DA:REQUEST ...'

    Output:
        - Parsed result in a JSON format, where the format is:
            [
                {
                    'act': <str>,  # e.g. 'DA:REQUEST'
                    'slots': [
                        [<str> slot_name, <str> slot_value], ...
                    ],
                    'request_slots': [<str> slot_name, ...],
                    'objects': [<int> object_id, ...]
                }, ...  # End of a frame
            ]  # End of a dialog
    """
    # Groups: (1) act, (2) text inside [...], (3) text inside (...),
    # (4) text inside <...>.
    dialog_act_regex = re.compile(
        r'([\w:?.?]*) *\[(.*)\] *\(([^\]]*)\) *\<([^\]]*)\>'
    )
    # NOTE(review): inside the character classes below, `.-:` is parsed as a
    # range from '.' to ':' (i.e. '.', '/', digits, ':'), so a literal '-' is
    # NOT matched. The patterns are kept unchanged to preserve existing parse
    # results -- confirm whether hyphenated slot names are expected.
    slot_regex = re.compile(r"([A-Za-z0-9_.-:]*) *= (\[(.*)\]|[^,]*)")
    request_regex = re.compile(r"([A-Za-z0-9_.-:]+)")
    object_regex = re.compile(r"([A-Za-z0-9]+)")

    belief = []

    # Isolate the text between the belief-state start and end markers.
    splits = to_parse.strip().split(START_BELIEF_STATE)
    if len(splits) != 2:
        return belief
    splits = splits[1].strip().split(END_OF_BELIEF)
    if len(splits) != 2:
        return belief

    # to_parse: 'DIALOG_ACT_1 : [ SLOT_NAME = SLOT_VALUE, ... ] ...'
    to_parse = splits[0].strip()

    for dialog_act in dialog_act_regex.finditer(to_parse):
        slots = [
            [slot.group(1).strip(), slot.group(2).strip()]
            for slot in slot_regex.finditer(dialog_act.group(2))
        ]
        request_slots = [
            request_slot.group(1).strip()
            for request_slot in request_regex.finditer(dialog_act.group(3))
        ]

        objects = []
        for object_id in object_regex.finditer(dialog_act.group(4)):
            try:
                # Object ID should always be <int>; non-numeric tokens are
                # skipped on purpose (best-effort parsing of model output).
                objects.append(int(object_id.group(1).strip()))
            except ValueError:
                continue

        belief.append(
            {
                "act": dialog_act.group(1),
                "slots": slots,
                "request_slots": request_slots,
                "objects": objects,
            }
        )

    return belief