in dataset-construction/src/ndb_data/generation/map_kelm.py [0:0]
def resolve_first_ref(ref):
    """Parse the leading run of resolvable spans out of a reference string.

    ``$COMMA$`` placeholders in *ref* are turned back into literal commas and
    the string is split on whitespace.  ``get_longest`` is then called
    repeatedly from the current token position; ``restrict_relation`` starts
    as ``False`` and is flipped after every successful match.  The loop makes
    at most three passes and stops as soon as three items are collected.

    After each successful match one of two repair steps may run:

    * If the match does not start where the previous match ended, the
      previous entry is replaced by ``try_recovery`` applied to all tokens
      before the new match, or removed when recovery returns ``None``.
    * Otherwise, once three items exist, if the last match's text has fewer
      words than the cleaned remainder of the string, the last entry is
      replaced by ``try_recovery`` applied to that remainder, or removed when
      recovery returns ``None``.

    Args:
        ref: Raw reference string.

    Returns:
        The list of parsed items (empty when *ref* has no tokens).  Items
        appended here are ``(text, resolved_id)`` tuples; items substituted by
        ``try_recovery`` are whatever that helper returns (presumably the same
        shape -- TODO confirm against ``try_recovery``).
    """
    ref = ref.replace("$COMMA$", ",")
    toks = ref.split()
    parsed = []
    next_id = 0
    is_relation = False
    iteration = 0
    prev_end = 0

    # ``iteration <= 2`` permits three passes; comma skips count as a pass.
    while next_id < len(toks) and iteration <= 2:
        iteration += 1
        # Called before the comma check to keep the original call order of
        # this externally-defined helper; its result is discarded on a comma.
        text, resolved_id, startptr, nextptr = get_longest(
            toks, next_id, restrict_relation=is_relation
        )

        if toks[next_id] == ",":
            # prev_end is intentionally left alone, so the next match sees a
            # gap and the previous entry is re-resolved across the comma.
            next_id += 1
            continue

        if text is None or resolved_id is None:
            # Nothing resolvable at this position: report if partial, stop.
            if len(parsed) > 1:
                print(ref)
                print("Early stop")
                print(toks[next_id:])
                print(parsed)
                print()
            break

        next_id = nextptr + 1
        is_relation = not is_relation
        parsed.append((text, resolved_id))

        if prev_end != startptr:
            # Fix previous resolution.  Only possible when a previous entry
            # exists; without this guard ``parsed[-2]`` raised IndexError when
            # the very first match did not start at ``prev_end`` (e.g. after a
            # leading comma was skipped).
            if len(parsed) >= 2:
                recovered = try_recovery(" ".join(toks[:startptr]))
                if recovered is not None:
                    parsed[-2] = recovered
                else:
                    print(f"Failed to fix {ref}")
                    print(parsed)
                    del parsed[-2]
        elif len(parsed) >= 3:
            # Fix this resolution: the third item should span the rest of the
            # string; if it is shorter, try to resolve the whole remainder.
            remainder = " ".join(toks[startptr:])
            aa = clean(remainder).split()
            if len(parsed[-1][0].split()) < len(aa):
                recovery = try_recovery(remainder)
                if recovery is not None:
                    parsed[-1] = recovery
                else:
                    print(f"Failed to fix2 {ref}")
                    print(parsed)
                    del parsed[-1]

        prev_end = nextptr + 1

        if len(parsed) >= 3:
            break

    return parsed