in utils/convert_json_to_jsonlines.py [0:0]
def get_token_offset(mention, sentences):
    """Map a mention's character span to document-level token indices.

    The sentence at ``sentences[mention["turn_id"] - 1]`` is walked token by
    token while accumulating ``len(token) + 1`` characters per token, i.e.
    the character offsets are interpreted as if the tokens were joined by
    single spaces.  The start (resp. end) token is the first token whose
    cumulative length exceeds the span's start (resp. end) character offset.

    Args:
        mention: Mapping providing ``"turn_id"`` and a ``"span"`` mapping
            with ``"start"`` and ``"end"`` character offsets.
            NOTE(review): ``turn_id`` is presumably 1-based, since 1 is
            subtracted before indexing -- confirm against the data.
        sentences: Sequence of tokenized sentences (each a sequence of
            string tokens).

    Returns:
        A ``(start, end)`` tuple of token indices, each shifted by the total
        number of tokens in the sentences that precede the mention's
        sentence.

        If a boundary cannot be located because the character offset lies
        beyond the end of the sentence, it falls back to the last token of
        the sentence instead of leaking a ``-1`` sentinel into the result
        (which previously produced an end index before the start index).
        For an empty sentence that fallback index is ``-1``.
    """
    sentence_idx = mention["turn_id"] - 1
    context_sentence = sentences[sentence_idx]
    span = mention["span"]
    mention_char_start, mention_char_end = span["start"], span["end"]

    # Number of tokens in every sentence before the mention's sentence.
    token_offset = sum(len(sent) for sent in sentences[:sentence_idx])

    mention_token_start = None
    mention_token_end = None
    char_offset = 0
    for token_idx, token in enumerate(context_sentence):
        # +1 accounts for the single separator assumed after each token.
        char_offset += len(token) + 1
        if mention_token_start is None and char_offset > mention_char_start:
            mention_token_start = token_idx
        if char_offset > mention_char_end:
            mention_token_end = token_idx
            break

    # A span that overruns the sentence leaves one or both boundaries
    # unresolved; fall back to the final token rather than a bogus -1.
    last_token_idx = len(context_sentence) - 1
    if mention_token_start is None:
        mention_token_start = last_token_idx
    if mention_token_end is None:
        mention_token_end = last_token_idx

    return token_offset + mention_token_start, token_offset + mention_token_end