in datasets.py [0:0]
def load_data_single_split(self, split: str, seed: Optional[int] = None) -> List[InputExample]:
    """
    Load data for a single split (train, dev, or test).

    Each line of the split file is a JSON object holding the raw sentence
    text and its annotated events. The `seed` argument is accepted for
    interface compatibility but is not used here.
    """
    # Assumes module-level imports: os, json, List/Optional from typing,
    # plus the project-local Entity, EntityType, and InputExample classes.
    file_path = os.path.join(self.data_dir(), f'{split}.json')
    i = 0  # running index used to build unique example ids
    examples = []
    with open(file_path, 'r') as f:
        for line in f:
            obj = json.loads(line)
            text = obj['raw_text']
            text_split = text.split()
            # char_to_word_idx[c] gives the index of the whitespace-delimited
            # word that character offset c falls in.
            char_to_word_idx = self.get_word_idx(text)
            for event in obj['events']:
                # Trigger span: character offsets with an exclusive end.
                # Convert to word offsets, also exclusive on the right.
                start_char, end_char = event['trigger']['start'], event['trigger']['end']
                start_word, end_word = char_to_word_idx[start_char], char_to_word_idx[end_char - 1] + 1
                predicate = Entity(
                    id=None, type=EntityType(short='V', natural='predicate'),
                    start=start_word, end=end_word,
                )
                arguments = []
                for arg in event['arguments']:
                    # Each argument is expected to carry exactly one value span.
                    assert len(arg['values']) == 1
                    value = arg['values'][0]
                    # Convert the argument span from character to word offsets.
                    arg_start_word = char_to_word_idx[value['start']]
                    arg_end_word = char_to_word_idx[value['end'] - 1] + 1
                    arg_name = arg['name']
                    # Prefer a natural-language type name when one is defined,
                    # falling back to the raw role name otherwise.
                    argument = Entity(
                        id=None,
                        type=EntityType(
                            short=arg_name,
                            natural=self.natural_entity_types.get(arg_name, arg_name),
                        ),
                        start=arg_start_word,
                        end=arg_end_word,
                    )
                    arguments.append(argument)
                # One InputExample per event: the arguments are the entities
                # to predict, and the trigger is kept as a sentence-level entity.
                example = InputExample(
                    id=f'{split}-{i}',
                    tokens=text_split,
                    entities=arguments,
                    relations=[],
                    sentence_level_entities=[predicate],
                )
                examples.append(example)
                i += 1
    # Register the argument role types seen in this split, keyed by their
    # natural-language names. The trigger type 'V' does not appear here,
    # since triggers are stored in sentence_level_entities, not entities.
    self.entity_types = {
        entity.type.natural: entity.type
        for example in examples for entity in example.entities
    }
    return examples
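
For reference, here is the record shape the loader implies, reconstructed purely from the fields accessed above. The sentence, offsets, and role names (ARG0/ARG1) are illustrative, and the real files may carry extra fields:

# One hypothetical line of the split file; character offsets index into
# raw_text, and span ends are exclusive.
{
    "raw_text": "The committee dismissed the proposal.",
    "events": [
        {
            "trigger": {"start": 14, "end": 23},  # "dismissed"
            "arguments": [
                {"name": "ARG0", "values": [{"start": 0, "end": 13}]},   # "The committee"
                {"name": "ARG1", "values": [{"start": 24, "end": 36}]},  # "the proposal"
            ],
        }
    ],
}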
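
The offset conversion hinges on self.get_word_idx, defined elsewhere in the file. From how its result is indexed, it must map every character offset of the raw text to the index of the whitespace-delimited word containing it, consistent with text.split(). A minimal sketch of such a helper, written as a standalone function (the repository's actual implementation may differ):

from typing import List

def get_word_idx(text: str) -> List[int]:
    """Map each character offset of `text` to its containing word index."""
    char_to_word: List[int] = []
    word_idx = -1
    inside_word = False
    for ch in text:
        if ch.isspace():
            # Whitespace keeps the previous word's index; the loader only
            # indexes offsets inside annotated spans, so this never matters.
            inside_word = False
        elif not inside_word:
            # First character of a new word.
            word_idx += 1
            inside_word = True
        char_to_word.append(word_idx)
    return char_to_word

With this mapping, char_to_word_idx[start_char] is the first word of a span and char_to_word_idx[end_char - 1] + 1 is its exclusive word-level end, which is exactly how the loader uses it.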
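
A quick way to sanity-check the word-level spans is to slice them back out of the token list (a usage sketch; `dataset` stands in for an instance of the dataset class):

examples = dataset.load_data_single_split('train')
ex = examples[0]
trigger = ex.sentence_level_entities[0]
# Spans are exclusive on the right, so they slice the token list directly.
print(' '.join(ex.tokens[trigger.start:trigger.end]))  # trigger surface form
for argument in ex.entities:
    print(argument.type.natural, ' '.join(ex.tokens[argument.start:argument.end]))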