in datasets.py [0:0]
def load_data_single_split(self, split: str, seed: int = None) -> List[InputExample]:
    """
    Load data for a single split (train, dev, or test).

    Reads `{name}_{split}.json` from the dataset directory and converts each
    sentence dict into an InputExample. Only the first trigger of a sentence
    is used, and relations are restricted to those whose tail index points at
    that trigger; sentences with no triggers yield no example at all.

    Args:
        split: Which split to load ('train', 'dev', or 'test').
        seed: Unused; kept for interface compatibility with other loaders.

    Returns:
        A list of InputExample objects, at most one per input sentence.
    """
    examples = []
    name = self.name if self.data_name is None else self.data_name
    file_path = os.path.join(self.data_dir(), f'{name}_{split}.json')

    with open(file_path, 'r') as f:
        data = json.load(f)

    logging.info(f"Loaded {len(data)} sentences for split {split} of {self.name}")

    for i, x in enumerate(data):
        sentence_triggers = x['triggers']
        # NOTE(review): min(1, ...) deliberately caps this at the first
        # trigger only, and skips sentences with no triggers entirely —
        # confirm this restriction is intentional.
        for trigger_id in range(min(1, len(sentence_triggers))):
            entities = [
                Entity(id=j, type=self.entity_types[y['type']], start=y['start'], end=y['end'])
                for j, y in enumerate(x['entities'])
            ]
            # The loop body only runs when at least one trigger exists, so
            # this slice is always a single-element list (redundant
            # emptiness guards from the original were dropped).
            triggers = [
                Entity(id=j, type=self.entity_types[y['type']], start=y['start'], end=y['end'])
                for j, y in enumerate(sentence_triggers[trigger_id:trigger_id + 1])
            ]
            assert len(triggers) <= 1, 'no more than 1 trigger'
            relations = [
                # here we take the trigger as the tail entity of the relation
                Relation(
                    type=self.relation_types[y['type']], head=entities[y['head']], tail=triggers[y['tail']]
                )
                for y in x['relations'] if y['tail'] == trigger_id
            ]
            example = InputExample(
                id=f'{split}-{i}',
                tokens=x['tokens'],
                entities=entities,
                triggers=triggers,
                relations=relations,
            )
            examples.append(example)

    return examples