in datasets.py [0:0]
def load_data_single_split(self, split: str, seed: int = None) -> List[InputExample]:
    """
    Load data for a single split (train, dev, or test).

    Args:
        split: Name of the split to load ('train', 'dev', or 'test');
            the data is read from ``<data_dir>/json/<split>.json``.
        seed: Unused; kept for interface compatibility with other
            dataset loaders that accept a seed.

    Returns:
        A list of InputExample objects, one per sentence, each carrying
        exactly one head entity, one tail entity, and one relation.

    Side effects:
        Resets and repopulates ``self.entity_types`` and
        ``self.relation_types`` with the types seen while parsing.
    """
    file_path = os.path.join(self.data_dir(), f'json/{split}.json')

    # Type registries are (re)filled while parsing this split.
    self.entity_types = {}
    self.relation_types = {}

    with open(file_path, 'r') as f:
        data = json.load(f)

    logging.info(f"Loaded {len(data)} sentences for split {split} of {self.name}")

    examples = []
    for idx, obj in enumerate(data):
        words = obj['token']
        # The dataset stores inclusive end indices; convert to exclusive.
        head_start, head_end, head_type = obj['subj_start'], obj['subj_end'] + 1, obj['subj_type']
        tail_start, tail_end, tail_type = obj['obj_start'], obj['obj_end'] + 1, obj['obj_type']
        relation = obj['relation']

        # Register any previously unseen entity/relation types.
        if head_type not in self.entity_types:
            self.entity_types[head_type] = EntityType(short=head_type, natural=self.to_natural(head_type))
        if tail_type not in self.entity_types:
            self.entity_types[tail_type] = EntityType(short=tail_type, natural=self.to_natural(tail_type))
        if relation not in self.relation_types:
            self.relation_types[relation] = RelationType(short=relation, natural=self.to_natural(relation))

        head_entity = Entity(
            id=None,
            type=self.entity_types[head_type],
            start=head_start,
            end=head_end,
        )
        tail_entity = Entity(
            id=None,
            type=self.entity_types[tail_type],
            start=tail_start,
            end=tail_end,
        )

        examples.append(InputExample(
            # Use enumerate's index directly; the original kept a redundant
            # manual counter `i` alongside an unused `idx`.
            id=f'{split}-{idx}',
            tokens=words,
            entities=[head_entity, tail_entity],
            relations=[Relation(type=self.relation_types[relation], head=head_entity, tail=tail_entity)],
        ))

    # NOTE: the original rebuilt self.relation_types from `examples` here.
    # That rebuild was a no-op: every relation type appearing in examples
    # was already registered above under the same `short` key, with the
    # same RelationType object as its value, so it has been removed.
    return examples