in datasets.py [0:0]
def load_data_by_relation_type(self, split: str) -> Dict[str, List[InputExample]]:
    """
    Load data for a single split (train or dev) by relation type.
    This is useful for episodic training/evaluation, where we sample N classes at each episode.
    """
    examples_by_type = {}
    file_path = os.path.join(self.data_dir(), f'{self.data_name}_{split}.json')
    with open(file_path, 'r') as f:
        data = json.load(f)

    i = 0
    for type_id in data:
        assert type_id in self.relation_types
        relation_type = self.relation_types[type_id]
        examples = []
        for _data in data[type_id]:
            tokens = _data['tokens']
            # _data['h'] / _data['t'] store the mention's token indices at [2][0];
            # spans are contiguous, so the first and last indices give the entity
            # boundaries (this also covers single-token mentions).
            head_span = _data['h'][2][0]
            tail_span = _data['t'][2][0]
            head_entity = Entity(id=None, type=self.entity_types['head'],
                                 start=head_span[0], end=head_span[-1] + 1)
            tail_entity = Entity(id=None, type=self.entity_types['tail'],
                                 start=tail_span[0], end=tail_span[-1] + 1)
            entities = [head_entity, tail_entity]
            relations = [
                Relation(
                    type=relation_type, head=head_entity, tail=tail_entity
                )
            ]
            example = InputExample(
                id=f'{split}-{i}',
                tokens=tokens,
                entities=entities,
                relations=relations,
            )
            examples.append(example)
            i += 1
        examples_by_type[type_id] = examples

    return examples_by_type
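

# A minimal sketch of how the returned mapping could feed episodic training,
# as mentioned in the docstring above. `sample_episode` and its N/K parameters
# are illustrative assumptions, not part of the original code; only
# `load_data_by_relation_type` and `InputExample` come from this module.
import random
from typing import Tuple


def sample_episode(
    examples_by_type: Dict[str, List[InputExample]],
    n_way: int = 5,     # number of relation classes per episode (assumed)
    k_shot: int = 1,    # support examples per class (assumed)
    n_query: int = 1,   # query examples per class (assumed)
) -> Tuple[List[InputExample], List[InputExample]]:
    """Sample one N-way K-shot episode from the per-relation-type mapping."""
    type_ids = random.sample(sorted(examples_by_type), n_way)
    support, query = [], []
    for type_id in type_ids:
        picked = random.sample(examples_by_type[type_id], k_shot + n_query)
        support.extend(picked[:k_shot])
        query.extend(picked[k_shot:])
    return support, query


# Hypothetical usage, assuming `dataset` is an instance of the class that
# defines load_data_by_relation_type:
#   examples_by_type = dataset.load_data_by_relation_type('train')
#   support, query = sample_episode(examples_by_type, n_way=5, k_shot=1)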