in datasets.py [0:0]
def load_data_single_split(self, split: str, seed: int = None) -> List[InputExample]:
    """
    Load data for a single split (train, dev, or test).

    Reads ``{split}.json`` from this dataset's directory. Each record is
    expected to contain ``tokens`` (the sentence tokens) and ``spo_details``,
    a list of 7-tuples:
    ``(head_start, head_end, head_type, relation_type, tail_start, tail_end, tail_type)``.
    Entities that participate in multiple relations are de-duplicated so that
    relations share a single Entity object per distinct span/type.

    Args:
        split: Split name ('train', 'dev', or 'test'); selects the JSON file.
        seed: Unused; kept for signature compatibility with other loaders.

    Returns:
        A list of InputExample objects, one per sentence, with ids of the
        form ``'{split}-{index}'``.
    """
    def get_or_add(entity: Entity, entities: List[Entity]) -> int:
        # Return the index of `entity` in `entities`, appending it first if
        # absent. NOTE(review): dedup relies on Entity defining value
        # equality (type/start/end) — confirm in the Entity class.
        try:
            return entities.index(entity)
        except ValueError:
            entities.append(entity)
            return len(entities) - 1

    file_path = os.path.join(self.data_dir(), f'{split}.json')
    with open(file_path, 'r') as f:
        data = json.load(f)
    logging.info(f"Loaded {len(data)} sentences for split {split} of {self.name}")

    examples = []
    for i, x in enumerate(data):
        entities: List[Entity] = []
        relations: List[Relation] = []
        for (head_start, head_end, head_type, relation_type,
             tail_start, tail_end, tail_type) in x['spo_details']:
            head = Entity(type=self.entity_types[head_type], start=head_start, end=head_end)
            tail = Entity(type=self.entity_types[tail_type], start=tail_start, end=tail_end)
            # Resolve head before tail so that a self-relation (head == tail)
            # reuses the entity added for the head, matching original behavior.
            i1 = get_or_add(head, entities)
            i2 = get_or_add(tail, entities)
            relations.append(Relation(
                type=self.relation_types[relation_type],
                head=entities[i1], tail=entities[i2],
            ))
        examples.append(InputExample(
            id=f'{split}-{i}',
            tokens=x['tokens'],
            entities=entities,
            relations=relations,
        ))
    return examples