in cached_classes.py [0:0]
def __init__(self):
    """Load the cached train/test features and the features configuration.

    Each split is read with ``read_jsonl`` from the path produced by
    ``CACHED_FEATURES.format(split=...)`` and stored as a dict that maps
    ``tuple(record['prompt_completion'])`` to ``record['features']``.
    """

    def _index_by_pair(records):
        # Entries are looked up by their (prompt, completion) pair; tuple()
        # turns the pair into a key usable in a dict. If the same pair
        # appears more than once, the last record wins.
        lookup = {}
        for record in records:
            pair = tuple(record['prompt_completion'])
            lookup[pair] = record['features']
        return lookup

    # Read both splits first, then index them.
    raw_by_split = [
        read_jsonl(CACHED_FEATURES.format(split=split_name))
        for split_name in ('train', 'test')
    ]
    train_records, test_records = raw_by_split

    self.train_data = _index_by_pair(train_records)
    self.test_data = _index_by_pair(test_records)

    # Not consulted in this example because every feature is already
    # cached; this is the configuration that generated the cached data.
    self.features_config = read_yaml(CONFIG_FILE_PATH)