def __init__()

in cached_classes.py [0:0]


    def __init__(self):
        """Load the cached train/test features and the features config.

        Both splits are read from ``CACHED_FEATURES`` (formatted with
        ``split='train'`` and ``split='test'``) and turned into lookup
        tables stored on ``self.train_data`` and ``self.test_data``.
        The YAML at ``CONFIG_FILE_PATH`` is stored on
        ``self.features_config``.
        """
        # Read both cache files up front, before any lookup table is built.
        raw_train = read_jsonl(CACHED_FEATURES.format(split='train'))
        raw_test = read_jsonl(CACHED_FEATURES.format(split='test'))

        # Each record is keyed by its (prompt, completion) pair; the list-like
        # 'prompt_completion' field is converted to a tuple so it is hashable.
        # If a key repeats, the later record's features win.
        self.train_data = dict(
            (tuple(record['prompt_completion']), record['features'])
            for record in raw_train
        )
        self.test_data = dict(
            (tuple(record['prompt_completion']), record['features'])
            for record in raw_test
        )

        # The config is not needed here because the features are already
        # cached; it is the configuration that produced the cached data.
        self.features_config = read_yaml(CONFIG_FILE_PATH)