def __init__()

in src/pixparse/data/datasets_utils.py [0:0]


    def __init__(self, root_dir, split, transform=None) -> None:
        """Load the annotation JSON for one split and record where its files live.

        The annotation file is looked up inside ``<root_dir>/<split>/``. For the
        ``"train"`` split the file name is ``processed_train_v1.0.json``; for
        ``"test"`` and ``"val"`` it is ``<split>_v1.0.json``.

        Args:
            root_dir: Directory that contains one sub-directory per split.
            split: One of ``"train"``, ``"test"`` or ``"val"``.
            transform: Optional object stored unchanged on ``self.transform``;
                it is not invoked by the constructor.

        Raises:
            AssertionError: If ``split`` is not one of the accepted values, if
                ``root_dir`` is not an existing directory, or if the expected
                JSON file does not exist.
        """
        self.extra_tokens = ['<s_answer>', '</s_answer>', '</s_question>', '<s_question>']
        self.root_dir = root_dir
        self.split = split

        # The checks below raise explicitly instead of using ``assert`` so they
        # still run under ``python -O``. AssertionError and the original
        # messages are kept so existing callers observe the same failures.
        if split not in ("train", "test", "val"):
            raise AssertionError("split is not train, test or val.")

        # Only the train split uses the "processed_" file name prefix.
        if split == "train":
            json_name = f"processed_{split}_v1.0.json"
        else:
            json_name = f"{split}_v1.0.json"
        img_dir = os.path.join(root_dir, split)
        json_path = os.path.join(img_dir, json_name)

        if not os.path.isdir(self.root_dir):
            raise AssertionError(
                f"Can't find {root_dir}. Make sure you have DocVQA files locally."
            )
        if not os.path.isfile(json_path):
            raise AssertionError(
                f"{json_path} not found. Make sure you have the processed dataset."
            )
        self.img_dir = img_dir

        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(json_path, 'r', encoding="utf-8") as f:
            self.data_dict = json.load(f)
        # Top-level keys of the loaded JSON object, in file order.
        # NOTE(review): presumably these are image identifiers — confirm
        # against the code that consumes ``all_images``.
        self.all_images = list(self.data_dict)
        self.transform = transform