src/pixparse/task/task_cruller_pretrain.py (11 lines):
    - line 26: # FIXME structure of config tree
    - line 34: model: ModelCfg = field(default_factory=ModelCfg) # FIXME rename model_cfg to diff from model_name?
    - line 39: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 112: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 122: # TODO refactor, used in many tasks
    - line 138: #transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 146: # TODO These metrics have to be organized as dicts of dicts.
    - line 160: FIXME this interface needs refinement
    - line 174: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
    - line 212: # FIXME will need two paths here to support interval vs step based durations
    - line 367: # TODO Add other metrics relevant for eval step
src/pixparse/task/task_cruller_finetune_CORD.py (10 lines):
    - line 49: ) # FIXME rename model_cfg to diff from model_name?
    - line 53: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 209: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 232: # TODO refactor, used in many tasks
    - line 271: # transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 284: FIXME this interface needs refinement
    - line 298: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
    - line 368: # FIXME will need two paths here to support interval vs step based durations
    - line 403: x, # FIXME move this batcher/tokenizer elsewhere
    - line 526: ) # FIXME not needed anymore? we preprocess everything before
src/pixparse/task/task_cruller_finetune_RVLCDIP.py (9 lines):
    - line 41: ) # FIXME rename model_cfg to diff from model_name?
    - line 45: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 147: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 167: # TODO refactor, used in many tasks
    - line 193: # transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 206: FIXME this interface needs refinement
    - line 235: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
    - line 275: # FIXME will need two paths here to support interval vs step based durations
    - line 309: tokenizer_fn = lambda x: self.tokenizer.trunk(x, #FIXME move this batcher/tokenizer elsewhere
src/pixparse/task/task_cruller_finetune_docvqa.py (8 lines):
    - line 49: ) # FIXME rename model_cfg to diff from model_name?
    - line 53: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 120: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 170: # transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 183: FIXME this interface needs refinement
    - line 197: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
    - line 254: # FIXME will need two paths here to support interval vs step based durations
    - line 405: ) # FIXME not needed anymore? we preprocess everything before
src/pixparse/task/task_cruller_finetune_xent.py (8 lines):
    - line 35: model: ModelCfg = field(default_factory=ModelCfg) # FIXME rename model_cfg to diff from model_name?
    - line 38: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 102: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 112: # TODO refactor, used in many tasks
    - line 128: #transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 144: # FIXME pass along resume arg here
    - line 203: # FIXME will need two paths here to support interval vs step based durations
    - line 315: # TODO Remove eval method from train dataclass?
src/pixparse/data/preprocess.py (7 lines):
    - line 52: # FIXME complete and update this fn to match our OCR annotation format
    - line 54: # FIXME this was an intermediate annotation form, should not exist anymore
    - line 72: # FIXME for initial behaviour we will randomly sample one of N pages
    - line 73: # TODO determine if we want to train in multi-page mode, use another sampling strategy?
    - line 83: n_wanted_pages = min(1, num_pages) #TODO increase that number for multipage processing
    - line 86: # FIXME treating pages separately, this best approach or tokenize w/ page-break?
    - line 90: # FIXME see self.donut_model.json2token, task specific json tokenization for
src/pixparse/task/task_cruller_eval_cord.py (7 lines):
    - line 35: ) # FIXME rename model_cfg to diff from model_name?
    - line 39: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 162: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 195: # TODO refactor, used in many tasks
    - line 222: # transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 300: # TODO move this to a __getitem__ for pickling
    - line 391: # TODO do that, call average_metrics in end
src/pixparse/task/task_cruller_eval_ocr.py (7 lines):
    - line 30: ) # FIXME rename model_cfg to diff from model_name?
    - line 34: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 115: # TODO refactor, used in many tasks
    - line 140: # transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 148: # TODO These metrics have to be organized as dicts of dicts.
    - line 224: # TODO Add other metrics relevant for eval step
    - line 244: # TODO do that, call average_metrics in end
src/pixparse/task/task_cruller_eval_rvlcdip.py (6 lines):
    - line 28: ) # FIXME rename model_cfg to diff from model_name?
    - line 32: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 138: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 170: # transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 309: # TODO Add other metrics relevant for eval step
    - line 329: # TODO do that, call average_metrics in end
src/pixparse/models/text_decoder_hf.py (6 lines):
    - line 10: def create_text_decoder(cfg: TextDecoderCfg) -> transformers.BartForCausalLM: # FIXME for type hints
    - line 15: if False: # FIXME this were set in Donut but missed in first pass, should compare
    - line 23: #config.vocab_size = # FIXME set vocab size here or rely on model resize when tokens added?
    - line 34: # TODO Following is the donut hack. Unused without generate().
    - line 76: "encoder_hidden_states": encoder_outputs #.last_hidden_state, #FIXME for timm ViT encoder there is no last hidden state
    - line 91: # FIXME is this always going to be a direct pass through or will some tasks/models
src/pixparse/task/task_cruller_eval_docvqa.py (6 lines):
    - line 36: ) # FIXME rename model_cfg to diff from model_name?
    - line 40: # FIXME figure out how to get command line args to overlay on top pre-defined
    - line 108: self.model = Cruller(cfg.model) # FIXME would be good to defer weight init here
    - line 141: # TODO refactor, used in many tasks
    - line 168: # transforms.CenterCrop(448), # FIXME need better aspect preserving resize & pad
    - line 313: # TODO do that, call average_metrics in end
src/pixparse/app/eval.py (5 lines):
    - line 59: # FIXME how do we log metrics per dataset?
    - line 106: # FIXME defer load checkpoint to task?
    - line 112: # FIXME check if path is local or s3?
    - line 152: # FIXME add common functionality for loader selection per task
    - line 162: create_decoder_pipe=create_image_text_pipe, # TODO abstract away type of decoder needed
src/pixparse/app/train.py (5 lines):
    - line 55: # FIXME flatten interval loop to have one eval point
    - line 104: #TODO make completion of exp name derived from essential hparams
    - line 132: # FIXME make optional for resume.
    - line 140: # FIXME check if path is local or s3?
    - line 180: create_decoder_pipe=create_doc_anno_pipe, # TODO abstract away type of decoder needed
src/pixparse/utils/ocr_utils.py (3 lines):
    - line 59: ) # FIXME the -100 id token is there to be ignored by cross entropy, we replace it by padding
    - line 79: # FIXME sometimes we are decoding no text at all after cleaning
    - line 169: # TODO This "hacky" function should eventually be replaced by .generate() from GenerationMixin that does the same thing.
src/pixparse/data/loader.py (3 lines):
    - line 30: image_key="pdf;tif;tiff;png;jpg;jpeg", # FIXME jpeg added for test w/ cc12m
    - line 68: # TODO afdd factory for dataloaders?
    - line 84: root_dir=f"/fsx/pablo/.cache/{cfg.source}", # FIXME hacky hack
src/pixparse/task/task_donut_eval_ocr.py (3 lines):
    - line 152: # FIXME sometimes we are decoding no text at all after cleaning
    - line 179: # TODO Add other metrics relevant for eval step
    - line 199: # TODO do that, call average_metrics in end
src/pixparse/framework/task.py (1 line):
    - line 84: # TODO Remove eval method from train dataclass
src/pixparse/tokenizers/tokenizer_hf.py (1 line):
    - line 8: extra_kwargs = {} #FIXME do we want to pass additional_special_tokens here? they are task-specific
src/pixparse/framework/monitor.py (1 line):
    - line 283: # FIXME log interval (epoch) summaries to tensorboard?
src/pixparse/framework/device.py (1 line):
    - line 111: # FIXME support different device types, just using cuda to start
src/pixparse/models/image_encoder_timm.py (1 line):
    - line 22: # FIXME need to add support for changing input resolution / attn window sizes for models like swin,
src/pixparse/framework/eval.py (1 line):
    - line 10: # FIXME (Pablo) not sure if I understand this correctly,
src/pixparse/models/config.py (1 line):
    - line 29: pad_token_id: Optional[int] = None # FIXME move this to TokenizerCfg?
src/pixparse/data/transforms.py (1 line):
    - line 38: # FIXME design a config class to cover coarse and fine-grained aug options
src/pixparse/data/config.py (1 line):
    - line 7: # FIXME preprocessing hard coded in Task right now
src/pixparse/framework/config.py (1 line):
    - line 39: model_state_dict: dict = field(default_factory=dict) #FIXME move out state dict into dict of dict