src/pixparse/task/task_cruller_pretrain.py (11 lines):
	- line 26: # FIXME structure of config tree
	- line 34: model: ModelCfg = field(default_factory=ModelCfg)  # FIXME rename model_cfg to diff from model_name?
	- line 39: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 112: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 122: # TODO refactor, used in many tasks
	- line 138: #transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 146: # TODO These metrics have to be organized as dicts of dicts.
	- line 160: FIXME this interface needs refinement
	- line 174: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
	- line 212: # FIXME will need two paths here to support interval vs step based durations
	- line 367: # TODO Add other metrics relevant for eval step


src/pixparse/task/task_cruller_finetune_CORD.py (10 lines):
	- line 49: )  # FIXME rename model_cfg to diff from model_name?
	- line 53: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 209: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 232: # TODO refactor, used in many tasks
	- line 271: # transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 284: FIXME this interface needs refinement
	- line 298: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
	- line 368: # FIXME will need two paths here to support interval vs step based durations
	- line 403: x,  # FIXME move this batcher/tokenizer elsewhere
	- line 526: ) # FIXME not needed anymore? we preprocess everything before


src/pixparse/task/task_cruller_finetune_RVLCDIP.py (9 lines):
	- line 41: )  # FIXME rename model_cfg to diff from model_name?
	- line 45: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 147: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 167: # TODO refactor, used in many tasks
	- line 193: # transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 206: FIXME this interface needs refinement
	- line 235: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
	- line 275: # FIXME will need two paths here to support interval vs step based durations
	- line 309: tokenizer_fn = lambda x: self.tokenizer.trunk(x, #FIXME move this batcher/tokenizer elsewhere


src/pixparse/task/task_cruller_finetune_docvqa.py (8 lines):
	- line 49: )  # FIXME rename model_cfg to diff from model_name?
	- line 53: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 120: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 170: # transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 183: FIXME this interface needs refinement
	- line 197: # FIXME currently thinking moving to device, setup DDP / FSDP makes sense
	- line 254: # FIXME will need two paths here to support interval vs step based durations
	- line 405: ) # FIXME not needed anymore? we preprocess everything before


src/pixparse/task/task_cruller_finetune_xent.py (8 lines):
	- line 35: model: ModelCfg = field(default_factory=ModelCfg)  # FIXME rename model_cfg to diff from model_name?
	- line 38: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 102: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 112: # TODO refactor, used in many tasks
	- line 128: #transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 144: # FIXME pass along resume arg here
	- line 203: # FIXME will need two paths here to support interval vs step based durations
	- line 315: # TODO Remove eval method from train dataclass?


src/pixparse/data/preprocess.py (7 lines):
	- line 52: # FIXME complete and update this fn to match our OCR annotation format
	- line 54: # FIXME this was an intermediate annotation form, should not exist anymore
	- line 72: # FIXME for initial behaviour we will randomly sample one of N pages
	- line 73: # TODO determine if we want to train in multi-page mode, use another sampling strategy?
	- line 83: n_wanted_pages = min(1, num_pages) #TODO increase that number for multipage processing
	- line 86: # FIXME treating pages separately, this best approach or tokenize w/ page-break?
	- line 90: # FIXME see self.donut_model.json2token, task specific json tokenization for


src/pixparse/task/task_cruller_eval_cord.py (7 lines):
	- line 35: )  # FIXME rename model_cfg to diff from model_name?
	- line 39: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 162: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 195: # TODO refactor, used in many tasks
	- line 222: # transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 300: # TODO move this to a __getitem__ for pickling
	- line 391: # TODO do that, call average_metrics in end


src/pixparse/task/task_cruller_eval_ocr.py (7 lines):
	- line 30: )  # FIXME rename model_cfg to diff from model_name?
	- line 34: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 115: # TODO refactor, used in many tasks
	- line 140: # transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 148: # TODO These metrics have to be organized as dicts of dicts.
	- line 224: # TODO Add other metrics relevant for eval step
	- line 244: # TODO do that, call average_metrics in end


src/pixparse/task/task_cruller_eval_rvlcdip.py (6 lines):
	- line 28: )  # FIXME rename model_cfg to diff from model_name?
	- line 32: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 138: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 170: # transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 309: # TODO Add other metrics relevant for eval step
	- line 329: # TODO do that, call average_metrics in end


src/pixparse/models/text_decoder_hf.py (6 lines):
	- line 10: def create_text_decoder(cfg: TextDecoderCfg) -> transformers.BartForCausalLM:  # FIXME for type hints
	- line 15: if False:  # FIXME these were set in Donut but missed in first pass, should compare
	- line 23: #config.vocab_size =   # FIXME set vocab size here or rely on model resize when tokens added?
	- line 34: # TODO Following is the donut hack. Unused without generate().
	- line 76: "encoder_hidden_states": encoder_outputs #.last_hidden_state, #FIXME for timm ViT encoder there is no last hidden state
	- line 91: # FIXME is this always going to be a direct pass through or will some tasks/models


src/pixparse/task/task_cruller_eval_docvqa.py (6 lines):
	- line 36: )  # FIXME rename model_cfg to diff from model_name?
	- line 40: # FIXME figure out how to get command line args to overlay on top pre-defined
	- line 108: self.model = Cruller(cfg.model)  # FIXME would be good to defer weight init here
	- line 141: # TODO refactor, used in many tasks
	- line 168: # transforms.CenterCrop(448),  # FIXME need better aspect preserving resize & pad
	- line 313: # TODO do that, call average_metrics in end


src/pixparse/app/eval.py (5 lines):
	- line 59: # FIXME how do we log metrics per dataset?
	- line 106: # FIXME defer load checkpoint to task?
	- line 112: # FIXME check if path is local or s3?
	- line 152: # FIXME add common functionality for loader selection per task
	- line 162: create_decoder_pipe=create_image_text_pipe, # TODO abstract away type of decoder needed


src/pixparse/app/train.py (5 lines):
	- line 55: # FIXME flatten interval loop to have one eval point
	- line 104: #TODO make completion of exp name derived from essential hparams
	- line 132: # FIXME make optional for resume.
	- line 140: # FIXME check if path is local or s3?
	- line 180: create_decoder_pipe=create_doc_anno_pipe,  # TODO abstract away type of decoder needed


src/pixparse/utils/ocr_utils.py (3 lines):
	- line 59: )  # FIXME the -100 id token is there to be ignored by cross entropy, we replace it by padding
	- line 79: # FIXME sometimes we are decoding no text at all after cleaning
	- line 169: # TODO This "hacky" function should eventually be replaced by .generate() from GenerationMixin that does the same thing.


src/pixparse/data/loader.py (3 lines):
	- line 30: image_key="pdf;tif;tiff;png;jpg;jpeg",  # FIXME jpeg added for test w/ cc12m
	- line 68: # TODO add factory for dataloaders?
	- line 84: root_dir=f"/fsx/pablo/.cache/{cfg.source}",  # FIXME hacky hack


src/pixparse/task/task_donut_eval_ocr.py (3 lines):
	- line 152: # FIXME sometimes we are decoding no text at all after cleaning
	- line 179: # TODO Add other metrics relevant for eval step
	- line 199: # TODO do that, call average_metrics in end


src/pixparse/framework/task.py (1 line):
	- line 84: # TODO Remove eval method from train dataclass


src/pixparse/tokenizers/tokenizer_hf.py (1 line):
	- line 8: extra_kwargs = {} #FIXME do we want to pass additional_special_tokens here? they are task-specific


src/pixparse/framework/monitor.py (1 line):
	- line 283: # FIXME log interval (epoch) summaries to tensorboard?


src/pixparse/framework/device.py (1 line):
	- line 111: # FIXME support different device types, just using cuda to start


src/pixparse/models/image_encoder_timm.py (1 line):
	- line 22: # FIXME need to add support for changing input resolution / attn window sizes for models like swin,


src/pixparse/framework/eval.py (1 line):
	- line 10: # FIXME (Pablo) not sure if I understand this correctly,


src/pixparse/models/config.py (1 line):
	- line 29: pad_token_id: Optional[int] = None # FIXME move this to TokenizerCfg?


src/pixparse/data/transforms.py (1 line):
	- line 38: # FIXME design a config class to cover coarse and fine-grained aug options


src/pixparse/data/config.py (1 line):
	- line 7: # FIXME preprocessing hard coded in Task right now


src/pixparse/framework/config.py (1 line):
	- line 39: model_state_dict: dict = field(default_factory=dict) #FIXME move out state dict into dict of dict