src/chug/doc/doc_read_processor.py (7 lines):
  - line 49: # FIXME for initial behaviour we will randomly sample one of N pages
  - line 50: # TODO determine if we want to train in multi-page mode, use another sampling strategy?
  - line 54: n_wanted_pages = min(1, num_pages) # TODO increase for multi-page processing, rand start+end?
  - line 79: # FIXME currently encoding each page separately with own start/end tokens.
  - line 123: # FIXME cleanup, split process and decode for more flexibility
  - line 136: # FIXME warn assert that target not supported w/o text preprocessing?
  - line 172: # TODO support 'image info' to relay details such as text bbox, layout
  (a possible page-sampling helper is sketched at the end of this report)

src/chug/image/build_transforms_image.py (4 lines):
  - line 49: # FIXME map aug_cfg -> timm.create_transform args more carefully
  - line 54: use_prefetcher=not do_normalize, # FIXME prefetcher mode disables normalize, but outputs np.array
  - line 75: use_prefetcher=not do_normalize, # FIXME prefetcher mode disables normalize, but outputs np.array
  - line 122: # FIXME note we don't have good option for 'longest' resizing w/ RRC

src/chug/image/build_transforms_doc.py (4 lines):
  - line 71: # FIXME merge defaults w/ aug_cfg
  - line 205: # FIXME merge defaults w/ aug_cfg
  - line 289: alpha_affine=12., # FIXME no common param, alpha_affine unique to alb
  - line 324: # FIXME leave alb uncomposed too if composed=False?

src/chug/wds/decode.py (4 lines):
  - line 59: # FIXME test and use an alternate pdf reader/render as default
  - line 205: # FIXME need to work out padding / selection issues for multi-page support
  - line 208: assert page_sampling in {'random', 'first', 'last'} # TODO add 'all' w/ multi-page support
  - line 298: enable_doc: bool = False, # FIXME enable doc support by default once tested?

src/chug/hfds/loader.py (3 lines):
  - line 107: # FIXME num_samples calc, get a reliable estimate from dataset in streaming mode
  - line 120: # FIXME split_dataset_by_node has some concerns as currently implemented
  - line 133: drop_last=batched and is_training, # FIXME improve wrt train vs validation vs sharding specifics

src/chug/doc/doc_vqa_processor.py (3 lines):
  - line 56: # FIXME support flexible q/a prompting formats, do with prefix/suffix or template strings?
  - line 68: # FIXME multi qa expansion is a WIP
  - line 108: # FIXME implement expansion for multi-qa (and eventually multi-page option)
  (a possible template-based prompting approach is sketched at the end of this report)

src/chug/loader.py (2 lines):
  - line 72: # FIXME make validation task specific once we have tasks that don't require both image and text preproc
  - line 121: multi_interval=True, # FIXME via config?

src/chug/common/config.py (2 lines):
  - line 227: # FIXME add code to resolve shard information from _info.yaml or .json files (see dataset_info.py)
  - line 255: sizes = None # FIXME resolve sizes

src/chug/doc/doc_processor.py (2 lines):
  - line 105: # FIXME note, should move to torchvision v2 annotations at some point
  - line 192: # FIXME separate decode & preprocess interfaces

src/chug/text/tokenization.py (2 lines):
  - line 92: # FIXME calculate prompt end pos for validation use (target not needed)
  - line 114: # FIXME just binding prepare_text_input fn for now

src/chug/wds/shardlists.py (1 line):
  - line 26: self.num_sub_intervals = num_sub_intervals # FIXME experimental feature

src/chug/wds/dataset_info.py (1 line):
  - line 30: ## FIXME this is not working / not completed, parsing _info files is a TODO

src/chug/task_pipeline/pipeline_image_text.py (1 line):
  - line 40: # FIXME add support for caption target for caption tasks or use a separate pipe?
src/chug/common/types.py (1 line):
  - line 67: # TODO resolve dataset info and track base url, shard info (sizes, etc)

src/chug/task_pipeline/pipeline_gtparse.py (1 line):
  - line 39: FIXME add support for caption target for caption tasks or separate pipe?

src/chug/wds/tariterators.py (1 line):
  - line 49: # FIXME wds version throws if suffix in current_sample, but we have a potential for

src/chug/image/transforms_torch.py (1 line):
  - line 116: # FIXME check tensor vs PIL and convert as needed, this is assuming PIL right now

src/chug/wds/loader.py (1 line):
  - line 104: # FIXME support distributed eval

src/chug/task_pipeline/pipeline_doc_vqa.py (1 line):
  - line 74: # FIXME prompt templates instead of prefix+suffix above?
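Two of the recurring items above lend themselves to concrete sketches.

First, the multi-page sampling notes in src/chug/doc/doc_read_processor.py (lines 49-54) and src/chug/wds/decode.py (lines 205-208) could converge on a single page-selection helper. This is only a sketch: `sample_pages` is a hypothetical name, and the 'all' mode is an assumed extension of the existing {'random', 'first', 'last'} set, not something the library currently implements.

```python
import random
from typing import List, Sequence


def sample_pages(
        pages: Sequence,
        page_sampling: str = 'random',
        n_wanted_pages: int = 1,
) -> List:
    """Hypothetical helper: select pages from a multi-page document."""
    num_pages = len(pages)
    n = min(n_wanted_pages, num_pages)
    if page_sampling == 'first':
        return list(pages[:n])
    if page_sampling == 'last':
        return list(pages[num_pages - n:])
    if page_sampling == 'all':
        return list(pages)
    if page_sampling == 'random':
        # pick a contiguous random window so page order is preserved
        start = random.randint(0, num_pages - n)
        return list(pages[start:start + n])
    raise ValueError(f"unknown page_sampling mode: {page_sampling}")
```

Second, the prompt-formatting questions in src/chug/doc/doc_vqa_processor.py (line 56) and src/chug/task_pipeline/pipeline_doc_vqa.py (line 74) could be handled with template strings rather than fixed prefix+suffix pairs. The class name and token strings below are assumptions used for illustration, not the library's current API; Donut-style task tokens are shown only as an example of what the defaults might look like.

```python
from dataclasses import dataclass


@dataclass
class QaPromptTemplate:
    """Hypothetical template-based alternative to fixed prefix + suffix strings."""
    question_template: str = '<s_question>{question}</s_question>'
    answer_template: str = '<s_answer>{answer}</s_answer>'

    def format_prompt(self, question: str) -> str:
        # prompt fed to the decoder at inference time
        return self.question_template.format(question=question)

    def format_target(self, question: str, answer: str) -> str:
        # full sequence used as the training target
        return self.format_prompt(question) + self.answer_template.format(answer=answer)


# example usage
tmpl = QaPromptTemplate()
print(tmpl.format_target('what is the invoice date?', '2024-03-01'))
```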