vision/m4/models/vllama3/modeling_vllama3.py (9 lines):
- line 202: # TODO (joao): remove the `if` below, only used for BC
- line 246: self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation
- line 417: # TODO (joao): remove in v4.45 (RoPE is computed in the model, not in the decoder layers)
- line 501: # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
- line 556: # TODO: These transpose are quite inefficient but Flash Attention requires the layout [batch_size, sequence_length, num_heads, head_dim]. We would need to refactor the KV cache [see the layout sketch after this list]
- line 627: # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in LlamaFlashAttention2 __init__.
- line 721: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
- line 1398: # TODO: As of torch==2.2.0, the `attention_mask` passed to the model in `generate` is 2D and of dynamic length even when the static
- line 1731: # TODO: use `next_tokens` directly instead.

vision/smolvlm2/smolvlm/datasets/dataset.py (6 lines):
- line 505: # TODO: define number of retries somewhere else
- line 522: random.seed(42) # TODO: should we set this here, or is this global variable we set anyway? make sure this makes sense.
- line 557: ## TODO: can we simplify this logic??
- line 566: #TODO: we should add this as data arg.
- line 721: #TODO: add logger.warning if no placeholder!
- line 803: #TODO: add warning message if multi image but not enough tokens.

vision/m4/sourcing/pmd/local_loaders/localized_narratives__openimages/localized_narratives__openimages.py (5 lines):
- line 22: # TODO: @thomasw21
- line 25: # TODO: @thomasw21
- line 28: # TODO: @thomasw21
- line 31: # TODO: @thomasw21
- line 44: # # TODO @thomasw21: We download ALL Open-images from a tar file (that also has bounding boxes but oh well), and we filter only those we have annotations for

vision/m4/sourcing/pmd/local_loaders/yfcc100m/yfcc100m.py (4 lines):
- line 21: # TODO: @thomasw21
- line 24: # TODO: @thomasw21
- line 27: # TODO: @thomasw21
- line 30: # TODO: @thomasw21

vision/smolvlm2/smolvlm/datasets/dataset_clip_sampling.py (4 lines):
- line 637: # TODO: define number of retries somewhere else
- line 654: random.seed(42) # TODO: should we set this here, or is this global variable we set anyway? make sure this makes sense.
- line 690: ## TODO: can we simplify this logic??
- line 853: #TODO: add logger.warning if no placeholder!
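The layout TODO at modeling_vllama3.py line 556 is about the mismatch between the `[batch, num_heads, seq_len, head_dim]` layout the KV cache and eager attention use and the `[batch, seq_len, num_heads, head_dim]` layout Flash Attention expects. A minimal sketch of the transpose in question, with hypothetical shapes (none of these names come from the file):

```python
import torch

# Hypothetical shapes illustrating the layout TODO at modeling_vllama3.py line 556.
batch, num_heads, seq_len, head_dim = 2, 8, 16, 64

# The KV cache and eager attention keep tensors as [batch, num_heads, seq_len, head_dim] ...
query = torch.randn(batch, num_heads, seq_len, head_dim)

# ... while Flash Attention wants [batch, seq_len, num_heads, head_dim],
# hence the transposes the comment calls inefficient:
query_fa = query.transpose(1, 2)
assert query_fa.shape == (batch, seq_len, num_heads, head_dim)
assert not query_fa.is_contiguous()  # non-contiguous view; downstream kernels may force a copy
```

Refactoring the KV cache to store tensors in the Flash Attention layout, as the comment suggests, would remove the round-trip transposes, at the cost of touching every attention path that reads the cache.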
vision/m4/sourcing/pmd/local_loaders/localized_narratives__flickr30k/localized_narratives__flickr30k.py (4 lines):
- line 24: # TODO: @thomasw21
- line 27: # TODO: @thomasw21
- line 30: # TODO: @thomasw21
- line 33: # TODO: @thomasw21

vision/m4/sourcing/pmd/local_loaders/localized_narratives__coco/localized_narratives__coco.py (4 lines):
- line 23: # TODO: @thomasw21
- line 26: # TODO: @thomasw21
- line 29: # TODO: @thomasw21
- line 32: # TODO: @thomasw21

vision/m4/sourcing/pmd/local_loaders/localized_narratives__ADE20k/localized_narratives__ADE20k.py (4 lines):
- line 22: # TODO: @thomasw21
- line 25: # TODO: @thomasw21
- line 28: # TODO: @thomasw21
- line 31: # TODO: @thomasw21

vision/m4/sourcing/pmd/local_loaders/coco/coco.py (4 lines):
- line 23: # TODO: @thomasw21
- line 26: # TODO: @thomasw21
- line 29: # TODO: @thomasw21
- line 32: # TODO: @thomasw21

vision/m4/sourcing/data_collection/utils/simplification_utils.py (4 lines):
- line 90: return None # TODO iframes
- line 93: # TODO: have checks on valid URLs
- line 119: # TODO: refine this function. fairly imprefect.
- line 192: # TODO: eventually, for image sizes we could parse cases like

vision/m4/training/dataset.py (3 lines):
- line 1223: # TODO: change this and fix trainer as well when epoch logic
- line 1283: # TODO: Move epoch logic here instead of training loop
- line 1349: # TODO: Aman: It looks like this has somewhat changed from last I checked. On slack, we

vision/m4/training/trainer.py (3 lines):
- line 986: # TODO: This step will go away in future PRs. The dataloader already knows the state when it
- line 1144: # TODO: Allow mem usage to be logged according to LogginTypes passed in hparams
- line 1676: # TODO :: This is actually broken and not respected by `accelerate` - fails!

vision/m4/sourcing/processing/extracting_ngrams/utils.py (2 lines):
- line 12: TODO: update so that we can take in multiple shards
- line 103: # TODO: update

vision/m4/sourcing/processing/extracting_ngrams/extract_documents_ngrams.py (2 lines):
- line 31: # Dirty implem. TODO: update
- line 50: # TODO: get some way of logging number of skipped documents (not at the process level tough...)

vision/m4/sourcing/pmd/helpers.py (2 lines):
- line 123: # TODO @thomasw21 Not really sure we care about versioning ...
- line 134: # TODO @thomasw21: Maybe lru if we're scared of it being too big at some point. [see the cache sketch after this list]
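The caching TODO at pmd/helpers.py line 134 suggests bounding the cache with an LRU policy instead of letting it grow without limit. A minimal sketch of what that swap could look like using `functools.lru_cache`; the wrapped function here is hypothetical, not the one in helpers.py:

```python
from functools import lru_cache

# Hypothetical stand-in for the unbounded cache the TODO worries about growing too big.
@lru_cache(maxsize=4096)  # bounded: least-recently-used entries are evicted past 4096
def load_annotation(path: str) -> bytes:
    with open(path, "rb") as f:
        return f.read()

# lru_cache also exposes hit/miss statistics, handy when tuning maxsize:
# load_annotation.cache_info() -> CacheInfo(hits=..., misses=..., maxsize=4096, currsize=...)
```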
vision/data/datasets_processing_scripts/create_evaluation_datasets/Flickr30k/flickr30k.py (2 lines):
- line 41: # TODO: Victor
- line 46: # TODO: Victor

vision/data/datasets_processing_scripts/build_ethic_dataset/bias_generation_eval_idefics2.py (1 line):
- line 71: """ # TODO: choose for chatty model, in the mean time, i am just removing it

vision/m4/models/vmistral/modeling_vmistral.py (1 line):
- line 1017: ) ** 2 # TODO: pretty sure that does not work for CLIP models since there is the CLS token [see the patch-count sketch at the end of this report]

vision/m4/evaluation/custom_metrics/doc_vqa_metrics.py (1 line):
- line 120: ), # TODO: change that field (and perhaps the dump format itself) to actually match the server

vision/m4/evaluation/generation/deprecated_generation/generate.py (1 line):
- line 61: # TODO: allow for different prompts depending on image

vision/m4/models/vgpt2/configuration_vgpt2.py (1 line):
- line 46: TODO: this doc is completely out of sync with the actual args

vision/smolvlm2/smolvlm/datasets/builder.py (1 line):
- line 313: max_frames = 1 #TODO: verify this is good default

vision/m4/training/dataset_utils.py (1 line):
- line 241: # FIXME test and use an alternate pdf reader/render as default

vision/m4/evaluation/launch.py (1 line):
- line 1: # TODO LAUNCH EVALUATTION

vision/m4/models/idefics/modeling_idefics.py (1 line):
- line 836: # but TODO: come back to this later.

vision/smolvlm2/smolvlm/train/train.py (1 line):
- line 45: #TODO: check what these do.

vision/m4/training/utils.py (1 line):
- line 999: # TODO maybe not necessary to have special treatment?

vision/m4/sourcing/data_collection/utils/fetching_utils.py (1 line):
- line 38: # TODO: Ultimately, should handle that with a real database such as Sqlitedict or Redis, but these require opening and closing a connection to the database,

vision/m4/sourcing/data_collection/visualization/pair_stat_dashboard.py (1 line):
- line 218: # TODO: Add Filtered extraction_method

vision/m4/training/config.py (1 line):
- line 328: # TODO: Move to per dataset params as it makes more sense there
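The vmistral TODO at line 1017 flags a token-count formula that ignores the CLS token CLIP-style encoders prepend. A worked sketch of the off-by-one, with hypothetical config values (224px images, 14px patches, as in CLIP ViT-L/14):

```python
# Hypothetical config values; the formula below is the pattern the TODO questions.
image_size, patch_size = 224, 14

num_patches = (image_size // patch_size) ** 2  # 16 ** 2 = 256 patch tokens
num_tokens_clip = num_patches + 1              # CLIP prepends a CLS token -> 257

# Code that assumes the vision sequence length is exactly (image_size / patch_size) ** 2
# will be off by one for CLIP-style encoders, which is what the comment warns about.
assert num_patches == 256 and num_tokens_clip == 257
```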