optimum/exporters/neuron/model_configs.py (10 lines):
- line 108: ATOL_FOR_VALIDATION = 1e-1 # TODO: why accuracy more off than other arch
- line 361: # TODO: We should decouple clip text and vision, this would need fix on Optimum main. For the current workaround
- line 535: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 548: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 589: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 604: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 661: # TODO : add text_image, image and image_embeds
- line 967: ) # TODO: add extra args, eg. revision, trust_remote_code, etc.
- line 1013: "attention_mask", # TODO: replace with `encoder_attention_mask` after optimum 1.14 release
- line 1114: ) # TODO: add extra args, eg. revision, trust_remote_code, etc.

optimum/neuron/pipelines/diffusers/pipeline_controlnet.py (4 lines):
- line 214: # TODO: support guess mode of ControlNet
- line 240: # TODO: support ip adapter
- line 321: # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
- line 324: # TODO: 7.1 Add image embeds for IP-Adapter

optimum/exporters/neuron/__main__.py (3 lines):
- line 328: # TODO: Enable optional outputs for Stable Diffusion
- line 360: # TODO: Enable optional outputs for encoders
- line 657: # TODO: support the validation of tp models.

optimum/neuron/pipelines/diffusers/pipeline_controlnet_sd_xl.py (3 lines):
- line 282: # TODO: Remove after the guess mode of ControlNet is supported
- line 313: # TODO: support ip adapter
- line 389: # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/neuron/models/inference/backend/config.py (3 lines):
- line 96: # TODO: these flags are suposed to work in NxDI. Either make them work or remove them
- line 144: # TODO: Check if we really need different batch size for CTE and TKG, given
- line 175: # TODO: Check if start_rank_id can be modified dynamically at runtime

optimum/neuron/modeling_diffusion.py (3 lines):
- line 343: self.safety_checker = safety_checker # TODO: implement the class `NeuronStableDiffusionSafetyChecker`.
- line 1566: ) # TODO: support multiple IP adapters
- line 1627: self.image_projection_layers = ModuleList([ImageProjection()]) # TODO: support multiple IP adapters

optimum/neuron/accelerate/accelerator.py (3 lines):
- line 96: # TODO: check that removing it does not break anything.
- line 162: # TODO: make it more robust, similar to the prepare_data_loader function in `accelerate`.
- line 492: # TODO: can it be cleaned?

optimum/exporters/neuron/convert.py (2 lines):
- line 361: # TODO: Remove after the weights/neff separation compilation of sdxl is patched by a neuron sdk release: https://github.com/aws-neuron/aws-neuron-sdk/issues/859
- line 416: # FIXME: this is overly complicated just to pass the config

optimum/neuron/models/inference/llama/modeling_llama.py (2 lines):
- line 293: # TODO: Modularize RotaryEmbedding. See how HF transformers does it in 4.43.
- line 487: # TODO: this hack can be removed after replication_id is ready to use

optimum/neuron/models/inference/backend/modules/generation/generation_utils.py (2 lines):
- line 73: # TODO: Remove _sample and define separate flow for on-device sampling that doesn't use HF.
- line 421: # @yihsian: TODO: complete with using target tokengen model

optimum/neuron/models/inference/backend/modules/attention/attention_base.py (2 lines):
- line 293: TODO: Throw an exception instead of disabling flash attention if explicitly enabled but not eligible.
- line 320: # TODO: refactor/decompose this to reduce duplication with compute_for_token_gen

optimum/neuron/accelerate/utils/misc.py (2 lines):
- line 137: # TODO: @michaelbenayoun
- line 193: # TODO: @michaelbenayoun. Need to find a better way to identify the blocks to apply gradient

optimum/neuron/models/inference/backend/modules/kvcache/kv_cache_manager.py (2 lines):
- line 69: self.is_kv_cache_tiled = False # TODO: enable this when compiler fixes CR 158191111 (as per NxDI comment)
- line 247: # TODO once compiler fixes CR 158191111 we can turn back output tiling on

optimum/neuron/models/training/transformations_utils.py (2 lines):
- line 1223: # TODO: fix once it is fixed in neuronx_distributed
- line 1332: # TODO: fix once it is fixed in neuronx_distributed

optimum/neuron/models/training/llama/modeling_llama.py (1 line):
- line 121: self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation

optimum/neuron/models/inference/backend/modules/decoder/decoder_wrapper.py (1 line):
- line 324: # TODO: This else block is a short-term fix for Llava/ViT models to use DecoderModelInstance.

optimum/neuron/utils/misc.py (1 line):
- line 513: # TODO: this whole bulk is not very optimized, improve it once the tests are written.

optimum/exporters/neuron/base.py (1 line):
- line 323: # TODO: remove the mapper and use directly torch float dtype after the PR in Optimum makes its way to a release: https://github.com/huggingface/optimum/pull/2117

optimum/neuron/models/inference/phi3/modeling_phi3.py (1 line):
- line 68: # TODO: this hack can be removed after replication_id is ready to use

optimum/neuron/models/inference/backend/pretrained_model.py (1 line):
- line 123: # FIXME: this should not be part of neuron_config but is used in downstream classes

optimum/neuron/modeling_traced.py (1 line):
- line 302: if not disable_neuron_cache and is_neuronx_available(): # TODO: support caching of Inf1 as well

optimum/neuron/peft/tuners/lora/layer.py (1 line):
- line 449: # TODO: no dtype conversion here, unlike in Linear, is that correct?

optimum/exporters/neuron/model_wrappers.py (1 line):
- line 158: guess_mode=False, # TODO: support guess mode of ControlNet

optimum/neuron/models/inference/qwen3/modeling_qwen3.py (1 line):
- line 120: # TODO: this hack can be removed after replication_id is ready to use

optimum/neuron/models/inference/backend/modules/attention/gqa.py (1 line):
- line 466: # TODO: set weight to state dict support is pending.

tools/auto_fill_inference_cache.py (1 line):
- line 258: # TODO: Remove when https://github.com/huggingface/optimum/pull/1793/ is merged in Optimum

optimum/exporters/neuron/utils.py (1 line):
- line 535: # TODO: get it into https://github.com/huggingface/optimum/blob/4a7cb298140ee9bed968d98a780a950d15bb2935/optimum/exporters/utils.py#L77

optimum/neuron/models/inference/backend/modules/decoder/modeling_decoder.py (1 line):
- line 356: # TODO: Replace this with rankid + scatter call once supported

optimum/neuron/generation/utils.py (1 line):
- line 1612: # TODO: validate with @JingyaHuang

optimum/neuron/accelerate/optimizer.py (1 line):
- line 77: # TODO: might be needed to override this soon.
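Note on the dtype question flagged in optimum/neuron/peft/tuners/lora/layer.py (line 449): upstream PEFT's `lora.Linear` casts the activations to the adapter weight dtype before running the LoRA branch, which is the conversion the comment says is missing here. Below is a minimal sketch of that pattern for reference only, not the optimum-neuron implementation; `lora_A`, `lora_B`, `dropout`, and `scaling` follow PEFT's naming convention.

```python
import torch
from torch import nn

def lora_branch(x: torch.Tensor, lora_A: nn.Linear, lora_B: nn.Linear,
                dropout: nn.Module, scaling: float) -> torch.Tensor:
    # Cast the input to the adapter dtype first (the step PEFT's lora.Linear performs),
    # so e.g. bf16 activations do not hit fp32 adapter weights.
    x = x.to(lora_A.weight.dtype)
    # Standard LoRA update: B(A(dropout(x))) * scaling, added to the base layer output by the caller.
    return lora_B(lora_A(dropout(x))) * scaling
```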