optimum/exporters/neuron/model_configs.py (10 lines):
- line 108: ATOL_FOR_VALIDATION = 1e-1 # TODO: why accuracy more off than other arch
- line 361: # TODO: We should decouple clip text and vision, this would need fix on Optimum main. For the current workaround
- line 535: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 548: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 589: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 604: # TODO: compilation failed due to a bug in xla: https://github.com/pytorch/xla/issues/6398.
- line 661: # TODO : add text_image, image and image_embeds
- line 967: ) # TODO: add extra args, eg. revision, trust_remote_code, etc.
- line 1013: "attention_mask", # TODO: replace with `encoder_attention_mask` after optimum 1.14 release
- line 1114: ) # TODO: add extra args, eg. revision, trust_remote_code, etc.

optimum/neuron/pipelines/diffusers/pipeline_controlnet.py (4 lines):
- line 214: # TODO: support guess mode of ControlNet
- line 240: # TODO: support ip adapter
- line 321: # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
- line 324: # TODO: 7.1 Add image embeds for IP-Adapter

optimum/exporters/neuron/__main__.py (3 lines):
- line 328: # TODO: Enable optional outputs for Stable Diffusion
- line 360: # TODO: Enable optional outputs for encoders
- line 657: # TODO: support the validation of tp models.

optimum/neuron/pipelines/diffusers/pipeline_controlnet_sd_xl.py (3 lines):
- line 282: # TODO: Remove after the guess mode of ControlNet is supported
- line 313: # TODO: support ip adapter
- line 389: # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/neuron/models/inference/backend/config.py (3 lines):
- line 96: # TODO: these flags are suposed to work in NxDI. Either make them work or remove them
- line 144: # TODO: Check if we really need different batch size for CTE and TKG, given
- line 175: # TODO: Check if start_rank_id can be modified dynamically at runtime

optimum/neuron/modeling_diffusion.py (3 lines):
- line 343: self.safety_checker = safety_checker # TODO: implement the class `NeuronStableDiffusionSafetyChecker`.
- line 1566: ) # TODO: support multiple IP adapters
- line 1627: self.image_projection_layers = ModuleList([ImageProjection()]) # TODO: support multiple IP adapters

optimum/neuron/accelerate/accelerator.py (3 lines):
- line 96: # TODO: check that removing it does not break anything.
- line 162: # TODO: make it more robust, similar to the prepare_data_loader function in `accelerate`.
- line 492: # TODO: can it be cleaned?

optimum/exporters/neuron/convert.py (2 lines):
- line 361: # TODO: Remove after the weights/neff separation compilation of sdxl is patched by a neuron sdk release: https://github.com/aws-neuron/aws-neuron-sdk/issues/859
- line 416: # FIXME: this is overly complicated just to pass the config

optimum/neuron/models/inference/llama/modeling_llama.py (2 lines):
- line 293: # TODO: Modularize RotaryEmbedding. See how HF transformers does it in 4.43.
- line 487: # TODO: this hack can be removed after replication_id is ready to use

optimum/neuron/models/inference/backend/modules/generation/generation_utils.py (2 lines):
- line 73: # TODO: Remove _sample and define separate flow for on-device sampling that doesn't use HF.
- line 421: # @yihsian: TODO: complete with using target tokengen model

optimum/neuron/models/inference/backend/modules/attention/attention_base.py (2 lines):
- line 293: TODO: Throw an exception instead of disabling flash attention if explicitly enabled but not eligible.
- line 320: # TODO: refactor/decompose this to reduce duplication with compute_for_token_gen

optimum/neuron/accelerate/utils/misc.py (2 lines):
- line 137: # TODO: @michaelbenayoun
- line 193: # TODO: @michaelbenayoun. Need to find a better way to identify the blocks to apply gradient

optimum/neuron/models/inference/backend/modules/kvcache/kv_cache_manager.py (2 lines):
- line 69: self.is_kv_cache_tiled = False # TODO: enable this when compiler fixes CR 158191111 (as per NxDI comment)
- line 247: # TODO once compiler fixes CR 158191111 we can turn back output tiling on

optimum/neuron/models/training/transformations_utils.py (2 lines):
- line 1223: # TODO: fix once it is fixed in neuronx_distributed
- line 1332: # TODO: fix once it is fixed in neuronx_distributed

optimum/neuron/models/training/llama/modeling_llama.py (1 line):
- line 121: self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation

optimum/neuron/models/inference/backend/modules/decoder/decoder_wrapper.py (1 line):
- line 324: # TODO: This else block is a short-term fix for Llava/ViT models to use DecoderModelInstance.

optimum/neuron/utils/misc.py (1 line):
- line 513: # TODO: this whole bulk is not very optimized, improve it once the tests are written.

optimum/exporters/neuron/base.py (1 line):
- line 323: # TODO: remove the mapper and use directly torch float dtype after the PR in Optimum makes its way to a release: https://github.com/huggingface/optimum/pull/2117

optimum/neuron/models/inference/phi3/modeling_phi3.py (1 line):
- line 68: # TODO: this hack can be removed after replication_id is ready to use

optimum/neuron/models/inference/backend/pretrained_model.py (1 line):
- line 123: # FIXME: this should not be part of neuron_config but is used in downstream classes

optimum/neuron/modeling_traced.py (1 line):
- line 302: if not disable_neuron_cache and is_neuronx_available(): # TODO: support caching of Inf1 as well

optimum/neuron/peft/tuners/lora/layer.py (1 line):
- line 449: # TODO: no dtype conversion here, unlike in Linear, is that correct?

optimum/exporters/neuron/model_wrappers.py (1 line):
- line 158: guess_mode=False, # TODO: support guess mode of ControlNet

optimum/neuron/models/inference/qwen3/modeling_qwen3.py (1 line):
- line 120: # TODO: this hack can be removed after replication_id is ready to use

optimum/neuron/models/inference/backend/modules/attention/gqa.py (1 line):
- line 466: # TODO: set weight to state dict support is pending.

tools/auto_fill_inference_cache.py (1 line):
- line 258: # TODO: Remove when https://github.com/huggingface/optimum/pull/1793/ is merged in Optimum

optimum/exporters/neuron/utils.py (1 line):
- line 535: # TODO: get it into https://github.com/huggingface/optimum/blob/4a7cb298140ee9bed968d98a780a950d15bb2935/optimum/exporters/utils.py#L77

optimum/neuron/models/inference/backend/modules/decoder/modeling_decoder.py (1 line):
- line 356: # TODO: Replace this with rankid + scatter call once supported

optimum/neuron/generation/utils.py (1 line):
- line 1612: # TODO: validate with @JingyaHuang

optimum/neuron/accelerate/optimizer.py (1 line):
- line 77: # TODO: might be needed to override this soon.
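Note on the dtype question flagged in optimum/neuron/peft/tuners/lora/layer.py (line 449): upstream PEFT's `lora.Linear` casts the activations to the adapter weight dtype before running the LoRA branch, which is the conversion the comment says is missing here. Below is a minimal sketch of that pattern for reference only, not the optimum-neuron implementation; `lora_A`, `lora_B`, `dropout`, and `scaling` follow PEFT's naming convention.

```python
import torch
from torch import nn

def lora_branch(x: torch.Tensor, lora_A: nn.Linear, lora_B: nn.Linear,
                dropout: nn.Module, scaling: float) -> torch.Tensor:
    # Cast the input to the adapter dtype first (the step PEFT's lora.Linear performs),
    # so e.g. bf16 activations do not hit fp32 adapter weights.
    x = x.to(lora_A.weight.dtype)
    # Standard LoRA update: B(A(dropout(x))) * scaling, added to the base layer output by the caller.
    return lora_B(lora_A(dropout(x))) * scaling
```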