optimum/habana/transformers/generation/utils.py (6 lines):
    - line 319: # 8. Remove unexpected `generate` inputs (TODO @joao: fix trainer and examples)
    - line 580: # TODO (joao): remove output/input mismatch when these old models (xlnet, reformer) are deprecated
    - line 915: # TODO (joao): per-model generation config classes.
    - line 1309: # TODO (joao): generalize this check with other types of inputs
    - line 2770: # TODO (joao): this OP throws "skipping cudagraphs due to ['incompatible ops']", find solution
    - line 2776: # TODO: no ignore_eos check here since there is a compilation error, will add ignore_eos here if fixed

optimum/habana/transformers/models/falcon/modeling_falcon.py (5 lines):
    - line 241: # TODO : Need to be fixed to use index_select()
    - line 385: flash_attention_fast_softmax = True # TODO pass this along
    - line 393: # TODO very similar to the fp8 case above, could be merged.
    - line 455: # TODO needs to be turned into a module for quantization
    - line 802: # TODO: Due to perf degradation, disable spda_attn_mask

optimum/habana/accelerate/accelerator.py (5 lines):
    - line 105: # TODO: remove these when the features are upstream or removed
    - line 188: # TODO: Look at enabling native TP training directly with a proper config
    - line 299: TODO: Temporarily disable this upcast due to FSDP graph compile issue.
    - line 387: # TODO: Remove when compile_regions is removed
    - line 641: # TODO: Remove when accelerate supports Sequence/Context parallelism

optimum/habana/transformers/trainer.py (5 lines):
    - line 1024: # TODO: keep syncs for fast DDP?
    - line 1067: # TODO: to merge self.accelerator.clip_grad_norm_ when HMP is removed
    - line 1225: # TODO: in the future support only specific min PEFT versions
    - line 1435: # TODO: for some reason the fsdp model is not unwrapped correctly here, the self.mode
    - line 2214: # TODO: this needs to be fixed and made cleaner later.

optimum/habana/transformers/models/minicpm/modeling_minicpm.py (4 lines):
    - line 558: # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
    - line 648: # TODO: These transpose are quite inefficient but Flash Attention requires the layout [batch_size, sequence_length, num_heads, head_dim]. We would need to refactor the KV cache
    - line 721: # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in MiniCPMFlashAttention2 __init__.
    - line 816: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py (3 lines):
    - line 320: >>> import requests #TODO to test?
    - line 430: # 8. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
    - line 483: # latent_model_input = self.scheduler.scale_model_input(latent_model_input, timestep) #TODO why this has been removed?

optimum/habana/transformers/models/llama/modeling_llama.py (3 lines):
    - line 150: self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation
    - line 608: # TODO: update when auto mp params is enabled in DeepSpeed (cf. https://github.com/HabanaAI/DeepSpeed/blob/94309c7b5dfc1a69858f5c9f25737b2f81a332a5/deepspeed/module_inject/replace_module.py#L440)
    - line 628: # TODO: the following section cause torch.compile performance issue with graph recompilation

optimum/habana/transformers/models/decilm/modeling_decilm.py (3 lines):
    - line 415: # TODO joao: standardize interface for the different Cache classes and remove of this if
    - line 437: # TODO joao: remove this `else` after `generate` prioritizes `Cache` objects
    - line 479: # TODO: use `next_tokens` directly instead.

optimum/habana/transformers/models/gemma2/modeling_gemma2.py (3 lines):
    - line 83: # TODO (joao): remove the `if` below, only used for BC
    - line 142: self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation
    - line 1062: # TODO: remove `.clone()` when it is fixed in SynapseAI

optimum/habana/transformers/models/mllama/modeling_mllama.py (3 lines):
    - line 172: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
    - line 852: # TODO: we have only SDPA currently and there's a bug when attn-bias is passed. Need to add eager attn and return the line
    - line 1152: # TODO: we have no attention_mask so this won't work, check if we really won't need attention mask and find another way

optimum/habana/diffusers/models/unet_2d_condition.py (2 lines):
    - line 88: # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can
    - line 219: # TODO: to remove in SynapseAI 1.13?

optimum/habana/transformers/models/qwen2_vl/modeling_qwen2_vl.py (2 lines):
    - line 184: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented.
    - line 652: # if we get 4D attention mask we cannot calculate rope deltas anymore. TODO @raushan fixme

optimum/habana/transformers/models/baichuan/modeling_baichuan.py (2 lines):
    - line 80: TODO @thomasw21 this doesn't work as nicely due to the masking strategy, and so masking varies slightly.
    - line 236: # TODO: remove `.clone()` when it is fixed in SynapseAI

optimum/habana/transformers/modeling_rope_utils.py (2 lines):
    - line 28: # TODO (joao): remove the `if` below, only used for BC
    - line 87: self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation

optimum/habana/distributed/distributed_runner.py (1 line):
    - line 78: # TODO: remove multi_hls

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py (1 line):
    - line 484: # 8. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/bloom/modeling_bloom.py (1 line):
    - line 44: TODO @thomasw21 this doesn't work as nicely due to the masking strategy, and so masking varies slightly.

optimum/habana/transformers/integrations/deepspeed.py (1 line):
    - line 117: # TODO: temporary workaround

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py (1 line):
    - line 294: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/xlm_roberta/modeling_xlm_roberta.py (1 line):
    - line 46: # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once implemented.

optimum/habana/peft/peft_model.py (1 line):
    - line 42: # TODO: starting with transformers 4.38, all architectures should support caching.

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py (1 line):
    - line 542: # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/gptj/modeling_gptj.py (1 line):
    - line 118: # TODO: implement rotary_emb()

optimum/habana/transformers/models/llava_next/modeling_llava_next.py (1 line):
    - line 91: # TODO: from Transformers v4.45, `generate` sets `num_logits_to_keep` to 1 if not given, which we don't want here

optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py (1 line):
    - line 594: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/peft/layer.py (1 line):
    - line 115: # TODO we assume that position_ids is not None here, not sure if that is safe but the old code also did that

optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py (1 line):
    - line 656: # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/whisper/modeling_whisper.py (1 line):
    - line 50: # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented.

optimum/habana/diffusers/models/attention_processor.py (1 line):
    - line 161: # TODO: add support for attn.scale when we move to Torch 2.1

optimum/habana/diffusers/models/controlnet_sdv.py (1 line):
    - line 405: # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can

optimum/habana/diffusers/models/unet_spatio_temporal_condition_controlnet.py (1 line):
    - line 129: # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can

optimum/habana/transformers/models/llava_onevision/modeling_llava_onevision.py (1 line):
    - line 94: # TODO: from Transformers v4.45, `generate` sets `num_logits_to_keep` to 1 if not given, which we don't want here

optimum/habana/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py (1 line):
    - line 498: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_mlperf.py (1 line):
    - line 700: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py (1 line):
    - line 319: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/qwen2_moe/modeling_qwen2_moe.py (1 line):
    - line 79: # TODO: remove `.clone()` when it is fixed in SynapseAI

optimum/habana/transformers/training_args.py (1 line):
    - line 906: # those deprecated arguments are removed from TrainingArguments. (TODO: transformers v5)

optimum/habana/transformers/generation/candidate_generator.py (1 line):
    - line 39: # TODO: may need to complete this for encoder-decoders: https://github.com/huggingface/transformers/blob/v4.38.2/src/transformers/generation/utils.py#L1133

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py (1 line):
    - line 460: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/diffusers/pipelines/controlnet/pipeline_controlnet.py (1 line):
    - line 463: # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/paligemma/modeling_paligemma.py (1 line):
    - line 134: # TODO: from Transformers v4.45, `generate` sets `num_logits_to_keep` to 1 if not given, which we don't want here

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py (1 line):
    - line 480: # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/video_llava/modeling_video_llava.py (1 line):
    - line 409: # TODO: @raushan retain only the new behavior after v4.47

optimum/habana/transformers/trainer_seq2seq.py (1 line):
    - line 353: # TODO: remove this hack when the legacy code that initializes generation_config from a model config is

optimum/habana/transformers/gaudi_configuration.py (1 line):
    - line 67: # TODO: to remove in a future version

optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py (1 line):
    - line 371: # 8. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox.py (1 line):
    - line 297: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py (1 line):
    - line 350: # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline

optimum/habana/transformers/models/wav2vec2/modeling_wav2vec2.py (1 line):
    - line 520: # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented.

optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py (1 line):
    - line 428: # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline