chatlearn/schedule/model_manager.py (3 lines): - line 230: # TODO: use decorator to annotate - line 334: # TODO: for colocate gpu_per_process > 1, support later - line 388: # TODO: One GPU task still not work chatlearn/runtime/environment.py (2 lines): - line 228: # TODO: we will consider colocation/offload later - line 294: # TODO: we will consider colocation/offload later chatlearn/utils/utils.py (2 lines): - line 394: # [TODO:baodong.lh] support custom_op like min, max to reduce metrics - line 406: # [TODO:baodong.lh] improve performance by distributing the task to per-replica chatlearn/data/data.py (2 lines): - line 177: # TODO: fix hardcode key for sample len - line 270: # TODO: deal with situation that relay_sample_manager is None chatlearn/models/deepspeed_module.py (2 lines): - line 135: # TODO: try attn_implementation="flash_attention_2" - line 156: # TODO: deal with offload later chatlearn/runtime/executor.py (2 lines): - line 185: # TODO: deal with one2many scene - line 249: # TODO: add index for one2many case chatlearn/models/vllm_module.py (1 line): - line 571: # TODO: we may need to let setup return model, optimizer and opt_param_scheduler chatlearn/models/vllm/hooks/vllm_0_6_6/qwen3.py (1 line): - line 294: # TODO (@robertgshaw2): see if this can be moved out chatlearn/models/megatron_module.py (1 line): - line 96: # TODO: we may need to let setup return model, optimizer and opt_param_scheduler chatlearn/utils/log_monitor.py (1 line): - line 264: # TODO: try to reduce this frequency chatlearn/utils/megatron_utils.py (1 line): - line 142: # TODO is this still a necessary option?