train/comms/pt/commsTraceReplay.py (6 lines):
- line 170: # TODO:
- line 595: # TODO: collect perf. from all ranks to rank 0 and detect any imbalanced perf?
- line 637: # FIXME: 0 is a common case, need this info from trace for more accurate replay
- line 639: # FIXME: assuming it's always sum for reduce/allreduce operations
- line 641: # FIXME: always perform blocking comms; may study non-blocking in the future
- line 667: # TODO: file name may get changed later

train/comms/pt/comms.py (4 lines):
- line 21: ### TODO: add these to class variables?
- line 139: # TODO: check the correctness of root, should be between 0 and [world_size - 1]
- line 284: # TODO: investigate the cache effect
- line 1060: # TODO: allow user to set specific size

train/comms/pt/dlrm.py (4 lines):
- line 459: temp.append(args.arch_sparse_feature_size)  # PENDING/TODO: update it based on trainer/feeds model
- line 552: # TODO: only support pytorch-dist as the nw-stack now
- line 723: # FIXME: can we refer to extend_distributed.ExtendProcessGroup.alltoallv?
- line 1051: # FIXME: can we make it common?

train/comms/pt/comms_utils.py (3 lines):
- line 30: # TODO: Is this the best way to exit?
- line 207: # TODO: Error handling
- line 626: self.allreduce_qcomm = 32  # TODO: set it as the bitwidth for now until the quantization kernels are supported

train/compute/python/lib/iterator.py (3 lines):
- line 53: # TODO lofe: support kwargs too.
- line 117: # TODO lofe: should also check for ATTR_RANGE
- line 187: # TODO lofe: support kwargs too.

train/comms/pt/pytorch_dist_backend.py (1 line):
- line 539: # TODO: this is a temporary workaround; need to unify the type of commsParams in comms and dlrm
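
The root-rank check flagged at comms.py line 139 would likely reduce to a small bounds check before issuing the collective. Below is a minimal sketch of that idea; the `validate_root` helper name and error message are assumptions, not code from the repository, while `torch.distributed.get_world_size` is the standard PyTorch API.

```python
# Hypothetical sketch of the root-rank validation suggested at comms.py line 139.
import torch.distributed as dist


def validate_root(root: int) -> int:
    """Ensure the collective's root rank lies in [0, world_size - 1]."""
    world_size = dist.get_world_size()
    if not 0 <= root < world_size:
        raise ValueError(f"root={root} is out of range for world_size={world_size}")
    return root
```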
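
The FIXME at commsTraceReplay.py line 641 notes that replay currently issues blocking collectives only. For reference, the blocking/non-blocking distinction in plain `torch.distributed` looks roughly like the sketch below; this is illustrative only and not taken from the replay code.

```python
# Minimal sketch of blocking vs. non-blocking collectives in torch.distributed.
import torch
import torch.distributed as dist


def allreduce_blocking(tensor: torch.Tensor) -> None:
    # Blocking: the call returns only after the collective has completed.
    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)


def allreduce_nonblocking(tensor: torch.Tensor):
    # Non-blocking: returns a work handle immediately; the caller can overlap
    # computation and later call work.wait() to complete the collective.
    work = dist.all_reduce(tensor, op=dist.ReduceOp.SUM, async_op=True)
    return work
```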
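
The TODO at commsTraceReplay.py line 595 asks for per-rank performance to be collected on rank 0 and checked for imbalance. One possible shape for that, assuming a per-rank latency measurement is already available, is sketched below; the function name, variable names, and the 1.2x imbalance threshold are assumptions for illustration.

```python
# Hypothetical sketch of gathering per-rank latency and flagging imbalance.
import torch.distributed as dist


def report_latency_imbalance(local_latency_us: float, threshold: float = 1.2) -> None:
    world_size = dist.get_world_size()
    latencies = [None] * world_size
    # all_gather_object works across backends; rank 0 then inspects the results.
    dist.all_gather_object(latencies, local_latency_us)
    if dist.get_rank() == 0:
        slowest, fastest = max(latencies), min(latencies)
        if fastest > 0 and slowest / fastest > threshold:
            print(f"Imbalanced ranks: min={fastest:.1f}us max={slowest:.1f}us")
```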