apex/apex/optimizers/fp16_optimizer.py (5 lines):
- line 45: # - flat by groups, not keeping state. TODO: remove state explicitly?
- line 46: # - master grad and unflat master weight never exist. TODO: a way to save out unflat master?
- line 118: # TODO: Not most efficient with copy to cpu and sync
- line 141: if norm_groups[i] == -1: #TODO: early break (sketch below)
- line 154: # TODO: we probably don't need this? just to be safe

tensorboardX/tensorboardX/pytorch_graph.py (4 lines):
- line 97: # TODO: See if we can remove this in the future
- line 183: # TODO: compute correct memory usage and CPU time once
- line 260: with torch.onnx.set_training(model, False): # TODO: move outside of torch.onnx (sketch below)
- line 280: # TODO: See if we can extract GPU vs CPU information from the PyTorch model

tensorboardX/tensorboardX/visdom_writer.py (4 lines):
- line 193: # TODO: reverse the logic here, should do the permutation in numpy (sketch below)
- line 248: # TODO: Visdom doesn't support graph visualization yet, so this is a no-op
- line 253: # TODO: Visdom doesn't support graph visualization yet, so this is a no-op
- line 258: # TODO: Visdom doesn't support embeddings yet, so this is a no-op

apex/apex/amp/lists/functional_overrides.py (2 lines):
- line 2: # TODO: think about the following two. They do weird things.
- line 48: # TODO: which of these can be fp16?

apex/apex/fp16_utils/fp16_optimizer.py (2 lines):
- line 12: # TODO: Update overflow check + downscale to use Carl's fused kernel.
- line 191: # TODO: Centralize exposure and import error checking for the C backend.

apex/apex/parallel/distributed.py (2 lines):
- line 227: # TODO: I really need to centralize the C++ backed imports
- line 335: # TODO: How do we want to handle multiple backward passes between

apex/csrc/multi_tensor_apply.cuh (2 lines):
- line 13: // TODO: Kernel arg size limit may be <4KB for some other cards (ie Jetson)
- line 57: // TODO: Print which tensor fails.

apex/apex/amp/handle.py (2 lines):
- line 93: # TODO: Rewrite FusedAdam to use multi-tensor apply and the same loss scaler.
- line 206: # TODO: this code block is duplicated here and `opt.py`. Unify.

apex/apex/amp/_process_optimizer.py (1 line):
- line 273: # TODO: Centralize exposure and import error checking for the C backend.

tensorboardX/tensorboardX/beholder/beholder.py (1 line):
- line 161: # TODO: blanket try and except for production? I don't want someone's script to die

tensorboardX/tensorboardX/writer.py (1 line):
- line 89: # TODO: See if we can remove this in the future if we are

apex/apex/amp/rnn_compat.py (1 line):
- line 50: # TODO: where else is this a problem?

tensorboardX/tensorboardX/summary.py (1 line):
- line 81: # TODO: expose other parameters in the future.

jukebox/prior/prior.py (1 line):
- line 263: # assert chunk_size % self.prime_loss_dims == 0. TODO: Check if needed

apex/apex/amp/frontend.py (1 line):
- line 360: # TODO: is this necessary/useful?

apex/apex/amp/utils.py (1 line):
- line 171: # TODO: maybe this should actually use

apex/apex/amp/wrap.py (1 line):
- line 87: # TODO: other mixed-type cases aren't due to amp.
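
Several of the apex entries above (fp16_optimizer.py lines 12, 118, and 141) concern the same step of dynamic loss scaling: compute a gradient norm per parameter group, treat -1 as the inf/nan overflow sentinel, and skip the step while shrinking the scale on overflow. Below is a minimal sketch of that pattern, assuming a toy scaler; `ToyLossScaler`, `grad_norms_per_group`, and `step_if_finite` are illustrative names, not apex's API.

```python
import math

import torch


class ToyLossScaler:
    # Hypothetical stand-in for apex's loss scaler; only the one
    # attribute used below is modeled.
    def __init__(self, scale=2.0 ** 16):
        self.scale = scale


def grad_norms_per_group(param_groups):
    # L2 grad norm per parameter group; -1 is the overflow sentinel
    # that the `norm_groups[i] == -1` check quoted above tests for.
    norms = []
    for group in param_groups:
        total = 0.0
        overflowed = False
        for p in group["params"]:
            if p.grad is None:
                continue
            n = p.grad.data.float().norm().item()
            if math.isinf(n) or math.isnan(n):
                overflowed = True
                break  # the "early break" the line-141 TODO asks for
            total += n * n
        norms.append(-1 if overflowed else math.sqrt(total))
    return norms


def step_if_finite(optimizer, scaler):
    norms = grad_norms_per_group(optimizer.param_groups)
    if any(n == -1 for n in norms):
        scaler.scale /= 2.0  # overflow: skip the step, halve the loss scale
        return False
    optimizer.step()
    return True
```

The "Carl's fused kernel" TODO points at replacing this per-tensor Python loop with a single fused check; the sketch only shows the unfused baseline behavior.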
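
The pytorch_graph.py TODO at line 260 asks to stop depending on `torch.onnx.set_training`, a private helper in older PyTorch releases that temporarily forced a training mode. A minimal sketch of the replacement it points toward is an ordinary context manager that saves and restores `model.training`:

```python
from contextlib import contextmanager

import torch


@contextmanager
def training_mode(model, mode):
    # Temporarily set the model's training mode, restoring the caller's
    # state on exit even if tracing raises.
    previous = model.training
    model.train(mode)
    try:
        yield model
    finally:
        model.train(previous)
```

With such a helper, the quoted line could become `with training_mode(model, False):` around the trace, with no torch.onnx import involved.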
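
The visdom_writer.py TODO at line 193 suggests doing the channel permutation in numpy instead of torch. The exact intent of "reverse the logic here" is not visible from the quote, but assuming a CHW image array (names here are illustrative), the numpy form is a single transpose:

```python
import numpy as np


def chw_to_hwc(img):
    # Reorder a (C, H, W) array to (H, W, C) without a torch round-trip.
    assert img.ndim == 3, "expected a (C, H, W) array"
    return np.transpose(img, (1, 2, 0))
```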