sockeye/beam_search_pt.py (6 lines):
	- line 32: # TODO (fhieber): Consider making inference classes regular modules (or move logic into the model module)
	- line 590: # TODO: check for disabled predicted output length
	- line 611: # TODO: return unnormalized proper score
	- line 756: # TODO: Consider joint tensor for all target factors
	- line 896: # TODO: consider automatically selecting GreedySearch if flags to this method are compatible.
	- line 905: assert not prevent_unk, "Greedy Search does not support prevention of unknown tokens"  # TODO: add support


sockeye/beam_search.py (4 lines):
	- line 597: # TODO: check for disabled predicted output length
	- line 617: score = np.array([-1.])  # TODO: return unnormalized proper score
	- line 941: # TODO: consider automatically selecting GreedySearch if flags to this method are compatible.
	- line 950: assert not prevent_unk, "Greedy Search does not support prevention of unknown tokens"  # TODO: add support


sockeye/model_pt.py (3 lines):
	- line 128: # TODO also consider weight tying with target factor input embeddings
	- line 170: # TODO: figure out int8 quantization of OutputLayer, supporting weight tying & vocabulary selection
	- line 610: # TODO: consider using gain=1 / math.sqrt(2)


sockeye/model.py (3 lines):
	- line 130: # TODO also consider weight tying with target factor input embeddings
	- line 260: # TODO: consider a dictionary mapping as return value
	- line 650: # TODO: check for missing parameters somehow (we allowed scaling to be missing)


sockeye/constants.py (2 lines):
	- line 46: # TODO: make this configurable in the model, separately per target factor.
	- line 74: # TODO replace options list (e.g., ENCODERS, DECODERS, ...) with Enum classes


sockeye/layers_pt.py (2 lines):
	- line 198: # TODO: port NVIDIA's implementation to PT C++ custom op
	- line 226: # TODO: port NVIDIA's implementation to PT C++ custom op


sockeye/checkpoint_decoder.py (1 line):
	- line 132: # TODO: possibly support decoding on multiple GPUs


sockeye/encoder.py (1 line):
	- line 196: # TODO DEPRECATE, NO LONGER USED


sockeye/training.py (1 line):
	- line 522: # overwriting here. TODO: make this better...


setup.py (1 line):
	- line 77: # TODO: deprecate mxnet CLIs


sockeye/decoder_pt.py (1 line):
	- line 34: # TODO: while we still have both transformer.TransformerConfig and transformer_pt.TransformerConfig,


sockeye/scoring_pt.py (1 line):
	- line 122: # TODO: scoring should support multiple devices


sockeye/data_io.py (1 line):
	- line 203: # TODO: This is a legacy step from the bucketing module version of Sockeye.


sockeye/loss_pt.py (1 line):
	- line 150: self._reduction = 'mean'  # TODO: consider sum reduction and normalization outside of loss for reporting