reagent/core/types.py (7 lines):
- line 58: # TODO: can we get this working well with jupyter?
- line 121: # FIXME: These config types are misplaced but we need to write FBL config adapter
- line 376: TODO: make this an embedder.
- line 395: # TODO: have an embedder here
- line 708: # TODO: handle value/mask of DocList
- line 886: # TODO: Implement ExtraData.from_dict
- line 923: # TODO: rename "observation" to "state" in Transition and return cls(**d)

reagent/replay_memory/circular_replay_buffer.py (4 lines):
- line 148: TODO: implement for stack size > 1
- line 207: TODO: implement for stack size > 1
- line 822: # TODO: Save tensors to torch files.
- line 868: # TODO: Load tensors from torch files.

reagent/gym/datasets/replay_buffer_dataset.py (3 lines):
- line 47: # TODO: Just use kwargs here?
- line 91: # TODO: We probably should put member vars into local vars to
- line 169: # TODO: Just use kwargs here?

reagent/model_managers/model_manager.py (3 lines):
- line 83: TODO: This function should return ReAgentLightningModule &
- line 164: # TODO: make abstract
- line 189: # TODO: make abstract

reagent/evaluation/evaluation_data_page.py (3 lines):
- line 209: # FIXME: model_values and model_metrics_values should be
- line 272: # FIXME: calculate model_metrics_values when q_network_cpe is added
- line 336: # TODO: make generic get_action_idxs for each trainer class

reagent/training/multi_stage_trainer.py (3 lines):
- line 131: # FIXME: Doesn't support LRScheduler yet
- line 185: # FIXME: epoch argument is not really correct
- line 198: # FIXME: this is a hack around https://github.com/PyTorchLightning/pytorch-lightning/pull/9360

reagent/data/manual_data_module.py (2 lines):
- line 277: # TODO: we currently use the same data for test and validation.
- line 303: # TODO: we currently use the same data for test and validation.

reagent/model_managers/model_based/cross_entropy_method.py (2 lines):
- line 36: # TODO: consider possible_actions_mask
- line 61: # TODO: should this be in base class?

reagent/models/dueling_q_network.py (2 lines):
- line 106: # TODO: export these as observable values
- line 204: # TODO: export these as observable values

reagent/gym/policies/predictor_policies.py (2 lines):
- line 50: # TODO: remove this dependency
- line 57: # TODO: write SlateQ Wrapper

reagent/net_builder/slate_ranking/slate_ranking_scorer.py (2 lines):
- line 67: has_user_feat: bool = False # TODO: deprecate
- line 70: ) # TODO: if score cap not needed, deprecate

reagent/publishers/file_system_publisher.py (2 lines):
- line 44: TODO: replace with redis (python) and hiredis (C) for better RASP support
- line 58: # TODO: make this take in a

reagent/models/actor.py (2 lines):
- line 89: # TODO: log prob is affected by clamping, how to handle that?
- line 216: # TODO: calculate log_prob for l2 normalization

reagent/model_managers/parametric_dqn_base.py (2 lines):
- line 76: # FIXME: this only works for one-hot encoded actions
- line 106: # TODO: Add below get_data_module() method once methods in

reagent/preprocessing/transforms.py (2 lines):
- line 37: # TODO: this wouldn't work for possible_actions_mask (list of value, presence)
- line 311: # TODO assert regarding offsets length compared to value

reagent/training/discrete_crr_trainer.py (1 line):
- line 378: # TODO: rename underlying function to get_max_possible_values_and_idxs

reagent/gym/envs/oracle_pvm.py (1 line):
- line 72: TODO: made environment easier to learn from by not using RecSim.
reagent/replay_memory/prioritized_replay_buffer.py (1 line):
- line 92: # TODO: do priority sampling with torch as well.

serving/scripts/rasp_to_model.py (1 line):
- line 34: "sequence_number": 0, # TODO: Support sequences

reagent/workflow/sample_configs/sac_pendulum_offline.yaml (1 line):
- line 64: # TODO: raise this bar after training stabilize

serving/reagent/serving/operators/PropensityFit.cpp (1 line):
- line 72: // TODO: Implement pid controller to replace this fixed shift

reagent/optimizer/uninferrable_schedulers.py (1 line):
- line 14: TODO: remove this file once we can infer everything.

reagent/gym/envs/env_wrapper.py (1 line):
- line 121: # TODO: add more methods to simplify gym code

reagent/model_utils/seq2slate_utils.py (1 line):
- line 94: # TODO (@czxttkl): use when we introduce padding

reagent/training/dqn_trainer.py (1 line):
- line 202: # TODO: calls to _maybe_run_optimizer removed, should be replaced with Trainer parameter

reagent/training/sac_trainer.py (1 line):
- line 101: # TODO: finish

reagent/training/ppo_trainer.py (1 line):
- line 96: # TODO: can this line be hit currently in ReAgent?

reagent/mab/mab_algorithm.py (1 line):
- line 26: TODO: maybe replace with sparse-to-dense tensor function?

reagent/reporting/world_model_reporter.py (1 line):
- line 67: # TODO: write this for OSS

reagent/training/world_model/mdnrnn_trainer.py (1 line):
- line 60: # TODO: Must setup (or mock) trainer and a LoggerConnector to call self.log()!

reagent/core/torch_utils.py (1 line):
- line 41: FIXME: Remove this function after the issue above is resolved

reagent/training/qrdqn_trainer.py (1 line):
- line 58: # TODO: check to ensure no rl parameter value is set that isn't actively used by class

reagent/reporting/seq2reward_reporter.py (1 line):
- line 93: # TODO: write this for OSS

reagent/models/cem_planner.py (1 line):
- line 223: # TODO: Warmstarts means and vars using previous solutions (T48841404)

reagent/model_managers/actor_critic/sac.py (1 line):
- line 129: # TODO: add in critic

reagent/reporting/reporter_base.py (1 line):
- line 65: # TODO: write this for OSS

reagent/data/oss_data_fetcher.py (1 line):
- line 75: TODO: change this to a deterministic subsample.

reagent/workflow/utils.py (1 line):
- line 71: # TODO: Move this to appropriate location

reagent/preprocessing/sparse_preprocessor.py (1 line):
- line 104: # TODO: Add option for simple modulo and other hash functions

reagent/core/tracker.py (1 line):
- line 81: # TODO: Create a generic framework for type conversion

reagent/reporting/parametric_dqn_reporter.py (1 line):
- line 57: # TODO: write this for OSS

reagent/models/seq2slate.py (1 line):
- line 566: # TODO: T62502977 create learnable feature vectors for start symbol

reagent/training/dqn_trainer_base.py (1 line):
- line 198: # TODO: why is reward net commented out?

reagent/prediction/predictor_wrapper.py (1 line):
- line 195: # TODO for future cleanup: kind of a misnomer now, since not really "difference"

reagent/reporting/actor_critic_reporter.py (1 line):
- line 63: # TODO: write this for OSS

reagent/workflow/identify_types_flow.py (1 line):
- line 137: # TODO: for OSS

reagent/optimizer/uninferrable_optimizers.py (1 line):
- line 11: TODO: remove this file once we can infer everything.

reagent/gym/preprocessors/trainer_preprocessor.py (1 line):
- line 240: # TODO: abs value to make probability?

reagent/gym/utils.py (1 line):
- line 131: # TODO: make this a property of EnvWrapper?
reagent/net_builder/synthetic_reward_net_builder.py (1 line):
- line 65: # TODO add Discrete Single Step Synthetic Reward Predictor

reagent/reporting/discrete_dqn_reporter.py (1 line):
- line 104: # TODO: write this for OSS

serving/reagent/serving/operators/Ucb.cpp (1 line):
- line 72: // TODO: Implement CDF of t-distribution for bayesian UCB

reagent/gym/envs/changing_arms.py (1 line):
- line 66: # FIXME: hardcoded for now

reagent/training/td3_trainer.py (1 line):
- line 64: minibatches_per_step (optional, TODO: currently unused): the number of minibatch updates

reagent/reporting/discrete_crr_reporter.py (1 line):
- line 104: # TODO: write this for OSS

reagent/gym/policies/random_policies.py (1 line):
- line 82: # TODO: consider possible_actions_mask

reagent/training/slate_q_trainer.py (1 line):
- line 126: # TODO: Probably should create a new model type

reagent/gym/envs/recsim.py (1 line):
- line 64: # TODO: remove RecsimObsPreprocessor and move it here

reagent/gym/envs/gym.py (1 line):
- line 43: # TODO: make return serving feature data

reagent/replay_memory/utils.py (1 line):
- line 58: # TODO: handle possible actions/mask here