reagent/core/types.py (7 lines):
- line 58: # TODO: can we get this working well with jupyter?
- line 121: # FIXME: These config types are misplaced but we need to write FBL config adapter
- line 376: TODO: make this an embedder.
- line 395: # TODO: have an embedder here
- line 708: # TODO: handle value/mask of DocList
- line 886: # TODO: Implement ExtraData.from_dict
- line 923: # TODO: rename "observation" to "state" in Transition and return cls(**d)

reagent/replay_memory/circular_replay_buffer.py (4 lines):
- line 148: TODO: implement for stack size > 1
- line 207: TODO: implement for stack size > 1
- line 822: # TODO: Save tensors to torch files.
- line 868: # TODO: Load tensors from torch files.

reagent/gym/datasets/replay_buffer_dataset.py (3 lines):
- line 47: # TODO: Just use kwargs here?
- line 91: # TODO: We probably should put member vars into local vars to
- line 169: # TODO: Just use kwargs here?

reagent/model_managers/model_manager.py (3 lines):
- line 83: TODO: This function should return ReAgentLightningModule &
- line 164: # TODO: make abstract
- line 189: # TODO: make abstract

reagent/evaluation/evaluation_data_page.py (3 lines):
- line 209: # FIXME: model_values and model_metrics_values should be
- line 272: # FIXME: calculate model_metrics_values when q_network_cpe is added
- line 336: # TODO: make generic get_action_idxs for each trainer class

reagent/training/multi_stage_trainer.py (3 lines):
- line 131: # FIXME: Doesn't support LRScheduler yet
- line 185: # FIXME: epoch argument is not really correct
- line 198: # FIXME: this is a hack around https://github.com/PyTorchLightning/pytorch-lightning/pull/9360

reagent/data/manual_data_module.py (2 lines):
- line 277: # TODO: we currently use the same data for test and validation.
- line 303: # TODO: we currently use the same data for test and validation.

reagent/model_managers/model_based/cross_entropy_method.py (2 lines):
- line 36: # TODO: consider possible_actions_mask
- line 61: # TODO: should this be in base class?

reagent/models/dueling_q_network.py (2 lines):
- line 106: # TODO: export these as observable values
- line 204: # TODO: export these as observable values

reagent/gym/policies/predictor_policies.py (2 lines):
- line 50: # TODO: remove this dependency
- line 57: # TODO: write SlateQ Wrapper

reagent/net_builder/slate_ranking/slate_ranking_scorer.py (2 lines):
- line 67: has_user_feat: bool = False # TODO: deprecate
- line 70: ) # TODO: if score cap not needed, deprecate

reagent/publishers/file_system_publisher.py (2 lines):
- line 44: TODO: replace with redis (python) and hiredis (C) for better RASP support
- line 58: # TODO: make this take in a

reagent/models/actor.py (2 lines):
- line 89: # TODO: log prob is affected by clamping, how to handle that?
- line 216: # TODO: calculate log_prob for l2 normalization

reagent/model_managers/parametric_dqn_base.py (2 lines):
- line 76: # FIXME: this only works for one-hot encoded actions
- line 106: # TODO: Add below get_data_module() method once methods in

reagent/preprocessing/transforms.py (2 lines):
- line 37: # TODO: this wouldn't work for possible_actions_mask (list of value, presence)
- line 311: # TODO assert regarding offsets length compared to value

reagent/training/discrete_crr_trainer.py (1 line):
- line 378: # TODO: rename underlying function to get_max_possible_values_and_idxs

reagent/gym/envs/oracle_pvm.py (1 line):
- line 72: TODO: made environment easier to learn from by not using RecSim.
reagent/replay_memory/prioritized_replay_buffer.py (1 line):
- line 92: # TODO: do priority sampling with torch as well.

serving/scripts/rasp_to_model.py (1 line):
- line 34: "sequence_number": 0, # TODO: Support sequences

reagent/workflow/sample_configs/sac_pendulum_offline.yaml (1 line):
- line 64: # TODO: raise this bar after training stabilize

serving/reagent/serving/operators/PropensityFit.cpp (1 line):
- line 72: // TODO: Implement pid controller to replace this fixed shift

reagent/optimizer/uninferrable_schedulers.py (1 line):
- line 14: TODO: remove this file once we can infer everything.

reagent/gym/envs/env_wrapper.py (1 line):
- line 121: # TODO: add more methods to simplify gym code

reagent/model_utils/seq2slate_utils.py (1 line):
- line 94: # TODO (@czxttkl): use when we introduce padding

reagent/training/dqn_trainer.py (1 line):
- line 202: # TODO: calls to _maybe_run_optimizer removed, should be replaced with Trainer parameter

reagent/training/sac_trainer.py (1 line):
- line 101: # TODO: finish

reagent/training/ppo_trainer.py (1 line):
- line 96: # TODO: can this line be hit currently in ReAgent?

reagent/mab/mab_algorithm.py (1 line):
- line 26: TODO: maybe replace with sparse-to-dense tensor function?

reagent/reporting/world_model_reporter.py (1 line):
- line 67: # TODO: write this for OSS

reagent/training/world_model/mdnrnn_trainer.py (1 line):
- line 60: # TODO: Must setup (or mock) trainer and a LoggerConnector to call self.log()!

reagent/core/torch_utils.py (1 line):
- line 41: FIXME: Remove this function after the issue above is resolved

reagent/training/qrdqn_trainer.py (1 line):
- line 58: # TODO: check to ensure no rl parameter value is set that isn't actively used by class

reagent/reporting/seq2reward_reporter.py (1 line):
- line 93: # TODO: write this for OSS

reagent/models/cem_planner.py (1 line):
- line 223: # TODO: Warmstarts means and vars using previous solutions (T48841404)

reagent/model_managers/actor_critic/sac.py (1 line):
- line 129: # TODO: add in critic

reagent/reporting/reporter_base.py (1 line):
- line 65: # TODO: write this for OSS

reagent/data/oss_data_fetcher.py (1 line):
- line 75: TODO: change this to a deterministic subsample.

reagent/workflow/utils.py (1 line):
- line 71: # TODO: Move this to appropriate location

reagent/preprocessing/sparse_preprocessor.py (1 line):
- line 104: # TODO: Add option for simple modulo and other hash functions

reagent/core/tracker.py (1 line):
- line 81: # TODO: Create a generic framework for type conversion

reagent/reporting/parametric_dqn_reporter.py (1 line):
- line 57: # TODO: write this for OSS

reagent/models/seq2slate.py (1 line):
- line 566: # TODO: T62502977 create learnable feature vectors for start symbol

reagent/training/dqn_trainer_base.py (1 line):
- line 198: # TODO: why is reward net commented out?

reagent/prediction/predictor_wrapper.py (1 line):
- line 195: # TODO for future cleanup: kind of a misnomer now, since not really "difference"

reagent/reporting/actor_critic_reporter.py (1 line):
- line 63: # TODO: write this for OSS

reagent/workflow/identify_types_flow.py (1 line):
- line 137: # TODO: for OSS

reagent/optimizer/uninferrable_optimizers.py (1 line):
- line 11: TODO: remove this file once we can infer everything.

reagent/gym/preprocessors/trainer_preprocessor.py (1 line):
- line 240: # TODO: abs value to make probability?

reagent/gym/utils.py (1 line):
- line 131: # TODO: make this a property of EnvWrapper?
reagent/net_builder/synthetic_reward_net_builder.py (1 line):
- line 65: # TODO add Discrete Single Step Synthetic Reward Predictor

reagent/reporting/discrete_dqn_reporter.py (1 line):
- line 104: # TODO: write this for OSS

serving/reagent/serving/operators/Ucb.cpp (1 line):
- line 72: // TODO: Implement CDF of t-distribution for bayesian UCB

reagent/gym/envs/changing_arms.py (1 line):
- line 66: # FIXME: hardcoded for now

reagent/training/td3_trainer.py (1 line):
- line 64: minibatches_per_step (optional, TODO: currently unused): the number of minibatch updates

reagent/reporting/discrete_crr_reporter.py (1 line):
- line 104: # TODO: write this for OSS

reagent/gym/policies/random_policies.py (1 line):
- line 82: # TODO: consider possible_actions_mask

reagent/training/slate_q_trainer.py (1 line):
- line 126: # TODO: Probably should create a new model type

reagent/gym/envs/recsim.py (1 line):
- line 64: # TODO: remove RecsimObsPreprocessor and move it here

reagent/gym/envs/gym.py (1 line):
- line 43: # TODO: make return serving feature data

reagent/replay_memory/utils.py (1 line):
- line 58: # TODO: handle possible actions/mask here