In rlmeta/agents/ppo/ppo_agent.py:
def __init__(self,
             model: ModelLike,
             deterministic_policy: bool = False,
             replay_buffer: Optional[ReplayBufferLike] = None,
             controller: Optional[ControllerLike] = None,
             optimizer: Optional[torch.optim.Optimizer] = None,
             batch_size: int = 128,
             grad_clip: float = 50.0,
             gamma: float = 0.99,
             gae_lambda: float = 0.95,
             eps_clip: float = 0.2,
             entropy_ratio: float = 0.01,
             advantage_normalization: bool = True,
             reward_rescaling: bool = True,
             value_clip: bool = True,
             push_every_n_steps: int = 1) -> None:
    super(PPOAgent, self).__init__()

    self.model = model
    self.deterministic_policy = deterministic_policy

    # Distributed-training components; both are optional.
    self.replay_buffer = replay_buffer
    self.controller = controller

    # Optimization settings.
    self.optimizer = optimizer
    self.batch_size = batch_size
    self.grad_clip = grad_clip

    # PPO / GAE hyperparameters.
    self.gamma = gamma
    self.gae_lambda = gae_lambda
    self.eps_clip = eps_clip
    self.entropy_ratio = entropy_ratio
    self.advantage_normalization = advantage_normalization

    # Rescaler applied to rewards when reward rescaling is enabled.
    self.reward_rescaling = reward_rescaling
    if self.reward_rescaling:
        self.reward_rescaler = NormRescaler(size=1)
    self.value_clip = value_clip

    self.push_every_n_steps = push_every_n_steps

    # Per-episode state: episode-done flag and the collected trajectory.
    self.done = False
    self.trajectory = []
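
For reference, a minimal construction sketch follows. It assumes a hypothetical user-defined MyPPOModel that satisfies the ModelLike interface and exposes parameters() for the optimizer; the replay buffer and controller are left as None, which the constructor above permits, and the remaining PPO hyperparameters keep their defaults.

import torch

from rlmeta.agents.ppo.ppo_agent import PPOAgent

# Hypothetical policy/value network implementing the ModelLike interface.
model = MyPPOModel()

agent = PPOAgent(
    model,
    optimizer=torch.optim.Adam(model.parameters(), lr=3e-4),
    batch_size=256,     # override the default of 128
    gamma=0.99,         # defaults shown explicitly for clarity
    gae_lambda=0.95,
    eps_clip=0.2,
)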