in lerobot/scripts/rl/gym_manipulator.py [0:0]
def main(cfg: EnvConfig):
    """Main entry point for the robot environment script.

    This function runs the robot environment in one of several modes
    based on the provided configuration.

    Args:
        cfg: Configuration object defining the run parameters,
            including mode (record, replay, random) and other settings.
    """
    env = make_robot_env(cfg)
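
    # Record mode: collect episodes into a dataset, optionally driven by a pretrained SAC policy.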
    if cfg.mode == "record":
        policy = None
        if cfg.pretrained_policy_name_or_path is not None:
            from lerobot.common.policies.sac.modeling_sac import SACPolicy

            policy = SACPolicy.from_pretrained(cfg.pretrained_policy_name_or_path)
            policy.to(cfg.device)
            policy.eval()

        record_dataset(
            env,
            policy=policy,
            cfg=cfg,
        )
        exit()
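
    # Replay mode: play back a previously recorded episode in the environment.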
    if cfg.mode == "replay":
        replay_episode(
            env,
            cfg=cfg,
        )
        exit()
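
    # Default mode: drive the environment with smoothed random actions for ten episodes.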
    env.reset()

    # Initialize the smoothed action as a zero vector with the shape of the action space.
    smoothed_action = env.action_space.sample() * 0.0

    # Smoothing coefficient (alpha) defines how much of the new random sample to mix in.
    # A value close to 0 makes the trajectory very smooth (slow to change), while a value
    # close to 1 is less smooth; 1.0 disables smoothing entirely, so each step uses a
    # fresh random sample.
    alpha = 1.0

    num_episode = 0
    successes = []
    while num_episode < 10:
        start_loop_s = time.perf_counter()

        # Sample a new random action from the robot's action space.
        new_random_action = env.action_space.sample()
        # Update the smoothed action using an exponential moving average.
        smoothed_action = alpha * new_random_action + (1 - alpha) * smoothed_action

        # Step the environment with the smoothed action.
        obs, reward, terminated, truncated, info = env.step(smoothed_action)
        if terminated or truncated:
            successes.append(reward)
            env.reset()
            num_episode += 1

        # Hold the loop to the target control frequency by waiting out the remainder of the period.
        dt_s = time.perf_counter() - start_loop_s
        busy_wait(1 / cfg.fps - dt_s)

    logging.info(f"Successes over {num_episode} episodes: {successes}")
    logging.info(f"Success rate: {sum(successes) / len(successes)}")
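
The random-action fallback above is a first-order exponential moving average (EMA) over i.i.d. samples from the action space. Below is a minimal, self-contained sketch of that smoothing step, assuming only NumPy, a hypothetical 3-dimensional action space bounded in [-1, 1], and made-up alpha values and seed; the smoothness metric is purely illustrative and not part of the script.

import numpy as np

# Hypothetical setup: a 3-dimensional continuous action space with bounds [-1, 1].
rng = np.random.default_rng(seed=0)
action_dim = 3
num_steps = 200


def smoothness(actions: np.ndarray) -> float:
    """Mean absolute step-to-step change; lower means a smoother trajectory."""
    return float(np.abs(np.diff(actions, axis=0)).mean())


for alpha in (1.0, 0.3, 0.05):
    smoothed = np.zeros(action_dim)
    trajectory = []
    for _ in range(num_steps):
        new_random_action = rng.uniform(-1.0, 1.0, size=action_dim)
        # Same exponential moving average update used in the loop above.
        smoothed = alpha * new_random_action + (1 - alpha) * smoothed
        trajectory.append(smoothed.copy())
    print(f"alpha={alpha:<4} smoothness={smoothness(np.array(trajectory)):.3f}")

Smaller alpha values produce a lower mean step-to-step change, which is the trade-off the comment on alpha describes: smoother, slower-changing trajectories at low alpha, essentially unfiltered random actions at alpha = 1.0.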