in www/www.py [0:0]
def _step(request: StepRequest) -> StepReply:
    """Replay the requested actions on the shared environment and build the reply.

    While holding ``env_lock`` the environment's reward space is set to
    ``request.reward`` and the environment is reset to ``request.benchmark``.
    Every action except the last is then replayed, either one at a time (when
    ``request.all_states`` is set, recording one state per step, preceded by
    the state before any action) or as a single batch. The last action is
    applied through ``env.raw_step`` so that the IR can be requested together
    with the feature observations and the reward.

    Args:
        request: The parsed step request. Reads its ``reward``, ``benchmark``,
            ``actions`` and ``all_states`` attributes.

    Returns:
        A ``StepReply`` carrying the environment's command line, the ``done``
        flag of the final step, the truncated IR and the recorded states. The
        state produced by the final action is always the last entry.

    Raises:
        ValueError: If the environment reports ``done`` while computing the
            initial state or while replaying any action before the last one.
    """
    states: List[StateToVisualize] = []

    def error_details(info) -> str:
        # Read the key defensively so that a missing "error_details" entry
        # cannot replace the intended ValueError with a KeyError.
        # NOTE(review): assumes `info` is a dict-like mapping - confirm.
        return info.get("error_details", "<no error details>")

    with env_lock:
        env.reward_space = request.reward
        env.reset(benchmark=request.benchmark)

        # Look the feature spaces up once; a fresh list is still built for
        # every call so no list object is shared between steps.
        instcount_space = env.observation.spaces["InstCountDict"]
        autophase_space = env.observation.spaces["AutophaseDict"]

        # Replay all actions except the last one.
        replayed_actions = request.actions[:-1]
        if request.all_states:
            # Replay actions one at a time to receive incremental rewards. The
            # first item represents the state prior to any actions.
            (instcount, autophase), _, done, info = env.step(
                action=[],
                observations=[instcount_space, autophase_space],
            )
            if done:
                raise ValueError(
                    f"Failed to compute initial state: {error_details(info)}"
                )
            states.append(
                StateToVisualize(
                    instcount=instcount,
                    autophase=autophase,
                    reward=0,
                )
            )
            for action in replayed_actions:
                (instcount, autophase), reward, done, info = env.step(
                    action,
                    observations=[instcount_space, autophase_space],
                )
                # Check for failure before touching the observations, so the
                # error raised is the one describing the failed action.
                if done:
                    raise ValueError(
                        f"Failed to apply action {action}: {error_details(info)}"
                    )
                states.append(
                    StateToVisualize(
                        instcount=instcount,
                        autophase=autophase,
                        reward=reward,
                    )
                )
        else:
            # Replay actions in a single batch.
            _, _, done, info = env.step(replayed_actions)
            if done:
                # Report only the actions that were part of this batch; the
                # final action has not been attempted yet.
                raise ValueError(
                    f"Failed to apply actions {replayed_actions}: "
                    f"{error_details(info)}"
                )

        # Perform the final action.
        (ir, instcount, autophase), (reward,), done, _ = env.raw_step(
            actions=request.actions[-1:],
            observations=[
                env.observation.spaces["Ir"],
                instcount_space,
                autophase_space,
            ],
            rewards=[env.reward_space],
        )
        states.append(
            StateToVisualize(
                instcount=instcount,
                autophase=autophase,
                reward=reward,
            )
        )
        # Read the command line while still holding the lock so that it
        # describes the same environment state as the observations above.
        commandline = env.commandline()

    return StepReply(
        commandline=commandline,
        done=done,
        ir=truncate(ir, max_line_len=250, max_lines=1024),
        states=states,
    )