src_code/controllers/basic_controller_interactive.py [252:292]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if self.agent_output_type == "pi_logits":
            assert(False)
            #TODO: NOT IMPLEMENTED YET
            if getattr(self.args, "mask_before_softmax", True):
                # Make the logits for unavailable actions very negative to minimise their effect on the softmax
                reshaped_avail_actions = avail_actions.reshape(ep_batch.batch_size * self.n_agents, -1)
                agent_outs[reshaped_avail_actions == 0] = -1e10
                agent_outs_interactive[reshaped_avail_actions == 0] = -1e10
                agent_outs_interactive_[reshaped_avail_actions == 0] = -1e10

            agent_outs = th.nn.functional.softmax(agent_outs, dim=-1)
            agent_outs_interactive = th.nn.functional.softmax(agent_outs_interactive, dim=-1)
            agent_outs_interactive_ = th.nn.functional.softmax(agent_outs_interactive_, dim=-1)
            if not test_mode:
                # Epsilon floor
                epsilon_action_num = agent_outs.size(-1)
                if getattr(self.args, "mask_before_softmax", True):
                    # With probability epsilon, we will pick an available action uniformly
                    epsilon_action_num = reshaped_avail_actions.sum(dim=1, keepdim=True).float()

                agent_outs = ((1 - self.action_selector.epsilon) * agent_outs
                               + th.ones_like(agent_outs) * self.action_selector.epsilon/epsilon_action_num)
                agent_outs_interactive = ((1 - self.action_selector.epsilon) * agent_outs_interactive
                               + th.ones_like(agent_outs_interactive) * self.action_selector.epsilon/epsilon_action_num)
                agent_outs_interactive_ = ((1 - self.action_selector.epsilon) * agent_outs_interactive_
                               + th.ones_like(agent_outs_interactive_) * self.action_selector.epsilon/epsilon_action_num)

                if getattr(self.args, "mask_before_softmax", True):
                    # Zero out the unavailable actions
                    agent_outs[reshaped_avail_actions == 0] = 0.0
                    agent_outs_interactive[reshaped_avail_actions == 0] = 0.0
                    agent_outs_interactive_[reshaped_avail_actions == 0] = 0.0

        return agent_outs.view(ep_batch.batch_size, self.n_agents, -1), agent_outs_interactive.view(ep_batch.batch_size, self.n_agents, -1), agent_outs_interactive_.view(ep_batch.batch_size, self.n_agents, -1), \
            agent_outs_alone.view(ep_batch.batch_size, self.n_agents, -1)

    def init_hidden(self, batch_size):
        """Reset the controller's three recurrent hidden states.

        Fetches the initial hidden states from ``self.agent.init_hidden()``
        (which must yield exactly three tensors), prepends a dimension to
        each, and expands it to ``(batch_size, self.n_agents, -1)``. The
        results are stored on ``self.hidden_states``,
        ``self.hidden_states_alone`` and ``self.hidden_states_``.

        Note: ``expand`` produces views, not copies, so the stored tensors
        share storage with whatever the agent returned.
        """
        initial, initial_alone, initial_ = self.agent.init_hidden()
        # One shared target shape for all three states.
        target_shape = (batch_size, self.n_agents, -1)

        self.hidden_states = initial.unsqueeze(0).expand(*target_shape)  # bav
        self.hidden_states_alone = initial_alone.unsqueeze(0).expand(*target_shape)
        self.hidden_states_ = initial_.unsqueeze(0).expand(*target_shape)  # bav
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src_code/controllers/basic_controller_interactive.py [490:530]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if self.agent_output_type == "pi_logits":
            assert(False)
            #TODO: NOT IMPLEMENTED YET
            if getattr(self.args, "mask_before_softmax", True):
                # Make the logits for unavailable actions very negative to minimise their effect on the softmax
                reshaped_avail_actions = avail_actions.reshape(ep_batch.batch_size * self.n_agents, -1)
                agent_outs[reshaped_avail_actions == 0] = -1e10
                agent_outs_interactive[reshaped_avail_actions == 0] = -1e10
                agent_outs_interactive_[reshaped_avail_actions == 0] = -1e10

            agent_outs = th.nn.functional.softmax(agent_outs, dim=-1)
            agent_outs_interactive = th.nn.functional.softmax(agent_outs_interactive, dim=-1)
            agent_outs_interactive_ = th.nn.functional.softmax(agent_outs_interactive_, dim=-1)
            if not test_mode:
                # Epsilon floor
                epsilon_action_num = agent_outs.size(-1)
                if getattr(self.args, "mask_before_softmax", True):
                    # With probability epsilon, we will pick an available action uniformly
                    epsilon_action_num = reshaped_avail_actions.sum(dim=1, keepdim=True).float()

                agent_outs = ((1 - self.action_selector.epsilon) * agent_outs
                               + th.ones_like(agent_outs) * self.action_selector.epsilon/epsilon_action_num)
                agent_outs_interactive = ((1 - self.action_selector.epsilon) * agent_outs_interactive
                               + th.ones_like(agent_outs_interactive) * self.action_selector.epsilon/epsilon_action_num)
                agent_outs_interactive_ = ((1 - self.action_selector.epsilon) * agent_outs_interactive_
                               + th.ones_like(agent_outs_interactive_) * self.action_selector.epsilon/epsilon_action_num)

                if getattr(self.args, "mask_before_softmax", True):
                    # Zero out the unavailable actions
                    agent_outs[reshaped_avail_actions == 0] = 0.0
                    agent_outs_interactive[reshaped_avail_actions == 0] = 0.0
                    agent_outs_interactive_[reshaped_avail_actions == 0] = 0.0

        return agent_outs.view(ep_batch.batch_size, self.n_agents, -1), agent_outs_interactive.view(ep_batch.batch_size, self.n_agents, -1), agent_outs_interactive_.view(ep_batch.batch_size, self.n_agents, -1), \
            agent_outs_alone.view(ep_batch.batch_size, self.n_agents, -1)

    def init_hidden(self, batch_size):
        """Initialise the three hidden-state tensors for a batch.

        ``self.agent.init_hidden()`` is unpacked into exactly three tensors.
        Each gets a new leading dimension via ``unsqueeze(0)`` and is then
        expanded (as a view, without copying) to
        ``(batch_size, self.n_agents, -1)`` before being stored on
        ``self.hidden_states``, ``self.hidden_states_alone`` and
        ``self.hidden_states_`` respectively.
        """
        h_main, h_alone, h_second = self.agent.init_hidden()
        dims = (batch_size, self.n_agents, -1)

        expanded_main = h_main.unsqueeze(0).expand(*dims)  # bav
        self.hidden_states = expanded_main

        expanded_alone = h_alone.unsqueeze(0).expand(*dims)
        self.hidden_states_alone = expanded_alone

        expanded_second = h_second.unsqueeze(0).expand(*dims)  # bav
        self.hidden_states_ = expanded_second
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



