def get_individual_q()

in src_code/controllers/basic_controller_interactive.py [0:0]


    def get_individual_q(self, ep_batch, t, test_mode=False):
        agent_inputs = self._build_inputs(ep_batch, t)
        agent_alone_inputs = self._build_alone_inputs(ep_batch, t)
        avail_actions = ep_batch["avail_actions"][:, t]
        agent_outs, agent_outs_interactive, agent_outs_interactive_, agent_outs_alone, \
            self.hidden_states, self.hidden_states_alone, self.hidden_states_ = self.agent.get_individual_q(
                agent_inputs, agent_alone_inputs,
                self.hidden_states, self.hidden_states_alone, self.hidden_states_)
        # Subtract the interactive Q-value estimate (agent_outs_interactive_) from the full agent outputs
        agent_outs = agent_outs - agent_outs_interactive_

        # Softmax the agent outputs if they're policy logits
        if self.agent_output_type == "pi_logits":
            # TODO: the pi_logits path below is not implemented yet for this controller
            assert False, "get_individual_q does not support pi_logits outputs"
            if getattr(self.args, "mask_before_softmax", True):
                # Make the logits for unavailable actions very negative to minimise their effect on the softmax
                reshaped_avail_actions = avail_actions.reshape(ep_batch.batch_size * self.n_agents, -1)
                agent_outs[reshaped_avail_actions == 0] = -1e10
                agent_outs_interactive[reshaped_avail_actions == 0] = -1e10
                agent_outs_interactive_[reshaped_avail_actions == 0] = -1e10

            agent_outs = th.nn.functional.softmax(agent_outs, dim=-1)
            agent_outs_interactive = th.nn.functional.softmax(agent_outs_interactive, dim=-1)
            agent_outs_interactive_ = th.nn.functional.softmax(agent_outs_interactive_, dim=-1)
            if not test_mode:
                # Epsilon floor
                epsilon_action_num = agent_outs.size(-1)
                if getattr(self.args, "mask_before_softmax", True):
                    # With probability epsilon, we will pick an available action uniformly
                    epsilon_action_num = reshaped_avail_actions.sum(dim=1, keepdim=True).float()

                agent_outs = ((1 - self.action_selector.epsilon) * agent_outs
                               + th.ones_like(agent_outs) * self.action_selector.epsilon/epsilon_action_num)
                agent_outs_interactive = ((1 - self.action_selector.epsilon) * agent_outs_interactive
                               + th.ones_like(agent_outs_interactive) * self.action_selector.epsilon/epsilon_action_num)
                agent_outs_interactive_ = ((1 - self.action_selector.epsilon) * agent_outs_interactive_
                               + th.ones_like(agent_outs_interactive_) * self.action_selector.epsilon/epsilon_action_num)

                if getattr(self.args, "mask_before_softmax", True):
                    # Zero out the unavailable actions
                    agent_outs[reshaped_avail_actions == 0] = 0.0
                    agent_outs_interactive[reshaped_avail_actions == 0] = 0.0
                    agent_outs_interactive_[reshaped_avail_actions == 0] = 0.0

        # Reshape each output from (batch_size * n_agents, n_actions) to (batch_size, n_agents, n_actions)
        return agent_outs.view(ep_batch.batch_size, self.n_agents, -1), \
            agent_outs_interactive.view(ep_batch.batch_size, self.n_agents, -1), \
            agent_outs_interactive_.view(ep_batch.batch_size, self.n_agents, -1), \
            agent_outs_alone.view(ep_batch.batch_size, self.n_agents, -1)
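
A minimal usage sketch, assuming a PyMARL-style controller and EpisodeBatch: the names mac, batch, and the lists below are illustrative, and init_hidden is assumed to reset hidden_states, hidden_states_alone, and hidden_states_ before the rollout. This is not part of the file above.

    # Hypothetical training-loop usage (mac is an instance of this controller)
    q_full, q_inter, q_inter_, q_alone = [], [], [], []
    mac.init_hidden(batch.batch_size)  # assumed to reset all three hidden states
    for t in range(batch.max_seq_length):
        outs = mac.get_individual_q(batch, t)
        q_full.append(outs[0])
        q_inter.append(outs[1])
        q_inter_.append(outs[2])
        q_alone.append(outs[3])
    # Stack over time: shape (batch_size, T, n_agents, n_actions)
    q_full = th.stack(q_full, dim=1)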