in nasrec/base_searcher.py [0:0]
def mutate_arc(self, parent):
    """Return a copy of ``parent`` in which exactly one token was re-sampled.

    A block index and a token name are drawn at random, then a new value for
    that token is sampled repeatedly until ``self._action_equal`` reports
    that it differs from the current value. ``parent`` is not modified; the
    change is applied to a deep copy.

    Args:
        parent: Architecture description, indexable as
            ``parent[block_id][token_name]``.

    Returns:
        A ``(vecs, child)`` tuple: ``child`` is the mutated deep copy and
        ``vecs`` is ``self.dicts_to_vecs(child)``.
    """

    def _draw_index(num_options):
        # One-hot multinomial draw with equal weights, i.e. a uniform index
        # in [0, num_options). Kept as multinomial + argmax so the random
        # stream matches what seeded runs produced before.
        return np.argmax(
            np.random.multinomial(1, [1.0 / num_options] * num_options)
        )

    def _has_single_option(name):
        # A "cin" / "attention" token whose every sub-choice has only one
        # option can never differ from its current value, so step 3 below
        # would never terminate for it.
        if name == "cin":
            return (
                len(self.micro_cin_option.arc) == 1
                and len(self.micro_cin_option.num_of_layers) == 1
            )
        if name == "attention":
            return all(
                self.att_num_tokens[key] == 1
                for key in ("head", "layer", "emb", "drop")
            )
        return False

    child = deepcopy(parent)

    # 1. choose block to mutate
    block_id = np.random.choice(self.num_blocks, 1)[0]
    # 2. choose one token of a block to mutate (e.g., block_type, dense_feat)
    token_name = np.random.choice(self.token_names, 1)[0]
    # Both constraints are checked in one loop: re-drawing the token because
    # it has a single option may land on "skip_connect" while block 0 is
    # still selected, so the block-0 check has to be applied again.
    while True:
        if token_name == "skip_connect" and block_id == 0:
            # NOTE(review): presumably block 0 has no earlier block to
            # connect to -- confirm. Both block and token are re-drawn.
            block_id = np.random.choice(self.num_blocks, 1)[0]
            token_name = np.random.choice(self.token_names, 1)[0]
        elif _has_single_option(token_name):
            token_name = np.random.choice(self.token_names, 1)[0]
        else:
            break

    # 3. mutate the corresponding token
    current_action = child[block_id][token_name]
    new_action = current_action
    # Starts out equal by construction, so at least one sample is drawn.
    while self._action_equal(new_action, current_action):
        if token_name in ("block_type", "mlp_dense", "mlp_emb"):
            new_action = _draw_index(self.num_tokens[token_name])
        elif token_name == "cin":
            new_action = {
                "width": _draw_index(len(self.micro_cin_option.arc)),
                "depth": _draw_index(len(self.micro_cin_option.num_of_layers)),
            }
        elif token_name == "attention":
            new_action = {
                key: _draw_index(self.att_num_tokens[key])
                for key in ("head", "layer", "emb", "drop")
            }
        else:
            # Remaining tokens are 0/1 vectors with independent fair bits.
            new_action = np.random.binomial(1, 0.5, self.num_tokens[token_name])
    child[block_id][token_name] = new_action

    vecs = self.dicts_to_vecs(child)
    return vecs, child