mesh_tensorflow/transformer/heterogeneous_moe.py [70:90]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        moe_gating=moe_gating,
        moe_num_experts=num_experts,
        moe_loss_coef=loss_coef,
        moe_hidden_size=hidden_size,
        moe_group_size=group_size,
        moe_min_expert_capacity=min_expert_capacity,
        moe_capacity_factor_train=capacity_factor_train,
        moe_capacity_factor_eval=capacity_factor_eval,
        moe_use_second_place_loss=use_second_place_loss,
        moe_second_policy_train=second_policy_train,
        moe_second_policy_eval=second_policy_eval,
        moe_second_threshold_train=second_threshold_train,
        moe_second_threshold_eval=second_threshold_eval,
        moe_dropout_rate=dropout_rate,
        moe_switch_policy_train=switch_policy_train,
        moe_switch_policy_eval=switch_policy_eval,
        moe_switch_dropout=switch_dropout,
        moe_switch_temperature=switch_temperature,
        moe_switch_jitter=switch_jitter,
        moe_output_dim=output_dim,
        moe_ntlb_top_k=ntlb_top_k,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



mesh_tensorflow/transformer/moe.py [70:90]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        moe_gating=moe_gating,
        moe_num_experts=num_experts,
        moe_loss_coef=loss_coef,
        moe_hidden_size=hidden_size,
        moe_group_size=group_size,
        moe_min_expert_capacity=min_expert_capacity,
        moe_capacity_factor_train=capacity_factor_train,
        moe_capacity_factor_eval=capacity_factor_eval,
        moe_use_second_place_loss=use_second_place_loss,
        moe_second_policy_train=second_policy_train,
        moe_second_policy_eval=second_policy_eval,
        moe_second_threshold_train=second_threshold_train,
        moe_second_threshold_eval=second_threshold_eval,
        moe_dropout_rate=dropout_rate,
        moe_switch_policy_train=switch_policy_train,
        moe_switch_policy_eval=switch_policy_eval,
        moe_switch_dropout=switch_dropout,
        moe_switch_temperature=switch_temperature,
        moe_switch_jitter=switch_jitter,
        moe_output_dim=output_dim,
        moe_ntlb_top_k=ntlb_top_k,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



