in optimum/tpu/modeling_gemma.py
def __init__(self, config, rank=0, world_size=1):
    super().__init__()
    self.config = config
    self.rank = rank
    self.world_size = world_size
    self.hidden_size = config.hidden_size
    self.intermediate_size = config.intermediate_size
    # gate/up projections are column-parallel: each rank holds a shard of intermediate_size
    self.gate_proj = ColumnParallelLinear.create(
        self.hidden_size, self.intermediate_size, bias=False, rank=self.rank, world_size=self.world_size
    )
    self.up_proj = ColumnParallelLinear.create(
        self.hidden_size, self.intermediate_size, bias=False, rank=self.rank, world_size=self.world_size
    )
    # down projection is row-parallel: it consumes the sharded intermediate activations
    self.down_proj = RowParallelLinear.create(
        self.intermediate_size, self.hidden_size, bias=False, rank=self.rank, world_size=self.world_size
    )
    if config.hidden_activation is None:
        logger.warning_once(
            "`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n"
            "Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n"
            "`config.hidden_activation` if you want to override this behaviour.\n"
            "See https://github.com/huggingface/transformers/pull/29402 for more details."
        )
        config.hidden_activation = "gelu_pytorch_tanh"
    hidden_activation = config.hidden_activation
    self.act_fn = ACT2FN[hidden_activation]
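
The projections follow the usual Megatron-style sharding: gate_proj and up_proj are column-parallel, so each rank computes only its slice of intermediate_size, while down_proj is row-parallel and combines the sharded activations back to hidden_size. The forward method is not part of this excerpt; the sketch below is only an assumption based on the standard Gemma MLP formulation in transformers:

    def forward(self, x):
        # Sketch only: each rank computes its shard of the gated intermediate activations;
        # the row-parallel down_proj then reduces the partial results across ranks.
        return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))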