in training/run_pseudo_labelling.py [0:0]
def __post_init__(self):
    """Resolve the deprecated `--attn_type` argument into `--attn_implementation`.

    Backwards-compatibility shim: if the caller passed the old `--attn_type`
    flag (and no `--attn_implementation`), translate it to the new name and
    emit a deprecation warning. Passing both flags is an error.

    Raises:
        ValueError: if `--attn_type` holds an unrecognized value, or if both
            `--attn_type` and `--attn_implementation` were specified.
    """
    if self.attn_type is not None and self.attn_implementation is None:
        # Map each legacy attn_type value onto its attn_implementation equivalent.
        if self.attn_type == "flash_attn":
            # Legacy "flash_attn" meant Flash Attention via PyTorch SDPA.
            self.attn_implementation = "sdpa"
        elif self.attn_type == "flash_attn_2":
            self.attn_implementation = "flash_attention_2"
        elif self.attn_type in [None, "eager", "sdpa", "flash_attention_2"]:
            # Already a valid attn_implementation value — pass it through unchanged.
            self.attn_implementation = self.attn_type
        else:
            raise ValueError(
                f"Argument `--attn_type` is deprecated, and set to an invalid option `{self.attn_type}`. You should omit the argument `--attn_type`, and instead set `--attn_implementation` to one of the following:\n"
                "1. `eager` or `None`: default Transformers attention implementation.\n"
                "2. `sdpa`: Flash Attention through PyTorch SDPA. Requires `torch>=2.1`. Recommended for hardware where Flash Attention 2 is not supported, e.g. Turing GPUs, (T4, RTX 2080).\n"
                "3. `flash_attn_2`: Flash Attention 2 through the Flash Attention package https://github.com/Dao-AILab/flash-attention. **Always** recommended on supported hardware (Ampere, Ada, or Hopper GPUs, e.g., A100, RTX 3090, RTX 4090, H100)."
            )
        # Tell the user which new value was inferred so they can migrate their command line.
        warnings.warn(
            f"Argument `--attn_type` is deprecated. Use `--attn_implementation` instead. Inferring `--attn_implementation={self.attn_implementation}` from argument `--attn_type={self.attn_type}`."
        )
    elif self.attn_type is not None and self.attn_implementation is not None:
        raise ValueError(
            "`--attn_type` and `--attn_implementation` are both specified. Only the argument `--attn_implementation` should be used."
        )