in optimum/graphcore/ipu_configuration.py [0:0]
def __init__(
self,
replication_factor: int = 1,
inference_replication_factor: int = 1,
gradient_accumulation_steps: int = 1,
layers_per_ipu: List[int] = [-1],
inference_layers_per_ipu: Optional[List[int]] = None,
ipus_per_replica: Optional[int] = None,
inference_ipus_per_replica: Optional[int] = None,
optimizer_state_offchip: bool = False,
replicated_tensor_sharding: bool = False,
matmul_proportion: Union[float, List[float]] = 0.2,
inference_matmul_proportion: Optional[Union[float, List[float]]] = None,
enable_half_partials: bool = True,
embedding_serialization_factor: Optional[int] = None,
inference_embedding_serialization_factor: Optional[int] = None,
serialized_embedding_splits_per_ipu: Optional[List[int]] = None,
inference_serialized_embedding_splits_per_ipu: Optional[List[int]] = None,
projection_serialization_factor: Optional[int] = None,
inference_projection_serialization_factor: Optional[int] = None,
serialized_projection_splits_per_ipu: Optional[List[int]] = None,
inference_serialized_projection_splits_per_ipu: Optional[List[int]] = None,
recompute_checkpoint_every_layer: bool = False,
device_iterations: int = 1,
inference_device_iterations: int = 1,
output_mode: str = "final",
seed: Optional[int] = None,
auto_loss_scaling: bool = False,
executable_cache_dir: str = "",
explicit_ir_inference: bool = False,
parallelize_kwargs: Optional[Dict[str, Any]] = None,
inference_parallelize_kwargs: Optional[Dict[str, Any]] = None,
**kwargs,