in src/optimum/nvidia/export/config.py [0:0]
def sharded(config: "ExportConfig", tp: int = 1, pp: int = 1) -> "ExportConfig":
    """
    Attach a parallelization (sharding) layout to an export configuration.

    The world size handed to `ShardingInfo` is derived as ``tp * pp``.

    :param config: `ExportConfig` the sharding strategy should be attached to
    :param tp: Tensor Parallelism degree to apply (`int` >= 1)
    :param pp: Pipeline Parallelism degree to apply (`int` >= 1)
    :return: the value returned by `config.with_sharding` (annotated as `ExportConfig`)
    :raises ValueError: if `tp` or `pp` is lower than 1
    """
    # Reject invalid degrees before building anything; tensor parallelism is checked first.
    if tp < 1:
        raise ValueError(f"Tensor Parallelism (tp) should be >= 1 (got: tp={tp})")

    if pp < 1:
        raise ValueError(f"Pipeline Parallelism (pp) should be >= 1 (got: pp={pp})")

    # Total number of ranks is the product of both parallelism degrees.
    world_size = tp * pp
    layout = ShardingInfo(tp_size=tp, pp_size=pp, world_size=world_size)
    return config.with_sharding(sharding=layout)