Path Lines of Code MANIFEST.in 3 benchmarks/__init__.py 1 benchmarks/datasets/__init__.py 1 benchmarks/datasets/mnist.py 23 benchmarks/datasets/wikitext2_data.py 73 benchmarks/experimental/benchmark_dataset.py 40 benchmarks/experimental/benchmark_mevo.py 107 benchmarks/experimental/experimental_async_approaches.py 597 benchmarks/experimental/offload.py 328 benchmarks/experimental/sync_batchnorm.py 28 benchmarks/golden_configs/__init__.py 1 benchmarks/golden_configs/lm_wikitext2.py 103 benchmarks/golden_configs/oss_mnist.py 8 benchmarks/models/__init__.py 1 benchmarks/models/transformer_lm.py 115 benchmarks/moe.py 98 benchmarks/oss.py 254 benchmarks/pipe.py 235 benchmarks/utils.py 124 codecov.yml 19 fairscale/__init__.py 5 fairscale/clib/fused_adam_cuda/compat.h 4 fairscale/clib/fused_adam_cuda/fused_adam_cuda.cpp 5 fairscale/clib/fused_adam_cuda/fused_adam_cuda_kernel.cu 246 fairscale/clib/fused_adam_cuda/multi_tensor_apply.cuh 111 fairscale/experimental/__init__.py 3 fairscale/experimental/nn/__init__.py 6 fairscale/experimental/nn/ampnet_pipe/__init__.py 2 fairscale/experimental/nn/ampnet_pipe/ampnet.py 303 fairscale/experimental/nn/ampnet_pipe/pipe.py 47 fairscale/experimental/nn/auto_shard.py 121 fairscale/experimental/nn/data_parallel/__init__.py 1 fairscale/experimental/nn/data_parallel/gossip/__init__.py 14 fairscale/experimental/nn/data_parallel/gossip/distributed.py 758 fairscale/experimental/nn/data_parallel/gossip/gossiper.py 181 fairscale/experimental/nn/data_parallel/gossip/graph_manager.py 197 fairscale/experimental/nn/data_parallel/gossip/mixing_manager.py 29 fairscale/experimental/nn/data_parallel/gossip/utils/__init__.py 9 fairscale/experimental/nn/data_parallel/gossip/utils/cuda_metering.py 57 fairscale/experimental/nn/data_parallel/gossip/utils/helpers.py 70 fairscale/experimental/nn/distributed_pipeline/__init__.py 3 fairscale/experimental/nn/distributed_pipeline/data.py 8 fairscale/experimental/nn/distributed_pipeline/graph.py 114 fairscale/experimental/nn/distributed_pipeline/loss.py 10 fairscale/experimental/nn/distributed_pipeline/partition_handler.py 178 fairscale/experimental/nn/distributed_pipeline/pipeline.py 117 fairscale/experimental/nn/distributed_pipeline/trace.py 80 fairscale/experimental/nn/mevo.py 317 fairscale/experimental/nn/offload.py 281 fairscale/experimental/nn/ssd_offload.py 197 fairscale/experimental/nn/sync_batchnorm.py 133 fairscale/experimental/optim/__init__.py 3 fairscale/experimental/optim/dynamic_loss_scaler.py 124 fairscale/experimental/tooling/__init__.py 1 fairscale/experimental/tooling/layer_memory_tracker.py 465 fairscale/nn/__init__.py 8 fairscale/nn/checkpoint/__init__.py 3 fairscale/nn/checkpoint/checkpoint_activations.py 167 fairscale/nn/checkpoint/checkpoint_utils.py 21 fairscale/nn/data_parallel/__init__.py 4 fairscale/nn/data_parallel/fsdp_optim_utils.py 137 fairscale/nn/data_parallel/fully_sharded_data_parallel.py 1260 fairscale/nn/data_parallel/sharded_ddp.py 352 fairscale/nn/misc/__init__.py 5 fairscale/nn/misc/flatten_params_wrapper.py 303 fairscale/nn/misc/param_bucket.py 145 fairscale/nn/model_parallel/__init__.py 19 fairscale/nn/model_parallel/cross_entropy.py 48 fairscale/nn/model_parallel/initialize.py 92 fairscale/nn/model_parallel/layers.py 225 fairscale/nn/model_parallel/mappings.py 69 fairscale/nn/model_parallel/random.py 114 fairscale/nn/model_parallel/utils.py 28 fairscale/nn/moe/__init__.py 4 fairscale/nn/moe/moe_layer.py 53 fairscale/nn/moe/top2gate.py 67 fairscale/nn/pipe/__init__.py 6 fairscale/nn/pipe/async_pipe.py 225 fairscale/nn/pipe/async_pipeline.py 65 fairscale/nn/pipe/async_schedule.py 364 fairscale/nn/pipe/balance/__init__.py 34 fairscale/nn/pipe/balance/blockpartition.py 36 fairscale/nn/pipe/balance/profile.py 77 fairscale/nn/pipe/batchnorm.py 98 fairscale/nn/pipe/checkpoint.py 165 fairscale/nn/pipe/copy.py 68 fairscale/nn/pipe/dependency.py 30 fairscale/nn/pipe/messages.py 115 fairscale/nn/pipe/microbatch.py 107 fairscale/nn/pipe/phony.py 15 fairscale/nn/pipe/pipe.py 182 fairscale/nn/pipe/pipeline.py 143 fairscale/nn/pipe/rpc.py 200 fairscale/nn/pipe/skip/__init__.py 3 fairscale/nn/pipe/skip/layout.py 48 fairscale/nn/pipe/skip/namespace.py 23 fairscale/nn/pipe/skip/portal.py 119 fairscale/nn/pipe/skip/skippable.py 179 fairscale/nn/pipe/skip/tracker.py 98 fairscale/nn/pipe/stream.py 55 fairscale/nn/pipe/types.py 52 fairscale/nn/pipe/worker.py 101 fairscale/nn/wrap/__init__.py 3 fairscale/nn/wrap/auto_wrap.py 108 fairscale/optim/__init__.py 13 fairscale/optim/adam.py 192 fairscale/optim/adascale.py 226 fairscale/optim/grad_scaler.py 247 fairscale/optim/layerwise_gradient_scaler.py 145 fairscale/optim/oss.py 339 fairscale/utils/__init__.py 1 fairscale/utils/containers.py 77 fairscale/utils/golden_testing_data.py 8 fairscale/utils/object.py 18 fairscale/utils/parallel.py 55 fairscale/utils/params.py 40 fairscale/utils/reduce_scatter_bucketer.py 106 fairscale/utils/state_dict.py 24 fairscale/utils/testing.py 495 fairscale/utils/testing_memory.py 14 fairscale/utils/version.py 12 fairscale/version.py 1 pyproject.toml 28 release_utils.py 49 setup.cfg 34 setup.py 59 stubs/torch/__init__.pyi 1856 stubs/torch/autograd/__init__.pyi 37 stubs/torch/autograd/grad_mode.pyi 15 stubs/torch/autograd/profiler.pyi 5 stubs/torch/backends/__init__.pyi 1 stubs/torch/backends/cudnn.pyi 3 stubs/torch/cuda/__init__.pyi 69 stubs/torch/cuda/amp/__init__.pyi 6 stubs/torch/cuda/amp/grad_scaler.pyi 12 stubs/torch/cuda/comm/__init__.pyi 19 stubs/torch/distributed/__init__.pyi 68 stubs/torch/distributed/distributed_c10d.pyi 4 stubs/torch/distributed/nn/functional.pyi 4 stubs/torch/distributed/rpc/__init__.pyi 44 stubs/torch/functional.pyi 5 stubs/torch/futures.pyi 3 stubs/torch/jit.pyi 2 stubs/torch/multiprocessing/__init__.pyi 10 stubs/torch/nn/__init__.pyi 4 stubs/torch/nn/common_types.pyi 22 stubs/torch/nn/functional.pyi 177 stubs/torch/nn/modules/__init__.pyi 48 stubs/torch/nn/modules/activation.pyi 117 stubs/torch/nn/modules/adaptive.pyi 22 stubs/torch/nn/modules/batchnorm.pyi 26 stubs/torch/nn/modules/container.pyi 81 stubs/torch/nn/modules/conv.pyi 59 stubs/torch/nn/modules/distance.pyi 15 stubs/torch/nn/modules/dropout.pyi 22 stubs/torch/nn/modules/flatten.pyi 9 stubs/torch/nn/modules/fold.pyi 22 stubs/torch/nn/modules/instancenorm.pyi 10 stubs/torch/nn/modules/linear.pyi 28 stubs/torch/nn/modules/loss.pyi 100 stubs/torch/nn/modules/module.pyi 57 stubs/torch/nn/modules/normalization.pyi 42 stubs/torch/nn/modules/padding.pyi 43 stubs/torch/nn/modules/pixelshuffle.pyi 7 stubs/torch/nn/modules/pooling.pyi 147 stubs/torch/nn/modules/rnn.pyi 81 stubs/torch/nn/modules/sparse.pyi 42 stubs/torch/nn/modules/upsampling.pyi 18 stubs/torch/nn/parallel/__init__.pyi 5 stubs/torch/nn/parallel/common_types.pyi 4 stubs/torch/nn/parallel/data_parallel.pyi 18 stubs/torch/nn/parallel/distributed.pyi 22 stubs/torch/nn/parallel/parallel_apply.pyi 5 stubs/torch/nn/parallel/replicate.pyi 6 stubs/torch/nn/parallel/scatter_gather.pyi 10 stubs/torch/nn/parameter.pyi 18 stubs/torch/optim/__init__.pyi 5 stubs/torch/optim/adam.pyi 4 stubs/torch/optim/lr_scheduler.pyi 31 stubs/torch/optim/optimizer.pyi 13 stubs/torch/optim/sgd.pyi 3 stubs/torch/random.pyi 10 stubs/torch/serialization.pyi 7 stubs/torch/testing/__init__.pyi 2 stubs/torch/utils/__init__.pyi 1 stubs/torch/utils/checkpoint.pyi 7 stubs/torch/utils/data/__init__.pyi 6 stubs/torch/utils/data/dataloader.pyi 29 stubs/torch/utils/data/dataset.pyi 22 stubs/torch/utils/data/distributed.pyi 8 stubs/torch/utils/data/sampler.pyi 17 stubs/torch/version.pyi 3