Skip to content

Commit 65b3dc4

Browse files
authored
scheduled removal of DeepSpeedPlugin.cpu_offload* parameters (#9244)
1 parent b046bd0 commit 65b3dc4

File tree

5 files changed

+10
-33
lines changed

5 files changed

+10
-33
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
251251
- Removed deprecated property `ModelCheckpoint.period` in favor of `ModelCheckpoint.every_n_epochs` ([#9213](https://github.com/PyTorchLightning/pytorch-lightning/pull/9213))
252252

253253

254+
- Removed deprecated properties `DeepSpeedPlugin.cpu_offload*` in favor of `offload_optimizer`, `offload_parameters` and `pin_memory` ([#9244](https://github.com/PyTorchLightning/pytorch-lightning/pull/9244))
254255

255256
### Fixed
256257

docs/source/advanced/advanced_gpu.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,9 @@ You can also modify the ZeRO-Offload parameters via the plugin as below.
306306
307307
model = MyModel()
308308
trainer = Trainer(
309-
gpus=4, plugins=DeepSpeedPlugin(cpu_offload=True, allgather_bucket_size=5e8, reduce_bucket_size=5e8), precision=16
309+
gpus=4,
310+
plugins=DeepSpeedPlugin(offload_optimizer=True, allgather_bucket_size=5e8, reduce_bucket_size=5e8),
311+
precision=16,
310312
)
311313
trainer.fit(model)
312314
@@ -581,7 +583,7 @@ This saves memory when training larger models, however requires using a checkpoi
581583
gpus=4,
582584
plugins=DeepSpeedPlugin(
583585
stage=3,
584-
cpu_offload=True, # Enable CPU Offloading
586+
offload_optimizer=True, # Enable CPU Offloading
585587
cpu_checkpointing=True, # (Optional) offload activations to CPU
586588
),
587589
precision=16,
@@ -659,7 +661,7 @@ In some cases you may want to define your own DeepSpeed Config, to access all pa
659661
},
660662
"zero_optimization": {
661663
"stage": 2, # Enable Stage 2 ZeRO (Optimizer/Gradient state partitioning)
662-
"cpu_offload": True, # Enable Offloading optimizer state/calculation to the host CPU
664+
"offload_optimizer": True, # Enable Offloading optimizer state/calculation to the host CPU
663665
"contiguous_gradients": True, # Reduce gradient fragmentation.
664666
"overlap_comm": True, # Overlap reduce/backward operation of gradients for speed.
665667
"allgather_bucket_size": 2e8, # Number of elements to all gather at once.

pytorch_lightning/plugins/training_type/deepspeed.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from pytorch_lightning.utilities.exceptions import MisconfigurationException
3838
from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
3939
from pytorch_lightning.utilities.types import LRSchedulerTypeTuple
40-
from pytorch_lightning.utilities.warnings import _warn, LightningDeprecationWarning, rank_zero_warn, WarningCache
40+
from pytorch_lightning.utilities.warnings import rank_zero_warn, WarningCache
4141

4242
warning_cache = WarningCache()
4343

@@ -124,9 +124,6 @@ def __init__(
124124
contiguous_memory_optimization: bool = False,
125125
synchronize_checkpoint_boundary: bool = False,
126126
load_full_weights: bool = False,
127-
cpu_offload: bool = False,
128-
cpu_offload_params: bool = False,
129-
cpu_offload_use_pin_memory: bool = False,
130127
) -> None:
131128
"""
132129
Provides capabilities to run training using the DeepSpeed library,
@@ -263,17 +260,6 @@ def __init__(
263260
"To use the DeepSpeed plugin, you must have DeepSpeed installed. pip install deepspeed"
264261
)
265262

266-
if cpu_offload or cpu_offload_params or cpu_offload_use_pin_memory:
267-
_warn(
268-
"The usage of `cpu_offload`, `cpu_offload_params`, and `cpu_offload_use_pin_memory` "
269-
"is deprecated since v1.4 and will be removed in v1.5."
270-
" From now on use `offload_optimizer`, `offload_parameters` and `pin_memory`.",
271-
category=LightningDeprecationWarning,
272-
)
273-
offload_optimizer = cpu_offload
274-
offload_parameters = cpu_offload_params
275-
pin_memory = cpu_offload_use_pin_memory
276-
277263
super().__init__(
278264
parallel_devices=parallel_devices,
279265
num_nodes=num_nodes,

tests/deprecated_api/test_remove_1-5.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@
1616

1717
from pytorch_lightning import Trainer
1818
from pytorch_lightning.core.decorators import auto_move_data
19-
from pytorch_lightning.plugins import DeepSpeedPlugin
2019
from tests.deprecated_api import no_deprecated_call
2120
from tests.helpers import BoringDataModule, BoringModel
22-
from tests.helpers.runif import RunIf
2321

2422

2523
def test_v1_5_0_auto_move_data():
@@ -43,16 +41,6 @@ def test_v1_5_0_datamodule_setter():
4341
assert any("The `LightningModule.datamodule`" in w for w in warning_cache)
4442

4543

46-
@RunIf(deepspeed=True)
47-
@pytest.mark.parametrize(
48-
"params", [dict(cpu_offload=True), dict(cpu_offload_params=True), dict(cpu_offload_use_pin_memory=True)]
49-
)
50-
def test_v1_5_0_deepspeed_cpu_offload(tmpdir, params):
51-
52-
with pytest.deprecated_call(match="is deprecated since v1.4 and will be removed in v1.5"):
53-
DeepSpeedPlugin(**params)
54-
55-
5644
def test_v1_5_0_distributed_backend_trainer_flag():
5745
with pytest.deprecated_call(match="has been deprecated and will be removed in v1.5."):
5846
Trainer(distributed_backend="ddp_cpu")

tests/plugins/test_deepspeed_plugin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -372,13 +372,13 @@ def test_deepspeed_custom_activation_checkpointing_params(tmpdir):
372372

373373
@RunIf(min_gpus=1, deepspeed=True)
374374
def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
375-
"""Ensure if we use a config and turn off cpu_offload, that this is set to False within the config."""
375+
"""Ensure if we use a config and turn off offload_optimizer, that this is set to False within the config."""
376376

377-
deepspeed_zero_config["zero_optimization"]["cpu_offload"] = False
377+
deepspeed_zero_config["zero_optimization"]["offload_optimizer"] = False
378378

379379
class TestCallback(Callback):
380380
def on_before_accelerator_backend_setup(self, trainer, pl_module) -> None:
381-
assert trainer.training_type_plugin.config["zero_optimization"]["cpu_offload"] is False
381+
assert trainer.training_type_plugin.config["zero_optimization"]["offload_optimizer"] is False
382382
raise SystemExit()
383383

384384
model = BoringModel()

0 commit comments

Comments (0)