Add weights_only=True to torch.load (#3012)

svekars · c-p-i-o · commit acdc91bef8e3 · 2024-09-06T09:19:00.000-07:00
* Add weights_only=True to torch.load
diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py
@@ -151,7 +151,8 @@ def tokenize(self, path):
 model.load_state_dict(
     torch.load(
         model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
+        map_location=torch.device('cpu'),
+        weights_only=True
         )
     )
 
diff --git a/advanced_source/static_quantization_tutorial.rst b/advanced_source/static_quantization_tutorial.rst
@@ -286,7 +286,7 @@ We next define several helper functions to help with model evaluation. These mos
 
     def load_model(model_file): 
         model = MobileNetV2() 
-        state_dict = torch.load(model_file) 
+        state_dict = torch.load(model_file, weights_only=True) 
         model.load_state_dict(state_dict) 
         model.to('cpu') 
         return model  
diff --git a/beginner_source/basics/quickstart_tutorial.py b/beginner_source/basics/quickstart_tutorial.py
@@ -216,7 +216,7 @@ def test(dataloader, model, loss_fn):
 # the state dictionary into it.
 
 model = NeuralNetwork().to(device)
-model.load_state_dict(torch.load("model.pth"))
+model.load_state_dict(torch.load("model.pth", weights_only=True))
 
 #############################################################
 # This model can now be used to make predictions.
diff --git a/beginner_source/basics/saveloadrun_tutorial.py b/beginner_source/basics/saveloadrun_tutorial.py
@@ -32,9 +32,14 @@
 ##########################
 # To load model weights, you need to create an instance of the same model first, and then load the parameters
 # using ``load_state_dict()`` method.
+#
+# In the code below, we set ``weights_only=True`` to limit the
+# functions executed during unpickling to only those necessary for
+# loading weights. Using ``weights_only=True`` is considered
+# a best practice when loading weights.
 
 model = models.vgg16() # we do not specify ``weights``, i.e. create untrained model
-model.load_state_dict(torch.load('model_weights.pth'))
+model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
 model.eval()
 
 ###########################
@@ -50,9 +55,14 @@
 torch.save(model, 'model.pth')
 
 ########################
-# We can then load the model like this:
+# We can then load the model as demonstrated below.
+#
+# As described in `Saving and loading torch.nn.Modules <https://pytorch.org/docs/main/notes/serialization.html#saving-and-loading-torch-nn-modules>`__,
+# saving the ``state_dict`` is considered the best practice. However,
+# below we use ``weights_only=False`` because this involves loading the
+# model, which is a legacy use case for ``torch.save``.
 
-model = torch.load('model.pth')
+model = torch.load('model.pth', weights_only=False)
 
 ########################
 # .. note:: This approach uses Python `pickle <https://docs.python.org/3/library/pickle.html>`_ module when serializing the model, thus it relies on the actual class definition to be available when loading the model.
diff --git a/beginner_source/blitz/cifar10_tutorial.py b/beginner_source/blitz/cifar10_tutorial.py
@@ -221,7 +221,7 @@ def forward(self, x):
 # wasn't necessary here, we only did it to illustrate how to do so):
 
 net = Net()
-net.load_state_dict(torch.load(PATH))
+net.load_state_dict(torch.load(PATH, weights_only=True))
 
 ########################################################################
 # Okay, now let us see what the neural network thinks these examples above are:
diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py
@@ -192,7 +192,7 @@ def forward(self, x):
 model = Net().to(device)
 
 # Load the pretrained model
-model.load_state_dict(torch.load(pretrained_model, map_location=device))
+model.load_state_dict(torch.load(pretrained_model, map_location=device, weights_only=True))
 
 # Set the model in evaluation mode. In this case this is for the Dropout layers
 model.eval()
diff --git a/beginner_source/saving_loading_models.py b/beginner_source/saving_loading_models.py
@@ -153,7 +153,7 @@
 # .. code:: python
 #
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH))
+#    model.load_state_dict(torch.load(PATH, weights_only=True))
 #    model.eval()
 #
 # .. note::
@@ -206,7 +206,7 @@
 # .. code:: python
 #
 #    # Model class must be defined somewhere
-#    model = torch.load(PATH)
+#    model = torch.load(PATH, weights_only=False)
 #    model.eval()
 #
 # This save/load process uses the most intuitive syntax and involves the
@@ -290,7 +290,7 @@
 #    model = TheModelClass(*args, **kwargs)
 #    optimizer = TheOptimizerClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    model.load_state_dict(checkpoint['model_state_dict'])
 #    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
 #    epoch = checkpoint['epoch']
@@ -354,7 +354,7 @@
 #    optimizerA = TheOptimizerAClass(*args, **kwargs)
 #    optimizerB = TheOptimizerBClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    modelA.load_state_dict(checkpoint['modelA_state_dict'])
 #    modelB.load_state_dict(checkpoint['modelB_state_dict'])
 #    optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
@@ -407,7 +407,7 @@
 # .. code:: python
 #
 #    modelB = TheModelBClass(*args, **kwargs)
-#    modelB.load_state_dict(torch.load(PATH), strict=False)
+#    modelB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
 #
 # Partially loading a model or loading a partial model are common
 # scenarios when transfer learning or training a new complex model.
@@ -446,7 +446,7 @@
 #
 #    device = torch.device('cpu')
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH, map_location=device))
+#    model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
 #
 # When loading a model on a CPU that was trained with a GPU, pass
 # ``torch.device('cpu')`` to the ``map_location`` argument in the
@@ -469,7 +469,7 @@
 #
 #    device = torch.device("cuda")
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH))
+#    model.load_state_dict(torch.load(PATH, weights_only=True))
 #    model.to(device)
 #    # Make sure to call input = input.to(device) on any input tensors that you feed to the model
 #
@@ -497,7 +497,7 @@
 #
 #    device = torch.device("cuda")
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH, map_location="cuda:0"))  # Choose whatever GPU device number you want
+#    model.load_state_dict(torch.load(PATH, weights_only=True, map_location="cuda:0"))  # Choose whatever GPU device number you want
 #    model.to(device)
 #    # Make sure to call input = input.to(device) on any input tensors that you feed to the model
 #
diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py
@@ -209,7 +209,7 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
         print(f'Best val Acc: {best_acc:4f}')
 
         # load best model weights
-        model.load_state_dict(torch.load(best_model_params_path))
+        model.load_state_dict(torch.load(best_model_params_path, weights_only=True))
     return model
 
 
diff --git a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py
@@ -397,7 +397,7 @@ def pack_hook(tensor):
     return name
 
 def unpack_hook(name):
-    return torch.load(name)
+    return torch.load(name, weights_only=True)
 
 
 ######################################################################
@@ -420,7 +420,7 @@ def pack_hook(tensor):
     return name
 
 def unpack_hook(name):
-    tensor = torch.load(name)
+    tensor = torch.load(name, weights_only=True)
     os.remove(name)
     return tensor
 
@@ -462,7 +462,7 @@ def pack_hook(tensor):
     return temp_file
 
 def unpack_hook(temp_file):
-    return torch.load(temp_file.name)
+    return torch.load(temp_file.name, weights_only=True)
 
 
 ######################################################################
diff --git a/intermediate_source/ddp_tutorial.rst b/intermediate_source/ddp_tutorial.rst
@@ -214,7 +214,7 @@ and elasticity support, please refer to `TorchElastic <https://pytorch.org/elast
         # configure map_location properly
         map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}
         ddp_model.load_state_dict(
-            torch.load(CHECKPOINT_PATH, map_location=map_location))
+            torch.load(CHECKPOINT_PATH, map_location=map_location, weights_only=True))
 
         loss_fn = nn.MSELoss()
         optimizer = optim.SGD(ddp_model.parameters(), lr=0.001)
diff --git a/intermediate_source/tiatoolbox_tutorial.rst b/intermediate_source/tiatoolbox_tutorial.rst
@@ -368,7 +368,7 @@ The PatchPredictor class runs a CNN-based classifier written in PyTorch.
 
     # Users can load any PyTorch model architecture instead using the following script
     model = vanilla.CNNModel(backbone="resnet18", num_classes=9) # Importing model from torchvision.models.resnet18
-    model.load_state_dict(torch.load(weights_path, map_location="cpu"), strict=True)
+    model.load_state_dict(torch.load(weights_path, map_location="cpu", weights_only=True), strict=True)
     def preproc_func(img):
         img = PIL.Image.fromarray(img)
         img = transforms.ToTensor()(img)
diff --git a/prototype_source/fx_graph_mode_ptq_dynamic.py b/prototype_source/fx_graph_mode_ptq_dynamic.py
@@ -171,7 +171,8 @@ def tokenize(self, path):
 model.load_state_dict(
     torch.load(
         model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
+        map_location=torch.device('cpu'),
+        weights_only=True
         )
     )
 
diff --git a/prototype_source/fx_graph_mode_ptq_static.rst b/prototype_source/fx_graph_mode_ptq_static.rst
@@ -157,7 +157,7 @@ Download the `torchvision resnet18 model <https://download.pytorch.org/models/re
 
     def load_model(model_file):
         model = resnet18(pretrained=False)
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, weights_only=True)
         model.load_state_dict(state_dict)
         model.to("cpu")
         return model
@@ -320,15 +320,15 @@ We can now print the size and accuracy of the quantized model.
     # ModuleAttributeError: 'ConvReLU2d' object has no attribute '_modules'
     # save the whole model directly
     # torch.save(quantized_model, fx_graph_mode_model_file_path)
-    # loaded_quantized_model = torch.load(fx_graph_mode_model_file_path)
+    # loaded_quantized_model = torch.load(fx_graph_mode_model_file_path, weights_only=False)
 
     # save with state_dict
     # torch.save(quantized_model.state_dict(), fx_graph_mode_model_file_path)
     # import copy
     # model_to_quantize = copy.deepcopy(float_model)
     # prepared_model = prepare_fx(model_to_quantize, {"": qconfig})
     # loaded_quantized_model = convert_fx(prepared_model)
-    # loaded_quantized_model.load_state_dict(torch.load(fx_graph_mode_model_file_path))
+    # loaded_quantized_model.load_state_dict(torch.load(fx_graph_mode_model_file_path, weights_only=True))
 
     # save with script
     torch.jit.save(torch.jit.script(quantized_model), fx_graph_mode_model_file_path)
diff --git a/prototype_source/pt2e_quant_ptq.rst b/prototype_source/pt2e_quant_ptq.rst
@@ -274,7 +274,7 @@ and rename it to ``data/resnet18_pretrained_float.pth``.
 
     def load_model(model_file):
         model = resnet18(pretrained=False)
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, weights_only=True)
         model.load_state_dict(state_dict)
         model.to("cpu")
         return model
diff --git a/prototype_source/pt2e_quant_qat.rst b/prototype_source/pt2e_quant_qat.rst
@@ -172,7 +172,7 @@ prepare the data. These steps are very similar to the ones defined in the
 
     def load_model(model_file):
         model = resnet18(pretrained=False)
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, weights_only=True)
         model.load_state_dict(state_dict)
         return model
 
diff --git a/recipes_source/intel_neural_compressor_for_pytorch.rst b/recipes_source/intel_neural_compressor_for_pytorch.rst
@@ -115,7 +115,7 @@ In this tutorial, the LeNet model is used to demonstrate how to deal with *Intel
             return F.log_softmax(x, dim=1)
 
     model = Net()
-    model.load_state_dict(torch.load('./lenet_mnist_model.pth'))
+    model.load_state_dict(torch.load('./lenet_mnist_model.pth', weights_only=True))
 
 The pretrained model weight `lenet_mnist_model.pth` comes from
 `here <https://drive.google.com/drive/folders/1fn83DF14tWmit0RTKWRhPq5uVXt73e0h?usp=sharing>`_.
diff --git a/recipes_source/recipes/module_load_state_dict_tips.py b/recipes_source/recipes/module_load_state_dict_tips.py
@@ -39,15 +39,15 @@ def forward(self, x):
 # to ``torch.load``, the ``torch.device()`` context manager and the ``assign``
 # keyword argument to ``nn.Module.load_state_dict()``.
 
-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
 with torch.device('meta'):
   meta_m = SomeModule(1000)
 meta_m.load_state_dict(state_dict, assign=True)
 
 #############################################################################
 # Compare the snippet below to the one above:
 
-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
 m = SomeModule(1000)
 m.load_state_dict(state_dict)
 
@@ -71,7 +71,7 @@ def forward(self, x):
 # * Waiting for the entire checkpoint to be loaded into RAM before performing, for example, some per-tensor processing.
 
 start_time = time.time()
-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
 end_time = time.time()
 print(f"loading time without mmap={end_time - start_time}")
 
@@ -84,7 +84,7 @@ def forward(self, x):
 # storages will be memory-mapped.
 
 start_time = time.time()
-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
 end_time = time.time()
 print(f"loading time with mmap={end_time - start_time}")
 
diff --git a/recipes_source/recipes/save_load_across_devices.py b/recipes_source/recipes/save_load_across_devices.py
@@ -97,7 +97,7 @@ def forward(self, x):
 # Load
 device = torch.device('cpu')
 model = Net()
-model.load_state_dict(torch.load(PATH, map_location=device))
+model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
 
 
 ######################################################################
diff --git a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py
@@ -131,7 +131,7 @@ def forward(self, x):
 model = Net()
 optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
 
-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
 model.load_state_dict(checkpoint['model_state_dict'])
 optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
 epoch = checkpoint['epoch']
diff --git a/recipes_source/recipes/saving_and_loading_models_for_inference.py b/recipes_source/recipes/saving_and_loading_models_for_inference.py
@@ -117,7 +117,7 @@ def forward(self, x):
 
 # Load
 model = Net()
-model.load_state_dict(torch.load(PATH))
+model.load_state_dict(torch.load(PATH, weights_only=True))
 model.eval()
 
 
diff --git a/recipes_source/recipes/saving_multiple_models_in_one_file.py b/recipes_source/recipes/saving_multiple_models_in_one_file.py
@@ -128,7 +128,7 @@ def forward(self, x):
 optimModelA = optim.SGD(modelA.parameters(), lr=0.001, momentum=0.9)
 optimModelB = optim.SGD(modelB.parameters(), lr=0.001, momentum=0.9)
 
-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
 modelA.load_state_dict(checkpoint['modelA_state_dict'])
 modelB.load_state_dict(checkpoint['modelB_state_dict'])
 optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
diff --git a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py
@@ -124,7 +124,7 @@ def forward(self, x):
 # are loading into.
 # 
 
-netB.load_state_dict(torch.load(PATH), strict=False)
+netB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
 
 
 ######################################################################

Original file line number	Diff line number	Diff line change
`@@ -151,7 +151,8 @@ def tokenize(self, path):`
`151`	`151`	`model.load_state_dict(`
`152`	`152`	`torch.load(`
`153`	`153`	`model_data_filepath + 'word_language_model_quantize.pth',`
`154`		`- map_location=torch.device('cpu')`
	`154`	`+ map_location=torch.device('cpu'),`
	`155`	`+ weights_only=True`
`155`	`156`	`)`
`156`	`157`	`)`
`157`	`158`
Original file line number	Diff line number	Diff line change
`@@ -153,7 +153,7 @@`
`153`	`153`	`# .. code:: python`
`154`	`154`	`#`
`155`	`155`	`# model = TheModelClass(*args, **kwargs)`
`156`		`-# model.load_state_dict(torch.load(PATH))`
	`156`	`+# model.load_state_dict(torch.load(PATH, weights_only=True))`
`157`	`157`	`# model.eval()`
`158`	`158`	`#`
`159`	`159`	`# .. note::`
`@@ -206,7 +206,7 @@`
`206`	`206`	`# .. code:: python`
`207`	`207`	`#`
`208`	`208`	`# # Model class must be defined somewhere`
`209`		`-# model = torch.load(PATH)`
	`209`	`+# model = torch.load(PATH, weights_only=False)`
`210`	`210`	`# model.eval()`
`211`	`211`	`#`
`212`	`212`	`# This save/load process uses the most intuitive syntax and involves the`
`@@ -290,7 +290,7 @@`
`290`	`290`	`# model = TheModelClass(*args, **kwargs)`
`291`	`291`	`# optimizer = TheOptimizerClass(*args, **kwargs)`
`292`	`292`	`#`
`293`		`-# checkpoint = torch.load(PATH)`
	`293`	`+# checkpoint = torch.load(PATH, weights_only=True)`
`294`	`294`	`# model.load_state_dict(checkpoint['model_state_dict'])`
`295`	`295`	`# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])`
`296`	`296`	`# epoch = checkpoint['epoch']`
`@@ -354,7 +354,7 @@`
`354`	`354`	`# optimizerA = TheOptimizerAClass(*args, **kwargs)`
`355`	`355`	`# optimizerB = TheOptimizerBClass(*args, **kwargs)`
`356`	`356`	`#`
`357`		`-# checkpoint = torch.load(PATH)`
	`357`	`+# checkpoint = torch.load(PATH, weights_only=True)`
`358`	`358`	`# modelA.load_state_dict(checkpoint['modelA_state_dict'])`
`359`	`359`	`# modelB.load_state_dict(checkpoint['modelB_state_dict'])`
`360`	`360`	`# optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])`
`@@ -407,7 +407,7 @@`
`407`	`407`	`# .. code:: python`
`408`	`408`	`#`
`409`	`409`	`# modelB = TheModelBClass(*args, **kwargs)`
`410`		`-# modelB.load_state_dict(torch.load(PATH), strict=False)`
	`410`	`+# modelB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)`
`411`	`411`	`#`
`412`	`412`	`# Partially loading a model or loading a partial model are common`
`413`	`413`	`# scenarios when transfer learning or training a new complex model.`
`@@ -446,7 +446,7 @@`
`446`	`446`	`#`
`447`	`447`	`# device = torch.device('cpu')`
`448`	`448`	`# model = TheModelClass(*args, **kwargs)`
`449`		`-# model.load_state_dict(torch.load(PATH, map_location=device))`
	`449`	`+# model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))`
`450`	`450`	`#`
`451`	`451`	`# When loading a model on a CPU that was trained with a GPU, pass`
`452`	`452`	# ``torch.device('cpu')`` to the ``map_location`` argument in the
`@@ -469,7 +469,7 @@`
`469`	`469`	`#`
`470`	`470`	`# device = torch.device("cuda")`
`471`	`471`	`# model = TheModelClass(*args, **kwargs)`
`472`		`-# model.load_state_dict(torch.load(PATH))`
	`472`	`+# model.load_state_dict(torch.load(PATH, weights_only=True))`
`473`	`473`	`# model.to(device)`
`474`	`474`	`# # Make sure to call input = input.to(device) on any input tensors that you feed to the model`
`475`	`475`	`#`
`@@ -497,7 +497,7 @@`
`497`	`497`	`#`
`498`	`498`	`# device = torch.device("cuda")`
`499`	`499`	`# model = TheModelClass(*args, **kwargs)`
`500`		`-# model.load_state_dict(torch.load(PATH, map_location="cuda:0")) # Choose whatever GPU device number you want`
	`500`	`+# model.load_state_dict(torch.load(PATH, weights_only=True, map_location="cuda:0")) # Choose whatever GPU device number you want`
`501`	`501`	`# model.to(device)`
`502`	`502`	`# # Make sure to call input = input.to(device) on any input tensors that you feed to the model`
`503`	`503`	`#`