Skip to content

Commit f82f562

Browse files
Add mps device (#1064)
* Add mps device * Add --mps to run_python_examples.sh * Update imagenet with mps device * Use curl in run_python_examples.sh to accommodate macOS devices * Fix for https://github.com/pytorch/examples/issues/1060
1 parent 5a06e9c commit f82f562

File tree

16 files changed

+150
-44
lines changed

16 files changed

+150
-44
lines changed

dcgan/README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ usage: main.py [-h] --dataset DATASET --dataroot DATAROOT [--workers WORKERS]
2424
[--batchSize BATCHSIZE] [--imageSize IMAGESIZE] [--nz NZ]
2525
[--ngf NGF] [--ndf NDF] [--niter NITER] [--lr LR]
2626
[--beta1 BETA1] [--cuda] [--ngpu NGPU] [--netG NETG]
27-
[--netD NETD]
27+
[--netD NETD] [--mps]
2828
2929
optional arguments:
3030
-h, --help show this help message and exit
@@ -40,6 +40,7 @@ optional arguments:
4040
--lr LR learning rate, default=0.0002
4141
--beta1 BETA1 beta1 for adam. default=0.5
4242
--cuda enables cuda
43+
--mps enables macOS GPU
4344
--ngpu NGPU number of GPUs to use
4445
--netG NETG path to netG (to continue training)
4546
--netD NETD path to netD (to continue training)

dcgan/main.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,15 @@
2525
parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
2626
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
2727
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
28-
parser.add_argument('--cuda', action='store_true', help='enables cuda')
28+
parser.add_argument('--cuda', action='store_true', default=False, help='enables cuda')
2929
parser.add_argument('--dry-run', action='store_true', help='check a single training cycle works')
3030
parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use')
3131
parser.add_argument('--netG', default='', help="path to netG (to continue training)")
3232
parser.add_argument('--netD', default='', help="path to netD (to continue training)")
3333
parser.add_argument('--outf', default='.', help='folder to output images and model checkpoints')
3434
parser.add_argument('--manualSeed', type=int, help='manual seed')
3535
parser.add_argument('--classes', default='bedroom', help='comma separated list of classes for the lsun data set')
36+
parser.add_argument('--mps', action='store_true', default=False, help='enables macOS GPU training')
3637

3738
opt = parser.parse_args()
3839
print(opt)
@@ -52,6 +53,9 @@
5253

5354
if torch.cuda.is_available() and not opt.cuda:
5455
print("WARNING: You have a CUDA device, so you should probably run with --cuda")
56+
57+
if torch.backends.mps.is_available() and not opt.mps:
58+
print("WARNING: You have mps device, to enable macOS GPU run with --mps")
5559

5660
if opt.dataroot is None and str(opt.dataset).lower() != 'fake':
5761
raise ValueError("`dataroot` parameter is required for dataset \"%s\"" % opt.dataset)
@@ -102,8 +106,14 @@
102106
assert dataset
103107
dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize,
104108
shuffle=True, num_workers=int(opt.workers))
109+
use_mps = opt.mps and torch.backends.mps.is_available()
110+
if opt.cuda:
111+
device = torch.device("cuda:0")
112+
elif use_mps:
113+
device = torch.device("mps")
114+
else:
115+
device = torch.device("cpu")
105116

106-
device = torch.device("cuda:0" if opt.cuda else "cpu")
107117
ngpu = int(opt.ngpu)
108118
nz = int(opt.nz)
109119
ngf = int(opt.ngf)

fast_neural_style/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ python neural_style/neural_style.py eval --content-image </path/to/content/image
2727
- `--output-image`: path for saving the output image.
2828
- `--content-scale`: factor for scaling down the content image if memory is an issue (eg: value of 2 will halve the height and width of content-image)
2929
- `--cuda`: set it to 1 for running on GPU, 0 for CPU.
30+
- `--mps`: pass this flag (it takes no value) to run on the macOS GPU.
3031

3132
Train model
3233

@@ -40,6 +41,7 @@ There are several command line arguments, the important ones are listed below
4041
- `--style-image`: path to style-image.
4142
- `--save-model-dir`: path to folder where trained model will be saved.
4243
- `--cuda`: set it to 1 for running on GPU, 0 for CPU.
44+
- `--mps`: set it to 1 for running on macOS GPU
4345

4446
Refer to `neural_style/neural_style.py` for other command line arguments. For training new models you might have to tune the values of `--content-weight` and `--style-weight`. The mosaic style model shown above was trained with `--content-weight 1e5` and `--style-weight 1e10`. The remaining 3 models were also trained with similar order of weight parameters with slight variation in the `--style-weight` (`5e10` or `1e11`).
4547

fast_neural_style/neural_style/neural_style.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,12 @@ def check_paths(args):
2929

3030

3131
def train(args):
32-
device = torch.device("cuda" if args.cuda else "cpu")
32+
if args.cuda:
33+
device = torch.device("cuda")
34+
elif args.mps:
35+
device = torch.device("mps")
36+
else:
37+
device = torch.device("cpu")
3338

3439
np.random.seed(args.seed)
3540
torch.manual_seed(args.seed)
@@ -224,10 +229,11 @@ def main():
224229
help="path for saving the output image")
225230
eval_arg_parser.add_argument("--model", type=str, required=True,
226231
help="saved model to be used for stylizing the image. If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path")
227-
eval_arg_parser.add_argument("--cuda", type=int, required=True,
228-
help="set it to 1 for running on GPU, 0 for CPU")
232+
eval_arg_parser.add_argument("--cuda", type=int, default=False,
233+
help="set it to 1 for running on cuda, 0 for CPU")
229234
eval_arg_parser.add_argument("--export_onnx", type=str,
230235
help="export ONNX model to a given file")
236+
eval_arg_parser.add_argument('--mps', action='store_true', default=False, help='enable macOS GPU training')
231237

232238
args = main_arg_parser.parse_args()
233239

@@ -237,6 +243,8 @@ def main():
237243
if args.cuda and not torch.cuda.is_available():
238244
print("ERROR: cuda is not available, try running on CPU")
239245
sys.exit(1)
246+
if not args.mps and torch.backends.mps.is_available():
247+
print("WARNING: mps is available, run with --mps to enable macOS GPU")
240248

241249
if args.subcommand == "train":
242250
check_paths(args)

imagenet/main.py

+50-23
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,10 @@ def main():
104104

105105
args.distributed = args.world_size > 1 or args.multiprocessing_distributed
106106

107-
ngpus_per_node = torch.cuda.device_count()
107+
if torch.cuda.is_available():
108+
ngpus_per_node = torch.cuda.device_count()
109+
else:
110+
ngpus_per_node = 1
108111
if args.multiprocessing_distributed:
109112
# Since we have ngpus_per_node processes per node, the total world_size
110113
# needs to be adjusted accordingly
@@ -141,29 +144,33 @@ def main_worker(gpu, ngpus_per_node, args):
141144
print("=> creating model '{}'".format(args.arch))
142145
model = models.__dict__[args.arch]()
143146

144-
if not torch.cuda.is_available():
147+
if not torch.cuda.is_available() and not torch.backends.mps.is_available():
145148
print('using CPU, this will be slow')
146149
elif args.distributed:
147150
# For multiprocessing distributed, DistributedDataParallel constructor
148151
# should always set the single device scope, otherwise,
149152
# DistributedDataParallel will use all available devices.
150-
if args.gpu is not None:
151-
torch.cuda.set_device(args.gpu)
152-
model.cuda(args.gpu)
153-
# When using a single GPU per process and per
154-
# DistributedDataParallel, we need to divide the batch size
155-
# ourselves based on the total number of GPUs of the current node.
156-
args.batch_size = int(args.batch_size / ngpus_per_node)
157-
args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
158-
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
159-
else:
160-
model.cuda()
161-
# DistributedDataParallel will divide and allocate batch_size to all
162-
# available GPUs if device_ids are not set
163-
model = torch.nn.parallel.DistributedDataParallel(model)
164-
elif args.gpu is not None:
153+
if torch.cuda.is_available():
154+
if args.gpu is not None:
155+
torch.cuda.set_device(args.gpu)
156+
model.cuda(args.gpu)
157+
# When using a single GPU per process and per
158+
# DistributedDataParallel, we need to divide the batch size
159+
# ourselves based on the total number of GPUs of the current node.
160+
args.batch_size = int(args.batch_size / ngpus_per_node)
161+
args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
162+
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
163+
else:
164+
model.cuda()
165+
# DistributedDataParallel will divide and allocate batch_size to all
166+
# available GPUs if device_ids are not set
167+
model = torch.nn.parallel.DistributedDataParallel(model)
168+
elif args.gpu is not None and torch.cuda.is_available():
165169
torch.cuda.set_device(args.gpu)
166170
model = model.cuda(args.gpu)
171+
elif torch.backends.mps.is_available():
172+
device = torch.device("mps")
173+
model = model.to(device)
167174
else:
168175
# DataParallel will divide and allocate batch_size to all available GPUs
169176
if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
@@ -172,8 +179,17 @@ def main_worker(gpu, ngpus_per_node, args):
172179
else:
173180
model = torch.nn.DataParallel(model).cuda()
174181

182+
if torch.cuda.is_available():
183+
if args.gpu:
184+
device = torch.device('cuda:{}'.format(args.gpu))
185+
else:
186+
device = torch.device("cuda")
187+
elif torch.backends.mps.is_available():
188+
device = torch.device("mps")
189+
else:
190+
device = torch.device("cpu")
175191
# define loss function (criterion), optimizer, and learning rate scheduler
176-
criterion = nn.CrossEntropyLoss().cuda(args.gpu)
192+
criterion = nn.CrossEntropyLoss().to(device)
177193

178194
optimizer = torch.optim.SGD(model.parameters(), args.lr,
179195
momentum=args.momentum,
@@ -188,7 +204,7 @@ def main_worker(gpu, ngpus_per_node, args):
188204
print("=> loading checkpoint '{}'".format(args.resume))
189205
if args.gpu is None:
190206
checkpoint = torch.load(args.resume)
191-
else:
207+
elif torch.cuda.is_available():
192208
# Map model to be loaded to specified single gpu.
193209
loc = 'cuda:{}'.format(args.gpu)
194210
checkpoint = torch.load(args.resume, map_location=loc)
@@ -302,10 +318,13 @@ def train(train_loader, model, criterion, optimizer, epoch, args):
302318
# measure data loading time
303319
data_time.update(time.time() - end)
304320

305-
if args.gpu is not None:
321+
if args.gpu is not None and torch.cuda.is_available():
306322
images = images.cuda(args.gpu, non_blocking=True)
307-
if torch.cuda.is_available():
323+
elif not args.gpu and torch.cuda.is_available():
308324
target = target.cuda(args.gpu, non_blocking=True)
325+
elif torch.backends.mps.is_available():
326+
images = images.to('mps')
327+
target = target.to('mps')
309328

310329
# compute output
311330
output = model(images)
@@ -337,8 +356,11 @@ def run_validate(loader, base_progress=0):
337356
end = time.time()
338357
for i, (images, target) in enumerate(loader):
339358
i = base_progress + i
340-
if args.gpu is not None:
359+
if args.gpu is not None and torch.cuda.is_available():
341360
images = images.cuda(args.gpu, non_blocking=True)
361+
if torch.backends.mps.is_available():
362+
images = images.to('mps')
363+
target = target.to('mps')
342364
if torch.cuda.is_available():
343365
target = target.cuda(args.gpu, non_blocking=True)
344366

@@ -421,7 +443,12 @@ def update(self, val, n=1):
421443
self.avg = self.sum / self.count
422444

423445
def all_reduce(self):
424-
device = "cuda" if torch.cuda.is_available() else "cpu"
446+
if torch.cuda.is_available():
447+
device = torch.device("cuda")
448+
elif torch.backends.mps.is_available():
449+
device = torch.device("mps")
450+
else:
451+
device = torch.device("cpu")
425452
total = torch.tensor([self.sum, self.count], dtype=torch.float32, device=device)
426453
dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False)
427454
self.sum, self.count = total.tolist()

legacy/snli/train.py

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
if torch.cuda.is_available():
1818
torch.cuda.set_device(args.gpu)
1919
device = torch.device('cuda:{}'.format(args.gpu))
20+
elif torch.backends.mps.is_available():
21+
device = torch.device('mps')
2022
else:
2123
device = torch.device('cpu')
2224

mnist/main.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ def main():
8585
help='Learning rate step gamma (default: 0.7)')
8686
parser.add_argument('--no-cuda', action='store_true', default=False,
8787
help='disables CUDA training')
88+
parser.add_argument('--no-mps', action='store_true', default=False,
89+
help='disables macOS GPU training')
8890
parser.add_argument('--dry-run', action='store_true', default=False,
8991
help='quickly check a single pass')
9092
parser.add_argument('--seed', type=int, default=1, metavar='S',
@@ -95,10 +97,16 @@ def main():
9597
help='For Saving the current Model')
9698
args = parser.parse_args()
9799
use_cuda = not args.no_cuda and torch.cuda.is_available()
100+
use_mps = not args.no_mps and torch.backends.mps.is_available()
98101

99102
torch.manual_seed(args.seed)
100103

101-
device = torch.device("cuda" if use_cuda else "cpu")
104+
if use_cuda:
105+
device = torch.device("cuda")
106+
elif use_mps:
107+
device = torch.device("mps")
108+
else:
109+
device = torch.device("cpu")
102110

103111
train_kwargs = {'batch_size': args.batch_size}
104112
test_kwargs = {'batch_size': args.test_batch_size}

mnist_hogwild/main.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
help='how many training processes to use (default: 2)')
3030
parser.add_argument('--cuda', action='store_true', default=False,
3131
help='enables CUDA training')
32+
parser.add_argument('--mps', action='store_true', default=False,
33+
help='enables macOS GPU training')
3234
parser.add_argument('--dry-run', action='store_true', default=False,
3335
help='quickly check a single pass')
3436

@@ -55,7 +57,14 @@ def forward(self, x):
5557
args = parser.parse_args()
5658

5759
use_cuda = args.cuda and torch.cuda.is_available()
58-
device = torch.device("cuda" if use_cuda else "cpu")
60+
use_mps = args.mps and torch.backends.mps.is_available()
61+
if use_cuda:
62+
device = torch.device("cuda")
63+
elif use_mps:
64+
device = torch.device("mps")
65+
else:
66+
device = torch.device("cpu")
67+
5968
transform=transforms.Compose([
6069
transforms.ToTensor(),
6170
transforms.Normalize((0.1307,), (0.3081,))

run_python_examples.sh

+5-5
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ function start() {
5656

5757
function dcgan() {
5858
start
59-
python main.py --dataset fake $CUDA_FLAG --dry-run || error "dcgan failed"
59+
python main.py --dataset fake $CUDA_FLAG --mps --dry-run || error "dcgan failed"
6060
}
6161

6262
function distributed() {
@@ -74,15 +74,15 @@ function fast_neural_style() {
7474
test -d "saved_models" || { error "saved models not found"; return; }
7575

7676
echo "running fast neural style model"
77-
python neural_style/neural_style.py eval --content-image images/content-images/amber.jpg --model saved_models/candy.pth --output-image images/output-images/amber-candy.jpg --cuda $CUDA || error "neural_style.py failed"
77+
python neural_style/neural_style.py eval --content-image images/content-images/amber.jpg --model saved_models/candy.pth --output-image images/output-images/amber-candy.jpg --cuda $CUDA --mps || error "neural_style.py failed"
7878
}
7979

8080
function imagenet() {
8181
start
8282
if [[ ! -d "sample/val" || ! -d "sample/train" ]]; then
8383
mkdir -p sample/val/n
8484
mkdir -p sample/train/n
85-
wget "https://upload.wikimedia.org/wikipedia/commons/5/5a/Socks-clinton.jpg" || { error "couldn't download sample image for imagenet"; return; }
85+
curl -O "https://upload.wikimedia.org/wikipedia/commons/5/5a/Socks-clinton.jpg" || { error "couldn't download sample image for imagenet"; return; }
8686
mv Socks-clinton.jpg sample/train/n
8787
cp sample/train/n/* sample/val/n/
8888
fi
@@ -137,7 +137,7 @@ function fx() {
137137

138138
function super_resolution() {
139139
start
140-
python main.py --upscale_factor 3 --batchSize 4 --testBatchSize 100 --nEpochs 1 --lr 0.001 || error "super resolution failed"
140+
python main.py --upscale_factor 3 --batchSize 4 --testBatchSize 100 --nEpochs 1 --lr 0.001 --mps || error "super resolution failed"
141141
}
142142

143143
function time_sequence_prediction() {
@@ -153,7 +153,7 @@ function vae() {
153153

154154
function word_language_model() {
155155
start
156-
python main.py --epochs 1 --dry-run $CUDA_FLAG || error "word_language_model failed"
156+
python main.py --epochs 1 --dry-run $CUDA_FLAG --mps || error "word_language_model failed"
157157
}
158158

159159
function clean() {

siamese_network/main.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,8 @@ def main():
249249
help='Learning rate step gamma (default: 0.7)')
250250
parser.add_argument('--no-cuda', action='store_true', default=False,
251251
help='disables CUDA training')
252+
parser.add_argument('--no-mps', action='store_true', default=False,
253+
help='disables macOS GPU training')
252254
parser.add_argument('--dry-run', action='store_true', default=False,
253255
help='quickly check a single pass')
254256
parser.add_argument('--seed', type=int, default=1, metavar='S',
@@ -260,10 +262,16 @@ def main():
260262
args = parser.parse_args()
261263

262264
use_cuda = not args.no_cuda and torch.cuda.is_available()
265+
use_mps = not args.no_mps and torch.backends.mps.is_available()
263266

264267
torch.manual_seed(args.seed)
265268

266-
device = torch.device("cuda" if use_cuda else "cpu")
269+
if use_cuda:
270+
device = torch.device("cuda")
271+
elif use_mps:
272+
device = torch.device("mps")
273+
else:
274+
device = torch.device("cpu")
267275

268276
train_kwargs = {'batch_size': args.batch_size}
269277
test_kwargs = {'batch_size': args.test_batch_size}

super_resolution/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ optional arguments:
1717
--nEpochs number of epochs to train for
1818
--lr Learning Rate. Default=0.01
1919
--cuda use cuda
20+
--mps enable GPU on macOS
2021
--threads number of threads for data loader to use Default=4
2122
--seed random seed to use. Default=123
2223
```

0 commit comments

Comments
 (0)