Skip to content

Commit ab25e11

Browse files
author
codebasics
committed
tf mirrored strategy
1 parent d0969a5 commit ab25e11

File tree

1 file changed

+93
-47
lines changed

1 file changed

+93
-47
lines changed

10_gpu_benchmarking/DGX_performance/dgx_benchamrking_on_local_python_test.ipynb → 10_gpu_benchmarking/DGX_performance/dgx_benchamrking_tf_mirrored_stratergy.ipynb

+93-47
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444
},
4545
{
4646
"cell_type": "code",
47-
"execution_count": 3,
47+
"execution_count": 5,
4848
"metadata": {
4949
"scrolled": true
5050
},
@@ -53,11 +53,10 @@
5353
"data": {
5454
"text/plain": [
5555
"[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),\n",
56-
" PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),\n",
57-
" PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU')]"
56+
" PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU')]"
5857
]
5958
},
60-
"execution_count": 3,
59+
"execution_count": 5,
6160
"metadata": {},
6261
"output_type": "execute_result"
6362
}
@@ -68,16 +67,16 @@
6867
},
6968
{
7069
"cell_type": "code",
71-
"execution_count": 7,
70+
"execution_count": 6,
7271
"metadata": {},
7372
"outputs": [
7473
{
7574
"data": {
7675
"text/plain": [
77-
"'2.2.0'"
76+
"'2.3.0'"
7877
]
7978
},
80-
"execution_count": 7,
79+
"execution_count": 6,
8180
"metadata": {},
8281
"output_type": "execute_result"
8382
}
@@ -88,7 +87,7 @@
8887
},
8988
{
9089
"cell_type": "code",
91-
"execution_count": 5,
90+
"execution_count": 7,
9291
"metadata": {},
9392
"outputs": [
9493
{
@@ -97,7 +96,7 @@
9796
"True"
9897
]
9998
},
100-
"execution_count": 5,
99+
"execution_count": 7,
101100
"metadata": {},
102101
"output_type": "execute_result"
103102
}
@@ -129,25 +128,16 @@
129128
},
130129
{
131130
"cell_type": "code",
132-
"execution_count": 5,
131+
"execution_count": 8,
133132
"metadata": {},
134-
"outputs": [
135-
{
136-
"name": "stdout",
137-
"output_type": "stream",
138-
"text": [
139-
"Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n",
140-
"170500096/170498071 [==============================] - 42s 0us/step\n"
141-
]
142-
}
143-
],
133+
"outputs": [],
144134
"source": [
145135
"(X_train, y_train), (X_test,y_test) = tf.keras.datasets.cifar10.load_data()"
146136
]
147137
},
148138
{
149139
"cell_type": "code",
150-
"execution_count": 6,
140+
"execution_count": 9,
151141
"metadata": {},
152142
"outputs": [
153143
{
@@ -156,7 +146,7 @@
156146
"(50000, 32, 32, 3)"
157147
]
158148
},
159-
"execution_count": 6,
149+
"execution_count": 9,
160150
"metadata": {},
161151
"output_type": "execute_result"
162152
}
@@ -167,7 +157,7 @@
167157
},
168158
{
169159
"cell_type": "code",
170-
"execution_count": 7,
160+
"execution_count": 10,
171161
"metadata": {
172162
"scrolled": true
173163
},
@@ -178,7 +168,7 @@
178168
"(50000, 1)"
179169
]
180170
},
181-
"execution_count": 7,
171+
"execution_count": 10,
182172
"metadata": {},
183173
"output_type": "execute_result"
184174
}
@@ -401,7 +391,7 @@
401391
},
402392
{
403393
"cell_type": "code",
404-
"execution_count": 18,
394+
"execution_count": 11,
405395
"metadata": {
406396
"scrolled": true
407397
},
@@ -413,7 +403,7 @@
413403
},
414404
{
415405
"cell_type": "code",
416-
"execution_count": 19,
406+
"execution_count": 12,
417407
"metadata": {},
418408
"outputs": [],
419409
"source": [
@@ -453,7 +443,7 @@
453443
"cell_type": "code",
454444
"execution_count": 21,
455445
"metadata": {
456-
"scrolled": true
446+
"scrolled": false
457447
},
458448
"outputs": [
459449
{
@@ -476,50 +466,106 @@
476466
]
477467
},
478468
{
479-
"cell_type": "markdown",
469+
"cell_type": "code",
470+
"execution_count": 15,
480471
"metadata": {},
472+
"outputs": [],
481473
"source": [
482-
"<h4 style=\"color:purple\">Model building and training</h4>"
474+
"train_tf_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train_categorical))\n",
475+
"test_tf_dataset = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test_categorical))"
483476
]
484477
},
485478
{
486479
"cell_type": "code",
487-
"execution_count": 22,
488-
"metadata": {
489-
"scrolled": true
490-
},
480+
"execution_count": 19,
481+
"metadata": {},
491482
"outputs": [
492483
{
493484
"name": "stdout",
494485
"output_type": "stream",
495486
"text": [
496-
"1563/1563 [==============================] - 22s 14ms/step - loss: 1.8602 - accuracy: 0.3321\n"
487+
"WARNING:tensorflow:There are non-GPU devices in `tf.distribute.Strategy`, not using nccl allreduce.\n",
488+
"INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)\n"
497489
]
498-
},
490+
}
491+
],
492+
"source": [
493+
"strategy = tf.distribute.MirroredStrategy()"
494+
]
495+
},
496+
{
497+
"cell_type": "code",
498+
"execution_count": 20,
499+
"metadata": {},
500+
"outputs": [
499501
{
500502
"data": {
501503
"text/plain": [
502-
"<tensorflow.python.keras.callbacks.History at 0x7f697c1c82b0>"
504+
"1"
503505
]
504506
},
505-
"execution_count": 22,
507+
"execution_count": 20,
506508
"metadata": {},
507509
"output_type": "execute_result"
508510
}
509511
],
510512
"source": [
511-
"model = keras.Sequential([\n",
512-
" keras.layers.Flatten(input_shape=(32,32,3)),\n",
513-
" keras.layers.Dense(3000, activation='relu'),\n",
514-
" keras.layers.Dense(1000, activation='relu'),\n",
515-
" keras.layers.Dense(10, activation='sigmoid') \n",
516-
" ])\n",
513+
"strategy.num_replicas_in_sync"
514+
]
515+
},
516+
{
517+
"cell_type": "code",
518+
"execution_count": 21,
519+
"metadata": {},
520+
"outputs": [],
521+
"source": [
522+
"BATCH_SIZE_PER_REPLICA = 64\n",
523+
"BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync\n",
524+
"SHUFFLE_BUFFER_SIZE = 100\n",
525+
"\n",
526+
"train_dataset = train_tf_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)\n",
527+
"test_dataset = test_tf_dataset.batch(BATCH_SIZE)"
528+
]
529+
},
530+
{
531+
"cell_type": "markdown",
532+
"metadata": {},
533+
"source": [
534+
"<h4 style=\"color:purple\">Model building and training</h4>"
535+
]
536+
},
537+
{
538+
"cell_type": "code",
539+
"execution_count": 23,
540+
"metadata": {
541+
"scrolled": true
542+
},
543+
"outputs": [
544+
{
545+
"name": "stdout",
546+
"output_type": "stream",
547+
"text": [
548+
"WARNING:tensorflow:From C:\\Users\\dhava\\AppData\\Roaming\\Python\\Python38\\site-packages\\tensorflow\\python\\data\\ops\\multi_device_iterator_ops.py:601: get_next_as_optional (from tensorflow.python.data.ops.iterator_ops) is deprecated and will be removed in a future version.\n",
549+
"Instructions for updating:\n",
550+
"Use `tf.data.Iterator.get_next_as_optional()` instead.\n",
551+
"782/782 [==============================] - 20s 26ms/step - loss: 1.9170 - accuracy: 0.3119\n"
552+
]
553+
}
554+
],
555+
"source": [
556+
"with strategy.scope():\n",
557+
" model = keras.Sequential([\n",
558+
" keras.layers.Flatten(input_shape=(32,32,3)),\n",
559+
" keras.layers.Dense(3000, activation='relu'),\n",
560+
" keras.layers.Dense(1000, activation='relu'),\n",
561+
" keras.layers.Dense(10, activation='sigmoid') \n",
562+
" ])\n",
517563
"\n",
518-
"model.compile(optimizer='SGD',\n",
519-
" loss='categorical_crossentropy',\n",
520-
" metrics=['accuracy'])\n",
564+
" model.compile(optimizer='SGD',\n",
565+
" loss='categorical_crossentropy',\n",
566+
" metrics=['accuracy'])\n",
521567
"\n",
522-
"model.fit(X_train_scaled, y_train_categorical, epochs=1)"
568+
" model.fit(train_dataset, epochs=1)"
523569
]
524570
},
525571
{

0 commit comments

Comments
 (0)