Skip to content

Commit 16cf488

Browse files
authored
[mnist-acgan] Improve command line; add tensorboard and unit tests (#245)
- Allow easy switching to CUDA GPU training with the `--gpu` flag like in some of the newer examples. - Add the `--logDir` flag to allow logging the loss values of the generator and the discriminator to tensorboard. - Add unit tests: gan_test.js
1 parent abca94c commit 16cf488

File tree

7 files changed

+345
-56
lines changed

7 files changed

+345
-56
lines changed

mnist-acgan/README.md

+62
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,61 @@ yarn
3838
yarn train
3939
```
4040

41+
If you have a CUDA-enabled GPU on your system, you can add the `--gpu` flag
42+
to train the model on the GPU, which should give you a significant boost in
43+
the speed of training:
44+
45+
```sh
46+
yarn
47+
yarn train --gpu
48+
```
49+
4150
The training job is a long running one and takes a few hours to complete on
4251
a GPU (using @tensorflow/tfjs-node-gpu) and even longer on a CPU
4352
(using @tensorflow/tfjs-node). It saves the generator part of the ACGAN
4453
into the `./dist/generator` folder at the beginning of the training and
4554
at the end of every training epoch. Some additional metadata is
4655
saved with the model as well.
56+
57+
### Monitoring GAN training using TensorBoard
58+
59+
The Node.js-based training script allows you to log the loss values from
60+
the generator and the discriminator to
61+
[TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard).
62+
Relative to printing loss values to the console, which the
63+
training script performs by default, logging to tensorboard has the following
64+
advantages:
65+
66+
1. Persistence of the loss values, so you can have a copy of the training
67+
history available even if the system crashes in the middle of the training
68+
for some reason, while logs in consoles are more ephemeral.
69+
2. Visualizing the loss values as curves makes the trends easier to see (e.g.,
70+
see the screenshot below).
71+
72+
![MNIST ACGAN Training: TensorBoard Example](./mnist-acgan-tensorboard-example.png)
73+
74+
To do this in this example, add the flag `--logDir` to the `yarn train`
75+
command, followed by the directory to which you want the logs to
76+
be written, e.g.,
77+
78+
```sh
79+
yarn train --gpu --logDir /tmp/mnist-acgan-logs
80+
```
81+
82+
Then install tensorboard and start it by pointing it to the log directory:
83+
84+
```sh
85+
# Skip this step if you have already installed tensorboard.
86+
pip install tensorboard
87+
88+
tensorboard --logdir /tmp/mnist-acgan-logs
89+
```
90+
91+
tensorboard will print an HTTP URL in the terminal. Open your browser and
92+
navigate to the URL to view the loss curves in the Scalar dashboard of
93+
TensorBoard.
94+
95+
### Running Generator demo in the Browser
4796

4897
To start the demo in the browser, do in a separate terminal:
4998

@@ -77,3 +126,16 @@ with
77126
```js
78127
require('@tensorflow/tfjs-node-gpu');
79128
```
129+
130+
## Running unit tests
131+
132+
This example comes with JavaScript unit tests. To run them, do:
133+
134+
```sh
135+
pushd ../ # Go to the root directory of tfjs-examples
136+
yarn
137+
popd # Go back to mnist-acgan/
138+
139+
yarn
140+
yarn test
141+
```

mnist-acgan/gan.js

+100-43
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@
2525
* yarn
2626
* yarn train
2727
* ```
28+
*
29+
* If available, a CUDA GPU will give you a higher training speed:
30+
*
31+
* ```sh
32+
* yarn
33+
* yarn train --gpu
34+
* ```
2835
*
2936
* To start the demo in the browser, do in a separate terminal:
3037
*
@@ -50,13 +57,6 @@ const fs = require('fs');
5057
const path = require('path');
5158

5259
const argparse = require('argparse');
53-
const tf = require('@tensorflow/tfjs');
54-
require('@tensorflow/tfjs-node');
55-
56-
// Uncomment me to train the model on GPU.
57-
// Requires: CUDA-enabled GPU, installs of CUDA toolkit and CuDNN.
58-
// require('@tensorflow/tfjs-node-gpu');
59-
6060
const data = require('./data');
6161

6262
// Number of classes in the MNIST dataset.
@@ -65,6 +65,11 @@ const NUM_CLASSES = 10;
6565
// MNIST image size.
6666
const IMAGE_SIZE = 28;
6767

68+
// The value of the tf object will be set dynamically, depending on whether
69+
// the CPU (tfjs-node) or GPU (tfjs-node-gpu) backend is used. This is why
70+
// `let` is used in lieu of the more conventional `const` here.
71+
let tf = require('@tensorflow/tfjs');
72+
6873
/**
6974
* Build the generator part of ACGAN.
7075
*
@@ -77,7 +82,7 @@ const IMAGE_SIZE = 28;
7782
* It generates one output: the generated (i.e., fake) image.
7883
*
7984
* @param {number} latentSize Size of the latent space.
80-
* @returns {tf.Model} The generator model.
85+
* @returns {tf.LayersModel} The generator model.
8186
*/
8287
function buildGenerator(latentSize) {
8388
tf.util.assert(
@@ -171,7 +176,7 @@ function buildGenerator(latentSize) {
171176
* which is the discriminator's 10-class classification result
172177
* for the input image.
173178
*
174-
* @returns {tf.Model} The discriminator model.
179+
* @returns {tf.LayersModel} The discriminator model.
175180
*/
176181
function buildDiscriminator() {
177182
const cnn = tf.sequential();
@@ -224,9 +229,43 @@ function buildDiscriminator() {
224229
return tf.model({inputs: image, outputs: [realnessScore, aux]});
225230
}
226231

232+
/**
233+
* Build a combined ACGAN model.
234+
*
235+
* @param {number} latentSize Size of the latent vector.
236+
* @param {tf.SymbolicTensor} imageClass Symbolic tensor for the desired image
237+
* class. This is the other input to the generator.
238+
* @param {tf.LayersModel} generator The generator.
239+
* @param {tf.LayersModel} discriminator The discriminator.
240+
* @param {tf.Optimizer} optimizer The optimizer to be used for training the
241+
* combined model.
242+
* @returns {tf.LayersModel} The combined ACGAN model, compiled.
243+
*/
244+
function buildCombinedModel(latentSize, generator, discriminator, optimizer) {
245+
// Latent vector. This is one of the two inputs to the generator.
246+
const latent = tf.input({shape: [latentSize]});
247+
// Desired image class. This is the second input to the generator.
248+
const imageClass = tf.input({shape: [1]});
249+
// Get the symbolic tensor for fake images generated by the generator.
250+
let fake = generator.apply([latent, imageClass]);
251+
let aux;
252+
253+
// We only want to be able to train generation for the combined model.
254+
discriminator.trainable = false;
255+
[fake, aux] = discriminator.apply(fake);
256+
const combined =
257+
tf.model({inputs: [latent, imageClass], outputs: [fake, aux]});
258+
combined.compile({
259+
optimizer,
260+
loss: ['binaryCrossentropy', 'sparseCategoricalCrossentropy']
261+
});
262+
combined.summary();
263+
return combined;
264+
}
265+
227266
// "Soft" one used for training the combined ACGAN model.
228267
// This is an important trick in training GANs.
229-
const softOne = tf.scalar(0.95);
268+
const SOFT_ONE = 0.95;
230269

231270
/**
232271
* Train the discriminator for one step.
@@ -252,9 +291,9 @@ const softOne = tf.scalar(0.95);
252291
* @param {number} batchSize Size of the batch to draw from `xTrain` and
253292
* `yTrain`.
254293
* @param {number} latentSize Size of the latent space (z-space).
255-
* @param {tf.Model} generator The generator of the ACGAN.
256-
* @param {tf.Model} discriminator The discriminator of the ACGAN.
257-
* @returns The loss values from the one-step training as numbers.
294+
* @param {tf.LayersModel} generator The generator of the ACGAN.
295+
* @param {tf.LayersModel} discriminator The discriminator of the ACGAN.
296+
* @returns {number[]} The loss values from the one-step training as numbers.
258297
*/
259298
async function trainDiscriminatorOneStep(
260299
xTrain, yTrain, batchStart, batchSize, latentSize, generator,
@@ -275,9 +314,10 @@ async function trainDiscriminatorOneStep(
275314
generator.predict([zVectors, sampledLabels], {batchSize: batchSize});
276315

277316
const x = tf.concat([imageBatch, generatedImages], 0);
317+
278318
const y = tf.tidy(
279319
() => tf.concat(
280-
[tf.ones([batchSize, 1]).mul(softOne), tf.zeros([batchSize, 1])]));
320+
[tf.ones([batchSize, 1]).mul(SOFT_ONE), tf.zeros([batchSize, 1])]));
281321

282322
const auxY = tf.concat([labelBatch, sampledLabels], 0);
283323
return [x, y, auxY];
@@ -295,9 +335,9 @@ async function trainDiscriminatorOneStep(
295335
*
296336
* @param {number} batchSize Size of the fake-image batch to generate.
297337
* @param {number} latentSize Size of the latent space (z-space).
298-
* @param {tf.Model} combined The instance of tf.Model that combines
338+
* @param {tf.LayersModel} combined The instance of tf.LayersModel that combines
299339
* the generator and the discriminator.
300-
* @returns The loss values from the combined model as numbers.
340+
* @returns {number[]} The loss values from the combined model as numbers.
301341
*/
302342
async function trainCombinedModelOneStep(batchSize, latentSize, combined) {
303343
// TODO(cais): Remove tidy() once the current memory leak issue in tfjs-node
@@ -312,7 +352,7 @@ async function trainCombinedModelOneStep(batchSize, latentSize, combined) {
312352
// We want to train the generator to trick the discriminator.
313353
// For the generator, we want all the {fake, not-fake} labels to say
314354
// not-fake.
315-
const trick = tf.tidy(() => tf.ones([batchSize, 1]).mul(softOne));
355+
const trick = tf.tidy(() => tf.ones([batchSize, 1]).mul(SOFT_ONE));
316356
return [zVectors, sampledLabels, trick];
317357
});
318358

@@ -322,11 +362,15 @@ async function trainCombinedModelOneStep(batchSize, latentSize, combined) {
322362
return losses;
323363
}
324364

325-
function buildArgumentParser() {
365+
function parseArguments() {
326366
const parser = new argparse.ArgumentParser({
327367
description: 'TensorFlowj.js: MNIST ACGAN trainer example.',
328368
addHelp: true
329369
});
370+
parser.addArgument('--gpu', {
371+
action: 'storeTrue',
372+
help: 'Use tfjs-node-gpu for training (required CUDA GPU)'
373+
});
330374
parser.addArgument(
331375
'--epochs',
332376
{type: 'int', defaultValue: 100, help: 'Number of training epochs.'});
@@ -353,7 +397,11 @@ function buildArgumentParser() {
353397
defaultValue: './dist/generator',
354398
help: 'Path to which the generator model will be saved after every epoch.'
355399
});
356-
return parser;
400+
parser.addArgument('--logDir', {
401+
type: 'string',
402+
help: 'Optional log directory to which the loss values will be written.'
403+
});
404+
return parser.parseArgs();
357405
}
358406

359407
function makeMetadata(totalEpochs, currentEpoch, completed) {
@@ -366,8 +414,16 @@ function makeMetadata(totalEpochs, currentEpoch, completed) {
366414
}
367415

368416
async function run() {
369-
const parser = buildArgumentParser();
370-
const args = parser.parseArgs();
417+
const args = parseArguments();
418+
// Set the value of tf depending on whether the CPU or GPU version of
419+
// libtensorflow is used.
420+
if (args.gpu) {
421+
console.log('Using GPU');
422+
tf = require('@tensorflow/tfjs-node-gpu');
423+
} else {
424+
console.log('Using CPU');
425+
tf = require('@tensorflow/tfjs-node');
426+
}
371427

372428
if (!fs.existsSync(path.dirname(args.generatorSavePath))) {
373429
fs.mkdirSync(path.dirname(args.generatorSavePath));
@@ -387,23 +443,9 @@ async function run() {
387443
const generator = buildGenerator(args.latentSize);
388444
generator.summary();
389445

390-
const latent = tf.input({shape: [args.latentSize]});
391-
const imageClass = tf.input({shape: [1]});
392-
393-
// Get a fake image.
394-
let fake = generator.apply([latent, imageClass]);
395-
let aux;
396-
397-
// We only want to be able to train generation for the combined model.
398-
discriminator.trainable = false;
399-
[fake, aux] = discriminator.apply(fake);
400-
const combined =
401-
tf.model({inputs: [latent, imageClass], outputs: [fake, aux]});
402-
combined.compile({
403-
optimizer: tf.train.adam(args.learningRate, args.adamBeta1),
404-
loss: ['binaryCrossentropy', 'sparseCategoricalCrossentropy']
405-
});
406-
combined.summary();
446+
const optimizer = tf.train.adam(args.learningRate, args.adamBeta1);
447+
const combined = buildCombinedModel(
448+
args.latentSize, generator, discriminator, optimizer);
407449

408450
await data.loadData();
409451
let {images: xTrain, labels: yTrain} = data.getTrainData();
@@ -413,6 +455,13 @@ async function run() {
413455
await generator.save(saveURL);
414456

415457
let numTensors;
458+
let logWriter;
459+
if (args.logDir) {
460+
console.log(`Logging to tensorboard at logdir: ${args.logDir}`);
461+
logWriter = tf.node.summaryFileWriter(args.logDir);
462+
}
463+
464+
let step = 0;
416465
for (let epoch = 0; epoch < args.epochs; ++epoch) {
417466
// Write some metadata to disk at the beginning of every epoch.
418467
fs.writeFileSync(
@@ -442,7 +491,11 @@ async function run() {
442491
`epoch ${epoch + 1}/${args.epochs} batch ${batch + 1}/${
443492
numBatches}: ` +
444493
`dLoss = ${dLoss[0].toFixed(6)}, gLoss = ${gLoss[0].toFixed(6)}`);
445-
tf.dispose([dLoss, gLoss]);
494+
if (logWriter != null) {
495+
logWriter.scalar('dLoss', dLoss[0], step);
496+
logWriter.scalar('gLoss', gLoss[0], step);
497+
step++;
498+
}
446499

447500
// Assert on no memory leak.
448501
// TODO(cais): Remove this check once the current memory leak in
@@ -463,16 +516,20 @@ async function run() {
463516
console.log(`Saved generator model to: ${saveURL}\n`);
464517
}
465518

466-
// Write metadata to disk to indicate
467-
// the end of the training.
519+
// Write metadata to disk to indicate the end of the training.
468520
fs.writeFileSync(
469521
metadataPath,
470522
JSON.stringify(makeMetadata(args.epochs, args.epochs, true)));
471523
}
472524

473-
run();
525+
if (require.main === module) {
526+
run();
527+
}
474528

475529
module.exports = {
530+
buildCombinedModel,
476531
buildDiscriminator,
477-
buildGenerator
532+
buildGenerator,
533+
trainCombinedModelOneStep,
534+
trainDiscriminatorOneStep
478535
};

0 commit comments

Comments
 (0)