Learning rate #106

Closed · wants to merge 16 commits
@@ -20,6 +20,7 @@
import org.tensorflow.Output;
import org.tensorflow.op.Op;
import org.tensorflow.op.core.Variable;
import org.tensorflow.types.TFloat32;
import org.tensorflow.types.family.TType;

import java.util.List;
@@ -62,24 +63,31 @@
*/
public class AdaDelta extends Optimizer {

public static final String DEFAULT_NAME = "Adadelta";
public static final String ACCUMULATOR = "accum";
public static final String ACCUMULATOR_UPDATE = "accum_update";
public static final float LEARNING_RATE_DEFAULT = 0.001f;
public static final float RHO_DEFAULT = 0.95f;
public static final float EPSILON_DEFAULT = 1e-7f;

private final float learningRate;

private final float rho;

private final float epsilon;

/**
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #LEARNING_RATE_DEFAULT} for the learningRate, {@link #RHO_DEFAULT} for the rho, and {@link
* #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow graph.
*/
public AdaDelta(Graph graph) {
this(graph, LEARNING_RATE_DEFAULT, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #RHO_DEFAULT} for the rho, and {@link #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
@@ -89,43 +97,95 @@ public AdaDelta(Graph graph, float learningRate) {
}

/**
* Creates an AdaDelta Optimizer
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #RHO_DEFAULT} for the rho, and {@link #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaDelta(Graph graph, Operand<TFloat32> learningRateOperand) {
this(graph, learningRateOperand, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(Graph graph, float learningRate, float rho, float epsilon) {
super(graph);
this.learningRate = learningRate;
this.rho = rho;
this.epsilon = epsilon;
this(graph, null, learningRate, rho, epsilon);
}

/**
* Creates an AdaDelta Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adadelta')
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(Graph graph, Operand<TFloat32> learningRateOperand, float rho, float epsilon) {
this(graph, null, learningRateOperand, rho, epsilon);
}

/**
* Creates an AdaDelta Optimizer using {@link #RHO_DEFAULT} for the rho, and {@link
* #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRate the learning rate
*/
public AdaDelta(Graph graph, String name, float learningRate) {
this(graph, name, learningRate, 0.95f, 1e-8f);
this(graph, name, learningRate, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer using {@link #RHO_DEFAULT} for the rho, and {@link
* #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaDelta(Graph graph, String name, Operand<TFloat32> learningRateOperand) {
this(graph, name, learningRateOperand, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adadelta')
* @param name the name for this Optimizer.
* @param learningRate the learning rate
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(Graph graph, String name, float learningRate, float rho, float epsilon) {
super(graph, name);
this.learningRate = learningRate;
super(graph, name, learningRate);
this.rho = rho;
this.epsilon = epsilon;
}

/**
* Creates an AdaDelta Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(
Graph graph, String name, Operand<TFloat32> learningRateOperand, float rho, float epsilon) {
super(graph, name, learningRateOperand);
this.rho = rho;
this.epsilon = epsilon;
}
@@ -162,7 +222,7 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
variable,
accumSlot,
accumUpdateSlot,
tf.dtypes.cast(tf.constant(learningRate), gradient.dataType()),
tf.dtypes.cast(getLearningRateOperand(), gradient.dataType()),
tf.dtypes.cast(tf.constant(rho), gradient.dataType()),
tf.dtypes.cast(tf.constant(epsilon), gradient.dataType()),
gradient);
@@ -184,6 +244,6 @@ public String toString() {
/** {@inheritDoc} */
@Override
public String getOptimizerName() {
return "Adadelta";
return DEFAULT_NAME;
}
}
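
For reviewers, a minimal sketch of how the new Operand-based learning rate could be exercised against the AdaDelta changes above. The framework package path, the sketch class name, and the hyperparameter values are assumptions for illustration and are not part of this diff:

import org.tensorflow.Graph;
import org.tensorflow.Operand;
import org.tensorflow.framework.optimizers.AdaDelta; // assumed package path
import org.tensorflow.op.Ops;
import org.tensorflow.types.TFloat32;

public class AdaDeltaLearningRateSketch {
  public static void main(String[] args) {
    try (Graph graph = new Graph()) {
      Ops tf = Ops.create(graph);

      // Fixed learning rate, matching the existing float constructor.
      AdaDelta fixedRate = new AdaDelta(graph, 0.001f, 0.95f, 1e-7f);

      // Learning rate supplied as an Operand<TFloat32>; any scalar float
      // computation (a constant, a placeholder fed per step, a decay
      // schedule) can be passed now that the base Optimizer accepts an Operand.
      Operand<TFloat32> lr = tf.constant(0.001f);
      AdaDelta operandRate = new AdaDelta(graph, lr, 0.95f, 1e-7f);
    }
  }
}
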
@@ -20,6 +20,7 @@
import org.tensorflow.Output;
import org.tensorflow.op.Op;
import org.tensorflow.op.core.Variable;
import org.tensorflow.types.TFloat32;
import org.tensorflow.types.family.TType;

import java.util.List;
@@ -40,16 +41,18 @@
*/
public class AdaGrad extends Optimizer {

public static final String DEFAULT_NAME = "Adagrad";

public static final String ACCUMULATOR = "accumulator";
public static final float LEARNING_RATE_DEFAULT = 0.001f;
public static final float INITIAL_ACCUMULATOR_DEFAULT = 0.01f;

private final float learningRate;

private final float initialAccumulatorValue;

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #LEARNING_RATE_DEFAULT} for the learning rate, and {@link #INITIAL_ACCUMULATOR_DEFAULT} for
* the initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
*/
@@ -58,7 +61,8 @@ public AdaGrad(Graph graph) {
}

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #INITIAL_ACCUMULATOR_DEFAULT} for the initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
@@ -68,52 +72,108 @@ public AdaGrad(Graph graph, float learningRate) {
}

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #INITIAL_ACCUMULATOR_DEFAULT} for the initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaGrad(Graph graph, Operand<TFloat32> learningRateOperand) {
this(graph, learningRateOperand, INITIAL_ACCUMULATOR_DEFAULT);
}

/**
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(Graph graph, float learningRate, float initialAccumulatorValue) {
super(graph);
if (initialAccumulatorValue < 0F) {
throw new IllegalArgumentException(
String.format(
"initialAccumulatorValue must be non-negative: %f", initialAccumulatorValue));
}
this.learningRate = learningRate;
this.initialAccumulatorValue = initialAccumulatorValue;
this(graph, null, learningRate, initialAccumulatorValue);
}

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adagrad')
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(
Graph graph, Operand<TFloat32> learningRateOperand, float initialAccumulatorValue) {
this(graph, null, learningRateOperand, initialAccumulatorValue);
}

/**
* Creates an AdaGrad Optimizer using {@link #INITIAL_ACCUMULATOR_DEFAULT} for the
* initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRate the learning rate
*/
public AdaGrad(Graph graph, String name, float learningRate) {
this(graph, name, learningRate, 0.01f);
this(graph, name, learningRate, INITIAL_ACCUMULATOR_DEFAULT);
}

/**
* Creates an AdaGrad Optimizer using {@link #INITIAL_ACCUMULATOR_DEFAULT} for the
* initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaGrad(Graph graph, String name, Operand<TFloat32> learningRateOperand) {
this(graph, name, learningRateOperand, INITIAL_ACCUMULATOR_DEFAULT);
}

/**
* Creates an AdaGrad Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adagrad')
* @param name the name for this Optimizer
* @param learningRate the learning rate
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(Graph graph, String name, float learningRate, float initialAccumulatorValue) {
super(graph, name);
super(graph, name, learningRate);
if (initialAccumulatorValue < 0F) {
throw new IllegalArgumentException(
String.format(
"initialAccumulatorValue must be non-negative: %f", initialAccumulatorValue));
}
this.initialAccumulatorValue = initialAccumulatorValue;
}

/**
* Creates an AdaGrad Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(
Graph graph,
String name,
Operand<TFloat32> learningRateOperand,
float initialAccumulatorValue) {
super(graph, name, learningRateOperand);
if (initialAccumulatorValue < 0F) {
throw new IllegalArgumentException(
String.format(
"initialAccumulatorValue must be non-negative: %f", initialAccumulatorValue));
}
this.learningRate = learningRate;
this.initialAccumulatorValue = initialAccumulatorValue;
}

@@ -142,7 +202,7 @@ private <T extends TType> void createAdaGradSlot(Output<T> v) {
protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable) {
Variable<T> slot = getSlot(variable, ACCUMULATOR).get();
return tf.train.applyAdagrad(
variable, slot, tf.dtypes.cast(tf.constant(learningRate), gradient.dataType()), gradient);
variable, slot, tf.dtypes.cast(getLearningRateOperand(), gradient.dataType()), gradient);
}

/** {@inheritDoc} */
@@ -159,6 +219,6 @@ public String toString() {
/** {@inheritDoc} */
@Override
public String getOptimizerName() {
return "Adagrad";
return DEFAULT_NAME;
}
}
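
Likewise, a small sketch of the new AdaGrad constructors, including the non-negative check on initialAccumulatorValue that this change keeps. The package path, names, and values are again assumed for illustration, not taken from this diff:

import org.tensorflow.Graph;
import org.tensorflow.Operand;
import org.tensorflow.framework.optimizers.AdaGrad; // assumed package path
import org.tensorflow.op.Ops;
import org.tensorflow.types.TFloat32;

public class AdaGradLearningRateSketch {
  public static void main(String[] args) {
    try (Graph graph = new Graph()) {
      Ops tf = Ops.create(graph);

      // Named optimizer with the learning rate provided as an Operand.
      Operand<TFloat32> lr = tf.constant(0.01f);
      AdaGrad adaGrad = new AdaGrad(graph, "Adagrad", lr, 0.1f);

      // The float-based constructor still rejects a negative initial accumulator value.
      try {
        new AdaGrad(graph, 0.01f, -1.0f);
      } catch (IllegalArgumentException expected) {
        System.out.println("rejected: " + expected.getMessage());
      }
    }
  }
}
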