Learning rate #106

Closed · wants to merge 16 commits
@@ -20,6 +20,7 @@
import org.tensorflow.Output;
import org.tensorflow.op.Op;
import org.tensorflow.op.core.Variable;
import org.tensorflow.types.TFloat32;
import org.tensorflow.types.family.TType;

import java.util.List;
@@ -62,24 +63,31 @@
*/
public class AdaDelta extends Optimizer {

public static final String DEFAULT_NAME = "Adadelta";
public static final String ACCUMULATOR = "accum";
public static final String ACCUMULATOR_UPDATE = "accum_update";
public static final float LEARNING_RATE_DEFAULT = 0.001f;
public static final float RHO_DEFAULT = 0.95f;
public static final float EPSILON_DEFAULT = 1e-7f;

private final float learningRate;

private final float rho;

private final float epsilon;

/**
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #LEARNING_RATE_DEFAULT} for the learningRate, {@link #RHO_DEFAULT} for the rho, and {@link
* #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow graph.
*/
public AdaDelta(Graph graph) {
this(graph, LEARNING_RATE_DEFAULT, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #RHO_DEFAULT} for the rho, and {@link #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
@@ -89,43 +97,95 @@ public AdaDelta(Graph graph, float learningRate) {
}

/**
* Creates an AdaDelta Optimizer
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #RHO_DEFAULT} for the rho, and {@link #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaDelta(Graph graph, Operand<TFloat32> learningRateOperand) {
this(graph, learningRateOperand, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer using {@link #DEFAULT_NAME} for the Optimizer name.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(Graph graph, float learningRate, float rho, float epsilon) {
super(graph);
this.learningRate = learningRate;
this.rho = rho;
this.epsilon = epsilon;
this(graph, null, learningRate, rho, epsilon);
}

/**
* Creates an AdaDelta Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adadelta')
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(Graph graph, Operand<TFloat32> learningRateOperand, float rho, float epsilon) {
this(graph, null, learningRateOperand, rho, epsilon);
}

/**
* Creates an AdaDelta Optimizer using {@link #RHO_DEFAULT} for the rho, and {@link
* #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRate the learning rate
*/
public AdaDelta(Graph graph, String name, float learningRate) {
this(graph, name, learningRate, 0.95f, 1e-8f);
this(graph, name, learningRate, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer using {@link #RHO_DEFAULT} for the rho, and {@link
* #EPSILON_DEFAULT} for the epsilon.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaDelta(Graph graph, String name, Operand<TFloat32> learningRateOperand) {
this(graph, name, learningRateOperand, RHO_DEFAULT, EPSILON_DEFAULT);
}

/**
* Creates an AdaDelta Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adadelta')
* @param name the name for this Optimizer.
* @param learningRate the learning rate
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(Graph graph, String name, float learningRate, float rho, float epsilon) {
super(graph, name);
this.learningRate = learningRate;
super(graph, name, learningRate);
this.rho = rho;
this.epsilon = epsilon;
}

/**
* Creates an AdaDelta Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param rho The decay factor
* @param epsilon A constant epsilon used to better condition the grad update
*/
public AdaDelta(
Graph graph, String name, Operand<TFloat32> learningRateOperand, float rho, float epsilon) {
super(graph, name, learningRateOperand);
this.rho = rho;
this.epsilon = epsilon;
}
@@ -162,7 +222,7 @@ protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable
variable,
accumSlot,
accumUpdateSlot,
tf.dtypes.cast(tf.constant(learningRate), gradient.dataType()),
tf.dtypes.cast(getLearningRateOperand(), gradient.dataType()),
tf.dtypes.cast(tf.constant(rho), gradient.dataType()),
tf.dtypes.cast(tf.constant(epsilon), gradient.dataType()),
gradient);
@@ -184,6 +244,6 @@ public String toString() {
/** {@inheritDoc} */
@Override
public String getOptimizerName() {
return "Adadelta";
return DEFAULT_NAME;
}
}
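
For reviewers, a minimal sketch of how the new Operand-based learning rate could be exercised against the AdaDelta changes above. The framework package path, the sketch class name, and the hyperparameter values are assumptions for illustration and are not part of this diff:

import org.tensorflow.Graph;
import org.tensorflow.Operand;
import org.tensorflow.framework.optimizers.AdaDelta; // assumed package path
import org.tensorflow.op.Ops;
import org.tensorflow.types.TFloat32;

public class AdaDeltaLearningRateSketch {
  public static void main(String[] args) {
    try (Graph graph = new Graph()) {
      Ops tf = Ops.create(graph);

      // Fixed learning rate, matching the existing float constructor.
      AdaDelta fixedRate = new AdaDelta(graph, 0.001f, 0.95f, 1e-7f);

      // Learning rate supplied as an Operand<TFloat32>; any scalar float
      // computation (a constant, a placeholder fed per step, a decay
      // schedule) can be passed now that the base Optimizer accepts an Operand.
      Operand<TFloat32> lr = tf.constant(0.001f);
      AdaDelta operandRate = new AdaDelta(graph, lr, 0.95f, 1e-7f);
    }
  }
}
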
@@ -20,6 +20,7 @@
import org.tensorflow.Output;
import org.tensorflow.op.Op;
import org.tensorflow.op.core.Variable;
import org.tensorflow.types.TFloat32;
import org.tensorflow.types.family.TType;

import java.util.List;
@@ -40,16 +41,18 @@
*/
public class AdaGrad extends Optimizer {

public static final String DEFAULT_NAME = "Adagrad";

public static final String ACCUMULATOR = "accumulator";
public static final float LEARNING_RATE_DEFAULT = 0.001f;
public static final float INITIAL_ACCUMULATOR_DEFAULT = 0.01f;

private final float learningRate;

private final float initialAccumulatorValue;

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #LEARNING_RATE_DEFAULT} for the learning rate, and {@link #INITIAL_ACCUMULATOR_DEFAULT} for
* the initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
*/
@@ -58,7 +61,8 @@ public AdaGrad(Graph graph) {
}

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #INITIAL_ACCUMULATOR_DEFAULT} for the initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
@@ -68,52 +72,108 @@ public AdaGrad(Graph graph, float learningRate) {
}

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name, {@link
* #INITIAL_ACCUMULATOR_DEFAULT} for the initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaGrad(Graph graph, Operand<TFloat32> learningRateOperand) {
this(graph, learningRateOperand, INITIAL_ACCUMULATOR_DEFAULT);
}

/**
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name.
*
* @param graph the TensorFlow Graph
* @param learningRate the learning rate
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(Graph graph, float learningRate, float initialAccumulatorValue) {
super(graph);
if (initialAccumulatorValue < 0F) {
throw new IllegalArgumentException(
String.format(
"initialAccumulatorValue must be non-negative: %f", initialAccumulatorValue));
}
this.learningRate = learningRate;
this.initialAccumulatorValue = initialAccumulatorValue;
this(graph, null, learningRate, initialAccumulatorValue);
}

/**
* Creates an AdaGrad Optimizer
* Creates an AdaGrad Optimizer using {@link #DEFAULT_NAME} for the Optimizer name.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adagrad')
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(
Graph graph, Operand<TFloat32> learningRateOperand, float initialAccumulatorValue) {
this(graph, null, learningRateOperand, initialAccumulatorValue);
}

/**
* Creates an AdaGrad Optimizer using {@link #INITIAL_ACCUMULATOR_DEFAULT} for the
* initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRate the learning rate
*/
public AdaGrad(Graph graph, String name, float learningRate) {
this(graph, name, learningRate, 0.01f);
this(graph, name, learningRate, INITIAL_ACCUMULATOR_DEFAULT);
}

/**
* Creates an AdaGrad Optimizer using {@link #INITIAL_ACCUMULATOR_DEFAULT} for the
* initialAccumulatorValue.
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer.
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
*/
public AdaGrad(Graph graph, String name, Operand<TFloat32> learningRateOperand) {
this(graph, name, learningRateOperand, INITIAL_ACCUMULATOR_DEFAULT);
}

/**
* Creates an AdaGrad Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer (defaults to 'Adagrad')
* @param name the name for this Optimizer
* @param learningRate the learning rate
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(Graph graph, String name, float learningRate, float initialAccumulatorValue) {
super(graph, name);
super(graph, name, learningRate);
if (initialAccumulatorValue < 0F) {
throw new IllegalArgumentException(
String.format(
"initialAccumulatorValue must be non-negative: %f", initialAccumulatorValue));
}
this.initialAccumulatorValue = initialAccumulatorValue;
}

/**
* Creates an AdaGrad Optimizer
*
* @param graph the TensorFlow Graph
* @param name the name for this Optimizer
* @param learningRateOperand the learning rate Operand; this is used to calculate the learning
* rate.
* @param initialAccumulatorValue Starting value for the accumulators, must be non-negative.
* @throws java.lang.IllegalArgumentException if initialAccumulatorValue is negative
*/
public AdaGrad(
Graph graph,
String name,
Operand<TFloat32> learningRateOperand,
float initialAccumulatorValue) {
super(graph, name, learningRateOperand);
if (initialAccumulatorValue < 0F) {
throw new IllegalArgumentException(
String.format(
"initialAccumulatorValue must be non-negative: %f", initialAccumulatorValue));
}
this.learningRate = learningRate;
this.initialAccumulatorValue = initialAccumulatorValue;
}

@@ -142,7 +202,7 @@ private <T extends TType> void createAdaGradSlot(Output<T> v) {
protected <T extends TType> Op applyDense(Output<T> gradient, Output<T> variable) {
Variable<T> slot = getSlot(variable, ACCUMULATOR).get();
return tf.train.applyAdagrad(
variable, slot, tf.dtypes.cast(tf.constant(learningRate), gradient.dataType()), gradient);
variable, slot, tf.dtypes.cast(getLearningRateOperand(), gradient.dataType()), gradient);
}

/** {@inheritDoc} */
@@ -159,6 +219,6 @@ public String toString() {
/** {@inheritDoc} */
@Override
public String getOptimizerName() {
return "Adagrad";
return DEFAULT_NAME;
}
}
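
Likewise, a small sketch of the new AdaGrad constructors, including the non-negative check on initialAccumulatorValue that this change keeps. The package path, names, and values are again assumed for illustration, not taken from this diff:

import org.tensorflow.Graph;
import org.tensorflow.Operand;
import org.tensorflow.framework.optimizers.AdaGrad; // assumed package path
import org.tensorflow.op.Ops;
import org.tensorflow.types.TFloat32;

public class AdaGradLearningRateSketch {
  public static void main(String[] args) {
    try (Graph graph = new Graph()) {
      Ops tf = Ops.create(graph);

      // Named optimizer with the learning rate provided as an Operand.
      Operand<TFloat32> lr = tf.constant(0.01f);
      AdaGrad adaGrad = new AdaGrad(graph, "Adagrad", lr, 0.1f);

      // The float-based constructor still rejects a negative initial accumulator value.
      try {
        new AdaGrad(graph, 0.01f, -1.0f);
      } catch (IllegalArgumentException expected) {
        System.out.println("rejected: " + expected.getMessage());
      }
    }
  }
}
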