@@ -83,7 +83,6 @@
 import org.tensorflow.op.nn.Relu;
 import org.tensorflow.op.nn.Relu6;
 import org.tensorflow.op.nn.Selu;
-import org.tensorflow.op.nn.SigmoidCrossEntropyWithLogits;
 import org.tensorflow.op.nn.Softmax;
 import org.tensorflow.op.nn.SoftmaxCrossEntropyWithLogits;
 import org.tensorflow.op.nn.Softsign;
@@ -103,16 +102,13 @@
  * @see {@link Ops}
  */
 public final class NnOps {
-  public final NnRawOps raw;
-
   private final Scope scope;
 
   private final Ops ops;
 
   NnOps(Ops ops) {
     this.scope = ops.scope();
     this.ops = ops;
-    raw = new NnRawOps(ops);
   }
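
Review note: with the public NnRawOps field removed, call sites that reached the raw-op
wrappers through tf.nn.raw presumably switch to the methods now exposed directly on
tf.nn (see the replaced methods below). A hedged before/after sketch, assuming the
removed field exposed the same two wrappers:

    // before (via the removed field):  tf.nn.raw.softmaxCrossEntropyWithLogits(features, labels);
    // after (directly on tf.nn):       tf.nn.softmaxCrossEntropyWithLogits(features, labels);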
 
   /**
@@ -1815,55 +1811,6 @@ public <T extends TNumber> Selu<T> selu(Operand<T> features) {
     return Selu.create(scope, features);
   }
 
-  /**
-   * Computes sigmoid cross entropy given <code>logits</code>.
-   *
-   * <p>Measures the probability error in discrete classification tasks in which each class is
-   * independent and not mutually exclusive. For instance, one could perform multilabel
-   * classification where a picture can contain both an elephant and a dog at the same time.
-   *
-   * <p>For brevity, let <code>x = logits</code>, <code>z = labels</code>. The logistic loss in
-   * pseudo-code is
-   *
-   * <pre>
-   * z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
-   *  = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
-   *  = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
-   *  = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
-   *  = (1 - z) * x + log(1 + exp(-x))
-   *  = x - x * z + log(1 + exp(-x))
-   * </pre>
-   *
-   * <p>For <code>x &lt; 0</code>, to avoid overflow in <code>exp(-x)</code>, we reformulate the above as
-   *
-   * <pre>
-   * x - x * z + log(1 + exp(-x))
-   *  = log(exp(x)) - x * z + log(1 + exp(-x))
-   *  = -x * z + log(1 + exp(x))
-   * </pre>
-   *
-   * <p>Hence, to ensure stability and avoid overflow, the implementation uses this equivalent
-   * formulation:
-   *
-   * <pre>
-   * max(x, 0) - x * z + log(1 + exp(-abs(x)))
-   * </pre>
-   *
-   * <p><code>logits</code> and <code>labels</code> must have the same type and shape.
-   *
-   * @param labels the labels
-   * @param logits the logits of type float32 or float64
-   * @param <T> the type of labels and logits
-   * @return the component-wise logistic losses
-   * @throws IllegalArgumentException if logits and labels do not have the same shape
-   */
-  public <T extends TNumber> Operand<T> sigmoidCrossEntropyWithLogits(Operand<T> labels,
-      Operand<T> logits) {
-    return SigmoidCrossEntropyWithLogits.sigmoidCrossEntropyWithLogits(scope, labels, logits);
-  }
-
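
Review note: since the convenience wrapper is removed, callers can still build the
numerically stable form max(x, 0) - x * z + log(1 + exp(-abs(x))) from core math ops.
A minimal sketch under that assumption; the helper name is hypothetical, not part of
the generated API:

    // Hypothetical helper, not part of NnOps. Assumes:
    //   import org.tensorflow.Operand;
    //   import org.tensorflow.op.Ops;
    //   import org.tensorflow.types.family.TNumber;
    static <T extends TNumber> Operand<T> stableSigmoidCrossEntropy(
        Ops tf, Operand<T> labels, Operand<T> logits) {
      Operand<T> maxX0 = tf.math.maximum(logits, tf.zerosLike(logits)); // max(x, 0)
      Operand<T> xz = tf.math.mul(logits, labels);                      // x * z
      Operand<T> softplus =                                             // log(1 + exp(-|x|))
          tf.math.log1p(tf.math.exp(tf.math.neg(tf.math.abs(logits))));
      return tf.math.add(tf.math.sub(maxX0, xz), softplus);
    }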
 
   /**
    * Computes softmax activations.
    * For each batch {@code i} and class {@code j} we have
@@ -1881,53 +1828,20 @@ public <T extends TNumber> Softmax<T> softmax(Operand<T> logits) {
   }
 
   /**
-   * Computes softmax cross entropy between <code>logits</code> and <code>labels</code>.
-   *
-   * <p>Measures the probability error in discrete classification tasks in which the classes are
-   * mutually exclusive (each entry is in exactly one class). For example, each CIFAR-10 image is
-   * labeled with one and only one label: an image can be a dog or a truck, but not both.
-   *
-   * <p><b>NOTE:</b>
-   *
-   * <p>While the classes are mutually exclusive, their probabilities need not be. All that is
-   * required is that each row of <code>labels</code> is a valid probability distribution. If they
-   * are not, the computation of the gradient will be incorrect.
-   *
-   * <p>If using exclusive <code>labels</code> (wherein one and only one class is true at a time),
-   * see {@link org.tensorflow.op.NnOps#sparseSoftmaxCrossEntropyWithLogits}.
-   *
-   * <p>Usage:
-   *
-   * <pre>
-   * Operand&lt;TFloat32&gt; logits =
-   *     tf.constant(new float[][] {{4.0F, 2.0F, 1.0F}, {0.0F, 5.0F, 1.0F}});
-   * Operand&lt;TFloat32&gt; labels =
-   *     tf.constant(new float[][] {{1.0F, 0.0F, 0.0F}, {0.0F, 0.8F, 0.2F}});
-   * Operand&lt;TFloat32&gt; output =
-   *     tf.nn.softmaxCrossEntropyWithLogits(labels, logits, -1);
-   * // output Shape = [2]
-   * // dataType = FLOAT (1)
-   * // values { 0.169846, 0.824745 }
-   * </pre>
-   *
-   * <p>Backpropagation will happen into both <code>logits</code> and <code>labels</code>. To
-   * disallow backpropagation into <code>labels</code>, pass label tensors through
-   * <code>tf.stopGradient</code> before feeding it to this function.
+   * Computes softmax cross entropy cost and gradients to backpropagate.
+   * Inputs are the logits, not probabilities.
    *
-   * @param labels Each vector along the class dimension should hold a valid probability
-   *     distribution, e.g. for the case in which labels are of shape <code>[batch_size, num_classes]</code>,
-   *     each row of <code>labels[i]</code> must be a valid probability distribution.
-   * @param logits Per-label activations, typically a linear output. These activation energies are
-   *     interpreted as unnormalized log probabilities.
-   * @param axis The class dimension. -1 is the last dimension.
-   * @param <T> the number type of the operands
-   * @return the softmax cross entropy loss. Its type is the same as <code>logits</code> and its
-   *     shape is the same as <code>labels</code> except that it does not have the last dimension of
-   *     <code>labels</code>.
+   * @param features batch_size x num_classes matrix
+   * @param labels batch_size x num_classes matrix
+   *     The caller must ensure that each batch of labels represents a valid
+   *     probability distribution.
+   * @param <T> data type for the {@code loss} output and operands
+   * @return a new instance of SoftmaxCrossEntropyWithLogits
    */
-  public <T extends TNumber, U extends TNumber> Operand<T> softmaxCrossEntropyWithLogits(
-      Operand<U> labels, Operand<T> logits, int axis) {
-    return SoftmaxCrossEntropyWithLogits.softmaxCrossEntropyWithLogits(scope, labels, logits, axis);
+  public <T extends TNumber> SoftmaxCrossEntropyWithLogits<T> softmaxCrossEntropyWithLogits(
+      Operand<T> features, Operand<T> labels) {
+    return SoftmaxCrossEntropyWithLogits.create(scope, features, labels);
   }
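
Review note: the method now returns the raw-op wrapper rather than the loss operand, so
callers unpack its outputs themselves. A hedged usage sketch; the tensor values are
illustrative only:

    // Assumes an in-scope Ops instance tf, plus:
    //   import org.tensorflow.op.nn.SoftmaxCrossEntropyWithLogits;
    //   import org.tensorflow.types.TFloat32;
    Operand<TFloat32> logits =
        tf.constant(new float[][] {{4.0f, 2.0f, 1.0f}, {0.0f, 5.0f, 1.0f}});
    Operand<TFloat32> labels =
        tf.constant(new float[][] {{1.0f, 0.0f, 0.0f}, {0.0f, 0.8f, 0.2f}});
    SoftmaxCrossEntropyWithLogits<TFloat32> xent =
        tf.nn.softmaxCrossEntropyWithLogits(logits, labels);
    Operand<TFloat32> loss = xent.loss();         // per-example loss, shape [2]
    Operand<TFloat32> backprop = xent.backprop(); // gradient w.r.t. logits, shape [2, 3]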
 
   /**
@@ -2114,50 +2028,23 @@ public <T extends TType> SpaceToDepth<T> spaceToDepth(Operand<T> input, Long blo
   }
 
   /**
-   * Computes sparse softmax cross entropy between <code>logits</code> and <code>labels</code>.
-   *
-   * <p>Measures the probability error in discrete classification tasks in which the classes are
-   * mutually exclusive (each entry is in exactly one class). For example, each CIFAR-10 image is
-   * labeled with one and only one label: an image can be a dog or a truck, but not both.
-   *
-   * <p><b>NOTE:</b>
-   *
-   * <p>For this operation, the probability of a given label is considered exclusive. That is, soft
-   * classes are not allowed, and the <code>labels</code> vector must provide a single specific
-   * index for the true class for each row of <code>logits</code> (each minibatch entry). For soft
-   * softmax classification with a probability distribution for each entry, see {@link
-   * org.tensorflow.op.NnOps#softmaxCrossEntropyWithLogits}.
-   *
-   * <p><b>WARNING:</b>
+   * Computes softmax cross entropy cost and gradients to backpropagate.
+   * Unlike {@code SoftmaxCrossEntropyWithLogits}, this operation does not accept
+   * a matrix of label probabilities, but rather a single label per row
+   * of features. This label is considered to have probability 1.0 for the
+   * given row.
+   * <p>Inputs are the logits, not probabilities.
    *
-   * <p>This op expects unscaled logits, since it performs a <code>softmax</code> on
-   * <code>logits</code> internally for efficiency. Do not call this op with the output of
-   * <code>softmax</code>, as it will produce incorrect results.
-   *
-   * <p>A common use case is to have logits of shape <code>[batchSize, numClasses]</code> and have
-   * labels of shape <code>[batchSize]</code>, but higher dimensions are supported, in which case
-   * the <code>dim</code>-th dimension is assumed to be of size <code>numClasses</code>.
-   * <code>logits</code> must have the <code>dataType</code> of <code>TFloat16</code>,
-   * <code>TFloat32</code>, or <code>TFloat64</code>, and <code>labels</code> must have the
-   * dataType of <code>TInt32</code> or <code>TInt64</code>.
-   *
-   * @param labels <code>Tensor</code> of shape <code>[d_0, d_1, ..., d_{r-1}]</code> (where
-   *     <code>r</code> is the rank of <code>labels</code> and result) and dataType <code>TInt32</code>
-   *     or <code>TInt64</code>. Each entry in <code>labels</code> must be an index in
-   *     <code>[0, numClasses)</code>. Other values will raise an exception when this op is run on
-   *     CPU, and return <code>NaN</code> for the corresponding loss and gradient rows on GPU.
-   * @param logits Per-label activations (typically a linear output) of shape <code>[d_0, d_1, ...,
-   *     d_{r-1}, numClasses]</code> and dataType of <code>TFloat16</code>, <code>TFloat32</code>,
-   *     or <code>TFloat64</code>. These activation energies are interpreted as unnormalized log
-   *     probabilities.
-   * @return A <code>Tensor</code> of the same shape as <code>labels</code> and of the same type as
-   *     <code>logits</code> with the softmax cross entropy loss.
-   * @throws IllegalArgumentException If logits are scalars (need to have rank &gt;= 1) or if the
-   *     rank of the labels is not equal to the rank of the logits minus one.
-   */
-  public <T extends TNumber, U extends TNumber> Operand sparseSoftmaxCrossEntropyWithLogits(
-      Operand<T> labels, Operand<U> logits) {
-    return SparseSoftmaxCrossEntropyWithLogits.sparseSoftmaxCrossEntropyWithLogits(scope, labels, logits);
+   * @param features batch_size x num_classes matrix
+   * @param labels batch_size vector with values in [0, num_classes).
+   *     This is the label for the given minibatch entry.
+   * @param <T> data type for the {@code loss} output and operands
+   * @return a new instance of SparseSoftmaxCrossEntropyWithLogits
+   */
+  public <T extends TNumber> SparseSoftmaxCrossEntropyWithLogits<T> sparseSoftmaxCrossEntropyWithLogits(
+      Operand<T> features, Operand<? extends TNumber> labels) {
+    return SparseSoftmaxCrossEntropyWithLogits.create(scope, features, labels);
   }
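
Review note: as with the dense variant above, the sparse method now returns the raw-op
wrapper. A hedged usage sketch; the values are illustrative, and labels hold one class
index per row:

    // Assumes an in-scope Ops instance tf, plus:
    //   import org.tensorflow.op.nn.SparseSoftmaxCrossEntropyWithLogits;
    //   import org.tensorflow.types.TFloat32;
    //   import org.tensorflow.types.TInt32;
    Operand<TFloat32> features =
        tf.constant(new float[][] {{4.0f, 2.0f, 1.0f}, {0.0f, 5.0f, 1.0f}});
    Operand<TInt32> labels = tf.constant(new int[] {0, 1}); // class index per row
    SparseSoftmaxCrossEntropyWithLogits<TFloat32> xent =
        tf.nn.sparseSoftmaxCrossEntropyWithLogits(features, labels);
    Operand<TFloat32> loss = xent.loss();         // shape [2]
    Operand<TFloat32> backprop = xent.backprop(); // shape [2, 3]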
 
   /**