
Commit e81878e

ZhuBaohe authored and facebook-github-bot committed
Correct padding and activations docstrings in nn module
Summary: Pull Request resolved: pytorch#17197

Differential Revision: D14131284

Pulled By: soumith

fbshipit-source-id: 6edd225b47b1dde81b5ad0a23c588c6621987a69
1 parent f2f4030 commit e81878e

File tree

4 files changed: +49 −41 lines changed

torch/nn/functional.py

Lines changed: 12 additions & 13 deletions
@@ -953,16 +953,16 @@ def glu(input, dim=-1):
     The gated linear unit. Computes:

     .. math ::
+        \text{GLU}(a, b) = a \otimes \sigma(b)

-        H = A \times \sigma(B)
-
-    where `input` is split in half along `dim` to form `A` and `B`.
+    where `input` is split in half along `dim` to form `a` and `b`, :math:`\sigma`
+    is the sigmoid function and :math:`\otimes` is the element-wise product between matrices.

     See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.

     Args:
         input (Tensor): input tensor
-        dim (int): dimension on which to split the input
+        dim (int): dimension on which to split the input. Default: -1
     """
     if input.dim() == 0:
         raise RuntimeError("glu does not suppport scalars because halving size must be even")
@@ -1139,7 +1139,7 @@ def rrelu(input, lower=1. / 8, upper=1. / 3, training=False, inplace=False):
 logsigmoid = _add_docstr(torch._C._nn.log_sigmoid, r"""
 logsigmoid(input) -> Tensor

-Applies element-wise :math:`\text{LogSigmoid}(x) = \log \left(\frac{1}{1 + \exp(-x_i)}\right)`
+Applies element-wise :math:`\text{LogSigmoid}(x_i) = \log \left(\frac{1}{1 + \exp(-x_i)}\right)`

 See :class:`~torch.nn.LogSigmoid` for more details.
 """)
@@ -1211,8 +1211,8 @@ def softmin(input, dim=None, _stacklevel=3, dtype=None):
         dim (int): A dimension along which softmin will be computed (so every slice
             along dim will sum to 1).
         dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
-        If specified, the input tensor is casted to :attr:`dtype` before the operation
-        is performed. This is useful for preventing data type overflows. Default: None.
+          If specified, the input tensor is casted to :attr:`dtype` before the operation
+          is performed. This is useful for preventing data type overflows. Default: None.
     """
     if dim is None:
         dim = _get_softmax_dim('softmin', input.dim(), _stacklevel)
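To make the documented dim behavior concrete, a minimal sketch of softmin (illustrative, not from the commit):

>>> import torch
>>> import torch.nn.functional as F
>>> x = torch.randn(2, 3)
>>> out = F.softmin(x, dim=1)
>>> torch.allclose(out.sum(dim=1), torch.ones(2))   # every slice along dim sums to 1
True
>>> torch.allclose(out, F.softmax(-x, dim=1))       # softmin(x) == softmax(-x)
True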
@@ -1233,17 +1233,16 @@ def softmax(input, dim=None, _stacklevel=3, dtype=None):
     :math:`\text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)}`

     It is applied to all slices along dim, and will re-scale them so that the elements
-    lie in the range `(0, 1)` and sum to 1.
+    lie in the range `[0, 1]` and sum to 1.

     See :class:`~torch.nn.Softmax` for more details.

     Arguments:
         input (Tensor): input
         dim (int): A dimension along which softmax will be computed.
         dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
-        If specified, the input tensor is casted to :attr:`dtype` before the operation
-        is performed. This is useful for preventing data type overflows. Default: None.
-
+          If specified, the input tensor is casted to :attr:`dtype` before the operation
+          is performed. This is useful for preventing data type overflows. Default: None.

     .. note::
         This function doesn't work directly with NLLLoss,
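A brief sketch of the documented :attr:`dtype` behavior (illustrative only, not part of the diff; the float16 input is just to make the cast visible):

>>> import torch
>>> import torch.nn.functional as F
>>> x = torch.randn(2, 3, dtype=torch.float16)
>>> out = F.softmax(x, dim=1, dtype=torch.float32)  # input is cast to float32 before the op
>>> out.dtype
torch.float32
>>> torch.allclose(out.sum(dim=1), torch.ones(2))   # elements lie in [0, 1] and sum to 1
True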
@@ -1335,8 +1334,8 @@ def log_softmax(input, dim=None, _stacklevel=3, dtype=None):
         input (Tensor): input
         dim (int): A dimension along which log_softmax will be computed.
         dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
-        If specified, the input tensor is casted to :attr:`dtype` before the operation
-        is performed. This is useful for preventing data type overflows. Default: None.
+          If specified, the input tensor is casted to :attr:`dtype` before the operation
+          is performed. This is useful for preventing data type overflows. Default: None.
     """
     if dim is None:
         dim = _get_softmax_dim('log_softmax', input.dim(), _stacklevel)
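Likewise for log_softmax, a minimal sketch (illustrative, not part of the commit):

>>> import torch
>>> import torch.nn.functional as F
>>> x = torch.randn(2, 3)
>>> torch.allclose(F.log_softmax(x, dim=1), torch.log(F.softmax(x, dim=1)))
True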

torch/nn/modules/activation.py

Lines changed: 16 additions & 15 deletions
@@ -9,7 +9,7 @@

 @weak_module
 class Threshold(Module):
-    r"""Thresholds each element of the input Tensor
+    r"""Thresholds each element of the input Tensor.

     Threshold is defined as:

@@ -58,10 +58,9 @@ def extra_repr(self):

 @weak_module
 class ReLU(Threshold):
-    r"""Applies the rectified linear unit function element-wise
-    :math:`\text{ReLU}(x)= \max(0, x)`
+    r"""Applies the rectified linear unit function element-wise:

-    .. image:: scripts/activation_images/ReLU.png
+    :math:`\text{ReLU}(x)= \max(0, x)`

     Args:
         inplace: can optionally do the operation in-place. Default: ``False``
@@ -71,6 +70,8 @@ class ReLU(Threshold):
           dimensions
         - Output: :math:`(N, *)`, same shape as the input

+    .. image:: scripts/activation_images/ReLU.png
+
     Examples::

         >>> m = nn.ReLU()
@@ -166,8 +167,6 @@ class Hardtanh(Module):
     The range of the linear region :math:`[-1, 1]` can be adjusted using
     :attr:`min_val` and :attr:`max_val`.

-    .. image:: scripts/activation_images/Hardtanh.png
-
     Args:
         min_val: minimum value of the linear region range. Default: -1
         max_val: maximum value of the linear region range. Default: 1
@@ -181,6 +180,8 @@ class Hardtanh(Module):
           dimensions
         - Output: :math:`(N, *)`, same shape as the input

+    .. image:: scripts/activation_images/Hardtanh.png
+
     Examples::

         >>> m = nn.Hardtanh(-2, 2)
@@ -394,8 +395,6 @@ class SELU(Module):
     with :math:`\alpha = 1.6732632423543772848170429916717` and
     :math:`\text{scale} = 1.0507009873554804934193349852946`.

-    .. image:: scripts/activation_images/SELU.png
-
     More details can be found in the paper `Self-Normalizing Neural Networks`_ .

     Args:
@@ -406,6 +405,8 @@ class SELU(Module):
          dimensions
        - Output: :math:`(N, *)`, same shape as the input

+    .. image:: scripts/activation_images/SELU.png
+
    Examples::

        >>> m = nn.SELU()
@@ -433,7 +434,7 @@ def extra_repr(self):
 class GLU(Module):
     r"""Applies the gated linear unit function
     :math:`{GLU}(a, b)= a \otimes \sigma(b)` where :math:`a` is the first half
-    of the input vector and :math:`b` is the second half.
+    of the input matrices and :math:`b` is the second half.

     Args:
         dim (int): the dimension on which to split the input. Default: -1
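For context, the module splits its input in half along :attr:`dim`; a minimal sketch (illustrative, not part of the diff):

>>> import torch
>>> from torch import nn
>>> m = nn.GLU(dim=1)
>>> x = torch.randn(4, 6, 5)
>>> m(x).shape                                      # dim 1 is halved: 6 -> 3
torch.Size([4, 3, 5])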
@@ -708,8 +709,7 @@ class PReLU(Module):
        - Output: :math:`(N, *)`, same shape as the input

     Attributes:
-        weight (Tensor): the learnable weights of shape (attr:`num_parameters`).
-            The attr:`dtype` is default to
+        weight (Tensor): the learnable weights of shape (:attr:`num_parameters`).

     .. image:: scripts/activation_images/PReLU.png

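The corrected attribute description can be checked quickly (illustrative sketch, not part of the commit):

>>> import torch
>>> from torch import nn
>>> m = nn.PReLU(num_parameters=3)                  # one learnable weight per channel
>>> m.weight.shape
torch.Size([3])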
@@ -789,7 +789,9 @@ def forward(self, input):
 class Softmin(Module):
     r"""Applies the Softmin function to an n-dimensional input Tensor
     rescaling them so that the elements of the n-dimensional output Tensor
-    lie in the range `(0, 1)` and sum to 1
+    lie in the range `[0, 1]` and sum to 1.
+
+    Softmin is defined as:

     .. math::
         \text{Softmin}(x_{i}) = \frac{\exp(-x_i)}{\sum_j \exp(-x_j)}
@@ -828,7 +830,7 @@ def forward(self, input):
 class Softmax(Module):
     r"""Applies the Softmax function to an n-dimensional input Tensor
     rescaling them so that the elements of the n-dimensional output Tensor
-    lie in the range (0,1) and sum to 1
+    lie in the range [0,1] and sum to 1.

     Softmax is defined as:

@@ -918,8 +920,7 @@ class LogSoftmax(Module):
        - Output: :math:`(*)`, same shape as the input

     Arguments:
-        dim (int): A dimension along which Softmax will be computed (so every slice
-            along dim will sum to 1).
+        dim (int): A dimension along which LogSoftmax will be computed.

     Returns:
         a Tensor of the same dimension and shape as the input with
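As a sanity check of the corrected argument description (illustrative only, not from the commit):

>>> import torch
>>> from torch import nn
>>> m = nn.LogSoftmax(dim=1)
>>> out = m(torch.randn(2, 4))
>>> torch.allclose(out.exp().sum(dim=1), torch.ones(2))  # exp of the output sums to 1 along dim
True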

torch/nn/modules/adaptive.py

Lines changed: 5 additions & 3 deletions
@@ -45,7 +45,7 @@ class AdaptiveLogSoftmaxWithLoss(Module):
       assigned to the first cluster, and targets `101, 102, ..., 1000` will be
       assigned to the second cluster, while targets
       `1001, 1002, ..., n_classes - 1` will be assigned
-      to the last, third cluster
+      to the last, third cluster.

     * :attr:`div_value` is used to compute the size of each additional cluster,
       which is given as
@@ -74,10 +74,12 @@ class AdaptiveLogSoftmaxWithLoss(Module):

     Args:
         in_features (int): Number of features in the input tensor
-        n_classes (int): Number of classes in the dataset.
-        cutoffs (Sequence): Cutoffs used to assign targets to their buckets.
+        n_classes (int): Number of classes in the dataset
+        cutoffs (Sequence): Cutoffs used to assign targets to their buckets
         div_value (float, optional): value used as an exponent to compute sizes
             of the clusters. Default: 4.0
+        head_bias (bool, optional): If ``True``, adds a bias term to the 'head' of the
+            adaptive softmax. Default: ``False``

     Returns:
         ``NamedTuple`` with ``output`` and ``loss`` fields:
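A minimal sketch of the newly documented :attr:`head_bias` argument and the returned fields (illustrative values, not from the commit):

>>> import torch
>>> from torch import nn
>>> asm = nn.AdaptiveLogSoftmaxWithLoss(in_features=16, n_classes=1000,
...                                     cutoffs=[100, 500], head_bias=True)
>>> x = torch.randn(8, 16)
>>> target = torch.randint(0, 1000, (8,))
>>> result = asm(x, target)
>>> result.output.shape, result.loss.shape          # per-sample log-probs and a scalar loss
(torch.Size([8]), torch.Size([]))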

torch/nn/modules/padding.py

Lines changed: 16 additions & 10 deletions
@@ -37,6 +37,7 @@ class ConstantPad1d(_ConstantPadNd):
     Shape:
         - Input: :math:`(N, C, W_{in})`
        - Output: :math:`(N, C, W_{out})` where
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
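A worked example of the shape formula above (illustrative, not part of the diff):

>>> import torch
>>> from torch import nn
>>> m = nn.ConstantPad1d((3, 1), 0.5)               # padding_left=3, padding_right=1
>>> x = torch.randn(1, 2, 4)
>>> m(x).shape                                      # W_out = 4 + 3 + 1 = 8
torch.Size([1, 2, 8])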
@@ -86,7 +87,9 @@ class ConstantPad2d(_ConstantPadNd):
     Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where
+
          :math:`H_{out} = H_{in} + \text{padding\_top} + \text{padding\_bottom}`
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
@@ -97,13 +100,6 @@ class ConstantPad2d(_ConstantPadNd):
        tensor([[[ 1.6585,  0.4320],
                 [-0.8701, -0.4649]]])
        >>> m(input)
-       tensor([[[ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
-                [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
-                [ 3.5000,  3.5000,  1.6585,  0.4320,  3.5000,  3.5000],
-                [ 3.5000,  3.5000, -0.8701, -0.4649,  3.5000,  3.5000],
-                [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
-                [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000]]])
-       >>> m(input)
        tensor([[[ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
                 [ 3.5000,  3.5000,  3.5000,  3.5000,  3.5000,  3.5000],
                 [ 3.5000,  3.5000,  1.6585,  0.4320,  3.5000,  3.5000],
@@ -143,8 +139,11 @@ class ConstantPad3d(_ConstantPadNd):
     Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` where
+
          :math:`D_{out} = D_{in} + \text{padding\_front} + \text{padding\_back}`
+
          :math:`H_{out} = H_{in} + \text{padding\_top} + \text{padding\_bottom}`
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
@@ -189,6 +188,7 @@ class ReflectionPad1d(_ReflectionPadNd):
     Shape:
        - Input: :math:`(N, C, W_{in})`
        - Output: :math:`(N, C, W_{out})` where
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
@@ -199,9 +199,6 @@ class ReflectionPad1d(_ReflectionPadNd):
        tensor([[[0., 1., 2., 3.],
                 [4., 5., 6., 7.]]])
        >>> m(input)
-       tensor([[[2., 1., 0., 1., 2., 3., 2., 1.],
-                [6., 5., 4., 5., 6., 7., 6., 5.]]])
-       >>> m(input)
        tensor([[[2., 1., 0., 1., 2., 3., 2., 1.],
                 [6., 5., 4., 5., 6., 7., 6., 5.]]])
        >>> # using different paddings for different sides
@@ -233,6 +230,7 @@ class ReflectionPad2d(_ReflectionPadNd):
        - Output: :math:`(N, C, H_{out}, W_{out})` where

          :math:`H_{out} = H_{in} + \text{padding\_top} + \text{padding\_bottom}`
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
@@ -293,6 +291,7 @@ class ReplicationPad1d(_ReplicationPadNd):
     Shape:
        - Input: :math:`(N, C, W_{in})`
        - Output: :math:`(N, C, W_{out})` where
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
@@ -332,7 +331,9 @@ class ReplicationPad2d(_ReplicationPadNd):
     Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where
+
          :math:`H_{out} = H_{in} + \text{padding\_top} + \text{padding\_bottom}`
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
@@ -383,8 +384,11 @@ class ReplicationPad3d(_ReplicationPadNd):
     Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` where
+
          :math:`D_{out} = D_{in} + \text{padding\_front} + \text{padding\_back}`
+
          :math:`H_{out} = H_{in} + \text{padding\_top} + \text{padding\_bottom}`
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
@@ -417,7 +421,9 @@ class ZeroPad2d(ConstantPad2d):
     Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where
+
          :math:`H_{out} = H_{in} + \text{padding\_top} + \text{padding\_bottom}`
+
          :math:`W_{out} = W_{in} + \text{padding\_left} + \text{padding\_right}`

     Examples::
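A worked example of the two shape formulas (illustrative sketch, not from the commit):

>>> import torch
>>> from torch import nn
>>> m = nn.ZeroPad2d((1, 1, 2, 0))                  # (left, right, top, bottom)
>>> x = torch.randn(1, 1, 3, 3)
>>> m(x).shape                                      # H_out = 3 + 2 + 0 = 5, W_out = 3 + 1 + 1 = 5
torch.Size([1, 1, 5, 5])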
