Commit 2c5e26c: Add SELU

Author: Thibault de Boissiere
Parent: 6292a42

9 files changed, +490 -0 lines changed

SELU/README.md

# Experiments with MNIST and SELU

PyTorch implementation of some experiments from [Self-Normalizing Networks](https://arxiv.org/pdf/1706.02515.pdf)

## Dependencies

- python (tested on Anaconda python 3.6.1)
- pytorch (tested on 0.1.12_2)
- sklearn (tested on 0.18.1)
- matplotlib (tested on 2.0.1)
- tqdm
- numpy

## Usage

Main command:

    python main.py

Arguments:

    --model MODEL         Model name, RELUNet or SELUNet
    --n_inner_layers N_INNER_LAYERS
                          Number of inner hidden layers
    --hidden_dim HIDDEN_DIM
                          Hidden layer dimension
    --dropout DROPOUT     Dropout rate
    --use_cuda            Use CUDA
    --nb_epoch NB_EPOCH   Number of training epochs
    --batchnorm           Whether to use BN for RELUNet
    --batch_size BATCH_SIZE
                          Batch size
    --optimizer OPTIMIZER
                          Optimizer
    --learning_rate LEARNING_RATE
                          Learning rate

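For example, to train a SELUNet with 16 inner layers using Adam on GPU (the flag values here are illustrative; any of the options above can be combined):

    python main.py --model SELUNet --n_inner_layers 16 --optimizer Adam --learning_rate 1E-5 --nb_epoch 20 --use_cuda
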
## Run a batch of experiments

Modify `run_experiments.sh` as needed, then run:

    bash run_experiments.sh

## Plot results

Run a few experiments. Results are saved in a `results` folder.
Modify `plot_results.py` to select your experiments, then run:

    python plot_results.py

## Notes

- The architecture of the NN is the same as in the original paper.
- We plot the loss curves to give some more perspective.
- I initially had a hard time reproducing results. Inspection of the loss curves shows you just have to train longer, until the Sobolev loss and the MSE loss have similar magnitude, or increase the weight on the Sobolev loss.

SELU/figures/SELU_LR_1E-2.png (89.8 KB)

SELU/figures/SELU_LR_1E-3.png (82.2 KB)

SELU/figures/SELU_LR_1E-5.png (86.5 KB)

SELU/main.py

from __future__ import print_function
import os
import argparse
import torchvision.datasets as dset
import train

# Training settings
parser = argparse.ArgumentParser(description='MNIST SELU experiments')

# Neural net architecture
parser.add_argument('--model', default="RELUNet", type=str, help="Model name, RELUNet or SELUNet")
parser.add_argument('--n_inner_layers', default=4, type=int, help="Number of inner hidden layers")
parser.add_argument('--hidden_dim', default=-1, type=int, help="Hidden layer dimension")
parser.add_argument('--dropout', default=0, type=float, help="Dropout rate")
# Training params
parser.add_argument('--use_cuda', action="store_true", help="Use CUDA")
parser.add_argument('--nb_epoch', default=100, type=int, help="Number of training epochs")
parser.add_argument('--batchnorm', action="store_true", help="Whether to use BN for RELUNet")
parser.add_argument('--batch_size', default=128, type=int, help='Batch size')
parser.add_argument('--optimizer', default="SGD", type=str, help="Optimizer")
parser.add_argument('--learning_rate', default=1E-5, type=float, help="Learning rate")
args = parser.parse_args()


assert args.model in ["RELUNet", "SELUNet"]

# Download MNIST if it does not exist
if not os.path.isfile("processed/training.pt"):
    dset.MNIST(root=".", download=True)

if not os.path.exists("results"):
    os.makedirs("results")

# Launch training
train.train(args)
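
The `train` module imported above is one of the commit's 9 files but is not shown in this view. For orientation, below is a minimal, hypothetical sketch of a compatible `train.train`, written against the same old-style `Variable` API as `models.py`. The model construction mirrors `models.py`, and the output format (a `train_loss` list dumped to `results/<model>_depth_<d>_opt_<opt>_drop_<p>_bn_<bn>.json`, where depth appears to mean `n_inner_layers + 2`, given the 6/10/18/34 values read by `plot_results.py` versus the 4/8/16/32 inner-layer counts in `run_experiments.sh`) is inferred from `plot_results.py`. The real implementation may differ.

# Hypothetical sketch of the (not shown) train module -- not the author's actual code.
import json

import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.autograd import Variable

import models


def train(args):

    # Normalize MNIST, then flatten each 28x28 image into a 784-dim vector
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,)),
                                    lambda x: x.view(-1)])
    train_set = dset.MNIST(root=".", train=True, transform=transform)
    loader = torch.utils.data.DataLoader(train_set, batch_size=args.batch_size, shuffle=True)

    # hidden_dim defaults to -1 in main.py, so the real code must pick a value
    # somewhere; 256 here is a guess
    hidden_dim = args.hidden_dim if args.hidden_dim > 0 else 256
    if args.model == "RELUNet":
        model = models.RELUNet(args.n_inner_layers, 784, hidden_dim, 10,
                               dropout=args.dropout, batchnorm=args.batchnorm)
    else:
        model = models.SELUNet(args.n_inner_layers, 784, hidden_dim, 10,
                               dropout=args.dropout)
    if args.use_cuda:
        model.cuda()

    optim_cls = optim.Adam if args.optimizer == "Adam" else optim.SGD
    optimizer = optim_cls(model.parameters(), lr=args.learning_rate)

    d_losses = {"train_loss": []}
    for epoch in range(args.nb_epoch):
        epoch_losses = []
        for data, target in loader:
            if args.use_cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data, training=True)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.data[0])
        d_losses["train_loss"].append(sum(epoch_losses) / len(epoch_losses))

    # File name pattern matches what plot_results.py expects;
    # depth = n_inner_layers + 2 (input and output layers included)
    out = "results/%s_depth_%s_opt_%s_drop_%s_bn_%s.json" % (
        args.model, args.n_inner_layers + 2, args.optimizer, args.dropout, args.batchnorm)
    with open(out, "w") as f:
        json.dump(d_losses, f)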

SELU/models.py

import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable
import numpy as np


# network
class RELUNet(nn.Module):
    def __init__(self, n_inner_layers, input_dim, hidden_dim, output_dim, dropout=0, batchnorm=True):

        super(RELUNet, self).__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        self.batchnorm = batchnorm
        self.n_inner_layers = n_inner_layers

        # FC layers
        self.fc_in = nn.Linear(input_dim, hidden_dim)
        # Hacky way to set inner layers. Ensures they are all registered as
        # submodules of the model (so .parameters() / .cuda() pick them up)
        for k in range(n_inner_layers):
            setattr(self, "fc_%s" % k, nn.Linear(hidden_dim, hidden_dim))
        self.fc_out = nn.Linear(hidden_dim, output_dim)

        # BN layers
        self.bn_in = nn.BatchNorm1d(hidden_dim)

        for k in range(n_inner_layers):
            setattr(self, "bn_%s" % k, nn.BatchNorm1d(hidden_dim))

        # Initialize weights specifically for relu

        # First layer
        init.normal(self.fc_in.weight, std=2. / np.sqrt(np.float32(self.input_dim)))
        init.constant(self.fc_in.bias, 0.)

        # Inner layers
        for i in range(self.n_inner_layers):
            init.normal(getattr(self, "fc_%s" % i).weight, std=2. / np.sqrt(np.float32(self.hidden_dim)))
            init.constant(getattr(self, "fc_%s" % i).bias, 0.)

        # Last layer
        init.normal(self.fc_out.weight, std=2. / np.sqrt(np.float32(self.hidden_dim)))
        init.constant(self.fc_out.bias, 0.)

    def forward(self, x, training=False):

        # First layer
        x = self.fc_in(x)
        if self.batchnorm:
            x = self.bn_in(x)
        x = F.relu(x)
        if self.dropout > 0:
            x = F.dropout(x, p=self.dropout, training=training)

        # Inner layers
        for i in range(self.n_inner_layers):
            x = getattr(self, "fc_%s" % i)(x)
            if self.batchnorm:
                x = getattr(self, "bn_%s" % i)(x)
            x = F.relu(x)
            if self.dropout > 0:
                x = F.dropout(x, p=self.dropout, training=training)

        # Output layer
        x = self.fc_out(x)

        return x


def alpha_dropout(input, p=0.5, training=False):
    """Applies alpha dropout to the input.

    See :class:`~torch.nn.AlphaDropout` for details.

    Args:
        p (float, optional): the drop probability
        training (bool, optional): switch between training and evaluation mode
    """
    if p < 0 or p > 1:
        raise ValueError("dropout probability has to be between 0 and 1, "
                         "but got {}".format(p))

    if p == 0 or not training:
        return input

    # alpha' = -scale * alpha: the saturation value dropped units are set to
    alpha = -1.7580993408473766
    keep_prob = 1 - p
    # TODO avoid casting to byte after resize
    noise = input.data.new().resize_(input.size())
    noise.bernoulli_(p)
    noise = Variable(noise.byte())

    # Set dropped units to alpha', then apply the affine transform a*x + b
    # that restores zero mean and unit variance
    output = input.masked_fill(noise, alpha)

    a = (keep_prob + alpha ** 2 * keep_prob * (1 - keep_prob)) ** (-0.5)
    b = -a * alpha * (1 - keep_prob)

    return output.mul_(a).add_(b)


def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * F.elu(x, alpha)


class SELUNet(nn.Module):
    def __init__(self, n_inner_layers, input_dim, hidden_dim, output_dim, dropout=0.05):

        super(SELUNet, self).__init__()

        self.dropout = dropout

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.n_inner_layers = n_inner_layers
        self.fc_in = nn.Linear(input_dim, hidden_dim)
        for k in range(n_inner_layers):
            setattr(self, "fc_%s" % k, nn.Linear(hidden_dim, hidden_dim))
        self.fc_out = nn.Linear(hidden_dim, output_dim)

        # Initialize weights specifically for selu (std = 1/sqrt(fan_in), as
        # recommended in the SNN paper)

        # First layer
        init.normal(self.fc_in.weight, std=1. / np.sqrt(np.float32(self.input_dim)))
        init.constant(self.fc_in.bias, 0.)

        # Inner layers
        for i in range(self.n_inner_layers):
            init.normal(getattr(self, "fc_%s" % i).weight, std=1. / np.sqrt(np.float32(self.hidden_dim)))
            init.constant(getattr(self, "fc_%s" % i).bias, 0.)

        # Last layer
        init.normal(self.fc_out.weight, std=1. / np.sqrt(np.float32(self.hidden_dim)))
        init.constant(self.fc_out.bias, 0.)

    def forward(self, x, training=False):

        # First layer
        x = self.fc_in(x)
        x = selu(x)
        if self.dropout > 0:
            x = alpha_dropout(x, p=self.dropout, training=training)

        # Inner layers
        for i in range(self.n_inner_layers):
            x = getattr(self, "fc_%s" % i)(x)
            x = selu(x)
            if self.dropout > 0:
                x = alpha_dropout(x, p=self.dropout, training=training)

        # Output layer
        x = self.fc_out(x)

        return x
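
As a quick, standalone sanity check of the self-normalizing fixed point (illustrative, not part of the commit): with the `alpha` and `scale` constants above, pushing standard-normal pre-activations through SELU should produce outputs with roughly zero mean and unit variance. A numpy version of the same activation makes this easy to verify:

# Illustrative check of the SELU fixed point, using the same constants as selu() above
import numpy as np

alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946


def selu_np(x):
    # Same as scale * elu(x, alpha), written with numpy
    return scale * np.where(x > 0, x, alpha * np.expm1(x))


x = np.random.randn(10 ** 6)  # ~N(0, 1) pre-activations
y = selu_np(x)
print(y.mean(), y.std())      # both should be close to 0 and 1 respectively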

SELU/plot_results.py

import matplotlib.pylab as plt
import matplotlib.gridspec as gridspec
import json
import os


def plot():

    ############################
    # Adapt to existing experiments
    ############################
    list_width = [0.5, 1, 1.5, 2]
    list_depth = [6, 10, 18, 34]
    learning_rate = "1E-5"
    result_folder = "results_1e-5"

    plt.figure(figsize=(12, 5))
    gs = gridspec.GridSpec(1, 2)

    ##################
    # SGD results
    ##################
    ax0 = plt.subplot(gs[0])

    for i in range(len(list_depth)):
        depth = list_depth[i]
        exp = "%s/RELUNet_depth_%s_opt_SGD_drop_0_bn_True.json" % (result_folder, depth)
        with open(exp, "r") as f:
            d_losses = json.load(f)
        ax0.plot(d_losses["train_loss"],
                 linewidth=list_width[i],
                 color="C0",
                 label="RELU, Depth: %s" % depth)

    for i in range(len(list_depth)):
        depth = list_depth[i]
        exp = "%s/SELUNet_depth_%s_opt_SGD_drop_0_bn_False.json" % (result_folder, depth)
        with open(exp, "r") as f:
            d_losses = json.load(f)
        ax0.plot(d_losses["train_loss"],
                 linewidth=list_width[i],
                 color="C1",
                 label="SELU, Depth: %s" % depth)
    ax0.legend(loc="best")
    ax0.set_title("SGD, Learning Rate = %s" % learning_rate, fontsize=16)
    ax0.set_yscale("log")
    ax0.set_ylim([1E-6, 10])
    ax0.set_xlabel("Epochs", fontsize=18)
    ax0.set_ylabel("Train logloss", fontsize=18)

    ##################
    # Adam results
    ##################
    ax1 = plt.subplot(gs[1])

    for i in range(len(list_depth)):
        depth = list_depth[i]
        exp = "%s/RELUNet_depth_%s_opt_Adam_drop_0_bn_True.json" % (result_folder, depth)
        with open(exp, "r") as f:
            d_losses = json.load(f)
        ax1.plot(d_losses["train_loss"],
                 linewidth=list_width[i],
                 color="C0",
                 label="RELU, Depth: %s" % depth)

    for i in range(len(list_depth)):
        depth = list_depth[i]
        exp = "%s/SELUNet_depth_%s_opt_Adam_drop_0_bn_False.json" % (result_folder, depth)
        with open(exp, "r") as f:
            d_losses = json.load(f)
        ax1.plot(d_losses["train_loss"],
                 linewidth=list_width[i],
                 color="C1",
                 label="SELU, Depth: %s" % depth)
    ax1.legend(loc="best")
    ax1.set_title("Adam, Learning Rate = %s" % learning_rate, fontsize=16)
    ax1.set_yscale("log")
    ax1.set_ylim([1E-6, 10])
    ax1.set_xlabel("Epochs", fontsize=18)
    ax1.set_ylabel("Train logloss", fontsize=18)

    if not os.path.exists("figures"):
        os.makedirs("figures")

    plt.savefig("figures/SELU_LR_%s.png" % learning_rate)


if __name__ == '__main__':

    plot()
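
For reference, the loss files read above are assumed to be JSON dicts with at least a `train_loss` list holding one value per epoch. A dummy file in that layout can be produced like this (illustrative only; the file name and values are made up):

# Illustrative only: write a dummy results file in the layout plot_results.py reads
import json
import os

os.makedirs("results_1e-5", exist_ok=True)     # folder used by result_folder above
d_losses = {"train_loss": [2.30, 1.12, 0.45]}  # one loss value per epoch
with open("results_1e-5/SELUNet_depth_6_opt_SGD_drop_0_bn_False.json", "w") as f:
    json.dump(d_losses, f)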

SELU/run_experiments.sh

# RELU
python main.py --use_cuda --n_inner_layers 4 --model RELUNet --learning_rate 1E-5 --nb_epoch 20 --batchnorm --optimizer Adam
python main.py --use_cuda --n_inner_layers 8 --model RELUNet --learning_rate 1E-5 --nb_epoch 20 --batchnorm --optimizer Adam
python main.py --use_cuda --n_inner_layers 16 --model RELUNet --learning_rate 1E-5 --nb_epoch 20 --batchnorm --optimizer Adam
python main.py --use_cuda --n_inner_layers 32 --model RELUNet --learning_rate 1E-5 --nb_epoch 20 --batchnorm --optimizer Adam
# SELU
python main.py --use_cuda --n_inner_layers 4 --model SELUNet --learning_rate 1E-5 --nb_epoch 20 --optimizer Adam
python main.py --use_cuda --n_inner_layers 8 --model SELUNet --learning_rate 1E-5 --nb_epoch 20 --optimizer Adam
python main.py --use_cuda --n_inner_layers 16 --model SELUNet --learning_rate 1E-5 --nb_epoch 20 --optimizer Adam
python main.py --use_cuda --n_inner_layers 32 --model SELUNet --learning_rate 1E-5 --nb_epoch 20 --optimizer Adam
# SELU + dropout
python main.py --use_cuda --n_inner_layers 4 --model SELUNet --learning_rate 1E-5 --nb_epoch 200 --dropout 0.05
python main.py --use_cuda --n_inner_layers 8 --model SELUNet --learning_rate 1E-5 --nb_epoch 200 --dropout 0.05
python main.py --use_cuda --n_inner_layers 16 --model SELUNet --learning_rate 1E-5 --nb_epoch 200 --dropout 0.05
python main.py --use_cuda --n_inner_layers 32 --model SELUNet --learning_rate 1E-5 --nb_epoch 200 --dropout 0.05
