Loss stays constant during training, hoping for some guidance #32

Open

jinpeifei2015 opened this issue Jun 27, 2019 · 3 comments

jinpeifei2015 commented Jun 27, 2019

The model deploy is as follows:
name: "ArcFace"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
resize_param {
prob: 1
resize_mode: WARP
height: 128
width: 128
interp_mode: LINEAR
interp_mode: AREA
interp_mode: CUBIC
interp_mode: LANCZOS4
}
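# Listing several interp_mode values presumably makes the data layer pick
# one at random per image (Caffe-SSD ResizeParameter behavior).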
mirror: true
crop_h: 128
crop_w: 128
#distort_param {
# brightness_prob: 0.5
# brightness_delta: 32
# contrast_prob: 0.5
# contrast_lower: 0.5
# contrast_upper: 1.5
# hue_prob: 0.5
# hue_delta: 18
# saturation_prob: 0.5
# saturation_lower: 0.5
# saturation_upper: 1.5
# random_order_prob: 0.
#}
}
data_param {
source: "/media/zz/7c333a37-0503-4f81-8103-0ef7e776f6fb/Face_Data/casia_extract_aligned_train_9204cls_lmdb"
batch_size: 512
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
resize_param {
prob: 1
resize_mode: WARP
height: 128
width: 128
interp_mode: LINEAR
}
crop_h: 128
crop_w: 128
}
data_param {
source: "/media/zz/7c333a37-0503-4f81-8103-0ef7e776f6fb/Face_Data/casia_extract_aligned_test_9204cls_lmdb"
batch_size: 2
backend: LMDB
}
}
############## CNN Architecture ###############
layer {
name: "data/bias"
type: "Bias"
bottom: "data"
top: "data/bias"
param {
lr_mult: 0
decay_mult: 0
}
bias_param {
filler {
type: "constant"
value: -128
}
}
}
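# Note: this frozen Bias layer (lr_mult: 0) subtracts 128 from the raw
# [0, 255] pixels, shifting the input to roughly [-128, 127] in place of a
# mean_value in transform_param.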
################################################
layer {
name: "conv1"
type: "Convolution"
bottom: "data/bias"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 7
pad: 3
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv1_bn"
type: "BatchNorm"
bottom: "conv1"
top: "conv1"
}
layer {
name: "conv1_scale"
type: "Scale"
bottom: "conv1"
top: "conv1"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv1_relu"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "pool1_1"
type: "Pooling"
bottom: "pool1"
top: "pool1_1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1_1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 1
stride: 1
pad: 0
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv2_1_bn"
type: "BatchNorm"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_1_scale"
type: "Scale"
bottom: "conv2_1"
top: "conv2_1"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv2_1_relu"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
pad: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv2_2_bn"
type: "BatchNorm"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "conv2_2_scale"
type: "Scale"
bottom: "conv2_2"
top: "conv2_2"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv2_2_relu"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
##############################################
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 1
pad: 0
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv3_1_bn"
type: "BatchNorm"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_1_scale"
type: "Scale"
bottom: "conv3_1"
top: "conv3_1"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv3_1_relu"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 3
pad: 1
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv3_2_bn"
type: "BatchNorm"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_2_scale"
type: "Scale"
bottom: "conv3_2"
top: "conv3_2"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv3_2_relu"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "conv3_2"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 1
pad: 0
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_1_bn"
type: "BatchNorm"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_1_scale"
type: "Scale"
bottom: "conv4_1"
top: "conv4_1"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv4_1_relu"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_2_bn"
type: "BatchNorm"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_2_scale"
type: "Scale"
bottom: "conv4_2"
top: "conv4_2"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv4_2_relu"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
################################################
layer {
name: "conv5_1"
type: "Convolution"
bottom: "conv4_2"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 1
pad: 0
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv5_1_bn"
type: "BatchNorm"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_1_scale"
type: "Scale"
bottom: "conv5_1"
top: "conv5_1"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "conv5_1_relu"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv5_1"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
#########################################
#########################################
layer {
name: "fc1"
type: "InnerProduct"
bottom: "pool3"
top: "fc1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc1_bn"
type: "BatchNorm"
bottom: "fc1"
top: "fc1"
}
layer {
name: "fc1_scale"
type: "Scale"
bottom: "fc1"
top: "fc1"
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "fc1_relu"
type: "ReLU"
bottom: "fc1"
top: "fc1"
}
layer {
name: "fc2"
type: "InnerProduct"
bottom: "fc1"
top: "fc2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 128
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc2_norm"
type: "NormalizeJin"
bottom: "fc2"
top: "fc2_norm"
norm_jin_param {
across_spatial: true
scale_filler {
type: "constant"
value: 1.0
}
channel_shared: true
}
}
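# NormalizeJin is a custom layer of this repo; presumably it L2-normalizes
# the 128-d embedding (with a channel-shared scale fixed at 1.0), so
# fc2_norm lies on the unit hypersphere as ArcFace expects.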
############### Arc-Softmax Loss ##############

layer {
name: "fc6_changed"
type: "InnerProduct"
bottom: "fc2_norm"
top: "fc6"
inner_product_param {
num_output: 9204
normalize: true
weight_filler {
type: "xavier"
}
bias_term: false
}
}
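# With normalize: true and bias_term: false, the fc6 weight vectors are
# presumably L2-normalized as well, so each fc6 output is the cosine
# cos(theta_j) between the embedding and the class-j weight vector.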
####################################################
layer {
name: "cosin_add_m"
type: "CosinAddm"
bottom: "fc6"
bottom: "label"
top: "fc6_margin"
cosin_add_m_param {
m: 0.1
}
include {
phase: TRAIN
}
}
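# CosinAddm presumably applies the ArcFace additive angular margin to the
# target-class logit, cos(theta_y) -> cos(theta_y + m), here with m = 0.1.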

layer {
name: "fc6_margin_scale"
type: "Scale"
bottom: "fc6_margin"
top: "fc6_margin_scale"
param {
lr_mult: 0
decay_mult: 0
}
scale_param {
filler{
type: "constant"
value: 64
}
}
include {
phase: TRAIN
}
}
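# This frozen Scale layer multiplies all logits by the feature scale s = 64
# before the softmax, as in the ArcFace formulation.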

######################################################
layer {
name: "softmax_loss"
type: "SoftmaxWithLoss"
bottom: "fc6_margin_scale"
bottom: "label"
#bottom: "label"
#bottom: "data"
top: "softmax_loss"
loss_weight: 1
include {
phase: TRAIN
}
}

layer {
name: "Accuracy"
type: "Accuracy"
bottom: "fc6"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
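# Accuracy is measured on the raw cosine logits (fc6), without the margin
# and scale layers, which are applied only in the TRAIN phase.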

The loss looks like this:
I0627 17:38:58.567371 6757 solver.cpp:224] Iteration 450 (2.13816 iter/s, 4.67691s/10 iters), loss = 87.3365
I0627 17:38:58.567402 6757 solver.cpp:243] Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
I0627 17:38:58.567409 6757 sgd_solver.cpp:137] Iteration 450, lr = 0.00314
I0627 17:39:03.256306 6757 solver.cpp:224] Iteration 460 (2.13288 iter/s, 4.6885s/10 iters), loss = 87.3365
I0627 17:39:03.256340 6757 solver.cpp:243] Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
I0627 17:39:03.256347 6757 sgd_solver.cpp:137] Iteration 460, lr = 0.00314
I0627 17:39:07.941520 6757 solver.cpp:224] Iteration 470 (2.13457 iter/s, 4.68478s/10 iters), loss = 87.3365
I0627 17:39:07.941551 6757 solver.cpp:243] Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
I0627 17:39:07.941558 6757 sgd_solver.cpp:137] Iteration 470, lr = 0.00314
I0627 17:39:12.623337 6757 solver.cpp:224] Iteration 480 (2.13612 iter/s, 4.68139s/10 iters), loss = 87.3365
I0627 17:39:12.623456 6757 solver.cpp:243] Train net output #0: softmax_loss = 87.3365 (* 1 = 87.3365 loss)
How should I fix this?

jinpeifei2015 changed the title from "Loss stays the same during training" to "Loss stays constant during training" on Jun 27, 2019
jinpeifei2015 changed the title from "Loss stays constant during training" to "Loss stays constant during training, hoping for some guidance" on Jun 27, 2019
@raywang1992

Have you solved this problem?

@changgongcheng

Same problem here.

@vaan2010

vaan2010 commented Apr 19, 2021

Try lowering the learning rate in solver.prototxt; I used 0.0005 or 0.0001 and training converged normally.
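For context, 87.3365 is the value at which Caffe's SoftmaxWithLoss saturates: the predicted probability of the true class underflows, gets clamped to FLT_MIN, and the loss becomes -log(FLT_MIN) ≈ 87.3365. A loss stuck at exactly this number means the logits explode in the first iterations, which a smaller base_lr usually fixes. A minimal solver.prototxt sketch along those lines (the net path, step milestones, and max_iter are hypothetical; only base_lr reflects the suggestion above):

```
net: "train_val.prototxt"  # hypothetical path to the net above
base_lr: 0.0001            # lowered from the 0.00314 seen in the log; 0.0005 also reported to work
lr_policy: "multistep"
gamma: 0.1
stepvalue: 100000          # hypothetical milestones
stepvalue: 160000
max_iter: 200000
momentum: 0.9
weight_decay: 0.0005
solver_mode: GPU
```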
