Skip to content

Commit 53e37ef

Browse files
committed
check point
1 parent 86e1a09 commit 53e37ef

File tree

4 files changed

+18
-14
lines changed

cifar10.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,12 @@ int cifar10()
193193
trainingDataFilenames.push_back("cifar-10/data_batch_5.bin");
194194
std::vector<ff::CudaTensor> trainingImages;
195195
std::vector<ff::CudaTensor> trainingLabels;
196-
LoadCifar10(kBatchSize, 50000, false, trainingDataFilenames, trainingImages, trainingLabels);
196+
LoadCifar10(kBatchSize, 5000, false, trainingDataFilenames, trainingImages, trainingLabels);
197197
std::vector<std::string> testDataFilenames;
198198
testDataFilenames.push_back("cifar-10/test_batch.bin");
199199
std::vector<ff::CudaTensor> testImages;
200200
std::vector<ff::CudaTensor> testLabels;
201-
LoadCifar10(kBatchSize, 10000, false, testDataFilenames, testImages, testLabels);
201+
LoadCifar10(kBatchSize, 1000, false, testDataFilenames, testImages, testLabels);
202202

203203
#if 1
204204
ff::CudaNn nn;

ffCudaNn.cpp

+12-9
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ namespace ff
500500
int nJobs = _wG._dataSize;
501501
int numBlocks = (nJobs + K_THREAD_PER_BLOCK - 1) / K_THREAD_PER_BLOCK;
502502
dim3 blocks(numBlocks), threads(K_THREAD_PER_BLOCK);
503-
BackwardConv2d_Wg_Cuda <<<blocks, threads>>> (
503+
BackwardConv2d_Wg_Cuda <<<blocks, threads >>> (
504504
_wG._dataGpu, _pX->_dataGpu, yG->_dataGpu,
505505
_wG._d3, _wG._d2,
506506
_y._d3, _y._d1, _y._d0, _pX->_d1, _pX->_d0,
@@ -864,13 +864,14 @@ namespace ff
864864
int ch = blockIdx.x;
865865
int image = threadIdx.x;
866866

867-
__shared__ float meanArr[BLOCK_SIZE];
868-
meanArr[image] = 0.0f;
869867
int mDash = nImages * nRow * nCol;
870868
int imageStride = nChannel * nRow * nCol;
871869
int channelStride = nRow * nCol;
872870
int currChBaseIndex = ch * channelStride;
873871
int baseIndex = image * imageStride + currChBaseIndex;
872+
873+
__shared__ float meanArr[BLOCK_SIZE];
874+
meanArr[image] = 0.0f;
874875
for (int i = 0; i < channelStride; ++i)
875876
{
876877
meanArr[image] += x[baseIndex + i];
@@ -900,10 +901,13 @@ namespace ff
900901

901902
float alpha = w[ch * 2 + 0];
902903
float beta = w[ch * 2 + 1];
903-
meanAndVariance[ch * 2 + 0] = mean;
904-
meanAndVariance[ch * 2 + 1] = variance;
905-
meanAndVarianceAcc[(ch + 1) * 2 + 0] += mean;
906-
meanAndVarianceAcc[(ch + 1) * 2 + 1] += variance;
904+
if (threadIdx.x == 0)
905+
{
906+
meanAndVariance[ch * 2 + 0] = mean;
907+
meanAndVariance[ch * 2 + 1] = variance;
908+
meanAndVarianceAcc[(ch + 1) * 2 + 0] += mean;
909+
meanAndVarianceAcc[(ch + 1) * 2 + 1] += variance;
910+
}
907911
float d = rsqrtf(variance + 1e-8f);
908912
for (int i = 0; i < channelStride; ++i)
909913
{
@@ -947,8 +951,7 @@ namespace ff
947951

948952
if (_nn->IsTraining())
949953
{
950-
++_accCount;
951-
if (_accCount <= 24)
954+
if (++_accCount <= 24)
952955
{
953956
ForwardBatchNorm2d_Train_0_Cuda <<< 1, 1 >>> (_meanAndVarianceAcc._dataGpu);
954957
}

main.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ int cifar10();
77
int simple()
88
{
99
#if 1
10+
float learningRate = 0.01f;
1011
ff::CudaNn nn;
1112
nn.AddFc(1000, 4096);
1213
nn.AddFc(4096, 1024);
@@ -22,6 +23,7 @@ int simple()
2223
x.SetRandom();
2324
y.SetRandom();
2425
#else
26+
float learningRate = 0.001f;
2527
ff::CudaNn nn;
2628
nn.AddConv2d(3, 1, 8, 1, 1); // 8 * 8 * 8
2729
nn.AddRelu();
@@ -39,7 +41,6 @@ int simple()
3941
y.SetRandom();
4042
#endif
4143

42-
float learningRate = 0.0001f;
4344
const ff::CudaTensor* yPred = nullptr;
4445
for (int i = 0; i < 10000; ++i)
4546
{

mnist.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ int mnist()
125125
LoadMnistData("mnist/train-images.idx3-ubyte", "mnist/train-labels.idx1-ubyte", kBatchSize, trainingImages, trainingLabels);
126126
LoadMnistData("mnist/t10k-images.idx3-ubyte", "mnist/t10k-labels.idx1-ubyte", kBatchSize, testImages, testLabels);
127127

128-
#if 0
128+
#if 1
129129
float learningRate = 0.001f;
130130
ff::CudaNn nn;
131131
nn.AddFc(28 * 28, 2048);
@@ -142,7 +142,7 @@ int mnist()
142142
testImages[i].Reshape(28, 28, 1, testImages[i]._dataSize / (28 * 28));
143143
}
144144
145-
float learningRate = 0.0001f;
145+
float learningRate = 0.001f;
146146
ff::CudaNn nn;
147147
nn.AddConv2d(3, 1, 4, 1, 1);
148148
nn.AddBatchNorm2d(4);

0 commit comments

Comments (0)