This repository was archived by the owner on Dec 9, 2024. It is now read-only.

Commit b5dfd9d

Merge pull request #274 from tresreid/mdranges ("Mdranges")
2 parents: 2f3058f + 9940b51; commit: b5dfd9d

File tree: 11 files changed (+126, -112 lines)

SDL/Event.cu

Lines changed: 29 additions & 31 deletions
@@ -303,23 +303,23 @@ void SDL::Event::resetEvent()
         }
     }
     if(hitsInGPU){cms::cuda::free_host(hitsInGPU);
-    hitsInGPU = nullptr;}
+    hitsInGPU = nullptr;}
     if(mdsInGPU){cms::cuda::free_host(mdsInGPU);
-    mdsInGPU = nullptr;}
+    mdsInGPU = nullptr;}
     if(rangesInGPU){cms::cuda::free_host(rangesInGPU);
-    rangesInGPU = nullptr;}
+    rangesInGPU = nullptr;}
     if(segmentsInGPU){cms::cuda::free_host(segmentsInGPU);
-    segmentsInGPU = nullptr;}
+    segmentsInGPU = nullptr;}
     if(tripletsInGPU){cms::cuda::free_host(tripletsInGPU);
-    tripletsInGPU = nullptr;}
-    if(quintupletsInGPU){cms::cuda::free_host(quintupletsInGPU);
+    tripletsInGPU = nullptr;}
+    if(quintupletsInGPU){cms::cuda::free_host(quintupletsInGPU);
     quintupletsInGPU = nullptr;}
     if(trackCandidatesInGPU){cms::cuda::free_host(trackCandidatesInGPU);
-    trackCandidatesInGPU = nullptr;}
+    trackCandidatesInGPU = nullptr;}
     if(pixelTripletsInGPU){cms::cuda::free_host(pixelTripletsInGPU);
-    pixelTripletsInGPU = nullptr;}
+    pixelTripletsInGPU = nullptr;}
     if(pixelQuintupletsInGPU){cms::cuda::free_host(pixelQuintupletsInGPU);
-    pixelQuintupletsInGPU = nullptr;}
+    pixelQuintupletsInGPU = nullptr;}

     if(hitsInCPU != nullptr)
     {
@@ -679,9 +679,12 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
     {
         mdsInGPU = (SDL::miniDoublets*)cms::cuda::allocate_host(sizeof(SDL::miniDoublets), stream);
         unsigned int nTotalMDs;
-        createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_PIXEL_MD_PER_MODULES);
-        createMDsInExplicitMemory(*mdsInGPU, nTotalMDs, nLowerModules, N_MAX_PIXEL_MD_PER_MODULES,stream);
-
+        cudaMemsetAsync(&rangesInGPU->miniDoubletModuleOccupancy[nLowerModules],N_MAX_PIXEL_MD_PER_MODULES, sizeof(unsigned int),stream);
+        createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU);
+        cudaMemcpyAsync(&nTotalMDs,rangesInGPU->device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        cudaStreamSynchronize(stream);
+        nTotalMDs+= N_MAX_PIXEL_MD_PER_MODULES;
+        createMDsInExplicitMemory(*mdsInGPU, nTotalMDs, nLowerModules, N_MAX_PIXEL_MD_PER_MODULES,stream);
         cudaMemcpyAsync(mdsInGPU->nMemoryLocations, &nTotalMDs, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
         cudaStreamSynchronize(stream);
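Note: the hunk above replaces a host-side range computation with a device kernel followed by an asynchronous copy of a device-resident total. The following is a minimal, self-contained CUDA sketch of that host-side flow; it is not the repository's code, and sizingKernel, d_occ, d_total and PIXEL_ALLOWANCE are hypothetical stand-ins for createMDArrayRangesGPU, the module occupancy inputs, rangesInGPU->device_nTotalMDs and N_MAX_PIXEL_MD_PER_MODULES.

#include <cstdio>
#include <cuda_runtime.h>

// Illustrative stand-in for the sizing kernel: each thread adds a
// per-module occupancy estimate into a single device-resident counter.
__global__ void sizingKernel(const unsigned int* occupancyPerModule,
                             unsigned int nModules,
                             unsigned int* d_total)
{
    for (unsigned int i = threadIdx.x; i < nModules; i += blockDim.x)
        atomicAdd(d_total, occupancyPerModule[i]);
}

int main()
{
    const unsigned int nModules = 8;
    const unsigned int PIXEL_ALLOWANCE = 100;   // plays the role of N_MAX_PIXEL_MD_PER_MODULES
    unsigned int h_occ[nModules] = {49, 42, 37, 41, 100, 16, 19, 25};

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    unsigned int *d_occ, *d_total;
    cudaMalloc(&d_occ, nModules * sizeof(unsigned int));
    cudaMalloc(&d_total, sizeof(unsigned int));
    cudaMemcpyAsync(d_occ, h_occ, nModules * sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
    cudaMemsetAsync(d_total, 0, sizeof(unsigned int), stream);

    // Same shape as the hunk: single-block launch, async copy of the total,
    // stream sync, then pad the result with a fixed pixel allowance.
    sizingKernel<<<1, 1024, 0, stream>>>(d_occ, nModules, d_total);
    unsigned int nTotal = 0;
    cudaMemcpyAsync(&nTotal, d_total, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    nTotal += PIXEL_ALLOWANCE;          // reserve room for pixel objects

    printf("allocate space for %u mini-doublets\n", nTotal);

    cudaFree(d_occ);
    cudaFree(d_total);
    cudaStreamDestroy(stream);
    return 0;
}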

@@ -692,13 +695,10 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
     //hardcoded range numbers for this will come from studies!
     // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously.
     // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them
-    unsigned int *device_nTotalSegments;
-    cudaMalloc((void **)&device_nTotalSegments, sizeof(unsigned int));
-    createSegmentArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *mdsInGPU, device_nTotalSegments);
-    cudaMemcpyAsync(&nTotalSegments,device_nTotalSegments,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+    createSegmentArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *mdsInGPU);
+    cudaMemcpyAsync(&nTotalSegments,rangesInGPU->device_nTotalSegs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
     cudaStreamSynchronize(stream);
     nTotalSegments += N_MAX_PIXEL_SEGMENTS_PER_MODULE;
-    cudaFree(device_nTotalSegments);
     createSegmentsInExplicitMemory(*segmentsInGPU, nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE,stream);

     cudaMemcpyAsync(segmentsInGPU->nMemoryLocations, &nTotalSegments, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);;
@@ -895,7 +895,11 @@ void SDL::Event::createMiniDoublets()
     {
     //hardcoded range numbers for this will come from studies!
     unsigned int nTotalMDs;
-    createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_PIXEL_MD_PER_MODULES);
+    cudaMemsetAsync(&rangesInGPU->miniDoubletModuleOccupancy[nLowerModules],N_MAX_PIXEL_MD_PER_MODULES, sizeof(unsigned int),stream);
+    createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU);
+    cudaMemcpyAsync(&nTotalMDs,rangesInGPU->device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+    cudaStreamSynchronize(stream);
+    nTotalMDs+=N_MAX_PIXEL_MD_PER_MODULES;

     if(mdsInGPU == nullptr)
     {
@@ -991,12 +995,9 @@ void SDL::Event::createTriplets()
     {
         tripletsInGPU = (SDL::triplets*)cms::cuda::allocate_host(sizeof(SDL::triplets), stream);
         unsigned int maxTriplets;
-        unsigned int *device_maxTriplets;
-        cudaMalloc((void **)&device_maxTriplets, sizeof(unsigned int));
-        createTripletArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *segmentsInGPU, device_maxTriplets);
-        cudaMemcpyAsync(&maxTriplets,device_maxTriplets,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        createTripletArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *segmentsInGPU);
+        cudaMemcpyAsync(&maxTriplets,rangesInGPU->device_nTotalTrips,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
         cudaStreamSynchronize(stream);
-        cudaFree(device_maxTriplets);
         createTripletsInExplicitMemory(*tripletsInGPU, maxTriplets, nLowerModules,stream);

         cudaMemcpyAsync(tripletsInGPU->nMemoryLocations, &maxTriplets, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
@@ -1010,7 +1011,7 @@ void SDL::Event::createTriplets()
     uint16_t *index_gpu;
     index_gpu = (uint16_t*)cms::cuda::allocate_device(dev, nLowerModules*sizeof(uint16_t), stream);
     unsigned int *nSegments = (unsigned int*)malloc(nLowerModules*sizeof(unsigned int));
-    cudaMemcpyAsync((void *)nSegments, segmentsInGPU->nSegments, nLowerModules*sizeof(unsigned int), cudaMemcpyDeviceToHost,stream);
+    cudaMemcpyAsync((void *)nSegments, segmentsInGPU->nSegments, nLowerModules*sizeof(unsigned int), cudaMemcpyDeviceToHost,stream);
    cudaStreamSynchronize(stream);

    uint16_t* module_nConnectedModules;
@@ -1259,15 +1260,12 @@ void SDL::Event::createQuintuplets()
     cudaMalloc(&(rangesInGPU->indicesOfEligibleT5Modules), nLowerModules * sizeof(uint16_t));
 #endif
     cudaMemsetAsync(rangesInGPU->quintupletModuleIndices, -1, sizeof(int) * (nLowerModules),stream);
-    cudaStreamSynchronize(stream);
+    cudaStreamSynchronize(stream);
     unsigned int nTotalQuintuplets;
-    unsigned int *device_nTotalQuintuplets;
-    cudaMalloc((void **)&device_nTotalQuintuplets, sizeof(unsigned int));
-    createEligibleModulesListForQuintupletsGPU<<<1,1024,0,stream>>>(*modulesInGPU, *tripletsInGPU, device_nTotalQuintuplets, *rangesInGPU);
+    createEligibleModulesListForQuintupletsGPU<<<1,1024,0,stream>>>(*modulesInGPU, *tripletsInGPU, *rangesInGPU);
     cudaMemcpyAsync(&nEligibleT5Modules,rangesInGPU->nEligibleT5Modules,sizeof(uint16_t),cudaMemcpyDeviceToHost,stream);
-    cudaMemcpyAsync(&nTotalQuintuplets,device_nTotalQuintuplets,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
-    cudaStreamSynchronize(stream);
-    cudaFree(device_nTotalQuintuplets);
+    cudaMemcpyAsync(&nTotalQuintuplets,rangesInGPU->device_nTotalQuints,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+    cudaStreamSynchronize(stream);

     if(quintupletsInGPU == nullptr)
     {

SDL/MiniDoublet.cu

Lines changed: 45 additions & 55 deletions
@@ -17,70 +17,60 @@ void SDL::miniDoublets::resetMemory(unsigned int nMemoryLocationsx, unsigned int
 }


-void SDL::createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxPixelMDs)
+__global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU)//, unsigned int* nTotalMDsx)
 {
-    /*
-    write code here that will deal with importing module parameters to CPU, and get the relevant occupancies for a given module!*/
-
-    int *module_miniDoubletModuleIndices;
-    module_miniDoubletModuleIndices = (int*)cms::cuda::allocate_host((nLowerModules + 1) * sizeof(unsigned int), stream);
-    short* module_subdets;
-    module_subdets = (short*)cms::cuda::allocate_host(nLowerModules* sizeof(short), stream);
-    cudaMemcpyAsync(module_subdets,modulesInGPU.subdets,nLowerModules*sizeof(short),cudaMemcpyDeviceToHost,stream);
-    short* module_layers;
-    module_layers = (short*)cms::cuda::allocate_host(nLowerModules * sizeof(short), stream);
-    cudaMemcpyAsync(module_layers,modulesInGPU.layers,nLowerModules * sizeof(short),cudaMemcpyDeviceToHost,stream);
-    short* module_rings;
-    module_rings = (short*)cms::cuda::allocate_host(nLowerModules * sizeof(short), stream);
-    cudaMemcpyAsync(module_rings,modulesInGPU.rings,nLowerModules * sizeof(short),cudaMemcpyDeviceToHost,stream);
-    float* module_eta;
-    module_eta = (float*)cms::cuda::allocate_host(nLowerModules * sizeof(float), stream);
-    cudaMemcpyAsync(module_eta,modulesInGPU.eta,nLowerModules * sizeof(float),cudaMemcpyDeviceToHost,stream);
-
-    cudaStreamSynchronize(stream);
+    short module_subdets;
+    short module_layers;
+    short module_rings;
+    float module_eta;

+    __shared__ unsigned int nTotalMDs; //start!
     nTotalMDs = 0; //start!
-    for(uint16_t i = 0; i < nLowerModules; i++)
+    __syncthreads();
+    int gid = blockIdx.x * blockDim.x + threadIdx.x;
+    int np = gridDim.x * blockDim.x;
+    for(uint16_t i = gid; i < *modulesInGPU.nLowerModules; i+= np)
     {
-        module_miniDoubletModuleIndices[i] = nTotalMDs; //running counter - we start at the previous index!
+        module_subdets = modulesInGPU.subdets[i];
+        module_layers = modulesInGPU.layers[i];
+        module_rings = modulesInGPU.rings[i];
+        module_eta = abs(modulesInGPU.eta[i]);
         unsigned int occupancy;
         unsigned int category_number, eta_number;
-        if (module_layers[i]<=3 && module_subdets[i]==5) category_number = 0;
-        if (module_layers[i]>=4 && module_subdets[i]==5) category_number = 1;
-        if (module_layers[i]<=2 && module_subdets[i]==4 && module_rings[i]>=11) category_number = 2;
-        if (module_layers[i]>=3 && module_subdets[i]==4 && module_rings[i]>=8) category_number = 2;
-        if (module_layers[i]<=2 && module_subdets[i]==4 && module_rings[i]<=10) category_number = 3;
-        if (module_layers[i]>=3 && module_subdets[i]==4 && module_rings[i]<=7) category_number = 3;
-
-        if (abs(module_eta[i])<0.75) eta_number=0;
-        if (abs(module_eta[i])>0.75 && abs(module_eta[i])<1.5) eta_number=1;
-        if (abs(module_eta[i])>1.5 && abs(module_eta[i])<2.25) eta_number=2;
-        if (abs(module_eta[i])>2.25 && abs(module_eta[i])<3) eta_number=3;
+        if (module_layers<=3 && module_subdets==5) category_number = 0;
+        else if (module_layers>=4 && module_subdets==5) category_number = 1;
+        else if (module_layers<=2 && module_subdets==4 && module_rings>=11) category_number = 2;
+        else if (module_layers>=3 && module_subdets==4 && module_rings>=8) category_number = 2;
+        else if (module_layers<=2 && module_subdets==4 && module_rings<=10) category_number = 3;
+        else if (module_layers>=3 && module_subdets==4 && module_rings<=7) category_number = 3;
+
+        if (module_eta<0.75) eta_number=0;
+        else if (module_eta>0.75 && module_eta<1.5) eta_number=1;
+        else if (module_eta>1.5 && module_eta<2.25) eta_number=2;
+        else if (module_eta>2.25 && module_eta<3) eta_number=3;

         if (category_number == 0 && eta_number == 0) occupancy = 49;
-        if (category_number == 0 && eta_number == 1) occupancy = 42;
-        if (category_number == 0 && eta_number == 2) occupancy = 37;
-        if (category_number == 0 && eta_number == 3) occupancy = 41;
-        if (category_number == 1) occupancy = 100;
-        if (category_number == 2 && eta_number == 1) occupancy = 16;
-        if (category_number == 2 && eta_number == 2) occupancy = 19;
-        if (category_number == 3 && eta_number == 1) occupancy = 14;
-        if (category_number == 3 && eta_number == 2) occupancy = 20;
-        if (category_number == 3 && eta_number == 3) occupancy = 25;
-
-        nTotalMDs += occupancy;
+        else if (category_number == 0 && eta_number == 1) occupancy = 42;
+        else if (category_number == 0 && eta_number == 2) occupancy = 37;
+        else if (category_number == 0 && eta_number == 3) occupancy = 41;
+        else if (category_number == 1) occupancy = 100;
+        else if (category_number == 2 && eta_number == 1) occupancy = 16;
+        else if (category_number == 2 && eta_number == 2) occupancy = 19;
+        else if (category_number == 3 && eta_number == 1) occupancy = 14;
+        else if (category_number == 3 && eta_number == 2) occupancy = 20;
+        else if (category_number == 3 && eta_number == 3) occupancy = 25;
+
+        unsigned int nTotMDs= atomicAdd(&nTotalMDs,occupancy);
+        rangesInGPU.miniDoubletModuleIndices[i] = nTotMDs;
+        rangesInGPU.miniDoubletModuleOccupancy[i] = occupancy;
+    }
+    __syncthreads();
+    if(threadIdx.x==0){
+        rangesInGPU.miniDoubletModuleIndices[*modulesInGPU.nLowerModules] = nTotalMDs;
+        //*nTotalMDsx=nTotalMDs;
+        *rangesInGPU.device_nTotalMDs=nTotalMDs;
     }

-    module_miniDoubletModuleIndices[nLowerModules] = nTotalMDs;
-    nTotalMDs+=maxPixelMDs;
-
-    cudaMemcpyAsync(rangesInGPU.miniDoubletModuleIndices, module_miniDoubletModuleIndices, (nLowerModules + 1) * sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
-    cudaStreamSynchronize(stream);
-    cms::cuda::free_host(module_miniDoubletModuleIndices);
-    cms::cuda::free_host(module_subdets);
-    cms::cuda::free_host(module_layers);
-    cms::cuda::free_host(module_rings);
-    cms::cuda::free_host(module_eta);
 }

 //FIXME:Add memory locations for the pixel MDs here!
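Note: the rewritten routine above is now a __global__ kernel in which each thread reserves a contiguous slice of the output index space by doing atomicAdd on a block-shared running total, and thread 0 then publishes the grand total to rangesInGPU.device_nTotalMDs. The following compilable sketch shows that offset-assignment pattern in isolation; it is not the repository's code, and computeOffsets, occupancyOf and the array names are hypothetical, with the category/eta occupancy table reduced to a toy formula.

#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical per-module occupancy estimate standing in for the lookup table.
__device__ unsigned int occupancyOf(unsigned int module) { return 10u + (module % 4u); }

// Single-block kernel: every thread claims a slice of the index space for its
// modules via atomicAdd on a shared running total, and thread 0 publishes the
// total afterwards. The slices are disjoint and gap-free, but not sorted by
// module index, a property of any atomicAdd-based assignment.
__global__ void computeOffsets(unsigned int nModules,
                               unsigned int* moduleOffset,
                               unsigned int* moduleOccupancy,
                               unsigned int* deviceTotal)
{
    __shared__ unsigned int total;
    if (threadIdx.x == 0) total = 0;
    __syncthreads();

    for (unsigned int i = threadIdx.x; i < nModules; i += blockDim.x)
    {
        unsigned int occ = occupancyOf(i);
        moduleOccupancy[i] = occ;
        moduleOffset[i] = atomicAdd(&total, occ);  // start index of module i's slice
    }
    __syncthreads();

    if (threadIdx.x == 0)
    {
        moduleOffset[nModules] = total;  // one-past-the-end sentinel
        *deviceTotal = total;            // grand total for the host to read back
    }
}

int main()
{
    const unsigned int nModules = 16;
    unsigned int *offsets, *occupancy, *total;
    cudaMalloc(&offsets, (nModules + 1) * sizeof(unsigned int));
    cudaMalloc(&occupancy, nModules * sizeof(unsigned int));
    cudaMalloc(&total, sizeof(unsigned int));

    computeOffsets<<<1, 1024>>>(nModules, offsets, occupancy, total);

    unsigned int hostTotal = 0;
    cudaMemcpy(&hostTotal, total, sizeof(unsigned int), cudaMemcpyDeviceToHost);
    printf("total slots: %u\n", hostTotal);

    cudaFree(offsets); cudaFree(occupancy); cudaFree(total);
    return 0;
}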
@@ -928,7 +918,7 @@ __global__ void SDL::createMiniDoubletsInGPUv2(struct SDL::modules& modulesInGPU
         if(success)
         {
             unsigned int totOccupancyMDs = atomicAdd(&mdsInGPU.totOccupancyMDs[lowerModuleIndex],1);
-            if(totOccupancyMDs >= (rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex + 1] - rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex]))
+            if(totOccupancyMDs >= (rangesInGPU.miniDoubletModuleOccupancy[lowerModuleIndex]))
             {
 #ifdef Warnings
                 printf("Mini-doublet excess alert! Module index = %d\n",lowerModuleIndex);

SDL/MiniDoublet.cuh

Lines changed: 1 addition & 1 deletion
@@ -92,7 +92,7 @@ namespace SDL
     void createMDsInExplicitMemory(struct miniDoublets& mdsInGPU, unsigned int maxMDs,uint16_t nLowerModules, unsigned int maxPixelMDs,cudaStream_t stream);


-    void createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxPixelMDs);
+    __global__ void createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU);//, unsigned int* nTotalMDs);

     __global__ void addMiniDoubletRangesToEventExplicit(struct modules& modulesInGPU, struct miniDoublets& mdsInGPU, struct objectRanges& rangesInGPU, struct hits& hitsInGPU);

SDL/Module.cu

Lines changed: 22 additions & 0 deletions
@@ -31,11 +31,17 @@ void SDL::createRangesInExplicitMemory(struct objectRanges& rangesInGPU,unsigned
     rangesInGPU.quintupletModuleIndices = (int*)cms::cuda::allocate_device(dev,nLowerModules * sizeof(int),stream);
     rangesInGPU.quintupletModuleOccupancy = (int*)cms::cuda::allocate_device(dev,nLowerModules * sizeof(int),stream);
     rangesInGPU.miniDoubletModuleIndices = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
+    rangesInGPU.miniDoubletModuleOccupancy = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
     rangesInGPU.segmentModuleIndices = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
     rangesInGPU.segmentModuleOccupancy = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
     rangesInGPU.tripletModuleIndices = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream);
     rangesInGPU.tripletModuleOccupancy = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream);

+    rangesInGPU.device_nTotalMDs = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalSegs = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalTrips = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalQuints = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+
 #else
     cudaMalloc(&rangesInGPU.hitRanges,nModules * 2 * sizeof(int));
     cudaMalloc(&rangesInGPU.hitRangesLower,nModules * sizeof(int));
@@ -53,10 +59,16 @@ void SDL::createRangesInExplicitMemory(struct objectRanges& rangesInGPU,unsigned
     cudaMalloc(&rangesInGPU.quintupletModuleOccupancy, nLowerModules * sizeof(int));

     cudaMalloc(&rangesInGPU.miniDoubletModuleIndices, (nLowerModules + 1) * sizeof(int));
+    cudaMalloc(&rangesInGPU.miniDoubletModuleOccupancy, (nLowerModules + 1) * sizeof(int));
     cudaMalloc(&rangesInGPU.segmentModuleIndices, (nLowerModules + 1) * sizeof(int));
     cudaMalloc(&rangesInGPU.segmentModuleOccupancy, (nLowerModules + 1) * sizeof(int));
     cudaMalloc(&rangesInGPU.tripletModuleIndices, nLowerModules * sizeof(int));
     cudaMalloc(&rangesInGPU.tripletModuleOccupancy, nLowerModules * sizeof(int));
+
+    cudaMalloc(&rangesInGPU.device_nTotalMDs, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalSegs, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalTrips, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalQuints, sizeof(unsigned int));

 #endif
 }
@@ -113,10 +125,15 @@ void SDL::objectRanges::freeMemoryCache()//struct objectRanges& rangesInGPU)
     cms::cuda::free_device(dev, hitRangesnLower);
     cms::cuda::free_device(dev, hitRangesnUpper);
     cms::cuda::free_device(dev, miniDoubletModuleIndices);
+    cms::cuda::free_device(dev, miniDoubletModuleOccupancy);
     cms::cuda::free_device(dev, segmentModuleIndices);
     cms::cuda::free_device(dev, segmentModuleOccupancy);
     cms::cuda::free_device(dev, tripletModuleIndices);
     cms::cuda::free_device(dev, tripletModuleOccupancy);
+    cms::cuda::free_device(dev, device_nTotalMDs);
+    cms::cuda::free_device(dev, device_nTotalSegs);
+    cms::cuda::free_device(dev, device_nTotalTrips);
+    cms::cuda::free_device(dev, device_nTotalQuints);
 }
 void SDL::objectRanges::freeMemory()
 {
@@ -136,10 +153,15 @@ void SDL::objectRanges::freeMemory()
     cudaFree(quintupletModuleIndices);
     cudaFree(quintupletModuleOccupancy);
     cudaFree(miniDoubletModuleIndices);
+    cudaFree(miniDoubletModuleOccupancy);
     cudaFree(segmentModuleIndices);
     cudaFree(segmentModuleOccupancy);
     cudaFree(tripletModuleIndices);
     cudaFree(tripletModuleOccupancy);
+    cudaFree(device_nTotalMDs);
+    cudaFree(device_nTotalSegs);
+    cudaFree(device_nTotalTrips);
+    cudaFree(device_nTotalQuints);
 }
 void SDL::freeModulesCache(struct modules& modulesInGPU,struct pixelMap& pixelMapping)
 {
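Note: the four device-resident totals (device_nTotalMDs, device_nTotalSegs, device_nTotalTrips, device_nTotalQuints) share the lifetime of the rest of objectRanges: allocated once in createRangesInExplicitMemory, in both the caching-allocator and plain-cudaMalloc branches, and released in both freeMemoryCache() and freeMemory(). The standalone sketch below shows that ownership pattern with a hypothetical Counters struct; it is an illustration, not the repository's code.

#include <cuda_runtime.h>

// Hypothetical stand-in for the ranges struct: device-resident counters that
// are allocated once, written by sizing kernels, and freed with the struct.
struct Counters
{
    unsigned int* device_nTotalMDs    = nullptr;
    unsigned int* device_nTotalSegs   = nullptr;
    unsigned int* device_nTotalTrips  = nullptr;
    unsigned int* device_nTotalQuints = nullptr;

    void allocate()
    {
        // Mirrors the plain-cudaMalloc branch of createRangesInExplicitMemory.
        cudaMalloc(&device_nTotalMDs, sizeof(unsigned int));
        cudaMalloc(&device_nTotalSegs, sizeof(unsigned int));
        cudaMalloc(&device_nTotalTrips, sizeof(unsigned int));
        cudaMalloc(&device_nTotalQuints, sizeof(unsigned int));
    }

    void release()
    {
        // Mirrors freeMemory(); a caching path would call cms::cuda::free_device instead.
        cudaFree(device_nTotalMDs);
        cudaFree(device_nTotalSegs);
        cudaFree(device_nTotalTrips);
        cudaFree(device_nTotalQuints);
    }
};

int main()
{
    Counters ranges;
    ranges.allocate();
    // ... sizing kernels write into ranges.device_nTotal*, and the host copies
    //     the values back with cudaMemcpyAsync, as in SDL::Event ...
    ranges.release();
    return 0;
}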

SDL/Module.cuh

Lines changed: 5 additions & 1 deletion
@@ -67,12 +67,16 @@ namespace SDL
         int *quintupletModuleIndices;
         int *quintupletModuleOccupancy;
         int *miniDoubletModuleIndices;
+        int *miniDoubletModuleOccupancy;
         int *segmentModuleIndices;
         int *segmentModuleOccupancy;
         int *tripletModuleIndices;
         int *tripletModuleOccupancy;

-        // unsigned int nTotalQuintuplets;
+        unsigned int *device_nTotalMDs;
+        unsigned int *device_nTotalSegs;
+        unsigned int *device_nTotalTrips;
+        unsigned int *device_nTotalQuints;

         void freeMemoryCache();
         void freeMemory();
