SegmentLinking
diff --git a/‎SDL/Event.cu
Lines changed: 29 additions & 31 deletions b/‎SDL/Event.cu
Lines changed: 29 additions & 31 deletions
diff --git a/‎SDL/MiniDoublet.cu
Lines changed: 45 additions & 55 deletions b/‎SDL/MiniDoublet.cu
Lines changed: 45 additions & 55 deletions
diff --git a/‎SDL/MiniDoublet.cuh
Lines changed: 1 addition & 1 deletion b/‎SDL/MiniDoublet.cuh
Lines changed: 1 addition & 1 deletion
diff --git a/‎SDL/Module.cu
Lines changed: 22 additions & 0 deletions b/‎SDL/Module.cu
Lines changed: 22 additions & 0 deletions
diff --git a/‎SDL/Module.cuh
Lines changed: 5 additions & 1 deletion b/‎SDL/Module.cuh
Lines changed: 5 additions & 1 deletion
@@ -303,23 +303,23 @@ void SDL::Event::resetEvent()
         }
     }
     if(hitsInGPU){cms::cuda::free_host(hitsInGPU);
-    hitsInGPU = nullptr;}
+      hitsInGPU = nullptr;}
     if(mdsInGPU){cms::cuda::free_host(mdsInGPU);
-    mdsInGPU = nullptr;}
+      mdsInGPU = nullptr;}
     if(rangesInGPU){cms::cuda::free_host(rangesInGPU);
-    rangesInGPU = nullptr;}
+      rangesInGPU = nullptr;}
     if(segmentsInGPU){cms::cuda::free_host(segmentsInGPU);
-    segmentsInGPU = nullptr;}
+      segmentsInGPU = nullptr;}
     if(tripletsInGPU){cms::cuda::free_host(tripletsInGPU);
-    tripletsInGPU = nullptr;}
-      if(quintupletsInGPU){cms::cuda::free_host(quintupletsInGPU);
+      tripletsInGPU = nullptr;}
+    if(quintupletsInGPU){cms::cuda::free_host(quintupletsInGPU);
       quintupletsInGPU = nullptr;}
     if(trackCandidatesInGPU){cms::cuda::free_host(trackCandidatesInGPU);
-    trackCandidatesInGPU = nullptr;}
+      trackCandidatesInGPU = nullptr;}
     if(pixelTripletsInGPU){cms::cuda::free_host(pixelTripletsInGPU);
-    pixelTripletsInGPU = nullptr;}
+      pixelTripletsInGPU = nullptr;}
     if(pixelQuintupletsInGPU){cms::cuda::free_host(pixelQuintupletsInGPU);
-    pixelQuintupletsInGPU = nullptr;}
+      pixelQuintupletsInGPU = nullptr;}
 
     if(hitsInCPU != nullptr)
     {
@@ -679,9 +679,17 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
     {
         mdsInGPU = (SDL::miniDoublets*)cms::cuda::allocate_host(sizeof(SDL::miniDoublets), stream);
         unsigned int nTotalMDs;
-        createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_PIXEL_MD_PER_MODULES);
-    	createMDsInExplicitMemory(*mdsInGPU, nTotalMDs, nLowerModules, N_MAX_PIXEL_MD_PER_MODULES,stream);
-
+        // The occupancy entry at index nLowerModules receives N_MAX_PIXEL_MD_PER_MODULES.
+        // It is copied from a host value: cudaMemsetAsync(ptr, value, count) fills bytes with
+        // (unsigned char)value, so it cannot store an arbitrary unsigned int.
+        const unsigned int nPixelMDs = N_MAX_PIXEL_MD_PER_MODULES;
+        cudaMemcpyAsync(&rangesInGPU->miniDoubletModuleOccupancy[nLowerModules], &nPixelMDs, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
+        // Fill the per-module MD ranges on the device, then fetch the summed occupancy.
+        createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU);
+        cudaMemcpyAsync(&nTotalMDs,rangesInGPU->device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        cudaStreamSynchronize(stream); // nTotalMDs is only valid on the host after this
+        nTotalMDs+= nPixelMDs;
+        createMDsInExplicitMemory(*mdsInGPU, nTotalMDs, nLowerModules, N_MAX_PIXEL_MD_PER_MODULES,stream);
         cudaMemcpyAsync(mdsInGPU->nMemoryLocations, &nTotalMDs, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
         cudaStreamSynchronize(stream);
 
@@ -692,13 +695,10 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
         //hardcoded range numbers for this will come from studies!
         // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously.
         // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them
-        unsigned int *device_nTotalSegments;
-        cudaMalloc((void **)&device_nTotalSegments, sizeof(unsigned int));
-        createSegmentArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *mdsInGPU, device_nTotalSegments);
-        cudaMemcpyAsync(&nTotalSegments,device_nTotalSegments,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        createSegmentArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *mdsInGPU);
+        cudaMemcpyAsync(&nTotalSegments,rangesInGPU->device_nTotalSegs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
         cudaStreamSynchronize(stream);
         nTotalSegments += N_MAX_PIXEL_SEGMENTS_PER_MODULE;
-        cudaFree(device_nTotalSegments);
         createSegmentsInExplicitMemory(*segmentsInGPU, nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE,stream);
 
         cudaMemcpyAsync(segmentsInGPU->nMemoryLocations, &nTotalSegments, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);;
@@ -895,7 +895,16 @@ void SDL::Event::createMiniDoublets()
 {
     //hardcoded range numbers for this will come from studies!
     unsigned int nTotalMDs;
-    createMDArrayRanges(*modulesInGPU, *rangesInGPU, nLowerModules, nTotalMDs, stream, N_MAX_PIXEL_MD_PER_MODULES);
+    // The occupancy entry at index nLowerModules receives N_MAX_PIXEL_MD_PER_MODULES.
+    // It is copied from a host value: cudaMemsetAsync(ptr, value, count) fills bytes with
+    // (unsigned char)value, so it cannot store an arbitrary unsigned int.
+    const unsigned int nPixelMDs = N_MAX_PIXEL_MD_PER_MODULES;
+    cudaMemcpyAsync(&rangesInGPU->miniDoubletModuleOccupancy[nLowerModules], &nPixelMDs, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
+    // Fill the per-module MD ranges on the device, then fetch the summed occupancy.
+    createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU);
+    cudaMemcpyAsync(&nTotalMDs,rangesInGPU->device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+    cudaStreamSynchronize(stream); // nTotalMDs is only valid on the host after this
+    nTotalMDs+=nPixelMDs;
 
     if(mdsInGPU == nullptr)
     {
@@ -991,12 +995,9 @@ void SDL::Event::createTriplets()
     {
         tripletsInGPU = (SDL::triplets*)cms::cuda::allocate_host(sizeof(SDL::triplets), stream);
         unsigned int maxTriplets;
-        unsigned int *device_maxTriplets;
-        cudaMalloc((void **)&device_maxTriplets, sizeof(unsigned int));
-        createTripletArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *segmentsInGPU, device_maxTriplets);
-        cudaMemcpyAsync(&maxTriplets,device_maxTriplets,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        createTripletArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *segmentsInGPU);
+        cudaMemcpyAsync(&maxTriplets,rangesInGPU->device_nTotalTrips,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
         cudaStreamSynchronize(stream);
-        cudaFree(device_maxTriplets);
         createTripletsInExplicitMemory(*tripletsInGPU, maxTriplets, nLowerModules,stream);
 
         cudaMemcpyAsync(tripletsInGPU->nMemoryLocations, &maxTriplets, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
@@ -1010,7 +1011,7 @@ void SDL::Event::createTriplets()
     uint16_t *index_gpu;
     index_gpu = (uint16_t*)cms::cuda::allocate_device(dev, nLowerModules*sizeof(uint16_t), stream);
     unsigned int *nSegments = (unsigned int*)malloc(nLowerModules*sizeof(unsigned int));
-    cudaMemcpyAsync((void *)nSegments, segmentsInGPU->nSegments, nLowerModules*sizeof(unsigned int), cudaMemcpyDeviceToHost,stream);
+    cudaMemcpyAsync((void *)nSegments, segmentsInGPU->nSegments, nLowerModules*sizeof(unsigned int), cudaMemcpyDeviceToHost,stream); 
     cudaStreamSynchronize(stream);
 
     uint16_t* module_nConnectedModules;
@@ -1259,15 +1260,12 @@ void SDL::Event::createQuintuplets()
         cudaMalloc(&(rangesInGPU->indicesOfEligibleT5Modules), nLowerModules * sizeof(uint16_t));
 #endif
     cudaMemsetAsync(rangesInGPU->quintupletModuleIndices, -1, sizeof(int) * (nLowerModules),stream);
-cudaStreamSynchronize(stream);
+    cudaStreamSynchronize(stream);
     unsigned int nTotalQuintuplets;
-    unsigned int *device_nTotalQuintuplets;
-    cudaMalloc((void **)&device_nTotalQuintuplets, sizeof(unsigned int));
-    createEligibleModulesListForQuintupletsGPU<<<1,1024,0,stream>>>(*modulesInGPU, *tripletsInGPU, device_nTotalQuintuplets, *rangesInGPU);
+    createEligibleModulesListForQuintupletsGPU<<<1,1024,0,stream>>>(*modulesInGPU, *tripletsInGPU, *rangesInGPU);
     cudaMemcpyAsync(&nEligibleT5Modules,rangesInGPU->nEligibleT5Modules,sizeof(uint16_t),cudaMemcpyDeviceToHost,stream);
-    cudaMemcpyAsync(&nTotalQuintuplets,device_nTotalQuintuplets,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
-cudaStreamSynchronize(stream);
-    cudaFree(device_nTotalQuintuplets);
+    cudaMemcpyAsync(&nTotalQuintuplets,rangesInGPU->device_nTotalQuints,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+    cudaStreamSynchronize(stream);
 
     if(quintupletsInGPU == nullptr)
     {
 
@@ -17,70 +17,89 @@ void SDL::miniDoublets::resetMemory(unsigned int nMemoryLocationsx, unsigned int
 }
 
 
-void SDL::createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxPixelMDs)
+// Reserves a slice of the global mini-doublet (MD) array for every lower module.
+//
+// For each module i in [0, *modulesInGPU.nLowerModules) a hard-coded occupancy is
+// chosen from the module's subdet/layer/ring category and |eta| bin, and
+//   rangesInGPU.miniDoubletModuleOccupancy[i] receives that occupancy,
+//   rangesInGPU.miniDoubletModuleIndices[i]   receives the slice's first slot.
+// Slots are handed out with atomicAdd, so slices are disjoint but their order
+// follows thread scheduling, not module index: take a slice's size from
+// miniDoubletModuleOccupancy, never from neighbouring miniDoubletModuleIndices.
+// On exit miniDoubletModuleIndices[nLowerModules] and
+// *rangesInGPU.device_nTotalMDs both hold the sum of all occupancies.
+//
+// Launch with exactly one block (callers use <<<1,1024,0,stream>>>): the running
+// total lives in __shared__ memory, which is private to each block, so several
+// blocks would hand out overlapping slices.
+// NOTE(review): both structs are taken by reference and dereferenced on the
+// device; presumably they live in device-accessible (pinned host) memory -
+// confirm against the allocation sites.
+__global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU)
 {
-    /*
-        write code here that will deal with importing module parameters to CPU, and get the relevant occupancies for a given module!*/
-
-    int *module_miniDoubletModuleIndices;
-    module_miniDoubletModuleIndices = (int*)cms::cuda::allocate_host((nLowerModules + 1) * sizeof(unsigned int), stream);
-    short* module_subdets;
-    module_subdets = (short*)cms::cuda::allocate_host(nLowerModules* sizeof(short), stream);
-    cudaMemcpyAsync(module_subdets,modulesInGPU.subdets,nLowerModules*sizeof(short),cudaMemcpyDeviceToHost,stream);
-    short* module_layers;
-    module_layers = (short*)cms::cuda::allocate_host(nLowerModules * sizeof(short), stream);
-    cudaMemcpyAsync(module_layers,modulesInGPU.layers,nLowerModules * sizeof(short),cudaMemcpyDeviceToHost,stream);
-    short* module_rings;
-    module_rings = (short*)cms::cuda::allocate_host(nLowerModules * sizeof(short), stream);
-    cudaMemcpyAsync(module_rings,modulesInGPU.rings,nLowerModules * sizeof(short),cudaMemcpyDeviceToHost,stream);
-    float* module_eta;
-    module_eta = (float*)cms::cuda::allocate_host(nLowerModules * sizeof(float), stream);
-    cudaMemcpyAsync(module_eta,modulesInGPU.eta,nLowerModules * sizeof(float),cudaMemcpyDeviceToHost,stream);
-
-    cudaStreamSynchronize(stream);
+    // Marks a module that falls into none of the category / |eta| bins below.
+    const unsigned int unclassified = ~0u;
 
+    __shared__ unsigned int nTotalMDs; // slots handed out so far by this block
     nTotalMDs = 0; //start!   
-    for(uint16_t i = 0; i < nLowerModules; i++)
+    __syncthreads(); // the zeroed counter must be visible before any atomicAdd
+    const int nLowerModules = *modulesInGPU.nLowerModules;
+    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
+    const int np = gridDim.x * blockDim.x;
+    // int index on purpose: a uint16_t index wraps in `i += np` once nLowerModules
+    // exceeds 65535 - np, and the loop would then never terminate.
+    for(int i = gid; i < nLowerModules; i += np)
     {
-        module_miniDoubletModuleIndices[i] = nTotalMDs; //running counter - we start at the previous index!
+        const short module_subdets = modulesInGPU.subdets[i];
+        const short module_layers = modulesInGPU.layers[i];
+        const short module_rings = modulesInGPU.rings[i];
+        const float module_eta = fabsf(modulesInGPU.eta[i]);
         unsigned int occupancy;
         unsigned int category_number, eta_number;
-        if (module_layers[i]<=3 && module_subdets[i]==5) category_number = 0;
-        if (module_layers[i]>=4 && module_subdets[i]==5) category_number = 1;
-        if (module_layers[i]<=2 && module_subdets[i]==4 && module_rings[i]>=11) category_number = 2;
-        if (module_layers[i]>=3 && module_subdets[i]==4 && module_rings[i]>=8) category_number = 2;
-        if (module_layers[i]<=2 && module_subdets[i]==4 && module_rings[i]<=10) category_number = 3;
-        if (module_layers[i]>=3 && module_subdets[i]==4 && module_rings[i]<=7) category_number = 3;
-
-        if (abs(module_eta[i])<0.75) eta_number=0;
-        if (abs(module_eta[i])>0.75 && abs(module_eta[i])<1.5) eta_number=1;
-        if (abs(module_eta[i])>1.5 && abs(module_eta[i])<2.25) eta_number=2;
-        if (abs(module_eta[i])>2.25 && abs(module_eta[i])<3) eta_number=3;
+        if (module_layers<=3 && module_subdets==5) category_number = 0;
+        else if (module_layers>=4 && module_subdets==5) category_number = 1;
+        else if (module_layers<=2 && module_subdets==4 && module_rings>=11) category_number = 2;
+        else if (module_layers>=3 && module_subdets==4 && module_rings>=8) category_number = 2;
+        else if (module_layers<=2 && module_subdets==4 && module_rings<=10) category_number = 3;
+        else if (module_layers>=3 && module_subdets==4 && module_rings<=7) category_number = 3;
+        else category_number = unclassified;
+
+        if (module_eta<0.75f) eta_number=0;
+        else if (module_eta>0.75f && module_eta<1.5f) eta_number=1;
+        else if (module_eta>1.5f  && module_eta<2.25f) eta_number=2;
+        else if (module_eta>2.25f && module_eta<3.f) eta_number=3;
+        else eta_number = unclassified; // exact bin edges, |eta| >= 3, or NaN
 
         if (category_number == 0 && eta_number == 0) occupancy = 49;
-        if (category_number == 0 && eta_number == 1) occupancy = 42;
-        if (category_number == 0 && eta_number == 2) occupancy = 37;
-        if (category_number == 0 && eta_number == 3) occupancy = 41;
-        if (category_number == 1) occupancy = 100;
-        if (category_number == 2 && eta_number == 1) occupancy = 16;
-        if (category_number == 2 && eta_number == 2) occupancy = 19;
-        if (category_number == 3 && eta_number == 1) occupancy = 14;
-        if (category_number == 3 && eta_number == 2) occupancy = 20;
-        if (category_number == 3 && eta_number == 3) occupancy = 25;
-
-        nTotalMDs += occupancy;
+        else if (category_number == 0 && eta_number == 1) occupancy = 42;
+        else if (category_number == 0 && eta_number == 2) occupancy = 37;
+        else if (category_number == 0 && eta_number == 3) occupancy = 41;
+        else if (category_number == 1) occupancy = 100;
+        else if (category_number == 2 && eta_number == 1) occupancy = 16;
+        else if (category_number == 2 && eta_number == 2) occupancy = 19;
+        else if (category_number == 3 && eta_number == 1) occupancy = 14;
+        else if (category_number == 3 && eta_number == 2) occupancy = 20;
+        else if (category_number == 3 && eta_number == 3) occupancy = 25;
+        else
+        {
+            // No tuned occupancy for this combination: reserve nothing rather than
+            // hand an uninitialised value to atomicAdd.
+            occupancy = 0;
+#ifdef Warnings
+            printf("Unhandled case in createMDArrayRangesGPU! Module index = %d\n", i);
+#endif
+        }
+
+        unsigned int nTotMDs= atomicAdd(&nTotalMDs,occupancy);
+        rangesInGPU.miniDoubletModuleIndices[i] = nTotMDs;
+        rangesInGPU.miniDoubletModuleOccupancy[i] = occupancy;
+    }
+    __syncthreads(); // all atomicAdds done before thread 0 publishes the total
+    if(threadIdx.x==0){
+      rangesInGPU.miniDoubletModuleIndices[nLowerModules] = nTotalMDs;
+      *rangesInGPU.device_nTotalMDs=nTotalMDs;
     }
 
-    module_miniDoubletModuleIndices[nLowerModules] = nTotalMDs;
-    nTotalMDs+=maxPixelMDs;
-
-    cudaMemcpyAsync(rangesInGPU.miniDoubletModuleIndices, module_miniDoubletModuleIndices,  (nLowerModules + 1) * sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
-    cudaStreamSynchronize(stream);
-    cms::cuda::free_host(module_miniDoubletModuleIndices);
-    cms::cuda::free_host(module_subdets);
-    cms::cuda::free_host(module_layers);
-    cms::cuda::free_host(module_rings);
-    cms::cuda::free_host(module_eta);
 }
 
 //FIXME:Add memory locations for the pixel MDs here!
@@ -928,7 +918,7 @@ __global__ void SDL::createMiniDoubletsInGPUv2(struct SDL::modules& modulesInGPU
 if(success)
             {
                 unsigned int totOccupancyMDs = atomicAdd(&mdsInGPU.totOccupancyMDs[lowerModuleIndex],1);
-                if(totOccupancyMDs >= (rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex + 1] - rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex]))
+                if(totOccupancyMDs >= (rangesInGPU.miniDoubletModuleOccupancy[lowerModuleIndex])) // capacity is stored per module: createMDArrayRangesGPU assigns start indices via atomicAdd, so neighbouring indices no longer differ by the module's size
                 {
 #ifdef Warnings
                     printf("Mini-doublet excess alert! Module index =  %d\n",lowerModuleIndex);
 
@@ -92,7 +92,7 @@ namespace SDL
     void createMDsInExplicitMemory(struct miniDoublets& mdsInGPU, unsigned int maxMDs,uint16_t nLowerModules, unsigned int maxPixelMDs,cudaStream_t stream);
 
 
-    void createMDArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, uint16_t& nLowerModules, unsigned int& nTotalMDs, cudaStream_t stream, const unsigned int& maxPixelMDs);
+    __global__ void createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU);//, unsigned int* nTotalMDs);
 
     __global__ void addMiniDoubletRangesToEventExplicit(struct modules& modulesInGPU, struct miniDoublets& mdsInGPU, struct objectRanges& rangesInGPU, struct hits& hitsInGPU);
 
 
@@ -31,11 +31,17 @@ void SDL::createRangesInExplicitMemory(struct objectRanges& rangesInGPU,unsigned
     rangesInGPU.quintupletModuleIndices = (int*)cms::cuda::allocate_device(dev,nLowerModules * sizeof(int),stream);
     rangesInGPU.quintupletModuleOccupancy = (int*)cms::cuda::allocate_device(dev,nLowerModules * sizeof(int),stream);
     rangesInGPU.miniDoubletModuleIndices = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
+    rangesInGPU.miniDoubletModuleOccupancy = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
     rangesInGPU.segmentModuleIndices = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
     rangesInGPU.segmentModuleOccupancy = (int*)cms::cuda::allocate_device(dev, (nLowerModules + 1) * sizeof(int), stream);
     rangesInGPU.tripletModuleIndices = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream);
     rangesInGPU.tripletModuleOccupancy = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream);
 
+    rangesInGPU.device_nTotalMDs = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalSegs = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalTrips = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalQuints = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+
 #else
     cudaMalloc(&rangesInGPU.hitRanges,nModules * 2 * sizeof(int));
     cudaMalloc(&rangesInGPU.hitRangesLower,nModules  * sizeof(int));
@@ -53,10 +59,16 @@ void SDL::createRangesInExplicitMemory(struct objectRanges& rangesInGPU,unsigned
     cudaMalloc(&rangesInGPU.quintupletModuleOccupancy, nLowerModules * sizeof(int));
 
     cudaMalloc(&rangesInGPU.miniDoubletModuleIndices, (nLowerModules + 1) * sizeof(int));
+    cudaMalloc(&rangesInGPU.miniDoubletModuleOccupancy, (nLowerModules + 1) * sizeof(int));
     cudaMalloc(&rangesInGPU.segmentModuleIndices, (nLowerModules + 1) * sizeof(int));
     cudaMalloc(&rangesInGPU.segmentModuleOccupancy, (nLowerModules + 1) * sizeof(int));
     cudaMalloc(&rangesInGPU.tripletModuleIndices, nLowerModules * sizeof(int));
     cudaMalloc(&rangesInGPU.tripletModuleOccupancy, nLowerModules * sizeof(int));
+    
+    cudaMalloc(&rangesInGPU.device_nTotalMDs, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalSegs, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalTrips, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalQuints, sizeof(unsigned int));
 
 #endif
 }
@@ -113,10 +125,15 @@ void SDL::objectRanges::freeMemoryCache()//struct objectRanges& rangesInGPU)
   cms::cuda::free_device(dev, hitRangesnLower);
   cms::cuda::free_device(dev, hitRangesnUpper);
   cms::cuda::free_device(dev, miniDoubletModuleIndices);
+  cms::cuda::free_device(dev, miniDoubletModuleOccupancy);
   cms::cuda::free_device(dev, segmentModuleIndices);
   cms::cuda::free_device(dev, segmentModuleOccupancy);
   cms::cuda::free_device(dev, tripletModuleIndices);
   cms::cuda::free_device(dev, tripletModuleOccupancy);
+  cms::cuda::free_device(dev, device_nTotalMDs);
+  cms::cuda::free_device(dev, device_nTotalSegs);
+  cms::cuda::free_device(dev, device_nTotalTrips);
+  cms::cuda::free_device(dev, device_nTotalQuints);
 }
 void SDL::objectRanges::freeMemory()
 {
@@ -136,10 +153,15 @@ void SDL::objectRanges::freeMemory()
   cudaFree(quintupletModuleIndices);
   cudaFree(quintupletModuleOccupancy);
   cudaFree(miniDoubletModuleIndices);
+  cudaFree(miniDoubletModuleOccupancy);
   cudaFree(segmentModuleIndices);
   cudaFree(segmentModuleOccupancy);
   cudaFree(tripletModuleIndices);
   cudaFree(tripletModuleOccupancy);
+  cudaFree(device_nTotalMDs);
+  cudaFree(device_nTotalSegs);
+  cudaFree(device_nTotalTrips);
+  cudaFree(device_nTotalQuints);
 }
 void SDL::freeModulesCache(struct modules& modulesInGPU,struct pixelMap& pixelMapping)
 {
 
@@ -67,12 +67,16 @@ namespace SDL
         int *quintupletModuleIndices;
         int *quintupletModuleOccupancy;
         int *miniDoubletModuleIndices;
+        int *miniDoubletModuleOccupancy; // MD slots reserved per lower module; allocated with nLowerModules + 1 entries
         int *segmentModuleIndices;
         int *segmentModuleOccupancy;
         int *tripletModuleIndices;
         int *tripletModuleOccupancy;
 
-//        unsigned int nTotalQuintuplets;
+        unsigned int *device_nTotalMDs; // single device-side value written by createMDArrayRangesGPU, copied back by the host
+        unsigned int *device_nTotalSegs; // single device-side value the host reads after createSegmentArrayRanges
+        unsigned int *device_nTotalTrips; // single device-side value the host reads after createTripletArrayRanges
+        unsigned int *device_nTotalQuints; // single device-side value the host reads after createEligibleModulesListForQuintupletsGPU
 
         void freeMemoryCache();
         void freeMemory();