remove cudaMalloc in favor of caching within rangesInGPU

tresreid · tresreid · commit 84326ad00411 · 2023-04-21T11:50:24.000-04:00
diff --git a/SDL/Event.cu b/SDL/Event.cu
@@ -303,23 +303,23 @@ void SDL::Event::resetEvent()
         }
     }
     if(hitsInGPU){cms::cuda::free_host(hitsInGPU);
-    hitsInGPU = nullptr;}
+      hitsInGPU = nullptr;}
     if(mdsInGPU){cms::cuda::free_host(mdsInGPU);
-    mdsInGPU = nullptr;}
+      mdsInGPU = nullptr;}
     if(rangesInGPU){cms::cuda::free_host(rangesInGPU);
-    rangesInGPU = nullptr;}
+      rangesInGPU = nullptr;}
     if(segmentsInGPU){cms::cuda::free_host(segmentsInGPU);
-    segmentsInGPU = nullptr;}
+      segmentsInGPU = nullptr;}
     if(tripletsInGPU){cms::cuda::free_host(tripletsInGPU);
-    tripletsInGPU = nullptr;}
-      if(quintupletsInGPU){cms::cuda::free_host(quintupletsInGPU);
+      tripletsInGPU = nullptr;}
+    if(quintupletsInGPU){cms::cuda::free_host(quintupletsInGPU);
       quintupletsInGPU = nullptr;}
     if(trackCandidatesInGPU){cms::cuda::free_host(trackCandidatesInGPU);
-    trackCandidatesInGPU = nullptr;}
+      trackCandidatesInGPU = nullptr;}
     if(pixelTripletsInGPU){cms::cuda::free_host(pixelTripletsInGPU);
-    pixelTripletsInGPU = nullptr;}
+      pixelTripletsInGPU = nullptr;}
     if(pixelQuintupletsInGPU){cms::cuda::free_host(pixelQuintupletsInGPU);
-    pixelQuintupletsInGPU = nullptr;}
+      pixelQuintupletsInGPU = nullptr;}
 
     if(hitsInCPU != nullptr)
     {
@@ -679,14 +679,11 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
     {
         mdsInGPU = (SDL::miniDoublets*)cms::cuda::allocate_host(sizeof(SDL::miniDoublets), stream);
         unsigned int nTotalMDs;
-        unsigned int *device_nTotalMDs;
-        cudaMalloc((void **)&device_nTotalMDs, sizeof(unsigned int));
         cudaMemsetAsync(&rangesInGPU->miniDoubletModuleOccupancy[nLowerModules],N_MAX_PIXEL_MD_PER_MODULES, sizeof(unsigned int),stream);
-        createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, device_nTotalMDs); 
-        cudaMemcpyAsync(&nTotalMDs,device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU);
+        cudaMemcpyAsync(&nTotalMDs,rangesInGPU->device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
         cudaStreamSynchronize(stream);
         nTotalMDs+= N_MAX_PIXEL_MD_PER_MODULES;
-        cudaFree(device_nTotalMDs);
         createMDsInExplicitMemory(*mdsInGPU, nTotalMDs, nLowerModules, N_MAX_PIXEL_MD_PER_MODULES,stream);
         cudaMemcpyAsync(mdsInGPU->nMemoryLocations, &nTotalMDs, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
         cudaStreamSynchronize(stream);
@@ -698,13 +695,10 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
         //hardcoded range numbers for this will come from studies!
         // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously.
         // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them
-        unsigned int *device_nTotalSegments;
-        cudaMalloc((void **)&device_nTotalSegments, sizeof(unsigned int));
-        createSegmentArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *mdsInGPU, device_nTotalSegments);
-        cudaMemcpyAsync(&nTotalSegments,device_nTotalSegments,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        createSegmentArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *mdsInGPU);
+        cudaMemcpyAsync(&nTotalSegments,rangesInGPU->device_nTotalSegs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
         cudaStreamSynchronize(stream);
         nTotalSegments += N_MAX_PIXEL_SEGMENTS_PER_MODULE;
-        cudaFree(device_nTotalSegments);
         createSegmentsInExplicitMemory(*segmentsInGPU, nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE,stream);
 
         cudaMemcpyAsync(segmentsInGPU->nMemoryLocations, &nTotalSegments, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);;
@@ -901,14 +895,11 @@ void SDL::Event::createMiniDoublets()
 {
     //hardcoded range numbers for this will come from studies!
     unsigned int nTotalMDs;
-    unsigned int *device_nTotalMDs;
-    cudaMalloc((void **)&device_nTotalMDs, sizeof(unsigned int));
     cudaMemsetAsync(&rangesInGPU->miniDoubletModuleOccupancy[nLowerModules],N_MAX_PIXEL_MD_PER_MODULES, sizeof(unsigned int),stream);
-    createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, device_nTotalMDs); 
-    cudaMemcpyAsync(&nTotalMDs,device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+    createMDArrayRangesGPU<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU); 
+    cudaMemcpyAsync(&nTotalMDs,rangesInGPU->device_nTotalMDs,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
     cudaStreamSynchronize(stream);
     nTotalMDs+=N_MAX_PIXEL_MD_PER_MODULES;
-    cudaFree(device_nTotalMDs);
 
     if(mdsInGPU == nullptr)
     {
@@ -1004,12 +995,9 @@ void SDL::Event::createTriplets()
     {
         tripletsInGPU = (SDL::triplets*)cms::cuda::allocate_host(sizeof(SDL::triplets), stream);
         unsigned int maxTriplets;
-        unsigned int *device_maxTriplets;
-        cudaMalloc((void **)&device_maxTriplets, sizeof(unsigned int));
-        createTripletArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *segmentsInGPU, device_maxTriplets);
-        cudaMemcpyAsync(&maxTriplets,device_maxTriplets,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+        createTripletArrayRanges<<<1,1024,0,stream>>>(*modulesInGPU, *rangesInGPU, *segmentsInGPU);
+        cudaMemcpyAsync(&maxTriplets,rangesInGPU->device_nTotalTrips,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
         cudaStreamSynchronize(stream);
-        cudaFree(device_maxTriplets);
         createTripletsInExplicitMemory(*tripletsInGPU, maxTriplets, nLowerModules,stream);
 
         cudaMemcpyAsync(tripletsInGPU->nMemoryLocations, &maxTriplets, sizeof(unsigned int), cudaMemcpyHostToDevice, stream);
@@ -1272,15 +1260,12 @@ void SDL::Event::createQuintuplets()
         cudaMalloc(&(rangesInGPU->indicesOfEligibleT5Modules), nLowerModules * sizeof(uint16_t));
 #endif
     cudaMemsetAsync(rangesInGPU->quintupletModuleIndices, -1, sizeof(int) * (nLowerModules),stream);
-cudaStreamSynchronize(stream);
+    cudaStreamSynchronize(stream);
     unsigned int nTotalQuintuplets;
-    unsigned int *device_nTotalQuintuplets;
-    cudaMalloc((void **)&device_nTotalQuintuplets, sizeof(unsigned int));
-    createEligibleModulesListForQuintupletsGPU<<<1,1024,0,stream>>>(*modulesInGPU, *tripletsInGPU, device_nTotalQuintuplets, *rangesInGPU);
+    createEligibleModulesListForQuintupletsGPU<<<1,1024,0,stream>>>(*modulesInGPU, *tripletsInGPU, *rangesInGPU);
     cudaMemcpyAsync(&nEligibleT5Modules,rangesInGPU->nEligibleT5Modules,sizeof(uint16_t),cudaMemcpyDeviceToHost,stream);
-    cudaMemcpyAsync(&nTotalQuintuplets,device_nTotalQuintuplets,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
-cudaStreamSynchronize(stream);
-    cudaFree(device_nTotalQuintuplets);
+    cudaMemcpyAsync(&nTotalQuintuplets,rangesInGPU->device_nTotalQuints,sizeof(unsigned int),cudaMemcpyDeviceToHost,stream);
+    cudaStreamSynchronize(stream);
 
     if(quintupletsInGPU == nullptr)
     {
diff --git a/SDL/MiniDoublet.cu b/SDL/MiniDoublet.cu
@@ -17,7 +17,7 @@ void SDL::miniDoublets::resetMemory(unsigned int nMemoryLocationsx, unsigned int
 }
 
 
-__global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, unsigned int* nTotalMDsx)
+__global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU)//, unsigned int* nTotalMDsx)
 {
     short module_subdets;
     short module_layers;
@@ -67,7 +67,8 @@ __global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct
     __syncthreads();
     if(threadIdx.x==0){
       rangesInGPU.miniDoubletModuleIndices[*modulesInGPU.nLowerModules] = nTotalMDs;
-      *nTotalMDsx=nTotalMDs;
+      //*nTotalMDsx=nTotalMDs;
+      *rangesInGPU.device_nTotalMDs=nTotalMDs;
     }
 
 }
diff --git a/SDL/MiniDoublet.cuh b/SDL/MiniDoublet.cuh
@@ -92,7 +92,7 @@ namespace SDL
     void createMDsInExplicitMemory(struct miniDoublets& mdsInGPU, unsigned int maxMDs,uint16_t nLowerModules, unsigned int maxPixelMDs,cudaStream_t stream);
 
 
-    __global__ void createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, unsigned int* nTotalMDs);
+    __global__ void createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU);//, unsigned int* nTotalMDs);
 
     __global__ void addMiniDoubletRangesToEventExplicit(struct modules& modulesInGPU, struct miniDoublets& mdsInGPU, struct objectRanges& rangesInGPU, struct hits& hitsInGPU);
 
diff --git a/SDL/Module.cu b/SDL/Module.cu
@@ -37,6 +37,11 @@ void SDL::createRangesInExplicitMemory(struct objectRanges& rangesInGPU,unsigned
     rangesInGPU.tripletModuleIndices = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream);
     rangesInGPU.tripletModuleOccupancy = (int*)cms::cuda::allocate_device(dev, nLowerModules * sizeof(int), stream);
 
+    rangesInGPU.device_nTotalMDs = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalSegs = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalTrips = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+    rangesInGPU.device_nTotalQuints = (unsigned int*)cms::cuda::allocate_device(dev, sizeof(unsigned int), stream);
+
 #else
     cudaMalloc(&rangesInGPU.hitRanges,nModules * 2 * sizeof(int));
     cudaMalloc(&rangesInGPU.hitRangesLower,nModules  * sizeof(int));
@@ -59,6 +64,11 @@ void SDL::createRangesInExplicitMemory(struct objectRanges& rangesInGPU,unsigned
     cudaMalloc(&rangesInGPU.segmentModuleOccupancy, (nLowerModules + 1) * sizeof(int));
     cudaMalloc(&rangesInGPU.tripletModuleIndices, nLowerModules * sizeof(int));
     cudaMalloc(&rangesInGPU.tripletModuleOccupancy, nLowerModules * sizeof(int));
+    
+    cudaMalloc(&rangesInGPU.device_nTotalMDs, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalSegs, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalTrips, sizeof(unsigned int));
+    cudaMalloc(&rangesInGPU.device_nTotalQuints, sizeof(unsigned int));
 
 #endif
 }
@@ -120,6 +130,10 @@ void SDL::objectRanges::freeMemoryCache()//struct objectRanges& rangesInGPU)
   cms::cuda::free_device(dev, segmentModuleOccupancy);
   cms::cuda::free_device(dev, tripletModuleIndices);
   cms::cuda::free_device(dev, tripletModuleOccupancy);
+  cms::cuda::free_device(dev, device_nTotalMDs);
+  cms::cuda::free_device(dev, device_nTotalSegs);
+  cms::cuda::free_device(dev, device_nTotalTrips);
+  cms::cuda::free_device(dev, device_nTotalQuints);
 }
 void SDL::objectRanges::freeMemory()
 {
@@ -144,6 +158,10 @@ void SDL::objectRanges::freeMemory()
   cudaFree(segmentModuleOccupancy);
   cudaFree(tripletModuleIndices);
   cudaFree(tripletModuleOccupancy);
+  cudaFree(device_nTotalMDs);
+  cudaFree(device_nTotalSegs);
+  cudaFree(device_nTotalTrips);
+  cudaFree(device_nTotalQuints);
 }
 void SDL::freeModulesCache(struct modules& modulesInGPU,struct pixelMap& pixelMapping)
 {
diff --git a/SDL/Module.cuh b/SDL/Module.cuh
@@ -73,7 +73,10 @@ namespace SDL
         int *tripletModuleIndices;
         int *tripletModuleOccupancy;
 
-//        unsigned int nTotalQuintuplets;
+        unsigned int *device_nTotalMDs;
+        unsigned int *device_nTotalSegs;
+        unsigned int *device_nTotalTrips;
+        unsigned int *device_nTotalQuints;
     
         void freeMemoryCache();
         void freeMemory();
diff --git a/SDL/Quintuplet.cu b/SDL/Quintuplet.cu
@@ -83,7 +83,7 @@ void SDL::quintuplets::freeMemory(cudaStream_t stream)
     cudaStreamSynchronize(stream);
 }
 //TODO:Reuse the track candidate one instead of this!
-__global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU,struct triplets& tripletsInGPU, unsigned int* device_nTotalQuintuplets, struct objectRanges& rangesInGPU)
+__global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU,struct triplets& tripletsInGPU, struct objectRanges& rangesInGPU)
 {
     __shared__ int nEligibleT5Modulesx;
     __shared__ unsigned int nTotalQuintupletsx;
@@ -140,7 +140,7 @@ __global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules&
     __syncthreads();
     if(threadIdx.x==0){
         *rangesInGPU.nEligibleT5Modules = static_cast<uint16_t>(nEligibleT5Modulesx);
-        *device_nTotalQuintuplets = nTotalQuintupletsx;
+        *rangesInGPU.device_nTotalQuints = nTotalQuintupletsx;
     }
 }
 
diff --git a/SDL/Quintuplet.cuh b/SDL/Quintuplet.cuh
@@ -60,7 +60,7 @@ namespace SDL
 
     void createQuintupletsInExplicitMemory(struct SDL::quintuplets& quintupletsInGPU, const unsigned int& maxQuintuplets, const uint16_t& nLowerModules, const uint16_t& nEligibleModules,cudaStream_t stream);
 
-    __global__ void createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU, struct triplets& tripletsInGPU, unsigned int* nTotalQuintuplets, struct objectRanges& rangesInGPU);
+    __global__ void createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU, struct triplets& tripletsInGPU, struct objectRanges& rangesInGPU);
     __global__ void addQuintupletRangesToEventExplicit(struct modules& modulesInGPU, struct quintuplets& quintupletsInGPU, struct objectRanges& rangesInGPU);
 
 //  CUDA_DEV void rmQuintupletToMemory(struct SDL::quintuplets& quintupletsInGPU, unsigned int quintupletIndex);
diff --git a/SDL/Segment.cu b/SDL/Segment.cu
@@ -30,7 +30,7 @@ void SDL::segments::resetMemory(unsigned int nMemoryLocationsx, unsigned int nLo
 }
 
 
-__global__ void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsInGPU, unsigned int* nTotalSegmentsx)
+__global__ void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsInGPU)
 {
     short module_subdets;
     short module_layers;
@@ -88,7 +88,7 @@ __global__ void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, stru
     __syncthreads();
     if(threadIdx.x==0){
       rangesInGPU.segmentModuleIndices[*modulesInGPU.nLowerModules] = nTotalSegments;
-      *nTotalSegmentsx = nTotalSegments;
+      *rangesInGPU.device_nTotalSegs = nTotalSegments;
     }
 }
 
diff --git a/SDL/Segment.cuh b/SDL/Segment.cuh
@@ -74,7 +74,7 @@ namespace SDL
 
     void createSegmentsInExplicitMemory(struct segments& segmentsInGPU, unsigned int maxSegments, uint16_t nLowerModules, unsigned int maxPixelSegments,cudaStream_t stream);
 
-    __global__ void createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsinGPU, unsigned int* nSegments);
+    __global__ void createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsinGPU);
 
 
     __global__ void addSegmentRangesToEventExplicit(struct modules& modulesInGPU, struct segments& segmentsInGPU, struct objectRanges& rangesInGPU);
diff --git a/SDL/Triplet.cu b/SDL/Triplet.cu
@@ -16,7 +16,7 @@ void SDL::triplets::resetMemory(unsigned int maxTriplets, unsigned int nLowerMod
     cudaMemsetAsync(partOfPT3, 0, maxTriplets * sizeof(bool), stream);
 }
 
-__global__ void SDL::createTripletArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct segments& segmentsInGPU, unsigned int* nTotalTripletsx)
+__global__ void SDL::createTripletArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct segments& segmentsInGPU)
 {
 
     short module_subdets;
@@ -71,7 +71,7 @@ __global__ void SDL::createTripletArrayRanges(struct modules& modulesInGPU, stru
     }
     __syncthreads();
     if(threadIdx.x==0){
-      *nTotalTripletsx = nTotalTriplets;
+      *rangesInGPU.device_nTotalTrips = nTotalTriplets;
     }
 }
 
diff --git a/SDL/Triplet.cuh b/SDL/Triplet.cuh
@@ -73,7 +73,7 @@ namespace SDL
         void resetMemory(unsigned int maxTriplets, unsigned int nLowerModules,cudaStream_t stream);
     };
 
-    __global__ void createTripletArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct segments& segmentsInGPU, unsigned int* nTotalTriplets);
+    __global__ void createTripletArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct segments& segmentsInGPU);
     __global__ void addTripletRangesToEventExplicit(struct modules& modulesInGPU, struct triplets& tripletsInGPU, struct objectRanges& rangesInGPU);
 
     void createTripletsInExplicitMemory(struct triplets& tripletsInGPU, unsigned int maxTriplets, uint16_t nLowerModules,cudaStream_t stream);

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@ void SDL::miniDoublets::resetMemory(unsigned int nMemoryLocationsx, unsigned int`
`17`	`17`	`}`
`18`	`18`
`19`	`19`
`20`		`-__global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, unsigned int* nTotalMDsx)`
	`20`	`+__global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct objectRanges& rangesInGPU)//, unsigned int* nTotalMDsx)`
`21`	`21`	`{`
`22`	`22`	`short module_subdets;`
`23`	`23`	`short module_layers;`
`@@ -67,7 +67,8 @@ __global__ void SDL::createMDArrayRangesGPU(struct modules& modulesInGPU, struct`
`67`	`67`	`__syncthreads();`
`68`	`68`	`if(threadIdx.x==0){`
`69`	`69`	`rangesInGPU.miniDoubletModuleIndices[*modulesInGPU.nLowerModules] = nTotalMDs;`
`70`		`- *nTotalMDsx=nTotalMDs;`
	`70`	`+ //*nTotalMDsx=nTotalMDs;`
	`71`	`+ *rangesInGPU.device_nTotalMDs=nTotalMDs;`
`71`	`72`	`}`
`72`	`73`
`73`	`74`	`}`
Original file line number	Diff line number	Diff line change
`@@ -83,7 +83,7 @@ void SDL::quintuplets::freeMemory(cudaStream_t stream)`
`83`	`83`	`cudaStreamSynchronize(stream);`
`84`	`84`	`}`
`85`	`85`	`//TODO:Reuse the track candidate one instead of this!`
`86`		`-__global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU,struct triplets& tripletsInGPU, unsigned int* device_nTotalQuintuplets, struct objectRanges& rangesInGPU)`
	`86`	`+__global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules& modulesInGPU,struct triplets& tripletsInGPU, struct objectRanges& rangesInGPU)`
`87`	`87`	`{`
`88`	`88`	`__shared__ int nEligibleT5Modulesx;`
`89`	`89`	`__shared__ unsigned int nTotalQuintupletsx;`
`@@ -140,7 +140,7 @@ __global__ void SDL::createEligibleModulesListForQuintupletsGPU(struct modules&`
`140`	`140`	`__syncthreads();`
`141`	`141`	`if(threadIdx.x==0){`
`142`	`142`	`*rangesInGPU.nEligibleT5Modules = static_cast<uint16_t>(nEligibleT5Modulesx);`
`143`		`- *device_nTotalQuintuplets = nTotalQuintupletsx;`
	`143`	`+ *rangesInGPU.device_nTotalQuints = nTotalQuintupletsx;`
`144`	`144`	`}`
`145`	`145`	`}`
`146`	`146`
Original file line number	Diff line number	Diff line change
`@@ -30,7 +30,7 @@ void SDL::segments::resetMemory(unsigned int nMemoryLocationsx, unsigned int nLo`
`30`	`30`	`}`
`31`	`31`
`32`	`32`
`33`		`-__global__ void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsInGPU, unsigned int* nTotalSegmentsx)`
	`33`	`+__global__ void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct miniDoublets& mdsInGPU)`
`34`	`34`	`{`
`35`	`35`	`short module_subdets;`
`36`	`36`	`short module_layers;`
`@@ -88,7 +88,7 @@ __global__ void SDL::createSegmentArrayRanges(struct modules& modulesInGPU, stru`
`88`	`88`	`__syncthreads();`
`89`	`89`	`if(threadIdx.x==0){`
`90`	`90`	`rangesInGPU.segmentModuleIndices[*modulesInGPU.nLowerModules] = nTotalSegments;`
`91`		`- *nTotalSegmentsx = nTotalSegments;`
	`91`	`+ *rangesInGPU.device_nTotalSegs = nTotalSegments;`
`92`	`92`	`}`
`93`	`93`	`}`
`94`	`94`
Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,7 @@ void SDL::triplets::resetMemory(unsigned int maxTriplets, unsigned int nLowerMod`
`16`	`16`	`cudaMemsetAsync(partOfPT3, 0, maxTriplets * sizeof(bool), stream);`
`17`	`17`	`}`
`18`	`18`
`19`		`-__global__ void SDL::createTripletArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct segments& segmentsInGPU, unsigned int* nTotalTripletsx)`
	`19`	`+__global__ void SDL::createTripletArrayRanges(struct modules& modulesInGPU, struct objectRanges& rangesInGPU, struct segments& segmentsInGPU)`
`20`	`20`	`{`
`21`	`21`
`22`	`22`	`short module_subdets;`
`@@ -71,7 +71,7 @@ __global__ void SDL::createTripletArrayRanges(struct modules& modulesInGPU, stru`
`71`	`71`	`}`
`72`	`72`	`__syncthreads();`
`73`	`73`	`if(threadIdx.x==0){`
`74`		`- *nTotalTripletsx = nTotalTriplets;`
	`74`	`+ *rangesInGPU.device_nTotalTrips = nTotalTriplets;`
`75`	`75`	`}`
`76`	`76`	`}`
`77`	`77`