@@ -303,23 +303,23 @@ void SDL::Event::resetEvent()
303303 }
304304 }
305305 if (hitsInGPU){cms::cuda::free_host (hitsInGPU);
306- hitsInGPU = nullptr ;}
306+ hitsInGPU = nullptr ;}
307307 if (mdsInGPU){cms::cuda::free_host (mdsInGPU);
308- mdsInGPU = nullptr ;}
308+ mdsInGPU = nullptr ;}
309309 if (rangesInGPU){cms::cuda::free_host (rangesInGPU);
310- rangesInGPU = nullptr ;}
310+ rangesInGPU = nullptr ;}
311311 if (segmentsInGPU){cms::cuda::free_host (segmentsInGPU);
312- segmentsInGPU = nullptr ;}
312+ segmentsInGPU = nullptr ;}
313313 if (tripletsInGPU){cms::cuda::free_host (tripletsInGPU);
314- tripletsInGPU = nullptr ;}
315- if (quintupletsInGPU){cms::cuda::free_host (quintupletsInGPU);
314+ tripletsInGPU = nullptr ;}
315+ if (quintupletsInGPU){cms::cuda::free_host (quintupletsInGPU);
316316 quintupletsInGPU = nullptr ;}
317317 if (trackCandidatesInGPU){cms::cuda::free_host (trackCandidatesInGPU);
318- trackCandidatesInGPU = nullptr ;}
318+ trackCandidatesInGPU = nullptr ;}
319319 if (pixelTripletsInGPU){cms::cuda::free_host (pixelTripletsInGPU);
320- pixelTripletsInGPU = nullptr ;}
320+ pixelTripletsInGPU = nullptr ;}
321321 if (pixelQuintupletsInGPU){cms::cuda::free_host (pixelQuintupletsInGPU);
322- pixelQuintupletsInGPU = nullptr ;}
322+ pixelQuintupletsInGPU = nullptr ;}
323323
324324 if (hitsInCPU != nullptr )
325325 {
@@ -679,14 +679,11 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
679679 {
680680 mdsInGPU = (SDL::miniDoublets*)cms::cuda::allocate_host (sizeof (SDL::miniDoublets), stream);
681681 unsigned int nTotalMDs;
682- unsigned int *device_nTotalMDs;
683- cudaMalloc ((void **)&device_nTotalMDs, sizeof (unsigned int ));
684682 cudaMemsetAsync (&rangesInGPU->miniDoubletModuleOccupancy [nLowerModules],N_MAX_PIXEL_MD_PER_MODULES, sizeof (unsigned int ),stream);
685- createMDArrayRangesGPU<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU, device_nTotalMDs);
686- cudaMemcpyAsync (&nTotalMDs,device_nTotalMDs,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
683+ createMDArrayRangesGPU<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU);
684+ cudaMemcpyAsync (&nTotalMDs,rangesInGPU-> device_nTotalMDs ,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
687685 cudaStreamSynchronize (stream);
688686 nTotalMDs+= N_MAX_PIXEL_MD_PER_MODULES;
689- cudaFree (device_nTotalMDs);
690687 createMDsInExplicitMemory (*mdsInGPU, nTotalMDs, nLowerModules, N_MAX_PIXEL_MD_PER_MODULES,stream);
691688 cudaMemcpyAsync (mdsInGPU->nMemoryLocations , &nTotalMDs, sizeof (unsigned int ), cudaMemcpyHostToDevice, stream);
692689 cudaStreamSynchronize (stream);
@@ -698,13 +695,10 @@ void SDL::Event::addPixelSegmentToEvent(std::vector<unsigned int> hitIndices0,st
698695 // hardcoded range numbers for this will come from studies!
699696 // can be optimized here: pixel segments and outer-tracker segments are not distinguished (both are simply called "segments"), so they share one continuous index range.
700697 // If we want to study the memory footprint in more detail, we can separate the two and allocate separate memory for each.
701- unsigned int *device_nTotalSegments;
702- cudaMalloc ((void **)&device_nTotalSegments, sizeof (unsigned int ));
703- createSegmentArrayRanges<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU, *mdsInGPU, device_nTotalSegments);
704- cudaMemcpyAsync (&nTotalSegments,device_nTotalSegments,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
698+ createSegmentArrayRanges<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU, *mdsInGPU);
699+ cudaMemcpyAsync (&nTotalSegments,rangesInGPU->device_nTotalSegs ,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
705700 cudaStreamSynchronize (stream);
706701 nTotalSegments += N_MAX_PIXEL_SEGMENTS_PER_MODULE;
707- cudaFree (device_nTotalSegments);
708702 createSegmentsInExplicitMemory (*segmentsInGPU, nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE,stream);
709703
710704 cudaMemcpyAsync (segmentsInGPU->nMemoryLocations , &nTotalSegments, sizeof (unsigned int ), cudaMemcpyHostToDevice, stream);;
@@ -901,14 +895,11 @@ void SDL::Event::createMiniDoublets()
901895{
902896 // hardcoded range numbers for this will come from studies!
903897 unsigned int nTotalMDs;
904- unsigned int *device_nTotalMDs;
905- cudaMalloc ((void **)&device_nTotalMDs, sizeof (unsigned int ));
906898 cudaMemsetAsync (&rangesInGPU->miniDoubletModuleOccupancy [nLowerModules],N_MAX_PIXEL_MD_PER_MODULES, sizeof (unsigned int ),stream);
907- createMDArrayRangesGPU<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU, device_nTotalMDs );
908- cudaMemcpyAsync (&nTotalMDs,device_nTotalMDs,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
899+ createMDArrayRangesGPU<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU);
900+ cudaMemcpyAsync (&nTotalMDs,rangesInGPU-> device_nTotalMDs ,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
909901 cudaStreamSynchronize (stream);
910902 nTotalMDs+=N_MAX_PIXEL_MD_PER_MODULES;
911- cudaFree (device_nTotalMDs);
912903
913904 if (mdsInGPU == nullptr )
914905 {
@@ -1004,12 +995,9 @@ void SDL::Event::createTriplets()
1004995 {
1005996 tripletsInGPU = (SDL::triplets*)cms::cuda::allocate_host (sizeof (SDL::triplets), stream);
1006997 unsigned int maxTriplets;
1007- unsigned int *device_maxTriplets;
1008- cudaMalloc ((void **)&device_maxTriplets, sizeof (unsigned int ));
1009- createTripletArrayRanges<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU, *segmentsInGPU, device_maxTriplets);
1010- cudaMemcpyAsync (&maxTriplets,device_maxTriplets,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
998+ createTripletArrayRanges<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *rangesInGPU, *segmentsInGPU);
999+ cudaMemcpyAsync (&maxTriplets,rangesInGPU->device_nTotalTrips ,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
10111000 cudaStreamSynchronize (stream);
1012- cudaFree (device_maxTriplets);
10131001 createTripletsInExplicitMemory (*tripletsInGPU, maxTriplets, nLowerModules,stream);
10141002
10151003 cudaMemcpyAsync (tripletsInGPU->nMemoryLocations , &maxTriplets, sizeof (unsigned int ), cudaMemcpyHostToDevice, stream);
@@ -1272,15 +1260,12 @@ void SDL::Event::createQuintuplets()
12721260 cudaMalloc (&(rangesInGPU->indicesOfEligibleT5Modules ), nLowerModules * sizeof (uint16_t ));
12731261#endif
12741262 cudaMemsetAsync (rangesInGPU->quintupletModuleIndices , -1 , sizeof (int ) * (nLowerModules),stream);
1275- cudaStreamSynchronize (stream);
1263+ cudaStreamSynchronize (stream);
12761264 unsigned int nTotalQuintuplets;
1277- unsigned int *device_nTotalQuintuplets;
1278- cudaMalloc ((void **)&device_nTotalQuintuplets, sizeof (unsigned int ));
1279- createEligibleModulesListForQuintupletsGPU<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *tripletsInGPU, device_nTotalQuintuplets, *rangesInGPU);
1265+ createEligibleModulesListForQuintupletsGPU<<<1 ,1024 ,0 ,stream>>> (*modulesInGPU, *tripletsInGPU, *rangesInGPU);
12801266 cudaMemcpyAsync (&nEligibleT5Modules,rangesInGPU->nEligibleT5Modules ,sizeof (uint16_t ),cudaMemcpyDeviceToHost,stream);
1281- cudaMemcpyAsync (&nTotalQuintuplets,device_nTotalQuintuplets,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
1282- cudaStreamSynchronize (stream);
1283- cudaFree (device_nTotalQuintuplets);
1267+ cudaMemcpyAsync (&nTotalQuintuplets,rangesInGPU->device_nTotalQuints ,sizeof (unsigned int ),cudaMemcpyDeviceToHost,stream);
1268+ cudaStreamSynchronize (stream);
12841269
12851270 if (quintupletsInGPU == nullptr )
12861271 {
0 commit comments