Skip to content

Commit 59016d2

Browse files
committed
Merge commit '6d20adb7043719912b7ac51e8b3e90722c6140c0'
2 parents 9c102e1 + 6d20adb commit 59016d2

File tree

1 file changed

+23
-21
lines changed

1 file changed

+23
-21
lines changed

Diff for: lib/libmarv/src/cudasw4.cuh

+23 -21
Original file line number | Diff line number | Diff line change
@@ -2884,27 +2884,29 @@ namespace cudasw4{
28842884
cudaSetDevice(deviceIds[gpu]); CUERR;
28852885
auto& ws = *workingSets[gpu];
28862886

2887-
if(!batchPlansDstInfoVec[gpu][0].isUploaded){
2888-
//all batches for cached db are now resident in gpu memory. update the flags
2889-
if(ws.getNumBatchesInCachedDB() > 0){
2890-
markCachedDBBatchesAsUploaded(gpu);
2891-
2892-
// current offsets in cached db store the offsets for each batch, i.e. for each batch the offsets will start again at 0
2893-
// compute prefix sum to obtain the single-batch offsets
2894-
2895-
cudaMemsetAsync(ws.d_cacheddb->getOffsetData(), 0, sizeof(size_t), ws.workStreamForTempUsage); CUERR;
2896-
2897-
auto d_paddedLengths = thrust::make_transform_iterator(
2898-
ws.d_cacheddb->getLengthData(),
2899-
RoundToNextMultiple<size_t, 4>{}
2900-
);
2901-
2902-
thrust::inclusive_scan(
2903-
thrust::cuda::par_nosync(thrust_async_allocator<char>(ws.workStreamForTempUsage)).on(ws.workStreamForTempUsage),
2904-
d_paddedLengths,
2905-
d_paddedLengths + ws.getNumSequencesInCachedDB(),
2906-
ws.d_cacheddb->getOffsetData() + 1
2907-
);
2887+
if(batchPlansDstInfoVec[gpu].size() > 0){
2888+
if(!batchPlansDstInfoVec[gpu][0].isUploaded){
2889+
//all batches for cached db are now resident in gpu memory. update the flags
2890+
if(ws.getNumBatchesInCachedDB() > 0){
2891+
markCachedDBBatchesAsUploaded(gpu);
2892+
2893+
// current offsets in cached db store the offsets for each batch, i.e. for each batch the offsets will start again at 0
2894+
// compute prefix sum to obtain the single-batch offsets
2895+
2896+
cudaMemsetAsync(ws.d_cacheddb->getOffsetData(), 0, sizeof(size_t), ws.workStreamForTempUsage); CUERR;
2897+
2898+
auto d_paddedLengths = thrust::make_transform_iterator(
2899+
ws.d_cacheddb->getLengthData(),
2900+
RoundToNextMultiple<size_t, 4>{}
2901+
);
2902+
2903+
thrust::inclusive_scan(
2904+
thrust::cuda::par_nosync(thrust_async_allocator<char>(ws.workStreamForTempUsage)).on(ws.workStreamForTempUsage),
2905+
d_paddedLengths,
2906+
d_paddedLengths + ws.getNumSequencesInCachedDB(),
2907+
ws.d_cacheddb->getOffsetData() + 1
2908+
);
2909+
}
29082910
}
29092911
}
29102912
}

0 commit comments

Comments
 (0)