Skip to content

Commit eb20f96

Browse files
authored
Use torch::empty instead of torch::zeros: torch::empty is more efficient because it skips the zero-fill
1 parent 4108dd9 commit eb20f96

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

interpolation_kernel.cu

+3 -3
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ torch::Tensor trilinear_fw_cu(
38 38
){
39 39
const int N = feats.size(0), F = feats.size(2);
40 40

41-
torch::Tensor feat_interp = torch::zeros({N, F}, feats.options());
41+
torch::Tensor feat_interp = torch::empty({N, F}, feats.options());
42 42

43 43
const dim3 threads(16, 16);
44 44
const dim3 blocks((N+threads.x-1)/threads.x, (F+threads.y-1)/threads.y);
@@ -96,7 +96,7 @@ torch::Tensor trilinear_bw_cu(
96 96
){
97 97
const int N = feats.size(0), F = feats.size(2);
98 98

99-
torch::Tensor dL_dfeats = torch::zeros({N, 8, F}, feats.options());
99+
torch::Tensor dL_dfeats = torch::empty({N, 8, F}, feats.options());
100 100

101 101
const dim3 threads(16, 16);
102 102
const dim3 blocks((N+threads.x-1)/threads.x, (F+threads.y-1)/threads.y);
@@ -112,4 +112,4 @@ torch::Tensor trilinear_bw_cu(
112 112
}));
113 113

114 114
return dL_dfeats;
115-
}
115+
}

0 commit comments

Comments
 (0)