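Use torch::empty instead of torch::zeros for kernel output tensors

torch::empty is more efficient than torch::zeros because it skips the zero-fill.
It returns uninitialized memory, so this is only safe if the kernels launched by
trilinear_fw_cu and trilinear_bw_cu write every element of feat_interp and
dL_dfeats.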

kwea123 · web-flow · commit eb20f9604a35 · 2023-04-17T12:48:54.000+08:00
diff --git a/interpolation_kernel.cu b/interpolation_kernel.cu
@@ -38,7 +38,7 @@ torch::Tensor trilinear_fw_cu(
 ){
     const int N = feats.size(0), F = feats.size(2);
     
-    torch::Tensor feat_interp = torch::zeros({N, F}, feats.options());
+    torch::Tensor feat_interp = torch::empty({N, F}, feats.options());
 
     const dim3 threads(16, 16);
     const dim3 blocks((N+threads.x-1)/threads.x, (F+threads.y-1)/threads.y);
@@ -96,7 +96,7 @@ torch::Tensor trilinear_bw_cu(
 ){
     const int N = feats.size(0), F = feats.size(2);
     
-    torch::Tensor dL_dfeats = torch::zeros({N, 8, F}, feats.options());
+    torch::Tensor dL_dfeats = torch::empty({N, 8, F}, feats.options());
 
     const dim3 threads(16, 16);
     const dim3 blocks((N+threads.x-1)/threads.x, (F+threads.y-1)/threads.y);
@@ -112,4 +112,4 @@ torch::Tensor trilinear_bw_cu(
     }));
 
     return dL_dfeats;
-}
+}