add 4th video content

kwea123 · kwea123 · commit e53393c664f0 · 2022-08-16T11:56:58.000+09:00
diff --git a/interpolation_kernel.cu b/interpolation_kernel.cu
@@ -1,9 +1,40 @@
 #include <torch/extension.h>
 
 
+template <typename scalar_t>
+__global__ void trilinear_fw_kernel( // one thread writes one feat_interp[n][f] as a weighted sum of feats[n][0..7][f]
+    const torch::PackedTensorAccessor<scalar_t, 3, torch::RestrictPtrTraits, size_t> feats, // read as feats[n][k][f] with k = 0..7
+    const torch::PackedTensorAccessor<scalar_t, 2, torch::RestrictPtrTraits, size_t> points, // read as points[n][0..2]
+    torch::PackedTensorAccessor<scalar_t, 2, torch::RestrictPtrTraits, size_t> feat_interp // output, written at [n][f]
+){
+    const int n = blockIdx.x * blockDim.x + threadIdx.x; // row index, taken from the x dimension of the launch
+    const int f = blockIdx.y * blockDim.y + threadIdx.y; // feature index, taken from the y dimension of the launch
+    // Threads whose (n, f) falls outside feats.size(0) x feats.size(2) exit without writing anything.
+    if (n>=feats.size(0) || f>=feats.size(2)) return;
+    // Map each coordinate x to (x+1)/2. The original note says the point spans -1~1,
+    // so u, v, w would land in 0~1; the input range itself is not checked here.
+    const scalar_t u = (points[n][0]+1)/2;
+    const scalar_t v = (points[n][1]+1)/2;
+    const scalar_t w = (points[n][2]+1)/2;
+    // Four weights built from v and w only; they are reused for both groups of four rows below.
+    const scalar_t a = (1-v)*(1-w);
+    const scalar_t b = (1-v)*w;
+    const scalar_t c = v*(1-w);
+    const scalar_t d = 1-a-b-c; // remainder of the other three; algebraically equal to v*w
+    feat_interp[n][f] = (1-u)*(a*feats[n][0][f] + // rows 0-3 are scaled by (1-u)
+                                b*feats[n][1][f] +
+                                c*feats[n][2][f] +
+                                d*feats[n][3][f]) + 
+                            u*(a*feats[n][4][f] + // rows 4-7 are scaled by u
+                                b*feats[n][5][f] +
+                                c*feats[n][6][f] +
+                                d*feats[n][7][f]); // NOTE(review): rows 0..7 are presumably the 8 cell corners; confirm ordering with the caller
+}
+
+
 torch::Tensor trilinear_fw_cu(
     torch::Tensor feats,
-    torch::Tensor points
+    torch::Tensor points,
 ){
     const int N = feats.size(0), F = feats.size(2);