-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmodel.py
92 lines (73 loc) · 3.53 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
import numpy as np
import torch.nn.functional as F
# We copied the TransformerEncoder, TransformerEncoderLayer and MultiheadAttention code from pytorch 1.3 code base
# so we can run with PyTorch >= 1.1.
from transplant_attn.transformer_from_torch import TransformerEncoder, TransformerEncoderLayer
from transplant_attn.MultiheadAttention_from_torch import MultiheadAttention
def run_attn(attn, x, use_ffn):
"""
:param attn: Attention functions, currently support nn.Multihead and TransformerEncoder
:param x: Input embeddings in shape (B, K, N, C), C denotes the No. of channels for each point, e.g. 512.
:param use_ffn: if choose use_ffn, the input is only x
:return: Soft attn output () and weights. The returned weights is None if use TransformerEncoder
"""
attn_out_list = []
weights = None
B = x.shape[0]
if use_ffn:
for b in range(B):
attn_out, weights = attn(x[b]) # x: (K, N, 512), attn_out: (K, N, 512), weights: (N, K, K)
attn_out_list.append(attn_out)
else:
for b in range(B):
attn_out, weights = attn(x[b], x[b], x[b]) # x: (K, N, 512), attn_out: (K, N, 512), weights: (N, K, K)
attn_out_list.append(attn_out)
# The weights are only for debug and visualisation.
# We just return the (N, K) matrix, not the full (N, K, K) tensor.
if weights is not None:
weights = weights[:, 0, :]
x = torch.stack(attn_out_list)
return x, weights
class NINormalNet(nn.Module):
def __init__(self):
super(NINormalNet, self).__init__()
self.conv0 = nn.Conv2d(3, 64, kernel_size=(1, 1))
self.conv1 = nn.Conv2d(64, 256, kernel_size=(1, 1))
self.conv2 = nn.Conv2d(256, 512, kernel_size=(1, 1))
self.bn0 = nn.BatchNorm2d(64)
self.bn1 = nn.BatchNorm2d(256)
self.bn2 = nn.BatchNorm2d(512)
encoder_layer = TransformerEncoderLayer(d_model=512, nhead=8, dim_feedforward=2048, dropout=0.0)
self.attn = TransformerEncoder(encoder_layer, num_layers=1)
self.fc0 = nn.Conv1d(512, 256, kernel_size=1)
self.fc1 = nn.Conv1d(256, 64, kernel_size=1)
self.fc2 = nn.Conv1d(64, 3, kernel_size=1)
self.bn_fc0 = nn.BatchNorm1d(256)
self.bn_fc1 = nn.BatchNorm1d(64)
# the temperature is just a scalar learnable that controls the softmax strength
self.temp = nn.Parameter(torch.tensor(1.0, dtype=torch.float32), requires_grad=True) # (1, )
def forward(self, pts):
"""
:param pts: (B, K, N, 3) input points
:return: (B, N, 3) normals
"""
x = pts.permute(0, 3, 1, 2) # (B, 3, K, N)
x = F.relu(self.bn0(self.conv0(x))) # (B, C, K, N)
x = F.relu(self.bn1(self.conv1(x))) # (B, C, K, N)
x = F.relu(self.bn2(self.conv2(x))) # (B, C, K, N)
# learn a temperature
x = x / self.temp
x, weights = run_attn(attn=self.attn, x=x.permute(0, 2, 3, 1), use_ffn=True)
x, _ = torch.max(x, dim=1) # (B, K, N, 512) -> (B, N, 512)
x = x.transpose(1, 2) # (B, C, N)
x = F.relu(self.bn_fc0(self.fc0(x))) # (B, C, N)
x = F.relu(self.bn_fc1(self.fc1(x))) # (B, C, N)
x = self.fc2(x) # (B, 3, N)
x = x.transpose(1, 2) # (B, N, 3)
# normalise all normal predictions to unit length, we only care about angle in normal estimation task.
x = F.normalize(x, dim=2)
return x, weights