
Commit 3135741

initial commit
1 parent ba640e1 commit 3135741

File tree

7 files changed, +852 -0 lines changed

.gitignore

+5
@@ -127,3 +127,8 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# etc
+*.png
+*.jpg
+*.pth

create_dataset.py

+111
@@ -0,0 +1,111 @@
import os
import cv2
import glob
import random
import progressbar

import numpy as np

import matplotlib.pyplot as plt

rand_color = lambda : (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
rand_pos = lambda a, b: (random.randint(a, b-1), random.randint(a, b-1))

target_size = 256
imgs_per_back = 30

backs = glob.glob('./dataset/backs/*.png')
fonts = glob.glob('./dataset/font_mask/*.png')

os.makedirs('./dataset/train/I', exist_ok=True)
os.makedirs('./dataset/train/Itegt', exist_ok=True)
os.makedirs('./dataset/train/Mm', exist_ok=True)
os.makedirs('./dataset/train/Msgt', exist_ok=True)

os.makedirs('./dataset/val/I', exist_ok=True)
os.makedirs('./dataset/val/Itegt', exist_ok=True)
os.makedirs('./dataset/val/Mm', exist_ok=True)
os.makedirs('./dataset/val/Msgt', exist_ok=True)

t_idx = len(os.listdir('./dataset/train/I'))
v_idx = len(os.listdir('./dataset/val/I'))

bar = progressbar.ProgressBar(maxval=len(backs)*imgs_per_back)
bar.start()
for back in backs:
    back_img = cv2.imread(back)
    bh, bw, _ = back_img.shape
    if bh < target_size or bw < target_size:
        back_img = cv2.resize(back_img, (target_size, target_size), interpolation=cv2.INTER_CUBIC)
        bh, bw, _ = back_img.shape

    for bi in range(imgs_per_back):
        # crop a random target_size patch of the background as the text-free ground truth
        sx, sy = random.randint(0, bw-target_size), random.randint(0, bh-target_size)

        Itegt = back_img[sy:sy+target_size, sx:sx+target_size, :].copy()
        I = Itegt.copy()
        Mm = np.zeros_like(I)
        Msgt = np.zeros_like(I)

        hist = []
        for font in random.sample(fonts, random.randint(2, 4)):
            font_img = cv2.imread(font)
            mask_img = np.ones_like(font_img, dtype=np.uint8)*255

            height, width, _ = font_img.shape

            angle = random.randint(-30, +30)
            fs = random.randint(90, 120)
            ratio = fs / height - 0.2

            matrix = cv2.getRotationMatrix2D((width/2, height/2), angle, ratio)
            font_rot = cv2.warpAffine(font_img, matrix, (width, height), flags=cv2.INTER_CUBIC)
            mask_rot = cv2.warpAffine(mask_img, matrix, (width, height), flags=cv2.INTER_CUBIC)

            h, w, _ = font_rot.shape

            font_in_I = np.zeros_like(I)
            mask_in_I = np.zeros_like(I)

            # pick a placement that does not collide with previously placed glyphs,
            # gradually relaxing the constraint on every retry
            allow = 0
            while True:
                sx, sy = rand_pos(0, target_size-w)

                done = True
                for sx_, sy_ in hist:
                    if (sx_ - sx)**2 + (sy_ - sy)**2 < (fs * ratio)**2 - allow:
                        done = False
                        break
                allow += 5

                if done:
                    hist.append([sx, sy])
                    break

            font_in_I[sy:sy+h, sx:sx+w, :] = font_rot
            mask_in_I[sy:sy+h, sx:sx+w, :] = mask_rot

            font_in_I[font_in_I > 30] = 255
            mask_in_I[mask_in_I > 30] = 255

            # erase the glyph region from I, then paint the strokes with a random color
            I = cv2.bitwise_and(I, 255-font_in_I)
            I = cv2.bitwise_or(I, (font_in_I // 255 * rand_color()).astype(np.uint8))

            Mm = cv2.bitwise_or(Mm, mask_in_I)
            Msgt = cv2.bitwise_or(Msgt, font_in_I)

        # 80/20 train/val split per background image
        if bi < imgs_per_back*0.8:
            cv2.imwrite(f'dataset/train/I/{t_idx}.png', I)
            cv2.imwrite(f'dataset/train/Itegt/{t_idx}.png', Itegt)
            cv2.imwrite(f'dataset/train/Mm/{t_idx}.png', Mm)
            cv2.imwrite(f'dataset/train/Msgt/{t_idx}.png', Msgt)
            t_idx += 1
        else:
            cv2.imwrite(f'dataset/val/I/{v_idx}.png', I)
            cv2.imwrite(f'dataset/val/Itegt/{v_idx}.png', Itegt)
            cv2.imwrite(f'dataset/val/Mm/{v_idx}.png', Mm)
            cv2.imwrite(f'dataset/val/Msgt/{v_idx}.png', Msgt)
            v_idx += 1

        bar.update(t_idx + v_idx)
bar.finish()
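A note on inputs and outputs (inferred from the glob patterns and makedirs calls above): the script reads background images from ./dataset/backs/*.png and glyph masks from ./dataset/font_mask/*.png, and writes synthetic samples into dataset/{train,val}/{I,Itegt,Mm,Msgt}. A minimal, hypothetical pre-flight check before running it might look like this (not part of the commit):

# Hypothetical pre-flight check (not part of this commit): confirm the source folders
# that create_dataset.py globs actually contain images before kicking off generation.
import glob

backs = glob.glob('./dataset/backs/*.png')
fonts = glob.glob('./dataset/font_mask/*.png')
print(f"{len(backs)} background images, {len(fonts)} font masks")
assert backs and fonts, "populate ./dataset/backs and ./dataset/font_mask first"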

dataset.py

+75
@@ -0,0 +1,75 @@
import os, cv2
import numpy as np

import torch
from torch.utils.data import Dataset

def mat_to_tensor(mat):
    # HWC -> CHW
    mat = mat.transpose((2, 0, 1))
    tensor = torch.Tensor(mat)
    return tensor

def tensor_to_mat(tensor):
    # NCHW -> NHWC
    mat = tensor.detach().cpu().numpy()
    mat = mat.transpose((0, 2, 3, 1))
    return mat

def preprocess_image(img, target_shape: tuple):
    img = cv2.resize(img, target_shape, interpolation=cv2.INTER_CUBIC).astype(np.float32)
    img = img / 255.
    if len(img.shape) == 2:
        img = img.reshape(*img.shape, 1)

    return img

def postprocess_image(img):
    # img = img * 255
    img = (img - img.min()) / (img.max() - img.min()) * 255
    return img.astype(np.uint8)

class CustomDataset(Dataset):
    def __init__(self,
                 data_dir,
                 set_name="train",
                 target_size=(256, 256)):

        super().__init__()

        self.root_dir = os.path.join(data_dir, set_name)
        self.target_size = target_size

        self.I_dir = os.path.join(self.root_dir, "I")
        self.Itegt_dir = os.path.join(self.root_dir, "Itegt")
        self.Mm_dir = os.path.join(self.root_dir, "Mm")
        self.Msgt_dir = os.path.join(self.root_dir, "Msgt")

        self.datas = os.listdir(self.I_dir)

    def __len__(self):
        return len(self.datas)

    def __getitem__(self, idx):
        img_name = self.datas[idx]

        I = cv2.imread(os.path.join(self.I_dir, img_name))
        Itegt = cv2.imread(os.path.join(self.Itegt_dir, img_name))
        Mm = cv2.imread(os.path.join(self.Mm_dir, img_name), cv2.IMREAD_GRAYSCALE)
        Msgt = cv2.imread(os.path.join(self.Msgt_dir, img_name), cv2.IMREAD_GRAYSCALE)

        I = mat_to_tensor(preprocess_image(I, self.target_size))
        Itegt = mat_to_tensor(preprocess_image(Itegt, self.target_size))
        Mm = mat_to_tensor(preprocess_image(Mm, self.target_size))
        Msgt = mat_to_tensor(preprocess_image(Msgt, self.target_size))

        return I, Itegt, Mm, Msgt


if __name__ == "__main__":
    ds = CustomDataset('dataset', 'train')

    I, Itegt, Mm, Ms = ds[0]
    print(f"Dataset length : {len(ds)}")
    print(f"I shape : {I.shape}")
    print(f"Itegt shape : {Itegt.shape}")
    print(f"Mm shape : {Mm.shape}")
    print(f"Ms shape : {Ms.shape}")

losses.py

+16
@@ -0,0 +1,16 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

def TSDLoss(Mgt, Ms, Ms_, r=10):
    return torch.mean(torch.abs(Ms-Mgt) + r * torch.abs(Ms_-Mgt))

def TRGLoss(Mm, Ms, Ms_, Itegt, Ite, Ite_, rm=5, rs=5, rr=10):

    Mw = torch.ones_like(Mm) + rm * Mm + rs * Ms
    Mw_ = torch.ones_like(Mm) + rm * Mm + rs * Ms_

    Ltrg = torch.mean(torch.abs(torch.mul(Ite, Mw) - torch.mul(Itegt, Mw)) + \
                      rr * torch.abs(torch.mul(Ite_, Mw_) - torch.mul(Itegt, Mw_)))

    return Ltrg
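To make the signatures concrete, here is a hedged call sketch with dummy tensors; the interpretation of each argument (stroke masks Msgt/Ms/Ms_, region mask Mm, coarse and refined erased images Ite/Ite_) is an assumption based on the variable names used elsewhere in this commit:

# Hypothetical call sketch (not part of this commit): exercise both losses on random tensors.
import torch

from losses import TSDLoss, TRGLoss

B, H, W = 2, 256, 256
Msgt = torch.rand(B, 1, H, W)                               # ground-truth text-stroke mask
Ms, Ms_ = torch.rand(B, 1, H, W), torch.rand(B, 1, H, W)    # predicted / refined stroke masks
Mm = torch.rand(B, 1, H, W)                                 # text-region mask
Itegt = torch.rand(B, 3, H, W)                              # text-erased ground truth
Ite, Ite_ = torch.rand(B, 3, H, W), torch.rand(B, 3, H, W)  # coarse / refined erased outputs

print(TSDLoss(Msgt, Ms, Ms_))                  # L1 on both stroke predictions, refined term weighted by r
print(TRGLoss(Mm, Ms, Ms_, Itegt, Ite, Ite_))  # L1 on erased images, re-weighted by the mask map Mw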

modules.py

+139
@@ -0,0 +1,139 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

# dis_conv
# (https://github.com/JiahuiYu/generative_inpainting/blob/3a5324373ba52c68c79587ca183bc10b9e57b783/inpaint_ops.py#L84)
class _dis_conv(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size=5, stride=2, padding=2):
        super().__init__()

        self._conv = nn.Sequential(
            nn.utils.spectral_norm(
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
            ),
            nn.LeakyReLU(inplace=True)
        )

        # weight initialization
        def weight_init(m):
            if isinstance(m, nn.Conv2d):
                # nn.utils.spectral_norm(m.weight)
                nn.init.zeros_(m.bias)

        self.apply(weight_init)

    def forward(self, x):
        return self._conv(x)

# weights are fixed to one, bias to zero
class _one_conv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=5, stride=2, padding=2):
        super().__init__()

        self._conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
        )

        # weight initialization
        def weight_init(m):
            if isinstance(m, nn.Conv2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
                m.weight.requires_grad = False
                m.bias.requires_grad = False

        self.apply(weight_init)

    def forward(self, x):
        return self._conv(x)

class _double_conv2d(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, mid_channels=None):
        super().__init__()

        if not mid_channels:
            mid_channels = out_channels

        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=kernel_size, padding=padding),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),

            nn.Conv2d(mid_channels, out_channels, kernel_size=kernel_size, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

        # weight initialization
        def weight_init(m):
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight, gain=nn.init.calculate_gain('relu'))
                nn.init.zeros_(m.bias)

        self.apply(weight_init)

    def forward(self, x):
        return self.double_conv(x)


class _down_conv2d(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size):

        super().__init__()

        self.seq_model = nn.Sequential(
            nn.MaxPool2d(2),
            _double_conv2d(in_channels, out_channels)
        )

    def forward(self, x):
        return self.seq_model(x)


class _up_conv2d(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size):

        super().__init__()

        self.conv_t = nn.ConvTranspose2d(in_channels, in_channels//2, 2, 2)
        self.conv = _double_conv2d(in_channels, out_channels)

    # x1 : input, x2 : matching down_conv2d output
    def forward(self, x1, x2):
        x1 = self.conv_t(x1)

        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])

        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class _final_conv2d(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size):

        super().__init__()

        self.conv = nn.Conv2d(in_channels, out_channels, 1, 1)

    def forward(self, x):
        return self.conv(x)
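These blocks follow the usual U-Net pattern: _down_conv2d halves the spatial size, _up_conv2d upsamples and concatenates the matching encoder feature, and _final_conv2d is a 1x1 projection. A hedged composition sketch, purely illustrative and not the network defined by this commit:

# Hypothetical composition sketch (not part of this commit): wire the blocks U-Net style.
import torch
import torch.nn as nn

from modules import _double_conv2d, _down_conv2d, _up_conv2d, _final_conv2d

class TinyUNet(nn.Module):
    def __init__(self, in_ch=3, out_ch=1):
        super().__init__()
        self.inc = _double_conv2d(in_ch, 32)
        self.down1 = _down_conv2d(32, 64, 3)
        self.down2 = _down_conv2d(64, 128, 3)
        self.up1 = _up_conv2d(128, 64, 3)   # ConvTranspose2d halves channels; the skip concat restores them
        self.up2 = _up_conv2d(64, 32, 3)
        self.outc = _final_conv2d(32, out_ch, 1)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        y = self.up1(x3, x2)
        y = self.up2(y, x1)
        return torch.sigmoid(self.outc(y))

print(TinyUNet()(torch.rand(1, 3, 256, 256)).shape)  # torch.Size([1, 1, 256, 256])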
