Got some preliminary training going. The loss values were negative and kept growing more negative (increasing in magnitude) instead of converging. Will start implementing pixel normalization and use Dice loss instead of binary cross-entropy loss.

harshakmohan · harshakmohan · commit 0716e8c39c40 · 2022-02-10T16:22:51.000-05:00
diff --git a/requirements.txt b/requirements.txt
@@ -8,4 +8,5 @@ Pillow
 scipy
 argparse
 albumentations
-pytorch3d
+pytorch3d
+cuda-python
diff --git a/tutorial/dataset.py b/tutorial/dataset.py
@@ -2,7 +2,9 @@
 from PIL import Image
 from torch.utils.data import Dataset
 import numpy as np
+import torch
 import scipy
+import torchvision.transforms.functional as TF
 
 
 # def readKinematics(path):
@@ -30,8 +32,15 @@ def __getitem__(self, index):
         img_path = os.path.join(self.image_dir, self.images[index])
         mask_path = os.path.join(self.mask_dir, self.images[index])
 
-        image = np.array(Image.open(img_path).convert('RGB'))
-        mask = np.array(Image.open(mask_path).convert('L'), dtype=np.float32)
+        #image = TF.pil_to_tensor(Image.open(img_path).convert('RGB'))
+        #mask = TF.pil_to_tensor(Image.open(mask_path).convert('L'))
+
+        image = torch.from_numpy(np.array(Image.open(img_path).convert('RGB'))/255.0).float()
+        image = torch.permute(image, (2, 0, 1))
+        mask = torch.from_numpy(np.array(Image.open(mask_path).convert('L'), dtype=np.float32)/255.0).float()
+
+        #print('image dim: ', image.size())
+        #print('mask dim: ', mask.size())
 
         #mask[mask == 255.0] = 1.0
 
diff --git a/tutorial/train.py b/tutorial/train.py
@@ -11,7 +11,7 @@
 # Hyperparameters
 LEARNING_RATE = 1e-4
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-BATCH_SIZE = 10
+BATCH_SIZE = 5
 NUM_EPOCHS = 3
 NUM_WORKERS = 2 # What does this do?
 IMAGE_HEIGHT = 538  # 1280 originally
@@ -47,7 +47,7 @@ def train_fn(loader, model, optimizer, loss_fn, scaler):
 def main():
 
     model = UNET(in_channels=3, out_channels=1).to(device=DEVICE)
-    loss_fn = nn.BCEWithLogitsLoss()
+    loss_fn = nn.BCEWithLogitsLoss() # LOSS FUNCTION DEFINED HERE. Perhaps change it to dice score
     optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
 
     train_loader, val_loader = get_loaders(train_dir=TRAIN_IMG_DIR,
@@ -63,6 +63,7 @@ def main():
     scaler = torch.cuda.amp.GradScaler()
 
     for epoch in range(NUM_EPOCHS):
+        print(type(train_loader))
         train_fn(train_loader, model, optimizer, loss_fn, scaler)
 
         # save model