Submit best model #13

Open · wants to merge 3 commits into main
6 changes: 4 additions & 2 deletions cnnlstm_train.sh
@@ -2,16 +2,18 @@

# Set SCC project
#$ -P ds598
#$ -m beas
#$ -M [email protected]

module load miniconda
module load academic-ml/spring-2024

conda activate spring-2024-pyt

# Change this path to point to your project directory
export PYTHONPATH="/projectnb/ds598/admin/tgardos/sp2024_midterm:$PYTHONPATH" # Set this!!!
export PYTHONPATH="/projectnb/ds598/students/jazzmine/sp2024_midterm:$PYTHONPATH" # Set this!!!

python -m spacy download en_core_web_sm # download spacy model
# python -m spacy download en_core_web_sm # download spacy model
python src/cnn_lstm/train.py

### The command below is used to submit the job to the cluster
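For context on the directives this PR adds: "#$ -m beas" asks Grid Engine to send mail when the job (b)egins, (e)nds, (a)borts, or is (s)uspended, and "#$ -M" sets the recipient address. A minimal sketch of the pattern with a placeholder address; the qsub line for this particular script is truncated above, so the submission command shown is an assumption mirroring the one given for demo_test.sh:

#$ -P ds598              # SCC project
#$ -m beas               # mail on begin, end, abort, suspend
#$ -M user@example.edu   # placeholder recipient

# Presumed submission command, following the demo_test.sh pattern:
# qsub -pe omp 4 -P ds598 -l gpus=1 cnnlstm_train.sh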
4 changes: 3 additions & 1 deletion demo_test.sh
@@ -2,6 +2,8 @@

# Set SCC project
#$ -P ds598
#$ -m beas
#$ -M [email protected]

module load miniconda
module load academic-ml/spring-2024
@@ -14,4 +16,4 @@ export PYTHONPATH="/projectnb/ds598/admin/tgardos/sp2024_midterm:$PYTHONPATH" #
python src/demo_model/test.py

### The command below is used to submit the job to the cluster
### qsub -pe omp 4 -P ds598 -l gpus=1 git_test.sh
### qsub -pe omp 4 -P ds598 -l gpus=1 demo_test.sh
5 changes: 4 additions & 1 deletion demo_train.sh
@@ -2,15 +2,18 @@

# Set SCC project
#$ -P ds598
#$ -m beas
#$ -M [email protected]

module load miniconda
module load academic-ml/spring-2024

conda activate spring-2024-pyt

# Change this path to point to your project directory
export PYTHONPATH="/projectnb/ds598/admin/tgardos/sp2024_midterm:$PYTHONPATH"
export PYTHONPATH="/projectnb/ds598/students/jazzmine/sp2024_midterm:$PYTHONPATH"

# python -m spacy download en_core_web_sm
python src/demo_model/train.py

### The command below is used to submit the job to the cluster
4 changes: 2 additions & 2 deletions src/base/constants.py
@@ -5,7 +5,7 @@
import spacy

# set this path to where you want to save results
BASE_DIR = "/projectnb/ds598/projects/tgardos/sp2024_midterm/"
BASE_DIR = "/projectnb/ds598/students/jazzmine/sp2024_midterm/"

# Do not edit. This points to the dataset folder
DATA_BASE_DIR = "/projectnb/ds598/materials/datasets/vizwiz/captions/"
@@ -53,5 +53,5 @@
DEMO_STD = np.array([58.395, 57.120, 57.375]) / 255

# SAVE PATHS
DEMO_SAVE_PATH = BASE_DIR + "RESULTS/git"
DEMO_SAVE_PATH = BASE_DIR + "RESULTS/blip-1e-5"
CNNLSTM_SAVE_PATH = BASE_DIR + "RESULTS/cnn_lstm"
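Since BASE_DIR ends with a trailing slash, the plain string concatenation above produces valid paths. A slightly more defensive alternative, sketched for illustration only and not part of this PR, would be os.path.join:

import os

# Equivalent to BASE_DIR + "RESULTS/blip-1e-5", but still correct if the
# trailing slash is ever dropped from BASE_DIR.
DEMO_SAVE_PATH = os.path.join(BASE_DIR, "RESULTS", "blip-1e-5")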
7 changes: 5 additions & 2 deletions src/demo_model/test.py
@@ -13,6 +13,7 @@
import os
import json
from tqdm import tqdm
from transformers import BlipProcessor, BlipForConditionalGeneration

CACHE_DIR = os.environ.get("TRANSFORMERS_CACHE")

@@ -23,7 +24,8 @@
MODEL_PATH = f"{DEMO_SAVE_PATH}/best_model"

# Load your fine tuned model
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, cache_dir=CACHE_DIR)
#model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, cache_dir=CACHE_DIR)
model = BlipForConditionalGeneration.from_pretrained(MODEL_PATH, cache_dir=CACHE_DIR)

## TODO
# You can use the AutoProcessor.from_pretrained() method to load the HuggingFace
@@ -33,7 +35,8 @@
#
# Of course you should use the same model you trained with.
try:
processor = AutoProcessor.from_pretrained("replace-with-model-choice", cache_dir=CACHE_DIR)
#processor = AutoProcessor.from_pretrained("replace-with-model-choice", cache_dir=CACHE_DIR)
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base", cache_dir=CACHE_DIR)
except Exception as e:
print("You need to pick a pre-trained model from HuggingFace.")
print("Exception: ", e)
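The pairing implied by this diff: fine-tuned weights load from MODEL_PATH, while the processor still comes from the original Salesforce/blip-image-captioning-base checkpoint, since the processor configuration does not change during fine-tuning. A minimal inference sketch under those assumptions, where the generate settings mirror train.py and image stands in for a PIL image from the test set:

import torch

inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)
caption = processor.batch_decode(output_ids, skip_special_tokens=True)[0]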
50 changes: 32 additions & 18 deletions src/demo_model/train.py
@@ -12,6 +12,9 @@
import matplotlib.pyplot as plt
import os
import json
import torch.optim as optim
from transformers import BlipProcessor, BlipForConditionalGeneration


################################################################################
# This is template code that will not run as is since a model is not defined but
@@ -32,7 +35,8 @@
# to encode and decode text and images.
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoProcessor
try:
processor = AutoProcessor.from_pretrained("replace-with-model-choice", cache_dir=CACHE_DIR)
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base", cache_dir=CACHE_DIR)
#processor = AutoProcessor.from_pretrained("microsoft/git-large", cache_dir=CACHE_DIR)
except Exception as e:
print("You need to pick a pre-trained model from HuggingFace.")
print("Exception: ", e)
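The BlipProcessor loaded here bundles the image processor and the tokenizer, producing exactly the keys the training loop below pops from each batch. A toy sketch with a made-up caption; the dataset class presumably does this internally:

encoding = processor(
    images=image,              # a PIL image from the dataset
    text="a cat on a couch",   # made-up caption, for illustration
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)
# encoding now carries pixel_values, input_ids, and attention_mask.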
@@ -51,10 +55,10 @@
)

### Use the Subset while debugging ###
# train_dataset = Subset(train_dataset, range(100))
# val_dataset = Subset(val_dataset, range(10))
#train_dataset = Subset(train_dataset, range(10))
#val_dataset = Subset(val_dataset, range(10))

### Since a Subset is used above, the original dataset must be accessed via .dataset. To use the full dataset, it is wrapped as below. ###
# ### Since a Subset is used above, the original dataset must be accessed via .dataset. To use the full dataset, it is wrapped as below. ###
train_dataset = Subset(train_dataset, range(len(train_dataset)))
val_dataset = Subset(val_dataset, range(len(val_dataset)))

@@ -64,23 +68,23 @@
print("SANITY CHECK DONE!!")


train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=8)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=32)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=4)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=16)

## TODO
# You can use the AutoModelForCausalLM.from_pretrained() method to load the HuggingFace
# model you want to fine-tune. This will allow you to use the model to train and evaluate
# on the VizWiz dataset.
try:
model = AutoModelForCausalLM.from_pretrained("replace-with-model-choice", cache_dir=CACHE_DIR)
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", cache_dir=CACHE_DIR)
#model = AutoModelForCausalLM.from_pretrained("microsoft/git-large", cache_dir=CACHE_DIR)
except Exception as e:
print("You need to pick a pre-trained model from HuggingFace.")
print("Exception: ", e)

## TODO Select your model optimizer
try:
raise NotImplementedError("Select your model optimizer")
optimizer = None # pick one from torch.optim
optimizer = optim.AdamW(model.parameters(), lr=1e-5)
except Exception as e:
print("You need to pick an optimizer from torch.optim.")
print("Exception: ", e)
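The model.module branches later in this diff (in generate and save_pretrained) suggest the model gets wrapped in torch.nn.DataParallel when more than one GPU is visible. The wrapping itself is not shown in the diff; a sketch of how it might look right after the model and optimizer are created:

import torch

if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)  # replicate across visible GPUs
model.to(device)  # device as defined elsewhere in this script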
@@ -103,11 +107,12 @@ def train(loger, train_dataloader, model, optimizer, device, processor):
for idx, batch in progress_bar:
input_ids = batch.pop("input_ids").to(device)
pixel_values = batch.pop("pixel_values").to(device)
attn_mask = batch.pop("attention_mask").to(device)

optimizer.zero_grad()

outputs = model(
input_ids=input_ids, pixel_values=pixel_values, labels=input_ids
input_ids=input_ids, pixel_values=pixel_values, labels=input_ids, attention_mask=attn_mask
)

loss = outputs.loss
@@ -122,7 +127,6 @@ def train(loger, train_dataloader, model, optimizer, device, processor):

return loss.item()
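On the added attention_mask: it tells the model which positions of input_ids are real caption tokens and which are padding, so attention skips the padded positions. A toy illustration with made-up token ids and pad id 0:

import torch

input_ids = torch.tensor([[101, 2023, 2003, 102, 0, 0]])  # 0 = padding
attention_mask = (input_ids != 0).long()                   # [[1, 1, 1, 1, 0, 0]]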


def evaluate(
logger, epoch, save_path, best_score, val_dataloader, model, processor, device
):
@@ -132,9 +136,15 @@ def evaluate(
for idx, batch in enumerate(val_dataloader):
image_ids = batch.pop("image_ids").to(device)
pixel_values = batch.pop("pixel_values").to(device)
attn_mask = batch.pop("attention_mask").to(device)

with torch.no_grad():
outputs = model.generate(pixel_values=pixel_values, max_length=50)
if torch.cuda.device_count() > 1:
    outputs = model.module.generate(pixel_values=pixel_values, max_length=50)
else:
    outputs = model.generate(pixel_values=pixel_values, max_length=50)

# Decode the generated ids to text
generated_captions = processor.batch_decode(outputs, skip_special_tokens=True)
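Why the device_count check is needed: torch.nn.DataParallel only proxies the forward call, so model-specific helpers such as generate and save_pretrained live on the wrapped model at model.module. A generic unwrap pattern equivalent to the branches used in this diff:

# Unwrap once, then call helper methods on the underlying model.
unwrapped = model.module if hasattr(model, "module") else model
outputs = unwrapped.generate(pixel_values=pixel_values, max_length=50)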
@@ -156,7 +166,7 @@ def evaluate(
vizwizEval = VizWizEvalCap(val_dataset.dataset.vizwiz, vizwizRes)
vizwizEval.evaluate()

logger.info(f"Validation scores at epoch: {epoch}")
logger.info(f"Validation scores at epoch: {epoch+1}")
for method in vizwizEval.eval:
logger.info(f" Method: {method}, Score: {vizwizEval.eval[method]:.4f}")

@@ -223,7 +233,7 @@ def get_val_examples(vizwizEval, vizwizRes, plot_captions_dict, epoch, method="C


best_score = 0
for epoch in range(3):
for epoch in range(5):
print(f"Epoch: {epoch+1}")
# Wrap the dataloader with tqdm for a progress bar
progress_bar = tqdm(
Expand All @@ -232,10 +242,10 @@ def get_val_examples(vizwizEval, vizwizRes, plot_captions_dict, epoch, method="C

# Train the model
loss = train(logger, train_dataloader, model, optimizer, device, processor)
logger.info(f"Loss at epoch {epoch}: {loss}")
logger.info(f"Loss at epoch {epoch+1}: {loss}")

# Evaluate the model every 3 epochs
if epoch % 3 == 0:
# Evaluate the model every epoch
if epoch % 1 == 0:
vizwizEval, vizwizRes, plot_captions_dict = evaluate(
logger,
epoch,
Expand All @@ -249,7 +259,11 @@ def get_val_examples(vizwizEval, vizwizRes, plot_captions_dict, epoch, method="C
score = vizwizEval.eval[method]
if score > best_score:
best_score = score
model.save_pretrained(f"{DEMO_SAVE_PATH}/best_model")
if torch.cuda.device_count() > 1:
    model.module.save_pretrained(f"{DEMO_SAVE_PATH}/best_model")
else:
    model.save_pretrained(f"{DEMO_SAVE_PATH}/best_model")

logger.info(f"New best score: {best_score}. Model saved")

get_val_examples(vizwizEval, vizwizRes, plot_captions_dict, epoch, method)