
change: workstation #12

Open · wants to merge 1 commit into base: main
2 changes: 2 additions & 0 deletions README.md
@@ -116,4 +116,6 @@ State-of-the-art CIDEr-D scores on VizWiz Image Captioning is ~125. We're asking
3. [BLEU Metric](https://huggingface.co/spaces/evaluate-metric/bleu), HuggingFace space


I tried several models, including BLIP, GPT, CLIP, and CLVP, but they all raised errors at runtime, which cost me a lot of time. I finally settled on the GIT model. My optimizer is set to `optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=1e-6, dampening=0, nesterov=True)`.
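Spelled out as a minimal, runnable sketch (the `nn.Linear` model is a stand-in for illustration, not the actual GIT captioning model):

```python
import torch
from torch import nn, optim

model = nn.Linear(4, 2)  # placeholder for the GIT captioning model

# SGD with Nesterov momentum; nesterov=True requires momentum > 0
# and dampening == 0, which this configuration satisfies.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=1e-6,
    dampening=0,
    nesterov=True,
)
```

Nesterov momentum evaluates the gradient at the look-ahead point rather than the current parameters, which often converges a little faster than plain momentum at the same learning rate.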

I also changed the dataset loading so that `inputs = self.processor(images=img, text=caption, padding="max_length", return_tensors="pt")`, and added `attention_mask` to the encoding.
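For reference, a hypothetical sketch of what that dataset change could look like (the class and variable names are assumptions, not taken from this repo); the processor pads each caption to `max_length` and the resulting `attention_mask` marks real tokens (1) versus padding (0):

```python
class CaptionDataset:
    """Minimal image-captioning dataset wrapper (illustrative only)."""

    def __init__(self, samples, processor, max_length=64):
        self.samples = samples        # list of (image, caption) pairs
        self.processor = processor    # e.g. an AutoProcessor for GIT
        self.max_length = max_length

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img, caption = self.samples[idx]
        inputs = self.processor(
            images=img,
            text=caption,
            padding="max_length",
            max_length=self.max_length,
            truncation=True,
            return_tensors="pt",
        )
        # Drop the batch dimension the processor adds, keeping
        # input_ids, attention_mask, and pixel_values per sample.
        return {k: v.squeeze(0) for k, v in inputs.items()}
```

In the training loop, the `attention_mask` is what lets the model (and the loss) ignore the pad positions introduced by `padding="max_length"`.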
18 changes: 0 additions & 18 deletions cnnlstm_test.sh

This file was deleted.

18 changes: 0 additions & 18 deletions cnnlstm_train.sh

This file was deleted.

18 changes: 12 additions & 6 deletions demo_train.sh
@@ -3,15 +3,21 @@
# Set SCC project
#$ -P ds598

# load and activate the academic-ml conda environment on SCC
module load miniconda
module load academic-ml/spring-2024

conda activate spring-2024-pyt

# Change this path to point to your project directory
export PYTHONPATH="/projectnb/ds598/admin/tgardos/sp2024_midterm:$PYTHONPATH"

python src/demo_model/train.py
# Add the path to your source project directory to the python search path
# so that the local `import` commands will work.
export PYTHONPATH="/projectnb/ds598/students/demoyu/sp2024_midterm:$PYTHONPATH"

# Update this path to point to your training file
python src/demo_model/test_train.py
python src/demo_model/test.py

### The command below is used to submit the job to the cluster
### qsub -pe omp 4 -P ds598 -l gpus=1 demo_train.sh
# After updating the two paths above, run the command below from an SCC
# command prompt in the same directory as this file to submit this as a
# batch job.
### qsub -pe omp 4 -P ds598 -l gpus=1 demo_train.sh
16 changes: 8 additions & 8 deletions src/base/constants.py
@@ -5,23 +5,23 @@
import spacy

# set this path to where you want to save results
BASE_DIR = "/projectnb/ds598/projects/tgardos/sp2024_midterm/"
BASE_DIR = "/projectnb/ds598/students/demoyu/sp2024_midterm/"

# Do not edit. This points to the dataset folder
DATA_BASE_DIR = "/projectnb/ds598/materials/datasets/vizwiz/captions/"

os.environ["SPACY_DATA"] = BASE_DIR + "/misc/spacy_data"
os.environ["SPACY_DATA"] = BASE_DIR + "/misc2/spacy_data"

nltk_data_directory = BASE_DIR + "misc/nltk_data"
nltk_data_directory = BASE_DIR + "misc2/nltk_data"
nltk.data.path.append(nltk_data_directory)
nltk.download("punkt", download_dir=nltk_data_directory)

# Set the Transformers cache directory
os.environ["TRANSFORMERS_CACHE"] = BASE_DIR + "misc"
os.environ["TRANSFORMERS_CACHE"] = BASE_DIR + "misc2"

# Set the Hugging Face home directory (this includes datasets cache)
os.environ["HF_HOME"] = BASE_DIR + "misc"
os.environ["TORCH_HOME"] = BASE_DIR + "misc"
os.environ["HF_HOME"] = BASE_DIR + "misc2"
os.environ["TORCH_HOME"] = BASE_DIR + "misc2"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# DATA PATHS
@@ -53,5 +53,5 @@
DEMO_STD = np.array([58.395, 57.120, 57.375]) / 255

# SAVE PATHS
DEMO_SAVE_PATH = BASE_DIR + "RESULTS/git"
CNNLSTM_SAVE_PATH = BASE_DIR + "RESULTS/cnn_lstm"
DEMO_SAVE_PATH = BASE_DIR + "RESULTS2/git"
CNNLSTM_SAVE_PATH = BASE_DIR + "RESULTS2/cnn_lstm"
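One subtlety in the constants.py changes above: the Hugging Face and Torch cache variables generally need to be set before `transformers` is imported, since that library reads them when it is first loaded. A minimal sketch (the path below is a placeholder, not the repo's real directory):

```python
import os

# Placeholder base directory, standing in for the project path.
BASE_DIR = "/tmp/sp2024_midterm/"

# Set cache locations BEFORE importing transformers/torch-hub code,
# otherwise the default cache paths may already be fixed.
os.environ["HF_HOME"] = BASE_DIR + "misc2"            # HF datasets/models cache
os.environ["TRANSFORMERS_CACHE"] = BASE_DIR + "misc2" # older transformers versions
os.environ["TORCH_HOME"] = BASE_DIR + "misc2"         # torch.hub downloads
os.environ["TOKENIZERS_PARALLELISM"] = "false"        # silence fork warnings
```

In newer transformers releases `HF_HOME` is the preferred variable and `TRANSFORMERS_CACHE` is kept for backward compatibility, so setting both, as the diff does, is a safe choice.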
67 changes: 0 additions & 67 deletions src/cnn_lstm/dataset.py

This file was deleted.

86 changes: 0 additions & 86 deletions src/cnn_lstm/model.py

This file was deleted.

82 changes: 0 additions & 82 deletions src/cnn_lstm/test.py

This file was deleted.
