Skip to content

Commit 8652b6b

Browse files
Exclude .git for cloud storage (skypilot-org#1494)
* s3 exclude .git
* gcs
* comments
* warning and fix doc
* Update sky/data/storage.py

Co-authored-by: Zongheng Yang <[email protected]>

* fix

Co-authored-by: Zongheng Yang <[email protected]>
1 parent e37062f commit 8652b6b

File tree

3 files changed

+14
-10
lines changed

3 files changed

+14
-10
lines changed

docs/source/examples/spot-jobs.rst

+2-4
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ We can launch it with the following:
4242
4343
# Assume your working directory is under `~/transformers`.
4444
# To make this example work, please run the following command:
45-
# git clone https://github.com/huggingface/transformers.git ~/transformers
45+
# git clone https://github.com/huggingface/transformers.git ~/transformers -b v4.18.0
4646
workdir: ~/transformers
4747
4848
setup: |
@@ -51,7 +51,6 @@ We can launch it with the following:
5151
# to pass the key in the command line, during `sky spot launch`.
5252
echo export WANDB_API_KEY=[YOUR-WANDB-API-KEY] >> ~/.bashrc
5353
54-
git checkout v4.18.0
5554
pip install -e .
5655
cd examples/pytorch/question-answering/
5756
pip install -r requirements.txt torch==1.12.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
@@ -117,7 +116,7 @@ Below we show an `example <https://github.com/skypilot-org/skypilot/blob/master/
117116
118117
# Assume your working directory is under `~/transformers`.
119118
# To make this example work, please run the following command:
120-
# git clone https://github.com/huggingface/transformers.git ~/transformers
119+
# git clone https://github.com/huggingface/transformers.git ~/transformers -b v4.18.0
121120
workdir: ~/transformers
122121
123122
file_mounts:
@@ -131,7 +130,6 @@ Below we show an `example <https://github.com/skypilot-org/skypilot/blob/master/
131130
# to pass the key in the command line, during `sky spot launch`.
132131
echo export WANDB_API_KEY=[YOUR-WANDB-API-KEY] >> ~/.bashrc
133132
134-
git checkout v4.18.0
135133
pip install -e .
136134
cd examples/pytorch/question-answering/
137135
pip install -r requirements.txt

examples/spot/bert_qa.yaml

+1-2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ resources:
55

66
# Assume your working directory is under `~/transformers`.
77
# To make this example work, please run the following command:
8-
# git clone https://github.com/huggingface/transformers.git ~/transformers
8+
# git clone https://github.com/huggingface/transformers.git ~/transformers -b v4.18.0
99
workdir: ~/transformers
1010

1111
file_mounts:
@@ -17,7 +17,6 @@ setup: |
1717
# Fill in your wandb key: copy from https://wandb.ai/authorize
1818
echo export WANDB_API_KEY=[YOUR-WANDB-API-KEY] >> ~/.bashrc
1919
20-
git checkout v4.18.0
2120
pip install -e .
2221
cd examples/pytorch/question-answering/
2322
pip install -r requirements.txt torch==1.12.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113

sky/data/storage.py

+11-4
Original file line number | Diff line number | Diff line change
@@ -721,6 +721,10 @@ def sync_all_stores(self):
721721
def _sync_store(self, store: AbstractStore):
722722
"""Runs the upload routine for the store and handles failures"""
723723
try:
724+
if self.source is not None and os.path.isdir(
725+
os.path.join(self.source, '.git')):
726+
logger.warning(f'\'.git\' directory under \'{self.source}\' '
727+
'is excluded during sync.')
724728
store.upload()
725729
except exceptions.StorageUploadError:
726730
logger.error(f'Could not upload {self.source} to store '
@@ -906,9 +910,11 @@ def get_file_sync_command(base_dir_path, file_names):
906910
return sync_command
907911

908912
def get_dir_sync_command(src_dir_path, dest_dir_name):
909-
sync_command = ('aws s3 sync --no-follow-symlinks '
910-
f'{src_dir_path} '
911-
f's3://{self.name}/{dest_dir_name}')
913+
# we exclude .git directory from the sync
914+
sync_command = (
915+
'aws s3 sync --no-follow-symlinks --exclude ".git/*" '
916+
f'{src_dir_path} '
917+
f's3://{self.name}/{dest_dir_name}')
912918
return sync_command
913919

914920
# Generate message for upload
@@ -1219,7 +1225,8 @@ def get_file_sync_command(base_dir_path, file_names):
12191225
return sync_command
12201226

12211227
def get_dir_sync_command(src_dir_path, dest_dir_name):
1222-
sync_command = (f'gsutil -m rsync -r {src_dir_path} '
1228+
# we exclude .git directory from the sync
1229+
sync_command = (f'gsutil -m rsync -r -x \'.git/*\' {src_dir_path} '
12231230
f'gs://{self.name}/{dest_dir_name}')
12241231
return sync_command
12251232

0 commit comments

Comments
 (0)