-
Notifications
You must be signed in to change notification settings - Fork 72
/
Copy path1_dataset.sh
executable file
·89 lines (76 loc) · 2.83 KB
/
1_dataset.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
#######################################
# Download and unpack every dataset listed in the JSON configuration.
#
# For each entry under `.datasets` the archive named by `download_link` is
# fetched with wget into the entry's `directory` (falling back to
# `.general_settings.default_directory`; a leading `~` is replaced by $HOME)
# and unpacked into `<directory>/<top_level_dir>`.
#
# The existence of `<directory>/<top_level_dir>` is what marks a dataset as
# already unpacked, so that folder is only created right before extraction and
# is removed again when extraction fails. Downloads go to `<archive>.part`
# and are renamed on success, so a failed transfer is never mistaken for a
# complete archive on the next run.
#
# Arguments:
#   $1 - optional path to the JSON configuration file
#        (default: <script dir>/config/1_dataset.json)
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   Exits the shell with status 1 when jq is missing or the configuration
#   cannot be read. Per-dataset failures are reported and the loop moves on.
#######################################
main() {
  local script_dir config_file default_directory datasets
  local dataset_b64 dataset dataset_name download_link directory
  local top_level_dir file_name archive_path dataset_folder extract_ok

  # Check if required commands are available
  if ! command -v jq &> /dev/null; then
    echo "Error: jq is not installed." >&2
    exit 1
  fi

  # Locate the JSON configuration file
  config_file=${1:-}
  if [[ -z "$config_file" ]]; then
    script_dir=$(cd -- "$(dirname -- "$0")" && pwd) || exit 1
    config_file="${script_dir}/config/1_dataset.json"
  fi
  if [[ ! -f "$config_file" ]]; then
    echo "Configuration file not found: $config_file" >&2
    exit 1
  fi

  # Extract general settings. `// empty` turns a missing key into an empty
  # string instead of the literal text "null".
  if ! default_directory=$(jq -r '.general_settings.default_directory // empty' "$config_file"); then
    echo "Failed to read configuration file: $config_file" >&2
    exit 1
  fi
  default_directory=${default_directory/#\~/$HOME}

  # One base64-encoded {key, value} object per line, so that entries survive
  # line-based reading whatever characters they contain.
  if ! datasets=$(jq -r '(.datasets // {}) | to_entries[] | @base64' "$config_file"); then
    echo "Failed to read configuration file: $config_file" >&2
    exit 1
  fi

  # The list is fed through fd 3 so commands inside the loop keep their stdin.
  while IFS= read -r dataset_b64 <&3; do
    # An empty dataset list still yields one empty line; ignore it.
    [[ -n "$dataset_b64" ]] || continue

    if ! dataset=$(base64 --decode <<<"$dataset_b64"); then
      echo "Failed to decode dataset entry" >&2
      continue
    fi
    dataset_name=$(jq -r '.key' <<<"$dataset")
    download_link=$(jq -r '.value.download_link // empty' <<<"$dataset")
    directory=$(jq -r '.value.directory // empty' <<<"$dataset")
    directory=${directory/#\~/$HOME}
    directory=${directory:-$default_directory}
    top_level_dir=$(jq -r '.value.top_level_dir // empty' <<<"$dataset")

    echo "======================================== ${dataset_name} ========================================"

    if [[ -z "$download_link" || -z "$top_level_dir" || -z "$directory" ]]; then
      echo "Skipping ${dataset_name}: download_link, top_level_dir and a target directory are required" >&2
      continue
    fi
    # The folder may be removed with rm -rf below, so it must not be able to
    # climb out of $directory.
    case "/${top_level_dir}/" in
      */../*)
        echo "Skipping ${dataset_name}: top_level_dir must not contain '..': $top_level_dir" >&2
        continue
        ;;
    esac

    # Create the directory if it doesn't exist
    if ! mkdir -p -- "$directory"; then
      echo "Failed to create directory: $directory" >&2
      continue
    fi

    # Extract the file name from the download link
    file_name=$(basename -- "$download_link")
    archive_path="$directory/$file_name"
    dataset_folder="$directory/$top_level_dir"

    if [[ -d "$dataset_folder" ]]; then
      echo "Directory $dataset_folder already exists, skipping unzipping."
      continue
    fi

    # Download the archive unless a complete copy is already present
    if [[ ! -f "$archive_path" ]]; then
      echo "Downloading $file_name..."
      if ! wget -O "${archive_path}.part" "$download_link" \
        || ! mv -f -- "${archive_path}.part" "$archive_path"; then
        echo "Failed to download $file_name" >&2
        rm -f -- "${archive_path}.part"
        continue
      fi
    fi

    echo "Unzipping $file_name into $directory..."
    # Reject unknown formats before creating the folder, otherwise an empty
    # folder would make later runs report the dataset as already unpacked.
    case "$file_name" in
      *.tar.bz2 | *.gz) ;;
      *)
        echo "Unsupported file format: $file_name" >&2
        continue
        ;;
    esac

    if ! mkdir -p -- "$dataset_folder"; then
      echo "Failed to create directory: $dataset_folder" >&2
      continue
    fi
    echo "Created directory: $dataset_folder"

    extract_ok=true
    case "$file_name" in
      *.tar.bz2)
        # Drop the archive's own top-level folder and unpack under
        # $top_level_dir inside $directory instead.
        tar -xvjf "$archive_path" -C "$directory" --strip-components=1 \
          --one-top-level="$top_level_dir" || extract_ok=false
        ;;
      *.gz)
        # NOTE(review): this arm also matches *.tar.gz, which is only
        # decompressed to a .tar here, not unpacked - confirm that is intended.
        gunzip -c "$archive_path" > "$dataset_folder/${file_name%.gz}" \
          || extract_ok=false
        ;;
    esac

    if [[ "$extract_ok" == true ]]; then
      echo "Finished unzipping $file_name."
    else
      echo "Failed to unzip $file_name" >&2
      # The folder did not exist before this iteration, so everything in it is
      # partial output; remove it so the next run retries this dataset.
      rm -rf -- "${dataset_folder:?}"
    fi
  done 3<<<"$datasets"
}
# Run the main function, forwarding the script's command-line arguments
main "$@"