From 884f98eb65bf9b2b67eb0936cb4c8d461d6e6628 Mon Sep 17 00:00:00 2001
From: Blazej Banaszewski
Date: Mon, 11 Dec 2023 09:36:33 +0000
Subject: [PATCH] download LargeMix datasets in a single command

---
 download_datasets.sh | 58 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100755 download_datasets.sh

diff --git a/download_datasets.sh b/download_datasets.sh
new file mode 100755
index 000000000..90159cc39
--- /dev/null
+++ b/download_datasets.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+#
+# Download all LargeMix dataset files in a single command.
+# Destination paths are relative to the current working directory.
+
+# Stop at the first failed command, unset variable or failed pipeline stage,
+# so that a broken download is never silently skipped.
+set -euo pipefail
+
+readonly BASE_URL="https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset"
+readonly DATA_DIR="graphium/data/neurips2023/large-dataset"
+
+#######################################
+# Download one file, creating its destination directory if needed.
+# Arguments:
+#   $1 - URL to fetch
+#   $2 - path the file is saved to
+# Returns:
+#   0 on success, 1 if the download fails
+#######################################
+download_dataset() {
+  local dataset_url=$1
+  local dataset_path=$2
+  local partial_path="${dataset_path}.part"
+
+  # Create directory if it does not exist
+  mkdir -p -- "$(dirname -- "${dataset_path}")"
+
+  # Download under a temporary name first: wget -O truncates its output file
+  # up front, so a failed transfer would otherwise leave a corrupt file at
+  # the final path.
+  if ! wget -O "${partial_path}" "${dataset_url}"; then
+    rm -f -- "${partial_path}"
+    printf 'error: failed to download %s\n' "${dataset_url}" >&2
+    return 1
+  fi
+  mv -- "${partial_path}" "${dataset_path}"
+}
+
+# L1000_VCAP
+# NOTE(review): the remote file is named "0-4" but is stored as "0-2_th2";
+# kept exactly as before -- confirm the rename is intended.
+download_dataset "${BASE_URL}/LINCS_L1000_VCAP_0-4.csv.gz" "${DATA_DIR}/LINCS_L1000_VCAP_0-2_th2.csv.gz"
+download_dataset "${BASE_URL}/l1000_vcap_random_splits.pt" "${DATA_DIR}/l1000_vcap_random_splits.pt"
+
+# L1000_MCF7 (same "0-4" -> "0-2_th2" rename as above)
+download_dataset "${BASE_URL}/LINCS_L1000_MCF7_0-4.csv.gz" "${DATA_DIR}/LINCS_L1000_MCF7_0-2_th2.csv.gz"
+download_dataset "${BASE_URL}/l1000_mcf7_random_splits.pt" "${DATA_DIR}/l1000_mcf7_random_splits.pt"
+
+# PCBA_1328
+download_dataset "${BASE_URL}/PCBA_1328_1564k.parquet" "${DATA_DIR}/PCBA_1328_1564k.parquet"
+download_dataset "${BASE_URL}/pcba_1328_random_splits.pt" "${DATA_DIR}/pcba_1328_random_splits.pt"
+
+# PCQM4M_G25 and PCQM4M_N4
+# Both entries point at the same two files (identical URLs and destinations),
+# so they are downloaded once.
+download_dataset "${BASE_URL}/PCQM4M_G25_N4.parquet" "${DATA_DIR}/PCQM4M_G25_N4.parquet"
+download_dataset "${BASE_URL}/pcqm4m_g25_n4_random_splits.pt" "${DATA_DIR}/pcqm4m_g25_n4_random_splits.pt"