@@ -33,23 +33,23 @@ rm "$PROJDIR"/project/datasets/DIPS/final/raw/pairs-postprocessed.txt "$PROJDIR"
33
33
mkdir " $PROJDIR " /project/datasets/DIPS/raw " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim " $PROJDIR " /project/datasets/DIPS/interim/external_feats " $PROJDIR " /project/datasets/DIPS/interim/external_feats/PSAIA " $PROJDIR " /project/datasets/DIPS/interim/external_feats/PSAIA/RCSB " $PROJDIR " /project/datasets/DIPS/final " $PROJDIR " /project/datasets/DIPS/final/raw
34
34
rsync -rlpt -v -z --delete --port=33444 --include=' *.gz' --include=' */' --exclude ' *' rsync.rcsb.org::ftp_data/biounit/coordinates/divided/ " $PROJDIR " /project/datasets/DIPS/raw/pdb
35
35
36
- python " $PROJDIR " /project/datasets/builder/extract_raw_pdb_gz_archives.py " $PROJDIR " /project/datasets/DIPS/raw/pdb --rank " $1 " --size " $2 "
36
+ python3 " $PROJDIR " /project/datasets/builder/extract_raw_pdb_gz_archives.py " $PROJDIR " /project/datasets/DIPS/raw/pdb --rank " $1 " --size " $2 "
37
37
38
- python " $PROJDIR " /project/datasets/builder/make_dataset.py " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim --num_cpus 32 --rank " $1 " --size " $2 " --source_type rcsb --bound
38
+ python3 " $PROJDIR " /project/datasets/builder/make_dataset.py " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim --num_cpus 32 --rank " $1 " --size " $2 " --source_type rcsb --bound
39
39
40
- python " $PROJDIR " /project/datasets/builder/prune_pairs.py " $PROJDIR " /project/datasets/DIPS/interim/pairs " $PROJDIR " /project/datasets/DIPS/filters " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned --num_cpus 32 --rank " $1 " --size " $2 "
40
+ python3 " $PROJDIR " /project/datasets/builder/prune_pairs.py " $PROJDIR " /project/datasets/DIPS/interim/pairs " $PROJDIR " /project/datasets/DIPS/filters " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned --num_cpus 32 --rank " $1 " --size " $2 "
41
41
42
- python " $PROJDIR " /project/datasets/builder/generate_psaia_features.py " $PSAIADIR " " $PROJDIR " /project/datasets/builder/psaia_config_file_dips.txt " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim/parsed " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned " $PROJDIR " /project/datasets/DIPS/interim/external_feats --source_type rcsb --rank " $1 " --size " $2 "
43
- srun python " $PROJDIR " /project/datasets/builder/generate_hhsuite_features.py " $PROJDIR " /project/datasets/DIPS/interim/parsed " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned " $HHSUITE_DB " " $PROJDIR " /project/datasets/DIPS/interim/external_feats --rank " $1 " --size " $2 " --num_cpu_jobs 4 --num_cpus_per_job 8 --num_iter 2 --source_type rcsb --write_file
42
+ python3 " $PROJDIR " /project/datasets/builder/generate_psaia_features.py " $PSAIADIR " " $PROJDIR " /project/datasets/builder/psaia_config_file_dips.txt " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim/parsed " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned " $PROJDIR " /project/datasets/DIPS/interim/external_feats --source_type rcsb --rank " $1 " --size " $2 "
43
+ srun python3 " $PROJDIR " /project/datasets/builder/generate_hhsuite_features.py " $PROJDIR " /project/datasets/DIPS/interim/parsed " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned " $HHSUITE_DB " " $PROJDIR " /project/datasets/DIPS/interim/external_feats --rank " $1 " --size " $2 " --num_cpu_jobs 4 --num_cpus_per_job 8 --num_iter 2 --source_type rcsb --write_file
44
44
45
45
# Retroactively download the PDB files corresponding to complexes that made it through DIPS-Plus' RCSB complex pruning to reduce storage requirements
46
- python " $PROJDIR " /project/datasets/builder/download_missing_pruned_pair_pdbs.py " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned --num_cpus 32 --rank " $1 " --size " $2 "
47
- srun python " $PROJDIR " /project/datasets/builder/postprocess_pruned_pairs.py " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned " $PROJDIR " /project/datasets/DIPS/interim/external_feats " $PROJDIR " /project/datasets/DIPS/final/raw --num_cpus 32 --rank " $1 " --size " $2 "
46
+ python3 " $PROJDIR " /project/datasets/builder/download_missing_pruned_pair_pdbs.py " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned --num_cpus 32 --rank " $1 " --size " $2 "
47
+ srun python3 " $PROJDIR " /project/datasets/builder/postprocess_pruned_pairs.py " $PROJDIR " /project/datasets/DIPS/raw/pdb " $PROJDIR " /project/datasets/DIPS/interim/pairs-pruned " $PROJDIR " /project/datasets/DIPS/interim/external_feats " $PROJDIR " /project/datasets/DIPS/final/raw --num_cpus 32 --rank " $1 " --size " $2 "
48
48
49
- python " $PROJDIR " /project/datasets/builder/partition_dataset_filenames.py " $PROJDIR " /project/datasets/DIPS/final/raw --source_type rcsb --filter_by_atom_count True --max_atom_count 17500 --rank " $1 " --size " $2 "
50
- python " $PROJDIR " /project/datasets/builder/collect_dataset_statistics.py " $PROJDIR " /project/datasets/DIPS/final/raw --rank " $1 " --size " $2 "
51
- python " $PROJDIR " /project/datasets/builder/log_dataset_statistics.py " $PROJDIR " /project/datasets/DIPS/final/raw --rank " $1 " --size " $2 "
52
- python " $PROJDIR " /project/datasets/builder/impute_missing_feature_values.py " $PROJDIR " /project/datasets/DIPS/final/raw --impute_atom_features False --num_cpus 32 --rank " $1 " --size " $2 "
49
+ python3 " $PROJDIR " /project/datasets/builder/partition_dataset_filenames.py " $PROJDIR " /project/datasets/DIPS/final/raw --source_type rcsb --filter_by_atom_count True --max_atom_count 17500 --rank " $1 " --size " $2 "
50
+ python3 " $PROJDIR " /project/datasets/builder/collect_dataset_statistics.py " $PROJDIR " /project/datasets/DIPS/final/raw --rank " $1 " --size " $2 "
51
+ python3 " $PROJDIR " /project/datasets/builder/log_dataset_statistics.py " $PROJDIR " /project/datasets/DIPS/final/raw --rank " $1 " --size " $2 "
52
+ python3 " $PROJDIR " /project/datasets/builder/impute_missing_feature_values.py " $PROJDIR " /project/datasets/DIPS/final/raw --impute_atom_features False --num_cpus 32 --rank " $1 " --size " $2 "
53
53
54
54
# Optionally convert each postprocessed (final 'raw') complex into a pair of DGL graphs (final 'processed') with labels
55
- python " $PROJDIR " /project/datasets/builder/convert_complexes_to_graphs.py " $PROJDIR " /project/datasets/DIPS/final/raw " $PROJDIR " /project/datasets/DIPS/final/processed --num_cpus 32 --edge_dist_cutoff 15.0 --edge_limit 5000 --self_loops True --rank " $1 " --size " $2 "
55
+ python3 " $PROJDIR " /project/datasets/builder/convert_complexes_to_graphs.py " $PROJDIR " /project/datasets/DIPS/final/raw " $PROJDIR " /project/datasets/DIPS/final/processed --num_cpus 32 --edge_dist_cutoff 15.0 --edge_limit 5000 --self_loops True --rank " $1 " --size " $2 "
0 commit comments