DeepGraphLearning
diff --git a/‎README.md
Lines changed: 45 additions & 1 deletion b/‎README.md
Lines changed: 45 additions & 1 deletion
diff --git a/‎config/downstream/EC/BERT.yaml
Lines changed: 38 additions & 0 deletions b/‎config/downstream/EC/BERT.yaml
Lines changed: 38 additions & 0 deletions
diff --git a/‎config/downstream/EC/CNN.yaml
Lines changed: 35 additions & 0 deletions b/‎config/downstream/EC/CNN.yaml
Lines changed: 35 additions & 0 deletions
diff --git a/‎config/downstream/EC/ESM.yaml
Lines changed: 39 additions & 0 deletions b/‎config/downstream/EC/ESM.yaml
Lines changed: 39 additions & 0 deletions
diff --git a/‎config/downstream/EC/LSTM.yaml
Lines changed: 34 additions & 0 deletions b/‎config/downstream/EC/LSTM.yaml
Lines changed: 34 additions & 0 deletions
diff --git a/‎config/downstream/EC/ResNet.yaml
Lines changed: 35 additions & 0 deletions b/‎config/downstream/EC/ResNet.yaml
Lines changed: 35 additions & 0 deletions
diff --git a/‎config/downstream/EC/gearnet.yaml
Lines changed: 53 additions & 0 deletions b/‎config/downstream/EC/gearnet.yaml
Lines changed: 53 additions & 0 deletions
diff --git a/‎config/downstream/EC/gearnet_edge.yaml
Lines changed: 57 additions & 0 deletions b/‎config/downstream/EC/gearnet_edge.yaml
Lines changed: 57 additions & 0 deletions
diff --git a/‎config/downstream/GO-BP/BERT.yaml
Lines changed: 39 additions & 0 deletions b/‎config/downstream/GO-BP/BERT.yaml
Lines changed: 39 additions & 0 deletions
@@ -1 +1,45 @@
-# GearNet
+# GearNet: Geometry-Aware Relational Graph Neural Network
+
+
+This is the official codebase of the paper
+
+[Protein Representation Learning by Geometric Structure Pretraining](https://arxiv.org/abs/2203.06125)
+
+[Zuobai Zhang](https://oxer11.github.io/), [Minghao Xu](https://chrisallenming.github.io/), [Arian Jamasb](https://jamasb.io/), [Vijil Chenthamarakshan](https://researcher.watson.ibm.com/researcher/view.php?person=us-ecvijil), [Aurelie Lozano](https://researcher.watson.ibm.com/researcher/view.php?person=us-aclozano), [Payel Das](https://researcher.watson.ibm.com/researcher/view.php?person=us-daspa), [Jian Tang](https://jian-tang.com/)
+
+## Overview
+
+This codebase is based on PyTorch and [TorchDrug] ([TorchProtein](https://torchprotein.ai)). It supports training and inference
+with multiple GPUs or multiple machines.
+
+[TorchDrug]: https://github.com/DeepGraphLearning/torchdrug
+
+## Installation
+
+You may install the dependencies via either conda or pip. Generally, GearNet works
+with Python 3.7/3.8 and PyTorch version >= 1.8.0.
+
+### From Conda
+
+```bash
+conda install pytorch=1.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge
+conda install pyg -c pyg
+conda install rdkit easydict pyyaml -c conda-forge
+```
+
+
+## Reproduction
+
+To reproduce the results of GearNet, use the commands below. Alternatively, you
+may use `--gpus null` to run GearNet on a CPU. All the datasets will be automatically
+downloaded by the code.
+
+We provide the hyperparameters for each experiment in configuration files.
+All the configuration files can be found under `config/`, grouped by task (e.g. `config/downstream/EC/gearnet.yaml`).
+
+To run GearNet with multiple GPUs, use the following command:
+
+```bash
+python -m torch.distributed.launch --nproc_per_node=4 script/run.py -c config/downstream/EC/gearnet.yaml --gpus [0,1,2,3]
+```
+
@@ -0,0 +1,38 @@
+output_dir: ~/scratch/protein_output  # EC downstream config: ProteinBERT on the EnzymeCommission dataset
+
+dataset:
+  class: EnzymeCommission
+  path: ~/scratch/protein-datasets/
+  test_cutoff: 0.95
+  transform:
+    class: ProteinView
+    view: residue
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: ProteinBERT
+    input_dim: 21
+    hidden_dim: 512
+    num_layers: 4
+    num_heads: 8
+    intermediate_dim: 2048
+    hidden_dropout: 0.1
+    attention_dropout: 0.1
+  criterion: bce
+  metric: ['auprc@micro', 'f1_max']
+  num_mlp_layer: 2
+
+optimizer:
+  class: Adam
+  lr: 5.0e-5  # keep the ".0": YAML 1.1 loaders such as PyYAML read 5e-5 as a string
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 8
+  log_interval: 1000
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200
@@ -0,0 +1,35 @@
+output_dir: ~/scratch/protein_output  # EC downstream config: ProteinConvolutionalNetwork on the EnzymeCommission dataset
+
+dataset:
+  class: EnzymeCommission
+  path: ~/scratch/protein-datasets/
+  test_cutoff: 0.95
+  transform:
+    class: ProteinView
+    view: residue
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: ProteinConvolutionalNetwork
+    input_dim: 21
+    hidden_dims: [1024, 1024]  # two hidden sizes; presumably one per convolution layer (confirm)
+    kernel_size: 5
+    padding: 2  # NOTE(review): equals (kernel_size - 1) / 2, which would keep sequence length unchanged at stride 1 (confirm)
+  criterion: bce
+  metric: ['auprc@micro', 'f1_max']
+  num_mlp_layer: 2
+
+optimizer:
+  class: Adam
+  lr: 1.0e-4  # keep the ".0": YAML 1.1 loaders such as PyYAML read 1e-4 as a string
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 32
+  log_interval: 1000
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200
@@ -0,0 +1,39 @@
+output_dir: ~/scratch/protein_output  # EC downstream config: ESM-1b model on the EnzymeCommission dataset
+
+dataset:
+  class: EnzymeCommission
+  path: ~/scratch/protein-datasets/
+  test_cutoff: 0.95
+  transform:
+    class: Compose  # applies the listed transforms; presumably in the order written (confirm)
+    transforms:
+      - class: ProteinView
+        view: residue
+      - class: TruncateProtein
+        max_length: 550  # NOTE(review): presumably a cap on sequence length in residues (confirm)
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: ESM
+    path: ~/scratch/protein-model-weights/esm-model-weights/  # directory for the ESM weights; separate from the dataset path above
+    model: ESM-1b
+  criterion: bce
+  metric: ['auprc@micro', 'f1_max']
+  num_mlp_layer: 2
+
+optimizer:
+  class: Adam
+  lr: 1.0e-4  # keep the ".0": YAML 1.1 loaders such as PyYAML read 1e-4 as a string
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 2
+  log_interval: 1000
+
+lr_ratio: 0.1  # NOTE(review): only this config sets lr_ratio; presumably scales the lr of the pretrained encoder (confirm)
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200
@@ -0,0 +1,34 @@
+output_dir: ~/scratch/protein_output  # EC downstream config: ProteinLSTM on the EnzymeCommission dataset
+
+dataset:
+  class: EnzymeCommission
+  path: ~/scratch/protein-datasets/
+  test_cutoff: 0.95
+  transform:
+    class: ProteinView
+    view: residue
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: ProteinLSTM
+    input_dim: 21
+    hidden_dim: 640
+    num_layers: 3
+  criterion: bce
+  metric: ['auprc@micro', 'f1_max']
+  num_mlp_layer: 2
+
+optimizer:
+  class: Adam
+  lr: 5.0e-5  # keep the ".0": YAML 1.1 loaders such as PyYAML read 5e-5 as a string
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 8
+  log_interval: 1000
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200
@@ -0,0 +1,35 @@
+output_dir: ~/scratch/protein_output  # EC downstream config: ProteinResNet on the EnzymeCommission dataset
+
+dataset:
+  class: EnzymeCommission
+  path: ~/scratch/protein-datasets/
+  test_cutoff: 0.95
+  transform:
+    class: ProteinView
+    view: residue
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: ProteinResNet
+    input_dim: 21
+    hidden_dims: [512, 512, 512, 512, 512, 512, 512, 512]  # eight hidden sizes, all 512; presumably one per residual block (confirm)
+    layer_norm: True
+    dropout: 0.1
+  criterion: bce
+  metric: ['auprc@micro', 'f1_max']
+  num_mlp_layer: 2
+
+optimizer:
+  class: Adam
+  lr: 2.0e-4  # keep the ".0": YAML 1.1 loaders such as PyYAML read 2e-4 as a string
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 8
+  log_interval: 1000
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200
@@ -0,0 +1,53 @@
+output_dir: ~/scratch/protein_output  # EC downstream config: GearNet (no edge-input settings) on the EnzymeCommission dataset
+
+dataset:
+  class: EnzymeCommission
+  path: ~/scratch/protein-datasets/
+  test_cutoff: 0.95
+  transform:
+    class: ProteinView
+    view: residue
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: GearNet
+    input_dim: 21
+    hidden_dims: [512, 512, 512, 512, 512, 512]  # six hidden sizes, all 512; presumably one per message-passing layer (confirm)
+    batch_norm: True
+    concat_hidden: True
+    short_cut: True
+    readout: 'sum'
+    num_relation: 7  # NOTE(review): presumably 5 sequential offsets (-2..2) + radius + kNN edge types from edge_layers below (confirm)
+  graph_construction_model:
+    class: GraphConstruction
+    node_layers:
+      - class: AlphaCarbonNode
+    edge_layers:  # three edge constructors: sequential, radius-based and k-nearest-neighbour
+      - class: SequentialEdge
+        max_distance: 2
+      - class: SpatialEdge
+        radius: 10.0
+        min_distance: 5
+      - class: KNNEdge
+        k: 10
+        min_distance: 5
+    edge_feature: gearnet
+  criterion: bce
+  num_mlp_layer: 3
+  metric: ['auprc@micro', 'f1_max']
+
+optimizer:
+  class: AdamW
+  lr: 1.0e-4  # keep the ".0": YAML 1.1 loaders such as PyYAML read 1e-4 as a string
+  weight_decay: 0
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 2
+  log_interval: 1000
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200
@@ -0,0 +1,57 @@
+output_dir: ~/scratch/protein_output  # EC downstream config: GearNet with edge-input settings on the EnzymeCommission dataset
+
+dataset:
+  class: EnzymeCommission
+  path: ~/scratch/protein-datasets/
+  test_cutoff: 0.95
+  transform:
+    class: ProteinView
+    view: residue
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: GearNet
+    input_dim: 21
+    hidden_dims: [512, 512, 512, 512, 512, 512]  # six hidden sizes, all 512; presumably one per message-passing layer (confirm)
+    batch_norm: True
+    concat_hidden: True
+    short_cut: True
+    readout: 'sum'
+    num_relation: 7  # NOTE(review): presumably 5 sequential offsets (-2..2) + radius + kNN edge types from edge_layers below (confirm)
+    edge_input_dim: 59  # set here but not in the plain GearNet EC config of this change
+    num_angle_bin: 8  # set here but not in the plain GearNet EC config of this change
+  graph_construction_model:
+    class: GraphConstruction
+    node_layers:
+      - class: AlphaCarbonNode
+    edge_layers:  # three edge constructors: sequential, radius-based and k-nearest-neighbour
+      - class: SequentialEdge
+        max_distance: 2
+      - class: SpatialEdge
+        radius: 10.0
+        min_distance: 5
+      - class: KNNEdge
+        k: 10
+        min_distance: 5
+    edge_feature: gearnet
+  criterion: bce
+  num_mlp_layer: 3
+  metric: ['auprc@micro', 'f1_max']
+
+optimizer:
+  class: AdamW
+  lr: 1.0e-4  # keep the ".0": YAML 1.1 loaders such as PyYAML read 1e-4 as a string
+  weight_decay: 0
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 2
+  log_interval: 1000
+
+model_checkpoint: {{ ckpt }}  # template placeholder; presumably a checkpoint path supplied at launch (confirm how it is passed)
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200
@@ -0,0 +1,39 @@
+output_dir: ~/scratch/protein_output  # GO-BP downstream config: ProteinBERT on the GeneOntology dataset
+
+dataset:
+  class: GeneOntology
+  path: ~/scratch/protein-datasets/
+  branch: BP  # matches the GO-BP directory this config lives in; presumably selects the ontology branch (confirm)
+  test_cutoff: 0.95
+  transform:
+    class: ProteinView
+    view: residue
+
+task:
+  class: MultipleBinaryClassification
+  model:
+    class: ProteinBERT
+    input_dim: 21
+    hidden_dim: 512
+    num_layers: 4
+    num_heads: 8
+    intermediate_dim: 2048
+    hidden_dropout: 0.1
+    attention_dropout: 0.1
+  criterion: bce
+  metric: ['auprc@micro', 'f1_max']
+  num_mlp_layer: 2
+
+optimizer:
+  class: Adam
+  lr: 5.0e-5  # keep the ".0": YAML 1.1 loaders such as PyYAML read 5e-5 as a string
+
+engine:
+  gpus: {{ gpus }}  # template placeholder; presumably filled from the --gpus CLI option before YAML parsing (confirm)
+  batch_size: 8
+  log_interval: 1000
+
+metric: f1_max  # one of the task metrics listed above; presumably the model-selection criterion (confirm)
+
+train:
+  num_epoch: 200