Skip to content

Commit 58ffd58

Browse files
authored
Add GearNet_edge_ieconv & Fold3D dataset (#15)
* init commit for ieconv
* fix bugs in last commit
* new gearnet conv
* add new fold3d dataset
* fix bugs in last commit
* reproduce results w/o pre-training
* reproducible results on Fold3D
* clean code
1 parent 7d21a91 commit 58ffd58

15 files changed

+1053
-0
lines changed

.gitignore

+2
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
slurm-*
2+
13
# Byte-compiled / optimized / DLL files
24
__pycache__/
35
*.py[cod]

config/downstream/Fold3D/gearnet.yaml

+70
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
output_dir: ~/scratch/protein_output
2+
3+
dataset:
4+
class: Fold3D
5+
path: ~/scratch/protein-datasets/
6+
test_split: test_fold
7+
transform:
8+
class: ProteinView
9+
view: residue
10+
11+
task:
12+
class: PropertyPrediction
13+
num_mlp_layer: 3
14+
mlp_batch_norm: True
15+
mlp_dropout: 0.5
16+
model:
17+
class: GearNetIEConv
18+
input_dim: 21
19+
embedding_dim: 512
20+
hidden_dims: [512, 512, 512, 512, 512, 512]
21+
batch_norm: True
22+
concat_hidden: True
23+
short_cut: True
24+
readout: 'sum'
25+
num_relation: 7
26+
# edge_input_dim: 59
27+
# num_angle_bin: 8
28+
layer_norm: True
29+
dropout: 0.2
30+
# use_ieconv: True
31+
graph_construction_model:
32+
class: GraphConstruction
33+
node_layers:
34+
- class: AlphaCarbonNode
35+
edge_layers:
36+
- class: SequentialEdge
37+
max_distance: 2
38+
- class: SpatialEdge
39+
radius: 10.0
40+
min_distance: 5
41+
- class: KNNEdge
42+
k: 10
43+
min_distance: 5
44+
edge_feature: gearnet
45+
criterion: ce
46+
metric: ['acc']
47+
num_class: 1195
48+
49+
optimizer:
50+
class: SGD
51+
lr: 1.0e-3
52+
weight_decay: 5.0e-4
53+
momentum: 0.98
54+
55+
scheduler:
56+
class: StepLR
57+
step_size: 50
58+
gamma: 0.5
59+
60+
engine:
61+
gpus: {{ gpus }}
62+
batch_size: 2
63+
log_interval: 1000
64+
65+
model_checkpoint: {{ ckpt }}
66+
67+
metric: accuracy [fold_label]
68+
69+
train:
70+
num_epoch: 300
+70
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
output_dir: ~/scratch/protein_output
2+
3+
dataset:
4+
class: Fold3D
5+
path: ~/scratch/protein-datasets/
6+
test_split: test_fold
7+
transform:
8+
class: ProteinView
9+
view: residue
10+
11+
task:
12+
class: PropertyPrediction
13+
num_mlp_layer: 3
14+
mlp_batch_norm: True
15+
mlp_dropout: 0.5
16+
model:
17+
class: GearNetIEConv
18+
input_dim: 21
19+
embedding_dim: 512
20+
hidden_dims: [512, 512, 512, 512, 512, 512]
21+
batch_norm: True
22+
concat_hidden: True
23+
short_cut: True
24+
readout: 'sum'
25+
num_relation: 7
26+
edge_input_dim: 59
27+
num_angle_bin: 8
28+
layer_norm: True
29+
dropout: 0.2
30+
# use_ieconv: True
31+
graph_construction_model:
32+
class: GraphConstruction
33+
node_layers:
34+
- class: AlphaCarbonNode
35+
edge_layers:
36+
- class: SequentialEdge
37+
max_distance: 2
38+
- class: SpatialEdge
39+
radius: 10.0
40+
min_distance: 5
41+
- class: KNNEdge
42+
k: 10
43+
min_distance: 5
44+
edge_feature: gearnet
45+
criterion: ce
46+
metric: ['acc']
47+
num_class: 1195
48+
49+
optimizer:
50+
class: SGD
51+
lr: 1.0e-3
52+
weight_decay: 5.0e-4
53+
momentum: 0.98
54+
55+
scheduler:
56+
class: StepLR
57+
step_size: 50
58+
gamma: 0.5
59+
60+
engine:
61+
gpus: {{ gpus }}
62+
batch_size: 2
63+
log_interval: 1000
64+
65+
model_checkpoint: {{ ckpt }}
66+
67+
metric: accuracy [fold_label]
68+
69+
train:
70+
num_epoch: 300
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
output_dir: ~/scratch/protein_output
2+
3+
dataset:
4+
class: Fold3D
5+
path: ~/scratch/protein-datasets/
6+
test_split: test_fold
7+
transform:
8+
class: ProteinView
9+
view: residue
10+
11+
task:
12+
class: PropertyPrediction
13+
num_mlp_layer: 3
14+
mlp_batch_norm: True
15+
mlp_dropout: 0.5
16+
model:
17+
class: GearNetIEConv
18+
input_dim: 21
19+
embedding_dim: 512
20+
hidden_dims: [512, 512, 512, 512, 512, 512]
21+
batch_norm: True
22+
concat_hidden: True
23+
short_cut: True
24+
readout: 'sum'
25+
num_relation: 7
26+
edge_input_dim: 59
27+
num_angle_bin: 8
28+
layer_norm: True
29+
dropout: 0.2
30+
use_ieconv: True
31+
graph_construction_model:
32+
class: GraphConstruction
33+
node_layers:
34+
- class: AlphaCarbonNode
35+
edge_layers:
36+
- class: SequentialEdge
37+
max_distance: 2
38+
- class: SpatialEdge
39+
radius: 10.0
40+
min_distance: 5
41+
- class: KNNEdge
42+
k: 10
43+
min_distance: 5
44+
edge_feature: gearnet
45+
criterion: ce
46+
metric: ['acc']
47+
num_class: 1195
48+
49+
optimizer:
50+
class: SGD
51+
lr: 1.0e-3
52+
weight_decay: 5.0e-4
53+
momentum: 0.98
54+
55+
scheduler:
56+
class: StepLR
57+
step_size: 50
58+
gamma: 0.5
59+
60+
engine:
61+
gpus: {{ gpus }}
62+
batch_size: 2
63+
log_interval: 1000
64+
65+
model_checkpoint: {{ ckpt }}
66+
67+
metric: accuracy [fold_label]
68+
69+
train:
70+
num_epoch: 300
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
output_dir: ~/scratch/protein_output
2+
3+
dataset:
4+
class: Fold3D
5+
path: ~/scratch/protein-datasets/
6+
test_split: test_fold
7+
transform:
8+
class: ProteinView
9+
view: residue
10+
11+
task:
12+
class: PropertyPrediction
13+
num_mlp_layer: 3
14+
mlp_batch_norm: True
15+
mlp_dropout: 0.5
16+
model:
17+
class: GearNetIEConv
18+
input_dim: 21
19+
embedding_dim: 512
20+
hidden_dims: [512, 512, 512, 512, 512, 512]
21+
batch_norm: True
22+
concat_hidden: True
23+
short_cut: True
24+
readout: 'sum'
25+
num_relation: 7
26+
# edge_input_dim: 59
27+
# num_angle_bin: 8
28+
layer_norm: True
29+
dropout: 0.2
30+
use_ieconv: True
31+
graph_construction_model:
32+
class: GraphConstruction
33+
node_layers:
34+
- class: AlphaCarbonNode
35+
edge_layers:
36+
- class: SequentialEdge
37+
max_distance: 2
38+
- class: SpatialEdge
39+
radius: 10.0
40+
min_distance: 5
41+
- class: KNNEdge
42+
k: 10
43+
min_distance: 5
44+
edge_feature: gearnet
45+
criterion: ce
46+
metric: ['acc']
47+
num_class: 1195
48+
49+
optimizer:
50+
class: SGD
51+
lr: 1.0e-3
52+
weight_decay: 5.0e-4
53+
momentum: 0.98
54+
55+
scheduler:
56+
class: StepLR
57+
step_size: 50
58+
gamma: 0.5
59+
60+
engine:
61+
gpus: {{ gpus }}
62+
batch_size: 2
63+
log_interval: 1000
64+
65+
model_checkpoint: {{ ckpt }}
66+
67+
metric: accuracy [fold_label]
68+
69+
train:
70+
num_epoch: 300
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
output_dir: ~/scratch/protein_output
2+
3+
dataset:
4+
class: AlphaFoldDB
5+
path: ~/scratch/protein-datasets/alphafold
6+
species_start: 0
7+
species_end: 22
8+
# species_id: 3
9+
# split_id: 1
10+
atom_feature: null
11+
bond_feature: null
12+
transform:
13+
class: Compose
14+
transforms:
15+
- class: ProteinView
16+
view: residue
17+
- class: TruncateProtein
18+
max_length: 100
19+
random: True
20+
21+
task:
22+
class: AnglePrediction
23+
num_sample: 512
24+
num_class: 8
25+
num_mlp_layer: 2
26+
model:
27+
class: GearNetIEConv
28+
input_dim: 21
29+
embedding_dim: 512
30+
hidden_dims: [512, 512, 512, 512, 512, 512]
31+
batch_norm: True
32+
concat_hidden: True
33+
short_cut: True
34+
readout: 'sum'
35+
num_relation: 7
36+
edge_input_dim: 59
37+
num_angle_bin: 8
38+
dropout: 0.2
39+
use_ieconv: True
40+
layer_norm: True
41+
graph_construction_model:
42+
class: GraphConstruction
43+
node_layers:
44+
- class: AlphaCarbonNode
45+
edge_layers:
46+
- class: SequentialEdge
47+
max_distance: 2
48+
- class: SpatialEdge
49+
radius: 10.0
50+
min_distance: 5
51+
- class: KNNEdge
52+
k: 10
53+
min_distance: 5
54+
edge_feature: gearnet
55+
56+
optimizer:
57+
class: Adam
58+
lr: 1.0e-3
59+
60+
engine:
61+
gpus: {{ gpus }}
62+
batch_size: 32
63+
log_interval: 100
64+
65+
save_interval: 5
66+
67+
train:
68+
num_epoch: 100

0 commit comments

Comments
 (0)