@@ -25,15 +25,16 @@ Training
25
25
.. code-block :: python
26
26
27
27
# Import nemo and variantworks modules
28
+ import os
28
29
import nemo
29
- from variantworks.dataloader import *
30
- from variantworks.io.vcfio import *
31
- from variantworks.networks import *
32
- from variantworks.sample_encoders import *
30
+ from variantworks.dataloader import ReadPileupDataLoader
31
+ from variantworks.io.vcfio import VCFReader
32
+ from variantworks.networks import AlexNet
33
+ from variantworks.sample_encoder import PileupEncoder, ZygosityLabelEncoder
33
34
34
35
# Create neural factory
35
36
nf = nemo.core.NeuralModuleFactory(
36
- placement = nemo.core.neural_factory.DeviceType.GPU , checkpoint_dir = tempdir )
37
+ placement = nemo.core.neural_factory.DeviceType.GPU , checkpoint_dir = " ./ " )
37
38
38
39
# Create pileup encoder by selecting layers to encode. More encoding layers
39
40
# can be found in the documentation for PileupEncoder class.
@@ -77,14 +78,27 @@ Training
77
78
# Logger callback
78
79
logger_callback = nemo.core.SimpleLossLoggerCallback(
79
80
tensors = [vz_loss],
80
- print_func = lambda x : logging.info(f ' Train Loss: { str (x[0 ].item())} ' ))
81
+ print_func = lambda x : nemo.logging.info(f ' Train Loss: { str (x[0 ].item())} ' )
82
+ )
83
+
84
+ # Checkpointing models through NeMo callback
85
+ checkpoint_callback = nemo.core.CheckpointCallback(
86
+ folder = " ./" ,
87
+ load_from_folder = None ,
88
+ # Checkpointing frequency in steps
89
+ step_freq = - 1 ,
90
+ # Checkpointing frequency in epochs
91
+ epoch_freq = 1 ,
92
+ # Number of checkpoints to keep
93
+ checkpoints_to_keep = 1 ,
94
+ # If True, CheckpointCallback will raise an Error if restoring fails
95
+ force_load = False
81
96
)
82
97
83
98
# Kick off training
84
99
nf.train([vz_loss],
85
- callbacks = [logger_callback,
86
- checkpoint_callback, evaluator_callback],
87
- optimization_params = {" num_epochs" : 4 , " lr" : 0.001 },
100
+ callbacks = [logger_callback, checkpoint_callback],
101
+ optimization_params = {" num_epochs" : 10 , " lr" : 0.001 },
88
102
optimizer = " adam" )
89
103
90
104
@@ -96,28 +110,30 @@ The inference pipeline works in a very similar fashion, except the final NeMo DA
96
110
.. code-block :: python
97
111
98
112
# Import nemo and variantworks modules
113
+ import os
99
114
import nemo
100
- from variantworks.dataloader import *
101
- from variantworks.io.vcfio import *
102
- from variantworks.networks import *
103
- from variantworks.sample_encoders import *
104
- from variantworks.result_writer import *
115
+ import torch
116
+ from variantworks.dataloader import ReadPileupDataLoader
117
+ from variantworks.io.vcfio import VCFReader
118
+ from variantworks.networks import AlexNet
119
+ from variantworks.sample_encoder import PileupEncoder, ZygosityLabelDecoder
120
+ from variantworks.result_writer import VCFResultWriter
105
121
106
122
# Create neural factory. In this case, the checkpoint_dir has to be set for NeMo to pick
107
123
# up a pre-trained model.
108
124
nf = nemo.core.NeuralModuleFactory(
109
- placement = nemo.core.neural_factory.DeviceType.GPU , checkpoint_dir = model_dir)
110
-
111
- # Neural Network
112
- model = AlexNet(num_input_channels = len (
113
- encoding_layers), num_output_logits = 3 )
125
+ placement = nemo.core.neural_factory.DeviceType.GPU , checkpoint_dir = " ./" )
114
126
115
127
# Dataset generation is done in a similar manner. It's important to note that the encoder used
116
128
# for inference must match that for training.
117
129
encoding_layers = [PileupEncoder.Layer.READ , PileupEncoder.Layer.BASE_QUALITY ]
118
130
pileup_encoder = PileupEncoder(
119
131
window_size = 100 , max_reads = 100 , layers = encoding_layers)
120
132
133
+ # Neural Network
134
+ model = AlexNet(num_input_channels = len (
135
+ encoding_layers), num_output_logits = 3 )
136
+
121
137
# Similar to training, a dataloader needs to be setup for the relevant datasets. In the case of
122
138
# inference, it doesn't matter if the files are tagged as false positive or not. Each example will be
123
139
# evaluated by the network. For simplicity the example is using the same dataset from training.
@@ -135,21 +151,24 @@ The inference pipeline works in a very similar fashion, except the final NeMo DA
135
151
vz = model(encoding = encoding)
136
152
137
153
# Invoke the "infer" action.
138
- results = nf.infer([vz], checkpoint_dir = model_dir , verbose = True )
154
+ results = nf.infer([vz], checkpoint_dir = " ./ " , verbose = True )
139
155
140
156
# Instantiate a decoder that converts the predicted output of the network to
141
157
# a zygosity enum.
142
158
zyg_decoder = ZygosityLabelDecoder()
143
159
144
160
# Decode inference results to labels
161
+ inferred_zygosity = []
145
162
for tensor_batches in results:
146
163
for batch in tensor_batches:
147
164
predicted_classes = torch.argmax(batch, dim = 1 )
148
165
inferred_zygosity += [zyg_decoder(pred)
149
166
for pred in predicted_classes]
150
167
151
168
# Use the VCFResultWriter to output predicted zygosities to a VCF file.
152
- result_writer = VCFResultWriter(vcf_loader, inferred_zygosity)
169
+ result_writer = VCFResultWriter(vcf_loader,
170
+ inferred_zygosities = inferred_zygosity,
171
+ output_location = " ./" )
153
172
154
173
result_writer.write_output()
155
174
0 commit comments