7
7
import rbfSVC_RNASeq
8
8
import neuralNetwork_RNASeq
9
9
import knn_RNASeq
10
+ import randomForest_RNASeq
10
11
import analysis
11
12
12
-
13
- # Resource: http://machinelearningmastery.com/get-your-hands-dirty-with-scikit-learn-now/
14
- # Python for Java Programmers: http://python4java.necaiseweb.org/Fundamentals/TheBasics
15
-
16
13
# run with down sampling and cross validation: python main.py GSE60361C13005Expression.txt expressionmRNAAnnotations.txt 1 1 1
17
14
# run with down sampling and without cross validation: python main.py GSE60361C13005Expression.txt expressionmRNAAnnotations.txt 1 1 0
18
15
# run without downsampling and with cross validation: python main.py GSE60361C13005Expression.txt expressionmRNAAnnotations.txt 1 0 1
@@ -54,6 +51,15 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
54
51
55
52
return knn_predictionResults
56
53
54
def rf(trainingData, testingData, trainingDataTargets, testingDataTargets):
    """Train a random-forest classifier and return its predictions on the test set.

    Mirrors the other classifier wrappers in this script (e.g. knn): fit on
    the training data/labels, then classify the held-out samples.

    Note: testingDataTargets is accepted only for signature parity with the
    sibling wrappers -- it is not read here; evaluation against the test
    labels is done by the caller via analysis.calculateEvaluations.
    """
    # fit the random forest on the training expression data and its labels
    randomForest_RNASeq.fitTrainingData(trainingData, trainingDataTargets)

    # classify the test samples with the fitted forest and hand back the labels
    return randomForest_RNASeq.predictTestData(testingData)
57
63
if __name__ == '__main__' :
58
64
t0 = time .clock ()
59
65
print "start"
@@ -96,6 +102,11 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
96
102
print " - Using Multi-Layer Perceptron (Neural Network)"
97
103
elif classifier == 3 :
98
104
print " - Using K Nearest Neighbor Classifier with k = {k}" .format (k = n_neighbors )
105
+ elif classifier == 4 :
106
+ print " - Using Random Forest Classifier"
107
+ else :
108
+ print "** ERROR: invalid classifier selection"
109
+ sys .exit (0 )
99
110
100
111
if downSampleFlag :
101
112
print "** Down sampling enabled **"
@@ -197,6 +208,15 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
197
208
foldsEvaluations .append (analysis .calculateEvaluations (knn_predictionResults , testingDataKey ))
198
209
# ***************** END KNN *****************
199
210
211
+ elif classifier == 4 :
212
+ # ***************** RF *****************
213
+ # fit and make predictions
214
+ rf_predictionResults = rf (trainingFolds , testingData , trainingKeys , testingDataKey )
215
+
216
+ # add the accuracies for this fold to the accuracies list
217
+ foldsEvaluations .append (analysis .calculateEvaluations (rf_predictionResults , testingDataKey ))
218
+ # ***************** END RF *****************
219
+
200
220
201
221
# increment iterator to process the next fold as testing data
202
222
iterator += 1
@@ -218,6 +238,10 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
218
238
# ***************** KNN *****************
219
239
analysis .analyzeAndWriteToFile ("KNearestNeighbor Classifier_{k}" .format (k = n_neighbors ), knn_predictionResults , testingDataKey , foldsEvaluations , 10 , 0 )
220
240
# ***************** END KNN *****************
241
+ elif classifier == 4 :
242
+ # ***************** RF *****************
243
+ analysis .analyzeAndWriteToFile ("Random Forest Classifier" , rf_predictionResults , testingDataKey , foldsEvaluations , 10 , 0 )
244
+ # ***************** END RF *****************
221
245
222
246
else :
223
247
# partition the down sampled data set into 70% training and 30% testing
@@ -258,7 +282,16 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
258
282
259
283
analysis .analyzeAndWriteToFile ("KNearestNeighbor Classifier_{k}" .format (k = n_neighbors ), knn_predictionResults , data .getDSTestingDataTargetValues (), foldsEvaluations , 1 , 1 )
260
284
# ***************** END KNN *****************
285
+ elif classifier == 4 :
286
+ # ***************** RF *****************
287
+ rf_predictionResults = rf (data .getDSTrainingData (), data .getDSTestingData (), data .getDSTargetValues (),
288
+ data .getDSTestingDataTargetValues ())
261
289
290
+ foldsEvaluations = [] # single fold list but we still need to use a 3D list
291
+ foldsEvaluations .append (analysis .calculateEvaluations (rf_predictionResults , data .getDSTestingDataTargetValues ()))
292
+
293
+ analysis .analyzeAndWriteToFile ("Random Forest Classifier" , rf_predictionResults , data .getDSTestingDataTargetValues (), foldsEvaluations , 1 , 1 )
294
+ # ***************** END RF *****************
262
295
263
296
else :
264
297
if crossValidateFlag :
@@ -319,6 +352,14 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
319
352
# add the accuracies for this fold to accuracies list
320
353
foldsEvaluations .append (analysis .calculateEvaluations (knn_predictionResults , testingDataKey ))
321
354
# ***************** END KNN *****************
355
+ elif classifier == 4 :
356
+ # ***************** RF *****************
357
+ # fit and make predictions
358
+ rf_predictionResults = rf (trainingFolds , testingData , trainingKeys , testingDataKey )
359
+
360
+ # add the accuracies for this fold to accuracies list
361
+ foldsEvaluations .append (analysis .calculateEvaluations (rf_predictionResults , testingDataKey ))
362
+ # ***************** END RF *****************
322
363
323
364
324
365
# increment iterator to process the next fold as testing data
@@ -341,6 +382,10 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
341
382
# ***************** KNN *****************
342
383
analysis .analyzeAndWriteToFile ("KNearestNeighbor Classifier_{k}" .format (k = n_neighbors ), knn_predictionResults , testingDataKey , foldsEvaluations , 10 , 2 )
343
384
# ***************** END KNN *****************
385
+ elif classifier == 4 :
386
+ # ***************** RF *****************
387
+ analysis .analyzeAndWriteToFile ("Random Forest Classifier" , rf_predictionResults , testingDataKey , foldsEvaluations , 10 , 2 )
388
+ # ***************** END RF *****************
344
389
345
390
else :
346
391
# partition the data set into 70% training and 30% testing
@@ -386,5 +431,18 @@ def knn(trainingData, testingData, trainingDataTargets, testingDataTargets):
386
431
analysis .analyzeAndWriteToFile ("KNearestNeighbor Classifier_{k}" .format (k = n_neighbors ), knn_predictionResults , data .getTestingDataTargetValues (), foldsEvaluations , 1 , 3 )
387
432
# ***************** END KNN *****************
388
433
434
+ elif classifier == 4 :
435
+ # ***************** RF *****************
436
+ rf_predictionResults = rf (data .getTrainingData (), data .getTestingData (), data .getTrainingDataTargetValues (),
437
+ data .getTestingDataTargetValues ())
438
+
439
+ # analyze results using robust evaluations
440
+ foldsEvaluations = []
441
+
442
+ foldsEvaluations .append (analysis .calculateEvaluations (rf_predictionResults , data .getTestingDataTargetValues ()))
443
+
444
+ analysis .analyzeAndWriteToFile ("Random Forest Classifier" , rf_predictionResults , data .getTestingDataTargetValues (), foldsEvaluations , 1 , 3 )
445
+ # ***************** END RF *****************
446
+
389
447
print "\n program execution: {t} seconds" .format (t = time .clock ()- t0 )
390
448
print "exiting"
0 commit comments