
Commit a0e07f1

Update demo scripts to use installed python library
1 parent ceb62e9 commit a0e07f1

15 files changed: +27 −65
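
Note on the recurring change: every script previously appended the in-tree wrapper/ directory to sys.path before importing xgboost; after this commit the demos assume the xgboost Python package is installed (for example via the package's setup script), so a bare import suffices. The remaining paired -/+ lines with identical visible content are whitespace-only cleanups that ride along with the commit. A minimal sketch of the before/after pattern, using the paths as they appear in the demos:

# before: make the in-tree module importable (fragile, depends on where the script runs)
import sys
sys.path.append('../../wrapper')
import xgboost as xgb

# after: rely on the installed package
import xgboost as xgb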

demo/binary_classification/mapfeat.py

+7 −8

@@ -1,17 +1,16 @@
 #!/usr/bin/python
-import sys
 
 def loadfmap( fname ):
     fmap = {}
     nmap = {}
-
+
     for l in open( fname ):
         arr = l.split()
-        if arr[0].find('.') != -1:
+        if arr[0].find('.') != -1:
             idx = int( arr[0].strip('.') )
-            assert idx not in fmap
+            assert idx not in fmap
             fmap[ idx ] = {}
-            ftype = arr[1].strip(':')
+            ftype = arr[1].strip(':')
             content = arr[2]
         else:
             content = arr[0]
@@ -23,7 +22,7 @@ def loadfmap( fname ):
             nmap[ len(nmap) ] = ftype+'='+k
     return fmap, nmap
 
-def write_nmap( fo, nmap ):
+def write_nmap( fo, nmap ):
     for i in range( len(nmap) ):
         fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
 
@@ -33,7 +32,7 @@ def write_nmap( fo, nmap ):
 write_nmap( fo, nmap )
 fo.close()
 
-fo = open( 'agaricus.txt', 'w' )
+fo = open( 'agaricus.txt', 'w' )
 for l in open( 'agaricus-lepiota.data' ):
     arr = l.split(',')
     if arr[0] == 'p':
@@ -47,4 +46,4 @@ def write_nmap( fo, nmap ):
 
 fo.close()
 
-
+

demo/guide-python/basic_walkthrough.py

+1 −5

@@ -1,10 +1,6 @@
 #!/usr/bin/python
-import sys
 import numpy as np
 import scipy.sparse
-# append the path to xgboost, you may need to change the following line
-# alternatively, you can add the path to PYTHONPATH environment variable
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### simple example
@@ -33,7 +29,7 @@
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
 bst.save_model('xgb.model')
-# load model and data in
+# load model and data in
 bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)
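
The second hunk's save/load round trip implies that preds2 from the reloaded booster should match the original preds; a quick consistency check one could append (not part of this commit):

import numpy as np
assert np.sum(np.abs(preds2 - preds)) == 0  # reloaded model must reproduce predictions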

demo/guide-python/boost_from_prediction.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

demo/guide-python/cross_validation.py

+1 −3

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
@@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
     labels = dtrain.get_label()
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
 
-param = {'max_depth':2, 'eta':1, 'silent':1}
+param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
 xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
        obj = logregobj, feval=evalerror)
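
For context on the second hunk: logregobj, passed as obj to xgb.cv, is the script's custom objective. A custom objective returns the per-instance gradient and hessian of the loss with respect to the raw margin. A sketch of a logistic-loss objective in the shape xgb.cv expects (the function body is not shown in this diff, so the details here are illustrative):

import numpy as np

def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # sigmoid of the raw margin
    grad = preds - labels                 # first derivative of log loss
    hess = preds * (1.0 - preds)          # second derivative
    return grad, hess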

demo/guide-python/custom_objective.py

+1 −3

@@ -1,11 +1,9 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ###
 # advanced: cutomsized loss function
-#
+#
 print ('start running example to used cutomized objective function')
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

demo/guide-python/generalized_linear_model.py

+3 −5

@@ -1,6 +1,4 @@
 #!/usr/bin/python
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ##
 # this script demonstrate how to fit generalized linear model in xgboost
@@ -9,17 +7,17 @@
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # change booster to gblinear, so that we are fitting a linear model
-# alpha is the L1 regularizer
+# alpha is the L1 regularizer
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
          'alpha': 0.0001, 'lambda': 1 }
 
 # normally, you do not need to set eta (step_size)
-# XGBoost uses a parallel coordinate descent algorithm (shotgun),
+# XGBoost uses a parallel coordinate descent algorithm (shotgun),
 # there could be affection on convergence with parallelization on certain cases
 # setting eta to be smaller value, e.g 0.5 can make the optimization more stable
-# param['eta'] = 1
+# param['eta'] = 1
 
 ##
 # the rest of settings are the same
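
As the trailing comment notes, the rest of the script follows the usual train/predict flow; a minimal sketch of how the gblinear configuration above would be used (round count and watchlist are illustrative, not part of this diff):

watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
preds = bst.predict(dtest)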

demo/guide-python/predict_first_ntree.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training

demo/guide-python/predict_leaf_indices.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training

demo/guide-python/sklearn_examples.py

−2

@@ -4,8 +4,6 @@
 @author: Jamie Hall
 '''
 
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 import numpy as np

demo/kaggle-higgs/higgs-cv.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training

demo/kaggle-higgs/higgs-numpy.py

+4 −12

@@ -1,14 +1,6 @@
 #!/usr/bin/python
-# this is the example script to use xgboost to train
-import inspect
-import os
-import sys
+# this is the example script to use xgboost to train
 import numpy as np
-# add path of xgboost python module
-code_path = os.path.join(
-    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
-
-sys.path.append(code_path)
 
 import xgboost as xgb
 
@@ -29,7 +21,7 @@
 sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
 sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
 
-# print weight statistics
+# print weight statistics
 print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
 
 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
-param['eta'] = 0.1
+param['eta'] = 0.1
 param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16
 
-# you can directly throw param in, though we want to watch multiple metrics here
+# you can directly throw param in, though we want to watch multiple metrics here
 plst = list(param.items())+[('eval_metric', 'ams@0.15')]
 
 watchlist = [ (xgmat,'train') ]
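
About the plst line in the last hunk: a dict can hold only one 'eval_metric' entry, so the script converts the parameters to a list of (key, value) pairs, which xgb.train also accepts, in order to watch both 'auc' and 'ams@0.15'. The idea in isolation (the train call's shape is assumed from the surrounding script, not shown in this diff):

param['eval_metric'] = 'auc'                                # dict: one value per key
plst = list(param.items()) + [('eval_metric', 'ams@0.15')]  # list form allows duplicate keys
bst = xgb.train(plst, xgmat, num_round, watchlist)          # both metrics get reported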

demo/kaggle-higgs/higgs-pred.py

+5 −8

@@ -1,17 +1,14 @@
 #!/usr/bin/python
-# make prediction
-import sys
+# make prediction
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # path to where the data lies
 dpath = 'data'
 
 modelfile = 'higgs.model'
 outfile = 'higgs.pred.csv'
-# make top 15% as positive
+# make top 15% as positive
 threshold_ratio = 0.15
 
 # load in training data, directly use numpy
@@ -24,7 +21,7 @@
 bst = xgb.Booster({'nthread':16}, model_file = modelfile)
 ypred = bst.predict( xgmat )
 
-res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
+res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
 
 rorder = {}
 for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@
 nhit = 0
 ntot = 0
 fo.write('EventId,RankOrder,Class\n')
-for k, v in res:
+for k, v in res:
     if rorder[k] <= ntop:
         lb = 's'
         nhit += 1
     else:
-        lb = 'b'
+        lb = 'b'
     # change output rank order to follow Kaggle convention
     fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
     ntot += 1

demo/kaggle-higgs/speedtest.py

−3

@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 from sklearn.ensemble import GradientBoostingClassifier
 import time

demo/multiclass_classification/train.py

−2

@@ -1,7 +1,5 @@
 #! /usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # label need to be 0 to num_class -1

demo/regression/mapfeat.py

+5 −6

@@ -1,28 +1,27 @@
 #!/usr/bin/python
-import sys
 
-fo = open( 'machine.txt', 'w' )
+fo = open( 'machine.txt', 'w' )
 cnt = 6
 fmap = {}
 for l in open( 'machine.data' ):
     arr = l.split(',')
     fo.write(arr[8])
     for i in range( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )
-
+
     if arr[0] not in fmap:
         fmap[arr[0]] = cnt
         cnt += 1
-
-    fo.write( ' %d:1' % fmap[arr[0]] )
+
+    fo.write( ' %d:1' % fmap[arr[0]] )
     fo.write('\n')
 
 fo.close()
 
 # create feature map for machine data
 fo = open('featmap.txt', 'w')
 # list from machine.names
-names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
+names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
 
 for i in range(0,6):
     fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
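
For orientation: the script writes LibSVM-format lines with PRP (column 8 of machine.data) as the label, the six numeric columns as features 0-5, and a one-hot vendor indicator starting at index 6, while featmap.txt names the six numeric features. A hypothetical output line (values invented for illustration):

198 0:125 1:256 2:6000 3:256 4:16 5:128 6:1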
