
Commit a0e07f1

Update demo scripts to use installed python library
1 parent ceb62e9 commit a0e07f1

15 files changed: +27 −65
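
Note on the recurring change: every script previously appended the in-tree wrapper/ directory to sys.path before importing xgboost; after this commit the demos assume the xgboost Python package is installed (for example via the package's setup script), so a bare import suffices. The remaining paired -/+ lines with identical visible content are whitespace-only cleanups that ride along with the commit. A minimal sketch of the before/after pattern, using the paths as they appear in the demos:

# before: make the in-tree module importable (fragile, depends on where the script runs)
import sys
sys.path.append('../../wrapper')
import xgboost as xgb

# after: rely on the installed package
import xgboost as xgb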

demo/binary_classification/mapfeat.py

+7 −8

@@ -1,17 +1,16 @@
 #!/usr/bin/python
-import sys
 
 def loadfmap( fname ):
     fmap = {}
     nmap = {}
-
+
     for l in open( fname ):
         arr = l.split()
-        if arr[0].find('.') != -1:
+        if arr[0].find('.') != -1:
             idx = int( arr[0].strip('.') )
-            assert idx not in fmap
+            assert idx not in fmap
             fmap[ idx ] = {}
-            ftype = arr[1].strip(':')
+            ftype = arr[1].strip(':')
             content = arr[2]
         else:
             content = arr[0]
@@ -23,7 +22,7 @@ def loadfmap( fname ):
             nmap[ len(nmap) ] = ftype+'='+k
     return fmap, nmap
 
-def write_nmap( fo, nmap ):
+def write_nmap( fo, nmap ):
     for i in range( len(nmap) ):
         fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
 
@@ -33,7 +32,7 @@ def write_nmap( fo, nmap ):
 write_nmap( fo, nmap )
 fo.close()
 
-fo = open( 'agaricus.txt', 'w' )
+fo = open( 'agaricus.txt', 'w' )
 for l in open( 'agaricus-lepiota.data' ):
     arr = l.split(',')
     if arr[0] == 'p':
@@ -47,4 +46,4 @@ def write_nmap( fo, nmap ):
 
 fo.close()
 
-
+

demo/guide-python/basic_walkthrough.py

+1 −5

@@ -1,10 +1,6 @@
 #!/usr/bin/python
-import sys
 import numpy as np
 import scipy.sparse
-# append the path to xgboost, you may need to change the following line
-# alternatively, you can add the path to PYTHONPATH environment variable
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### simple example
@@ -33,7 +29,7 @@
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
 bst.save_model('xgb.model')
-# load model and data in
+# load model and data in
 bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)
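
The second hunk's save/load round trip implies that preds2 from the reloaded booster should match the original preds; a quick consistency check one could append (not part of this commit):

import numpy as np
assert np.sum(np.abs(preds2 - preds)) == 0  # reloaded model must reproduce predictions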

demo/guide-python/boost_from_prediction.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

demo/guide-python/cross_validation.py

+1 −3

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training
@@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
     labels = dtrain.get_label()
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
 
-param = {'max_depth':2, 'eta':1, 'silent':1}
+param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
 xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
        obj = logregobj, feval=evalerror)
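
For context on the second hunk: logregobj, passed as obj to xgb.cv, is the script's custom objective. A custom objective returns the per-instance gradient and hessian of the loss with respect to the raw margin. A sketch of a logistic-loss objective in the shape xgb.cv expects (the function body is not shown in this diff, so the details here are illustrative):

import numpy as np

def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # sigmoid of the raw margin
    grad = preds - labels                 # first derivative of log loss
    hess = preds * (1.0 - preds)          # second derivative
    return grad, hess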

demo/guide-python/custom_objective.py

+1 −3

@@ -1,11 +1,9 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ###
 # advanced: cutomsized loss function
-#
+#
 print ('start running example to used cutomized objective function')
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

demo/guide-python/generalized_linear_model.py

+3 −5

@@ -1,6 +1,4 @@
 #!/usr/bin/python
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ##
 # this script demonstrate how to fit generalized linear model in xgboost
@@ -9,17 +7,17 @@
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # change booster to gblinear, so that we are fitting a linear model
-# alpha is the L1 regularizer
+# alpha is the L1 regularizer
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
          'alpha': 0.0001, 'lambda': 1 }
 
 # normally, you do not need to set eta (step_size)
-# XGBoost uses a parallel coordinate descent algorithm (shotgun),
+# XGBoost uses a parallel coordinate descent algorithm (shotgun),
 # there could be affection on convergence with parallelization on certain cases
 # setting eta to be smaller value, e.g 0.5 can make the optimization more stable
-# param['eta'] = 1
+# param['eta'] = 1
 
 ##
 # the rest of settings are the same
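
As the trailing comment notes, the rest of the script follows the usual train/predict flow; a minimal sketch of how the gblinear configuration above would be used (round count and watchlist are illustrative, not part of this diff):

watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
preds = bst.predict(dtest)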

demo/guide-python/predict_first_ntree.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training

demo/guide-python/predict_leaf_indices.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training

demo/guide-python/sklearn_examples.py

−2

@@ -4,8 +4,6 @@
 @author: Jamie Hall
 '''
 
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 import numpy as np

demo/kaggle-higgs/higgs-cv.py

−2

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 
 ### load data in do training

demo/kaggle-higgs/higgs-numpy.py

+4 −12

@@ -1,14 +1,6 @@
 #!/usr/bin/python
-# this is the example script to use xgboost to train
-import inspect
-import os
-import sys
+# this is the example script to use xgboost to train
 import numpy as np
-# add path of xgboost python module
-code_path = os.path.join(
-    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
-
-sys.path.append(code_path)
 
 import xgboost as xgb
 
@@ -29,7 +21,7 @@
 sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
 sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
 
-# print weight statistics
+# print weight statistics
 print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
 
 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
-param['eta'] = 0.1
+param['eta'] = 0.1
 param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16
 
-# you can directly throw param in, though we want to watch multiple metrics here
+# you can directly throw param in, though we want to watch multiple metrics here
 plst = list(param.items())+[('eval_metric', 'ams@0.15')]
 
 watchlist = [ (xgmat,'train') ]
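
About the plst line in the last hunk: a dict can hold only one 'eval_metric' entry, so the script converts the parameters to a list of (key, value) pairs, which xgb.train also accepts, in order to watch both 'auc' and 'ams@0.15'. The idea in isolation (the train call's shape is assumed from the surrounding script, not shown in this diff):

param['eval_metric'] = 'auc'                                # dict: one value per key
plst = list(param.items()) + [('eval_metric', 'ams@0.15')]  # list form allows duplicate keys
bst = xgb.train(plst, xgmat, num_round, watchlist)          # both metrics get reported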

demo/kaggle-higgs/higgs-pred.py

+5 −8

@@ -1,17 +1,14 @@
 #!/usr/bin/python
-# make prediction
-import sys
+# make prediction
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # path to where the data lies
 dpath = 'data'
 
 modelfile = 'higgs.model'
 outfile = 'higgs.pred.csv'
-# make top 15% as positive
+# make top 15% as positive
 threshold_ratio = 0.15
 
 # load in training data, directly use numpy
@@ -24,7 +21,7 @@
 bst = xgb.Booster({'nthread':16}, model_file = modelfile)
 ypred = bst.predict( xgmat )
 
-res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
+res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
 
 rorder = {}
 for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@
 nhit = 0
 ntot = 0
 fo.write('EventId,RankOrder,Class\n')
-for k, v in res:
+for k, v in res:
     if rorder[k] <= ntop:
         lb = 's'
         nhit += 1
     else:
-        lb = 'b'
+        lb = 'b'
     # change output rank order to follow Kaggle convention
     fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
     ntot += 1

demo/kaggle-higgs/speedtest.py

−3

@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 from sklearn.ensemble import GradientBoostingClassifier
 import time

demo/multiclass_classification/train.py

−2

@@ -1,7 +1,5 @@
 #! /usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 
 # label need to be 0 to num_class -1

demo/regression/mapfeat.py

+5 −6

@@ -1,28 +1,27 @@
 #!/usr/bin/python
-import sys
 
-fo = open( 'machine.txt', 'w' )
+fo = open( 'machine.txt', 'w' )
 cnt = 6
 fmap = {}
 for l in open( 'machine.data' ):
     arr = l.split(',')
     fo.write(arr[8])
     for i in range( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )
-
+
     if arr[0] not in fmap:
         fmap[arr[0]] = cnt
         cnt += 1
-
-    fo.write( ' %d:1' % fmap[arr[0]] )
+
+    fo.write( ' %d:1' % fmap[arr[0]] )
     fo.write('\n')
 
 fo.close()
 
 # create feature map for machine data
 fo = open('featmap.txt', 'w')
 # list from machine.names
-names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
+names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
 
 for i in range(0,6):
     fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
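
For orientation: the script writes LibSVM-format lines with PRP (column 8 of machine.data) as the label, the six numeric columns as features 0-5, and a one-hot vendor indicator starting at index 6, while featmap.txt names the six numeric features. A hypothetical output line (values invented for illustration):

198 0:125 1:256 2:6000 3:256 4:16 5:128 6:1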
