Update demo scripts to use installed python library
parent ceb62e9231
commit a0e07f16c4
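
Every hunk below makes the same change: the demo scripts used to splice the in-tree wrapper directory onto sys.path before importing xgboost, and now import the installed package directly. A minimal sketch of the new pattern, assuming the library has been installed first (for instance via `python setup.py install` from the wrapper directory; the install step itself is not part of this diff):

    # before: the scripts made the in-tree wrapper importable by hand
    #   import sys
    #   sys.path.append('../../wrapper')
    # after: the installed package resolves from site-packages
    import xgboost as xgb

    dtrain = xgb.DMatrix('../data/agaricus.txt.train')  # demo data path, assumed present
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    bst = xgb.train(param, dtrain, 2)  # train 2 rounds as a smoke test
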
@@ -1,17 +1,16 @@
 #!/usr/bin/python
-import sys

 def loadfmap( fname ):
     fmap = {}
     nmap = {}

     for l in open( fname ):
         arr = l.split()
         if arr[0].find('.') != -1:
             idx = int( arr[0].strip('.') )
             assert idx not in fmap
             fmap[ idx ] = {}
             ftype = arr[1].strip(':')
             content = arr[2]
         else:
             content = arr[0]
@@ -23,7 +22,7 @@ def loadfmap( fname ):
             nmap[ len(nmap) ] = ftype+'='+k
     return fmap, nmap

 def write_nmap( fo, nmap ):
     for i in range( len(nmap) ):
         fo.write('%d\t%s\ti\n' % (i, nmap[i]) )

@@ -33,7 +32,7 @@ fo = open( 'featmap.txt', 'w' )
 write_nmap( fo, nmap )
 fo.close()

 fo = open( 'agaricus.txt', 'w' )
 for l in open( 'agaricus-lepiota.data' ):
     arr = l.split(',')
     if arr[0] == 'p':
@@ -47,4 +46,4 @@ for l in open( 'agaricus-lepiota.data' ):

 fo.close()

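For context on the hunks above: loadfmap expands each categorical column into one indicator feature per value, and write_nmap records them as tab-separated index/name/type lines ('i' for indicator). A small hedged illustration with invented entries:

    nmap = {0: 'cap-shape=bell', 1: 'cap-shape=conical'}  # invented feature names
    with open('featmap.txt', 'w') as fo:
        for i in range(len(nmap)):
            fo.write('%d\t%s\ti\n' % (i, nmap[i]))  # 'i' marks an indicator feature
    # featmap.txt is what bst.dump_model later uses to print readable splits
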
@@ -1,10 +1,6 @@
 #!/usr/bin/python
-import sys
 import numpy as np
 import scipy.sparse
-# append the path to xgboost, you may need to change the following line
-# alternatively, you can add the path to PYTHONPATH environment variable
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### simple example
@@ -33,7 +29,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt')
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
 bst.save_model('xgb.model')
 # load model and data in
 bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)

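The second hunk above exercises serialization of both the model and the DMatrix. A short self-contained check, using the demo's file names, that the reloaded booster reproduces the original predictions (data paths are assumptions):

    import numpy as np
    import xgboost as xgb

    dtrain = xgb.DMatrix('../data/agaricus.txt.train')
    dtest = xgb.DMatrix('../data/agaricus.txt.test')
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    bst = xgb.train(param, dtrain, 2)
    preds = bst.predict(dtest)

    # serialize model and data, reload both, and re-predict
    bst.save_model('xgb.model')
    dtest.save_binary('dtest.buffer')
    bst2 = xgb.Booster(model_file='xgb.model')
    dtest2 = xgb.DMatrix('dtest.buffer')
    preds2 = bst2.predict(dtest2)
    assert np.allclose(preds, preds2), 'round-trip changed predictions'
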
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training
@@ -56,7 +54,7 @@ def evalerror(preds, dtrain):
     labels = dtrain.get_label()
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

 param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
 xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
        obj = logregobj, feval=evalerror)

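The xgb.cv call above passes obj = logregobj, whose definition falls outside the hunk. A hedged reconstruction of a logistic-loss objective in the shape xgboost expects (a gradient and a hessian per prediction; the raw-margin convention matches evalerror's `preds > 0.0` test):

    import numpy as np

    def logregobj(preds, dtrain):
        # custom objective: gradient and hessian of logistic loss
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))  # sigmoid of the raw margin
        grad = preds - labels
        hess = preds * (1.0 - preds)
        return grad, hess
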
@@ -1,11 +1,9 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ###
 # advanced: cutomsized loss function
 #
 print ('start running example to used cutomized objective function')

 dtrain = xgb.DMatrix('../data/agaricus.txt.train')

@@ -1,6 +1,4 @@
 #!/usr/bin/python
-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb
 ##
 # this script demonstrate how to fit generalized linear model in xgboost
@@ -9,17 +7,17 @@ import xgboost as xgb
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # change booster to gblinear, so that we are fitting a linear model
 # alpha is the L1 regularizer
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
          'alpha': 0.0001, 'lambda': 1 }

 # normally, you do not need to set eta (step_size)
 # XGBoost uses a parallel coordinate descent algorithm (shotgun),
 # there could be affection on convergence with parallelization on certain cases
 # setting eta to be smaller value, e.g 0.5 can make the optimization more stable
 # param['eta'] = 1

 ##
 # the rest of settings are the same

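A sketch of how the parameters above are typically consumed; the watchlist and round count are assumptions, not shown in this hunk:

    import xgboost as xgb

    dtrain = xgb.DMatrix('../data/agaricus.txt.train')
    dtest = xgb.DMatrix('../data/agaricus.txt.test')
    # gblinear fits a regularized linear model instead of trees:
    # alpha is the L1 penalty, lambda the L2 penalty on the weights
    param = {'silent': 1, 'objective': 'binary:logistic', 'booster': 'gblinear',
             'alpha': 0.0001, 'lambda': 1}
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]  # assumed
    num_round = 4                                     # assumed
    bst = xgb.train(param, dtrain, num_round, watchlist)
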
@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training

@@ -4,8 +4,6 @@ Created on 1 Apr 2015
 @author: Jamie Hall
 '''

-import sys
-sys.path.append('../../wrapper')
 import xgboost as xgb

 import numpy as np

@@ -1,7 +1,5 @@
 #!/usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper')
 import xgboost as xgb

 ### load data in do training

@@ -1,14 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import inspect
-import os
-import sys
 import numpy as np
-# add path of xgboost python module
-code_path = os.path.join(
-    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
-
-sys.path.append(code_path)

 import xgboost as xgb

@@ -29,7 +21,7 @@ weight = dtrain[:,31] * float(test_size) / len(label)
 sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
 sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )

 # print weight statistics
 print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))

 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
@@ -42,13 +34,13 @@ param = {}
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['eta'] = 0.1
 param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16

 # you can directly throw param in, though we want to watch multiple metrics here
 plst = list(param.items())+[('eval_metric', 'ams@0.15')]

 watchlist = [ (xgmat,'train') ]

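The per-element generator sums above can also be written with vectorized numpy, which is equivalent and faster on the full Higgs arrays; a hedged sketch with stand-in data:

    import numpy as np

    label = np.array([1.0, 0.0, 0.0, 1.0, 0.0])  # stand-in for the Higgs labels
    weight = np.ones(len(label))                 # stand-in for the event weights

    sum_wpos = weight[label == 1.0].sum()
    sum_wneg = weight[label == 0.0].sum()

    param = {'objective': 'binary:logitraw', 'eta': 0.1, 'max_depth': 6,
             'eval_metric': 'auc', 'silent': 1,
             'scale_pos_weight': sum_wneg / sum_wpos}
    # watch AMS at the 15% cutoff alongside AUC, as the demo does
    plst = list(param.items()) + [('eval_metric', 'ams@0.15')]
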
@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # make prediction
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb

 # path to where the data lies
@@ -11,7 +8,7 @@ dpath = 'data'

 modelfile = 'higgs.model'
 outfile = 'higgs.pred.csv'
 # make top 15% as positive
 threshold_ratio = 0.15

 # load in training data, directly use numpy
@@ -24,7 +21,7 @@ xgmat = xgb.DMatrix( data, missing = -999.0 )
 bst = xgb.Booster({'nthread':16}, model_file = modelfile)
 ypred = bst.predict( xgmat )

 res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]

 rorder = {}
 for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -36,12 +33,12 @@ fo = open(outfile, 'w')
 nhit = 0
 ntot = 0
 fo.write('EventId,RankOrder,Class\n')
 for k, v in res:
     if rorder[k] <= ntop:
         lb = 's'
         nhit += 1
     else:
         lb = 'b'
     # change output rank order to follow Kaggle convention
     fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
     ntot += 1

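The loop that fills rorder is cut off by the hunk boundary above; a hedged reconstruction of the ranking step with invented scores (rank 1 is the highest-scoring event, and the top threshold_ratio fraction is labeled 's'):

    ypred = [0.9, 0.1, 0.7]         # invented predictions
    idx = [100000, 100001, 100002]  # invented event ids
    threshold_ratio = 0.15

    res = [(int(idx[i]), ypred[i]) for i in range(len(ypred))]
    rorder = {}
    for k, v in sorted(res, key=lambda x: -x[1]):
        rorder[k] = len(rorder) + 1  # assumed loop body: rank by descending score
    ntop = int(threshold_ratio * len(rorder))
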
@@ -1,9 +1,6 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
-import sys
 import numpy as np
-# add path of xgboost python module
-sys.path.append('../../wrapper/')
 import xgboost as xgb
 from sklearn.ensemble import GradientBoostingClassifier
 import time

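Since this file benchmarks xgboost against sklearn's GradientBoostingClassifier, a minimal hedged timing harness on toy data (the demo itself uses the Higgs CSV):

    import time
    import numpy as np
    import xgboost as xgb
    from sklearn.ensemble import GradientBoostingClassifier

    X = np.random.rand(1000, 10)                  # toy features
    y = (np.random.rand(1000) > 0.5).astype(int)  # toy labels

    tic = time.time()
    xgb.train({'objective': 'binary:logistic', 'silent': 1},
              xgb.DMatrix(X, label=y), 10)
    print('xgboost: %.3fs' % (time.time() - tic))

    tic = time.time()
    GradientBoostingClassifier(n_estimators=10).fit(X, y)
    print('sklearn: %.3fs' % (time.time() - tic))
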
@@ -1,7 +1,5 @@
 #! /usr/bin/python
-import sys
 import numpy as np
-sys.path.append('../../wrapper/')
 import xgboost as xgb

 # label need to be 0 to num_class -1

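The comment above states the one multiclass constraint: labels must be integers in [0, num_class - 1]. A hedged minimal configuration on toy data (class count and parameters invented):

    import numpy as np
    import xgboost as xgb

    data = np.random.rand(30, 4)              # toy 3-class problem
    label = np.random.randint(0, 3, size=30)  # labels already in [0, 2]
    dtrain = xgb.DMatrix(data, label=label)

    param = {'objective': 'multi:softmax', 'num_class': 3,
             'max_depth': 3, 'eta': 0.3, 'silent': 1}
    bst = xgb.train(param, dtrain, 5)
    pred = bst.predict(dtrain)  # multi:softmax returns a class index per row
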
@@ -1,7 +1,6 @@
 #!/usr/bin/python
-import sys

 fo = open( 'machine.txt', 'w' )
 cnt = 6
 fmap = {}
 for l in open( 'machine.data' ):
@@ -9,12 +8,12 @@ for l in open( 'machine.data' ):
     fo.write(arr[8])
     for i in range( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )

     if arr[0] not in fmap:
         fmap[arr[0]] = cnt
         cnt += 1

     fo.write( ' %d:1' % fmap[arr[0]] )
     fo.write('\n')

 fo.close()
@@ -22,7 +21,7 @@ fo.close()
 # create feature map for machine data
 fo = open('featmap.txt', 'w')
 # list from machine.names
 names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];

 for i in range(0,6):
     fo.write( '%d\t%s\tint\n' % (i, names[i+1]))

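The script above rewrites each machine.data row into LibSVM format: target first (PRP, column 8), then the six numeric columns, then a one-hot indicator for the vendor. A hedged trace with an illustrative row:

    row = 'adviser,32/60,125,256,6000,256,16,128,198,199'  # illustrative row
    arr = row.split(',')
    fmap = {'adviser': 6}  # first unseen vendor takes the next free index, cnt = 6

    line = arr[8]                           # PRP, the regression target
    for i in range(0, 6):
        line += ' %d:%s' % (i, arr[i + 2])  # MYCT..CHMAX as numeric features
    line += ' %d:1' % fmap[arr[0]]          # one-hot vendor indicator
    print(line)  # -> 198 0:125 1:256 2:6000 3:256 4:16 5:128 6:1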