From e6b8b23a2c84d38fbef753aea11e8a2d3b6d7260 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 16 May 2015 12:59:55 -0700 Subject: [PATCH] allow booster to be pickable, add copy function --- demo/guide-python/basic_walkthrough.py | 27 ++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py index ba8a4319f..cdff65c33 100755 --- a/demo/guide-python/basic_walkthrough.py +++ b/demo/guide-python/basic_walkthrough.py @@ -1,7 +1,9 @@ #!/usr/bin/python import numpy as np import scipy.sparse +import pickle import xgboost as xgb +import copy ### simple example # load file from text file, also binary buffer generated by xgboost @@ -19,7 +21,7 @@ bst = xgb.train(param, dtrain, num_round, watchlist) # this is prediction preds = bst.predict(dtest) labels = dtest.get_label() -print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))) +print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))) bst.save_model('0001.model') # dump model bst.dump_model('dump.raw.txt') @@ -28,6 +30,7 @@ bst.dump_model('dump.nice.txt','../data/featmap.txt') # save dmatrix into binary buffer dtest.save_binary('dtest.buffer') +# save model bst.save_model('xgb.model') # load model and data in bst2 = xgb.Booster(model_file='xgb.model') @@ -36,6 +39,14 @@ preds2 = bst2.predict(dtest2) # assert they are the same assert np.sum(np.abs(preds2-preds)) == 0 +# alternatively, you can pickle the booster +pks = pickle.dumps(bst2) +# restore the booster from the pickled bytes +bst3 = pickle.loads(pks) +preds3 = bst3.predict(dtest2) +# assert the unpickled booster reproduces the original predictions +assert np.sum(np.abs(preds3-preds)) == 0 + ### # build dmatrix from scipy.sparse print ('start running example of build DMatrix from scipy.sparse CSR Matrix') @@ -44,22 +55,22 @@ row = []; col = []; dat = [] i = 0 for l in open('../data/agaricus.txt.train'): arr = 
l.split() - labels.append( int(arr[0])) + labels.append(int(arr[0])) for it in arr[1:]: k,v = it.split(':') row.append(i); col.append(int(k)); dat.append(float(v)) i += 1 -csr = scipy.sparse.csr_matrix( (dat, (row,col)) ) -dtrain = xgb.DMatrix( csr, label = labels ) +csr = scipy.sparse.csr_matrix((dat, (row,col))) +dtrain = xgb.DMatrix(csr, label = labels) watchlist = [(dtest,'eval'), (dtrain,'train')] -bst = xgb.train( param, dtrain, num_round, watchlist ) +bst = xgb.train(param, dtrain, num_round, watchlist) print ('start running example of build DMatrix from scipy.sparse CSC Matrix') # we can also construct from csc matrix -csc = scipy.sparse.csc_matrix( (dat, (row,col)) ) +csc = scipy.sparse.csc_matrix((dat, (row,col))) dtrain = xgb.DMatrix(csc, label=labels) watchlist = [(dtest,'eval'), (dtrain,'train')] -bst = xgb.train( param, dtrain, num_round, watchlist ) +bst = xgb.train(param, dtrain, num_round, watchlist) print ('start running example of build DMatrix from numpy array') # NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation @@ -67,6 +78,6 @@ print ('start running example of build DMatrix from numpy array') npymat = csr.todense() dtrain = xgb.DMatrix(npymat, label = labels) watchlist = [(dtest,'eval'), (dtrain,'train')] -bst = xgb.train( param, dtrain, num_round, watchlist ) +bst = xgb.train(param, dtrain, num_round, watchlist)