python module pass basic test
This commit is contained in:
parent
af100dd869
commit
301685e0a4
@ -27,26 +27,7 @@ bst.save_model('0001.model')
|
|||||||
# dump model
|
# dump model
|
||||||
bst.dump_model('dump.raw.txt')
|
bst.dump_model('dump.raw.txt')
|
||||||
# dump model with feature map
|
# dump model with feature map
|
||||||
bst.dump_model('dump.raw.txt','featmap.txt')
|
bst.dump_model('dump.nice.txt','featmap.txt')
|
||||||
|
|
||||||
###
|
|
||||||
# build dmatrix in python iteratively
|
|
||||||
#
|
|
||||||
print ('start running example of build DMatrix in python')
|
|
||||||
dtrain = xgb.DMatrix()
|
|
||||||
labels = []
|
|
||||||
for l in open('agaricus.txt.train'):
|
|
||||||
arr = l.split()
|
|
||||||
labels.append( int(arr[0]))
|
|
||||||
feats = []
|
|
||||||
for it in arr[1:]:
|
|
||||||
k,v = it.split(':')
|
|
||||||
feats.append( (int(k), float(v)) )
|
|
||||||
dtrain.add_row( feats )
|
|
||||||
dtrain.set_label( labels )
|
|
||||||
evallist = [(dtest,'eval'), (dtrain,'train')]
|
|
||||||
|
|
||||||
bst = xgb.train( param, dtrain, num_round, evallist )
|
|
||||||
|
|
||||||
###
|
###
|
||||||
# build dmatrix from scipy.sparse
|
# build dmatrix from scipy.sparse
|
||||||
@ -61,7 +42,6 @@ for l in open('agaricus.txt.train'):
|
|||||||
k,v = it.split(':')
|
k,v = it.split(':')
|
||||||
row.append(i); col.append(int(k)); dat.append(float(v))
|
row.append(i); col.append(int(k)); dat.append(float(v))
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||||
dtrain = xgb.DMatrix( csr )
|
dtrain = xgb.DMatrix( csr )
|
||||||
dtrain.set_label(labels)
|
dtrain.set_label(labels)
|
||||||
@ -81,16 +61,25 @@ bst = xgb.train( param, dtrain, num_round, evallist )
|
|||||||
#
|
#
|
||||||
print ('start running example to used cutomized objective function')
|
print ('start running example to used cutomized objective function')
|
||||||
|
|
||||||
# note: set objective= binary:logistic means the prediction will get logistic transformed
|
# note: for customized objective function, we leave objective as default
|
||||||
# in most case, we may want to leave it as default
|
# note: what we are getting is margin value in prediction
|
||||||
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
# you must know what you are doing
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
|
||||||
|
|
||||||
# user-defined objective function: given the prediction, return the gradient and second-order gradient
|
# user-defined objective function: given the prediction, return the gradient and second-order gradient
|
||||||
|
# this is loglikelihood loss
|
||||||
def logregobj(preds, dtrain):
|
def logregobj(preds, dtrain):
|
||||||
labels = dtrain.get_label()
|
labels = dtrain.get_label()
|
||||||
|
preds = 1.0 / (1.0 + np.exp(-preds))
|
||||||
grad = preds - labels
|
grad = preds - labels
|
||||||
hess = preds * (1.0-preds)
|
hess = preds * (1.0-preds)
|
||||||
return grad, hess
|
return grad, hess
|
||||||
|
|
||||||
# training with customized objective, we can also do step by step training, simply look at xgboost.py's implementation of train
|
# user defined evaluation function, return a pair metric_name, result
|
||||||
bst = xgb.train( param, dtrain, num_round, evallist, logregobj )
|
def evalerror(preds, dtrain):
|
||||||
|
labels = dtrain.get_label()
|
||||||
|
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||||
|
|
||||||
|
# training with customized objective, we can also do step by step training
|
||||||
|
# simply look at xgboost.py's implementation of train
|
||||||
|
bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import ctypes
|
|||||||
import os
|
import os
|
||||||
# optionally have scipy sparse, though not necessary
|
# optionally have scipy sparse, though not necessary
|
||||||
import numpy
|
import numpy
|
||||||
|
import sys
|
||||||
import numpy.ctypeslib
|
import numpy.ctypeslib
|
||||||
import scipy.sparse as scp
|
import scipy.sparse as scp
|
||||||
|
|
||||||
@ -13,13 +14,19 @@ XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
|
|||||||
# load in xgboost library
|
# load in xgboost library
|
||||||
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||||
|
|
||||||
xglib.XGDMatrixCreate.restype = ctypes.c_void_p
|
xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
|
||||||
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
|
||||||
xglib.XGDMatrixGetLabel.restype = ctypes.POINTER(ctypes.c_float)
|
xglib.XGDMatrixGetLabel.restype = ctypes.POINTER(ctypes.c_float)
|
||||||
xglib.XGDMatrixGetWeight.restype = ctypes.POINTER(ctypes.c_float)
|
xglib.XGDMatrixGetWeight.restype = ctypes.POINTER(ctypes.c_float)
|
||||||
xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
|
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
||||||
|
|
||||||
xglib.XGBoosterCreate.restype = ctypes.c_void_p
|
xglib.XGBoosterCreate.restype = ctypes.c_void_p
|
||||||
xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
|
xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
|
||||||
|
xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
|
||||||
|
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
|
||||||
|
|
||||||
|
|
||||||
def ctypes2numpy(cptr, length):
|
def ctypes2numpy(cptr, length):
|
||||||
# convert a ctypes pointer array to numpy
|
# convert a ctypes pointer array to numpy
|
||||||
@ -31,13 +38,13 @@ def ctypes2numpy( cptr, length ):
|
|||||||
# data matrix used in xgboost
|
# data matrix used in xgboost
|
||||||
class DMatrix:
|
class DMatrix:
|
||||||
# constructor
|
# constructor
|
||||||
def __init__(self, data=None, label=None, missing=0.0, weight = None):
|
def __init__(self, data, label=None, missing=0.0, weight = None):
|
||||||
# force into void_p; on Mac, things need to be passed in as void_p
|
# force into void_p; on Mac, things need to be passed in as void_p
|
||||||
self.handle = ctypes.c_void_p( xglib.XGDMatrixCreate() )
|
|
||||||
if data == None:
|
if data == None:
|
||||||
|
self.handle = None
|
||||||
return
|
return
|
||||||
if isinstance(data, str):
|
if isinstance(data, str):
|
||||||
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data.encode('utf-8')), 1)
|
self.handle = xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1)
|
||||||
elif isinstance(data, scp.csr_matrix):
|
elif isinstance(data, scp.csr_matrix):
|
||||||
self.__init_from_csr(data)
|
self.__init_from_csr(data)
|
||||||
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||||
@ -52,11 +59,10 @@ class DMatrix:
|
|||||||
self.set_label(label)
|
self.set_label(label)
|
||||||
if weight !=None:
|
if weight !=None:
|
||||||
self.set_weight(weight)
|
self.set_weight(weight)
|
||||||
|
|
||||||
# convert data from csr matrix
|
# convert data from csr matrix
|
||||||
def __init_from_csr(self, csr):
|
def __init_from_csr(self, csr):
|
||||||
assert len(csr.indices) == len(csr.data)
|
assert len(csr.indices) == len(csr.data)
|
||||||
xglib.XGDMatrixParseCSR( self.handle,
|
self.handle = xglib.XGDMatrixCreateFromCSR(
|
||||||
(ctypes.c_ulong * len(csr.indptr))(*csr.indptr),
|
(ctypes.c_ulong * len(csr.indptr))(*csr.indptr),
|
||||||
(ctypes.c_uint * len(csr.indices))(*csr.indices),
|
(ctypes.c_uint * len(csr.indices))(*csr.indices),
|
||||||
(ctypes.c_float * len(csr.data))(*csr.data),
|
(ctypes.c_float * len(csr.data))(*csr.data),
|
||||||
@ -64,16 +70,13 @@ class DMatrix:
|
|||||||
# convert data from numpy matrix
|
# convert data from numpy matrix
|
||||||
def __init_from_npy2d(self,mat,missing):
|
def __init_from_npy2d(self,mat,missing):
|
||||||
data = numpy.array(mat.reshape(mat.size), dtype='float32')
|
data = numpy.array(mat.reshape(mat.size), dtype='float32')
|
||||||
xglib.XGDMatrixParseMat( self.handle,
|
self.handle = xglib.XGDMatrixCreateFromMat(
|
||||||
data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
||||||
mat.shape[0], mat.shape[1], ctypes.c_float(missing))
|
mat.shape[0], mat.shape[1], ctypes.c_float(missing))
|
||||||
# destructor
|
# destructor
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
xglib.XGDMatrixFree(self.handle)
|
xglib.XGDMatrixFree(self.handle)
|
||||||
# load data from file
|
# load data from file
|
||||||
def load(self, fname, silent=True):
|
|
||||||
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
|
||||||
# load data from file
|
|
||||||
def save_binary(self, fname, silent=True):
|
def save_binary(self, fname, silent=True):
|
||||||
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
||||||
# set label of dmatrix
|
# set label of dmatrix
|
||||||
@ -89,25 +92,20 @@ class DMatrix:
|
|||||||
def get_label(self):
|
def get_label(self):
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
|
labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
|
||||||
return ctypes2numpy( labels, length.value );
|
return ctypes2numpy(labels, length.value)
|
||||||
# get weight from dmatrix
|
# get weight from dmatrix
|
||||||
def get_weight(self):
|
def get_weight(self):
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
weights = xglib.XGDMatrixGetWeight(self.handle, ctypes.byref(length))
|
weights = xglib.XGDMatrixGetWeight(self.handle, ctypes.byref(length))
|
||||||
return ctypes2numpy( weights, length.value );
|
return ctypes2numpy(weights, length.value)
|
||||||
# clear everything
|
|
||||||
def clear(self):
|
|
||||||
xglib.XGDMatrixClear(self.handle)
|
|
||||||
def num_row(self):
|
def num_row(self):
|
||||||
return xglib.XGDMatrixNumRow(self.handle)
|
return xglib.XGDMatrixNumRow(self.handle)
|
||||||
# append a row to DMatrix
|
# slice the DMatrix to return a new DMatrix that only contains rindex
|
||||||
def add_row(self, row):
|
def slice(self, rindex):
|
||||||
xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row) )
|
res = DMatrix(None)
|
||||||
# get n-throw from DMatrix
|
res.handle = xglib.XGDMatrixSliceDMatrix(
|
||||||
def __getitem__(self, ridx):
|
self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex))
|
||||||
length = ctypes.c_ulong()
|
return res
|
||||||
row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) );
|
|
||||||
return [ (int(row[i].findex),row[i].fvalue) for i in range(length.value) ]
|
|
||||||
|
|
||||||
class Booster:
|
class Booster:
|
||||||
"""learner class """
|
"""learner class """
|
||||||
@ -136,25 +134,18 @@ class Booster:
|
|||||||
xglib.XGBoosterSetParam(
|
xglib.XGBoosterSetParam(
|
||||||
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||||
ctypes.c_char_p(str(v).encode('utf-8')))
|
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||||
def update(self, dtrain):
|
def update(self, dtrain, it):
|
||||||
""" update """
|
""" update """
|
||||||
assert isinstance(dtrain, DMatrix)
|
assert isinstance(dtrain, DMatrix)
|
||||||
xglib.XGBoosterUpdateOneIter( self.handle, dtrain.handle )
|
xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle)
|
||||||
def boost(self, dtrain, grad, hess, bst_group = -1):
|
def boost(self, dtrain, grad, hess):
|
||||||
""" update """
|
""" update """
|
||||||
assert len(grad) == len(hess)
|
assert len(grad) == len(hess)
|
||||||
assert isinstance(dtrain, DMatrix)
|
assert isinstance(dtrain, DMatrix)
|
||||||
xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
|
xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
|
||||||
(ctypes.c_float*len(grad))(*grad),
|
(ctypes.c_float*len(grad))(*grad),
|
||||||
(ctypes.c_float*len(hess))(*hess),
|
(ctypes.c_float*len(hess))(*hess),
|
||||||
len(grad), bst_group )
|
len(grad))
|
||||||
def update_interact(self, dtrain, action, booster_index=None):
|
|
||||||
""" beta: update with specified action"""
|
|
||||||
assert isinstance(dtrain, DMatrix)
|
|
||||||
if booster_index != None:
|
|
||||||
self.set_param('interact:booster_index', str(booster_index))
|
|
||||||
xglib.XGBoosterUpdateInteract(
|
|
||||||
self.handle, dtrain.handle, ctypes.c_char_p(str(action)) )
|
|
||||||
def eval_set(self, evals, it = 0):
|
def eval_set(self, evals, it = 0):
|
||||||
for d in evals:
|
for d in evals:
|
||||||
assert isinstance(d[0], DMatrix)
|
assert isinstance(d[0], DMatrix)
|
||||||
@ -162,12 +153,12 @@ class Booster:
|
|||||||
dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
|
dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
|
||||||
evnames = (ctypes.c_char_p * len(evals))(
|
evnames = (ctypes.c_char_p * len(evals))(
|
||||||
* [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
|
* [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
|
||||||
xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) )
|
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
||||||
def eval(self, mat, name = 'eval', it = 0):
|
def eval(self, mat, name = 'eval', it = 0):
|
||||||
self.eval_set( [(mat,name)], it)
|
return self.eval_set( [(mat,name)], it)
|
||||||
def predict(self, data, bst_group = -1):
|
def predict(self, data):
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
preds = xglib.XGBoosterPredict( self.handle, data.handle, ctypes.byref(length), bst_group)
|
preds = xglib.XGBoosterPredict(self.handle, data.handle, ctypes.byref(length))
|
||||||
return ctypes2numpy(preds, length.value)
|
return ctypes2numpy(preds, length.value)
|
||||||
def save_model(self, fname):
|
def save_model(self, fname):
|
||||||
""" save model to file """
|
""" save model to file """
|
||||||
@ -175,27 +166,74 @@ class Booster:
|
|||||||
def load_model(self, fname):
|
def load_model(self, fname):
|
||||||
"""load model from file"""
|
"""load model from file"""
|
||||||
xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) )
|
xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) )
|
||||||
def dump_model(self, fname, fmap=''):
|
def dump_model(self, fo, fmap=''):
|
||||||
"""dump model into text file"""
|
"""dump model into text file"""
|
||||||
xglib.XGBoosterDumpModel(
|
if isinstance(fo,str):
|
||||||
self.handle, ctypes.c_char_p(fname.encode('utf-8')),
|
fo = open(fo,'w')
|
||||||
ctypes.c_char_p(fmap.encode('utf-8')))
|
need_close = True
|
||||||
|
else:
|
||||||
|
need_close = False
|
||||||
|
ret = self.get_dump(fmap)
|
||||||
|
for i in range(len(ret)):
|
||||||
|
fo.write('booster[%d]:\n' %i)
|
||||||
|
fo.write( ret[i] )
|
||||||
|
if need_close:
|
||||||
|
fo.close()
|
||||||
|
def get_dump(self, fmap=''):
|
||||||
|
"""get dump of model as list of strings """
|
||||||
|
length = ctypes.c_ulong()
|
||||||
|
sarr = xglib.XGBoosterDumpModel(self.handle, ctypes.c_char_p(fmap.encode('utf-8')), ctypes.byref(length))
|
||||||
|
res = []
|
||||||
|
for i in range(length.value):
|
||||||
|
res.append( str(sarr[i]) )
|
||||||
|
return res
|
||||||
|
def get_fscore(self, fmap=''):
|
||||||
|
""" get feature importance of each feature """
|
||||||
|
trees = self.get_dump(fmap)
|
||||||
|
fmap = {}
|
||||||
|
for tree in trees:
|
||||||
|
print tree
|
||||||
|
for l in tree.split('\n'):
|
||||||
|
arr = l.split('[')
|
||||||
|
if len(arr) == 1:
|
||||||
|
continue
|
||||||
|
fid = arr[1].split(']')[0]
|
||||||
|
fid = fid.split('<')[0]
|
||||||
|
if fid not in fmap:
|
||||||
|
fmap[fid] = 1
|
||||||
|
else:
|
||||||
|
fmap[fid]+= 1
|
||||||
|
return fmap
|
||||||
|
|
||||||
def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
|
def evaluate(bst, evals, it, feval = None):
|
||||||
|
"""evaluation on eval set"""
|
||||||
|
if feval != None:
|
||||||
|
res = '[%d]' % it
|
||||||
|
for dm, evname in evals:
|
||||||
|
name, val = feval(bst.predict(dm), dm)
|
||||||
|
res += '\t%s-%s:%f' % (evname, name, val)
|
||||||
|
else:
|
||||||
|
res = bst.eval_set(evals, it)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None):
|
||||||
""" train a booster with given parameters """
|
""" train a booster with given parameters """
|
||||||
bst = Booster(params, [dtrain]+[ d[0] for d in evals ] )
|
bst = Booster(params, [dtrain]+[ d[0] for d in evals ] )
|
||||||
if obj == None:
|
if obj == None:
|
||||||
for i in range(num_boost_round):
|
for i in range(num_boost_round):
|
||||||
bst.update( dtrain )
|
bst.update( dtrain, i )
|
||||||
if len(evals) != 0:
|
if len(evals) != 0:
|
||||||
bst.eval_set( evals, i )
|
sys.stderr.write(evaluate(bst, evals, i, feval)+'\n')
|
||||||
else:
|
else:
|
||||||
|
if len(evals) != 0 and feval == None:
|
||||||
|
print 'you need to provide your own evaluation function'
|
||||||
|
|
||||||
# try customized objective function
|
# try customized objective function
|
||||||
for i in range(num_boost_round):
|
for i in range(num_boost_round):
|
||||||
pred = bst.predict( dtrain )
|
pred = bst.predict( dtrain )
|
||||||
grad, hess = obj( pred, dtrain )
|
grad, hess = obj( pred, dtrain )
|
||||||
bst.boost( dtrain, grad, hess )
|
bst.boost( dtrain, grad, hess )
|
||||||
if len(evals) != 0:
|
if len(evals) != 0:
|
||||||
bst.eval_set( evals, i )
|
sys.stderr.write(evaluate(bst, evals, i, feval)+'\n')
|
||||||
return bst
|
return bst
|
||||||
|
|
||||||
|
|||||||
@ -20,9 +20,11 @@ class Booster: public learner::BoostLearner<FMatrixS> {
|
|||||||
public:
|
public:
|
||||||
explicit Booster(const std::vector<DataMatrix*>& mats) {
|
explicit Booster(const std::vector<DataMatrix*>& mats) {
|
||||||
this->silent = 1;
|
this->silent = 1;
|
||||||
|
this->init_model = false;
|
||||||
this->SetCacheData(mats);
|
this->SetCacheData(mats);
|
||||||
}
|
}
|
||||||
const float *Pred(const DataMatrix &dmat, size_t *len) {
|
const float *Pred(const DataMatrix &dmat, size_t *len) {
|
||||||
|
this->CheckInitModel();
|
||||||
this->Predict(dmat, &this->preds_);
|
this->Predict(dmat, &this->preds_);
|
||||||
*len = this->preds_.size();
|
*len = this->preds_.size();
|
||||||
return &this->preds_[0];
|
return &this->preds_[0];
|
||||||
@ -37,6 +39,15 @@ class Booster: public learner::BoostLearner<FMatrixS> {
|
|||||||
}
|
}
|
||||||
gbm_->DoBoost(gpair_, train.fmat, train.info.root_index);
|
gbm_->DoBoost(gpair_, train.fmat, train.info.root_index);
|
||||||
}
|
}
|
||||||
|
inline void CheckInitModel(void) {
|
||||||
|
if (!init_model) {
|
||||||
|
this->InitModel(); init_model = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline void LoadModel(const char *fname) {
|
||||||
|
learner::BoostLearner<FMatrixS>::LoadModel(fname);
|
||||||
|
this->init_model = true;
|
||||||
|
}
|
||||||
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
|
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
|
||||||
model_dump = this->DumpModel(fmap, with_stats);
|
model_dump = this->DumpModel(fmap, with_stats);
|
||||||
model_dump_cptr.resize(model_dump.size());
|
model_dump_cptr.resize(model_dump.size());
|
||||||
@ -52,6 +63,9 @@ class Booster: public learner::BoostLearner<FMatrixS> {
|
|||||||
// temporary space to save model dump
|
// temporary space to save model dump
|
||||||
std::vector<std::string> model_dump;
|
std::vector<std::string> model_dump;
|
||||||
std::vector<const char*> model_dump_cptr;
|
std::vector<const char*> model_dump_cptr;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool init_model;
|
||||||
};
|
};
|
||||||
} // namespace wrapper
|
} // namespace wrapper
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
@ -199,6 +213,7 @@ extern "C"{
|
|||||||
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
|
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
|
||||||
Booster *bst = static_cast<Booster*>(handle);
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||||
|
bst->CheckInitModel();
|
||||||
bst->CheckInit(dtr);
|
bst->CheckInit(dtr);
|
||||||
bst->UpdateOneIter(iter, *dtr);
|
bst->UpdateOneIter(iter, *dtr);
|
||||||
}
|
}
|
||||||
@ -206,6 +221,7 @@ extern "C"{
|
|||||||
float *grad, float *hess, size_t len) {
|
float *grad, float *hess, size_t len) {
|
||||||
Booster *bst = static_cast<Booster*>(handle);
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||||
|
bst->CheckInitModel();
|
||||||
bst->CheckInit(dtr);
|
bst->CheckInit(dtr);
|
||||||
bst->BoostOneIter(*dtr, grad, hess, len);
|
bst->BoostOneIter(*dtr, grad, hess, len);
|
||||||
}
|
}
|
||||||
@ -217,6 +233,7 @@ extern "C"{
|
|||||||
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
|
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
|
||||||
names.push_back(std::string(evnames[i]));
|
names.push_back(std::string(evnames[i]));
|
||||||
}
|
}
|
||||||
|
bst->CheckInitModel();
|
||||||
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||||
return bst->eval_str.c_str();
|
return bst->eval_str.c_str();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -242,7 +242,7 @@ class FMatrixS : public FMatrixInterface<FMatrixS>{
|
|||||||
* \brief save column access data into stream
|
* \brief save column access data into stream
|
||||||
* \param fo output stream to save to
|
* \param fo output stream to save to
|
||||||
*/
|
*/
|
||||||
inline void SaveColAccess(utils::IStream &fo) {
|
inline void SaveColAccess(utils::IStream &fo) const {
|
||||||
fo.Write(&num_buffered_row_, sizeof(num_buffered_row_));
|
fo.Write(&num_buffered_row_, sizeof(num_buffered_row_));
|
||||||
if (num_buffered_row_ != 0) {
|
if (num_buffered_row_ != 0) {
|
||||||
SaveBinary(fo, col_ptr_, col_data_);
|
SaveBinary(fo, col_ptr_, col_data_);
|
||||||
|
|||||||
@ -15,8 +15,13 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
|
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
|
||||||
|
if (dmat.magic == DMatrixSimple::kMagic){
|
||||||
|
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
|
||||||
|
p_dmat->SaveBinary(fname, silent);
|
||||||
|
} else {
|
||||||
utils::Error("not implemented");
|
utils::Error("not implemented");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace io
|
} // namespace io
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@ -148,7 +148,7 @@ class DMatrixSimple : public DataMatrix {
|
|||||||
* \param fname name of binary data
|
* \param fname name of binary data
|
||||||
* \param silent whether print information or not
|
* \param silent whether print information or not
|
||||||
*/
|
*/
|
||||||
inline void SaveBinary(const char* fname, bool silent = false) {
|
inline void SaveBinary(const char* fname, bool silent = false) const {
|
||||||
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
|
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
|
||||||
int magic = kMagic;
|
int magic = kMagic;
|
||||||
fs.Write(&magic, sizeof(magic));
|
fs.Write(&magic, sizeof(magic));
|
||||||
|
|||||||
@ -58,7 +58,7 @@ struct MetaInfo {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline void SaveBinary(utils::IStream &fo) {
|
inline void SaveBinary(utils::IStream &fo) const {
|
||||||
fo.Write(&num_row, sizeof(num_row));
|
fo.Write(&num_row, sizeof(num_row));
|
||||||
fo.Write(&num_col, sizeof(num_col));
|
fo.Write(&num_col, sizeof(num_col));
|
||||||
fo.Write(labels);
|
fo.Write(labels);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user