add base_margin
This commit is contained in:
@@ -90,3 +90,22 @@ def evalerror(preds, dtrain):
|
||||
# training with customized objective, we can also do step by step training
|
||||
# simply look at xgboost.py's implementation of train
|
||||
bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
|
||||
|
||||
|
||||
###
|
||||
# advanced: start from an initial base prediction
|
||||
#
|
||||
print ('start running example to start from a initial prediction')
|
||||
# specify parameters via map; definitions are the same as in the C++ version
|
||||
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||
# train xgboost for 1 round
|
||||
bst = xgb.train( param, dtrain, 1, evallist )
|
||||
# Note: set_base_margin needs the margin value, not the transformed prediction
|
||||
# predicting with output_margin=True always gives the margin values before the logistic transformation
|
||||
ptrain = bst.predict(dtrain, output_margin=True)
|
||||
ptest = bst.predict(dtest, output_margin=True)
|
||||
dtrain.set_base_margin(ptrain)
|
||||
dtest.set_base_margin(ptest)
|
||||
|
||||
print ('this is result of running from initial prediction')
|
||||
bst = xgb.train( param, dtrain, 1, evallist )
|
||||
|
||||
@@ -18,8 +18,7 @@ xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
|
||||
xglib.XGDMatrixGetLabel.restype = ctypes.POINTER(ctypes.c_float)
|
||||
xglib.XGDMatrixGetWeight.restype = ctypes.POINTER(ctypes.c_float)
|
||||
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
|
||||
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
||||
|
||||
xglib.XGBoosterCreate.restype = ctypes.c_void_p
|
||||
@@ -77,28 +76,46 @@ class DMatrix:
|
||||
# destructor
|
||||
def __del__(self):
|
||||
xglib.XGDMatrixFree(self.handle)
|
||||
# read a float info vector from the matrix by field name
|
||||
def __get_float_info(self, field):
|
||||
length = ctypes.c_ulong()
|
||||
ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
|
||||
ctypes.byref(length))
|
||||
return ctypes2numpy(ret, length.value)
|
||||
def __set_float_info(self, field, data):
|
||||
xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')),
|
||||
(ctypes.c_float*len(data))(*data), len(data))
|
||||
# save the matrix to a binary file
|
||||
def save_binary(self, fname, silent=True):
|
||||
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
||||
# set label of dmatrix
|
||||
def set_label(self, label):
|
||||
xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label))
|
||||
self.__set_float_info('label', label)
|
||||
# set weight of each instance
|
||||
def set_weight(self, weight):
    """
    set weight of each instance
    weight: sequence of float weights, stored in the 'weight' info field
    """
    # forward the weight argument itself; the previous code passed the
    # undefined name `label`, which raised NameError on every call
    self.__set_float_info('weight', weight)
|
||||
# set initialized margin prediction
|
||||
def set_base_margin(self, margin):
|
||||
"""
|
||||
set base margin of booster to start from
|
||||
this can be used to specify a prediction value of
|
||||
existing model to be base_margin
|
||||
However, remember margin is needed, instead of transformed prediction
|
||||
e.g. for logistic regression: need to put in value before logistic transformation
|
||||
see also example/demo.py
|
||||
"""
|
||||
self.__set_float_info('base_margin', margin)
|
||||
# set group size of dmatrix, used for rank
|
||||
def set_group(self, group):
|
||||
xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group))
|
||||
# set weight of each instance
|
||||
def set_weight(self, weight):
|
||||
xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight))
|
||||
# get label from dmatrix
|
||||
def get_label(self):
|
||||
length = ctypes.c_ulong()
|
||||
labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
|
||||
return ctypes2numpy(labels, length.value)
|
||||
return self.__get_float_info('label')
|
||||
# get weight from dmatrix
|
||||
def get_weight(self):
|
||||
length = ctypes.c_ulong()
|
||||
weights = xglib.XGDMatrixGetWeight(self.handle, ctypes.byref(length))
|
||||
return ctypes2numpy(weights, length.value)
|
||||
return self.__get_float_info('weight')
|
||||
# get base_margin from dmatrix
|
||||
def get_base_margin(self):
|
||||
return self.__get_float_info('base_margin')
|
||||
def num_row(self):
|
||||
return xglib.XGDMatrixNumRow(self.handle)
|
||||
# slice the DMatrix to return a new DMatrix that only contains rindex
|
||||
@@ -161,9 +178,15 @@ class Booster:
|
||||
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
||||
def eval(self, mat, name = 'eval', it = 0):
|
||||
return self.eval_set( [(mat,name)], it)
|
||||
def predict(self, data):
|
||||
def predict(self, data, output_margin=False):
|
||||
"""
|
||||
predict with data
|
||||
data: the dmatrix storing the input
|
||||
output_margin: whether output raw margin value that is untransformed
|
||||
"""
|
||||
length = ctypes.c_ulong()
|
||||
preds = xglib.XGBoosterPredict(self.handle, data.handle, ctypes.byref(length))
|
||||
preds = xglib.XGBoosterPredict(self.handle, data.handle,
|
||||
int(output_margin), ctypes.byref(length))
|
||||
return ctypes2numpy(preds, length.value)
|
||||
def save_model(self, fname):
|
||||
""" save model to file """
|
||||
|
||||
@@ -23,9 +23,9 @@ class Booster: public learner::BoostLearner<FMatrixS> {
|
||||
this->init_model = false;
|
||||
this->SetCacheData(mats);
|
||||
}
|
||||
const float *Pred(const DataMatrix &dmat, size_t *len) {
|
||||
const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) {
|
||||
this->CheckInitModel();
|
||||
this->Predict(dmat, &this->preds_);
|
||||
this->Predict(dmat, output_margin, &this->preds_);
|
||||
*len = this->preds_.size();
|
||||
return &this->preds_[0];
|
||||
}
|
||||
@@ -163,15 +163,11 @@ extern "C"{
|
||||
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
|
||||
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
|
||||
}
|
||||
void XGDMatrixSetLabel(void *handle, const float *label, size_t len) {
|
||||
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
||||
pmat->info.labels.resize(len);
|
||||
memcpy(&(pmat->info).labels[0], label, sizeof(float) * len);
|
||||
}
|
||||
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len) {
|
||||
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
||||
pmat->info.weights.resize(len);
|
||||
memcpy(&(pmat->info).weights[0], weight, sizeof(float) * len);
|
||||
// Copy a caller-supplied float array into the info vector selected by `field`.
//   handle: pointer to a DataMatrix
//   field:  name of the info vector, resolved by info.GetInfo
//   info:   source array holding `len` floats
//   len:    number of floats to copy; the target vector is resized to this
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) {
  std::vector<float> &vec =
      static_cast<DataMatrix*>(handle)->info.GetInfo(field);
  vec.resize(len);
  // &vec[0] is not a valid address when the vector is empty, so skip the
  // copy entirely when there is nothing to copy
  if (len != 0) {
    memcpy(&vec[0], info, sizeof(float) * len);
  }
}
|
||||
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
|
||||
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
||||
@@ -181,15 +177,11 @@ extern "C"{
|
||||
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
|
||||
}
|
||||
}
|
||||
const float* XGDMatrixGetLabel(const void *handle, size_t* len) {
|
||||
const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
|
||||
*len = pmat->info.labels.size();
|
||||
return &(pmat->info.labels[0]);
|
||||
}
|
||||
const float* XGDMatrixGetWeight(const void *handle, size_t* len) {
|
||||
const DataMatrix *pmat = static_cast<const DataMatrix*>(handle);
|
||||
*len = pmat->info.weights.size();
|
||||
return &(pmat->info.weights[0]);
|
||||
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) {
|
||||
const std::vector<float> &vec =
|
||||
static_cast<const DataMatrix*>(handle)->info.GetInfo(field);
|
||||
*len = vec.size();
|
||||
return &vec[0];
|
||||
}
|
||||
size_t XGDMatrixNumRow(const void *handle) {
|
||||
return static_cast<const DataMatrix*>(handle)->info.num_row;
|
||||
@@ -238,8 +230,8 @@ extern "C"{
|
||||
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||
return bst->eval_str.c_str();
|
||||
}
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len) {
|
||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), len);
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) {
|
||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
|
||||
}
|
||||
void XGBoosterLoadModel(void *handle, const char *fname) {
|
||||
static_cast<Booster*>(handle)->LoadModel(fname);
|
||||
|
||||
@@ -64,19 +64,13 @@ extern "C" {
|
||||
*/
|
||||
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
|
||||
/*!
|
||||
* \brief set label of the training matrix
|
||||
* \brief set float vector to a content in info
|
||||
* \param handle an instance of data matrix
|
||||
* \param label pointer to label
|
||||
* \param field field name, e.g. label, weight, base_margin
|
||||
* \param array pointer to float vector
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetLabel(void *handle, const float *label, size_t len);
|
||||
/*!
|
||||
* \brief set weight of each instance
|
||||
* \param handle a instance of data matrix
|
||||
* \param weight data pointer to weights
|
||||
* \param len length of array
|
||||
*/
|
||||
void XGDMatrixSetWeight(void *handle, const float *weight, size_t len);
|
||||
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len);
|
||||
/*!
|
||||
* \brief set group sizes of the data matrix (used for ranking)
|
||||
* \param handle an instance of data matrix
|
||||
@@ -85,19 +79,13 @@ extern "C" {
|
||||
*/
|
||||
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
|
||||
/*!
|
||||
* \brief get label set from matrix
|
||||
* \brief get float info vector from matrix
|
||||
* \param handle an instance of data matrix
|
||||
* \param len used to set result length
|
||||
* \param field field name
|
||||
* \return pointer to the requested float vector
|
||||
*/
|
||||
const float* XGDMatrixGetLabel(const void *handle, size_t* out_len);
|
||||
/*!
|
||||
* \brief get weight set from matrix
|
||||
* \param handle a instance of data matrix
|
||||
* \param len used to set result length
|
||||
* \return pointer to the weight
|
||||
*/
|
||||
const float* XGDMatrixGetWeight(const void *handle, size_t* out_len);
|
||||
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
|
||||
/*!
|
||||
* \brief return number of rows
|
||||
*/
|
||||
@@ -154,9 +142,10 @@ extern "C" {
|
||||
* \brief make prediction based on dmat
|
||||
* \param handle handle
|
||||
* \param dmat data matrix
|
||||
* \param output_margin whether only output raw margin value
|
||||
* \param len used to store length of returning result
|
||||
*/
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, size_t *len);
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len);
|
||||
/*!
|
||||
* \brief load model from existing file
|
||||
* \param handle handle
|
||||
|
||||
Reference in New Issue
Block a user