Merge branch 'unity'
Conflicts: .gitignore R-package/src/xgboost_R.cpp src/gbm/gblinear-inl.hpp tools/xgcombine_buffer.cpp
This commit is contained in:
@@ -34,7 +34,6 @@ xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
|
||||
xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
|
||||
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
|
||||
|
||||
|
||||
def ctypes2numpy(cptr, length, dtype):
|
||||
"""convert a ctypes pointer array to numpy array """
|
||||
assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
|
||||
@@ -304,6 +303,7 @@ class Booster:
|
||||
(ctypes.c_float*len(grad))(*grad),
|
||||
(ctypes.c_float*len(hess))(*hess),
|
||||
len(grad))
|
||||
|
||||
def eval_set(self, evals, it = 0, feval = None):
|
||||
"""evaluates by metric
|
||||
Args:
|
||||
@@ -332,24 +332,38 @@ class Booster:
|
||||
return res
|
||||
def eval(self, mat, name = 'eval', it = 0):
|
||||
return self.eval_set( [(mat,name)], it)
|
||||
def predict(self, data, output_margin=False, ntree_limit=0):
|
||||
def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False):
|
||||
"""
|
||||
predict with data
|
||||
Args:
|
||||
data: DMatrix
|
||||
the dmatrix storing the input
|
||||
the dmatrix storing the input
|
||||
output_margin: bool
|
||||
whether to output the raw, untransformed margin value
|
||||
|
||||
whether to output the raw, untransformed margin value
|
||||
ntree_limit: int
|
||||
limit number of trees in prediction, default to 0, 0 means using all the trees
|
||||
limit number of trees in prediction, default to 0, 0 means using all the trees
|
||||
pred_leaf: bool
|
||||
when this option is on, the output will be a matrix of (nsample, ntrees)
|
||||
with each record indicating the predicted leaf index of each sample in each tree
|
||||
Note that leaf indices are unique only within each tree, so you may find leaf 1 in both tree 1 and tree 0
|
||||
Returns:
|
||||
numpy array of prediction
|
||||
"""
|
||||
option_mask = 0
|
||||
if output_margin:
|
||||
option_mask += 1
|
||||
if pred_leaf:
|
||||
option_mask += 2
|
||||
length = ctypes.c_ulong()
|
||||
preds = xglib.XGBoosterPredict(self.handle, data.handle,
|
||||
int(output_margin), ntree_limit, ctypes.byref(length))
|
||||
return ctypes2numpy(preds, length.value, 'float32')
|
||||
option_mask, ntree_limit, ctypes.byref(length))
|
||||
preds = ctypes2numpy(preds, length.value, 'float32')
|
||||
if pred_leaf:
|
||||
preds = preds.astype('int32')
|
||||
nrow = data.num_row()
|
||||
if preds.size != nrow and preds.size % nrow == 0:
|
||||
preds = preds.reshape(nrow, preds.size / nrow)
|
||||
return preds
|
||||
def save_model(self, fname):
|
||||
""" save model to file
|
||||
Args:
|
||||
@@ -542,3 +556,4 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, metrics=[], \
|
||||
sys.stderr.write(res+'\n')
|
||||
results.append(res)
|
||||
return results
|
||||
|
||||
|
||||
@@ -8,7 +8,9 @@
|
||||
#include <algorithm>
|
||||
// include all std functions
|
||||
using namespace std;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define isnan(x) (_isnan(x) != 0)
|
||||
#endif
|
||||
#include "./xgboost_wrapper.h"
|
||||
#include "../src/data.h"
|
||||
#include "../src/learner/learner-inl.hpp"
|
||||
@@ -30,9 +32,9 @@ class Booster: public learner::BoostLearner {
|
||||
this->init_model = false;
|
||||
this->SetCacheData(mats);
|
||||
}
|
||||
inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
|
||||
inline const float *Pred(const DataMatrix &dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) {
|
||||
this->CheckInitModel();
|
||||
this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
|
||||
this->Predict(dmat, (option_mask&1) != 0, &this->preds_, ntree_limit, (option_mask&2) != 0);
|
||||
*len = static_cast<bst_ulong>(this->preds_.size());
|
||||
return BeginPtr(this->preds_);
|
||||
}
|
||||
@@ -44,7 +46,7 @@ class Booster: public learner::BoostLearner {
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
gpair_[j] = bst_gpair(grad[j], hess[j]);
|
||||
}
|
||||
gbm_->DoBoost(train.fmat(), train.info.info, &gpair_);
|
||||
gbm_->DoBoost(train.fmat(), this->FindBufferOffset(train), train.info.info, &gpair_);
|
||||
}
|
||||
inline void CheckInitModel(void) {
|
||||
if (!init_model) {
|
||||
@@ -132,7 +134,7 @@ extern "C"{
|
||||
bst_ulong nrow,
|
||||
bst_ulong ncol,
|
||||
float missing) {
|
||||
bool nan_missing = std::isnan(missing);
|
||||
bool nan_missing = isnan(missing);
|
||||
DMatrixSimple *p_mat = new DMatrixSimple();
|
||||
DMatrixSimple &mat = *p_mat;
|
||||
mat.info.info.num_row = nrow;
|
||||
@@ -140,7 +142,7 @@ extern "C"{
|
||||
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
|
||||
bst_ulong nelem = 0;
|
||||
for (bst_ulong j = 0; j < ncol; ++j) {
|
||||
if (std::isnan(data[j])) {
|
||||
if (isnan(data[j])) {
|
||||
utils::Check(nan_missing, "There are NAN in the matrix, however, you did not set missing=NAN");
|
||||
} else {
|
||||
if (nan_missing || data[j] != missing) {
|
||||
@@ -284,8 +286,8 @@ extern "C"{
|
||||
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||
return bst->eval_str.c_str();
|
||||
}
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
|
||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
|
||||
const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) {
|
||||
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), option_mask, ntree_limit, len);
|
||||
}
|
||||
void XGBoosterLoadModel(void *handle, const char *fname) {
|
||||
static_cast<Booster*>(handle)->LoadModel(fname);
|
||||
|
||||
@@ -17,6 +17,28 @@ typedef unsigned long bst_ulong;
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/*!
|
||||
* \brief initialize the sync module; this is needed when running in distributed mode
|
||||
* normally, argv needs to contain master_uri and master_port
|
||||
* if started using the submit_job_tcp script, passing its args through to this function will do
|
||||
* \param argc number of arguments
|
||||
* \param argv the arguments to be passed in sync module
|
||||
*/
|
||||
XGB_DLL void XGSyncInit(int argc, char *argv[]);
|
||||
/*!
|
||||
* \brief finalize sync module, call this when everything is done
|
||||
*/
|
||||
XGB_DLL void XGSyncFinalize(void);
|
||||
/*!
|
||||
* \brief get the rank
|
||||
* \return the rank of the current process
|
||||
*/
|
||||
XGB_DLL int XGSyncGetRank(void);
|
||||
/*!
|
||||
* \brief get the world size from sync
|
||||
* \return the number of distributed jobs running in the group
|
||||
*/
|
||||
XGB_DLL int XGSyncGetWorldSize(void);
|
||||
/*!
|
||||
* \brief load a data matrix
|
||||
* \return a loaded data matrix
|
||||
@@ -41,7 +63,7 @@ extern "C" {
|
||||
* \param col_ptr pointer to col headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \return created dmatrix
|
||||
*/
|
||||
@@ -178,12 +200,18 @@ extern "C" {
|
||||
* \brief make prediction based on dmat
|
||||
* \param handle handle
|
||||
* \param dmat data matrix
|
||||
* \param output_margin whether only output raw margin value
|
||||
* \param option_mask bit-mask of options taken in prediction, possible values
|
||||
* 0:normal prediction
|
||||
* 1:output margin instead of transformed value
|
||||
* 2:output leaf index of trees instead of leaf value; note that leaf indices are unique only within each tree
|
||||
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
|
||||
* when the parameter is set to 0, we will use all the trees
|
||||
* \param len used to store length of returning result
|
||||
*/
|
||||
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
|
||||
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat,
|
||||
int option_mask,
|
||||
unsigned ntree_limit,
|
||||
bst_ulong *len);
|
||||
/*!
|
||||
* \brief load model from existing file
|
||||
* \param handle handle
|
||||
|
||||
Reference in New Issue
Block a user