From 3c1ed847fb374d7a15b03deb0f48e0af2e629f57 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Tue, 26 Aug 2014 18:06:22 -0700
Subject: [PATCH 1/9] remove dependency on bst

---
 demo/binary_classification/mushroom.conf | 8 ++++----
 demo/kaggle-higgs/higgs-numpy.py         | 4 ++--
 demo/multiclass_classification/train.py  | 4 ++--
 demo/rank/mq2008.conf                    | 8 ++++----
 demo/regression/machine.conf             | 8 ++++----
 src/learner/learner-inl.hpp              | 7 ++++++-
 wrapper/python-example/demo.py           | 6 +++---
 7 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/demo/binary_classification/mushroom.conf b/demo/binary_classification/mushroom.conf
index d364905f7..d2566f132 100644
--- a/demo/binary_classification/mushroom.conf
+++ b/demo/binary_classification/mushroom.conf
@@ -6,13 +6,13 @@ objective = binary:logistic

 # Tree Booster Parameters
 # step size shrinkage
-bst:eta = 1.0
+eta = 1.0
 # minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
 # minimum sum of instance weight(hessian) needed in a child
-bst:min_child_weight = 1
+min_child_weight = 1
 # maximum depth of a tree
-bst:max_depth = 3
+max_depth = 3

 # Task Parameters
 # the number of round to do boosting
diff --git a/demo/kaggle-higgs/higgs-numpy.py b/demo/kaggle-higgs/higgs-numpy.py
index bd60f074f..1e7448a4c 100755
--- a/demo/kaggle-higgs/higgs-numpy.py
+++ b/demo/kaggle-higgs/higgs-numpy.py
@@ -42,8 +42,8 @@ param = {}
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
-param['bst:eta'] = 0.1
-param['bst:max_depth'] = 6
+param['eta'] = 0.1
+param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16
diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py
index 702542a4c..f387de7c0 100755
--- a/demo/multiclass_classification/train.py
+++ b/demo/multiclass_classification/train.py
@@ -25,8 +25,8 @@ param = {}
 # use softmax multi-class classification
 param['objective'] = 'multi:softmax'
 # scale weight of positive examples
-param['bst:eta'] = 0.1
-param['bst:max_depth'] = 6
+param['eta'] = 0.1
+param['max_depth'] = 6
 param['silent'] = 1
 param['nthread'] = 4
 param['num_class'] = 6
diff --git a/demo/rank/mq2008.conf b/demo/rank/mq2008.conf
index 90aadec4e..a19758bb7 100644
--- a/demo/rank/mq2008.conf
+++ b/demo/rank/mq2008.conf
@@ -5,13 +5,13 @@ objective="rank:pairwise"

 # Tree Booster Parameters
 # step size shrinkage
-bst:eta = 0.1
+eta = 0.1
 # minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
 # minimum sum of instance weight(hessian) needed in a child
-bst:min_child_weight = 0.1
+min_child_weight = 0.1
 # maximum depth of a tree
-bst:max_depth = 6
+max_depth = 6

 # Task parameters
 # the number of round to do boosting
diff --git a/demo/regression/machine.conf b/demo/regression/machine.conf
index f5a5163a8..8c677a502 100644
--- a/demo/regression/machine.conf
+++ b/demo/regression/machine.conf
@@ -7,13 +7,13 @@ objective = reg:linear

 # Tree Booster Parameters
 # step size shrinkage
-bst:eta = 1.0
+eta = 1.0
 # minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
 # minimum sum of instance weight(hessian) needed in a child
-bst:min_child_weight = 1
+min_child_weight = 1
 # maximum depth of a tree
-bst:max_depth = 3
+max_depth = 3

 # Task parameters
 # the number of round to do boosting
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index 7bf8c33ac..40ef274ee 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -79,6 +79,11 @@ class BoostLearner {
    * \param val value of the parameter
    */
   inline void SetParam(const char *name, const char *val) {
+    // in this version, bst: prefix is no longer required
+    if (strncmp(name, "bst:", 4) != 0) {
+      std::string n = "bst:"; n += name;
+      this->SetParam(n.c_str(), val);
+    }
     if (!strcmp(name, "silent")) silent = atoi(val);
     if (!strcmp(name, "prob_buffer_row")) prob_buffer_row = static_cast<float>(atof(val));
     if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
@@ -91,7 +96,7 @@ class BoostLearner {
       if (!strcmp(name, "objective")) name_obj_ = val;
       if (!strcmp(name, "booster")) name_gbm_ = val;
       mparam.SetParam(name, val);
-    }
+    }
     if (gbm_ != NULL) gbm_->SetParam(name, val);
     if (obj_ != NULL) obj_->SetParam(name, val);
     if (gbm_ == NULL || obj_ == NULL) {
diff --git a/wrapper/python-example/demo.py b/wrapper/python-example/demo.py
index 52d565456..687b491a4 100755
--- a/wrapper/python-example/demo.py
+++ b/wrapper/python-example/demo.py
@@ -13,7 +13,7 @@ dtrain = xgb.DMatrix('agaricus.txt.train')
 dtest = xgb.DMatrix('agaricus.txt.test')

 # specify parameters via map, definition are same as c++ version
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
 # specify validations set to watch performance
 evallist = [(dtest,'eval'), (dtrain,'train')]

@@ -75,7 +75,7 @@ print ('start running example to used cutomized objective function')
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
+param = {'max_depth':2, 'eta':1, 'silent':1 }

 # user define objective function, given prediction, return gradient and second order gradient
 # this is loglikelihood loss
@@ -107,7 +107,7 @@ bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
 #
 print ('start running example to start from a initial prediction')
 # specify parameters via map, definition are same as c++ version
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
 # train xgboost for 1 round
 bst = xgb.train( param, dtrain, 1, evallist )
 # Note: we need the margin value instead of transformed prediction in set_base_margin

From 97467fe807d4f22e0842c926f46e138248a1d6dc Mon Sep 17 00:00:00 2001
From: tqchen
Date: Tue, 26 Aug 2014 19:12:51 -0700
Subject: [PATCH 2/9] chg size_t to uint64_t

---
 wrapper/xgboost_R.cpp       |  6 ++--
 wrapper/xgboost_wrapper.cpp | 64 +++++++++++++++++++------------------
 wrapper/xgboost_wrapper.h   | 36 +++++++++++----------
 3 files changed, 55 insertions(+), 51 deletions(-)

diff --git a/wrapper/xgboost_R.cpp b/wrapper/xgboost_R.cpp
index 65085c885..76a1f2840 100644
--- a/wrapper/xgboost_R.cpp
+++ b/wrapper/xgboost_R.cpp
@@ -119,7 +119,7 @@ extern "C" {
     }
   }
   SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
-    size_t olen;
+    uint64_t olen;
     const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                              CHAR(asChar(field)), &olen);
     SEXP ret = PROTECT(allocVector(REALSXP, olen));
@@ -188,7 +188,7 @@ extern "C" {
                                 &vec_dmats[0], &vec_sptr[0], len));
   }
   SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
-    size_t olen;
+    uint64_t olen;
     const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                         R_ExternalPtrAddr(dmat),
                                         asInteger(output_margin),
@@ -207,7
+207,7 @@ extern "C" { XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); } void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { - size_t olen; + uint64_t olen; const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), CHAR(asChar(fmap)), &olen); diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 975d48015..32e382070 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -23,14 +23,14 @@ class Booster: public learner::BoostLearner { this->init_model = false; this->SetCacheData(mats); } - const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) { + const float *Pred(const DataMatrix &dmat, int output_margin, uint64_t *len) { this->CheckInitModel(); this->Predict(dmat, output_margin, &this->preds_); *len = this->preds_.size(); return &this->preds_[0]; } inline void BoostOneIter(const DataMatrix &train, - float *grad, float *hess, size_t len) { + float *grad, float *hess, uint64_t len) { this->gpair_.resize(len); const unsigned ndata = static_cast(len); #pragma omp parallel for schedule(static) @@ -48,7 +48,7 @@ class Booster: public learner::BoostLearner { learner::BoostLearner::LoadModel(fname); this->init_model = true; } - inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) { + inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, uint64_t *len) { model_dump = this->DumpModel(fmap, with_stats); model_dump_cptr.resize(model_dump.size()); for (size_t i = 0; i < model_dump.size(); ++i) { @@ -76,35 +76,37 @@ extern "C"{ void* XGDMatrixCreateFromFile(const char *fname, int silent) { return LoadDataMatrix(fname, silent, false); } - void* XGDMatrixCreateFromCSR(const size_t *indptr, + void* XGDMatrixCreateFromCSR(const uint64_t *indptr, const unsigned *indices, const float *data, - size_t nindptr, - size_t nelem) { + uint64_t nindptr, + uint64_t nelem) { DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.row_ptr_.resize(nindptr); - memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr); + for (uint64_t i = 0; i < nindptr; ++i) { + mat.row_ptr_[i] = static_cast(indptr[i]); + } mat.row_data_.resize(nelem); - for (size_t i = 0; i < nelem; ++i) { + for (uint64_t i = 0; i < nelem; ++i) { mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]); mat.info.info.num_col = std::max(mat.info.info.num_col, - static_cast(indices[i]+1)); + static_cast(indices[i]+1)); } mat.info.info.num_row = nindptr - 1; return p_mat; } void* XGDMatrixCreateFromMat(const float *data, - size_t nrow, - size_t ncol, + uint64_t nrow, + uint64_t ncol, float missing) { DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.info.info.num_row = nrow; mat.info.info.num_col = ncol; - for (size_t i = 0; i < nrow; ++i, data += ncol) { - size_t nelem = 0; - for (size_t j = 0; j < ncol; ++j) { + for (uint64_t i = 0; i < nrow; ++i, data += ncol) { + uint64_t nelem = 0; + for (uint64_t j = 0; j < ncol; ++j) { if (data[j] != missing) { mat.row_data_.push_back(SparseBatch::Entry(j, data[j])); ++nelem; @@ -116,7 +118,7 @@ extern "C"{ } void* XGDMatrixSliceDMatrix(void *handle, const int *idxset, - size_t len) { + uint64_t len) { DMatrixSimple tmp; DataMatrix &dsrc = *static_cast(handle); if (dsrc.magic != DMatrixSimple::kMagic) { @@ -137,10 +139,10 @@ extern "C"{ iter->BeforeFirst(); utils::Assert(iter->Next(), "slice"); const SparseBatch &batch = iter->Value(); - for (size_t i = 0; i < len; ++i) { + for (uint64_t i = 0; i < len; 
++i) { const int ridx = idxset[i]; SparseBatch::Inst inst = batch[ridx]; - utils::Check(static_cast(ridx) < batch.size, "slice index exceed number of rows"); + utils::Check(static_cast(ridx) < batch.size, "slice index exceed number of rows"); ret.row_data_.resize(ret.row_data_.size() + inst.length); memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, sizeof(SparseBatch::Entry) * inst.length); @@ -163,46 +165,46 @@ extern "C"{ void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { SaveDataMatrix(*static_cast(handle), fname, silent); } - void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) { + void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, uint64_t len) { std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); vec.resize(len); memcpy(&vec[0], info, sizeof(float) * len); } - void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, size_t len) { + void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, uint64_t len) { std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); vec.resize(len); memcpy(&vec[0], info, sizeof(unsigned) * len); } - void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) { + void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len) { DataMatrix *pmat = static_cast(handle); pmat->info.group_ptr.resize(len + 1); pmat->info.group_ptr[0] = 0; - for (size_t i = 0; i < len; ++i) { + for (uint64_t i = 0; i < len; ++i) { pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i]; } } - const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) { + const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* len) { const std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); *len = vec.size(); return &vec[0]; } - const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, size_t* len) { + const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* len) { const std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); *len = vec.size(); return &vec[0]; } - size_t XGDMatrixNumRow(const void *handle) { + uint64_t XGDMatrixNumRow(const void *handle) { return static_cast(handle)->info.num_row(); } // xgboost implementation - void *XGBoosterCreate(void *dmats[], size_t len) { + void *XGBoosterCreate(void *dmats[], uint64_t len) { std::vector mats; - for (size_t i = 0; i < len; ++i) { + for (uint64_t i = 0; i < len; ++i) { DataMatrix *dtr = static_cast(dmats[i]); mats.push_back(dtr); } @@ -222,7 +224,7 @@ extern "C"{ bst->UpdateOneIter(iter, *dtr); } void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, size_t len) { + float *grad, float *hess, uint64_t len) { Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); bst->CheckInitModel(); @@ -230,11 +232,11 @@ extern "C"{ bst->BoostOneIter(*dtr, grad, hess, len); } const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], size_t len) { + const char *evnames[], uint64_t len) { Booster *bst = static_cast(handle); std::vector names; std::vector mats; - for (size_t i = 0; i < len; ++i) { + for (uint64_t i = 0; i < len; ++i) { mats.push_back(static_cast(dmats[i])); names.push_back(std::string(evnames[i])); } @@ -242,7 +244,7 @@ extern "C"{ bst->eval_str = bst->EvalOneIter(iter, mats, names); return bst->eval_str.c_str(); } - const float *XGBoosterPredict(void 
*handle, void *dmat, int output_margin, size_t *len) { + const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len) { return static_cast(handle)->Pred(*static_cast(dmat), output_margin, len); } void XGBoosterLoadModel(void *handle, const char *fname) { @@ -251,7 +253,7 @@ extern "C"{ void XGBoosterSaveModel(const void *handle, const char *fname) { static_cast(handle)->SaveModel(fname); } - const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){ + const char** XGBoosterDumpModel(void *handle, const char *fmap, uint64_t *len){ utils::FeatMap featmap; if (strlen(fmap) != 0) { featmap.LoadText(fmap); diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index d4c695647..5351117e7 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -7,6 +7,8 @@ * can be used to create wrapper of other languages */ #include +// define uint64_t to be unsigned long +typedef unsigned long uint64_t; extern "C" { /*! @@ -23,11 +25,11 @@ extern "C" { * \param nelem number of nonzero elements in the matrix * \return created dmatrix */ - void* XGDMatrixCreateFromCSR(const size_t *indptr, + void* XGDMatrixCreateFromCSR(const uint64_t *indptr, const unsigned *indices, const float *data, - size_t nindptr, - size_t nelem); + uint64_t nindptr, + uint64_t nelem); /*! * \brief create matrix content from dense matrix * \param data pointer to the data space @@ -37,8 +39,8 @@ extern "C" { * \return created dmatrix */ void* XGDMatrixCreateFromMat(const float *data, - size_t nrow, - size_t ncol, + uint64_t nrow, + uint64_t ncol, float missing); /*! * \brief create a new dmatrix from sliced content of existing matrix @@ -49,7 +51,7 @@ extern "C" { */ void* XGDMatrixSliceDMatrix(void *handle, const int *idxset, - size_t len); + uint64_t len); /*! * \brief free space in data matrix */ @@ -68,7 +70,7 @@ extern "C" { * \param array pointer to float vector * \param len length of array */ - void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len); + void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, uint64_t len); /*! * \brief set uint32 vector to a content in info * \param handle a instance of data matrix @@ -76,14 +78,14 @@ extern "C" { * \param array pointer to float vector * \param len length of array */ - void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, size_t len); + void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, uint64_t len); /*! * \brief set label of the training matrix * \param handle a instance of data matrix * \param group pointer to group size * \param len length of array */ - void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len); + void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len); /*! * \brief get float info vector from matrix * \param handle a instance of data matrix @@ -91,7 +93,7 @@ extern "C" { * \param out_len used to set result length * \return pointer to the result */ - const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len); + const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* out_len); /*! 
* \brief get uint32 info vector from matrix * \param handle a instance of data matrix @@ -99,18 +101,18 @@ extern "C" { * \param out_len used to set result length * \return pointer to the result */ - const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, size_t* out_len); + const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* out_len); /*! * \brief return number of rows */ - size_t XGDMatrixNumRow(const void *handle); + uint64_t XGDMatrixNumRow(const void *handle); // --- start XGBoost class /*! * \brief create xgboost learner * \param dmats matrices that are set to be cached * \param len length of dmats */ - void *XGBoosterCreate(void* dmats[], size_t len); + void *XGBoosterCreate(void* dmats[], uint64_t len); /*! * \brief free obj in handle * \param handle handle to be freed @@ -140,7 +142,7 @@ extern "C" { * \param len length of grad/hess array */ void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, size_t len); + float *grad, float *hess, uint64_t len); /*! * \brief get evaluation statistics for xgboost * \param handle handle @@ -151,7 +153,7 @@ extern "C" { * \return the string containing evaluation stati */ const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], size_t len); + const char *evnames[], uint64_t len); /*! * \brief make prediction based on dmat * \param handle handle @@ -159,7 +161,7 @@ extern "C" { * \param output_margin whether only output raw margin value * \param len used to store length of returning result */ - const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len); + const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len); /*! * \brief load model from existing file * \param handle handle @@ -180,6 +182,6 @@ extern "C" { * \return char *data[], representing dump of each model */ const char **XGBoosterDumpModel(void *handle, const char *fmap, - size_t *out_len); + uint64_t *out_len); }; #endif // XGBOOST_WRAPPER_H_ From 7739f57c8b47c5bbbcb978c8591ebb714a284b49 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 26 Aug 2014 19:37:04 -0700 Subject: [PATCH 3/9] change omp loop var to bst_omp_uint, add XGB_DLL to wrapper --- R-package/src/xgboost_R.cpp | 16 +++---- src/data.h | 5 ++- src/gbm/gblinear-inl.hpp | 14 +++---- src/gbm/gbtree-inl.hpp | 7 ++-- src/learner/evaluation-inl.hpp | 16 +++---- src/learner/learner-inl.hpp | 6 +-- src/learner/objective-inl.hpp | 20 ++++----- src/tree/updater_colmaker-inl.hpp | 16 +++---- src/tree/updater_refresh-inl.hpp | 4 +- src/utils/omp.h | 9 ++++ wrapper/xgboost_R.cpp | 2 +- wrapper/xgboost_wrapper.cpp | 4 +- wrapper/xgboost_wrapper.h | 69 ++++++++++++++++--------------- 13 files changed, 100 insertions(+), 88 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 8bca0de5f..76a1f2840 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -3,10 +3,10 @@ #include #include #include "xgboost_R.h" -#include "../../wrapper/xgboost_wrapper.h" -#include "../../src/utils/utils.h" -#include "../../src/utils/omp.h" -#include "../../src/utils/matrix_csr.h" +#include "xgboost_wrapper.h" +#include "../src/utils/utils.h" +#include "../src/utils/omp.h" +#include "../src/utils/matrix_csr.h" using namespace xgboost; // implements error handling @@ -119,7 +119,7 @@ extern "C" { } } SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { - size_t olen; + uint64_t olen; const float *res = 
XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), &olen); SEXP ret = PROTECT(allocVector(REALSXP, olen)); @@ -188,7 +188,7 @@ extern "C" { &vec_dmats[0], &vec_sptr[0], len)); } SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { - size_t olen; + uint64_t olen; const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dmat), asInteger(output_margin), @@ -207,13 +207,13 @@ extern "C" { XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); } void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { - size_t olen; + uint64_t olen; const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), CHAR(asChar(fmap)), &olen); FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w"); for (size_t i = 0; i < olen; ++i) { - fprintf(fo, "booster[%u]:\n", static_cast(i)); + fprintf(fo, "booster[%lu]:\n", i); fprintf(fo, "%s", res[i]); } fclose(fo); diff --git a/src/data.h b/src/data.h index 1c9d9a290..4316885b1 100644 --- a/src/data.h +++ b/src/data.h @@ -12,6 +12,7 @@ #include #include #include "utils/io.h" +#include "utils/omp.h" #include "utils/utils.h" #include "utils/iterator.h" #include "utils/random.h" @@ -370,9 +371,9 @@ class FMatrixS : public FMatrixInterface{ } // sort columns - unsigned ncol = static_cast(this->NumCol()); + bst_omp_uint ncol = static_cast(this->NumCol()); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ncol; ++i) { + for (bst_omp_uint i = 0; i < ncol; ++i) { std::sort(&col_data_[0] + col_ptr_[i], &col_data_[0] + col_ptr_[i + 1], Entry::CmpValue); } diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp index 4f9bd0707..9a7e3d8b6 100644 --- a/src/gbm/gblinear-inl.hpp +++ b/src/gbm/gblinear-inl.hpp @@ -51,9 +51,9 @@ class GBLinear : public IGradBooster { // for all the output group for (int gid = 0; gid < ngroup; ++gid) { double sum_grad = 0.0, sum_hess = 0.0; - const unsigned ndata = static_cast(rowset.size()); + const bst_omp_uint ndata = static_cast(rowset.size()); #pragma omp parallel for schedule(static) reduction(+: sum_grad, sum_hess) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { bst_gpair &p = gpair[rowset[i] * ngroup + gid]; if (p.hess >= 0.0f) { sum_grad += p.grad; sum_hess += p.hess; @@ -65,7 +65,7 @@ class GBLinear : public IGradBooster { model.bias()[gid] += dw; // update grad value #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { bst_gpair &p = gpair[rowset[i] * ngroup + gid]; if (p.hess >= 0.0f) { p.grad += p.hess * dw; @@ -73,9 +73,9 @@ class GBLinear : public IGradBooster { } } // number of features - const unsigned nfeat = static_cast(feat_index.size()); + const bst_omp_uint nfeat = static_cast(feat_index.size()); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < nfeat; ++i) { + for (bst_omp_uint i = 0; i < nfeat; ++i) { const bst_uint fid = feat_index[i]; for (int gid = 0; gid < ngroup; ++gid) { double sum_grad = 0.0, sum_hess = 0.0; @@ -117,9 +117,9 @@ class GBLinear : public IGradBooster { // k is number of group preds.resize(preds.size() + batch.size * ngroup); // parallel over local batch - const unsigned nsize = static_cast(batch.size); + const bst_omp_uint nsize = static_cast(batch.size); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < nsize; ++i) { + for (bst_omp_uint i = 0; i < nsize; ++i) { const size_t ridx = batch.base_rowid + i; // loop over output groups for (int gid = 0; gid < 
ngroup; ++gid) { diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index 2c48f501c..d70e01438 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -94,8 +94,9 @@ class GBTree : public IGradBooster { "must have exactly ngroup*nrow gpairs"); std::vector tmp(gpair.size()/ngroup); for (int gid = 0; gid < ngroup; ++gid) { + bst_omp_uint nsize = static_cast(tmp.size()); #pragma omp parallel for schedule(static) - for (size_t i = 0; i < tmp.size(); ++i) { + for (bst_omp_uint i = 0; i < nsize; ++i) { tmp[i] = gpair[i * ngroup + gid]; } this->BoostNewTrees(tmp, fmat, info, gid); @@ -129,9 +130,9 @@ class GBTree : public IGradBooster { // k is number of group preds.resize(preds.size() + batch.size * mparam.num_output_group); // parallel over local batch - const unsigned nsize = static_cast(batch.size); + const bst_omp_uint nsize = static_cast(batch.size); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < nsize; ++i) { + for (bst_omp_uint i = 0; i < nsize; ++i) { const int tid = omp_get_thread_num(); tree::RegTree::FVec &feats = thread_temp[tid]; int64_t ridx = static_cast(batch.base_rowid + i); diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 0b207f4b9..d5cb9fc36 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -26,10 +26,10 @@ struct EvalEWiseBase : public IEvaluator { const MetaInfo &info) const { utils::Check(preds.size() == info.labels.size(), "label and prediction size not match"); - const unsigned ndata = static_cast(preds.size()); + const bst_omp_uint ndata = static_cast(preds.size()); float sum = 0.0, wsum = 0.0; #pragma omp parallel for reduction(+: sum, wsum) schedule(static) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { const float wt = info.GetWeight(i); sum += Derived::EvalRow(info.labels[i], preds[i]) * wt; wsum += wt; @@ -109,12 +109,12 @@ struct EvalAMS : public IEvaluator { } virtual float Eval(const std::vector &preds, const MetaInfo &info) const { - const unsigned ndata = static_cast(preds.size()); + const bst_omp_uint ndata = static_cast(preds.size()); utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams"); std::vector< std::pair > rec(ndata); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { rec[i] = std::make_pair(preds[i], i); } std::sort(rec.begin(), rec.end(), CmpFirst); @@ -211,7 +211,7 @@ struct EvalAuc : public IEvaluator { const std::vector &gptr = info.group_ptr.size() == 0 ? 
tgptr : info.group_ptr; utils::Check(gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction"); - const unsigned ngroup = static_cast(gptr.size() - 1); + const bst_omp_uint ngroup = static_cast(gptr.size() - 1); // sum statictis double sum_auc = 0.0f; #pragma omp parallel reduction(+:sum_auc) @@ -219,7 +219,7 @@ struct EvalAuc : public IEvaluator { // each thread takes a local rec std::vector< std::pair > rec; #pragma omp for schedule(static) - for (unsigned k = 0; k < ngroup; ++k) { + for (bst_omp_uint k = 0; k < ngroup; ++k) { rec.clear(); for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { rec.push_back(std::make_pair(preds[j], j)); @@ -269,7 +269,7 @@ struct EvalRankList : public IEvaluator { utils::Assert(gptr.size() != 0, "must specify group when constructing rank file"); utils::Assert(gptr.back() == preds.size(), "EvalRanklist: group structure must match number of prediction"); - const unsigned ngroup = static_cast(gptr.size() - 1); + const bst_omp_uint ngroup = static_cast(gptr.size() - 1); // sum statistics double sum_metric = 0.0f; #pragma omp parallel reduction(+:sum_metric) @@ -277,7 +277,7 @@ struct EvalRankList : public IEvaluator { // each thread takes a local rec std::vector< std::pair > rec; #pragma omp for schedule(static) - for (unsigned k = 0; k < ngroup; ++k) { + for (bst_omp_uint k = 0; k < ngroup; ++k) { rec.clear(); for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { rec.push_back(std::make_pair(preds[j], static_cast(info.labels[j]))); diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 40ef274ee..387d1a57b 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -253,17 +253,17 @@ class BoostLearner { data.info.info, out_preds); // add base margin std::vector &preds = *out_preds; - const unsigned ndata = static_cast(preds.size()); + const bst_omp_uint ndata = static_cast(preds.size()); if (data.info.base_margin.size() != 0) { utils::Check(preds.size() == data.info.base_margin.size(), "base_margin.size does not match with prediction size"); #pragma omp parallel for schedule(static) - for (unsigned j = 0; j < ndata; ++j) { + for (bst_omp_uint j = 0; j < ndata; ++j) { preds[j] += data.info.base_margin[j]; } } else { #pragma omp parallel for schedule(static) - for (unsigned j = 0; j < ndata; ++j) { + for (bst_omp_uint j = 0; j < ndata; ++j) { preds[j] += mparam.base_score; } } diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 7f7f08cc3..02c896274 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -116,9 +116,9 @@ class RegLossObj : public IObjFunction{ gpair.resize(preds.size()); // start calculating gradient const unsigned nstep = static_cast(info.labels.size()); - const unsigned ndata = static_cast(preds.size()); + const bst_omp_uint ndata = static_cast(preds.size()); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { const unsigned j = i % nstep; float p = loss.PredTransform(preds[i]); float w = info.GetWeight(j); @@ -132,9 +132,9 @@ class RegLossObj : public IObjFunction{ } virtual void PredTransform(std::vector *io_preds) { std::vector &preds = *io_preds; - const unsigned ndata = static_cast(preds.size()); + const bst_omp_uint ndata = static_cast(preds.size()); #pragma omp parallel for schedule(static) - for (unsigned j = 0; j < ndata; ++j) { + for (bst_omp_uint j = 0; j < ndata; ++j) { preds[j] = loss.PredTransform(preds[j]); } } @@ -169,12 +169,12 @@ 
class SoftmaxMultiClassObj : public IObjFunction { std::vector &gpair = *out_gpair; gpair.resize(preds.size()); const unsigned nstep = static_cast(info.labels.size() * nclass); - const unsigned ndata = static_cast(preds.size() / nclass); + const unsigned ndata = static_cast(preds.size() / nclass); #pragma omp parallel { std::vector rec(nclass); #pragma omp for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { for (int k = 0; k < nclass; ++k) { rec[k] = preds[i * nclass + k]; } @@ -210,13 +210,13 @@ class SoftmaxMultiClassObj : public IObjFunction { utils::Check(nclass != 0, "must set num_class to use softmax"); std::vector &preds = *io_preds; std::vector tmp; - const unsigned ndata = static_cast(preds.size()/nclass); + const bst_omp_uint ndata = static_cast(preds.size()/nclass); if (prob == 0) tmp.resize(ndata); #pragma omp parallel { std::vector rec(nclass); #pragma omp for schedule(static) - for (unsigned j = 0; j < ndata; ++j) { + for (bst_omp_uint j = 0; j < ndata; ++j) { for (int k = 0; k < nclass; ++k) { rec[k] = preds[j * nclass + k]; } @@ -263,7 +263,7 @@ class LambdaRankObj : public IObjFunction { const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; utils::Check(gptr.size() != 0 && gptr.back() == info.labels.size(), "group structure not consistent with #rows"); - const unsigned ngroup = static_cast(gptr.size() - 1); + const bst_omp_uint ngroup = static_cast(gptr.size() - 1); #pragma omp parallel { // parall construct, declare random number generator here, so that each @@ -273,7 +273,7 @@ class LambdaRankObj : public IObjFunction { std::vector lst; std::vector< std::pair > rec; #pragma omp for schedule(static) - for (unsigned k = 0; k < ngroup; ++k) { + for (bst_omp_uint k = 0; k < ngroup; ++k) { lst.clear(); pairs.clear(); for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) { lst.push_back(ListEntry(preds[j], info.labels[j], j)); diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index 0c679e748..29e4a24e8 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -186,9 +186,9 @@ class ColMaker: public IUpdater { } const std::vector &rowset = fmat.buffered_rowset(); // setup position - const unsigned ndata = static_cast(rowset.size()); + const bst_omp_uint ndata = static_cast(rowset.size()); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; const int tid = omp_get_thread_num(); if (position[ridx] < 0) continue; @@ -286,12 +286,12 @@ class ColMaker: public IUpdater { feat_set.resize(n); } // start enumeration - const unsigned nsize = static_cast(feat_set.size()); + const bst_omp_uint nsize = static_cast(feat_set.size()); #if defined(_OPENMP) const int batch_size = std::max(static_cast(nsize / this->nthread / 32), 1); #endif #pragma omp parallel for schedule(dynamic, batch_size) - for (unsigned i = 0; i < nsize; ++i) { + for (bst_omp_uint i = 0; i < nsize; ++i) { const unsigned fid = feat_set[i]; const int tid = omp_get_thread_num(); if (param.need_forward_search(fmat.GetColDensity(fid))) { @@ -321,9 +321,9 @@ class ColMaker: public IUpdater { inline void ResetPosition(const std::vector &qexpand, const FMatrix &fmat, const RegTree &tree) { const std::vector &rowset = fmat.buffered_rowset(); // step 1, set default direct nodes to default, and leaf nodes to -1 - const unsigned ndata = static_cast(rowset.size()); + const bst_omp_uint ndata = 
static_cast(rowset.size()); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; const int nid = position[ridx]; if (nid >= 0) { @@ -344,9 +344,9 @@ class ColMaker: public IUpdater { std::sort(fsplits.begin(), fsplits.end()); fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin()); // start put things into right place - const unsigned nfeats = static_cast(fsplits.size()); + const bst_omp_uint nfeats = static_cast(fsplits.size()); #pragma omp parallel for schedule(dynamic, 1) - for (unsigned i = 0; i < nfeats; ++i) { + for (bst_omp_uint i = 0; i < nfeats; ++i) { const unsigned fid = fsplits[i]; for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) { const bst_uint ridx = it.rindex(); diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index ff6cf14b0..299f8414a 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -56,9 +56,9 @@ class TreeRefresher: public IUpdater { const SparseBatch &batch = iter->Value(); utils::Check(batch.size < std::numeric_limits::max(), "too large batch size "); - const unsigned nbatch = static_cast(batch.size); + const bst_omp_uint nbatch = static_cast(batch.size); #pragma omp parallel for schedule(static) - for (unsigned i = 0; i < nbatch; ++i) { + for (bst_omp_uint i = 0; i < nbatch; ++i) { SparseBatch::Inst inst = batch[i]; const int tid = omp_get_thread_num(); const bst_uint ridx = static_cast(batch.base_rowid + i); diff --git a/src/utils/omp.h b/src/utils/omp.h index 8d6531526..81e493d90 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -7,6 +7,15 @@ */ #if defined(_OPENMP) #include +namespace xgboost { +// loop variable used in openmp +#ifdef _MSC_VER +typedef int bst_omp_uint; +#else +typedef unsigned bst_omp_uint; +#endif +} // namespace xgboost + #else #ifndef DISABLE_OPENMP #ifndef _MSC_VER diff --git a/wrapper/xgboost_R.cpp b/wrapper/xgboost_R.cpp index 76a1f2840..c3a3044a9 100644 --- a/wrapper/xgboost_R.cpp +++ b/wrapper/xgboost_R.cpp @@ -213,7 +213,7 @@ extern "C" { &olen); FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w"); for (size_t i = 0; i < olen; ++i) { - fprintf(fo, "booster[%lu]:\n", i); + fprintf(fo, "booster[%u]:\n", static_cast(i)); fprintf(fo, "%s", res[i]); } fclose(fo); diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 32e382070..860bc822c 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -32,9 +32,9 @@ class Booster: public learner::BoostLearner { inline void BoostOneIter(const DataMatrix &train, float *grad, float *hess, uint64_t len) { this->gpair_.resize(len); - const unsigned ndata = static_cast(len); + const bst_omp_uint ndata = static_cast(len); #pragma omp parallel for schedule(static) - for (unsigned j = 0; j < ndata; ++j) { + for (bst_omp_uint j = 0; j < ndata; ++j) { gpair_[j] = bst_gpair(grad[j], hess[j]); } gbm_->DoBoost(train.fmat, train.info.info, &gpair_); diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index 5351117e7..e03be6265 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -9,13 +9,14 @@ #include // define uint64_t to be unsigned long typedef unsigned long uint64_t; +#define XGB_DLL extern "C" { /*! * \brief load a data matrix * \return a loaded data matrix */ - void* XGDMatrixCreateFromFile(const char *fname, int silent); + XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent); /*! 
* \brief create a matrix content from csr format * \param indptr pointer to row headers @@ -25,11 +26,11 @@ extern "C" { * \param nelem number of nonzero elements in the matrix * \return created dmatrix */ - void* XGDMatrixCreateFromCSR(const uint64_t *indptr, - const unsigned *indices, - const float *data, - uint64_t nindptr, - uint64_t nelem); + XGB_DLL void* XGDMatrixCreateFromCSR(const uint64_t *indptr, + const unsigned *indices, + const float *data, + uint64_t nindptr, + uint64_t nelem); /*! * \brief create matrix content from dense matrix * \param data pointer to the data space @@ -38,10 +39,10 @@ extern "C" { * \param missing which value to represent missing value * \return created dmatrix */ - void* XGDMatrixCreateFromMat(const float *data, - uint64_t nrow, - uint64_t ncol, - float missing); + XGB_DLL void* XGDMatrixCreateFromMat(const float *data, + uint64_t nrow, + uint64_t ncol, + float missing); /*! * \brief create a new dmatrix from sliced content of existing matrix * \param handle instance of data matrix to be sliced @@ -49,20 +50,20 @@ extern "C" { * \param len length of index set * \return a sliced new matrix */ - void* XGDMatrixSliceDMatrix(void *handle, - const int *idxset, - uint64_t len); + XGB_DLL void* XGDMatrixSliceDMatrix(void *handle, + const int *idxset, + uint64_t len); /*! * \brief free space in data matrix */ - void XGDMatrixFree(void *handle); + XGB_DLL void XGDMatrixFree(void *handle); /*! * \brief load a data matrix into binary file * \param handle a instance of data matrix * \param fname file name * \param silent print statistics when saving */ - void XGDMatrixSaveBinary(void *handle, const char *fname, int silent); + XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent); /*! * \brief set float vector to a content in info * \param handle a instance of data matrix @@ -70,7 +71,7 @@ extern "C" { * \param array pointer to float vector * \param len length of array */ - void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, uint64_t len); + XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, uint64_t len); /*! * \brief set uint32 vector to a content in info * \param handle a instance of data matrix @@ -78,14 +79,14 @@ extern "C" { * \param array pointer to float vector * \param len length of array */ - void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, uint64_t len); + XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, uint64_t len); /*! * \brief set label of the training matrix * \param handle a instance of data matrix * \param group pointer to group size * \param len length of array */ - void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len); + XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len); /*! * \brief get float info vector from matrix * \param handle a instance of data matrix @@ -93,7 +94,7 @@ extern "C" { * \param out_len used to set result length * \return pointer to the result */ - const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* out_len); + XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* out_len); /*! 
* \brief get uint32 info vector from matrix * \param handle a instance of data matrix @@ -101,37 +102,37 @@ extern "C" { * \param out_len used to set result length * \return pointer to the result */ - const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* out_len); + XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* out_len); /*! * \brief return number of rows */ - uint64_t XGDMatrixNumRow(const void *handle); + XGB_DLL uint64_t XGDMatrixNumRow(const void *handle); // --- start XGBoost class /*! * \brief create xgboost learner * \param dmats matrices that are set to be cached * \param len length of dmats */ - void *XGBoosterCreate(void* dmats[], uint64_t len); + XGB_DLL void *XGBoosterCreate(void* dmats[], uint64_t len); /*! * \brief free obj in handle * \param handle handle to be freed */ - void XGBoosterFree(void* handle); + XGB_DLL void XGBoosterFree(void* handle); /*! * \brief set parameters * \param handle handle * \param name parameter name * \param val value of parameter */ - void XGBoosterSetParam(void *handle, const char *name, const char *value); + XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value); /*! * \brief update the model in one round using dtrain * \param handle handle * \param iter current iteration rounds * \param dtrain training data */ - void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain); + XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain); /*! * \brief update the model, by directly specify gradient and second order gradient, * this can be used to replace UpdateOneIter, to support customized loss function @@ -141,8 +142,8 @@ extern "C" { * \param hess second order gradient statistics * \param len length of grad/hess array */ - void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, uint64_t len); + XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain, + float *grad, float *hess, uint64_t len); /*! * \brief get evaluation statistics for xgboost * \param handle handle @@ -152,8 +153,8 @@ extern "C" { * \param len length of dmats * \return the string containing evaluation stati */ - const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], uint64_t len); + XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], + const char *evnames[], uint64_t len); /*! * \brief make prediction based on dmat * \param handle handle @@ -161,19 +162,19 @@ extern "C" { * \param output_margin whether only output raw margin value * \param len used to store length of returning result */ - const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len); + XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len); /*! * \brief load model from existing file * \param handle handle * \param fname file name */ - void XGBoosterLoadModel(void *handle, const char *fname); + XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname); /*! * \brief save model into existing file * \param handle handle * \param fname file name */ - void XGBoosterSaveModel(const void *handle, const char *fname); + XGB_DLL void XGBoosterSaveModel(const void *handle, const char *fname); /*! 
* \brief dump model, return array of strings representing model dump * \param handle handle @@ -181,7 +182,7 @@ extern "C" { * \param out_len length of output array * \return char *data[], representing dump of each model */ - const char **XGBoosterDumpModel(void *handle, const char *fmap, - uint64_t *out_len); + XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap, + uint64_t *out_len); }; #endif // XGBOOST_WRAPPER_H_ From 90226035fa31be5618059b995acf91db8db9cfba Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 26 Aug 2014 19:39:34 -0700 Subject: [PATCH 4/9] chg r package path back --- R-package/src/xgboost_R.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 76a1f2840..35ea0d096 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -3,10 +3,10 @@ #include #include #include "xgboost_R.h" -#include "xgboost_wrapper.h" -#include "../src/utils/utils.h" -#include "../src/utils/omp.h" -#include "../src/utils/matrix_csr.h" +#include "../../wrapper/xgboost_wrapper.h" +#include "../../src/utils/utils.h" +#include "../../src/utils/omp.h" +#include "../../src/utils/matrix_csr.h" using namespace xgboost; // implements error handling From 9d2c1cf9f56242d5b36a15d8953f6be48b4b197c Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Tue, 26 Aug 2014 19:59:55 -0700 Subject: [PATCH 5/9] add omp uint when openmp is not there --- src/utils/omp.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/utils/omp.h b/src/utils/omp.h index 81e493d90..0380ebd67 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -7,15 +7,6 @@ */ #if defined(_OPENMP) #include -namespace xgboost { -// loop variable used in openmp -#ifdef _MSC_VER -typedef int bst_omp_uint; -#else -typedef unsigned bst_omp_uint; -#endif -} // namespace xgboost - #else #ifndef DISABLE_OPENMP #ifndef _MSC_VER @@ -30,4 +21,14 @@ inline int omp_get_thread_num() { return 0; } inline int omp_get_num_threads() { return 1; } inline void omp_set_num_threads(int nthread) {} #endif + +// loop variable used in openmp +namespace xgboost { +#ifdef _MSC_VER +typedef int bst_omp_uint; +#else +typedef unsigned bst_omp_uint; +#endif +} // namespace xgboost + #endif // XGBOOST_UTILS_OMP_H_ From 3e5cb25830a40442a48224d01937a474ea97b2e8 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Tue, 26 Aug 2014 20:02:10 -0700 Subject: [PATCH 6/9] minor fix, add openmp --- src/learner/evaluation-inl.hpp | 2 +- src/learner/objective-inl.hpp | 2 +- windows/xgboost/xgboost.vcxproj | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index d5cb9fc36..0b73ec1e1 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -123,7 +123,7 @@ struct EvalAMS : public IEvaluator { const double br = 10.0; unsigned thresindex = 0; double s_tp = 0.0, b_fp = 0.0, tams = 0.0; - for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) { + for (unsigned i = 0; i < static_cast(ndata-1) && i < ntop; ++i) { const unsigned ridx = rec[i].second; const float wt = info.weights[ridx]; if (info.labels[ridx] > 0.5f) { diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 02c896274..0e6b5ec7d 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -169,7 +169,7 @@ class SoftmaxMultiClassObj : public IObjFunction { std::vector &gpair = *out_gpair; gpair.resize(preds.size()); const unsigned nstep = 
static_cast(info.labels.size() * nclass); - const unsigned ndata = static_cast(preds.size() / nclass); + const bst_omp_uint ndata = static_cast(preds.size() / nclass); #pragma omp parallel { std::vector rec(nclass); diff --git a/windows/xgboost/xgboost.vcxproj b/windows/xgboost/xgboost.vcxproj index 064dd6ee6..8b88dbf2e 100644 --- a/windows/xgboost/xgboost.vcxproj +++ b/windows/xgboost/xgboost.vcxproj @@ -99,6 +99,7 @@ MaxSpeed true true + true true From d00f27dc6b51513701c29fa72c147ee6488769fd Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Tue, 26 Aug 2014 20:08:13 -0700 Subject: [PATCH 7/9] change uint64_t to depend on utils --- R-package/src/xgboost_R.cpp | 2 +- wrapper/xgboost_wrapper.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 35ea0d096..279ef3aea 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -213,7 +213,7 @@ extern "C" { &olen); FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w"); for (size_t i = 0; i < olen; ++i) { - fprintf(fo, "booster[%lu]:\n", i); + fprintf(fo, "booster[%u]:\n", static_cast(i)); fprintf(fo, "%s", res[i]); } fclose(fo); diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index e03be6265..b0c0052ed 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -7,8 +7,7 @@ * can be used to create wrapper of other languages */ #include -// define uint64_t to be unsigned long -typedef unsigned long uint64_t; +#include "../src/utils/utils.h" #define XGB_DLL extern "C" { From 4787108b5f84868a493e8874ea0b2d05b0a9b34f Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 26 Aug 2014 20:10:07 -0700 Subject: [PATCH 8/9] change uint64_t to ulong, to make mac happy, this is final change --- R-package/src/xgboost_R.cpp | 6 ++-- wrapper/xgboost_R.cpp | 6 ++-- wrapper/xgboost_wrapper.cpp | 62 ++++++++++++++++++------------------- wrapper/xgboost_wrapper.h | 37 +++++++++++----------- 4 files changed, 56 insertions(+), 55 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 279ef3aea..63b74df31 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -119,7 +119,7 @@ extern "C" { } } SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { - uint64_t olen; + bst_ulong olen; const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), &olen); SEXP ret = PROTECT(allocVector(REALSXP, olen)); @@ -188,7 +188,7 @@ extern "C" { &vec_dmats[0], &vec_sptr[0], len)); } SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { - uint64_t olen; + bst_ulong olen; const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dmat), asInteger(output_margin), @@ -207,7 +207,7 @@ extern "C" { XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); } void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { - uint64_t olen; + bst_ulong olen; const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), CHAR(asChar(fmap)), &olen); diff --git a/wrapper/xgboost_R.cpp b/wrapper/xgboost_R.cpp index c3a3044a9..88a320a4a 100644 --- a/wrapper/xgboost_R.cpp +++ b/wrapper/xgboost_R.cpp @@ -119,7 +119,7 @@ extern "C" { } } SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { - uint64_t olen; + bst_ulong olen; const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), &olen); SEXP ret = PROTECT(allocVector(REALSXP, olen)); @@ -188,7 +188,7 @@ extern "C" { &vec_dmats[0], &vec_sptr[0], len)); } SEXP 
XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { - uint64_t olen; + bst_ulong olen; const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dmat), asInteger(output_margin), @@ -207,7 +207,7 @@ extern "C" { XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); } void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { - uint64_t olen; + bst_ulong olen; const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), CHAR(asChar(fmap)), &olen); diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 860bc822c..d707a9dbc 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -23,14 +23,14 @@ class Booster: public learner::BoostLearner { this->init_model = false; this->SetCacheData(mats); } - const float *Pred(const DataMatrix &dmat, int output_margin, uint64_t *len) { + const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) { this->CheckInitModel(); this->Predict(dmat, output_margin, &this->preds_); *len = this->preds_.size(); return &this->preds_[0]; } inline void BoostOneIter(const DataMatrix &train, - float *grad, float *hess, uint64_t len) { + float *grad, float *hess, bst_ulong len) { this->gpair_.resize(len); const bst_omp_uint ndata = static_cast(len); #pragma omp parallel for schedule(static) @@ -48,7 +48,7 @@ class Booster: public learner::BoostLearner { learner::BoostLearner::LoadModel(fname); this->init_model = true; } - inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, uint64_t *len) { + inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, bst_ulong *len) { model_dump = this->DumpModel(fmap, with_stats); model_dump_cptr.resize(model_dump.size()); for (size_t i = 0; i < model_dump.size(); ++i) { @@ -76,37 +76,37 @@ extern "C"{ void* XGDMatrixCreateFromFile(const char *fname, int silent) { return LoadDataMatrix(fname, silent, false); } - void* XGDMatrixCreateFromCSR(const uint64_t *indptr, + void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, const unsigned *indices, const float *data, - uint64_t nindptr, - uint64_t nelem) { + bst_ulong nindptr, + bst_ulong nelem) { DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.row_ptr_.resize(nindptr); - for (uint64_t i = 0; i < nindptr; ++i) { + for (bst_ulong i = 0; i < nindptr; ++i) { mat.row_ptr_[i] = static_cast(indptr[i]); } mat.row_data_.resize(nelem); - for (uint64_t i = 0; i < nelem; ++i) { + for (bst_ulong i = 0; i < nelem; ++i) { mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]); mat.info.info.num_col = std::max(mat.info.info.num_col, - static_cast(indices[i]+1)); + static_cast(indices[i]+1)); } mat.info.info.num_row = nindptr - 1; return p_mat; } void* XGDMatrixCreateFromMat(const float *data, - uint64_t nrow, - uint64_t ncol, + bst_ulong nrow, + bst_ulong ncol, float missing) { DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.info.info.num_row = nrow; mat.info.info.num_col = ncol; - for (uint64_t i = 0; i < nrow; ++i, data += ncol) { - uint64_t nelem = 0; - for (uint64_t j = 0; j < ncol; ++j) { + for (bst_ulong i = 0; i < nrow; ++i, data += ncol) { + bst_ulong nelem = 0; + for (bst_ulong j = 0; j < ncol; ++j) { if (data[j] != missing) { mat.row_data_.push_back(SparseBatch::Entry(j, data[j])); ++nelem; @@ -118,7 +118,7 @@ extern "C"{ } void* XGDMatrixSliceDMatrix(void *handle, const int *idxset, - uint64_t len) { + bst_ulong len) { DMatrixSimple tmp; DataMatrix &dsrc = *static_cast(handle); 
if (dsrc.magic != DMatrixSimple::kMagic) { @@ -139,10 +139,10 @@ extern "C"{ iter->BeforeFirst(); utils::Assert(iter->Next(), "slice"); const SparseBatch &batch = iter->Value(); - for (uint64_t i = 0; i < len; ++i) { + for (bst_ulong i = 0; i < len; ++i) { const int ridx = idxset[i]; SparseBatch::Inst inst = batch[ridx]; - utils::Check(static_cast(ridx) < batch.size, "slice index exceed number of rows"); + utils::Check(static_cast(ridx) < batch.size, "slice index exceed number of rows"); ret.row_data_.resize(ret.row_data_.size() + inst.length); memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, sizeof(SparseBatch::Entry) * inst.length); @@ -165,46 +165,46 @@ extern "C"{ void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { SaveDataMatrix(*static_cast(handle), fname, silent); } - void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, uint64_t len) { + void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) { std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); vec.resize(len); memcpy(&vec[0], info, sizeof(float) * len); } - void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, uint64_t len) { + void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) { std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); vec.resize(len); memcpy(&vec[0], info, sizeof(unsigned) * len); } - void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len) { + void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) { DataMatrix *pmat = static_cast(handle); pmat->info.group_ptr.resize(len + 1); pmat->info.group_ptr[0] = 0; - for (uint64_t i = 0; i < len; ++i) { + for (bst_ulong i = 0; i < len; ++i) { pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i]; } } - const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* len) { + const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) { const std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); *len = vec.size(); return &vec[0]; } - const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* len) { + const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) { const std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); *len = vec.size(); return &vec[0]; } - uint64_t XGDMatrixNumRow(const void *handle) { + bst_ulong XGDMatrixNumRow(const void *handle) { return static_cast(handle)->info.num_row(); } // xgboost implementation - void *XGBoosterCreate(void *dmats[], uint64_t len) { + void *XGBoosterCreate(void *dmats[], bst_ulong len) { std::vector mats; - for (uint64_t i = 0; i < len; ++i) { + for (bst_ulong i = 0; i < len; ++i) { DataMatrix *dtr = static_cast(dmats[i]); mats.push_back(dtr); } @@ -224,7 +224,7 @@ extern "C"{ bst->UpdateOneIter(iter, *dtr); } void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, uint64_t len) { + float *grad, float *hess, bst_ulong len) { Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); bst->CheckInitModel(); @@ -232,11 +232,11 @@ extern "C"{ bst->BoostOneIter(*dtr, grad, hess, len); } const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], uint64_t len) { + const char *evnames[], bst_ulong len) { Booster *bst = static_cast(handle); std::vector names; std::vector mats; - for (uint64_t i = 0; i < 
@@ -232,11 +232,11 @@ extern "C"{
     bst->BoostOneIter(*dtr, grad, hess, len);
   }
   const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
-                                   const char *evnames[], uint64_t len) {
+                                   const char *evnames[], bst_ulong len) {
     Booster *bst = static_cast<Booster*>(handle);
     std::vector<std::string> names;
     std::vector<const DataMatrix*> mats;
-    for (uint64_t i = 0; i < len; ++i) {
+    for (bst_ulong i = 0; i < len; ++i) {
       mats.push_back(static_cast<DataMatrix*>(dmats[i]));
       names.push_back(std::string(evnames[i]));
     }
@@ -244,7 +244,7 @@ extern "C"{
     bst->eval_str = bst->EvalOneIter(iter, mats, names);
     return bst->eval_str.c_str();
   }
-  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len) {
+  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
     return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
   }
   void XGBoosterLoadModel(void *handle, const char *fname) {
@@ -253,7 +253,7 @@ extern "C"{
   void XGBoosterSaveModel(const void *handle, const char *fname) {
     static_cast<const Booster*>(handle)->SaveModel(fname);
   }
-  const char** XGBoosterDumpModel(void *handle, const char *fmap, uint64_t *len){
+  const char** XGBoosterDumpModel(void *handle, const char *fmap, bst_ulong *len){
     utils::FeatMap featmap;
     if (strlen(fmap) != 0) {
       featmap.LoadText(fmap);
diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h
index b0c0052ed..04c1763c2 100644
--- a/wrapper/xgboost_wrapper.h
+++ b/wrapper/xgboost_wrapper.h
@@ -7,8 +7,9 @@
  *  can be used to create wrapper of other languages
  */
 #include <cstdio>
-#include "../src/utils/utils.h"
 #define XGB_DLL
+// manually define unsigned long
+typedef unsigned long bst_ulong;
 extern "C" {
   /*!
@@ -25,11 +26,11 @@ extern "C" {
    * \param nelem number of nonzero elements in the matrix
    * \return created dmatrix
    */
-  XGB_DLL void* XGDMatrixCreateFromCSR(const uint64_t *indptr,
+  XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
                                        const unsigned *indices,
                                        const float *data,
-                                       uint64_t nindptr,
-                                       uint64_t nelem);
+                                       bst_ulong nindptr,
+                                       bst_ulong nelem);
   /*!
    * \brief create matrix content from dense matrix
    * \param data pointer to the data space
@@ -39,8 +40,8 @@ extern "C" {
    * \return created dmatrix
    */
   XGB_DLL void* XGDMatrixCreateFromMat(const float *data,
-                                       uint64_t nrow,
-                                       uint64_t ncol,
+                                       bst_ulong nrow,
+                                       bst_ulong ncol,
                                        float missing);
   /*!
    * \brief create a new dmatrix from sliced content of existing matrix
@@ -51,7 +52,7 @@ extern "C" {
    */
   XGB_DLL void* XGDMatrixSliceDMatrix(void *handle,
                                       const int *idxset,
-                                      uint64_t len);
+                                      bst_ulong len);
   /*!
    * \brief free space in data matrix
    */
@@ -70,7 +71,7 @@ extern "C" {
    * \param array pointer to float vector
    * \param len length of array
    */
-  XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, uint64_t len);
+  XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, bst_ulong len);
   /*!
    * \brief set uint32 vector to a content in info
    * \param handle an instance of data matrix
    * \param field field name
    * \param array pointer to unsigned vector
    * \param len length of array
    */
-  XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, uint64_t len);
+  XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, bst_ulong len);
   /*!
    * \brief set group of the training matrix
    * \param handle an instance of data matrix
    * \param group pointer to group size
    * \param len length of array
    */
-  XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len);
+  XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len);
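Illustration (not part of the patch): the group interface above is what the ranking objectives consume; as the implementation earlier in this patch shows, the wrapper folds the per-query sizes into a cumulative group_ptr. A sketch for a hypothetical dtrain of 23 rows split into three queries:

    // rows [0,10), [10,15) and [15,23) form three query groups
    unsigned group[] = {10, 5, 8};
    XGDMatrixSetGroup(dtrain, group, 3);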
  /*!
   * \brief get float info vector from matrix
   * \param handle an instance of data matrix
@@ -93,7 +94,7 @@ extern "C" {
   * \param out_len used to set result length
   * \return pointer to the result
   */
-  XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* out_len);
+  XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* out_len);
   /*!
    * \brief get uint32 info vector from matrix
    * \param handle an instance of data matrix
@@ -101,18 +102,18 @@ extern "C" {
   * \param out_len used to set result length
   * \return pointer to the result
   */
-  XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* out_len);
+  XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* out_len);
   /*!
    * \brief return number of rows
    */
-  XGB_DLL uint64_t XGDMatrixNumRow(const void *handle);
+  XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle);
   // --- start XGBoost class
   /*!
    * \brief create xgboost learner
    * \param dmats matrices that are set to be cached
    * \param len length of dmats
    */
-  XGB_DLL void *XGBoosterCreate(void* dmats[], uint64_t len);
+  XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len);
   /*!
    * \brief free obj in handle
    * \param handle handle to be freed
@@ -142,7 +143,7 @@ extern "C" {
   * \param len length of grad/hess array
   */
  XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain,
-                                    float *grad, float *hess, uint64_t len);
+                                    float *grad, float *hess, bst_ulong len);
  /*!
   * \brief get evaluation statistics for xgboost
   * \param handle handle
@@ -153,7 +154,7 @@ extern "C" {
   * \return the string containing evaluation statistics
   */
  XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
-                                          const char *evnames[], uint64_t len);
+                                          const char *evnames[], bst_ulong len);
  /*!
   * \brief make prediction based on dmat
   * \param handle handle
   * \param dmat data matrix
   * \param output_margin whether to only output the raw margin value
   * \param len used to store length of returning result
   */
-  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len);
+  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
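Illustration (not part of the patch): the booster half of the API, end to end. XGBoosterUpdateOneIter is assumed to keep its (handle, iter, dtrain) shape from the existing header, since its hunk is not shown above; dtrain and dtest are assumed DMatrix handles:

    void *mats[] = {dtrain, dtest};
    void *booster = XGBoosterCreate(mats, 2);
    for (int iter = 0; iter < 10; ++iter) {
      XGBoosterUpdateOneIter(booster, iter, dtrain);  // one boosting round
    }
    bst_ulong len;
    const float *preds = XGBoosterPredict(booster, dtest, 0, &len);
    // preds[0..len) holds one prediction per row of dtest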
  /*!
   * \brief load model from existing file
   * \param handle handle
@@ -182,6 +183,6 @@ extern "C" {
   * \return char *data[], representing dump of each model
   */
  XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
-                                         uint64_t *out_len);
+                                         bst_ulong *out_len);
 };
 #endif  // XGBOOST_WRAPPER_H_

From 84e5fc285b5c3ce766f048ee3d89c0ba1e076413 Mon Sep 17 00:00:00 2001
From: Tianqi Chen
Date: Tue, 26 Aug 2014 20:32:33 -0700
Subject: [PATCH 9/9] bst_ulong supported by sparsematrix builder

---
 R-package/src/xgboost_R.cpp |  4 ++--
 src/utils/matrix_csr.h      | 12 ++++++------
 wrapper/xgboost_R.cpp       |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp
index 63b74df31..1ca232509 100644
--- a/R-package/src/xgboost_R.cpp
+++ b/R-package/src/xgboost_R.cpp
@@ -62,9 +62,9 @@ extern "C" {
     int ncol = length(indptr) - 1;
     int ndata = length(data);
     // transform into CSR format
-    std::vector<size_t> row_ptr;
+    std::vector<bst_ulong> row_ptr;
     std::vector< std::pair<unsigned, float> > csr_data;
-    utils::SparseCSRMBuilder< std::pair<unsigned, float> > builder(row_ptr, csr_data);
+    utils::SparseCSRMBuilder< std::pair<unsigned, float>, false, bst_ulong> builder(row_ptr, csr_data);
     builder.InitBudget();
     for (int i = 0; i < ncol; ++i) {
       for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
diff --git a/src/utils/matrix_csr.h b/src/utils/matrix_csr.h
index 31022553b..0f3b20a14 100644
--- a/src/utils/matrix_csr.h
+++ b/src/utils/matrix_csr.h
@@ -17,26 +17,26 @@ namespace utils {
  * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
  * \tparam UseAcList whether to enable the usage of aclist; this option must be enabled manually
  */
-template<typename IndexType, bool UseAcList = false>
+template<typename IndexType, bool UseAcList = false, typename SizeType = size_t>
 struct SparseCSRMBuilder {
  private:
   /*! \brief dummy variable used in the indicator matrix construction */
   std::vector<size_t> dummy_aclist;
   /*! \brief pointer to each of the row */
-  std::vector<size_t> &rptr;
+  std::vector<SizeType> &rptr;
   /*! \brief index of nonzero entries in each row */
   std::vector<IndexType> &findex;
   /*! \brief a list of active rows, used when many rows are empty */
   std::vector<size_t> &aclist;
  public:
-  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+  SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
                     std::vector<IndexType> &p_findex)
       :rptr(p_rptr), findex(p_findex), aclist(dummy_aclist) {
     Assert(!UseAcList, "enabling bug");
   }
   /*! \brief use with caution! rptr must be cleaned before use */
-  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+  SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
                     std::vector<IndexType> &p_findex,
                     std::vector<size_t> &p_aclist)
       :rptr(p_rptr), findex(p_findex), aclist(p_aclist) {
@@ -62,7 +62,7 @@ struct SparseCSRMBuilder {
   * \param row_id the id of the row
   * \param nelem number of element budget to add to this row
   */
-  inline void AddBudget(size_t row_id, size_t nelem = 1) {
+  inline void AddBudget(size_t row_id, SizeType nelem = 1) {
     if (rptr.size() < row_id + 2) {
       rptr.resize(row_id + 2, 0);
     }
@@ -101,7 +101,7 @@ struct SparseCSRMBuilder {
   * element to each row, the number of calls shall be exactly same as add_budget
   */
  inline void PushElem(size_t row_id, IndexType col_id) {
-    size_t &rp = rptr[row_id + 1];
+    SizeType &rp = rptr[row_id + 1];
     findex[rp++] = col_id;
   }
   /*!
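Illustration (not part of the patch): with the new SizeType parameter, the builder can fill a bst_ulong row pointer directly, which is how the R wrappers below use it. A two-pass sketch; InitStorage() is assumed from the part of matrix_csr.h outside these hunks:

    std::vector<bst_ulong> rptr;
    std::vector<unsigned> findex;
    utils::SparseCSRMBuilder<unsigned, false, bst_ulong> builder(rptr, findex);
    builder.InitBudget();      // pass 1: count entries per row
    builder.AddBudget(0, 2);   // row 0 will hold two entries
    builder.AddBudget(1);      // row 1 holds one
    builder.InitStorage();     // turn the counts into row offsets
    builder.PushElem(0, 3);    // pass 2: push the same entries row by row
    builder.PushElem(0, 7);
    builder.PushElem(1, 2);
    // rptr is now {0, 2, 3} and findex is {3, 7, 2}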
diff --git a/wrapper/xgboost_R.cpp b/wrapper/xgboost_R.cpp
index 88a320a4a..4be565d1a 100644
--- a/wrapper/xgboost_R.cpp
+++ b/wrapper/xgboost_R.cpp
@@ -62,9 +62,9 @@ extern "C" {
     int ncol = length(indptr) - 1;
     int ndata = length(data);
     // transform into CSR format
-    std::vector<size_t> row_ptr;
+    std::vector<bst_ulong> row_ptr;
     std::vector< std::pair<unsigned, float> > csr_data;
-    utils::SparseCSRMBuilder< std::pair<unsigned, float> > builder(row_ptr, csr_data);
+    utils::SparseCSRMBuilder< std::pair<unsigned, float>, false, bst_ulong> builder(row_ptr, csr_data);
     builder.InitBudget();
     for (int i = 0; i < ncol; ++i) {
       for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {