commit 414e7f27ff
Merge branch 'master' into unity

Conflicts:
	src/learner/evaluation-inl.hpp
	wrapper/xgboost_R.cpp
	wrapper/xgboost_wrapper.cpp
	wrapper/xgboost_wrapper.h
@@ -62,9 +62,9 @@ extern "C" {
     int ncol = length(indptr) - 1;
     int ndata = length(data);
     // transform into CSR format
-    std::vector<size_t> row_ptr;
+    std::vector<bst_ulong> row_ptr;
     std::vector< std::pair<unsigned, float> > csr_data;
-    utils::SparseCSRMBuilder< std::pair<unsigned,float> > builder(row_ptr, csr_data);
+    utils::SparseCSRMBuilder<std::pair<unsigned,float>, false, bst_ulong> builder(row_ptr, csr_data);
     builder.InitBudget();
     for (int i = 0; i < ncol; ++i) {
       for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {

@@ -119,7 +119,7 @@ extern "C" {
     }
   }
   SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
-    size_t olen;
+    bst_ulong olen;
     const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                              CHAR(asChar(field)), &olen);
     SEXP ret = PROTECT(allocVector(REALSXP, olen));

@@ -188,7 +188,7 @@ extern "C" {
                          &vec_dmats[0], &vec_sptr[0], len));
   }
   SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
-    size_t olen;
+    bst_ulong olen;
     const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                         R_ExternalPtrAddr(dmat),
                                         asInteger(output_margin),

@@ -207,7 +207,7 @@ extern "C" {
     XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
   }
   void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
-    size_t olen;
+    bst_ulong olen;
     const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                           CHAR(asChar(fmap)),
                                           &olen);
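The common thread in these four R-glue hunks: the local `olen` must have exactly the type the C API writes through (`bst_ulong *`). `size_t` and `unsigned long` happen to coincide on common LP64 Linux, but they differ on 64-bit Windows, so the portable choice is to use the API's own typedef everywhere. A minimal sketch of how such a getter likely continues — the copy loop after the call is not shown in the hunk and is an assumption here:

    // Sketch of the out-length pattern: the C API returns a borrowed pointer
    // and writes the element count through a bst_ulong*.
    SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
      bst_ulong olen;
      const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                               CHAR(asChar(field)), &olen);
      SEXP ret = PROTECT(allocVector(REALSXP, olen));
      for (bst_ulong i = 0; i < olen; ++i) {
        REAL(ret)[i] = res[i];  // copy into R-managed memory before returning
      }
      UNPROTECT(1);
      return ret;
    }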
@@ -6,13 +6,13 @@ objective = binary:logistic

 # Tree Booster Parameters
 # step size shrinkage
-bst:eta = 1.0
+eta = 1.0
 # minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
 # minimum sum of instance weight(hessian) needed in a child
-bst:min_child_weight = 1
+min_child_weight = 1
 # maximum depth of a tree
-bst:max_depth = 3
+max_depth = 3

 # Task Parameters
 # the number of round to do boosting

@@ -42,8 +42,8 @@ param = {}
 param['objective'] = 'binary:logitraw'
 # scale weight of positive examples
 param['scale_pos_weight'] = sum_wneg/sum_wpos
-param['bst:eta'] = 0.1
-param['bst:max_depth'] = 6
+param['eta'] = 0.1
+param['max_depth'] = 6
 param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 16

@@ -25,8 +25,8 @@ param = {}
 # use softmax multi-class classification
 param['objective'] = 'multi:softmax'
 # scale weight of positive examples
-param['bst:eta'] = 0.1
-param['bst:max_depth'] = 6
+param['eta'] = 0.1
+param['max_depth'] = 6
 param['silent'] = 1
 param['nthread'] = 4
 param['num_class'] = 6

@@ -5,13 +5,13 @@ objective="rank:pairwise"

 # Tree Booster Parameters
 # step size shrinkage
-bst:eta = 0.1
+eta = 0.1
 # minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
 # minimum sum of instance weight(hessian) needed in a child
-bst:min_child_weight = 0.1
+min_child_weight = 0.1
 # maximum depth of a tree
-bst:max_depth = 6
+max_depth = 6

 # Task parameters
 # the number of round to do boosting

@@ -7,13 +7,13 @@ objective = reg:linear

 # Tree Booster Parameters
 # step size shrinkage
-bst:eta = 1.0
+eta = 1.0
 # minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
 # minimum sum of instance weight(hessian) needed in a child
-bst:min_child_weight = 1
+min_child_weight = 1
 # maximum depth of a tree
-bst:max_depth = 3
+max_depth = 3

 # Task parameters
 # the number of round to do boosting
@@ -12,6 +12,7 @@
 #include <cstring>
 #include <algorithm>
 #include "utils/io.h"
+#include "utils/omp.h"
 #include "utils/utils.h"
 #include "utils/iterator.h"
 #include "utils/random.h"

@@ -370,9 +371,9 @@ class FMatrixS : public FMatrixInterface<FMatrixS>{
     }

     // sort columns
-    unsigned ncol = static_cast<unsigned>(this->NumCol());
+    bst_omp_uint ncol = static_cast<bst_omp_uint>(this->NumCol());
     #pragma omp parallel for schedule(static)
-    for (unsigned i = 0; i < ncol; ++i) {
+    for (bst_omp_uint i = 0; i < ncol; ++i) {
       std::sort(&col_data_[0] + col_ptr_[i],
                 &col_data_[0] + col_ptr_[i + 1], Entry::CmpValue);
     }
@@ -51,9 +51,9 @@ class GBLinear : public IGradBooster<FMatrix> {
       // for all the output group
       for (int gid = 0; gid < ngroup; ++gid) {
         double sum_grad = 0.0, sum_hess = 0.0;
-        const unsigned ndata = static_cast<unsigned>(rowset.size());
+        const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
         #pragma omp parallel for schedule(static) reduction(+: sum_grad, sum_hess)
-        for (unsigned i = 0; i < ndata; ++i) {
+        for (bst_omp_uint i = 0; i < ndata; ++i) {
           bst_gpair &p = gpair[rowset[i] * ngroup + gid];
           if (p.hess >= 0.0f) {
             sum_grad += p.grad; sum_hess += p.hess;

@@ -65,7 +65,7 @@ class GBLinear : public IGradBooster<FMatrix> {
         model.bias()[gid] += dw;
         // update grad value
         #pragma omp parallel for schedule(static)
-        for (unsigned i = 0; i < ndata; ++i) {
+        for (bst_omp_uint i = 0; i < ndata; ++i) {
           bst_gpair &p = gpair[rowset[i] * ngroup + gid];
           if (p.hess >= 0.0f) {
             p.grad += p.hess * dw;

@@ -73,9 +73,9 @@ class GBLinear : public IGradBooster<FMatrix> {
         }
       }
       // number of features
-      const unsigned nfeat = static_cast<unsigned>(feat_index.size());
+      const bst_omp_uint nfeat = static_cast<bst_omp_uint>(feat_index.size());
       #pragma omp parallel for schedule(static)
-      for (unsigned i = 0; i < nfeat; ++i) {
+      for (bst_omp_uint i = 0; i < nfeat; ++i) {
        const bst_uint fid = feat_index[i];
        for (int gid = 0; gid < ngroup; ++gid) {
          double sum_grad = 0.0, sum_hess = 0.0;

@@ -117,9 +117,9 @@ class GBLinear : public IGradBooster<FMatrix> {
       // k is number of group
       preds.resize(preds.size() + batch.size * ngroup);
       // parallel over local batch
-      const unsigned nsize = static_cast<unsigned>(batch.size);
+      const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
       #pragma omp parallel for schedule(static)
-      for (unsigned i = 0; i < nsize; ++i) {
+      for (bst_omp_uint i = 0; i < nsize; ++i) {
         const size_t ridx = batch.base_rowid + i;
         // loop over output groups
         for (int gid = 0; gid < ngroup; ++gid) {
@@ -94,8 +94,9 @@ class GBTree : public IGradBooster<FMatrix> {
                    "must have exactly ngroup*nrow gpairs");
       std::vector<bst_gpair> tmp(gpair.size()/ngroup);
       for (int gid = 0; gid < ngroup; ++gid) {
+        bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
         #pragma omp parallel for schedule(static)
-        for (size_t i = 0; i < tmp.size(); ++i) {
+        for (bst_omp_uint i = 0; i < nsize; ++i) {
           tmp[i] = gpair[i * ngroup + gid];
         }
         this->BoostNewTrees(tmp, fmat, info, gid);

@@ -125,9 +126,9 @@ class GBTree : public IGradBooster<FMatrix> {
       while (iter->Next()) {
         const SparseBatch &batch = iter->Value();
         // parallel over local batch
-        const unsigned nsize = static_cast<unsigned>(batch.size);
+        const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
         #pragma omp parallel for schedule(static)
-        for (unsigned i = 0; i < nsize; ++i) {
+        for (bst_omp_uint i = 0; i < nsize; ++i) {
           const int tid = omp_get_thread_num();
           tree::RegTree::FVec &feats = thread_temp[tid];
           int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
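The first GBTree hunk hoists the loop bound into a `bst_omp_uint` local because MSVC's OpenMP 2.0 implementation rejects a `size_t` loop index in a `parallel for`. A minimal sketch of the per-group gather it parallelizes, assuming `gpair` holds `nrow * ngroup` entries laid out row-major as `[row][group]`:

    // Sketch: collect the gradient pairs belonging to output group `gid`.
    std::vector<bst_gpair> tmp(gpair.size() / ngroup);
    const bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
    #pragma omp parallel for schedule(static)
    for (bst_omp_uint i = 0; i < nsize; ++i) {
      tmp[i] = gpair[i * ngroup + gid];  // stride-ngroup gather for one group
    }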
@@ -27,10 +27,12 @@ struct EvalEWiseBase : public IEvaluator {
     utils::Check(info.labels.size() != 0, "label set cannot be empty");
     utils::Check(preds.size() % info.labels.size() == 0,
                  "label and prediction size not match");
-    const unsigned ndata = static_cast<unsigned>(info.labels.size());
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());

     float sum = 0.0, wsum = 0.0;
     #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
-    for (unsigned i = 0; i < ndata; ++i) {
+    for (bst_omp_uint i = 0; i < ndata; ++i) {
       const float wt = info.GetWeight(i);
       sum += Derived::EvalRow(info.labels[i], preds[i]) * wt;
       wsum += wt;

@@ -149,12 +151,13 @@ struct EvalAMS : public IEvaluator {
   }
   virtual float Eval(const std::vector<float> &preds,
                      const MetaInfo &info) const {
-    const unsigned ndata = static_cast<unsigned>(info.labels.size());
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());

     utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams");
     std::vector< std::pair<float, unsigned> > rec(ndata);

     #pragma omp parallel for schedule(static)
-    for (unsigned i = 0; i < ndata; ++i) {
+    for (bst_omp_uint i = 0; i < ndata; ++i) {
       rec[i] = std::make_pair(preds[i], i);
     }
     std::sort(rec.begin(), rec.end(), CmpFirst);

@@ -163,7 +166,7 @@ struct EvalAMS : public IEvaluator {
     const double br = 10.0;
     unsigned thresindex = 0;
     double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
-    for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) {
+    for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
       const unsigned ridx = rec[i].second;
       const float wt = info.weights[ridx];
       if (info.labels[ridx] > 0.5f) {

@@ -257,7 +260,7 @@ struct EvalAuc : public IEvaluator {
     const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
     utils::Check(gptr.back() == info.labels.size(),
                  "EvalAuc: group structure must match number of prediction");
-    const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
+    const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
     // sum statictis
     double sum_auc = 0.0f;
     #pragma omp parallel reduction(+:sum_auc)

@@ -265,7 +268,7 @@ struct EvalAuc : public IEvaluator {
       // each thread takes a local rec
       std::vector< std::pair<float, unsigned> > rec;
       #pragma omp for schedule(static)
-      for (unsigned k = 0; k < ngroup; ++k) {
+      for (bst_omp_uint k = 0; k < ngroup; ++k) {
         rec.clear();
         for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
           rec.push_back(std::make_pair(preds[j], j));

@@ -315,7 +318,7 @@ struct EvalRankList : public IEvaluator {
     utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
     utils::Assert(gptr.back() == preds.size(),
                   "EvalRanklist: group structure must match number of prediction");
-    const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
+    const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
     // sum statistics
     double sum_metric = 0.0f;
     #pragma omp parallel reduction(+:sum_metric)

@@ -323,7 +326,7 @@ struct EvalRankList : public IEvaluator {
       // each thread takes a local rec
       std::vector< std::pair<float, unsigned> > rec;
       #pragma omp for schedule(static)
-      for (unsigned k = 0; k < ngroup; ++k) {
+      for (bst_omp_uint k = 0; k < ngroup; ++k) {
         rec.clear();
         for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
           rec.push_back(std::make_pair(preds[j], static_cast<int>(info.labels[j])));
@@ -79,6 +79,11 @@ class BoostLearner {
    * \param val value of the parameter
    */
   inline void SetParam(const char *name, const char *val) {
+    // in this version, bst: prefix is no longer required
+    if (strncmp(name, "bst:", 4) != 0) {
+      std::string n = "bst:"; n += name;
+      this->SetParam(n.c_str(), val);
+    }
     if (!strcmp(name, "silent")) silent = atoi(val);
     if (!strcmp(name, "prob_buffer_row")) prob_buffer_row = static_cast<float>(atof(val));
     if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
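This shim is what makes the config and demo changes earlier in the diff safe: an unprefixed parameter name is re-dispatched once with `bst:` prepended, so components that still match on the prefixed form keep working, while the unprefixed name continues through the checks below. The prefixed call does not recurse again, since it already starts with `bst:`. A minimal sketch, with `learner` standing in for a configured BoostLearner:

    // Both calls now configure the same tree-booster setting.
    learner.SetParam("bst:max_depth", "6");  // legacy prefixed form still accepted
    learner.SetParam("max_depth", "6");      // new form; re-dispatched internally as "bst:max_depth"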
@@ -248,17 +253,17 @@ class BoostLearner {
                 data.info.info, out_preds);
     // add base margin
     std::vector<float> &preds = *out_preds;
-    const unsigned ndata = static_cast<unsigned>(preds.size());
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
     if (data.info.base_margin.size() != 0) {
       utils::Check(preds.size() == data.info.base_margin.size(),
                    "base_margin.size does not match with prediction size");
       #pragma omp parallel for schedule(static)
-      for (unsigned j = 0; j < ndata; ++j) {
+      for (bst_omp_uint j = 0; j < ndata; ++j) {
         preds[j] += data.info.base_margin[j];
       }
     } else {
       #pragma omp parallel for schedule(static)
-      for (unsigned j = 0; j < ndata; ++j) {
+      for (bst_omp_uint j = 0; j < ndata; ++j) {
         preds[j] += mparam.base_score;
       }
     }
@@ -116,9 +116,9 @@ class RegLossObj : public IObjFunction{
     gpair.resize(preds.size());
     // start calculating gradient
     const unsigned nstep = static_cast<unsigned>(info.labels.size());
-    const unsigned ndata = static_cast<unsigned>(preds.size());
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
     #pragma omp parallel for schedule(static)
-    for (unsigned i = 0; i < ndata; ++i) {
+    for (bst_omp_uint i = 0; i < ndata; ++i) {
       const unsigned j = i % nstep;
       float p = loss.PredTransform(preds[i]);
       float w = info.GetWeight(j);

@@ -132,9 +132,9 @@ class RegLossObj : public IObjFunction{
   }
   virtual void PredTransform(std::vector<float> *io_preds) {
     std::vector<float> &preds = *io_preds;
-    const unsigned ndata = static_cast<unsigned>(preds.size());
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
     #pragma omp parallel for schedule(static)
-    for (unsigned j = 0; j < ndata; ++j) {
+    for (bst_omp_uint j = 0; j < ndata; ++j) {
       preds[j] = loss.PredTransform(preds[j]);
     }
   }

@@ -169,12 +169,12 @@ class SoftmaxMultiClassObj : public IObjFunction {
     std::vector<bst_gpair> &gpair = *out_gpair;
     gpair.resize(preds.size());
     const unsigned nstep = static_cast<unsigned>(info.labels.size() * nclass);
-    const unsigned ndata = static_cast<unsigned>(preds.size() / nclass);
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size() / nclass);
     #pragma omp parallel
     {
       std::vector<float> rec(nclass);
       #pragma omp for schedule(static)
-      for (unsigned i = 0; i < ndata; ++i) {
+      for (bst_omp_uint i = 0; i < ndata; ++i) {
         for (int k = 0; k < nclass; ++k) {
           rec[k] = preds[i * nclass + k];
         }

@@ -210,13 +210,13 @@ class SoftmaxMultiClassObj : public IObjFunction {
     utils::Check(nclass != 0, "must set num_class to use softmax");
     std::vector<float> &preds = *io_preds;
     std::vector<float> tmp;
-    const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size()/nclass);
     if (prob == 0) tmp.resize(ndata);
     #pragma omp parallel
     {
       std::vector<float> rec(nclass);
       #pragma omp for schedule(static)
-      for (unsigned j = 0; j < ndata; ++j) {
+      for (bst_omp_uint j = 0; j < ndata; ++j) {
         for (int k = 0; k < nclass; ++k) {
           rec[k] = preds[j * nclass + k];
         }
@@ -263,7 +263,7 @@ class LambdaRankObj : public IObjFunction {
     const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
     utils::Check(gptr.size() != 0 && gptr.back() == info.labels.size(),
                  "group structure not consistent with #rows");
-    const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
+    const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
     #pragma omp parallel
     {
       // parall construct, declare random number generator here, so that each

@@ -273,7 +273,7 @@ class LambdaRankObj : public IObjFunction {
       std::vector<ListEntry> lst;
       std::vector< std::pair<float, unsigned> > rec;
       #pragma omp for schedule(static)
-      for (unsigned k = 0; k < ngroup; ++k) {
+      for (bst_omp_uint k = 0; k < ngroup; ++k) {
         lst.clear(); pairs.clear();
         for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
           lst.push_back(ListEntry(preds[j], info.labels[j], j));
@@ -186,9 +186,9 @@ class ColMaker: public IUpdater<FMatrix> {
       }
       const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
       // setup position
-      const unsigned ndata = static_cast<unsigned>(rowset.size());
+      const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
       #pragma omp parallel for schedule(static)
-      for (unsigned i = 0; i < ndata; ++i) {
+      for (bst_omp_uint i = 0; i < ndata; ++i) {
         const bst_uint ridx = rowset[i];
         const int tid = omp_get_thread_num();
         if (position[ridx] < 0) continue;

@@ -286,12 +286,12 @@ class ColMaker: public IUpdater<FMatrix> {
         feat_set.resize(n);
       }
       // start enumeration
-      const unsigned nsize = static_cast<unsigned>(feat_set.size());
+      const bst_omp_uint nsize = static_cast<bst_omp_uint>(feat_set.size());
       #if defined(_OPENMP)
       const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
       #endif
       #pragma omp parallel for schedule(dynamic, batch_size)
-      for (unsigned i = 0; i < nsize; ++i) {
+      for (bst_omp_uint i = 0; i < nsize; ++i) {
         const unsigned fid = feat_set[i];
         const int tid = omp_get_thread_num();
         if (param.need_forward_search(fmat.GetColDensity(fid))) {

@@ -321,9 +321,9 @@ class ColMaker: public IUpdater<FMatrix> {
     inline void ResetPosition(const std::vector<int> &qexpand, const FMatrix &fmat, const RegTree &tree) {
       const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
       // step 1, set default direct nodes to default, and leaf nodes to -1
-      const unsigned ndata = static_cast<unsigned>(rowset.size());
+      const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
       #pragma omp parallel for schedule(static)
-      for (unsigned i = 0; i < ndata; ++i) {
+      for (bst_omp_uint i = 0; i < ndata; ++i) {
         const bst_uint ridx = rowset[i];
         const int nid = position[ridx];
         if (nid >= 0) {

@@ -344,9 +344,9 @@ class ColMaker: public IUpdater<FMatrix> {
       std::sort(fsplits.begin(), fsplits.end());
       fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
       // start put things into right place
-      const unsigned nfeats = static_cast<unsigned>(fsplits.size());
+      const bst_omp_uint nfeats = static_cast<bst_omp_uint>(fsplits.size());
       #pragma omp parallel for schedule(dynamic, 1)
-      for (unsigned i = 0; i < nfeats; ++i) {
+      for (bst_omp_uint i = 0; i < nfeats; ++i) {
         const unsigned fid = fsplits[i];
         for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) {
           const bst_uint ridx = it.rindex();
@@ -56,9 +56,9 @@ class TreeRefresher: public IUpdater<FMatrix> {
       const SparseBatch &batch = iter->Value();
       utils::Check(batch.size < std::numeric_limits<unsigned>::max(),
                    "too large batch size ");
-      const unsigned nbatch = static_cast<unsigned>(batch.size);
+      const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
       #pragma omp parallel for schedule(static)
-      for (unsigned i = 0; i < nbatch; ++i) {
+      for (bst_omp_uint i = 0; i < nbatch; ++i) {
         SparseBatch::Inst inst = batch[i];
         const int tid = omp_get_thread_num();
         const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
@@ -17,26 +17,26 @@ namespace utils {
 * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
 * \tparam whether enabling the usage of aclist, this option must be enabled manually
 */
-template<typename IndexType, bool UseAcList = false>
+template<typename IndexType, bool UseAcList = false, typename SizeType = size_t>
 struct SparseCSRMBuilder {
  private:
   /*! \brief dummy variable used in the indicator matrix construction */
   std::vector<size_t> dummy_aclist;
   /*! \brief pointer to each of the row */
-  std::vector<size_t> &rptr;
+  std::vector<SizeType> &rptr;
   /*! \brief index of nonzero entries in each row */
   std::vector<IndexType> &findex;
   /*! \brief a list of active rows, used when many rows are empty */
   std::vector<size_t> &aclist;

  public:
-  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+  SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
                     std::vector<IndexType> &p_findex)
       :rptr(p_rptr), findex(p_findex), aclist(dummy_aclist) {
     Assert(!UseAcList, "enabling bug");
   }
   /*! \brief use with caution! rptr must be cleaned before use */
-  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+  SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
                     std::vector<IndexType> &p_findex,
                     std::vector<size_t> &p_aclist)
       :rptr(p_rptr), findex(p_findex), aclist(p_aclist) {

@@ -62,7 +62,7 @@ struct SparseCSRMBuilder {
   * \param row_id the id of the row
   * \param nelem number of element budget add to this row
   */
-  inline void AddBudget(size_t row_id, size_t nelem = 1) {
+  inline void AddBudget(size_t row_id, SizeType nelem = 1) {
     if (rptr.size() < row_id + 2) {
       rptr.resize(row_id + 2, 0);
     }

@@ -101,7 +101,7 @@ struct SparseCSRMBuilder {
   * element to each row, the number of calls shall be exactly same as add_budget
   */
   inline void PushElem(size_t row_id, IndexType col_id) {
-    size_t &rp = rptr[row_id + 1];
+    SizeType &rp = rptr[row_id + 1];
     findex[rp++] = col_id;
   }
   /*!
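The new `SizeType` parameter lets the caller choose the row-pointer element type, which is what allows the R wrapper earlier in the diff to build directly into a `std::vector<bst_ulong>`. A minimal two-pass usage sketch follows; the storage-allocation step between the budget pass and the fill pass is called `InitStorage()` here, an assumed name based on the builder's API — the diff itself only shows `InitBudget`, `AddBudget`, and `PushElem`:

    // Sketch: build a CSR structure with bst_ulong row pointers.
    std::vector<bst_ulong> row_ptr;     // SizeType = bst_ulong
    std::vector<unsigned> col_index;    // IndexType = unsigned
    utils::SparseCSRMBuilder<unsigned, false, bst_ulong> builder(row_ptr, col_index);
    builder.InitBudget();
    builder.AddBudget(0);      // pass 1: count entries per row
    builder.AddBudget(0);
    builder.AddBudget(1);
    builder.InitStorage();     // assumed name: allocate findex, finalize rptr
    builder.PushElem(0, 3);    // pass 2: fill column indices row by row
    builder.PushElem(0, 7);
    builder.PushElem(1, 2);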
@@ -21,4 +21,14 @@ inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
 inline void omp_set_num_threads(int nthread) {}
 #endif

+// loop variable used in openmp
+namespace xgboost {
+#ifdef _MSC_VER
+typedef int bst_omp_uint;
+#else
+typedef unsigned bst_omp_uint;
+#endif
+}  // namespace xgboost
+
 #endif  // XGBOOST_UTILS_OMP_H_
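This typedef is the core of the merge. MSVC implements OpenMP 2.0, which requires the loop variable of a `#pragma omp parallel for` to be a signed integral type, so `bst_omp_uint` is `int` under `_MSC_VER` and plain `unsigned` elsewhere; that is also why every converted loop hoists a `size_t` size into a local of this type instead of indexing with `size_t` directly. A minimal sketch of the canonical loop shape used throughout this change:

    #include <vector>
    #include "utils/omp.h"  // defines xgboost::bst_omp_uint

    // Sketch: the parallel-loop pattern every converted site now follows.
    void Scale(std::vector<float> *io_preds, float factor) {
      std::vector<float> &preds = *io_preds;
      // hoist the size into an OpenMP-friendly index type
      const xgboost::bst_omp_uint ndata =
          static_cast<xgboost::bst_omp_uint>(preds.size());
      #pragma omp parallel for schedule(static)
      for (xgboost::bst_omp_uint i = 0; i < ndata; ++i) {
        preds[i] *= factor;
      }
    }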
@@ -99,6 +99,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
+      <OpenMPSupport>true</OpenMPSupport>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -13,7 +13,7 @@ dtrain = xgb.DMatrix('agaricus.txt.train')
 dtest = xgb.DMatrix('agaricus.txt.test')

 # specify parameters via map, definition are same as c++ version
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }

 # specify validations set to watch performance
 evallist = [(dtest,'eval'), (dtrain,'train')]

@@ -75,7 +75,7 @@ print ('start running example to used cutomized objective function')
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
+param = {'max_depth':2, 'eta':1, 'silent':1 }

 # user define objective function, given prediction, return gradient and second order gradient
 # this is loglikelihood loss

@@ -107,7 +107,7 @@ bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
 #
 print ('start running example to start from a initial prediction')
 # specify parameters via map, definition are same as c++ version
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
 # train xgboost for 1 round
 bst = xgb.train( param, dtrain, 1, evallist )
 # Note: we need the margin value instead of transformed prediction in set_base_margin
@@ -62,9 +62,9 @@ extern "C" {
     int ncol = length(indptr) - 1;
     int ndata = length(data);
     // transform into CSR format
-    std::vector<size_t> row_ptr;
+    std::vector<bst_ulong> row_ptr;
     std::vector< std::pair<unsigned, float> > csr_data;
-    utils::SparseCSRMBuilder< std::pair<unsigned,float> > builder(row_ptr, csr_data);
+    utils::SparseCSRMBuilder<std::pair<unsigned,float>, false, bst_ulong> builder(row_ptr, csr_data);
     builder.InitBudget();
     for (int i = 0; i < ncol; ++i) {
       for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {

@@ -119,7 +119,7 @@ extern "C" {
     }
   }
   SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
-    uint64_t olen;
+    bst_ulong olen;
     const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                              CHAR(asChar(field)), &olen);
     SEXP ret = PROTECT(allocVector(REALSXP, olen));

@@ -188,7 +188,7 @@ extern "C" {
                          &vec_dmats[0], &vec_sptr[0], len));
   }
   SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
-    uint64_t olen;
+    bst_ulong olen;
     const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                         R_ExternalPtrAddr(dmat),
                                         asInteger(output_margin),

@@ -207,13 +207,13 @@ extern "C" {
     XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
   }
   void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
-    uint64_t olen;
+    bst_ulong olen;
     const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                           CHAR(asChar(fmap)),
                                           &olen);
     FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
     for (size_t i = 0; i < olen; ++i) {
-      fprintf(fo, "booster[%lu]:\n", i);
+      fprintf(fo, "booster[%u]:\n", static_cast<unsigned>(i));
       fprintf(fo, "%s", res[i]);
     }
     fclose(fo);
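The last change in the hunk above is a portability fix rather than a type rename: `%lu` expects `unsigned long`, but the loop index is a `size_t`, whose underlying type differs across platforms (notably 64-bit Windows, where `long` stays 32-bit). Casting to `unsigned` and printing with `%u` keeps the format specifier and the argument in agreement everywhere. A standalone sketch:

    #include <cstdio>
    #include <cstddef>

    int main() {
      for (size_t i = 0; i < 3; ++i) {
        // portable: the cast guarantees the argument matches the %u specifier
        std::printf("booster[%u]:\n", static_cast<unsigned>(i));
      }
      return 0;
    }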
@@ -23,18 +23,18 @@ class Booster: public learner::BoostLearner<FMatrixS> {
     this->init_model = false;
     this->SetCacheData(mats);
   }
-  const float *Pred(const DataMatrix &dmat, int output_margin, uint64_t *len) {
+  const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
     this->CheckInitModel();
     this->Predict(dmat, output_margin, &this->preds_);
     *len = this->preds_.size();
     return &this->preds_[0];
   }
   inline void BoostOneIter(const DataMatrix &train,
-                           float *grad, float *hess, uint64_t len) {
+                           float *grad, float *hess, bst_ulong len) {
     this->gpair_.resize(len);
-    const unsigned ndata = static_cast<unsigned>(len);
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(len);
     #pragma omp parallel for schedule(static)
-    for (unsigned j = 0; j < ndata; ++j) {
+    for (bst_omp_uint j = 0; j < ndata; ++j) {
       gpair_[j] = bst_gpair(grad[j], hess[j]);
     }
     gbm_->DoBoost(train.fmat, train.info.info, &gpair_);

@@ -48,7 +48,7 @@ class Booster: public learner::BoostLearner<FMatrixS> {
     learner::BoostLearner<FMatrixS>::LoadModel(fname);
     this->init_model = true;
   }
-  inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, uint64_t *len) {
+  inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, bst_ulong *len) {
     model_dump = this->DumpModel(fmap, with_stats);
     model_dump_cptr.resize(model_dump.size());
     for (size_t i = 0; i < model_dump.size(); ++i) {

@@ -76,19 +76,19 @@ extern "C"{
   void* XGDMatrixCreateFromFile(const char *fname, int silent) {
     return LoadDataMatrix(fname, silent, false);
   }
-  void* XGDMatrixCreateFromCSR(const uint64_t *indptr,
+  void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
                                const unsigned *indices,
                                const float *data,
-                               uint64_t nindptr,
-                               uint64_t nelem) {
+                               bst_ulong nindptr,
+                               bst_ulong nelem) {
     DMatrixSimple *p_mat = new DMatrixSimple();
     DMatrixSimple &mat = *p_mat;
     mat.row_ptr_.resize(nindptr);
-    for (uint64_t i = 0; i < nindptr; ++ i) {
+    for (bst_ulong i = 0; i < nindptr; ++i) {
       mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
     }
     mat.row_data_.resize(nelem);
-    for (uint64_t i = 0; i < nelem; ++i) {
+    for (bst_ulong i = 0; i < nelem; ++i) {
       mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
       mat.info.info.num_col = std::max(mat.info.info.num_col,
                                        static_cast<uint64_t>(indices[i]+1));

@@ -97,16 +97,16 @@ extern "C"{
     return p_mat;
   }
   void* XGDMatrixCreateFromMat(const float *data,
-                               uint64_t nrow,
-                               uint64_t ncol,
+                               bst_ulong nrow,
+                               bst_ulong ncol,
                                float missing) {
     DMatrixSimple *p_mat = new DMatrixSimple();
     DMatrixSimple &mat = *p_mat;
     mat.info.info.num_row = nrow;
     mat.info.info.num_col = ncol;
-    for (uint64_t i = 0; i < nrow; ++i, data += ncol) {
-      uint64_t nelem = 0;
-      for (uint64_t j = 0; j < ncol; ++j) {
+    for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
+      bst_ulong nelem = 0;
+      for (bst_ulong j = 0; j < ncol; ++j) {
         if (data[j] != missing) {
           mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
           ++nelem;

@@ -118,7 +118,7 @@ extern "C"{
   }
   void* XGDMatrixSliceDMatrix(void *handle,
                               const int *idxset,
-                              uint64_t len) {
+                              bst_ulong len) {
     DMatrixSimple tmp;
     DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
     if (dsrc.magic != DMatrixSimple::kMagic) {

@@ -139,10 +139,10 @@ extern "C"{
     iter->BeforeFirst();
     utils::Assert(iter->Next(), "slice");
     const SparseBatch &batch = iter->Value();
-    for (uint64_t i = 0; i < len; ++i) {
+    for (bst_ulong i = 0; i < len; ++i) {
       const int ridx = idxset[i];
       SparseBatch::Inst inst = batch[ridx];
-      utils::Check(static_cast<uint64_t>(ridx) < batch.size, "slice index exceed number of rows");
+      utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceed number of rows");
       ret.row_data_.resize(ret.row_data_.size() + inst.length);
       memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
              sizeof(SparseBatch::Entry) * inst.length);

@@ -168,13 +168,13 @@ extern "C"{
   void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
     SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
   }
-  void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, uint64_t len) {
+  void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) {
     std::vector<float> &vec =
         static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
     vec.resize(len);
     memcpy(&vec[0], info, sizeof(float) * len);
   }
-  void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, uint64_t len) {
+  void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
     std::vector<unsigned> &vec =
         static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
     vec.resize(len);

@@ -194,20 +194,20 @@ extern "C"{
     *len = vec.size();
     return &vec[0];
   }
-  const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* len) {
+  const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
     const std::vector<unsigned> &vec =
         static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
     *len = vec.size();
     return &vec[0];
   }
-  uint64_t XGDMatrixNumRow(const void *handle) {
+  bst_ulong XGDMatrixNumRow(const void *handle) {
     return static_cast<const DataMatrix*>(handle)->info.num_row();
   }

   // xgboost implementation
-  void *XGBoosterCreate(void *dmats[], uint64_t len) {
+  void *XGBoosterCreate(void *dmats[], bst_ulong len) {
     std::vector<DataMatrix*> mats;
-    for (uint64_t i = 0; i < len; ++i) {
+    for (bst_ulong i = 0; i < len; ++i) {
       DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
       mats.push_back(dtr);
     }

@@ -227,7 +227,7 @@ extern "C"{
     bst->UpdateOneIter(iter, *dtr);
   }
   void XGBoosterBoostOneIter(void *handle, void *dtrain,
-                             float *grad, float *hess, uint64_t len) {
+                             float *grad, float *hess, bst_ulong len) {
     Booster *bst = static_cast<Booster*>(handle);
     DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
     bst->CheckInitModel();

@@ -235,11 +235,11 @@ extern "C"{
     bst->BoostOneIter(*dtr, grad, hess, len);
   }
   const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
-                                   const char *evnames[], uint64_t len) {
+                                   const char *evnames[], bst_ulong len) {
     Booster *bst = static_cast<Booster*>(handle);
     std::vector<std::string> names;
     std::vector<const DataMatrix*> mats;
-    for (uint64_t i = 0; i < len; ++i) {
+    for (bst_ulong i = 0; i < len; ++i) {
       mats.push_back(static_cast<DataMatrix*>(dmats[i]));
       names.push_back(std::string(evnames[i]));
     }

@@ -247,7 +247,7 @@ extern "C"{
     bst->eval_str = bst->EvalOneIter(iter, mats, names);
     return bst->eval_str.c_str();
  }
-  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len) {
+  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
     return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
   }
   void XGBoosterLoadModel(void *handle, const char *fname) {

@@ -256,7 +256,7 @@ extern "C"{
   void XGBoosterSaveModel(const void *handle, const char *fname) {
     static_cast<const Booster*>(handle)->SaveModel(fname);
   }
-  const char** XGBoosterDumpModel(void *handle, const char *fmap, uint64_t *len){
+  const char** XGBoosterDumpModel(void *handle, const char *fmap, bst_ulong *len){
     utils::FeatMap featmap;
     if (strlen(fmap) != 0) {
       featmap.LoadText(fmap);
@ -7,15 +7,17 @@
|
|||||||
* can be used to create wrapper of other languages
|
* can be used to create wrapper of other languages
|
||||||
*/
|
*/
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
// define uint64_t
|
#define XGB_DLL
|
||||||
typedef unsigned long uint64_t;
|
// manually define unsign long
|
||||||
|
typedef unsigned long bst_ulong;
|
||||||
|
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
/*!
|
/*!
|
||||||
* \brief load a data matrix
|
* \brief load a data matrix
|
||||||
* \return a loaded data matrix
|
* \return a loaded data matrix
|
||||||
*/
|
*/
|
||||||
void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
||||||
/*!
|
/*!
|
||||||
* \brief create a matrix content from csr format
|
* \brief create a matrix content from csr format
|
||||||
* \param indptr pointer to row headers
|
* \param indptr pointer to row headers
|
||||||
@ -25,11 +27,11 @@ extern "C" {
|
|||||||
* \param nelem number of nonzero elements in the matrix
|
* \param nelem number of nonzero elements in the matrix
|
||||||
* \return created dmatrix
|
* \return created dmatrix
|
||||||
*/
|
*/
|
||||||
void* XGDMatrixCreateFromCSR(const uint64_t *indptr,
|
XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
|
||||||
const unsigned *indices,
|
const unsigned *indices,
|
||||||
const float *data,
|
const float *data,
|
||||||
uint64_t nindptr,
|
bst_ulong nindptr,
|
||||||
uint64_t nelem);
|
bst_ulong nelem);
|
||||||
/*!
|
/*!
|
||||||
* \brief create matrix content from dense matrix
|
* \brief create matrix content from dense matrix
|
||||||
* \param data pointer to the data space
|
* \param data pointer to the data space
|
||||||
@ -38,9 +40,9 @@ extern "C" {
|
|||||||
* \param missing which value to represent missing value
|
* \param missing which value to represent missing value
|
||||||
* \return created dmatrix
|
* \return created dmatrix
|
||||||
*/
|
*/
|
||||||
void* XGDMatrixCreateFromMat(const float *data,
|
XGB_DLL void* XGDMatrixCreateFromMat(const float *data,
|
||||||
uint64_t nrow,
|
bst_ulong nrow,
|
||||||
uint64_t ncol,
|
bst_ulong ncol,
|
||||||
float missing);
|
float missing);
|
||||||
  /*!
   * \brief create a new dmatrix from sliced content of existing matrix
@ -49,20 +51,20 @@ extern "C" {
   * \param len length of index set
   * \return a sliced new matrix
   */
-  void* XGDMatrixSliceDMatrix(void *handle,
+  XGB_DLL void* XGDMatrixSliceDMatrix(void *handle,
                               const int *idxset,
-                              uint64_t len);
+                              bst_ulong len);
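Slicing is how the wrappers carve folds out of one loaded matrix; a sketch of taking rows 0 and 2, assuming dmat came from one of the constructors above:

    #include "xgboost_wrapper.h"

    void *take_rows(void *dmat) {
      int idxset[] = {0, 2};
      return XGDMatrixSliceDMatrix(dmat, idxset, 2);
    }
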
  /*!
   * \brief free space in data matrix
   */
-  void XGDMatrixFree(void *handle);
+  XGB_DLL void XGDMatrixFree(void *handle);
  /*!
   * \brief save a data matrix into binary file
   * \param handle an instance of data matrix
   * \param fname file name
   * \param silent print statistics when saving
   */
-  void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
+  XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
  /*!
   * \brief set float vector to a content in info
   * \param handle an instance of data matrix
@ -70,7 +72,7 @@ extern "C" {
   * \param array pointer to float vector
   * \param len length of array
   */
-  void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, uint64_t len);
+  XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, bst_ulong len);
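A sketch of attaching labels through this call; "label" is the field name the wrappers conventionally use for training targets:

    #include "xgboost_wrapper.h"

    void set_labels(void *dmat) {
      float labels[] = {0.0f, 1.0f};  /* one entry per row */
      XGDMatrixSetFloatInfo(dmat, "label", labels, 2);
    }
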
  /*!
   * \brief set uint32 vector to a content in info
   * \param handle an instance of data matrix
@ -78,14 +80,14 @@ extern "C" {
   * \param array pointer to unsigned vector
   * \param len length of array
   */
-  void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, uint64_t len);
+  XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, bst_ulong len);
  /*!
   * \brief set group of the training matrix
   * \param handle an instance of data matrix
   * \param group pointer to group size
   * \param len length of array
   */
-  void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len);
+  XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len);
  /*!
   * \brief get float info vector from matrix
   * \param handle an instance of data matrix
@ -93,7 +95,7 @@ extern "C" {
   * \param out_len used to set result length
   * \return pointer to the result
   */
-  const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* out_len);
+  XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* out_len);
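Reading the vector back rounds out the pair; out_len reports how many floats the returned buffer holds:

    #include <stdio.h>
    #include "xgboost_wrapper.h"

    void print_labels(void *dmat) {
      bst_ulong len;
      const float *labels = XGDMatrixGetFloatInfo(dmat, "label", &len);
      for (bst_ulong i = 0; i < len; ++i) {
        printf("label[%lu] = %f\n", i, labels[i]);
      }
    }
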
  /*!
   * \brief get uint32 info vector from matrix
   * \param handle an instance of data matrix
@ -101,37 +103,37 @@ extern "C" {
   * \param out_len used to set result length
   * \return pointer to the result
   */
-  const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* out_len);
+  XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* out_len);
  /*!
   * \brief return number of rows
   */
-  uint64_t XGDMatrixNumRow(const void *handle);
+  XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle);
  // --- start XGBoost class
  /*!
   * \brief create xgboost learner
   * \param dmats matrices that are set to be cached
   * \param len length of dmats
   */
-  void *XGBoosterCreate(void* dmats[], uint64_t len);
+  XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len);
  /*!
   * \brief free obj in handle
   * \param handle handle to be freed
   */
-  void XGBoosterFree(void* handle);
+  XGB_DLL void XGBoosterFree(void* handle);
  /*!
   * \brief set parameters
   * \param handle handle
   * \param name parameter name
   * \param value value of parameter
   */
-  void XGBoosterSetParam(void *handle, const char *name, const char *value);
+  XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value);
  /*!
   * \brief update the model in one round using dtrain
   * \param handle handle
   * \param iter current iteration round
   * \param dtrain training data
   */
-  void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
+  XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
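Taken together, the calls above form the basic training loop. A sketch (not from this commit; the parameter names are standard xgboost settings, and model.bin is a placeholder path):

    #include "xgboost_wrapper.h"

    void train(void *dtrain) {
      void *dmats[] = {dtrain};             /* matrices to cache */
      void *booster = XGBoosterCreate(dmats, 1);
      XGBoosterSetParam(booster, "objective", "binary:logistic");
      XGBoosterSetParam(booster, "max_depth", "3");
      for (int iter = 0; iter < 10; ++iter) {
        XGBoosterUpdateOneIter(booster, iter, dtrain);
      }
      XGBoosterSaveModel(booster, "model.bin");
      XGBoosterFree(booster);
    }
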
  /*!
   * \brief update the model by directly specifying the gradient and second order gradient;
   * this can be used in place of UpdateOneIter to support customized loss functions
@ -141,8 +143,8 @@ extern "C" {
   * \param hess second order gradient statistics
   * \param len length of grad/hess array
   */
-  void XGBoosterBoostOneIter(void *handle, void *dtrain,
-                             float *grad, float *hess, uint64_t len);
+  XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain,
+                             float *grad, float *hess, bst_ulong len);
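For a customized loss, the caller computes both gradients from the raw margins. A sketch for logistic loss, where grad = p - y and hess = p(1 - p); the 1024-row cap on the stack buffers is an assumption for brevity:

    #include <math.h>
    #include "xgboost_wrapper.h"

    void boost_logistic(void *booster, void *dtrain, const float *labels) {
      bst_ulong n;
      /* output_margin = 1: raw, untransformed scores */
      const float *margin = XGBoosterPredict(booster, dtrain, 1, &n);
      float grad[1024], hess[1024];  /* assumes n <= 1024 */
      for (bst_ulong i = 0; i < n; ++i) {
        float p = 1.0f / (1.0f + expf(-margin[i]));
        grad[i] = p - labels[i];
        hess[i] = p * (1.0f - p);
      }
      XGBoosterBoostOneIter(booster, dtrain, grad, hess, n);
    }
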
  /*!
   * \brief get evaluation statistics for xgboost
   * \param handle handle
@ -152,8 +154,8 @@ extern "C" {
   * \param len length of dmats
   * \return the string containing evaluation statistics
   */
-  const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
-                                   const char *evnames[], uint64_t len);
+  XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
+                                   const char *evnames[], bst_ulong len);
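A sketch of printing per-round metrics for a train/test pair; the evnames entries only label the returned string:

    #include <stdio.h>
    #include "xgboost_wrapper.h"

    void report(void *booster, int iter, void *dtrain, void *dtest) {
      void *dmats[] = {dtrain, dtest};
      const char *names[] = {"train", "test"};
      printf("%s\n", XGBoosterEvalOneIter(booster, iter, dmats, names, 2));
    }
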
  /*!
   * \brief make prediction based on dmat
   * \param handle handle
@ -161,19 +163,19 @@ extern "C" {
   * \param output_margin whether to only output the raw margin value
   * \param len used to store the length of the returned result
   */
-  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len);
+  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
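With output_margin set to 0 the scores come back transformed (probabilities under binary:logistic); a sketch:

    #include <stdio.h>
    #include "xgboost_wrapper.h"

    void predict(void *booster, void *dtest) {
      bst_ulong n;
      const float *pred = XGBoosterPredict(booster, dtest, 0, &n);
      for (bst_ulong i = 0; i < n; ++i) {
        printf("pred[%lu] = %f\n", i, pred[i]);
      }
    }
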
  /*!
   * \brief load model from existing file
   * \param handle handle
   * \param fname file name
   */
-  void XGBoosterLoadModel(void *handle, const char *fname);
+  XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname);
  /*!
   * \brief save model into existing file
   * \param handle handle
   * \param fname file name
   */
-  void XGBoosterSaveModel(const void *handle, const char *fname);
+  XGB_DLL void XGBoosterSaveModel(const void *handle, const char *fname);
  /*!
   * \brief dump model, return array of strings representing model dump
   * \param handle handle
@ -181,7 +183,7 @@ extern "C" {
   * \param out_len length of output array
   * \return char *data[], representing dump of each model
   */
-  const char **XGBoosterDumpModel(void *handle, const char *fmap,
-                                  uint64_t *out_len);
+  XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
+                                  bst_ulong *out_len);
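A sketch of walking the dump; passing an empty fmap string skips feature-name mapping, matching the strlen(fmap) check in the .cpp hunk above:

    #include <stdio.h>
    #include "xgboost_wrapper.h"

    void dump_trees(void *booster) {
      bst_ulong n;
      const char **trees = XGBoosterDumpModel(booster, "", &n);
      for (bst_ulong i = 0; i < n; ++i) {
        printf("booster[%lu]:\n%s\n", i, trees[i]);
      }
    }
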
};
#endif  // XGBOOST_WRAPPER_H_