Merge pull request #2 from dmlc/master

pr from origin:master
This commit is contained in:
yanqingmen 2015-07-06 09:00:42 +08:00
commit 7755c00721
71 changed files with 2574 additions and 1687 deletions

1
.gitignore vendored
View File

@ -66,3 +66,4 @@ java/xgboost4j-demo/data/
java/xgboost4j-demo/tmp/ java/xgboost4j-demo/tmp/
java/xgboost4j-demo/model/ java/xgboost4j-demo/model/
nb-configuration* nb-configuration*
dmlc-core

46
.travis.yml Normal file
View File

@ -0,0 +1,46 @@
sudo: true
# Use Build Matrix to do lint and build seperately
env:
matrix:
- TASK=lint LINT_LANG=cpp
- TASK=lint LINT_LANG=python
- TASK=R-package CXX=g++
- TASK=python-package CXX=g++
- TASK=build CXX=g++
- TASK=build-with-dmlc CXX=g++
# dependent apt packages
addons:
apt:
packages:
- doxygen
- libopenmpi-dev
- wget
- libcurl4-openssl-dev
- unzip
- python-numpy
- python-scipy
- python-nose
before_install:
- git clone https://github.com/dmlc/dmlc-core
- export TRAVIS=dmlc-core/scripts/travis/
- export PYTHONPATH=${PYTHONPATH}:${PWD}/wrapper
- source ${TRAVIS}/travis_setup_env.sh
install:
- pip install cpplint pylint --user `whoami`
script: scripts/travis_script.sh
after_failure:
- scripts/travis_after_failure.sh
notifications:
email:
on_success: change
on_failure: always

View File

@ -69,7 +69,11 @@ else
TARGET = $(BIN) TARGET = $(BIN)
endif endif
.PHONY: clean all mpi python Rpack ifndef LINT_LANG
LINT_LANG= "all"
endif
.PHONY: clean all mpi python Rpack lint
all: $(TARGET) all: $(TARGET)
mpi: $(MPIBIN) mpi: $(MPIBIN)
@ -144,10 +148,23 @@ Rpack:
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
cp xgboost/src/Makevars xgboost/src/Makevars.win cp xgboost/src/Makevars xgboost/src/Makevars.win
# R CMD build --no-build-vignettes xgboost # R CMD build --no-build-vignettes xgboost
# R CMD build xgboost
# rm -rf xgboost
# R CMD check --as-cran xgboost*.tar.gz
Rbuild:
make Rpack
R CMD build xgboost R CMD build xgboost
rm -rf xgboost rm -rf xgboost
Rcheck:
make Rbuild
R CMD check --as-cran xgboost*.tar.gz R CMD check --as-cran xgboost*.tar.gz
# lint requires dmlc to be in current folder
lint:
dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package
clean: clean:
$(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~ $(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~
cd subtree/rabit; make clean; cd .. cd subtree/rabit; make clean; cd ..

View File

@ -220,7 +220,8 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
stop("nfold must be bigger than 1") stop("nfold must be bigger than 1")
} }
if(is.null(folds)) { if(is.null(folds)) {
if (exists('objective', where=param) && strtrim(param[['objective']], 5) == 'rank:') { if (exists('objective', where=param) && is.character(param$objective) &&
strtrim(param[['objective']], 5) == 'rank:') {
stop("\tAutomatic creation of CV-folds is not implemented for ranking!\n", stop("\tAutomatic creation of CV-folds is not implemented for ranking!\n",
"\tConsider providing pre-computed CV-folds through the folds parameter.") "\tConsider providing pre-computed CV-folds through the folds parameter.")
} }
@ -234,7 +235,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
# For classification, need to convert y labels to factor before making the folds, # For classification, need to convert y labels to factor before making the folds,
# and then do stratification by factor levels. # and then do stratification by factor levels.
# For regression, leave y numeric and do stratification by quantiles. # For regression, leave y numeric and do stratification by quantiles.
if (exists('objective', where=param)) { if (exists('objective', where=param) && is.character(param$objective)) {
# If 'objective' provided in params, assume that y is a classification label # If 'objective' provided in params, assume that y is a classification label
# unless objective is reg:linear # unless objective is reg:linear
if (param[['objective']] != 'reg:linear') y <- factor(y) if (param[['objective']] != 'reg:linear') y <- factor(y)

View File

@ -95,152 +95,160 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
prediction = FALSE, showsd = TRUE, metrics=list(), prediction = FALSE, showsd = TRUE, metrics=list(),
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L, obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
early.stop.round = NULL, maximize = NULL, ...) { early.stop.round = NULL, maximize = NULL, ...) {
if (typeof(params) != "list") { if (typeof(params) != "list") {
stop("xgb.cv: first argument params must be list") stop("xgb.cv: first argument params must be list")
}
if(!is.null(folds)) {
if(class(folds)!="list" | length(folds) < 2) {
stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
} }
nfold <- length(folds) if(!is.null(folds)) {
} if(class(folds)!="list" | length(folds) < 2) {
if (nfold <= 1) { stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
stop("nfold must be bigger than 1") }
} nfold <- length(folds)
if (is.null(missing)) {
dtrain <- xgb.get.DMatrix(data, label)
} else {
dtrain <- xgb.get.DMatrix(data, label, missing)
}
params <- append(params, list(...))
params <- append(params, list(silent=1))
for (mc in metrics) {
params <- append(params, list("eval_metric"=mc))
}
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
stop("xgb.cv: cannot assign two different objectives")
if (!is.null(params$objective))
if (class(params$objective)=='function') {
obj = params$objective
params$objective = NULL
} }
if (!is.null(params$eval_metric) && !is.null(feval)) if (nfold <= 1) {
stop("xgb.cv: cannot assign two different evaluation metrics") stop("nfold must be bigger than 1")
if (!is.null(params$eval_metric))
if (class(params$eval_metric)=='function') {
feval = params$eval_metric
params$eval_metric = NULL
} }
if (is.null(missing)) {
# Early Stopping dtrain <- xgb.get.DMatrix(data, label)
if (!is.null(early.stop.round)){
if (!is.null(feval) && is.null(maximize))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize) && is.null(params$eval_metric))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize))
{
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
maximize = FALSE
} else {
maximize = TRUE
}
}
if (maximize) {
bestScore = 0
} else { } else {
bestScore = Inf dtrain <- xgb.get.DMatrix(data, label, missing)
}
dot.params = list(...)
nms.params = names(params)
nms.dot.params = names(dot.params)
if (length(intersect(nms.params,nms.dot.params))>0)
stop("Duplicated defined term in parameters. Please check your list of params.")
params <- append(params, dot.params)
params <- append(params, list(silent=1))
for (mc in metrics) {
params <- append(params, list("eval_metric"=mc))
} }
bestInd = 0
earlyStopflag = FALSE
if (length(metrics)>1) # customized objective and evaluation metric interface
warning('Only the first metric is used for early stopping process.') if (!is.null(params$objective) && !is.null(obj))
} stop("xgb.cv: cannot assign two different objectives")
if (!is.null(params$objective))
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds) if (class(params$objective)=='function') {
obj_type = params[['objective']] obj = params$objective
mat_pred = FALSE params[['objective']] = NULL
if (!is.null(obj_type) && obj_type=='multi:softprob') }
{ # if (!is.null(params$eval_metric) && !is.null(feval))
num_class = params[['num_class']] # stop("xgb.cv: cannot assign two different evaluation metrics")
if (is.null(num_class)) if (!is.null(params$eval_metric))
stop('must set num_class to use softmax') if (class(params$eval_metric)=='function') {
predictValues <- matrix(0,xgb.numrow(dtrain),num_class) feval = params$eval_metric
mat_pred = TRUE params[['eval_metric']] = NULL
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
history <- c()
print.every.n = max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
msg <- list()
for (k in 1:nfold) {
fd <- xgb_folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
if (i<nrounds) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
if (!prediction) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
res <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction)
if (mat_pred) {
pred_mat = matrix(res[[2]],num_class,length(fd$index))
predictValues[fd$index,] <- t(pred_mat)
} else {
predictValues[fd$index] <- res[[2]]
}
msg[[k]] <- res[[1]] %>% str_split("\t") %>% .[[1]]
} }
}
}
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose)
if (0==(i-1L)%%print.every.n)
cat(ret, "\n", sep="")
# early_Stopping # Early Stopping
if (!is.null(early.stop.round)){ if (!is.null(early.stop.round)){
score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2] if (!is.null(feval) && is.null(maximize))
score = strsplit(score,'\\+|:')[[1]][[2]] stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
score = as.numeric(score) if (is.null(maximize) && is.null(params$eval_metric))
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) { stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
bestScore = score if (is.null(maximize))
bestInd = i {
} else { if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
if (i-bestInd>=early.stop.round) { maximize = FALSE
earlyStopflag = TRUE } else {
cat('Stopping. Best iteration:',bestInd) maximize = TRUE
break }
} }
}
if (maximize) {
bestScore = 0
} else {
bestScore = Inf
}
bestInd = 0
earlyStopflag = FALSE
if (length(metrics)>1)
warning('Only the first metric is used for early stopping process.')
} }
} xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
obj_type = params[['objective']]
mat_pred = FALSE
if (!is.null(obj_type) && obj_type=='multi:softprob')
{
num_class = params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
mat_pred = TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
history <- c()
print.every.n = max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
msg <- list()
for (k in 1:nfold) {
fd <- xgb_folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
}
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose)
if (0==(i-1L)%%print.every.n)
cat(ret, "\n", sep="")
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".") # early_Stopping
colnamesMean <- paste(colnames, "mean") if (!is.null(early.stop.round)){
if(showsd) colnamesStd <- paste(colnames, "std") score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
score = strsplit(score,'\\+|:')[[1]][[2]]
score = as.numeric(score)
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
bestScore = score
bestInd = i
} else {
if (i-bestInd>=early.stop.round) {
earlyStopflag = TRUE
cat('Stopping. Best iteration:',bestInd)
break
}
}
}
colnames <- c() }
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
else colnames <- colnamesMean
type <- rep(x = "numeric", times = length(colnames)) if (prediction) {
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table for (k in 1:nfold) {
split <- str_split(string = history, pattern = "\t") fd = xgb_folds[[k]]
if (!is.null(early.stop.round) && earlyStopflag) {
res = xgb.iter.eval(fd$booster, fd$watchlist, bestInd - 1, feval, prediction)
} else {
res = xgb.iter.eval(fd$booster, fd$watchlist, nrounds - 1, feval, prediction)
}
if (mat_pred) {
pred_mat = matrix(res[[2]],num_class,length(fd$index))
predictValues[fd$index,] = t(pred_mat)
} else {
predictValues[fd$index] = res[[2]]
}
}
}
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
if (prediction) { colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
return(list(dt = dt,pred = predictValues)) colnamesMean <- paste(colnames, "mean")
} if(showsd) colnamesStd <- paste(colnames, "std")
return(dt)
colnames <- c()
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
else colnames <- colnamesMean
type <- rep(x = "numeric", times = length(colnames))
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t")
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
if (prediction) {
return(list(dt = dt,pred = predictValues))
}
return(dt)
} }
# Avoid error messages during CRAN check. # Avoid error messages during CRAN check.

View File

@ -136,7 +136,13 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
if (length(watchlist) != 0 && verbose == 0) { if (length(watchlist) != 0 && verbose == 0) {
warning('watchlist is provided but verbose=0, no evaluation information will be printed') warning('watchlist is provided but verbose=0, no evaluation information will be printed')
} }
params = append(params, list(...))
dot.params = list(...)
nms.params = names(params)
nms.dot.params = names(dot.params)
if (length(intersect(nms.params,nms.dot.params))>0)
stop("Duplicated term in parameters. Please check your list of params.")
params = append(params, dot.params)
# customized objective and evaluation metric interface # customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj)) if (!is.null(params$objective) && !is.null(obj))

View File

@ -1,3 +1,4 @@
// Copyright (c) 2014 by Contributors
#include <vector> #include <vector>
#include <string> #include <string>
#include <utility> #include <utility>
@ -34,7 +35,7 @@ bool CheckNAN(double v) {
bool LogGamma(double v) { bool LogGamma(double v) {
return lgammafn(v); return lgammafn(v);
} }
} // namespace utils } // namespace utils
namespace random { namespace random {
void Seed(unsigned seed) { void Seed(unsigned seed) {
@ -58,6 +59,10 @@ inline void _WrapperEnd(void) {
PutRNGstate(); PutRNGstate();
} }
// do nothing, check error
inline void CheckErr(int ret) {
}
extern "C" { extern "C" {
SEXP XGCheckNullPtr_R(SEXP handle) { SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL); return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
@ -69,7 +74,8 @@ extern "C" {
} }
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
_WrapperBegin(); _WrapperBegin();
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent)); DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -90,7 +96,8 @@ extern "C" {
data[i * ncol +j] = din[i + nrow * j]; data[i * ncol +j] = din[i + nrow * j];
} }
} }
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing)); DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -118,8 +125,10 @@ extern "C" {
indices_[i] = static_cast<unsigned>(p_indices[i]); indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]); data_[i] = static_cast<float>(p_data[i]);
} }
void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), DMatrixHandle handle;
BeginPtr(data_), nindptr, ndata); CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
&handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -133,7 +142,10 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1; idxvec[i] = INTEGER(idxset)[i] - 1;
} }
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len); DMatrixHandle res;
CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -142,8 +154,8 @@ extern "C" {
} }
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
_WrapperBegin(); _WrapperBegin();
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent)); CHAR(asChar(fname)), asInteger(silent)));
_WrapperEnd(); _WrapperEnd();
} }
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
@ -156,24 +168,27 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]); vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
} }
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len); CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len));
} else { } else {
std::vector<float> vec(len); std::vector<float> vec(len);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
vec[i] = REAL(array)[i]; vec[i] = REAL(array)[i];
} }
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), CHAR(asChar(field)),
BeginPtr(vec), len); BeginPtr(vec), len));
} }
_WrapperEnd(); _WrapperEnd();
} }
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
_WrapperBegin(); _WrapperBegin();
bst_ulong olen; bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), const float *res;
CHAR(asChar(field)), &olen); CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&olen,
&res));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen)); SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) { for (size_t i = 0; i < olen; ++i) {
@ -183,23 +198,25 @@ extern "C" {
return ret; return ret;
} }
SEXP XGDMatrixNumRow_R(SEXP handle) { SEXP XGDMatrixNumRow_R(SEXP handle) {
bst_ulong nrow = XGDMatrixNumRow(R_ExternalPtrAddr(handle)); bst_ulong nrow;
CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
return ScalarInteger(static_cast<int>(nrow)); return ScalarInteger(static_cast<int>(nrow));
} }
// functions related to booster // functions related to booster
void _BoosterFinalizer(SEXP ext) { void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return; if (R_ExternalPtrAddr(ext) == NULL) return;
XGBoosterFree(R_ExternalPtrAddr(ext)); CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext)));
R_ClearExternalPtr(ext); R_ClearExternalPtr(ext);
} }
SEXP XGBoosterCreate_R(SEXP dmats) { SEXP XGBoosterCreate_R(SEXP dmats) {
_WrapperBegin(); _WrapperBegin();
int len = length(dmats); int len = length(dmats);
std::vector<void*> dvec; std::vector<void*> dvec;
for (int i = 0; i < len; ++i){ for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
} }
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size()); BoosterHandle handle;
CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
@ -208,16 +225,16 @@ extern "C" {
} }
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
_WrapperBegin(); _WrapperBegin();
XGBoosterSetParam(R_ExternalPtrAddr(handle), CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)), CHAR(asChar(name)),
CHAR(asChar(val))); CHAR(asChar(val))));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
_WrapperBegin(); _WrapperBegin();
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter), asInteger(iter),
R_ExternalPtrAddr(dtrain)); R_ExternalPtrAddr(dtrain)));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
@ -230,9 +247,10 @@ extern "C" {
tgrad[j] = REAL(grad)[j]; tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j]; thess[j] = REAL(hess)[j];
} }
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain), R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess), len); BeginPtr(tgrad), BeginPtr(thess),
len));
_WrapperEnd(); _WrapperEnd();
} }
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
@ -249,21 +267,24 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str()); vec_sptr.push_back(vec_names[i].c_str());
} }
const char *ret = const char *ret;
XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter), asInteger(iter),
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len); BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
_WrapperEnd(); _WrapperEnd();
return mkString(ret); return mkString(ret);
} }
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) { SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
_WrapperBegin(); _WrapperBegin();
bst_ulong olen; bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), const float *res;
R_ExternalPtrAddr(dmat), CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle),
asInteger(option_mask), R_ExternalPtrAddr(dmat),
asInteger(ntree_limit), asInteger(option_mask),
&olen); asInteger(ntree_limit),
&olen, &res));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen)); SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) { for (size_t i = 0; i < olen; ++i) {
@ -274,12 +295,12 @@ extern "C" {
} }
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
_WrapperBegin(); _WrapperBegin();
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
_WrapperBegin(); _WrapperBegin();
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
@ -292,7 +313,8 @@ extern "C" {
SEXP XGBoosterModelToRaw_R(SEXP handle) { SEXP XGBoosterModelToRaw_R(SEXP handle) {
bst_ulong olen; bst_ulong olen;
_WrapperBegin(); _WrapperBegin();
const char *raw = XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen); const char *raw;
CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(allocVector(RAWSXP, olen)); SEXP ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) { if (olen != 0) {
@ -304,16 +326,16 @@ extern "C" {
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) { SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
_WrapperBegin(); _WrapperBegin();
bst_ulong olen; bst_ulong olen;
const char **res = const char **res;
XGBoosterDumpModel(R_ExternalPtrAddr(handle), CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)), CHAR(asChar(fmap)),
asInteger(with_stats), asInteger(with_stats),
&olen); &olen, &res));
_WrapperEnd(); _WrapperEnd();
SEXP out = PROTECT(allocVector(STRSXP, olen)); SEXP out = PROTECT(allocVector(STRSXP, olen));
for (size_t i = 0; i < olen; ++i) { for (size_t i = 0; i < olen; ++i) {
stringstream stream; stringstream stream;
stream << "booster["<<i<<"]\n" << res[i]; stream << "booster[" << i <<"]\n" << res[i];
SET_STRING_ELT(out, i, mkChar(stream.str().c_str())); SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
} }
UNPROTECT(1); UNPROTECT(1);

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_WRAPPER_R_H_
#define XGBOOST_WRAPPER_R_H_
/*! /*!
* Copyright 2014 (c) by Contributors
* \file xgboost_wrapper_R.h * \file xgboost_wrapper_R.h
* \author Tianqi Chen * \author Tianqi Chen
* \brief R wrapper of xgboost * \brief R wrapper of xgboost
*/ */
#ifndef XGBOOST_WRAPPER_R_H_ // NOLINT(*)
#define XGBOOST_WRAPPER_R_H_ // NOLINT(*)
extern "C" { extern "C" {
#include <Rinternals.h> #include <Rinternals.h>
#include <R_ext/Random.h> #include <R_ext/Random.h>
@ -153,4 +155,4 @@ extern "C" {
*/ */
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats); SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats);
} }
#endif // XGBOOST_WRAPPER_R_H_ #endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)

View File

@ -1,3 +1,4 @@
// Copyright (c) 2014 by Contributors
#include <stdio.h> #include <stdio.h>
#include <stdarg.h> #include <stdarg.h>
#include <Rinternals.h> #include <Rinternals.h>

View File

@ -1,6 +1,8 @@
XGBoost: eXtreme Gradient Boosting XGBoost: eXtreme Gradient Boosting
================================== ==================================
[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost)
An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version. An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version.
It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). XGBoost can also be distributed and scale to Terascale data It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). XGBoost can also be distributed and scale to Terascale data

View File

@ -147,7 +147,7 @@ Run the command again, we can find the log file becomes
``` ```
The rule is eval[name-printed-in-log] = filename, then the file will be added to monitoring process, and evaluated each round. The rule is eval[name-printed-in-log] = filename, then the file will be added to monitoring process, and evaluated each round.
xgboost also support monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes xgboost also supports monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
``` ```
[0] test-error:0.016139 test-negllik:0.029795 trainname-error:0.014433 trainname-negllik:0.027023 [0] test-error:0.016139 test-negllik:0.029795 trainname-error:0.014433 trainname-negllik:0.027023
[1] test-error:0.000000 test-negllik:0.000000 trainname-error:0.001228 trainname-negllik:0.002457 [1] test-error:0.000000 test-negllik:0.000000 trainname-error:0.001228 trainname-negllik:0.002457
@ -166,7 +166,7 @@ When you are working with a large dataset, you may want to take advantage of par
#### Additional Notes #### Additional Notes
* What are ```agaricus.txt.test.buffer``` and ```agaricus.txt.train.buffer``` generated during runexp.sh? * What are ```agaricus.txt.test.buffer``` and ```agaricus.txt.train.buffer``` generated during runexp.sh?
- By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. When next time you run xgboost, it detects i - By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. Next time when you run xgboost, it will detects these binary files.
Demonstrating how to use XGBoost accomplish binary classification tasks on UCI mushroom dataset http://archive.ics.uci.edu/ml/datasets/Mushroom

View File

@ -29,3 +29,7 @@ This section is about blogposts, presentation and videos discussing how to use x
Contribution Contribution
==== ====
Contribution of documents and use-cases are welcomed! Contribution of documents and use-cases are welcomed!
* This package use Google C++ style
* Check tool of codestyle
- clone https://github.com/dmlc/dmlc-core into root directory
- type ```make lint``` and fix possible errors.

View File

@ -17,13 +17,15 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
1. Obtain gcc with openmp support by `brew install gcc --without-multilib` **or** clang with openmp by `brew install clang-omp`. The clang one is recommended because the first method requires us compiling gcc inside the machine (more than an hour in mine)! (BTW, `brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.) 1. Obtain gcc with openmp support by `brew install gcc --without-multilib` **or** clang with openmp by `brew install clang-omp`. The clang one is recommended because the first method requires us compiling gcc inside the machine (more than an hour in mine)! (BTW, `brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.)
2. **if plaing to use clang-omp** in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to 2. **if you are planing to use clang-omp** - in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to
```C++ ```C++
#include <libiomp/omp.h> /* instead of #include <omp.h> */` #include <libiomp/omp.h> /* instead of #include <omp.h> */`
``` ```
to make it work, otherwise the following steps would show `src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...` to make it work, otherwise you might get this error
`src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`
@ -47,7 +49,7 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
4. Set the `Makevars` file in highest piority for R. 4. Set the `Makevars` file in highest piority for R.
The point is, there are three `Makevars` inside the machine: `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by runing `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has highest piority (surprise!). The point is, there are three `Makevars` : `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by running `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has highest piority (surprise!).
So, **add** or **change** `~/.R/Makevars` to the following lines: So, **add** or **change** `~/.R/Makevars` to the following lines:

14
scripts/travis_R_script.sh Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
# Test R package of xgboost
set -e
export _R_CHECK_TIMINGS_=0
export R_BUILD_ARGS="--no-build-vignettes --no-manual"
export R_CHECK_ARGS="--no-vignettes --no-manual"
curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
chmod 755 ./travis-tool.sh
./travis-tool.sh bootstrap
make Rpack
cd ./xgboost
../travis-tool.sh install_deps
../travis-tool.sh run_tests

View File

@ -0,0 +1,5 @@
#!/bin/bash
if [ ${TASK} == "R-package" ]; then
cat R-package/xgboost.Rcheck/00install.out
fi

28
scripts/travis_script.sh Executable file
View File

@ -0,0 +1,28 @@
#!/bin/bash
# main script of travis
if [ ${TASK} == "lint" ]; then
make lint || exit -1
fi
if [ ${TASK} == "build" ]; then
make all CXX=${CXX} || exit -1
fi
if [ ${TASK} == "build-with-dmlc" ]; then
cd dmlc-core
cp make/config.mk .
echo "USE_S3=1" >> config.mk
make all CXX=${CXX}|| exit -1
cd ..
make dmlc=dmlc-core CXX=${CXX} || exit -1
fi
if [ ${TASK} == "R-package" ]; then
scripts/travis_R_script.sh || exit -1
fi
if [ ${TASK} == "python-package" ]; then
make all CXX=${CXX} || exit -1
nosetests tests/python || exit -1
fi

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_DATA_H
#define XGBOOST_DATA_H
/*! /*!
* Copyright (c) 2014 by Contributors
* \file data.h * \file data.h
* \brief the input data structure for gradient boosting * \brief the input data structure for gradient boosting
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_DATA_H_
#define XGBOOST_DATA_H_
#include <cstdio> #include <cstdio>
#include <vector> #include <vector>
#include "utils/utils.h" #include "utils/utils.h"
@ -161,4 +163,4 @@ class IFMatrix {
virtual ~IFMatrix(void){} virtual ~IFMatrix(void){}
}; };
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_DATA_H #endif // XGBOOST_DATA_H_

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
#define XGBOOST_GBM_GBLINEAR_INL_HPP_
/*! /*!
* Copyright by Contributors
* \file gblinear-inl.hpp * \file gblinear-inl.hpp
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun) * the update rule is parallel coordinate descent (shotgun)
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
#define XGBOOST_GBM_GBLINEAR_INL_HPP_
#include <vector> #include <vector>
#include <string> #include <string>
#include <sstream> #include <sstream>
@ -33,10 +35,10 @@ class GBLinear : public IGradBooster {
model.param.SetParam(name, val); model.param.SetParam(name, val);
} }
} }
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
model.LoadModel(fi); model.LoadModel(fi);
} }
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
model.SaveModel(fo); model.SaveModel(fo);
} }
virtual void InitModel(void) { virtual void InitModel(void) {
@ -92,7 +94,8 @@ class GBLinear : public IGradBooster {
sum_hess += p.hess * v * v; sum_hess += p.hess * v * v;
} }
float &w = model[fid][gid]; float &w = model[fid][gid];
bst_float dw = static_cast<bst_float>(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w)); bst_float dw = static_cast<bst_float>(param.learning_rate *
param.CalcDelta(sum_grad, sum_hess, w));
w += dw; w += dw;
// update grad value // update grad value
for (bst_uint j = 0; j < col.length; ++j) { for (bst_uint j = 0; j < col.length; ++j) {
@ -258,12 +261,12 @@ class GBLinear : public IGradBooster {
std::fill(weight.begin(), weight.end(), 0.0f); std::fill(weight.begin(), weight.end(), 0.0f);
} }
// save the model to file // save the model to file
inline void SaveModel(utils::IStream &fo) const { inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
fo.Write(&param, sizeof(Param)); fo.Write(&param, sizeof(Param));
fo.Write(weight); fo.Write(weight);
} }
// load model from file // load model from file
inline void LoadModel(utils::IStream &fi) { inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
utils::Assert(fi.Read(&param, sizeof(Param)) != 0, "Load LinearBooster"); utils::Assert(fi.Read(&param, sizeof(Param)) != 0, "Load LinearBooster");
fi.Read(&weight); fi.Read(&weight);
} }

View File

@ -1,3 +1,4 @@
// Copyright by Contributors
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX #define NOMINMAX

View File

@ -1,11 +1,14 @@
#ifndef XGBOOST_GBM_GBM_H_
#define XGBOOST_GBM_GBM_H_
/*! /*!
* Copyright by Contributors
* \file gbm.h * \file gbm.h
* \brief interface of gradient booster, that learns through gradient statistics * \brief interface of gradient booster, that learns through gradient statistics
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_GBM_GBM_H_
#define XGBOOST_GBM_GBM_H_
#include <vector> #include <vector>
#include <string>
#include "../data.h" #include "../data.h"
#include "../utils/io.h" #include "../utils/io.h"
#include "../utils/fmap.h" #include "../utils/fmap.h"
@ -29,13 +32,13 @@ class IGradBooster {
* \param fi input stream * \param fi input stream
* \param with_pbuffer whether the incoming data contains pbuffer * \param with_pbuffer whether the incoming data contains pbuffer
*/ */
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; // NOLINT(*)
/*! /*!
* \brief save model to stream * \brief save model to stream
* \param fo output stream * \param fo output stream
* \param with_pbuffer whether save out pbuffer * \param with_pbuffer whether save out pbuffer
*/ */
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; // NOLINT(*)
/*! /*!
* \brief initialize the model * \brief initialize the model
*/ */

View File

@ -1,13 +1,16 @@
#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
#define XGBOOST_GBM_GBTREE_INL_HPP_
/*! /*!
* Copyright by Contributors
* \file gbtree-inl.hpp * \file gbtree-inl.hpp
* \brief gradient boosted tree implementation * \brief gradient boosted tree implementation
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
#define XGBOOST_GBM_GBTREE_INL_HPP_
#include <vector> #include <vector>
#include <utility> #include <utility>
#include <string> #include <string>
#include <limits>
#include "./gbm.h" #include "./gbm.h"
#include "../utils/omp.h" #include "../utils/omp.h"
#include "../tree/updater.h" #include "../tree/updater.h"
@ -39,7 +42,7 @@ class GBTree : public IGradBooster {
tparam.SetParam(name, val); tparam.SetParam(name, val);
if (trees.size() == 0) mparam.SetParam(name, val); if (trees.size() == 0) mparam.SetParam(name, val);
} }
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
this->Clear(); this->Clear();
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0, utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
"GBTree: invalid model file"); "GBTree: invalid model file");
@ -62,7 +65,7 @@ class GBTree : public IGradBooster {
"GBTree: invalid model file"); "GBTree: invalid model file");
} }
} }
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
utils::Assert(mparam.num_trees == static_cast<int>(trees.size()), "GBTree"); utils::Assert(mparam.num_trees == static_cast<int>(trees.size()), "GBTree");
if (with_pbuffer) { if (with_pbuffer) {
fo.Write(&mparam, sizeof(ModelParam)); fo.Write(&mparam, sizeof(ModelParam));
@ -196,7 +199,6 @@ class GBTree : public IGradBooster {
thread_temp[i].Init(mparam.num_feature); thread_temp[i].Init(mparam.num_feature);
} }
this->PredPath(p_fmat, info, out_preds, ntree_limit); this->PredPath(p_fmat, info, out_preds, ntree_limit);
} }
virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) { virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
std::vector<std::string> dump; std::vector<std::string> dump;
@ -339,7 +341,7 @@ class GBTree : public IGradBooster {
for (int j = 0; j < mparam.size_leaf_vector; ++j) { for (int j = 0; j < mparam.size_leaf_vector; ++j) {
vec_psum[j] += trees[i]->leafvec(tid)[j]; vec_psum[j] += trees[i]->leafvec(tid)[j];
} }
if(--treeleft == 0) break; if (--treeleft == 0) break;
} }
} }
p_feats->Drop(inst); p_feats->Drop(inst);

View File

@ -1,6 +1,8 @@
// Copyright by Contributors
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX #define NOMINMAX
#include <string>
#include "../utils/io.h" #include "../utils/io.h"
// implements a single no split version of DMLC // implements a single no split version of DMLC
@ -154,7 +156,7 @@ class StdFile : public dmlc::Stream {
std::fwrite(ptr, size, 1, fp); std::fwrite(ptr, size, 1, fp);
} }
virtual void Seek(size_t pos) { virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET); std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
} }
virtual size_t Tell(void) { virtual size_t Tell(void) {
return std::ftell(fp); return std::ftell(fp);

View File

@ -1,3 +1,4 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX #define NOMINMAX

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_IO_IO_H_
#define XGBOOST_IO_IO_H_
/*! /*!
* Copyright 2014 by Contributors
* \file io.h * \file io.h
* \brief handles input data format of xgboost * \brief handles input data format of xgboost
* I/O module handles a specific DMatrix format * I/O module handles a specific DMatrix format
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_IO_IO_H_
#define XGBOOST_IO_IO_H_
#include "../data.h" #include "../data.h"
#include "../learner/dmatrix.h" #include "../learner/dmatrix.h"
@ -40,7 +42,6 @@ DataMatrix* LoadDataMatrix(const char *fname,
* \param silent whether print message during saving * \param silent whether print message during saving
*/ */
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false); void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false);
} // namespace io } // namespace io
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_IO_IO_H_ #endif // XGBOOST_IO_IO_H_

View File

@ -35,7 +35,7 @@ struct LibSVMPage : public SparsePage {
*/ */
class LibSVMPageFactory { class LibSVMPageFactory {
public: public:
explicit LibSVMPageFactory() LibSVMPageFactory()
: bytes_read_(0), at_head_(true) { : bytes_read_(0), at_head_(true) {
} }
inline bool Init(void) { inline bool Init(void) {
@ -199,6 +199,7 @@ class LibSVMParser : public utils::IIterator<LibSVMPage> {
inline size_t bytes_read(void) const { inline size_t bytes_read(void) const {
return itr.get_factory().bytes_read(); return itr.get_factory().bytes_read();
} }
private: private:
bool at_end_; bool at_end_;
size_t data_ptr_; size_t data_ptr_;

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
/*! /*!
* Copyright (c) 2014 by Contributors
* \file page_dmatrix-inl.hpp * \file page_dmatrix-inl.hpp
* row iterator based on sparse page * row iterator based on sparse page
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#include <vector> #include <vector>
#include <string>
#include <algorithm>
#include "../data.h" #include "../data.h"
#include "../utils/iterator.h" #include "../utils/iterator.h"
#include "../utils/thread_buffer.h" #include "../utils/thread_buffer.h"
@ -94,12 +98,12 @@ class DMatrixPageBase : public DataMatrix {
fbin.Close(); fbin.Close();
if (!silent) { if (!silent) {
utils::Printf("DMatrixPage: %lux%lu is saved to %s\n", utils::Printf("DMatrixPage: %lux%lu is saved to %s\n",
static_cast<unsigned long>(mat.info.num_row()), static_cast<unsigned long>(mat.info.num_row()), // NOLINT(*)
static_cast<unsigned long>(mat.info.num_col()), fname_); static_cast<unsigned long>(mat.info.num_col()), fname_); // NOLINT(*)
} }
} }
/*! \brief load and initialize the iterator with fi */ /*! \brief load and initialize the iterator with fi */
inline void LoadBinary(utils::FileStream &fi, inline void LoadBinary(utils::FileStream &fi, // NOLINT(*)
bool silent, bool silent,
const char *fname_) { const char *fname_) {
this->set_cache_file(fname_); this->set_cache_file(fname_);
@ -114,8 +118,8 @@ class DMatrixPageBase : public DataMatrix {
iter_->Load(fs); iter_->Load(fs);
if (!silent) { if (!silent) {
utils::Printf("DMatrixPage: %lux%lu matrix is loaded", utils::Printf("DMatrixPage: %lux%lu matrix is loaded",
static_cast<unsigned long>(info.num_row()), static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col())); static_cast<unsigned long>(info.num_col())); // NOLINT(*)
if (fname_ != NULL) { if (fname_ != NULL) {
utils::Printf(" from %s\n", fname_); utils::Printf(" from %s\n", fname_);
} else { } else {
@ -188,8 +192,8 @@ class DMatrixPageBase : public DataMatrix {
fs.Close(); fs.Close();
if (!silent) { if (!silent) {
utils::Printf("DMatrixPage: %lux%lu is parsed from %s\n", utils::Printf("DMatrixPage: %lux%lu is parsed from %s\n",
static_cast<unsigned long>(info.num_row()), static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), static_cast<unsigned long>(info.num_col()), // NOLINT(*)
uri); uri);
} }
} }

View File

@ -1,10 +1,16 @@
#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
/*! /*!
* Copyright (c) 2014 by Contributors
* \file page_fmatrix-inl.hpp * \file page_fmatrix-inl.hpp
* col iterator based on sparse page * col iterator based on sparse page
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#include <vector>
#include <string>
#include <algorithm>
namespace xgboost { namespace xgboost {
namespace io { namespace io {
/*! \brief thread buffer iterator */ /*! \brief thread buffer iterator */
@ -96,7 +102,7 @@ struct ColConvertFactory {
return true; return true;
} }
} }
if (tmp_.Size() != 0){ if (tmp_.Size() != 0) {
this->MakeColPage(tmp_, BeginPtr(*buffered_rowset_) + btop, this->MakeColPage(tmp_, BeginPtr(*buffered_rowset_) + btop,
*enabled_, val); *enabled_, val);
return true; return true;

View File

@ -1,6 +1,5 @@
#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file simple_dmatrix-inl.hpp * \file simple_dmatrix-inl.hpp
* \brief simple implementation of DMatrixS that can be used * \brief simple implementation of DMatrixS that can be used
* the data format of xgboost is templatized, which means it can accept * the data format of xgboost is templatized, which means it can accept
@ -8,6 +7,9 @@
* this file is a specific implementation of input data structure that can be used by BoostLearner * this file is a specific implementation of input data structure that can be used by BoostLearner
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#include <string> #include <string>
#include <cstring> #include <cstring>
#include <vector> #include <vector>
@ -123,9 +125,9 @@ class DMatrixSimple : public DataMatrix {
} }
if (!silent) { if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n", utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
static_cast<unsigned long>(info.num_row()), static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size()), uri); static_cast<unsigned long>(row_data_.size()), uri); // NOLINT(*)
} }
// try to load in additional file // try to load in additional file
if (!loadsplit) { if (!loadsplit) {
@ -165,10 +167,11 @@ class DMatrixSimple : public DataMatrix {
* \param silent whether print information during loading * \param silent whether print information during loading
* \param fname file name, used to print message * \param fname file name, used to print message
*/ */
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) { inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) { // NOLINT(*)
int tmagic; int tmagic;
utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format"); utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch", fname == NULL ? "" : fname); utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch",
fname == NULL ? "" : fname);
info.LoadBinary(fs); info.LoadBinary(fs);
LoadBinary(fs, &row_ptr_, &row_data_); LoadBinary(fs, &row_ptr_, &row_data_);
@ -176,9 +179,9 @@ class DMatrixSimple : public DataMatrix {
if (!silent) { if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is loaded", utils::Printf("%lux%lu matrix with %lu entries is loaded",
static_cast<unsigned long>(info.num_row()), static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size())); static_cast<unsigned long>(row_data_.size())); // NOLINT(*)
if (fname != NULL) { if (fname != NULL) {
utils::Printf(" from %s\n", fname); utils::Printf(" from %s\n", fname);
} else { } else {
@ -205,9 +208,9 @@ class DMatrixSimple : public DataMatrix {
if (!silent) { if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n", utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
static_cast<unsigned long>(info.num_row()), static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size()), fname); static_cast<unsigned long>(row_data_.size()), fname); // NOLINT(*)
if (info.group_ptr.size() != 0) { if (info.group_ptr.size() != 0) {
utils::Printf("data contains %u groups\n", utils::Printf("data contains %u groups\n",
static_cast<unsigned>(info.group_ptr.size()-1)); static_cast<unsigned>(info.group_ptr.size()-1));
@ -256,7 +259,7 @@ class DMatrixSimple : public DataMatrix {
* \param ptr pointer data * \param ptr pointer data
* \param data data content * \param data data content
*/ */
inline static void SaveBinary(utils::IStream &fo, inline static void SaveBinary(utils::IStream &fo, // NOLINT(*)
const std::vector<size_t> &ptr, const std::vector<size_t> &ptr,
const std::vector<RowBatch::Entry> &data) { const std::vector<RowBatch::Entry> &data) {
size_t nrow = ptr.size() - 1; size_t nrow = ptr.size() - 1;
@ -272,7 +275,7 @@ class DMatrixSimple : public DataMatrix {
* \param out_ptr pointer data * \param out_ptr pointer data
* \param out_data data content * \param out_data data content
*/ */
inline static void LoadBinary(utils::IStream &fi, inline static void LoadBinary(utils::IStream &fi, // NOLINT(*)
std::vector<size_t> *out_ptr, std::vector<size_t> *out_ptr,
std::vector<RowBatch::Entry> *out_data) { std::vector<RowBatch::Entry> *out_data) {
size_t nrow; size_t nrow;

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file simple_fmatrix-inl.hpp * \file simple_fmatrix-inl.hpp
* \brief the input data structure for gradient boosting * \brief the input data structure for gradient boosting
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#include <limits> #include <limits>
#include <algorithm>
#include <vector>
#include "../data.h" #include "../data.h"
#include "../utils/utils.h" #include "../utils/utils.h"
#include "../utils/random.h" #include "../utils/random.h"
@ -39,7 +43,7 @@ class FMatrixS : public IFMatrix {
/*! \brief get number of colmuns */ /*! \brief get number of colmuns */
virtual size_t NumCol(void) const { virtual size_t NumCol(void) const {
utils::Check(this->HaveColAccess(), "NumCol:need column access"); utils::Check(this->HaveColAccess(), "NumCol:need column access");
return col_size_.size() - 1; return col_size_.size();
} }
/*! \brief get number of buffered rows */ /*! \brief get number of buffered rows */
virtual const std::vector<bst_uint> &buffered_rowset(void) const { virtual const std::vector<bst_uint> &buffered_rowset(void) const {
@ -94,7 +98,7 @@ class FMatrixS : public IFMatrix {
* \brief save column access data into stream * \brief save column access data into stream
* \param fo output stream to save to * \param fo output stream to save to
*/ */
inline void SaveColAccess(utils::IStream &fo) const { inline void SaveColAccess(utils::IStream &fo) const { // NOLINT(*)
size_t n = 0; size_t n = 0;
fo.Write(&n, sizeof(n)); fo.Write(&n, sizeof(n));
} }
@ -102,7 +106,7 @@ class FMatrixS : public IFMatrix {
* \brief load column access data from stream * \brief load column access data from stream
* \param fo output stream to load from * \param fo output stream to load from
*/ */
inline void LoadColAccess(utils::IStream &fi) { inline void LoadColAccess(utils::IStream &fi) { // NOLINT(*)
// do nothing in load col access // do nothing in load col access
} }
@ -153,14 +157,14 @@ class FMatrixS : public IFMatrix {
pcol->Clear(); pcol->Clear();
utils::ParallelGroupBuilder<SparseBatch::Entry> utils::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data); builder(&pcol->offset, &pcol->data);
builder.InitBudget(0, nthread); builder.InitBudget(info_.num_col(), nthread);
// start working // start working
iter_->BeforeFirst(); iter_->BeforeFirst();
while (iter_->Next()) { while (iter_->Next()) {
const RowBatch &batch = iter_->Value(); const RowBatch &batch = iter_->Value();
bmap.resize(bmap.size() + batch.size, true); bmap.resize(bmap.size() + batch.size, true);
long batch_size = static_cast<long>(batch.size); long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || random::SampleBinary(pkeep)) { if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
buffered_rowset_.push_back(ridx); buffered_rowset_.push_back(ridx);
@ -169,13 +173,13 @@ class FMatrixS : public IFMatrix {
} }
} }
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) { for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num(); int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) { if (bmap[ridx]) {
RowBatch::Inst inst = batch[i]; RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) { for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]){ if (enabled[inst[j].index]) {
builder.AddBudget(inst[j].index, tid); builder.AddBudget(inst[j].index, tid);
} }
} }
@ -188,7 +192,7 @@ class FMatrixS : public IFMatrix {
while (iter_->Next()) { while (iter_->Next()) {
const RowBatch &batch = iter_->Value(); const RowBatch &batch = iter_->Value();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) { for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num(); int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) { if (bmap[ridx]) {
@ -204,7 +208,8 @@ class FMatrixS : public IFMatrix {
} }
} }
utils::Assert(pcol->Size() == info_.num_col(), "inconsistent col data"); utils::Assert(pcol->Size() == info_.num_col(),
"inconsistent col data");
// sort columns // sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size()); bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
@ -366,4 +371,4 @@ class FMatrixS : public IFMatrix {
}; };
} // namespace io } // namespace io
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_IO_SLICE_FMATRIX_INL_HPP #endif // XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
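
The NumCol() change above makes sense once col_size_ stops being a CSC offset array: an offset array carries ncol + 1 entries, one of them a sentinel, while a plain per-column size vector has exactly ncol. A minimal sketch of the two conventions (hypothetical names, not the xgboost types):

    #include <cstddef>
    #include <vector>

    // CSC-style store: offset has ncol + 1 entries and column i
    // occupies data[offset[i] .. offset[i+1]), so NumCol() must
    // subtract the sentinel entry.
    struct OffsetCols {
      std::vector<std::size_t> offset;  // offset[0] == 0
      std::size_t NumCol() const { return offset.size() - 1; }
    };

    // Per-column size store: one entry per column, no sentinel,
    // which matches the new return value in the hunk above.
    struct SizedCols {
      std::vector<std::size_t> col_size;
      std::size_t NumCol() const { return col_size.size(); }
    };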


@ -1,12 +1,16 @@
#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
/*! /*!
* Copyright (c) 2014 by Contributors
* \file sparse_batch_page.h * \file sparse_batch_page.h
* content holder of sparse batch that can be saved to disk * content holder of sparse batch that can be saved to disk
* the representation can be effectively * the representation can be effectively
* used in external memory computation * used in external memory computation
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#include <vector>
#include <algorithm>
#include "../data.h" #include "../data.h"
namespace xgboost { namespace xgboost {
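
A batch page of this kind is just an offset vector plus an entry vector, which is why it streams to disk well: both blocks are contiguous. A hedged sketch of the idea, assuming a simple length-prefixed layout rather than the real on-disk format:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Entry { unsigned index; float fvalue; };

    // Write a page as [n][offset bytes][m][entry bytes];
    // hypothetical layout for illustration only.
    inline void SavePage(std::FILE *fo,
                         const std::vector<std::size_t> &offset,
                         const std::vector<Entry> &data) {
      std::size_t n = offset.size(), m = data.size();
      std::fwrite(&n, sizeof(n), 1, fo);
      std::fwrite(offset.data(), sizeof(std::size_t), n, fo);
      std::fwrite(&m, sizeof(m), 1, fo);
      std::fwrite(data.data(), sizeof(Entry), m, fo);
    }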


@ -1,11 +1,13 @@
#ifndef XGBOOST_LEARNER_DMATRIX_H_
#define XGBOOST_LEARNER_DMATRIX_H_
/*! /*!
* Copyright 2014 by Contributors
* \file dmatrix.h * \file dmatrix.h
* \brief meta data and template data structure * \brief meta data and template data structure
* used for regression/classification/ranking * used for regression/classification/ranking
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_LEARNER_DMATRIX_H_
#define XGBOOST_LEARNER_DMATRIX_H_
#include <vector> #include <vector>
#include <cstring> #include <cstring>
#include "../data.h" #include "../data.h"
@ -66,7 +68,7 @@ struct MetaInfo {
return 1.0f; return 1.0f;
} }
} }
inline void SaveBinary(utils::IStream &fo) const { inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*)
int version = kVersion; int version = kVersion;
fo.Write(&version, sizeof(version)); fo.Write(&version, sizeof(version));
fo.Write(&info.num_row, sizeof(info.num_row)); fo.Write(&info.num_row, sizeof(info.num_row));
@ -77,7 +79,7 @@ struct MetaInfo {
fo.Write(info.root_index); fo.Write(info.root_index);
fo.Write(base_margin); fo.Write(base_margin);
} }
inline void LoadBinary(utils::IStream &fi) { inline void LoadBinary(utils::IStream &fi) { // NOLINT(*)
int version; int version;
utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format"); utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format"); utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
@ -114,7 +116,7 @@ struct MetaInfo {
return labels; return labels;
} }
inline const std::vector<float>& GetFloatInfo(const char *field) const { inline const std::vector<float>& GetFloatInfo(const char *field) const {
return ((MetaInfo*)this)->GetFloatInfo(field); return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*)
} }
inline std::vector<unsigned> &GetUIntInfo(const char *field) { inline std::vector<unsigned> &GetUIntInfo(const char *field) {
using namespace std; using namespace std;
@ -124,7 +126,7 @@ struct MetaInfo {
return info.root_index; return info.root_index;
} }
inline const std::vector<unsigned> &GetUIntInfo(const char *field) const { inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
return ((MetaInfo*)this)->GetUIntInfo(field); return ((MetaInfo*)this)->GetUIntInfo(field); // NOLINT(*)
} }
// try to load weight information from file, if exists // try to load weight information from file, if exists
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) { inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
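
SaveBinary/LoadBinary above follow a version-first convention: the writer emits kVersion, then the fields in a fixed order, and the reader checks the version before trusting the layout. A minimal sketch of the same pattern with a hypothetical header struct (the constant's value here is illustrative):

    #include <cstdio>

    struct Header { unsigned long num_row, num_col; };
    static const int kVersion = 4;  // bump whenever the layout changes

    inline bool LoadHeader(std::FILE *fi, Header *h) {
      int version = 0;
      if (std::fread(&version, sizeof(version), 1, fi) != 1) return false;
      if (version != kVersion) return false;  // or migrate old layouts here
      return std::fread(h, sizeof(*h), 1, fi) == 1;
    }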


@ -1,10 +1,12 @@
/*!
* Copyright 2014 by Contributors
* \file xgboost_evaluation-inl.hpp
* \brief evaluation metrics for regression and classification and rank
* \author Kailong Chen, Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_ #ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
#define XGBOOST_LEARNER_EVALUATION_INL_HPP_ #define XGBOOST_LEARNER_EVALUATION_INL_HPP_
/*!
* \file xgboost_evaluation-inl.hpp
* \brief evaluation metrics for regression and classification and rank
* \author Kailong Chen, Tianqi Chen
*/
#include <vector> #include <vector>
#include <utility> #include <utility>
#include <string> #include <string>
@ -344,7 +346,8 @@ struct EvalPrecisionRatio : public IEvaluator{
} }
protected: protected:
inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec, const MetaInfo &info) const { inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec,
const MetaInfo &info) const {
size_t cutoff = static_cast<size_t>(ratio_ * rec.size()); size_t cutoff = static_cast<size_t>(ratio_ * rec.size());
double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0; double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0;
for (size_t j = 0; j < cutoff; ++j) { for (size_t j = 0; j < cutoff; ++j) {
@ -417,8 +420,8 @@ struct EvalAuc : public IEvaluator {
} }
if (distributed) { if (distributed) {
float dat[2]; float dat[2];
dat[0] = static_cast<float>(sum_auc); dat[0] = static_cast<float>(sum_auc);
dat[1] = static_cast<float>(ngroup); dat[1] = static_cast<float>(ngroup);
// approximately estimate auc using mean // approximately estimate auc using mean
rabit::Allreduce<rabit::op::Sum>(dat, 2); rabit::Allreduce<rabit::op::Sum>(dat, 2);
return dat[0] / dat[1]; return dat[0] / dat[1];
@ -463,8 +466,8 @@ struct EvalRankList : public IEvaluator {
} }
if (distributed) { if (distributed) {
float dat[2]; float dat[2];
dat[0] = static_cast<float>(sum_metric); dat[0] = static_cast<float>(sum_metric);
dat[1] = static_cast<float>(ngroup); dat[1] = static_cast<float>(ngroup);
// approximately estimate the metric using mean // approximately estimate the metric using mean
rabit::Allreduce<rabit::op::Sum>(dat, 2); rabit::Allreduce<rabit::op::Sum>(dat, 2);
return dat[0] / dat[1]; return dat[0] / dat[1];
@ -489,7 +492,7 @@ struct EvalRankList : public IEvaluator {
} }
} }
/*! \return evaluation metric, given the pair_sort record, (pred,label) */ /*! \return evaluation metric, given the pair_sort record, (pred,label) */
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0; virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0; // NOLINT(*)
protected: protected:
unsigned topn_; unsigned topn_;
@ -530,7 +533,7 @@ struct EvalNDCG : public EvalRankList{
} }
return static_cast<float>(sumdcg); return static_cast<float>(sumdcg);
} }
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const { virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const { // NOLINT(*)
std::stable_sort(rec.begin(), rec.end(), CmpFirst); std::stable_sort(rec.begin(), rec.end(), CmpFirst);
float dcg = this->CalcDCG(rec); float dcg = this->CalcDCG(rec);
std::stable_sort(rec.begin(), rec.end(), CmpSecond); std::stable_sort(rec.begin(), rec.end(), CmpSecond);
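
Both EvalAuc and EvalRankList aggregate across workers the same way: pack the local metric sum and group count into a two-element array, sum-Allreduce it, and divide, which yields the mean metric over all groups in the cluster. The source comment calls this approximate for AUC because averaging per-group AUCs is not the same as pooling all predictions. The pattern, recomposed from the hunks above:

    // Local partials: sum_metric over ngroup query groups on this worker.
    float dat[2];
    dat[0] = static_cast<float>(sum_metric);
    dat[1] = static_cast<float>(ngroup);
    // After a sum-Allreduce every worker holds the global totals,
    // so dat[0] / dat[1] is the mean metric over all groups.
    rabit::Allreduce<rabit::op::Sum>(dat, 2);
    float global_metric = dat[0] / dat[1];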


@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_EVALUATION_H_
#define XGBOOST_LEARNER_EVALUATION_H_
/*! /*!
* Copyright 2014 by Contributors
* \file evaluation.h * \file evaluation.h
* \brief interface of evaluation function supported in xgboost * \brief interface of evaluation function supported in xgboost
* \author Tianqi Chen, Kailong Chen * \author Tianqi Chen, Kailong Chen
*/ */
#ifndef XGBOOST_LEARNER_EVALUATION_H_
#define XGBOOST_LEARNER_EVALUATION_H_
#include <string> #include <string>
#include <vector> #include <vector>
#include <cstdio> #include <cstdio>


@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
#define XGBOOST_LEARNER_HELPER_UTILS_H_
/*! /*!
* Copyright 2014 by Contributors
* \file helper_utils.h * \file helper_utils.h
* \brief useful helper functions * \brief useful helper functions
* \author Tianqi Chen, Kailong Chen * \author Tianqi Chen, Kailong Chen
*/ */
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
#define XGBOOST_LEARNER_HELPER_UTILS_H_
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <cmath> #include <cmath>


@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file learner-inl.hpp * \file learner-inl.hpp
* \brief learning algorithm * \brief learning algorithm
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include <utility> #include <utility>
@ -30,7 +32,7 @@ class BoostLearner : public rabit::Serializable {
gbm_ = NULL; gbm_ = NULL;
name_obj_ = "reg:linear"; name_obj_ = "reg:linear";
name_gbm_ = "gbtree"; name_gbm_ = "gbtree";
silent= 0; silent = 0;
prob_buffer_row = 1.0f; prob_buffer_row = 1.0f;
distributed_mode = 0; distributed_mode = 0;
updater_mode = 0; updater_mode = 0;
@ -68,7 +70,7 @@ class BoostLearner : public rabit::Serializable {
} }
char str_temp[25]; char str_temp[25];
utils::SPrintf(str_temp, sizeof(str_temp), "%lu", utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
static_cast<unsigned long>(buffer_size)); static_cast<unsigned long>(buffer_size)); // NOLINT(*)
this->SetParam("num_pbuffer", str_temp); this->SetParam("num_pbuffer", str_temp);
this->pred_buffer_size = buffer_size; this->pred_buffer_size = buffer_size;
} }
@ -161,7 +163,7 @@ class BoostLearner : public rabit::Serializable {
* \param fi input stream * \param fi input stream
* \param calc_num_feature whether call InitTrainer with calc_num_feature * \param calc_num_feature whether call InitTrainer with calc_num_feature
*/ */
inline void LoadModel(utils::IStream &fi, inline void LoadModel(utils::IStream &fi, // NOLINT(*)
bool calc_num_feature = true) { bool calc_num_feature = true) {
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0, utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
"BoostLearner: wrong model format"); "BoostLearner: wrong model format");
@ -228,7 +230,7 @@ class BoostLearner : public rabit::Serializable {
} }
delete fi; delete fi;
} }
inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
ModelParam p = mparam; ModelParam p = mparam;
p.saved_with_pbuffer = static_cast<int>(with_pbuffer); p.saved_with_pbuffer = static_cast<int>(with_pbuffer);
fo.Write(&p, sizeof(ModelParam)); fo.Write(&p, sizeof(ModelParam));
@ -345,8 +347,7 @@ class BoostLearner : public rabit::Serializable {
bool output_margin, bool output_margin,
std::vector<float> *out_preds, std::vector<float> *out_preds,
unsigned ntree_limit = 0, unsigned ntree_limit = 0,
bool pred_leaf = false bool pred_leaf = false) const {
) const {
if (pred_leaf) { if (pred_leaf) {
gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit); gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
} else { } else {
@ -517,7 +518,7 @@ class BoostLearner : public rabit::Serializable {
protected: protected:
// magic number to transform random seed // magic number to transform random seed
const static int kRandSeedMagic = 127; static const int kRandSeedMagic = 127;
// cache entry object that helps handle feature caching // cache entry object that helps handle feature caching
struct CacheEntry { struct CacheEntry {
const DMatrix *mat_; const DMatrix *mat_;
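
The SetCacheData hunk above routes a numeric buffer size through the string-typed SetParam interface via SPrintf. The same round trip with standard calls, as a small self-contained sketch (SPrintf is an internal helper; std::snprintf stands in for it here):

    #include <cstddef>
    #include <cstdio>
    #include <string>

    // Format a size for SetParam("num_pbuffer", value), as the
    // learner does above.
    inline std::string SizeParam(std::size_t buffer_size) {
      char str_temp[25];
      std::snprintf(str_temp, sizeof(str_temp), "%lu",
                    static_cast<unsigned long>(buffer_size));
      return std::string(str_temp);
    }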


@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file objective-inl.hpp * \file objective-inl.hpp
* \brief objective function implementations * \brief objective function implementations
* \author Tianqi Chen, Kailong Chen * \author Tianqi Chen, Kailong Chen
*/ */
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <utility> #include <utility>
@ -176,14 +178,14 @@ class RegLossObj : public IObjFunction {
// poisson regression for count // poisson regression for count
class PoissonRegression : public IObjFunction { class PoissonRegression : public IObjFunction {
public: public:
explicit PoissonRegression(void) { PoissonRegression(void) {
max_delta_step = 0.0f; max_delta_step = 0.0f;
} }
virtual ~PoissonRegression(void) {} virtual ~PoissonRegression(void) {}
virtual void SetParam(const char *name, const char *val) { virtual void SetParam(const char *name, const char *val) {
using namespace std; using namespace std;
if (!strcmp( "max_delta_step", name )) { if (!strcmp("max_delta_step", name)) {
max_delta_step = static_cast<float>(atof(val)); max_delta_step = static_cast<float>(atof(val));
} }
} }
@ -201,9 +203,9 @@ class PoissonRegression : public IObjFunction {
// check if label in range // check if label in range
bool label_correct = true; bool label_correct = true;
// start calculating gradient // start calculating gradient
const long ndata = static_cast<long>(preds.size()); const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long i = 0; i < ndata; ++i) { for (long i = 0; i < ndata; ++i) { // NOLINT(*)
float p = preds[i]; float p = preds[i];
float w = info.GetWeight(i); float w = info.GetWeight(i);
float y = info.labels[i]; float y = info.labels[i];
@ -219,9 +221,9 @@ class PoissonRegression : public IObjFunction {
} }
virtual void PredTransform(std::vector<float> *io_preds) { virtual void PredTransform(std::vector<float> *io_preds) {
std::vector<float> &preds = *io_preds; std::vector<float> &preds = *io_preds;
const long ndata = static_cast<long>(preds.size()); const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]); preds[j] = std::exp(preds[j]);
} }
} }
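
The gradient loop above fills one (grad, hess) pair per instance. For Poisson regression with a log link the raw score f is the log of the expected count, so the negative log-likelihood is exp(f) - y*f up to a constant, giving grad = exp(f) - y and hess = exp(f). A worked sketch of the per-instance math, assuming the max_delta_step parameter parsed above is folded into the hessian's exponent to bound the update, with weights handled as in the surrounding loops:

    #include <cmath>

    // d/df   [exp(f) - y*f] = exp(f) - y
    // d2/df2 [exp(f) - y*f] = exp(f)
    inline void PoissonGradHess(float pred, float label, float weight,
                                float max_delta_step,
                                float *grad, float *hess) {
      *grad = (std::exp(pred) - label) * weight;
      *hess = std::exp(pred + max_delta_step) * weight;
    }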


@ -1,11 +1,14 @@
#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
#define XGBOOST_LEARNER_OBJECTIVE_H_
/*! /*!
* Copyright 2014 by Contributors
* \file objective.h * \file objective.h
* \brief interface of objective function used for gradient boosting * \brief interface of objective function used for gradient boosting
* \author Tianqi Chen, Kailong Chen * \author Tianqi Chen, Kailong Chen
*/ */
#include "dmatrix.h" #ifndef XGBOOST_LEARNER_OBJECTIVE_H_
#define XGBOOST_LEARNER_OBJECTIVE_H_
#include <vector>
#include "./dmatrix.h"
namespace xgboost { namespace xgboost {
namespace learner { namespace learner {
@ -13,7 +16,7 @@ namespace learner {
class IObjFunction{ class IObjFunction{
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~IObjFunction(void){} virtual ~IObjFunction(void) {}
/*! /*!
* \brief set parameters from outside * \brief set parameters from outside
* \param name name of the parameter * \param name name of the parameter
@ -38,7 +41,7 @@ class IObjFunction{
* \brief transform prediction values, this is only called when Prediction is called * \brief transform prediction values, this is only called when Prediction is called
* \param io_preds prediction values, saves to this vector as well * \param io_preds prediction values, saves to this vector as well
*/ */
virtual void PredTransform(std::vector<float> *io_preds){} virtual void PredTransform(std::vector<float> *io_preds) {}
/*! /*!
* \brief transform prediction values, this is only called when Eval is called, * \brief transform prediction values, this is only called when Eval is called,
* usually it redirects to PredTransform * usually it redirects to PredTransform
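
The interface shown here is the whole contract for an objective: SetParam for configuration, a gradient routine, and optional prediction transforms. A minimal squared-error objective sketched against it; the GetGradient signature and DefaultEvalMetric are inferred from the implementations elsewhere in this diff, so treat them as assumptions:

    class SquaredErrorObj : public IObjFunction {
     public:
      virtual ~SquaredErrorObj(void) {}
      virtual void SetParam(const char *name, const char *val) {}
      virtual void GetGradient(const std::vector<float> &preds,
                               const MetaInfo &info, int iter,
                               std::vector<bst_gpair> *out_gpair) {
        out_gpair->resize(preds.size());
        for (size_t i = 0; i < preds.size(); ++i) {
          const float w = info.GetWeight(i);
          // l = 0.5 * (pred - y)^2  =>  grad = pred - y, hess = 1
          (*out_gpair)[i] = bst_gpair((preds[i] - info.labels[i]) * w, w);
        }
      }
      virtual const char* DefaultEvalMetric(void) const { return "rmse"; }
    };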


@ -1,13 +1,13 @@
#ifndef XGBOOST_SYNC_H_
#define XGBOOST_SYNC_H_
/*! /*!
* Copyright 2014 by Contributors
* \file sync.h * \file sync.h
* \brief the synchronization module of rabit * \brief the synchronization module of rabit
* redirects to subtree rabit header * redirects to subtree rabit header
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_SYNC_SYNC_H_
#define XGBOOST_SYNC_SYNC_H_
#include "../../subtree/rabit/include/rabit.h" #include "../../subtree/rabit/include/rabit.h"
#include "../../subtree/rabit/include/rabit/timer.h" #include "../../subtree/rabit/include/rabit/timer.h"
#endif // XGBOOST_SYNC_H_ #endif // XGBOOST_SYNC_SYNC_H_


@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_MODEL_H_
#define XGBOOST_TREE_MODEL_H_
/*! /*!
* Copyright 2014 by Contributors
* \file model.h * \file model.h
* \brief model structure for tree * \brief model structure for tree
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_MODEL_H_
#define XGBOOST_TREE_MODEL_H_
#include <string> #include <string>
#include <cstring> #include <cstring>
#include <sstream> #include <sstream>
@ -70,7 +72,7 @@ class TreeModel {
/*! \brief tree node */ /*! \brief tree node */
class Node { class Node {
public: public:
Node(void) : sindex_(0) {} Node(void) : sindex_(0) {}
/*! \brief index of left child */ /*! \brief index of left child */
inline int cleft(void) const { inline int cleft(void) const {
return this->cleft_; return this->cleft_;
@ -273,7 +275,7 @@ class TreeModel {
return &leaf_vector[nid * param.size_leaf_vector]; return &leaf_vector[nid * param.size_leaf_vector];
} }
/*! \brief get leaf vector given nid */ /*! \brief get leaf vector given nid */
inline const bst_float* leafvec(int nid) const{ inline const bst_float* leafvec(int nid) const {
if (leaf_vector.size() == 0) return NULL; if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector]; return &leaf_vector[nid * param.size_leaf_vector];
} }
@ -292,7 +294,7 @@ class TreeModel {
* \brief load model from stream * \brief load model from stream
* \param fi input stream * \param fi input stream
*/ */
inline void LoadModel(utils::IStream &fi) { inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
utils::Check(fi.Read(&param, sizeof(Param)) > 0, utils::Check(fi.Read(&param, sizeof(Param)) > 0,
"TreeModel: wrong format"); "TreeModel: wrong format");
nodes.resize(param.num_nodes); stats.resize(param.num_nodes); nodes.resize(param.num_nodes); stats.resize(param.num_nodes);
@ -317,7 +319,7 @@ class TreeModel {
* \brief save model to stream * \brief save model to stream
* \param fo output stream * \param fo output stream
*/ */
inline void SaveModel(utils::IStream &fo) const { inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
utils::Assert(param.num_nodes == static_cast<int>(nodes.size()), utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
"Tree::SaveModel"); "Tree::SaveModel");
utils::Assert(param.num_nodes == static_cast<int>(stats.size()), utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
@ -400,7 +402,7 @@ class TreeModel {
} }
private: private:
void Dump(int nid, std::stringstream &fo, void Dump(int nid, std::stringstream &fo, // NOLINT(*)
const utils::FeatMap& fmap, int depth, bool with_stats) { const utils::FeatMap& fmap, int depth, bool with_stats) {
for (int i = 0; i < depth; ++i) { for (int i = 0; i < depth; ++i) {
fo << '\t'; fo << '\t';
@ -469,7 +471,7 @@ struct RTreeNodeStat {
/*! \brief number of child that is leaf node known up to now */ /*! \brief number of child that is leaf node known up to now */
int leaf_child_cnt; int leaf_child_cnt;
/*! \brief print information of current stats to fo */ /*! \brief print information of current stats to fo */
inline void Print(std::stringstream &fo, bool is_leaf) const { inline void Print(std::stringstream &fo, bool is_leaf) const { // NOLINT(*)
if (!is_leaf) { if (!is_leaf) {
fo << ",gain=" << loss_chg << ",cover=" << sum_hess; fo << ",gain=" << loss_chg << ",cover=" << sum_hess;
} else { } else {
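
Node above initializes sindex_ to 0; in this tree layout that one word packs the split feature index together with the default direction for missing values, with the top bit meaning "missing goes left". A sketch of the packing (accessor names hypothetical; only the bit layout matters):

    // bit 31: missing values follow the left child
    // bits 0..30: feature index used by the split
    struct SplitIndex {
      unsigned sindex_;
      unsigned split_index(void) const { return sindex_ & ((1U << 31) - 1U); }
      bool default_left(void) const { return (sindex_ >> 31) != 0; }
      void set_split(unsigned fid, bool def_left) {
        sindex_ = fid | (def_left ? (1U << 31) : 0U);
      }
    };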


@ -1,10 +1,13 @@
#ifndef XGBOOST_TREE_PARAM_H_
#define XGBOOST_TREE_PARAM_H_
/*! /*!
* Copyright 2014 by Contributors
* \file param.h * \file param.h
* \brief training parameters, statistics used to support tree construction * \brief training parameters, statistics used to support tree construction
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_PARAM_H_
#define XGBOOST_TREE_PARAM_H_
#include <vector>
#include <cstring> #include <cstring>
#include "../data.h" #include "../data.h"
@ -244,7 +247,7 @@ struct GradStats {
this->Add(b.sum_grad, b.sum_hess); this->Add(b.sum_grad, b.sum_hess);
} }
/*! \brief same as add, reduce is used in All Reduce */ /*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(GradStats &a, const GradStats &b) { inline static void Reduce(GradStats &a, const GradStats &b) { // NOLINT(*)
a.Add(b); a.Add(b);
} }
/*! \brief set current value to a - b */ /*! \brief set current value to a - b */
@ -257,7 +260,7 @@ struct GradStats {
return sum_hess == 0.0; return sum_hess == 0.0;
} }
/*! \brief set leaf vector value based on statistics */ /*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const{ inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
} }
// constructor to allow inheritance // constructor to allow inheritance
GradStats(void) {} GradStats(void) {}
@ -324,7 +327,7 @@ struct CVGradStats : public GradStats {
} }
} }
/*! \brief same as add, reduce is used in All Reduce */ /*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(CVGradStats &a, const CVGradStats &b) { inline static void Reduce(CVGradStats &a, const CVGradStats &b) { // NOLINT(*)
a.Add(b); a.Add(b);
} }
/*! \brief set current value to a - b */ /*! \brief set current value to a - b */
@ -407,7 +410,7 @@ struct SplitEntry{
} }
} }
/*! \brief same as update, used by AllReduce*/ /*! \brief same as update, used by AllReduce*/
inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { // NOLINT(*)
dst.Update(src); dst.Update(src);
} }
/*!\return feature index to split on */ /*!\return feature index to split on */
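
GradStats accumulates (gradient sum G, hessian sum H) and SplitEntry ranks candidates by loss_chg; the score they both plug into is the regularized structure gain G^2/(H + lambda). A worked sketch of the split gain that the updater files below compute inline, ignoring the alpha and max_delta_step refinements the full TrainParam supports:

    // Structure score of a leaf holding gradient sum G, hessian sum H,
    // with L2 regularization lam.
    inline double CalcGain(double G, double H, double lam) {
      return (G * G) / (H + lam);
    }

    // loss_chg of a split = gain(left) + gain(right) - gain(parent).
    inline double SplitGain(double GL, double HL, double GR, double HR,
                            double lam) {
      return CalcGain(GL, HL, lam) + CalcGain(GR, HR, lam)
           - CalcGain(GL + GR, HL + HR, lam);
    }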


@ -1,3 +1,4 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX #define NOMINMAX


@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_H_
#define XGBOOST_TREE_UPDATER_H_
/*! /*!
* Copyright 2014 by Contributors
* \file updater.h * \file updater.h
* \brief interface to update the tree * \brief interface to update the tree
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_H_
#define XGBOOST_TREE_UPDATER_H_
#include <vector> #include <vector>
#include "../data.h" #include "../data.h"


@ -1,12 +1,14 @@
#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_basemaker-inl.hpp * \file updater_basemaker-inl.hpp
* \brief implement a common tree constructor * \brief implement a common tree constructor
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <string>
#include <limits> #include <limits>
#include "../sync/sync.h" #include "../sync/sync.h"
#include "../utils/random.h" #include "../utils/random.h"
@ -60,8 +62,11 @@ class BaseMaker: public IUpdater {
bst_float a = fminmax[fid * 2]; bst_float a = fminmax[fid * 2];
bst_float b = fminmax[fid * 2 + 1]; bst_float b = fminmax[fid * 2 + 1];
if (a == -std::numeric_limits<bst_float>::max()) return 0; if (a == -std::numeric_limits<bst_float>::max()) return 0;
if (-a == b) return 1; if (-a == b) {
else return 2; return 1;
} else {
return 2;
}
} }
inline bst_float MaxValue(bst_uint fid) const { inline bst_float MaxValue(bst_uint fid) const {
return fminmax[fid * 2 + 1]; return fminmax[fid * 2 + 1];
@ -70,7 +75,7 @@ class BaseMaker: public IUpdater {
std::vector<bst_uint> &findex = *p_findex; std::vector<bst_uint> &findex = *p_findex;
findex.clear(); findex.clear();
for (size_t i = 0; i < fminmax.size(); i += 2) { for (size_t i = 0; i < fminmax.size(); i += 2) {
const bst_uint fid = static_cast<bst_uint>(i / 2); const bst_uint fid = static_cast<bst_uint>(i / 2);
if (this->Type(fid) != 0) findex.push_back(fid); if (this->Type(fid) != 0) findex.push_back(fid);
} }
unsigned n = static_cast<unsigned>(p * findex.size()); unsigned n = static_cast<unsigned>(p * findex.size());
@ -116,7 +121,7 @@ class BaseMaker: public IUpdater {
} }
return nthread; return nthread;
} }
// ------class member helpers--------- // ------class member helpers---------
/*! \brief initialize temp data structure */ /*! \brief initialize temp data structure */
inline void InitData(const std::vector<bst_gpair> &gpair, inline void InitData(const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat, const IFMatrix &fmat,
@ -124,7 +129,8 @@ class BaseMaker: public IUpdater {
const RegTree &tree) { const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots, utils::Assert(tree.param.num_nodes == tree.param.num_roots,
"TreeMaker: can only grow new tree"); "TreeMaker: can only grow new tree");
{// setup position {
// setup position
position.resize(gpair.size()); position.resize(gpair.size());
if (root_index.size() == 0) { if (root_index.size() == 0) {
std::fill(position.begin(), position.end(), 0); std::fill(position.begin(), position.end(), 0);
@ -147,7 +153,8 @@ class BaseMaker: public IUpdater {
} }
} }
} }
{// expand query {
// expand query
qexpand.reserve(256); qexpand.clear(); qexpand.reserve(256); qexpand.clear();
for (int i = 0; i < tree.param.num_roots; ++i) { for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand.push_back(i); qexpand.push_back(i);
@ -170,7 +177,7 @@ class BaseMaker: public IUpdater {
this->UpdateNode2WorkIndex(tree); this->UpdateNode2WorkIndex(tree);
} }
// return decoded position // return decoded position
inline int DecodePosition(bst_uint ridx) const{ inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx]; const int pid = position[ridx];
return pid < 0 ? ~pid : pid; return pid < 0 ? ~pid : pid;
} }
@ -189,7 +196,8 @@ class BaseMaker: public IUpdater {
* \param p_fmat feature matrix needed for tree construction * \param p_fmat feature matrix needed for tree construction
* \param tree the regression tree structure * \param tree the regression tree structure
*/ */
inline void ResetPositionCol(const std::vector<int> &nodes, IFMatrix *p_fmat, const RegTree &tree) { inline void ResetPositionCol(const std::vector<int> &nodes,
IFMatrix *p_fmat, const RegTree &tree) {
// set the positions in the nondefault // set the positions in the nondefault
this->SetNonDefaultPositionCol(nodes, p_fmat, tree); this->SetNonDefaultPositionCol(nodes, p_fmat, tree);
// set rest of instances to default position // set rest of instances to default position
@ -252,7 +260,7 @@ class BaseMaker: public IUpdater {
const int nid = this->DecodePosition(ridx); const int nid = this->DecodePosition(ridx);
// go back to parent, correct those who are not default // go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if(fvalue < tree[nid].split_cond()) { if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft()); this->SetEncodePosition(ridx, tree[nid].cleft());
} else { } else {
this->SetEncodePosition(ridx, tree[nid].cright()); this->SetEncodePosition(ridx, tree[nid].cright());
@ -337,15 +345,16 @@ class BaseMaker: public IUpdater {
return; return;
} }
if (last_fvalue != fvalue) { if (last_fvalue != fvalue) {
double rmax = rmin + wmin; double rmax = rmin + wmin;
if (rmax >= next_goal && sketch->temp.size != max_size) { if (rmax >= next_goal && sketch->temp.size != max_size) {
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { if (sketch->temp.size == 0 ||
last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
// push to sketch // push to sketch
sketch->temp.data[sketch->temp.size] = sketch->temp.data[sketch->temp.size] =
utils::WXQuantileSketch<bst_float, bst_float>:: utils::WXQuantileSketch<bst_float, bst_float>::
Entry(static_cast<bst_float>(rmin), Entry(static_cast<bst_float>(rmin),
static_cast<bst_float>(rmax), static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue); static_cast<bst_float>(wmin), last_fvalue);
utils::Assert(sketch->temp.size < max_size, utils::Assert(sketch->temp.size < max_size,
"invalid maximum size max_size=%u, stemp.size=%lu\n", "invalid maximum size max_size=%u, stemp.size=%lu\n",
max_size, sketch->temp.size); max_size, sketch->temp.size);
@ -353,15 +362,15 @@ class BaseMaker: public IUpdater {
} }
if (sketch->temp.size == max_size) { if (sketch->temp.size == max_size) {
next_goal = sum_total * 2.0f + 1e-5f; next_goal = sum_total * 2.0f + 1e-5f;
} else{ } else {
next_goal = static_cast<bst_float>(sketch->temp.size * sum_total / max_size); next_goal = static_cast<bst_float>(sketch->temp.size * sum_total / max_size);
} }
} else { } else {
if (rmax >= next_goal) { if (rmax >= next_goal) {
rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n", rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n",
rmax, sum_total, next_goal, sketch->temp.size); rmax, sum_total, next_goal, sketch->temp.size);
} }
} }
rmin = rmax; rmin = rmax;
wmin = w; wmin = w;
last_fvalue = fvalue; last_fvalue = fvalue;
@ -375,13 +384,13 @@ class BaseMaker: public IUpdater {
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
utils::Assert(sketch->temp.size <= max_size, utils::Assert(sketch->temp.size <= max_size,
"Finalize: invalid maximum size, max_size=%u, stemp.size=%lu", "Finalize: invalid maximum size, max_size=%u, stemp.size=%lu",
max_size, sketch->temp.size ); max_size, sketch->temp.size);
// push to sketch // push to sketch
sketch->temp.data[sketch->temp.size] = sketch->temp.data[sketch->temp.size] =
utils::WXQuantileSketch<bst_float, bst_float>:: utils::WXQuantileSketch<bst_float, bst_float>::
Entry(static_cast<bst_float>(rmin), Entry(static_cast<bst_float>(rmin),
static_cast<bst_float>(rmax), static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue); static_cast<bst_float>(wmin), last_fvalue);
++sketch->temp.size; ++sketch->temp.size;
} }
sketch->PushTemp(); sketch->PushTemp();
@ -415,4 +424,4 @@ class BaseMaker: public IUpdater {
}; };
} // namespace tree } // namespace tree
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ #endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
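
DecodePosition above leans on two's-complement: an active row stores its node id directly, while a dropped or parked row stores the bitwise complement ~nid, which is always negative, so the id stays recoverable either way. A sketch of both directions:

    #include <vector>

    // position[ridx] >= 0 : row active at that node
    // position[ridx] <  0 : row inactive, node id is ~position[ridx]
    inline int DecodePosition(const std::vector<int> &position,
                              unsigned ridx) {
      const int pid = position[ridx];
      return pid < 0 ? ~pid : pid;
    }

    inline void MarkInactive(std::vector<int> *position, unsigned ridx) {
      int &p = (*position)[ridx];
      if (p >= 0) p = ~p;  // flip to the encoded (negative) form
    }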


@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_colmaker-inl.hpp * \file updater_colmaker-inl.hpp
* \brief use columnwise update to construct a tree * \brief use columnwise update to construct a tree
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
#include <vector> #include <vector>
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
@ -114,10 +116,13 @@ class ColMaker: public IUpdater {
// initialize temp data structure // initialize temp data structure
inline void InitData(const std::vector<bst_gpair> &gpair, inline void InitData(const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat, const IFMatrix &fmat,
const std::vector<unsigned> &root_index, const RegTree &tree) { const std::vector<unsigned> &root_index,
utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree"); const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots,
"ColMaker: can only grow new tree");
const std::vector<bst_uint> &rowset = fmat.buffered_rowset(); const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
{// setup position {
// setup position
position.resize(gpair.size()); position.resize(gpair.size());
if (root_index.size() == 0) { if (root_index.size() == 0) {
for (size_t i = 0; i < rowset.size(); ++i) { for (size_t i = 0; i < rowset.size(); ++i) {
@ -127,7 +132,8 @@ class ColMaker: public IUpdater {
for (size_t i = 0; i < rowset.size(); ++i) { for (size_t i = 0; i < rowset.size(); ++i) {
const bst_uint ridx = rowset[i]; const bst_uint ridx = rowset[i];
position[ridx] = root_index[ridx]; position[ridx] = root_index[ridx];
utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, "root index exceeds setting"); utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots,
"root index exceeds setting");
} }
} }
// mark deletion for the deleted data // mark deletion for the deleted data
@ -154,11 +160,12 @@ class ColMaker: public IUpdater {
} }
unsigned n = static_cast<unsigned>(param.colsample_bytree * feat_index.size()); unsigned n = static_cast<unsigned>(param.colsample_bytree * feat_index.size());
random::Shuffle(feat_index); random::Shuffle(feat_index);
//utils::Check(n > 0, "colsample_bytree is too small that no feature can be included"); utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included",
utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included", param.colsample_bytree); param.colsample_bytree);
feat_index.resize(n); feat_index.resize(n);
} }
{// setup temp space for each thread {
// setup temp space for each thread
#pragma omp parallel #pragma omp parallel
{ {
this->nthread = omp_get_num_threads(); this->nthread = omp_get_num_threads();
@ -171,20 +178,25 @@ class ColMaker: public IUpdater {
} }
snode.reserve(256); snode.reserve(256);
} }
{// expand query {
// expand query
qexpand_.reserve(256); qexpand_.clear(); qexpand_.reserve(256); qexpand_.clear();
for (int i = 0; i < tree.param.num_roots; ++i) { for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand_.push_back(i); qexpand_.push_back(i);
} }
} }
} }
/*! \brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */ /*!
* \brief initialize the base_weight, root_gain,
* and NodeEntry for all the new nodes in qexpand
*/
inline void InitNewNode(const std::vector<int> &qexpand, inline void InitNewNode(const std::vector<int> &qexpand,
const std::vector<bst_gpair> &gpair, const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat, const IFMatrix &fmat,
const BoosterInfo &info, const BoosterInfo &info,
const RegTree &tree) { const RegTree &tree) {
{// setup statistics space for each tree node {
// setup statistics space for each tree node
for (size_t i = 0; i < stemp.size(); ++i) { for (size_t i = 0; i < stemp.size(); ++i) {
stemp[i].resize(tree.param.num_nodes, ThreadEntry(param)); stemp[i].resize(tree.param.num_nodes, ThreadEntry(param));
} }
@ -280,7 +292,7 @@ class ColMaker: public IUpdater {
ThreadEntry &e = stemp[tid][nid]; ThreadEntry &e = stemp[tid][nid];
float fsplit; float fsplit;
if (tid != 0) { if (tid != 0) {
if(std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) { if (std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f; fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f;
} else { } else {
continue; continue;
@ -290,16 +302,20 @@ class ColMaker: public IUpdater {
} }
if (need_forward && tid != 0) { if (need_forward && tid != 0) {
c.SetSubstract(snode[nid].stats, e.stats); c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight &&
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, fsplit, false); e.best.Update(loss_chg, fid, fsplit, false);
} }
} }
if (need_backward) { if (need_backward) {
tmp.SetSubstract(sum, e.stats); tmp.SetSubstract(sum, e.stats);
c.SetSubstract(snode[nid].stats, tmp); c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight &&
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, fsplit, true); e.best.Update(loss_chg, fid, fsplit, true);
} }
} }
@ -308,8 +324,10 @@ class ColMaker: public IUpdater {
tmp = sum; tmp = sum;
ThreadEntry &e = stemp[nthread-1][nid]; ThreadEntry &e = stemp[nthread-1][nid];
c.SetSubstract(snode[nid].stats, tmp); c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight &&
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true); e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true);
} }
} }
@ -335,19 +353,25 @@ class ColMaker: public IUpdater {
e.first_fvalue = fvalue; e.first_fvalue = fvalue;
} else { } else {
// forward default right // forward default right
if (std::abs(fvalue - e.first_fvalue) > rt_2eps){ if (std::abs(fvalue - e.first_fvalue) > rt_2eps) {
if (need_forward) { if (need_forward) {
c.SetSubstract(snode[nid].stats, e.stats); c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight &&
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) -
snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false);
} }
} }
if (need_backward) { if (need_backward) {
cright.SetSubstract(e.stats_extra, e.stats); cright.SetSubstract(e.stats_extra, e.stats);
c.SetSubstract(snode[nid].stats, cright); c.SetSubstract(snode[nid].stats, cright);
if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight &&
bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); cright.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) +
c.CalcGain(param) -
snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true);
} }
} }
@ -361,7 +385,7 @@ class ColMaker: public IUpdater {
// update enumeration solution // update enumeration solution
inline void UpdateEnumeration(int nid, bst_gpair gstats, inline void UpdateEnumeration(int nid, bst_gpair gstats,
float fvalue, int d_step, bst_uint fid, float fvalue, int d_step, bst_uint fid,
TStats &c, std::vector<ThreadEntry> &temp) { TStats &c, std::vector<ThreadEntry> &temp) { // NOLINT(*)
// get the statistics of nid // get the statistics of nid
ThreadEntry &e = temp[nid]; ThreadEntry &e = temp[nid];
// test if first hit, this is fine, because we set 0 during init // test if first hit, this is fine, because we set 0 during init
@ -370,10 +394,12 @@ class ColMaker: public IUpdater {
e.last_fvalue = fvalue; e.last_fvalue = fvalue;
} else { } else {
// try to find a split // try to find a split
if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
e.stats.sum_hess >= param.min_child_weight) {
c.SetSubstract(snode[nid].stats, e.stats); c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
} }
} }
@ -388,7 +414,7 @@ class ColMaker: public IUpdater {
int d_step, int d_step,
bst_uint fid, bst_uint fid,
const std::vector<bst_gpair> &gpair, const std::vector<bst_gpair> &gpair,
std::vector<ThreadEntry> &temp) { std::vector<ThreadEntry> &temp) { // NOLINT(*)
const std::vector<int> &qexpand = qexpand_; const std::vector<int> &qexpand = qexpand_;
// clear all the temp statistics // clear all the temp statistics
for (size_t j = 0; j < qexpand.size(); ++j) { for (size_t j = 0; j < qexpand.size(); ++j) {
@ -443,7 +469,8 @@ class ColMaker: public IUpdater {
ThreadEntry &e = temp[nid]; ThreadEntry &e = temp[nid];
c.SetSubstract(snode[nid].stats, e.stats); c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
const float gap = std::abs(e.last_fvalue) + rt_eps; const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap; const float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@ -458,7 +485,7 @@ class ColMaker: public IUpdater {
bst_uint fid, bst_uint fid,
const std::vector<bst_gpair> &gpair, const std::vector<bst_gpair> &gpair,
const BoosterInfo &info, const BoosterInfo &info,
std::vector<ThreadEntry> &temp) { std::vector<ThreadEntry> &temp) { // NOLINT(*)
// use cacheline aware optimization // use cacheline aware optimization
if (TStats::kSimpleStats != 0 && param.cache_opt != 0) { if (TStats::kSimpleStats != 0 && param.cache_opt != 0) {
EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp); EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp);
@ -471,7 +498,7 @@ class ColMaker: public IUpdater {
} }
// left statistics // left statistics
TStats c(param); TStats c(param);
for(const ColBatch::Entry *it = begin; it != end; it += d_step) { for (const ColBatch::Entry *it = begin; it != end; it += d_step) {
const bst_uint ridx = it->index; const bst_uint ridx = it->index;
const int nid = position[ridx]; const int nid = position[ridx];
if (nid < 0) continue; if (nid < 0) continue;
@ -485,10 +512,12 @@ class ColMaker: public IUpdater {
e.last_fvalue = fvalue; e.last_fvalue = fvalue;
} else { } else {
// try to find a split // try to find a split
if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
e.stats.sum_hess >= param.min_child_weight) {
c.SetSubstract(snode[nid].stats, e.stats); c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
} }
} }
@ -503,7 +532,8 @@ class ColMaker: public IUpdater {
ThreadEntry &e = temp[nid]; ThreadEntry &e = temp[nid];
c.SetSubstract(snode[nid].stats, e.stats); c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
const float gap = std::abs(e.last_fvalue) + rt_eps; const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap; const float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@ -585,7 +615,8 @@ class ColMaker: public IUpdater {
} }
} }
// reset position of each data points after split is created in the tree // reset position of each data points after split is created in the tree
inline void ResetPosition(const std::vector<int> &qexpand, IFMatrix *p_fmat, const RegTree &tree) { inline void ResetPosition(const std::vector<int> &qexpand,
IFMatrix *p_fmat, const RegTree &tree) {
// set the positions in the nondefault // set the positions in the nondefault
this->SetNonDefaultPosition(qexpand, p_fmat, tree); this->SetNonDefaultPosition(qexpand, p_fmat, tree);
// set rest of instances to default position // set rest of instances to default position
@ -655,7 +686,7 @@ class ColMaker: public IUpdater {
const float fvalue = col[j].fvalue; const float fvalue = col[j].fvalue;
// go back to parent, correct those who are not default // go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if(fvalue < tree[nid].split_cond()) { if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft()); this->SetEncodePosition(ridx, tree[nid].cleft());
} else { } else {
this->SetEncodePosition(ridx, tree[nid].cright()); this->SetEncodePosition(ridx, tree[nid].cright());
@ -667,7 +698,7 @@ class ColMaker: public IUpdater {
} }
// utils to get/set position, with encoded format // utils to get/set position, with encoded format
// return decoded position // return decoded position
inline int DecodePosition(bst_uint ridx) const{ inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx]; const int pid = position[ridx];
return pid < 0 ? ~pid : pid; return pid < 0 ? ~pid : pid;
} }
@ -679,7 +710,7 @@ class ColMaker: public IUpdater {
position[ridx] = nid; position[ridx] = nid;
} }
} }
//--data fields-- // --data fields--
const TrainParam &param; const TrainParam &param;
// number of omp threads used during training // number of omp threads used during training
int nthread; int nthread;
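
EnumerateSplit above scans one feature's entries in sorted order: e.stats accumulates the rows already passed, c = snode.stats - e.stats is the complement, and a midpoint split is proposed whenever consecutive values differ by more than rt_2eps and both sides clear min_child_weight. A stripped-down sketch of that scan for a single node, with plain arrays, no threading, and the regularized gain inlined:

    #include <cstddef>

    struct BestSplit { double loss_chg; float value; bool valid; };

    // value[] sorted ascending; grad/hess aligned with value.
    inline BestSplit ScanSplits(const float *value, const double *grad,
                                const double *hess, std::size_t n,
                                double G, double H,  // node totals
                                double lam, double min_child_weight) {
      BestSplit best = {0.0, 0.0f, false};
      double gL = 0.0, hL = 0.0;
      for (std::size_t i = 1; i < n; ++i) {
        gL += grad[i - 1]; hL += hess[i - 1];
        if (value[i] == value[i - 1]) continue;  // no cut between ties
        const double gR = G - gL, hR = H - hL;
        if (hL < min_child_weight || hR < min_child_weight) continue;
        const double chg = gL * gL / (hL + lam) + gR * gR / (hR + lam)
                         - G * G / (H + lam);
        if (!best.valid || chg > best.loss_chg) {
          best.loss_chg = chg;
          best.value = 0.5f * (value[i] + value[i - 1]);  // midpoint cut
          best.valid = true;
        }
      }
      return best;
    }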


@ -1,11 +1,15 @@
#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_distcol-inl.hpp * \file updater_distcol-inl.hpp
* \brief beta distributed version that takes a sub-column * \brief beta distributed version that takes a sub-column
* and construct a tree * and construct a tree
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
#include <vector>
#include <algorithm>
#include "../sync/sync.h" #include "../sync/sync.h"
#include "../utils/bitmap.h" #include "../utils/bitmap.h"
#include "../utils/io.h" #include "../utils/io.h"
@ -40,10 +44,11 @@ class DistColMaker : public ColMaker<TStats> {
virtual const int* GetLeafPosition(void) const { virtual const int* GetLeafPosition(void) const {
return builder.GetLeafPosition(); return builder.GetLeafPosition();
} }
private: private:
struct Builder : public ColMaker<TStats>::Builder { struct Builder : public ColMaker<TStats>::Builder {
public: public:
Builder(const TrainParam &param) explicit Builder(const TrainParam &param)
: ColMaker<TStats>::Builder(param) { : ColMaker<TStats>::Builder(param) {
} }
inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) { inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) {
@ -63,6 +68,7 @@ class DistColMaker : public ColMaker<TStats> {
virtual const int* GetLeafPosition(void) const { virtual const int* GetLeafPosition(void) const {
return BeginPtr(this->position); return BeginPtr(this->position);
} }
protected: protected:
virtual void SetNonDefaultPosition(const std::vector<int> &qexpand, virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
IFMatrix *p_fmat, const RegTree &tree) { IFMatrix *p_fmat, const RegTree &tree) {
@ -142,7 +148,7 @@ class DistColMaker : public ColMaker<TStats> {
} }
vec.push_back(this->snode[nid].best); vec.push_back(this->snode[nid].best);
} }
// TODO, lazy version // TODO(tqchen) lazy version
// communicate best solution // communicate best solution
reducer.Allreduce(BeginPtr(vec), vec.size()); reducer.Allreduce(BeginPtr(vec), vec.size());
// assign solution back // assign solution back
@ -166,4 +172,4 @@ class DistColMaker : public ColMaker<TStats> {
}; };
} // namespace tree } // namespace tree
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
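
The "communicate best solution" step works because SplitEntry::Reduce is just Update: each worker fills vec with its best split per expanding node over the columns it owns, and one Allreduce with that reducer leaves every worker holding the global winner. A sketch of the reducer semantics with simplified fields; the real Update also breaks ties by feature index so all workers converge on the identical entry:

    struct BestEntry {
      double loss_chg;
      unsigned split_index;
      float split_value;
      // keep the better of two proposals, with a deterministic tie-break
      inline bool Update(const BestEntry &e) {
        if (e.loss_chg > loss_chg ||
            (e.loss_chg == loss_chg && e.split_index < split_index)) {
          *this = e;
          return true;
        }
        return false;
      }
      // plugged into Allreduce as the combining operator
      inline static void Reduce(BestEntry &dst, const BestEntry &src) {
        dst.Update(src);
      }
    };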


@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_histmaker-inl.hpp * \file updater_histmaker-inl.hpp
* \brief use histogram counting to construct a tree * \brief use histogram counting to construct a tree
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include "../sync/sync.h" #include "../sync/sync.h"
@ -171,6 +173,7 @@ class HistMaker: public BaseMaker {
const BoosterInfo &info, const BoosterInfo &info,
const std::vector <bst_uint> &fset, const std::vector <bst_uint> &fset,
const RegTree &tree) = 0; const RegTree &tree) = 0;
private: private:
inline void EnumerateSplit(const HistUnit &hist, inline void EnumerateSplit(const HistUnit &hist,
const TStats &node_sum, const TStats &node_sum,
@ -187,7 +190,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s); c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update((float)loss_chg, fid, hist.cut[i], false)) { if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i], false)) {
*left_sum = s; *left_sum = s;
} }
} }
@ -200,7 +203,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s); c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update((float)loss_chg, fid, hist.cut[i-1], true)) { if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i-1], true)) {
*left_sum = c; *left_sum = c;
} }
} }
@ -219,19 +222,19 @@ class HistMaker: public BaseMaker {
std::vector<TStats> left_sum(qexpand.size()); std::vector<TStats> left_sum(qexpand.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size()); bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand[wid];
utils::Assert(node2workindex[nid] == static_cast<int>(wid), utils::Assert(node2workindex[nid] == static_cast<int>(wid),
"node2workindex inconsistent"); "node2workindex inconsistent");
SplitEntry &best = sol[wid]; SplitEntry &best = sol[wid];
TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
for (size_t i = 0; i < fset.size(); ++ i) { for (size_t i = 0; i < fset.size(); ++i) {
EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)], EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)],
node_sum, fset[i], &best, &left_sum[wid]); node_sum, fset[i], &best, &left_sum[wid]);
} }
} }
// get the best result, we can synchronize the solution // get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand[wid];
const SplitEntry &best = sol[wid]; const SplitEntry &best = sol[wid];
const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
@ -394,7 +397,8 @@ class CQHistMaker: public HistMaker<TStats> {
#if __cplusplus >= 201103L #if __cplusplus >= 201103L
auto lazy_get_summary = [&]() auto lazy_get_summary = [&]()
#endif #endif
{// get summary {
// get summary
thread_sketch.resize(this->get_nthread()); thread_sketch.resize(this->get_nthread());
// number of rows in the buffered rowset // number of rows in the buffered rowset
const size_t nrows = p_fmat->buffered_rowset().size(); const size_t nrows = p_fmat->buffered_rowset().size();
@ -670,7 +674,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
} }
if (this->node2workindex[nid] < 0) { if (this->node2workindex[nid] < 0) {
this->position[ridx] = ~nid; this->position[ridx] = ~nid;
} else{ } else {
for (bst_uint j = 0; j < inst.length; ++j) { for (bst_uint j = 0; j < inst.length; ++j) {
builder.AddBudget(inst[j].index, omp_get_thread_num()); builder.AddBudget(inst[j].index, omp_get_thread_num());
} }
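
EnumerateSplit in this file runs the same gain scan as the column maker, but over prebuilt histogram bins: the forward pass accumulates s bin by bin and proposes the stored cut point hist.cut[i] with default right, while the backward pass proposes hist.cut[i-1] with the default flipped. The data layout that makes this cheap, sketched with simplified per-bin stats:

    #include <vector>

    // One feature's histogram for one node: bin i summarizes the rows
    // whose value falls in the i-th interval between cut points.
    struct Hist {
      std::vector<float>  cut;   // candidate thresholds, ascending
      std::vector<double> grad;  // per-bin gradient sums
      std::vector<double> hess;  // per-bin hessian sums
    };
    // Forward scan: bins 0..i go left, threshold cut[i], default right.
    // Backward scan: bins i..end go right, threshold cut[i-1], default left.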


@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_prune-inl.hpp * \file updater_prune-inl.hpp
* \brief prune a tree given the statistics * \brief prune a tree given the statistics
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
#include <vector> #include <vector>
#include "./param.h" #include "./param.h"
#include "./updater.h" #include "./updater.h"
@ -37,9 +39,10 @@ class TreePruner: public IUpdater {
param.learning_rate = lr; param.learning_rate = lr;
syncher.Update(gpair, p_fmat, info, trees); syncher.Update(gpair, p_fmat, info, trees);
} }
private: private:
// try to prune off current leaf // try to prune off current leaf
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
if (tree[nid].is_root()) return npruned; if (tree[nid].is_root()) return npruned;
int pid = tree[nid].parent(); int pid = tree[nid].parent();
RegTree::NodeStat &s = tree.stat(pid); RegTree::NodeStat &s = tree.stat(pid);
@ -54,7 +57,7 @@ class TreePruner: public IUpdater {
} }
} }
/*! \brief do pruning of a tree */ /*! \brief do pruning of a tree */
inline void DoPrune(RegTree &tree) { inline void DoPrune(RegTree &tree) { // NOLINT(*)
int npruned = 0; int npruned = 0;
// initialize auxiliary statistics // initialize auxiliary statistics
for (int nid = 0; nid < tree.param.num_nodes; ++nid) { for (int nid = 0; nid < tree.param.num_nodes; ++nid) {

src/tree/updater_refresh-inl.hpp

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_refresh-inl.hpp * \file updater_refresh-inl.hpp
* \brief refresh the statistics and leaf value on the tree on the dataset * \brief refresh the statistics and leaf value on the tree on the dataset
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
#include <vector> #include <vector>
#include <limits> #include <limits>
#include "../sync/sync.h" #include "../sync/sync.h"

src/tree/updater_skmaker-inl.hpp

@ -1,11 +1,13 @@
#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_skmaker-inl.hpp * \file updater_skmaker-inl.hpp
* \brief use approximation sketch to construct a tree, * \brief use approximation sketch to construct a tree,
a refresh is needed to make the statistics exactly correct a refresh is needed to make the statistics exactly correct
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include "../sync/sync.h" #include "../sync/sync.h"
@ -81,7 +83,7 @@ class SketchMaker: public BaseMaker {
double neg_grad; double neg_grad;
/*! \brief sum of hessian statistics */ /*! \brief sum of hessian statistics */
double sum_hess; double sum_hess;
explicit SKStats(void) {} SKStats(void) {}
// constructor // constructor
explicit SKStats(const TrainParam &param) { explicit SKStats(const TrainParam &param) {
this->Clear(); this->Clear();
@ -123,7 +125,7 @@ class SketchMaker: public BaseMaker {
sum_hess += b.sum_hess; sum_hess += b.sum_hess;
} }
/*! \brief same as add, reduce is used in All Reduce */ /*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(SKStats &a, const SKStats &b) { inline static void Reduce(SKStats &a, const SKStats &b) { // NOLINT(*)
a.Add(b); a.Add(b);
} }
/*! \brief set leaf vector value based on statistics */ /*! \brief set leaf vector value based on statistics */
@ -217,7 +219,9 @@ class SketchMaker: public BaseMaker {
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand.size(); ++i) {
const int nid = this->qexpand[i]; const int nid = this->qexpand[i];
for (int k = 0; k < 3; ++k) { for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[3 * nid + k].sum_total)); sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
static_cast<bst_float>(
sbuilder[3 * nid + k].sum_total));
} }
} }
return; return;
@ -272,12 +276,12 @@ class SketchMaker: public BaseMaker {
std::vector<SplitEntry> sol(qexpand.size()); std::vector<SplitEntry> sol(qexpand.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size()); bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand[wid];
utils::Assert(node2workindex[nid] == static_cast<int>(wid), utils::Assert(node2workindex[nid] == static_cast<int>(wid),
"node2workindex inconsistent"); "node2workindex inconsistent");
SplitEntry &best = sol[wid]; SplitEntry &best = sol[wid];
for (bst_uint fid = 0; fid < num_feature; ++ fid) { for (bst_uint fid = 0; fid < num_feature; ++fid) {
unsigned base = (wid * p_tree->param.num_feature + fid) * 3; unsigned base = (wid * p_tree->param.num_feature + fid) * 3;
EnumerateSplit(summary_array[base + 0], EnumerateSplit(summary_array[base + 0],
summary_array[base + 1], summary_array[base + 1],
@ -286,7 +290,7 @@ class SketchMaker: public BaseMaker {
} }
} }
// get the best result, we can synchronize the solution // get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand[wid];
const SplitEntry &best = sol[wid]; const SplitEntry &best = sol[wid];
// set up the values // set up the values
@ -361,7 +365,8 @@ class SketchMaker: public BaseMaker {
best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true); best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true);
} }
} }
{// all including {
// all including
SKStats s = feat_sum, c; SKStats s = feat_sum, c;
c.SetSubstract(node_sum, s); c.SetSubstract(node_sum, s);
if (s.sum_hess >= param.min_child_weight && if (s.sum_hess >= param.min_child_weight &&
@ -389,6 +394,6 @@ class SketchMaker: public BaseMaker {
// per node, per feature sketch // per node, per feature sketch
std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs; std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
}; };
} // tree } // namespace tree
} // xgboost } // namespace xgboost
#endif #endif // XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
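The SKStats structure above keeps positive and negative gradient sums separately, and its Reduce is plain element-wise addition, which is what lets rabit combine per-worker statistics in an Allreduce. A toy sketch of that contract follows; the gradient-splitting logic in Add is illustrative, not the exact SKStats::Add.

#include <cstdio>

// Simplified stand-in for SKStats: positive and negative gradients are
// tracked separately, and Reduce(a, b) just folds b into a.
struct SKStatsSketch {
  double pos_grad = 0.0, neg_grad = 0.0, sum_hess = 0.0;
  void Add(double grad, double hess) {                 // per-instance update (assumed)
    if (grad >= 0.0) pos_grad += grad; else neg_grad -= grad;
    sum_hess += hess;
  }
  void Add(const SKStatsSketch &b) {
    pos_grad += b.pos_grad; neg_grad += b.neg_grad; sum_hess += b.sum_hess;
  }
  // same shape as the Reduce used in Allreduce above
  static void Reduce(SKStatsSketch &a, const SKStatsSketch &b) { a.Add(b); }
};

int main() {
  SKStatsSketch worker0, worker1;
  worker0.Add(0.5, 1.0); worker0.Add(-0.25, 1.0);
  worker1.Add(-1.5, 1.0);
  SKStatsSketch::Reduce(worker0, worker1);  // same effect as the Allreduce step
  std::printf("pos=%g neg=%g hess=%g\n",
              worker0.pos_grad, worker0.neg_grad, worker0.sum_hess);
  return 0;
}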

src/tree/updater_sync-inl.hpp

@ -1,11 +1,14 @@
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
/*! /*!
* Copyright 2014 by Contributors
* \file updater_sync-inl.hpp * \file updater_sync-inl.hpp
* \brief synchronize the tree in all distributed nodes * \brief synchronize the tree in all distributed nodes
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#include <vector> #include <vector>
#include <string>
#include <limits> #include <limits>
#include "../sync/sync.h" #include "../sync/sync.h"
#include "./updater.h" #include "./updater.h"

src/utils/base64-inl.h

@ -1,13 +1,16 @@
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
/*! /*!
* Copyright 2014 by Contributors
* \file base64.h * \file base64.h
* \brief data stream support to input and output from/to base64 stream * \brief data stream support to input and output from/to base64 stream
* base64 is easier to store and pass as text format in mapreduce * base64 is easier to store and pass as text format in mapreduce
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
#include <cctype> #include <cctype>
#include <cstdio> #include <cstdio>
#include <string>
#include "./io.h" #include "./io.h"
namespace xgboost { namespace xgboost {
@ -15,7 +18,7 @@ namespace utils {
/*! \brief buffer reader of the stream that allows you to get */ /*! \brief buffer reader of the stream that allows you to get */
class StreamBufferReader { class StreamBufferReader {
public: public:
StreamBufferReader(size_t buffer_size) explicit StreamBufferReader(size_t buffer_size)
:stream_(NULL), :stream_(NULL),
read_len_(1), read_ptr_(1) { read_len_(1), read_ptr_(1) {
buffer_.resize(buffer_size); buffer_.resize(buffer_size);
@ -75,7 +78,7 @@ const char DecodeTable[] = {
}; };
static const char EncodeTable[] = static const char EncodeTable[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
} // namespace base64 } // namespace base64
/*! \brief the stream that reads from base64, note we take from file pointers */ /*! \brief the stream that reads from base64, note we take from file pointers */
class Base64InStream: public IStream { class Base64InStream: public IStream {
public: public:
@ -132,19 +135,19 @@ class Base64InStream: public IStream {
{ {
// second byte // second byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
nvalue |= DecodeTable[tmp_ch] << 12; nvalue |= DecodeTable[tmp_ch] << 12;
*cptr++ = (nvalue >> 16) & 0xFF; --tlen; *cptr++ = (nvalue >> 16) & 0xFF; --tlen;
} }
{ {
// third byte // third byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
// handle termination // handle termination
if (tmp_ch == '=') { if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format"); utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
break; break;
} }
nvalue |= DecodeTable[tmp_ch] << 6; nvalue |= DecodeTable[tmp_ch] << 6;
@ -157,10 +160,10 @@ class Base64InStream: public IStream {
{ {
// fourth byte // fourth byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
if (tmp_ch == '=') { if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
break; break;
} }
nvalue |= DecodeTable[tmp_ch]; nvalue |= DecodeTable[tmp_ch];
@ -246,7 +249,7 @@ class Base64OutStream: public IStream {
int buf_top; int buf_top;
unsigned char buf[4]; unsigned char buf[4];
std::string out_buf; std::string out_buf;
const static size_t kBufferSize = 256; static const size_t kBufferSize = 256;
inline void PutChar(char ch) { inline void PutChar(char ch) {
out_buf += ch; out_buf += ch;
@ -260,5 +263,5 @@ class Base64OutStream: public IStream {
} }
}; };
} // namespace utils } // namespace utils
} // namespace rabit } // namespace xgboost
#endif // RABIT_LEARN_UTILS_BASE64_INL_H_ #endif // XGBOOST_UTILS_BASE64_INL_H_
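For reference, the decode loop in Base64InStream above packs four 6-bit symbols into a 24-bit value and emits it as three bytes; the shifts below match the nvalue bookkeeping in the code. A self-contained sketch, where DecodeChar is a naive lookup over the same alphabet as EncodeTable rather than the actual DecodeTable:

#include <cstdio>
#include <cstring>

// Map one base64 character to its 6-bit value via the encode alphabet.
int DecodeChar(char c) {
  static const char *tbl =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  const char *p = std::strchr(tbl, c);
  return p == NULL ? -1 : static_cast<int>(p - tbl);
}

int main() {
  const char in[4] = {'T', 'W', 'F', 'u'};  // base64 encoding of "Man"
  // pack four 6-bit symbols into 24 bits, as the nvalue shifts above do
  int nvalue = DecodeChar(in[0]) << 18 | DecodeChar(in[1]) << 12 |
               DecodeChar(in[2]) << 6 | DecodeChar(in[3]);
  // emit the three bytes, mirroring "(nvalue >> 16) & 0xFF" etc.
  char out[4] = {static_cast<char>((nvalue >> 16) & 0xFF),
                 static_cast<char>((nvalue >> 8) & 0xFF),
                 static_cast<char>(nvalue & 0xFF), 0};
  std::printf("%s\n", out);  // prints "Man"
  return 0;
}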

src/utils/bitmap.h

@ -1,11 +1,13 @@
#ifndef XGBOOST_UTILS_BITMAP_H_
#define XGBOOST_UTILS_BITMAP_H_
/*! /*!
* Copyright 2014 by Contributors
* \file bitmap.h * \file bitmap.h
* \brief a simple implement of bitmap * \brief a simple implement of bitmap
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap * NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_BITMAP_H_
#define XGBOOST_UTILS_BITMAP_H_
#include <vector> #include <vector>
#include "./utils.h" #include "./utils.h"
#include "./omp.h" #include "./omp.h"
@ -63,4 +65,4 @@ struct BitMap {
}; };
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_BITMAP_H_
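The note above about word-level thread safety follows from how such a bitmap addresses its storage: every Get or Set touches exactly one 32-bit word. A minimal sketch of that addressing; the field and method names are illustrative, not the actual BitMap layout.

#include <cstdio>
#include <vector>

// Word-addressed bitmap: bit i lives in word i>>5 at position i&31,
// so concurrent access is only safe per 32-bit word.
struct BitMapSketch {
  std::vector<unsigned> data;
  void Resize(size_t n) { data.resize((n + 31) / 32, 0u); }
  bool Get(size_t i) const { return (data[i >> 5] >> (i & 31u)) & 1u; }
  void Set(size_t i) { data[i >> 5] |= 1u << (i & 31u); }
};

int main() {
  BitMapSketch bm;
  bm.Resize(100);
  bm.Set(42);
  std::printf("bit42=%d bit43=%d\n", (int)bm.Get(42), (int)bm.Get(43));
  return 0;
}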

src/utils/config.h

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_CONFIG_H_
#define XGBOOST_UTILS_CONFIG_H_
/*! /*!
* Copyright 2014 by Contributors
* \file config.h * \file config.h
* \brief helper class to load in configures from file * \brief helper class to load in configures from file
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_CONFIG_H_
#define XGBOOST_UTILS_CONFIG_H_
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <string> #include <string>

src/utils/fmap.h

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_FMAP_H_
#define XGBOOST_UTILS_FMAP_H_
/*! /*!
* Copyright 2014 by Contributors
* \file fmap.h * \file fmap.h
* \brief helper class that holds the feature names and interpretations * \brief helper class that holds the feature names and interpretations
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_FMAP_H_
#define XGBOOST_UTILS_FMAP_H_
#include <vector> #include <vector>
#include <string> #include <string>
#include <cstring> #include <cstring>
@ -78,4 +80,4 @@ class FeatMap {
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_FMAP_H_ #endif // XGBOOST_UTILS_FMAP_H_

src/utils/group_data.h

@ -1,6 +1,5 @@
#ifndef XGBOOST_UTILS_GROUP_DATA_H_
#define XGBOOST_UTILS_GROUP_DATA_H_
/*! /*!
* Copyright 2014 by Contributors
* \file group_data.h * \file group_data.h
* \brief this file defines utils to group data by integer keys * \brief this file defines utils to group data by integer keys
* Input: given input sequence (key,value), (k1,v1), (k2,v2) * Input: given input sequence (key,value), (k1,v1), (k2,v2)
@ -12,6 +11,11 @@
* The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data * The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_GROUP_DATA_H_
#define XGBOOST_UTILS_GROUP_DATA_H_
#include <vector>
namespace xgboost { namespace xgboost {
namespace utils { namespace utils {
/*! /*!
@ -107,5 +111,4 @@ struct ParallelGroupBuilder {
}; };
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_GROUP_DATA_H_
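The two-pass scan described in the file brief above is a counting-sort style build: pass one counts entries per key (the budget), a prefix sum turns the counts into row pointers, and pass two writes the values into place. A single-threaded sketch using the same AddBudget/InitStorage/Push vocabulary; the keys and values are made up.

#include <cstdio>
#include <vector>

int main() {
  const unsigned keys[] = {2, 0, 2, 1, 2};
  const double vals[] = {2.0, 0.0, 2.1, 1.0, 2.2};
  const size_t n = 5, nkey = 3;
  std::vector<size_t> rptr(nkey + 1, 0);
  for (size_t i = 0; i < n; ++i) ++rptr[keys[i] + 1];        // pass 1: AddBudget
  for (size_t k = 0; k < nkey; ++k) rptr[k + 1] += rptr[k];  // InitStorage: prefix sum
  std::vector<double> out(n);
  std::vector<size_t> fill(rptr.begin(), rptr.end() - 1);    // per-key write cursor
  for (size_t i = 0; i < n; ++i) out[fill[keys[i]]++] = vals[i];  // pass 2: Push
  for (size_t k = 0; k < nkey; ++k) {
    std::printf("key %zu:", k);
    for (size_t j = rptr[k]; j < rptr[k + 1]; ++j) std::printf(" %g", out[j]);
    std::printf("\n");
  }
  return 0;
}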

src/utils/io.h

@ -1,16 +1,19 @@
#ifndef XGBOOST_UTILS_IO_H /*!
#define XGBOOST_UTILS_IO_H * Copyright 2014 by Contributors
* \file io.h
* \brief general stream interface for serialization, I/O
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_IO_H_
#define XGBOOST_UTILS_IO_H_
#include <cstdio> #include <cstdio>
#include <vector> #include <vector>
#include <string> #include <string>
#include <cstring> #include <cstring>
#include "./utils.h" #include "./utils.h"
#include "../sync/sync.h" #include "../sync/sync.h"
/*!
* \file io.h
* \brief general stream interface for serialization, I/O
* \author Tianqi Chen
*/
namespace xgboost { namespace xgboost {
namespace utils { namespace utils {
// reuse the definitions of streams // reuse the definitions of streams
@ -23,7 +26,7 @@ typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
class FileStream : public ISeekStream { class FileStream : public ISeekStream {
public: public:
explicit FileStream(std::FILE *fp) : fp(fp) {} explicit FileStream(std::FILE *fp) : fp(fp) {}
explicit FileStream(void) { FileStream(void) {
this->fp = NULL; this->fp = NULL;
} }
virtual size_t Read(void *ptr, size_t size) { virtual size_t Read(void *ptr, size_t size) {
@ -33,7 +36,7 @@ class FileStream : public ISeekStream {
std::fwrite(ptr, size, 1, fp); std::fwrite(ptr, size, 1, fp);
} }
virtual void Seek(size_t pos) { virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET); std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
} }
virtual size_t Tell(void) { virtual size_t Tell(void) {
return std::ftell(fp); return std::ftell(fp);
@ -42,7 +45,7 @@ class FileStream : public ISeekStream {
return std::feof(fp) != 0; return std::feof(fp) != 0;
} }
inline void Close(void) { inline void Close(void) {
if (fp != NULL){ if (fp != NULL) {
std::fclose(fp); fp = NULL; std::fclose(fp); fp = NULL;
} }
} }
@ -52,6 +55,5 @@ class FileStream : public ISeekStream {
}; };
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#include "./base64-inl.h" #include "./base64-inl.h"
#endif #endif // XGBOOST_UTILS_IO_H_
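FileStream above is a thin adapter from stdio to the ISeekStream interface; each method maps one-to-one onto a C library call, as the sketch below shows. The temporary file name is arbitrary and error handling is elided.

#include <cstdio>

int main() {
  std::FILE *fp = std::fopen("tmp.bin", "wb+");
  if (fp == NULL) return 1;
  int v = 42, r = 0;
  std::fwrite(&v, sizeof(v), 1, fp);          // what FileStream::Write does
  std::fseek(fp, 0L, SEEK_SET);               // FileStream::Seek(0)
  if (std::fread(&r, sizeof(r), 1, fp) == 1)  // FileStream::Read
    std::printf("read back %d\n", r);
  std::fclose(fp);                            // FileStream::Close
  return 0;
}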

src/utils/iterator.h

@ -1,11 +1,13 @@
#ifndef XGBOOST_UTILS_ITERATOR_H
#define XGBOOST_UTILS_ITERATOR_H
#include <cstdio>
/*! /*!
* Copyright 2014 by Contributors
* \file iterator.h * \file iterator.h
* \brief iterator interface * \brief iterator interface
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_ITERATOR_H_
#define XGBOOST_UTILS_ITERATOR_H_
#include <cstdio>
namespace xgboost { namespace xgboost {
namespace utils { namespace utils {
/*! /*!
@ -36,5 +38,5 @@ class IIterator {
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_ITERATOR_H_

src/utils/math.h

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_MATH_H_
#define XGBOOST_UTILS_MATH_H_
/*! /*!
* Copyright 2014 by Contributors
* \file math.h * \file math.h
* \brief support additional math * \brief support additional math
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_MATH_H_
#define XGBOOST_UTILS_MATH_H_
#include <cmath> #include <cmath>
namespace xgboost { namespace xgboost {
@ -28,7 +30,8 @@ inline T LogGamma(T v) {
#if _MSC_VER >= 1800 #if _MSC_VER >= 1800
return lgamma(v); return lgamma(v);
#else #else
#pragma message ("Warning: lgamma function was not available until VS2013, poisson regression will be disabled") #pragma message("Warning: lgamma function was not available until VS2013"\
", poisson regression will be disabled")
utils::Error("lgamma function was not available until VS2013"); utils::Error("lgamma function was not available until VS2013");
return static_cast<T>(1.0); return static_cast<T>(1.0);
#endif #endif

src/utils/omp.h

@ -1,16 +1,20 @@
#ifndef XGBOOST_UTILS_OMP_H_
#define XGBOOST_UTILS_OMP_H_
/*! /*!
* Copyright 2014 by Contributors
* \file omp.h * \file omp.h
* \brief header to handle OpenMP compatibility issues * \brief header to handle OpenMP compatibility issues
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_OMP_H_
#define XGBOOST_UTILS_OMP_H_
#if defined(_OPENMP) #if defined(_OPENMP)
#include <omp.h> #include <omp.h>
#else #else
#ifndef DISABLE_OPENMP #ifndef DISABLE_OPENMP
// use pragma message instead of warning // use pragma message instead of warning
#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading") #pragma message("Warning: OpenMP is not available,"\
"xgboost will be compiled into single-thread code."\
"Use OpenMP-enabled compiler to get benefit of multi-threading")
#endif #endif
inline int omp_get_thread_num() { return 0; } inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; } inline int omp_get_num_threads() { return 1; }
@ -25,6 +29,6 @@ typedef int bst_omp_uint;
#else #else
typedef unsigned bst_omp_uint; typedef unsigned bst_omp_uint;
#endif #endif
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_UTILS_OMP_H_ #endif // XGBOOST_UTILS_OMP_H_
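The fallback stubs above let OpenMP-annotated code compile unchanged when OpenMP is unavailable: the pragma is silently ignored and the stubs report a single thread. A small self-contained illustration of the same pattern:

#include <cstdio>
#if defined(_OPENMP)
#include <omp.h>
#else
// single-thread stubs, as in the header above
inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; }
#endif

int main() {
  // without -fopenmp this pragma is ignored and the body runs once
  #pragma omp parallel
  {
    std::printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
  }
  return 0;
}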

src/utils/quantile.h

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_QUANTILE_H_
#define XGBOOST_UTILS_QUANTILE_H_
/*! /*!
* Copyright 2014 by Contributors
* \file quantile.h * \file quantile.h
* \brief util to compute quantiles * \brief util to compute quantiles
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_QUANTILE_H_
#define XGBOOST_UTILS_QUANTILE_H_
#include <cmath> #include <cmath>
#include <vector> #include <vector>
#include <cstring> #include <cstring>
@ -124,7 +126,7 @@ struct WQSummary {
* \param qvalue the value we query for * \param qvalue the value we query for
* \param istart starting position * \param istart starting position
*/ */
inline Entry Query(DType qvalue, size_t &istart) const { inline Entry Query(DType qvalue, size_t &istart) const { // NOLINT(*)
while (istart < size && qvalue > data[istart].value) { while (istart < size && qvalue > data[istart].value) {
++istart; ++istart;
} }
@ -597,7 +599,7 @@ class QuantileSketchTemplate {
} }
/*! \brief save the data structure into stream */ /*! \brief save the data structure into stream */
template<typename TStream> template<typename TStream>
inline void Save(TStream &fo) const { inline void Save(TStream &fo) const { // NOLINT(*)
fo.Write(&(this->size), sizeof(this->size)); fo.Write(&(this->size), sizeof(this->size));
if (this->size != 0) { if (this->size != 0) {
fo.Write(this->data, this->size * sizeof(Entry)); fo.Write(this->data, this->size * sizeof(Entry));
@ -605,11 +607,12 @@ class QuantileSketchTemplate {
} }
/*! \brief load data structure from input stream */ /*! \brief load data structure from input stream */
template<typename TStream> template<typename TStream>
inline void Load(TStream &fi) { inline void Load(TStream &fi) { // NOLINT(*)
utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1"); utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
this->Reserve(this->size); this->Reserve(this->size);
if (this->size != 0) { if (this->size != 0) {
utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2"); utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0,
"invalid SummaryArray 2");
} }
} }
}; };
@ -741,7 +744,7 @@ class QuantileSketchTemplate {
* \tparam DType type of data content * \tparam DType type of data content
* \tparam RType type of rank * \tparam RType type of rank
*/ */
template<typename DType, typename RType=unsigned> template<typename DType, typename RType = unsigned>
class WQuantileSketch : class WQuantileSketch :
public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{ public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{
}; };
@ -751,7 +754,7 @@ class WQuantileSketch :
* \tparam DType type of data content * \tparam DType type of data content
* \tparam RType type of rank * \tparam RType type of rank
*/ */
template<typename DType, typename RType=unsigned> template<typename DType, typename RType = unsigned>
class WXQuantileSketch : class WXQuantileSketch :
public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{ public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{
}; };
@ -760,11 +763,11 @@ class WXQuantileSketch :
* \tparam DType type of data content * \tparam DType type of data content
* \tparam RType type of rank * \tparam RType type of rank
*/ */
template<typename DType, typename RType=unsigned> template<typename DType, typename RType = unsigned>
class GKQuantileSketch : class GKQuantileSketch :
public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{ public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{
}; };
} // utils } // namespace utils
} // xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_QUANTILE_H_
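The Query hint above works because summary entries are kept sorted by value and istart only moves forward, so a sequence of ordered queries costs one linear pass in total. A toy illustration of that idea; EntrySketch and its rank fields are simplified stand-ins for the real WQSummary::Entry.

#include <cstdio>
#include <cstddef>

struct EntrySketch { double value, rmin, rmax; };  // value plus rank bounds

// Walk forward from the hint position, exactly like WQSummary::Query above.
const EntrySketch *Query(const EntrySketch *data, size_t size,
                         double qvalue, size_t &istart) {
  while (istart < size && qvalue > data[istart].value) ++istart;
  return istart < size ? &data[istart] : NULL;
}

int main() {
  const EntrySketch summary[] = {{1.0, 0, 1}, {2.5, 1, 3}, {4.0, 3, 4}};
  size_t hint = 0;  // shared across ordered queries, so the scan never restarts
  for (double q : {0.5, 2.5, 3.0}) {
    const EntrySketch *e = Query(summary, 3, q, hint);
    if (e != NULL) std::printf("q=%g -> value=%g rmin=%g\n", q, e->value, e->rmin);
  }
  return 0;
}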

src/utils/random.h

@ -1,12 +1,14 @@
#ifndef XGBOOST_UTILS_RANDOM_H_
#define XGBOOST_UTILS_RANDOM_H_
/*! /*!
* Copyright 2014 by Contributors
* \file xgboost_random.h * \file xgboost_random.h
* \brief PRNG to support random number generation * \brief PRNG to support random number generation
* \author Tianqi Chen: tianqi.tchen@gmail.com * \author Tianqi Chen: tianqi.tchen@gmail.com
* *
* Use standard PRNG from stdlib * Use standard PRNG from stdlib
*/ */
#ifndef XGBOOST_UTILS_RANDOM_H_
#define XGBOOST_UTILS_RANDOM_H_
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <vector> #include <vector>
@ -23,11 +25,11 @@ inline void Seed(unsigned seed) {
} }
/*! \brief basic function, uniform */ /*! \brief basic function, uniform */
inline double Uniform(void) { inline double Uniform(void) {
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); // NOLINT(*)
} }
/*! \brief return a real number uniform in (0,1) */ /*! \brief return a real number uniform in (0,1) */
inline double NextDouble2(void) { inline double NextDouble2(void) {
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); // NOLINT(*)
} }
/*! \brief return x~N(0,1) */ /*! \brief return x~N(0,1) */
inline double Normal(void) { inline double Normal(void) {
@ -73,7 +75,7 @@ inline void Shuffle(T *data, size_t sz) {
} }
// random shuffle the data inside, require PRNG // random shuffle the data inside, require PRNG
template<typename T> template<typename T>
inline void Shuffle(std::vector<T> &data) { inline void Shuffle(std::vector<T> &data) { // NOLINT(*)
Shuffle(&data[0], data.size()); Shuffle(&data[0], data.size());
} }
@ -81,17 +83,18 @@ inline void Shuffle(std::vector<T> &data) {
struct Random{ struct Random{
/*! \brief set random number seed */ /*! \brief set random number seed */
inline void Seed(unsigned sd) { inline void Seed(unsigned sd) {
this->rseed = sd; this->rseed = sd;
#if defined(_MSC_VER)||defined(_WIN32) #if defined(_MSC_VER) || defined(_WIN32)
::xgboost::random::Seed(sd); ::xgboost::random::Seed(sd);
#endif #endif
} }
/*! \brief return a real number uniform in [0,1) */ /*! \brief return a real number uniform in [0,1) */
inline double RandDouble(void) { inline double RandDouble(void) {
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe // use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
// For cygwin and mingw, this can slow down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general // For cygwin and mingw, this can slow down parallelism,
// todo, replace with another PRNG // but rand_r is only used in objective-inl.hpp, won't affect speed in general
#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_) // todo, replace with another PRNG
#if defined(_MSC_VER) || defined(_WIN32) || defined(XGBOOST_STRICT_CXX98_)
return Uniform(); return Uniform();
#else #else
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0); return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
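The reason RandDouble above carries its own rseed is that rand_r advances caller-owned state, giving each Random object (and thus each thread) an independent stream, while the Windows and strict-C++98 branches fall back to the shared rand(). A minimal sketch of the same platform guard:

#include <cstdio>
#include <cstdlib>

int main() {
  unsigned seed = 0;  // per-thread state in the real code
#if defined(_MSC_VER) || defined(_WIN32)
  // MSVC: rand() is threadsafe, so seed globally and use it directly
  std::srand(seed);
  double u = static_cast<double>(std::rand()) / (static_cast<double>(RAND_MAX) + 1.0);
#else
  // POSIX: rand_r updates the caller-supplied seed, no shared state
  double u = static_cast<double>(rand_r(&seed)) / (static_cast<double>(RAND_MAX) + 1.0);
#endif
  std::printf("u in [0,1): %g\n", u);
  return 0;
}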

src/utils/thread.h

@ -1,16 +1,17 @@
#ifndef XGBOOST_UTILS_THREAD_H
#define XGBOOST_UTILS_THREAD_H
/*! /*!
* Copyright by Contributors
* \file thread.h * \file thread.h
* \brief this header includes the minimum necessary resource * \brief this header includes the minimum necessary resource
* for multi-threading that can be compiled in windows, linux, mac
* \author Tianqi Chen * \author Tianqi Chen
* Acknowledgement: this file is adapted from SVDFeature project, by same author.
* The MAC support part of this code is provided by Artemy Kolchinsky
*/ */
#ifndef XGBOOST_UTILS_THREAD_H_ // NOLINT(*)
#define XGBOOST_UTILS_THREAD_H_ // NOLINT(*)
#ifdef _MSC_VER #ifdef _MSC_VER
#include "utils.h"
#include <windows.h> #include <windows.h>
#include <process.h> #include <process.h>
#include "../xgboost/utils.h"
namespace xgboost { namespace xgboost {
namespace utils { namespace utils {
/*! \brief simple semaphore used for synchronization */ /*! \brief simple semaphore used for synchronization */
@ -18,27 +19,78 @@ class Semaphore {
public : public :
inline void Init(int init_val) { inline void Init(int init_val) {
sem = CreateSemaphore(NULL, init_val, 10, NULL); sem = CreateSemaphore(NULL, init_val, 10, NULL);
utils::Assert(sem != NULL, "create Semaphore error"); utils::Check(sem != NULL, "create Semaphore error");
} }
inline void Destroy(void) { inline void Destroy(void) {
CloseHandle(sem); CloseHandle(sem);
} }
inline void Wait(void) { inline void Wait(void) {
utils::Assert(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error"); utils::Check(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error");
} }
inline void Post(void) { inline void Post(void) {
utils::Assert(ReleaseSemaphore(sem, 1, NULL) != 0, "ReleaseSemaphore error"); utils::Check(ReleaseSemaphore(sem, 1, NULL) != 0, "ReleaseSemaphore error");
} }
private: private:
HANDLE sem; HANDLE sem;
}; };
/*! \brief mutex under windows */
class Mutex {
public:
inline void Init(void) {
utils::Check(InitializeCriticalSectionAndSpinCount(&mutex, 0x00000400) != 0,
"Mutex::Init fail");
}
inline void Lock(void) {
EnterCriticalSection(&mutex);
}
inline void Unlock(void) {
LeaveCriticalSection(&mutex);
}
inline void Destroy(void) {
DeleteCriticalSection(&mutex);
}
private:
friend class ConditionVariable;
CRITICAL_SECTION mutex;
};
// conditional variable that uses pthread
class ConditionVariable {
public:
// initialize conditional variable
inline void Init(void) {
InitializeConditionVariable(&cond);
}
// destroy the thread
inline void Destroy(void) {
// DeleteConditionVariable(&cond);
}
// wait on the conditional variable
inline void Wait(Mutex *mutex) {
utils::Check(SleepConditionVariableCS(&cond, &(mutex->mutex), INFINITE) != 0,
"ConditionVariable:Wait fail");
}
inline void Broadcast(void) {
WakeAllConditionVariable(&cond);
}
inline void Signal(void) {
WakeConditionVariable(&cond);
}
private:
CONDITION_VARIABLE cond;
};
/*! \brief simple thread that wraps windows thread */ /*! \brief simple thread that wraps windows thread */
class Thread { class Thread {
private: private:
HANDLE thread_handle; HANDLE thread_handle;
unsigned thread_id; unsigned thread_id;
public: public:
inline void Start(unsigned int __stdcall entry(void*), void *param) { inline void Start(unsigned int __stdcall entry(void*p), void *param) {
thread_handle = (HANDLE)_beginthreadex(NULL, 0, entry, param, 0, &thread_id); thread_handle = (HANDLE)_beginthreadex(NULL, 0, entry, param, 0, &thread_id);
} }
inline int Join(void) { inline int Join(void) {
@ -55,38 +107,40 @@ inline void ThreadExit(void *status) {
} // namespace xgboost } // namespace xgboost
#else #else
// thread interface using g++ // thread interface using g++
extern "C" {
#include <semaphore.h> #include <semaphore.h>
#include <pthread.h> #include <pthread.h>
} #include <errno.h>
namespace xgboost { namespace xgboost {
namespace utils { namespace utils {
/*!\brief semaphore class */ /*!\brief semaphore class */
class Semaphore { class Semaphore {
#ifdef __APPLE__ #ifdef __APPLE__
private: private:
sem_t* semPtr; sem_t* semPtr;
char sema_name[20]; char sema_name[20];
private: private:
inline void GenRandomString(char *s, const int len) { inline void GenRandomString(char *s, const int len) {
static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" ; static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
s[i] = alphanum[rand() % (sizeof(alphanum) - 1)]; s[i] = alphanum[rand() % (sizeof(alphanum) - 1)];
} }
s[len] = 0; s[len] = 0;
} }
public: public:
inline void Init(int init_val) { inline void Init(int init_val) {
sema_name[0]='/'; sema_name[0] = '/';
sema_name[1]='s'; sema_name[1] = 's';
sema_name[2]='e'; sema_name[2] = 'e';
sema_name[3]='/'; sema_name[3] = '/';
GenRandomString(&sema_name[4], 16); GenRandomString(&sema_name[4], 16);
if((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) { if ((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) {
perror("sem_open"); perror("sem_open");
exit(1); exit(1);
} }
utils::Assert(semPtr != NULL, "create Semaphore error"); utils::Check(semPtr != NULL, "create Semaphore error");
} }
inline void Destroy(void) { inline void Destroy(void) {
if (sem_close(semPtr) == -1) { if (sem_close(semPtr) == -1) {
@ -105,51 +159,91 @@ class Semaphore {
sem_post(semPtr); sem_post(semPtr);
} }
#else #else
private: private:
sem_t sem; sem_t sem;
public: public:
inline void Init(int init_val) { inline void Init(int init_val) {
sem_init(&sem, 0, init_val); if (sem_init(&sem, 0, init_val) != 0) {
utils::Error("Semaphore.Init:%s", strerror(errno));
}
} }
inline void Destroy(void) { inline void Destroy(void) {
sem_destroy(&sem); if (sem_destroy(&sem) != 0) {
utils::Error("Semaphore.Destroy:%s", strerror(errno));
}
} }
inline void Wait(void) { inline void Wait(void) {
sem_wait(&sem); if (sem_wait(&sem) != 0) {
utils::Error("Semaphore.Wait:%s", strerror(errno));
}
} }
inline void Post(void) { inline void Post(void) {
sem_post(&sem); if (sem_post(&sem) != 0) {
utils::Error("Semaphore.Post:%s", strerror(errno));
}
} }
#endif #endif
}; };
// helper for c thread // mutex that works with pthread
// used to strictly call c++ function from pthread class Mutex {
struct ThreadContext { public:
void *(*entry)(void*); inline void Init(void) {
void *param; pthread_mutex_init(&mutex, NULL);
};
extern "C" {
inline void *RunThreadContext(void *ctx_) {
ThreadContext *ctx = reinterpret_cast<ThreadContext*>(ctx_);
void *ret = (*ctx->entry)(ctx->param);
delete ctx;
return ret;
} }
} inline void Lock(void) {
pthread_mutex_lock(&mutex);
}
inline void Unlock(void) {
pthread_mutex_unlock(&mutex);
}
inline void Destroy(void) {
pthread_mutex_destroy(&mutex);
}
private:
friend class ConditionVariable;
pthread_mutex_t mutex;
};
// conditional variable that uses pthread
class ConditionVariable {
public:
// initialize conditional variable
inline void Init(void) {
pthread_cond_init(&cond, NULL);
}
// destroy the thread
inline void Destroy(void) {
pthread_cond_destroy(&cond);
}
// wait on the conditional variable
inline void Wait(Mutex *mutex) {
pthread_cond_wait(&cond, &(mutex->mutex));
}
inline void Broadcast(void) {
pthread_cond_broadcast(&cond);
}
inline void Signal(void) {
pthread_cond_signal(&cond);
}
private:
pthread_cond_t cond;
};
/*!\brief simple thread class */ /*!\brief simple thread class */
class Thread { class Thread {
private: private:
pthread_t thread; pthread_t thread;
public : public :
inline void Start(void *entry(void*), void *param) { inline void Start(void * entry(void*), void *param) { // NOLINT(*)
pthread_attr_t attr; pthread_attr_t attr;
pthread_attr_init(&attr); pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
ThreadContext *ctx = new ThreadContext(); pthread_create(&thread, &attr, entry, param);
ctx->entry = entry; ctx->param = param;
pthread_create(&thread, &attr, RunThreadContext, ctx);
} }
inline int Join(void) { inline int Join(void) {
void *status; void *status;
@ -159,9 +253,8 @@ class Thread {
inline void ThreadExit(void *status) { inline void ThreadExit(void *status) {
pthread_exit(status); pthread_exit(status);
} }
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#define XGBOOST_THREAD_PREFIX void * #define XGBOOST_THREAD_PREFIX void *
#endif #endif // Linux
#endif #endif // XGBOOST_UTILS_THREAD_H_ NOLINT(*)
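The Mutex and ConditionVariable wrappers added above follow the standard pthread protocol: the waiter re-checks its predicate in a loop while holding the lock, and the signaler flips the predicate under the same lock before waking it. A self-contained pthread example of that usage (compile with -lpthread); the raw pthread calls stand in for the wrapper methods named in the comments.

#include <pthread.h>
#include <cstdio>

pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
bool ready = false;

void *Waiter(void *) {
  pthread_mutex_lock(&mutex);            // Mutex::Lock
  while (!ready) {                       // re-check the predicate after waking
    pthread_cond_wait(&cond, &mutex);    // ConditionVariable::Wait(&mutex)
  }
  pthread_mutex_unlock(&mutex);          // Mutex::Unlock
  std::printf("woken up\n");
  return NULL;
}

int main() {
  pthread_t th;
  pthread_create(&th, NULL, Waiter, NULL);   // Thread::Start
  pthread_mutex_lock(&mutex);
  ready = true;                              // flip the predicate under the lock
  pthread_cond_signal(&cond);                // ConditionVariable::Signal
  pthread_mutex_unlock(&mutex);
  pthread_join(th, NULL);                    // Thread::Join
  return 0;
}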

src/utils/thread_buffer.h

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
#define XGBOOST_UTILS_THREAD_BUFFER_H_
/*! /*!
* Copyright 2014 by Contributors
* \file thread_buffer.h * \file thread_buffer.h
* \brief multi-thread buffer, iterator, can be used to create parallel pipeline * \brief multi-thread buffer, iterator, can be used to create parallel pipeline
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
#define XGBOOST_UTILS_THREAD_BUFFER_H_
#include <vector> #include <vector>
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>
@ -27,7 +29,7 @@ class ThreadBuffer {
this->buf_size = 30; this->buf_size = 30;
} }
~ThreadBuffer(void) { ~ThreadBuffer(void) {
if(init_end) this->Destroy(); if (init_end) this->Destroy();
} }
/*!\brief set parameter, will also pass the parameter to factory */ /*!\brief set parameter, will also pass the parameter to factory */
inline void SetParam(const char *name, const char *val) { inline void SetParam(const char *name, const char *val) {
@ -94,7 +96,7 @@ class ThreadBuffer {
* \param elem element to store into * \param elem element to store into
* \return whether reaches end of data * \return whether reaches end of data
*/ */
inline bool Next(Elem &elem) { inline bool Next(Elem &elem) { // NOLINT(*)
// end of buffer try to switch // end of buffer try to switch
if (buf_index == buf_size) { if (buf_index == buf_size) {
this->SwitchBuffer(); this->SwitchBuffer();
@ -114,11 +116,12 @@ class ThreadBuffer {
inline ElemFactory &get_factory(void) { inline ElemFactory &get_factory(void) {
return factory; return factory;
} }
inline const ElemFactory &get_factory(void) const{ inline const ElemFactory &get_factory(void) const {
return factory; return factory;
} }
// size of buffer // size of buffer
int buf_size; int buf_size;
private: private:
// factory object used to load configures // factory object used to load configures
ElemFactory factory; ElemFactory factory;
@ -147,7 +150,7 @@ class ThreadBuffer {
* this implementation is like producer-consumer style * this implementation is like producer-consumer style
*/ */
inline void RunLoader(void) { inline void RunLoader(void) {
while(!destroy_signal) { while (!destroy_signal) {
// sleep until loading is needed // sleep until loading is needed
loading_need.Wait(); loading_need.Wait();
std::vector<Elem> &buf = current_buf ? bufB : bufA; std::vector<Elem> &buf = current_buf ? bufB : bufA;
@ -155,7 +158,7 @@ class ThreadBuffer {
for (i = 0; i < buf_size ; ++i) { for (i = 0; i < buf_size ; ++i) {
if (!factory.LoadNext(buf[i])) { if (!factory.LoadNext(buf[i])) {
int &end = current_buf ? endB : endA; int &end = current_buf ? endB : endA;
end = i; // marks the termination end = i; // marks the termination
break; break;
} }
} }
@ -166,7 +169,7 @@ class ThreadBuffer {
} }
/*!\brief entry point of loader thread */ /*!\brief entry point of loader thread */
inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) { inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) {
static_cast< ThreadBuffer<Elem,ElemFactory>* >(pthread)->RunLoader(); static_cast< ThreadBuffer<Elem, ElemFactory>* >(pthread)->RunLoader();
return NULL; return NULL;
} }
/*!\brief start loader thread */ /*!\brief start loader thread */
@ -198,7 +201,6 @@ class ThreadBuffer {
loading_need.Post(); loading_need.Post();
} }
}; };
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_THREAD_BUFFER_H_
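ThreadBuffer above overlaps I/O with computation by double buffering: the consumer drains one of bufA/bufB while the loader thread refills the other, and a short end marks termination exactly as in RunLoader. The sketch below shows the fill-then-drain logic on a single thread, with a fake LoadNext standing in for ElemFactory; the semaphore handshake is elided.

#include <cstdio>
#include <vector>

int counter = 0;
// fake factory: produces 0..6, then reports end of data
bool LoadNext(int &elem) { elem = counter++; return counter <= 7; }

int main() {
  const int buf_size = 3;
  std::vector<int> bufA(buf_size), bufB(buf_size);
  int current_buf = 0;
  // fill-then-drain loop; in ThreadBuffer the fill runs on the loader thread
  while (true) {
    std::vector<int> &buf = current_buf ? bufB : bufA;
    int end = buf_size;
    for (int i = 0; i < buf_size; ++i) {
      if (!LoadNext(buf[i])) { end = i; break; }  // marks the termination
    }
    for (int i = 0; i < end; ++i) std::printf("consume %d\n", buf[i]);
    if (end != buf_size) break;     // loader hit end of data
    current_buf = !current_buf;     // SwitchBuffer
  }
  return 0;
}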

src/utils/utils.h

@ -1,15 +1,18 @@
#ifndef XGBOOST_UTILS_UTILS_H_
#define XGBOOST_UTILS_UTILS_H_
/*! /*!
* Copyright 2014 by Contributors
* \file utils.h * \file utils.h
* \brief simple utils to support the code * \brief simple utils to support the code
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_UTILS_H_
#define XGBOOST_UTILS_UTILS_H_
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <cstdio> #include <cstdio>
#include <string> #include <string>
#include <cstdlib> #include <cstdlib>
#include <vector> #include <vector>
#include <stdexcept>
#ifndef XGBOOST_STRICT_CXX98_ #ifndef XGBOOST_STRICT_CXX98_
#include <cstdarg> #include <cstdarg>
@ -26,7 +29,7 @@
#else #else
#ifdef _FILE_OFFSET_BITS #ifdef _FILE_OFFSET_BITS
#if _FILE_OFFSET_BITS == 32 #if _FILE_OFFSET_BITS == 32
#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit") #pragma message("Warning: FILE OFFSET BITS defined to be 32 bit")
#endif #endif
#endif #endif
@ -71,8 +74,7 @@ inline void HandleAssertError(const char *msg) {
* \param msg error message * \param msg error message
*/ */
inline void HandleCheckError(const char *msg) { inline void HandleCheckError(const char *msg) {
fprintf(stderr, "%s\n", msg); throw std::runtime_error(msg);
exit(-1);
} }
inline void HandlePrint(const char *msg) { inline void HandlePrint(const char *msg) {
printf("%s", msg); printf("%s", msg);
@ -158,7 +160,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) {
// easy utils that can be directly acessed in xgboost // easy utils that can be directly acessed in xgboost
/*! \brief get the beginning address of a vector */ /*! \brief get the beginning address of a vector */
template<typename T> template<typename T>
inline T *BeginPtr(std::vector<T> &vec) { inline T *BeginPtr(std::vector<T> &vec) { // NOLINT(*)
if (vec.size() == 0) { if (vec.size() == 0) {
return NULL; return NULL;
} else { } else {
@ -174,7 +176,7 @@ inline const T *BeginPtr(const std::vector<T> &vec) {
return &vec[0]; return &vec[0];
} }
} }
inline char* BeginPtr(std::string &str) { inline char* BeginPtr(std::string &str) { // NOLINT(*)
if (str.length() == 0) return NULL; if (str.length() == 0) return NULL;
return &str[0]; return &str[0];
} }
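The switch above from exit(-1) to throwing std::runtime_error is what allows a failed utils::Check deep inside the library to be caught at a language-binding boundary instead of killing the host process. A minimal sketch of that contract; this Check is a simplified stand-in for the real variadic one.

#include <cstdio>
#include <stdexcept>

inline void HandleCheckError(const char *msg) {
  throw std::runtime_error(msg);  // the new behavior above
}
inline void Check(bool exp, const char *msg) {  // simplified utils::Check
  if (!exp) HandleCheckError(msg);
}

int main() {
  try {
    Check(false, "invalid base64 format");
  } catch (const std::exception &e) {
    // what the wrapper's API_END relies on
    std::printf("caught: %s\n", e.what());
  }
  return 0;
}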

src/xgboost_main.cpp

@ -1,14 +1,16 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX #define NOMINMAX
#include <ctime> #include <ctime>
#include <string> #include <string>
#include <cstring> #include <cstring>
#include <vector>
#include "./sync/sync.h" #include "./sync/sync.h"
#include "io/io.h" #include "./io/io.h"
#include "utils/utils.h" #include "./utils/utils.h"
#include "utils/config.h" #include "./utils/config.h"
#include "learner/learner-inl.hpp" #include "./learner/learner-inl.hpp"
namespace xgboost { namespace xgboost {
/*! /*!
@ -90,12 +92,14 @@ class BoostLearnTask {
if (!strcmp("save_pbuffer", name)) save_with_pbuffer = atoi(val); if (!strcmp("save_pbuffer", name)) save_with_pbuffer = atoi(val);
if (!strncmp("eval[", name, 5)) { if (!strncmp("eval[", name, 5)) {
char evname[256]; char evname[256];
utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display"); utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1,
"must specify evaluation name for display");
eval_data_names.push_back(std::string(evname)); eval_data_names.push_back(std::string(evname));
eval_data_paths.push_back(std::string(val)); eval_data_paths.push_back(std::string(val));
} }
learner.SetParam(name, val); learner.SetParam(name, val);
} }
public: public:
BoostLearnTask(void) { BoostLearnTask(void) {
// default parameters // default parameters
@ -119,12 +123,13 @@ class BoostLearnTask {
save_with_pbuffer = 0; save_with_pbuffer = 0;
data = NULL; data = NULL;
} }
~BoostLearnTask(void){ ~BoostLearnTask(void) {
for (size_t i = 0; i < deval.size(); i++){ for (size_t i = 0; i < deval.size(); i++) {
delete deval[i]; delete deval[i];
} }
if (data != NULL) delete data; if (data != NULL) delete data;
} }
private: private:
inline void InitData(void) { inline void InitData(void) {
if (strchr(train_path.c_str(), '%') != NULL) { if (strchr(train_path.c_str(), '%') != NULL) {
@ -153,7 +158,7 @@ class BoostLearnTask {
} }
std::vector<io::DataMatrix *> dcache(1, data); std::vector<io::DataMatrix *> dcache(1, data);
for (size_t i = 0; i < deval.size(); ++ i) { for (size_t i = 0; i < deval.size(); ++i) {
dcache.push_back(deval[i]); dcache.push_back(deval[i]);
} }
// set cache data to be all training and evaluation data // set cache data to be all training and evaluation data
@ -178,12 +183,12 @@ class BoostLearnTask {
int version = rabit::LoadCheckPoint(&learner); int version = rabit::LoadCheckPoint(&learner);
if (version == 0) this->InitLearner(); if (version == 0) this->InitLearner();
const time_t start = time(NULL); const time_t start = time(NULL);
unsigned long elapsed = 0; unsigned long elapsed = 0; // NOLINT(*)
learner.CheckInit(data); learner.CheckInit(data);
bool allow_lazy = learner.AllowLazyCheckPoint(); bool allow_lazy = learner.AllowLazyCheckPoint();
for (int i = version / 2; i < num_round; ++i) { for (int i = version / 2; i < num_round; ++i) {
elapsed = (unsigned long)(time(NULL) - start); elapsed = (unsigned long)(time(NULL) - start); // NOLINT(*)
if (version % 2 == 0) { if (version % 2 == 0) {
if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed); if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
learner.UpdateOneIter(i, *data); learner.UpdateOneIter(i, *data);
@ -196,7 +201,7 @@ class BoostLearnTask {
} }
utils::Assert(version == rabit::VersionNumber(), "consistent check"); utils::Assert(version == rabit::VersionNumber(), "consistent check");
std::string res = learner.EvalOneIter(i, devalall, eval_data_names); std::string res = learner.EvalOneIter(i, devalall, eval_data_names);
if (rabit::IsDistributed()){ if (rabit::IsDistributed()) {
if (rabit::GetRank() == 0) { if (rabit::GetRank() == 0) {
rabit::TrackerPrintf("%s\n", res.c_str()); rabit::TrackerPrintf("%s\n", res.c_str());
} }
@ -215,29 +220,29 @@ class BoostLearnTask {
} }
version += 1; version += 1;
utils::Assert(version == rabit::VersionNumber(), "consistent check"); utils::Assert(version == rabit::VersionNumber(), "consistent check");
elapsed = (unsigned long)(time(NULL) - start); elapsed = (unsigned long)(time(NULL) - start); // NOLINT(*)
} }
// always save final round // always save final round
if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE") { if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE") {
if (model_out == "NULL"){ if (model_out == "NULL") {
this->SaveModel(num_round - 1); this->SaveModel(num_round - 1);
} else { } else {
this->SaveModel(model_out.c_str()); this->SaveModel(model_out.c_str());
} }
} }
if (!silent){ if (!silent) {
printf("\nupdating end, %lu sec in all\n", elapsed); printf("\nupdating end, %lu sec in all\n", elapsed);
} }
} }
inline void TaskEval(void) { inline void TaskEval(void) {
learner.EvalOneIter(0, devalall, eval_data_names); learner.EvalOneIter(0, devalall, eval_data_names);
} }
inline void TaskDump(void){ inline void TaskDump(void) {
FILE *fo = utils::FopenCheck(name_dump.c_str(), "w"); FILE *fo = utils::FopenCheck(name_dump.c_str(), "w");
std::vector<std::string> dump = learner.DumpModel(fmap, dump_model_stats != 0); std::vector<std::string> dump = learner.DumpModel(fmap, dump_model_stats != 0);
for (size_t i = 0; i < dump.size(); ++ i) { for (size_t i = 0; i < dump.size(); ++i) {
fprintf(fo,"booster[%lu]:\n", i); fprintf(fo, "booster[%lu]:\n", i);
fprintf(fo,"%s", dump[i].c_str()); fprintf(fo, "%s", dump[i].c_str());
} }
fclose(fo); fclose(fo);
} }
@ -247,7 +252,8 @@ class BoostLearnTask {
} }
inline void SaveModel(int i) const { inline void SaveModel(int i) const {
char fname[256]; char fname[256];
sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1); utils::SPrintf(fname, sizeof(fname),
"%s/%04d.model", model_dir_path.c_str(), i + 1);
this->SaveModel(fname); this->SaveModel(fname);
} }
inline void TaskPred(void) { inline void TaskPred(void) {
@ -266,6 +272,7 @@ class BoostLearnTask {
} }
if (fo != stdout) fclose(fo); if (fo != stdout) fclose(fo);
} }
private: private:
/*! \brief whether silent */ /*! \brief whether silent */
int silent; int silent;
@ -309,6 +316,7 @@ class BoostLearnTask {
std::vector<std::string> eval_data_paths; std::vector<std::string> eval_data_paths;
/*! \brief the names of the evaluation data used in output log */ /*! \brief the names of the evaluation data used in output log */
std::vector<std::string> eval_data_names; std::vector<std::string> eval_data_names;
private: private:
io::DataMatrix* data; io::DataMatrix* data;
std::vector<io::DataMatrix*> deval; std::vector<io::DataMatrix*> deval;
@ -316,9 +324,9 @@ class BoostLearnTask {
utils::FeatMap fmap; utils::FeatMap fmap;
learner::BoostLearner learner; learner::BoostLearner learner;
}; };
} } // namespace xgboost
int main(int argc, char *argv[]){ int main(int argc, char *argv[]) {
xgboost::BoostLearnTask tsk; xgboost::BoostLearnTask tsk;
tsk.SetParam("seed", "0"); tsk.SetParam("seed", "0");
int ret = tsk.Run(argc, argv); int ret = tsk.Run(argc, argv);

tests/README.md Normal file

@ -0,0 +1 @@
This folder contains test cases for xgboost.


@ -0,0 +1,31 @@
import numpy as np
import xgboost as xgb
dpath = 'demo/data/'
def test_basic():
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
# specify validations set to watch performance
watchlist = [(dtest,'eval'), (dtrain,'train')]
num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist)
# this is prediction
preds = bst.predict(dtest)
labels = dtest.get_label()
err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
# error must be smaller than 10%
assert err < 0.1
# save dmatrix into binary buffer
dtest.save_binary('dtest.buffer')
# save model
bst.save_model('xgb.model')
# load model and data in
bst2 = xgb.Booster(model_file='xgb.model')
dtest2 = xgb.DMatrix('dtest.buffer')
preds2 = bst2.predict(dtest2)
# assert they are the same
assert np.sum(np.abs(preds2-preds)) == 0

setup.py

@ -1,9 +1,12 @@
# pylint: disable=invalid-name
"""Setup xgboost package."""
import os import os
import platform import platform
from setuptools import setup from setuptools import setup
class XGBoostLibraryNotFound(Exception): class XGBoostLibraryNotFound(Exception):
"""Exception to raise when xgboost library cannot be found."""
pass pass

File diff suppressed because it is too large

wrapper/xgboost_wrapper.cpp

@ -1,3 +1,4 @@
// Copyright (c) 2014 by Contributors
// implementations in ctypes // implementations in ctypes
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE #define _CRT_SECURE_NO_DEPRECATE
@ -7,6 +8,7 @@
#include <cstring> #include <cstring>
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
#include <exception>
// include all std functions // include all std functions
using namespace std; using namespace std;
#include "./xgboost_wrapper.h" #include "./xgboost_wrapper.h"
@ -31,9 +33,11 @@ class Booster: public learner::BoostLearner {
this->init_model = false; this->init_model = false;
this->SetCacheData(mats); this->SetCacheData(mats);
} }
inline const float *Pred(const DataMatrix &dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) { inline const float *Pred(const DataMatrix &dmat, int option_mask,
unsigned ntree_limit, bst_ulong *len) {
this->CheckInitModel(); this->CheckInitModel();
this->Predict(dmat, (option_mask&1) != 0, &this->preds_, ntree_limit, (option_mask&2) != 0); this->Predict(dmat, (option_mask&1) != 0, &this->preds_,
ntree_limit, (option_mask&2) != 0);
*len = static_cast<bst_ulong>(this->preds_.size()); *len = static_cast<bst_ulong>(this->preds_.size());
return BeginPtr(this->preds_); return BeginPtr(this->preds_);
} }
@ -57,7 +61,7 @@ class Booster: public learner::BoostLearner {
this->init_model = true; this->init_model = true;
} }
inline void LoadModelFromBuffer(const void *buf, size_t size) { inline void LoadModelFromBuffer(const void *buf, size_t size) {
utils::MemoryFixSizeBuffer fs((void*)buf, size); utils::MemoryFixSizeBuffer fs((void*)buf, size); // NOLINT(*)
learner::BoostLearner::LoadModel(fs, true); learner::BoostLearner::LoadModel(fs, true);
this->init_model = true; this->init_model = true;
} }
@ -94,251 +98,459 @@ class Booster: public learner::BoostLearner {
private: private:
bool init_model; bool init_model;
}; };
// helper to support threadlocal
struct ThreadLocalStore {
std::vector<std::string*> data;
// allocate a string
inline std::string *Alloc() {
mutex.Lock();
data.push_back(new std::string());
std::string *ret = data.back();
mutex.Unlock();
return ret;
}
ThreadLocalStore() {
mutex.Init();
}
~ThreadLocalStore() {
for (size_t i = 0; i < data.size(); ++i) {
delete data[i];
}
mutex.Destroy();
}
utils::Mutex mutex;
};
static ThreadLocalStore thread_local_store;
} // namespace wrapper } // namespace wrapper
} // namespace xgboost } // namespace xgboost
using namespace xgboost::wrapper; using namespace xgboost::wrapper;
extern "C"{ /*! \brief macro to guard beginning and end section of all functions */
void* XGDMatrixCreateFromFile(const char *fname, int silent) { #define API_BEGIN() try {
return LoadDataMatrix(fname, silent != 0, false, false); /*!
} * \brief every function starts with API_BEGIN(); and finishes with API_END();
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, * \param Finalize optionally put in a finalizer
const unsigned *indices, */
const float *data, #define API_END(Finalize) } catch(std::exception &e) { \
bst_ulong nindptr, Finalize; return XGBHandleException(e); \
bst_ulong nelem) { } return 0;
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr);
for (bst_ulong i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.row_data_.resize(nelem);
for (bst_ulong i = 0; i < nelem; ++i) {
mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]);
mat.info.info.num_col = std::max(mat.info.info.num_col,
static_cast<size_t>(indices[i]+1));
}
mat.info.info.num_row = nindptr - 1;
return p_mat;
}
XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem) {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
DMatrixSimple *p_mat = new DMatrixSimple(); // do not use threadlocal on OSX since it is not always available
DMatrixSimple &mat = *p_mat; #ifndef DISABLE_THREAD_LOCAL
utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_); #ifdef __GNUC__
builder.InitBudget(0, nthread); #define XGB_TREAD_LOCAL __thread
long ncol = static_cast<long>(nindptr - 1); #elif __STDC_VERSION__ >= 201112L
#pragma omp parallel for schedule(static) #define XGB_TREAD_LOCAL _Thread_local
for (long i = 0; i < ncol; ++i) { #elif defined(_MSC_VER)
int tid = omp_get_thread_num(); #define XGB_TREAD_LOCAL __declspec(thread)
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) { #endif
builder.AddBudget(indices[j], tid); #endif
}
} #ifndef XGB_TREAD_LOCAL
builder.InitStorage(); #pragma message("Warning: Threadlocal not enabled, used single thread error handling")
#pragma omp parallel for schedule(static) #define XGB_TREAD_LOCAL
for (long i = 0; i < ncol; ++i) { #endif
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) { /*!
builder.Push(indices[j], * \brief a helper function for error handling
RowBatch::Entry(static_cast<bst_uint>(i), data[j]), * will set the last error to be str_set when it is not NULL
tid); * \param str_set the error to set
} * \return a pointer message to last error
} */
mat.info.info.num_row = mat.row_ptr_.size() - 1; const char *XGBSetGetLastError_(const char *str_set) {
mat.info.info.num_col = static_cast<size_t>(ncol); // use last_error to record last error
return p_mat; static XGB_TREAD_LOCAL std::string *last_error = NULL;
if (last_error == NULL) {
last_error = thread_local_store.Alloc();
} }
void* XGDMatrixCreateFromMat(const float *data, if (str_set != NULL) {
bst_ulong nrow, *last_error = str_set;
bst_ulong ncol, }
float missing) { return last_error->c_str();
bool nan_missing = utils::CheckNAN(missing); }
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat; /*! \brief return str message of the last error */
mat.info.info.num_row = nrow; const char *XGBGetLastError() {
mat.info.info.num_col = ncol; return XGBSetGetLastError_(NULL);
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) { }
bst_ulong nelem = 0;
for (bst_ulong j = 0; j < ncol; ++j) { /*!
if (utils::CheckNAN(data[j])) { * \brief handle exception throwed out
utils::Check(nan_missing, * \param e the exception
"There are NAN in the matrix, however, you did not set missing=NAN"); * \return the return value of API after exception is handled
} else { */
if (nan_missing || data[j] != missing) { int XGBHandleException(const std::exception &e) {
mat.row_data_.push_back(RowBatch::Entry(j, data[j])); XGBSetGetLastError_(e.what());
++nelem; return -1;
} }
int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out) {
API_BEGIN();
*out = LoadDataMatrix(fname, silent != 0, false, false);
API_END();
}
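Expanded by hand, API_BEGIN/API_END give every C entry point the shape sketched below: exceptions become a -1 return, the message is parked where XGBGetLastError can find it, and an optional finalizer releases half-built objects. The function name and the plain static string here are illustrative; the real code uses the thread-local store above.

#include <cstdio>
#include <stdexcept>
#include <string>

static std::string last_error;  // stands in for the thread-local error store

const char *XGBGetLastErrorSketch() { return last_error.c_str(); }

int SomeApiCall(int bad_input) {           // hypothetical wrapper entry point
  try {                                    // API_BEGIN()
    if (bad_input) throw std::runtime_error("bad input");
    // ... real work would go here ...
  } catch (std::exception &e) {            // API_END(Finalize)
    /* Finalize, e.g. delete p_mat */
    last_error = e.what();                 // what XGBHandleException does
    return -1;
  }
  return 0;
}

int main() {
  if (SomeApiCall(1) != 0) {               // callers test the return code...
    std::printf("error: %s\n", XGBGetLastErrorSketch());  // ...then fetch the message
  }
  return 0;
}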
int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr);
for (bst_ulong i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.row_data_.resize(nelem);
for (bst_ulong i = 0; i < nelem; ++i) {
mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]);
mat.info.info.num_col = std::max(mat.info.info.num_col,
static_cast<size_t>(indices[i]+1));
}
mat.info.info.num_row = nindptr - 1;
*out = p_mat;
API_END(delete p_mat);
}
int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread);
long ncol = static_cast<long>(nindptr - 1); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.AddBudget(indices[j], tid);
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.Push(indices[j],
RowBatch::Entry(static_cast<bst_uint>(i), data[j]),
tid);
}
}
mat.info.info.num_row = mat.row_ptr_.size() - 1;
mat.info.info.num_col = static_cast<size_t>(ncol);
*out = p_mat;
API_END(delete p_mat);
}
int XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
p_mat = new DMatrixSimple();
bool nan_missing = utils::CheckNAN(missing);
DMatrixSimple &mat = *p_mat;
mat.info.info.num_row = nrow;
mat.info.info.num_col = ncol;
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
bst_ulong nelem = 0;
for (bst_ulong j = 0; j < ncol; ++j) {
if (utils::CheckNAN(data[j])) {
utils::Check(nan_missing,
"There are NAN in the matrix, however, you did not set missing=NAN");
} else {
if (nan_missing || data[j] != missing) {
mat.row_data_.push_back(RowBatch::Entry(j, data[j]));
++nelem;
} }
} }
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
} }
return p_mat; mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
}
void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
bst_ulong len) {
DMatrixSimple tmp;
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
if (dsrc.magic != DMatrixSimple::kMagic) {
tmp.CopyFrom(dsrc);
}
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
*static_cast<DMatrixSimple*>(handle): tmp);
DMatrixSimple *p_ret = new DMatrixSimple();
DMatrixSimple &ret = *p_ret;
utils::Check(src.info.group_ptr.size() == 0,
"slice does not support group structure");
ret.Clear();
ret.info.info.num_row = len;
ret.info.info.num_col = src.info.num_col();
utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
iter->BeforeFirst();
utils::Assert(iter->Next(), "slice");
const RowBatch &batch = iter->Value();
for (bst_ulong i = 0; i < len; ++i) {
const int ridx = idxset[i];
RowBatch::Inst inst = batch[ridx];
utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceed number of rows");
ret.row_data_.resize(ret.row_data_.size() + inst.length);
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
sizeof(RowBatch::Entry) * inst.length);
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
if (src.info.labels.size() != 0) {
ret.info.labels.push_back(src.info.labels[ridx]);
}
if (src.info.weights.size() != 0) {
ret.info.weights.push_back(src.info.weights[ridx]);
}
if (src.info.info.root_index.size() != 0) {
ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
}
if (src.info.info.fold_index.size() != 0) {
ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]);
}
}
return p_ret;
}
void XGDMatrixFree(void *handle) {
delete static_cast<DataMatrix*>(handle);
}
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent != 0);
}
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) {
std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(float) * len);
}
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
}
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
pmat->info.group_ptr.resize(len + 1);
pmat->info.group_ptr[0] = 0;
for (uint64_t i = 0; i < len; ++i) {
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i];
}
}
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) {
const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*len = static_cast<bst_ulong>(vec.size());
return BeginPtr(vec);
}
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*len = static_cast<bst_ulong>(vec.size());
return BeginPtr(vec);
}
bst_ulong XGDMatrixNumRow(const void *handle) {
return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
}
// xgboost implementation
void *XGBoosterCreate(void *dmats[], bst_ulong len) {
std::vector<DataMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
mats.push_back(dtr);
}
return new Booster(mats);
}
void XGBoosterFree(void *handle) {
delete static_cast<Booster*>(handle);
}
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
static_cast<Booster*>(handle)->SetParam(name, value);
}
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
bst->CheckInit(dtr);
bst->UpdateOneIter(iter, *dtr);
}
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, bst_ulong len) {
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
bst->CheckInit(dtr);
bst->BoostOneIter(*dtr, grad, hess, len);
}
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], bst_ulong len) {
Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names;
std::vector<const DataMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i]));
}
bst->CheckInitModel();
bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str();
}
const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), option_mask, ntree_limit, len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
static_cast<Booster*>(handle)->LoadModel(fname);
}
void XGBoosterSaveModel(void *handle, const char *fname) {
Booster *bst = static_cast<Booster*>(handle);
bst->CheckInitModel();
bst->SaveModel(fname, false);
}
void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len) {
static_cast<Booster*>(handle)->LoadModelFromBuffer(buf, len);
}
const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len) {
return static_cast<Booster*>(handle)->GetModelRaw(out_len);
}
const char** XGBoosterDumpModel(void *handle, const char *fmap, int with_stats, bst_ulong *len){
utils::FeatMap featmap;
if (strlen(fmap) != 0) {
featmap.LoadText(fmap);
}
return static_cast<Booster*>(handle)->GetModelDump(featmap, with_stats != 0, len);
} }
*out = p_mat;
API_END(delete p_mat);
}
int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int *idxset,
bst_ulong len,
DMatrixHandle *out) {
DMatrixSimple *p_ret = NULL;
API_BEGIN();
DMatrixSimple tmp;
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
if (dsrc.magic != DMatrixSimple::kMagic) {
tmp.CopyFrom(dsrc);
}
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
*static_cast<DMatrixSimple*>(handle): tmp);
p_ret = new DMatrixSimple();
DMatrixSimple &ret = *p_ret;
utils::Check(src.info.group_ptr.size() == 0,
"slice does not support group structure");
ret.Clear();
ret.info.info.num_row = len;
ret.info.info.num_col = src.info.num_col();
utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
iter->BeforeFirst();
utils::Assert(iter->Next(), "slice");
const RowBatch &batch = iter->Value();
for (bst_ulong i = 0; i < len; ++i) {
const int ridx = idxset[i];
RowBatch::Inst inst = batch[ridx];
utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceed number of rows");
ret.row_data_.resize(ret.row_data_.size() + inst.length);
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
sizeof(RowBatch::Entry) * inst.length);
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
if (src.info.labels.size() != 0) {
ret.info.labels.push_back(src.info.labels[ridx]);
}
if (src.info.weights.size() != 0) {
ret.info.weights.push_back(src.info.weights[ridx]);
}
if (src.info.info.root_index.size() != 0) {
ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
}
if (src.info.info.fold_index.size() != 0) {
ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]);
}
}
*out = p_ret;
API_END(delete p_ret);
}
int XGDMatrixFree(DMatrixHandle handle) {
API_BEGIN();
delete static_cast<DataMatrix*>(handle);
API_END();
}
int XGDMatrixSaveBinary(DMatrixHandle handle,
const char *fname,
int silent) {
API_BEGIN();
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent != 0);
API_END();
}
int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char *field,
const float *info,
bst_ulong len) {
API_BEGIN();
std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(float) * len);
API_END();
}
int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char *field,
const unsigned *info,
bst_ulong len) {
API_BEGIN();
std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
API_END();
}
int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len) {
API_BEGIN();
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
pmat->info.group_ptr.resize(len + 1);
pmat->info.group_ptr[0] = 0;
for (uint64_t i = 0; i < len; ++i) {
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i];
}
API_END();
}
int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const float **out_dptr) {
API_BEGIN();
const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*out_len = static_cast<bst_ulong>(vec.size());
*out_dptr = BeginPtr(vec);
API_END();
}
int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const unsigned **out_dptr) {
API_BEGIN();
const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*out_len = static_cast<bst_ulong>(vec.size());
*out_dptr = BeginPtr(vec);
API_END();
}
int XGDMatrixNumRow(const DMatrixHandle handle,
bst_ulong *out) {
API_BEGIN();
*out = static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
API_END();
}
// xgboost implementation
int XGBoosterCreate(DMatrixHandle dmats[],
bst_ulong len,
BoosterHandle *out) {
API_BEGIN();
std::vector<DataMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
mats.push_back(dtr);
}
*out = new Booster(mats);
API_END();
}
int XGBoosterFree(BoosterHandle handle) {
API_BEGIN();
delete static_cast<Booster*>(handle);
API_END();
}
int XGBoosterSetParam(BoosterHandle handle,
const char *name, const char *value) {
API_BEGIN();
static_cast<Booster*>(handle)->SetParam(name, value);
API_END();
}
int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
bst->CheckInit(dtr);
bst->UpdateOneIter(iter, *dtr);
API_END();
}
int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
bst->CheckInit(dtr);
bst->BoostOneIter(*dtr, grad, hess, len);
API_END();
}
int XGBoosterEvalOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dmats[],
const char *evnames[],
bst_ulong len,
const char **out_str) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names;
std::vector<const DataMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i]));
}
bst->CheckInitModel();
bst->eval_str = bst->EvalOneIter(iter, mats, names);
*out_str = bst->eval_str.c_str();
API_END();
}
int XGBoosterPredict(BoosterHandle handle,
DMatrixHandle dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *len,
const float **out_result) {
API_BEGIN();
*out_result = static_cast<Booster*>(handle)->
Pred(*static_cast<DataMatrix*>(dmat),
option_mask, ntree_limit, len);
API_END();
}
int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {
API_BEGIN();
static_cast<Booster*>(handle)->LoadModel(fname);
API_END();
}
int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
bst->CheckInitModel();
bst->SaveModel(fname, false);
API_END();
}
int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
const void *buf,
bst_ulong len) {
API_BEGIN();
static_cast<Booster*>(handle)->LoadModelFromBuffer(buf, len);
API_END();
}
int XGBoosterGetModelRaw(BoosterHandle handle,
bst_ulong *out_len,
const char **out_dptr) {
API_BEGIN();
*out_dptr = static_cast<Booster*>(handle)->GetModelRaw(out_len);
API_END();
}
int XGBoosterDumpModel(BoosterHandle handle,
const char *fmap,
int with_stats,
bst_ulong *len,
const char ***out_models) {
API_BEGIN();
utils::FeatMap featmap;
if (strlen(fmap) != 0) {
featmap.LoadText(fmap);
}
*out_models = static_cast<Booster*>(handle)->GetModelDump(
featmap, with_stats != 0, len);
API_END();
} }

View File

@ -1,235 +1,327 @@
#ifndef XGBOOST_WRAPPER_H_
#define XGBOOST_WRAPPER_H_
/*! /*!
* Copyright (c) 2014 by Contributors
* \file xgboost_wrapper.h * \file xgboost_wrapper.h
* \author Tianqi Chen * \author Tianqi Chen
* \brief a C style wrapper of xgboost * \brief a C style wrapper of xgboost
* can be used to create wrapper of other languages * can be used to create wrapper of other languages
*/ */
#if defined(_MSC_VER) || defined(_WIN32) #ifndef XGBOOST_WRAPPER_H_
#define XGB_DLL __declspec(dllexport) #define XGBOOST_WRAPPER_H_
#else
#define XGB_DLL
#endif
// manually define unsign long
typedef unsigned long bst_ulong;
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { #define XGB_EXTERN_C extern "C"
#endif #endif
/*!
* \brief load a data matrix #if defined(_MSC_VER) || defined(_WIN32)
* \param fname the name of the file #define XGB_DLL XGB_EXTERN_C __declspec(dllexport)
* \param silent whether print messages during loading #else
* \return a loaded data matrix #define XGB_DLL XGB_EXTERN_C
*/
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
* \brief create a matrix content from csr format
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matix + 1
* \param nelem number of nonzero elements in the matrix
* \return created dmatrix
*/
XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem);
/*!
* \brief create a matrix content from CSC format
* \param col_ptr pointer to col headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matix + 1
* \param nelem number of nonzero elements in the matrix
* \return created dmatrix
*/
XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem);
/*!
* \brief create matrix content from dense matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param missing which value to represent missing value
* \return created dmatrix
*/
XGB_DLL void* XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
* \param len length of index set
* \return a sliced new matrix
*/
XGB_DLL void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
bst_ulong len);
/*!
* \brief free space in data matrix
*/
XGB_DLL void XGDMatrixFree(void *handle);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
*/
XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
* \brief set float vector to a content in info
* \param handle a instance of data matrix
* \param field field name, can be label, weight
* \param array pointer to float vector
* \param len length of array
*/
XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, bst_ulong len);
/*!
* \brief set uint32 vector to a content in info
* \param handle a instance of data matrix
* \param field field name
* \param array pointer to float vector
* \param len length of array
*/
XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, bst_ulong len);
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
*/
XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len);
/*!
* \brief get float info vector from matrix
* \param handle a instance of data matrix
* \param field field name
* \param out_len used to set result length
* \return pointer to the result
*/
XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* out_len);
/*!
* \brief get uint32 info vector from matrix
* \param handle a instance of data matrix
* \param field field name
* \param out_len used to set result length
* \return pointer to the result
*/
XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* out_len);
/*!
* \brief return number of rows
*/
XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \param len length of dmats
*/
XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len);
/*!
* \brief free obj in handle
* \param handle handle to be freed
*/
XGB_DLL void XGBoosterFree(void* handle);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
* \param dtrain training data
*/
XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess array
*/
XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, bst_ulong len);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data
* \param len length of dmats
* \return the string containing evaluation stati
*/
XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], bst_ulong len);
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param option_mask bit-mask of options taken in prediction, possible values
* 0:normal prediction
* 1:output margin instead of transformed value
* 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
* when the parameter is set to 0, we will use all the trees
* \param len used to store length of returning result
*/
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *len);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
XGB_DLL void XGBoosterSaveModel(void *handle, const char *fname);
/*!
* \brief load model from in memory buffer
* \param handle handle
* \param buf pointer to the buffer
* \param len the length of the buffer
*/
XGB_DLL void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len);
/*!
* \brief save model into binary raw bytes, return header of the array
* user must copy the result out, before next xgboost call
* \param handle handle
* \param out_len the argument to hold the output length
* \return the pointer to the beginning of binary buffer
*/
XGB_DLL const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len);
/*!
* \brief dump model, return array of strings representing model dump
* \param handle handle
* \param fmap name to fmap can be empty string
* \param with_stats whether to dump with statistics
* \param out_len length of output array
* \return char *data[], representing dump of each model
*/
XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
int with_stats, bst_ulong *out_len);
#ifdef __cplusplus
}
#endif #endif
// manually define unsign long
typedef unsigned long bst_ulong; // NOLINT(*)
/*! \brief handle to DMatrix */
typedef void *DMatrixHandle;
/*! \brief handle to Booster */
typedef void *BoosterHandle;
/*!
* \brief get string message of the last error
*
* all function in this file will return 0 when success
* and -1 when an error occured,
* XGBGetLastError can be called to retrieve the error
*
* this function is threadsafe and can be called by different thread
* \return const char* error inforomation
*/
XGB_DLL const char *XGBGetLastError();
/*!
* \brief load a data matrix
* \param fname the name of the file
* \param silent whether print messages during loading
* \param out a loaded data matrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out);
/*!
* \brief create a matrix content from csr format
* \param indptr pointer to row headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matix + 1
* \param nelem number of nonzero elements in the matrix
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle *out);
/*!
* \brief create a matrix content from CSC format
* \param col_ptr pointer to col headers
* \param indices findex
* \param data fvalue
* \param nindptr number of rows in the matix + 1
* \param nelem number of nonzero elements in the matrix
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle *out);
/*!
* \brief create matrix content from dense matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number columns
* \param missing which value to represent missing value
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing,
DMatrixHandle *out);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
* \param len length of index set
* \param out a sliced new matrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int *idxset,
bst_ulong len,
DMatrixHandle *out);
/*!
* \brief free space in data matrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixFree(void *handle);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle,
const char *fname, int silent);
/*!
* \brief set float vector to a content in info
* \param handle a instance of data matrix
* \param field field name, can be label, weight
* \param array pointer to float vector
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char *field,
const float *array,
bst_ulong len);
/*!
* \brief set uint32 vector to a content in info
* \param handle a instance of data matrix
* \param field field name
* \param array pointer to float vector
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char *field,
const unsigned *array,
bst_ulong len);
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len);
/*!
* \brief get float info vector from matrix
* \param handle a instance of data matrix
* \param field field name
* \param out_len used to set result length
* \param out_dptr pointer to the result
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char *field,
bst_ulong* out_len,
const float **out_dptr);
/*!
* \brief get uint32 info vector from matrix
* \param handle a instance of data matrix
* \param field field name
* \param out_ptr pointer to the result
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
const char *field,
bst_ulong* out_len,
const unsigned **out_dptr);
/*!
* \brief get number of rows
* \param handle the handle to the DMatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
bst_ulong *out);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \param len length of dmats
* \param out handle to the result booster
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterCreate(void* dmats[],
bst_ulong len,
BoosterHandle *out);
/*!
* \brief free obj in handle
* \param handle handle to be freed
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterFree(BoosterHandle handle);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
const char *name,
const char *value);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
* \param dtrain training data
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
* \param len length of grad/hess array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data
* \param len length of dmats
* \param out_result the string containing evaluation statistics
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dmats[],
const char *evnames[],
bst_ulong len,
const char **out_result);
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param option_mask bit-mask of options taken in prediction, possible values
* 0:normal prediction
* 1:output margin instead of transformed value
* 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
* when the parameter is set to 0, we will use all the trees
* \param out_len used to store length of returning result
* \param out_result used to set a pointer to array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterPredict(BoosterHandle handle,
DMatrixHandle dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *out_len,
const float **out_result);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
const char *fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
const char *fname);
/*!
* \brief load model from in memory buffer
* \param handle handle
* \param buf pointer to the buffer
* \param len the length of the buffer
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
const void *buf,
bst_ulong len);
/*!
* \brief save model into binary raw bytes, return header of the array
* user must copy the result out, before next xgboost call
* \param handle handle
* \param out_len the argument to hold the output length
* \param out_dptr the argument to hold the output data pointer
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
bst_ulong *out_len,
const char **out_dptr);
/*!
* \brief dump model, return array of strings representing model dump
* \param handle handle
* \param fmap name to fmap can be empty string
* \param with_stats whether to dump with statistics
* \param out_len length of output array
* \param out_dump_array pointer to hold representing dump of each model
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterDumpModel(BoosterHandle handle,
const char *fmap,
int with_stats,
bst_ulong *out_len,
const char ***out_dump_array);
#endif // XGBOOST_WRAPPER_H_ #endif // XGBOOST_WRAPPER_H_