Merge remote-tracking branch 'dmlc/master'

This commit is contained in:
El Potaeto 2015-07-15 16:00:21 +02:00
commit 86f9f707d8
114 changed files with 6723 additions and 1702 deletions

.gitignore
@@ -58,3 +58,12 @@ R-package.Rproj
*.cache*
R-package/inst
R-package/src
#java
java/xgboost4j/target
java/xgboost4j/tmp
java/xgboost4j-demo/target
java/xgboost4j-demo/data/
java/xgboost4j-demo/tmp/
java/xgboost4j-demo/model/
nb-configuration*
dmlc-core

.travis.yml (new file)
@@ -0,0 +1,47 @@
sudo: true
# Use Build Matrix to do lint and build separately
env:
  matrix:
    - TASK=lint LINT_LANG=cpp
    - TASK=lint LINT_LANG=python
    - TASK=R-package CXX=g++
    - TASK=python-package CXX=g++
    - TASK=java-package CXX=g++
    - TASK=build CXX=g++
    - TASK=build-with-dmlc CXX=g++
# dependent apt packages
addons:
  apt:
    packages:
      - doxygen
      - libopenmpi-dev
      - wget
      - libcurl4-openssl-dev
      - unzip
      - python-numpy
      - python-scipy
      - python-nose
before_install:
  - git clone https://github.com/dmlc/dmlc-core
  - export TRAVIS=dmlc-core/scripts/travis/
  - export PYTHONPATH=${PYTHONPATH}:${PWD}/wrapper
  - source ${TRAVIS}/travis_setup_env.sh
install:
  - pip install cpplint pylint --user `whoami`
script: scripts/travis_script.sh
after_failure:
  - scripts/travis_after_failure.sh
notifications:
  email:
    on_success: change
    on_failure: always

@@ -1,8 +1,10 @@
export CC = gcc
export CXX = g++
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops
# java include path
export JAVAINCFLAGS = -I${JAVA_HOME}/include -I${JAVA_HOME}/include/linux -I./java
ifeq ($(OS), Windows_NT)
export CXX = g++ -m64
@@ -10,8 +12,8 @@ ifeq ($(OS), Windows_NT)
endif
ifeq ($(no_omp),1)
CFLAGS += -DDISABLE_OPENMP
else
CFLAGS += -fopenmp
endif
@@ -27,7 +29,7 @@ ifdef dmlc
config = $(dmlc)/config.mk
else
config = $(dmlc)/make/config.mk
endif
endif
include $(config)
include $(dmlc)/make/dmlc.mk
@@ -41,7 +43,7 @@ ifndef WITH_FPIC
WITH_FPIC = 1
endif
ifeq ($(WITH_FPIC), 1)
CFLAGS += -fPIC
endif
@@ -53,6 +55,9 @@ else
SLIB = wrapper/libxgboostwrapper.so
endif
# java lib
JLIB = java/libxgboostjavawrapper.so
# specify tensor path
BIN = xgboost
MOCKBIN = xgboost.mock
@@ -64,7 +69,11 @@ else
TARGET = $(BIN)
endif
.PHONY: clean all mpi python Rpack
ifndef LINT_LANG
LINT_LANG= "all"
endif
.PHONY: clean all mpi python Rpack lint
all: $(TARGET)
mpi: $(MPIBIN)
@@ -73,12 +82,15 @@ python: wrapper/libxgboostwrapper.so
# now the wrapper takes in two files. io and wrapper part
updater.o: src/tree/updater.cpp src/tree/*.hpp src/*.h src/tree/*.h src/utils/*.h
dmlc_simple.o: src/io/dmlc_simple.cpp src/utils/*.h
gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h
io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h
main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h
xgboost: updater.o gbm.o io.o main.o $(LIBRABIT) $(LIBDMLC)
wrapper/xgboost_wrapper.dll wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
java: java/libxgboostjavawrapper.so
java/libxgboostjavawrapper.so: java/xgboost4j_wrapper.cpp wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
# dependency on rabit
subtree/rabit/lib/librabit.a: subtree/rabit/src/engine.cc
cd subtree/rabit;make lib/librabit.a; cd ../..
@@ -89,23 +101,26 @@ subtree/rabit/lib/librabit_mock.a: subtree/rabit/src/engine_mock.cc
subtree/rabit/lib/librabit_mpi.a: subtree/rabit/src/engine_mpi.cc
cd subtree/rabit;make lib/librabit_mpi.a; cd ../..
$(BIN) :
$(CXX) $(CFLAGS) -fPIC -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
$(MOCKBIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
$(SLIB) :
$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS)
$(JLIB) :
$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS)
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
$(MPIOBJ) :
$(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
$(MPIBIN) :
$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
install:
cp -f -r $(BIN) $(INSTALL_PATH)
@@ -133,10 +148,23 @@ Rpack:
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
cp xgboost/src/Makevars xgboost/src/Makevars.win
# R CMD build --no-build-vignettes xgboost
# R CMD build xgboost
# rm -rf xgboost
# R CMD check --as-cran xgboost*.tar.gz
Rbuild:
make Rpack
R CMD build xgboost
rm -rf xgboost
Rcheck:
make Rbuild
R CMD check --as-cran xgboost*.tar.gz
# lint requires dmlc to be in current folder
lint:
dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package
clean:
$(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~
cd subtree/rabit; make clean; cd ..

@@ -220,7 +220,8 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
stop("nfold must be bigger than 1")
}
if(is.null(folds)) {
if (exists('objective', where=param) && strtrim(param[['objective']], 5) == 'rank:') {
if (exists('objective', where=param) && is.character(param$objective) &&
strtrim(param[['objective']], 5) == 'rank:') {
stop("\tAutomatic creation of CV-folds is not implemented for ranking!\n",
"\tConsider providing pre-computed CV-folds through the folds parameter.")
}
@@ -234,7 +235,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
# For classification, need to convert y labels to factor before making the folds,
# and then do stratification by factor levels.
# For regression, leave y numeric and do stratification by quantiles.
if (exists('objective', where=param)) {
if (exists('objective', where=param) && is.character(param$objective)) {
# If 'objective' provided in params, assume that y is a classification label
# unless objective is reg:linear
if (param[['objective']] != 'reg:linear') y <- factor(y)
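Since automatic fold creation is now rejected for ranking objectives, here is a hedged sketch of the workaround the error message suggests, i.e. passing pre-computed folds (the index ranges and `dtrain` are illustrative, not part of this diff):

```r
# hypothetical: three manually specified CV-folds as row-index vectors
folds <- list(1:100, 101:200, 201:300)
res <- xgb.cv(params = list(objective = "rank:pairwise"),
              data = dtrain, nrounds = 10, folds = folds)
```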

@@ -95,152 +95,160 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
prediction = FALSE, showsd = TRUE, metrics=list(),
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
early.stop.round = NULL, maximize = NULL, ...) {
if (typeof(params) != "list") {
stop("xgb.cv: first argument params must be list")
}
if(!is.null(folds)) {
if(class(folds)!="list" | length(folds) < 2) {
stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
if (typeof(params) != "list") {
stop("xgb.cv: first argument params must be list")
}
nfold <- length(folds)
}
if (nfold <= 1) {
stop("nfold must be bigger than 1")
}
if (is.null(missing)) {
dtrain <- xgb.get.DMatrix(data, label)
} else {
dtrain <- xgb.get.DMatrix(data, label, missing)
}
params <- append(params, list(...))
params <- append(params, list(silent=1))
for (mc in metrics) {
params <- append(params, list("eval_metric"=mc))
}
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
stop("xgb.cv: cannot assign two different objectives")
if (!is.null(params$objective))
if (class(params$objective)=='function') {
obj = params$objective
params$objective = NULL
if(!is.null(folds)) {
if(class(folds)!="list" | length(folds) < 2) {
stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
}
nfold <- length(folds)
}
if (!is.null(params$eval_metric) && !is.null(feval))
stop("xgb.cv: cannot assign two different evaluation metrics")
if (!is.null(params$eval_metric))
if (class(params$eval_metric)=='function') {
feval = params$eval_metric
params$eval_metric = NULL
if (nfold <= 1) {
stop("nfold must be bigger than 1")
}
# Early Stopping
if (!is.null(early.stop.round)){
if (!is.null(feval) && is.null(maximize))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize) && is.null(params$eval_metric))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize))
{
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
maximize = FALSE
} else {
maximize = TRUE
}
}
if (maximize) {
bestScore = 0
if (is.null(missing)) {
dtrain <- xgb.get.DMatrix(data, label)
} else {
bestScore = Inf
dtrain <- xgb.get.DMatrix(data, label, missing)
}
dot.params = list(...)
nms.params = names(params)
nms.dot.params = names(dot.params)
if (length(intersect(nms.params,nms.dot.params))>0)
stop("Duplicated defined term in parameters. Please check your list of params.")
params <- append(params, dot.params)
params <- append(params, list(silent=1))
for (mc in metrics) {
params <- append(params, list("eval_metric"=mc))
}
bestInd = 0
earlyStopflag = FALSE
if (length(metrics)>1)
warning('Only the first metric is used for early stopping process.')
}
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
obj_type = params[['objective']]
mat_pred = FALSE
if (!is.null(obj_type) && obj_type=='multi:softprob')
{
num_class = params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
mat_pred = TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
history <- c()
print.every.n = max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
msg <- list()
for (k in 1:nfold) {
fd <- xgb_folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
if (i<nrounds) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
if (!prediction) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
res <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction)
if (mat_pred) {
pred_mat = matrix(res[[2]],num_class,length(fd$index))
predictValues[fd$index,] <- t(pred_mat)
} else {
predictValues[fd$index] <- res[[2]]
}
msg[[k]] <- res[[1]] %>% str_split("\t") %>% .[[1]]
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
stop("xgb.cv: cannot assign two different objectives")
if (!is.null(params$objective))
if (class(params$objective)=='function') {
obj = params$objective
params[['objective']] = NULL
}
# if (!is.null(params$eval_metric) && !is.null(feval))
# stop("xgb.cv: cannot assign two different evaluation metrics")
if (!is.null(params$eval_metric))
if (class(params$eval_metric)=='function') {
feval = params$eval_metric
params[['eval_metric']] = NULL
}
}
}
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose)
if (0==(i-1L)%%print.every.n)
cat(ret, "\n", sep="")
# early_Stopping
# Early Stopping
if (!is.null(early.stop.round)){
score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
score = strsplit(score,'\\+|:')[[1]][[2]]
score = as.numeric(score)
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
bestScore = score
bestInd = i
} else {
if (i-bestInd>=early.stop.round) {
earlyStopflag = TRUE
cat('Stopping. Best iteration:',bestInd)
break
if (!is.null(feval) && is.null(maximize))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize) && is.null(params$eval_metric))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize))
{
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
maximize = FALSE
} else {
maximize = TRUE
}
}
}
if (maximize) {
bestScore = 0
} else {
bestScore = Inf
}
bestInd = 0
earlyStopflag = FALSE
if (length(metrics)>1)
warning('Only the first metric is used for early stopping process.')
}
}
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
colnamesMean <- paste(colnames, "mean")
if(showsd) colnamesStd <- paste(colnames, "std")
colnames <- c()
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
else colnames <- colnamesMean
type <- rep(x = "numeric", times = length(colnames))
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t")
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
if (prediction) {
return(list(dt = dt,pred = predictValues))
}
return(dt)
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
obj_type = params[['objective']]
mat_pred = FALSE
if (!is.null(obj_type) && obj_type=='multi:softprob')
{
num_class = params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
mat_pred = TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
history <- c()
print.every.n = max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
msg <- list()
for (k in 1:nfold) {
fd <- xgb_folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
}
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose)
if (0==(i-1L)%%print.every.n)
cat(ret, "\n", sep="")
# early_Stopping
if (!is.null(early.stop.round)){
score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
score = strsplit(score,'\\+|:')[[1]][[2]]
score = as.numeric(score)
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
bestScore = score
bestInd = i
} else {
if (i-bestInd>=early.stop.round) {
earlyStopflag = TRUE
cat('Stopping. Best iteration:',bestInd)
break
}
}
}
}
if (prediction) {
for (k in 1:nfold) {
fd = xgb_folds[[k]]
if (!is.null(early.stop.round) && earlyStopflag) {
res = xgb.iter.eval(fd$booster, fd$watchlist, bestInd - 1, feval, prediction)
} else {
res = xgb.iter.eval(fd$booster, fd$watchlist, nrounds - 1, feval, prediction)
}
if (mat_pred) {
pred_mat = matrix(res[[2]],num_class,length(fd$index))
predictValues[fd$index,] = t(pred_mat)
} else {
predictValues[fd$index] = res[[2]]
}
}
}
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
colnamesMean <- paste(colnames, "mean")
if(showsd) colnamesStd <- paste(colnames, "std")
colnames <- c()
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
else colnames <- colnamesMean
type <- rep(x = "numeric", times = length(colnames))
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t")
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
if (prediction) {
return(list(dt = dt,pred = predictValues))
}
return(dt)
}
# Avoid error messages during CRAN check.
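With the restructured loop, per-fold predictions are now collected after training finishes (or after early stopping) rather than inside the last round. A hedged usage sketch, with `dtrain` and the parameter values illustrative:

```r
# 5-fold CV with early stopping; out-of-fold predictions are returned
res <- xgb.cv(params = list(objective = "binary:logistic", eta = 0.3),
              data = dtrain, nrounds = 100, nfold = 5,
              metrics = list("error"), early.stop.round = 10,
              prediction = TRUE)
res$dt    # per-round evaluation history as a data.table
res$pred  # out-of-fold predictions, filled at the best iteration
```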

@@ -137,7 +137,13 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
if (length(watchlist) != 0 && verbose == 0) {
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
}
params = append(params, list(...))
dot.params = list(...)
nms.params = names(params)
nms.dot.params = names(dot.params)
if (length(intersect(nms.params,nms.dot.params))>0)
stop("Duplicated term in parameters. Please check your list of params.")
params = append(params, dot.params)
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
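The new duplicate check means a parameter may be passed either inside `params` or via `...`, but not both. A hedged sketch of the call pattern that now errors (`dtrain` is illustrative):

```r
# 'eta' appears both in params and as a dot-argument, so xgb.train()
# now stops with "Duplicated term in parameters. ..."
bst <- xgb.train(params = list(eta = 0.1, objective = "binary:logistic"),
                 data = dtrain, nrounds = 2, eta = 0.3)
```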

@@ -1,6 +1,8 @@
# R package for xgboost.
R package for xgboost
=====================
## Installation
Installation
------------
For up-to-date version (which is recommended), please install from github. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
@@ -8,8 +10,26 @@ For up-to-date version (which is recommended), please install from github. Windo
devtools::install_github('dmlc/xgboost',subdir='R-package')
```
## Examples
Examples
--------
* Please visit [walk through example](demo).
* See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd).
Notes
-----
If you face an issue installing the package using ```devtools::install_github```, something like this (even after updating libxml and RCurl as a lot of forums say) -
```
devtools::install_github('dmlc/xgboost',subdir='R-package')
Downloading github repo dmlc/xgboost@master
Error in function (type, msg, asError = TRUE) :
Peer certificate cannot be authenticated with given CA certificates
```
To get around this you can build the package locally as mentioned [here](https://github.com/dmlc/xgboost/issues/347) -
```
1. Clone the current repository and set your workspace to xgboost/R-package/
2. Run R CMD INSTALL --build . in terminal to get the tarball.
3. Run install.packages('path_to_the_tarball',repos=NULL) in R to install.
```

@@ -1,8 +1,10 @@
require(xgboost)
require(Matrix)
require(data.table)
if (!require(vcd)) install.packages('vcd') #Available in Cran. Used for its dataset with categorical values.
if (!require(vcd)) {
install.packages('vcd') #Available in Cran. Used for its dataset with categorical values.
require(vcd)
}
# According to its documentation, Xgboost works only on numbers.
# Sometimes the dataset we have to work on has categorical data.
# A categorical variable is one which has a fixed number of values. For example, if for each observation a variable called "Colour" can have only "red", "blue" or "green" as value, it is a categorical variable.
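As a brief illustration of that conversion, a hedged R sketch (the `Colour` data is made up) of one-hot encoding a factor with `sparse.model.matrix` from the Matrix package loaded above:

```r
# hypothetical factor column with three levels
df <- data.frame(Colour = factor(c("red", "blue", "green", "red")),
                 Weight = c(1.2, 3.4, 2.2, 0.9))
# one-hot encode Colour into numeric 0/1 columns (drop the intercept)
sparse_matrix <- sparse.model.matrix(~ Colour - 1, data = df)
print(sparse_matrix)
```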

@@ -1,9 +1,10 @@
// Copyright (c) 2014 by Contributors
#include <vector>
#include <string>
#include <utility>
#include <cstring>
#include <cstdio>
#include <sstream>
#include "wrapper/xgboost_wrapper.h"
#include "src/utils/utils.h"
#include "src/utils/omp.h"
@@ -34,7 +35,7 @@ bool CheckNAN(double v) {
bool LogGamma(double v) {
return lgammafn(v);
}
} // namespace utils
namespace random {
void Seed(unsigned seed) {
@@ -58,25 +59,30 @@ inline void _WrapperEnd(void) {
PutRNGstate();
}
// do nothing, check error
inline void CheckErr(int ret) {
}
extern "C" {
SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
}
void _DMatrixFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
XGDMatrixFree(R_ExternalPtrAddr(ext));
R_ClearExternalPtr(ext);
}
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
_WrapperBegin();
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing) {
_WrapperBegin();
SEXP dim = getAttrib(mat, R_DimSymbol);
@@ -90,12 +96,13 @@ extern "C" {
data[i * ncol +j] = din[i + nrow * j];
}
}
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
@@ -118,8 +125,10 @@ extern "C" {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
}
void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata);
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
&handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@@ -133,17 +142,20 @@ extern "C" {
for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1;
}
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
DMatrixHandle res;
CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
_WrapperBegin();
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent));
CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent)));
_WrapperEnd();
}
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
@@ -152,28 +164,31 @@ extern "C" {
const char *name = CHAR(asChar(field));
if (!strcmp("group", name)) {
std::vector<unsigned> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
}
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len));
} else {
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = REAL(array)[i];
}
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len);
CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len));
}
_WrapperEnd();
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
_WrapperBegin();
bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), &olen);
const float *res;
CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&olen,
&res));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
@@ -183,23 +198,25 @@ extern "C" {
return ret;
}
SEXP XGDMatrixNumRow_R(SEXP handle) {
bst_ulong nrow = XGDMatrixNumRow(R_ExternalPtrAddr(handle));
bst_ulong nrow;
CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
return ScalarInteger(static_cast<int>(nrow));
}
// functions related to booster
void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
XGBoosterFree(R_ExternalPtrAddr(ext));
CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext)));
R_ClearExternalPtr(ext);
}
SEXP XGBoosterCreate_R(SEXP dmats) {
_WrapperBegin();
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i){
for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
BoosterHandle handle;
CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
@@ -208,16 +225,16 @@ extern "C" {
}
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
_WrapperBegin();
XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val)));
CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val))));
_WrapperEnd();
}
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
_WrapperBegin();
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain));
CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain)));
_WrapperEnd();
}
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
@@ -230,9 +247,10 @@ extern "C" {
tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j];
}
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess), len);
CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess),
len));
_WrapperEnd();
}
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
@@ -249,21 +267,24 @@ extern "C" {
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
const char *ret =
XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len);
const char *ret;
CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
_WrapperEnd();
return mkString(ret);
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
_WrapperBegin();
bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
&olen);
const float *res;
CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
&olen, &res));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
@@ -274,15 +295,15 @@ extern "C" {
}
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd();
}
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd();
}
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
_WrapperBegin();
XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
RAW(raw),
@@ -292,28 +313,29 @@ extern "C" {
SEXP XGBoosterModelToRaw_R(SEXP handle) {
bst_ulong olen;
_WrapperBegin();
const char *raw = XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen);
const char *raw;
CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) {
memcpy(RAW(ret), raw, olen);
}
UNPROTECT(1);
return ret;
}
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
_WrapperBegin();
bst_ulong olen;
const char **res =
XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
asInteger(with_stats),
&olen);
const char **res;
CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
asInteger(with_stats),
&olen, &res));
_WrapperEnd();
SEXP out = PROTECT(allocVector(STRSXP, olen));
for (size_t i = 0; i < olen; ++i) {
stringstream stream;
stream << "booster["<<i<<"]\n" << res[i];
stream << "booster[" << i <<"]\n" << res[i];
SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
}
UNPROTECT(1);

@@ -1,10 +1,12 @@
#ifndef XGBOOST_WRAPPER_R_H_
#define XGBOOST_WRAPPER_R_H_
/*!
* Copyright 2014 (c) by Contributors
* \file xgboost_wrapper_R.h
* \author Tianqi Chen
* \brief R wrapper of xgboost
*/
#ifndef XGBOOST_WRAPPER_R_H_ // NOLINT(*)
#define XGBOOST_WRAPPER_R_H_ // NOLINT(*)
extern "C" {
#include <Rinternals.h>
#include <R_ext/Random.h>
@@ -19,7 +21,7 @@ extern "C" {
*/
SEXP XGCheckNullPtr_R(SEXP handle);
/*!
* \brief load a data matrix
* \param fname name of the content
* \param silent whether print messages
* \return a loaded data matrix
@@ -32,9 +34,9 @@ extern "C" {
* \param missing which value to represent missing value
* \return created dmatrix
*/
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing);
/*!
* \brief create a matrix content from CSC format
* \param indptr pointer to column headers
* \param indices row indices
@@ -70,26 +72,26 @@ extern "C" {
* \param handle a instance of data matrix
* \param field field name
* \return info vector
*/
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
/*!
* \brief return number of rows
* \param handle a instance of data matrix
*/
SEXP XGDMatrixNumRow_R(SEXP handle);
/*!
* \brief create xgboost learner
* \param dmats a list of dmatrix handles that will be cached
*/
SEXP XGBoosterCreate_R(SEXP dmats);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
@@ -132,12 +134,12 @@ extern "C" {
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
/*!
* \brief load model from raw array
* \param handle handle
*/
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw);
/*!
* \brief save model into R's raw array
@@ -153,4 +155,4 @@ extern "C" {
*/
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats);
}
#endif // XGBOOST_WRAPPER_R_H_
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)

@@ -1,3 +1,4 @@
// Copyright (c) 2014 by Contributors
#include <stdio.h>
#include <stdarg.h>
#include <Rinternals.h>
@@ -6,17 +7,17 @@
void XGBoostAssert_R(int exp, const char *fmt, ...) {
char buf[1024];
if (exp == 0) {
va_list args;
va_start(args, fmt);
vsprintf(buf, fmt, args);
va_end(args);
error("AssertError:%s\n", buf);
}
}
void XGBoostCheck_R(int exp, const char *fmt, ...) {
char buf[1024];
if (exp == 0) {
va_list args;
va_start(args, fmt);
vsprintf(buf, fmt, args);
va_end(args);
@@ -25,7 +26,7 @@ void XGBoostCheck_R(int exp, const char *fmt, ...) {
}
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {
int ret;
va_list args;
va_start(args, fmt);
ret = vsnprintf(buf, size, fmt, args);
va_end(args);

@@ -337,6 +337,17 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))
```
View feature importance/influence from the learnt model
-------------------------------------------------------
Feature importance is similar to R gbm package's relative influence (rel.inf).
```
importance_matrix <- xgb.importance(model = bst)
print(importance_matrix)
xgb.plot.importance(importance_matrix)
```
View the trees from a model
---------------------------
@@ -346,6 +357,12 @@ You can dump the tree you learned using `xgb.dump` into a text file.
xgb.dump(bst, with.stats = T)
```
You can plot the trees from your model using ```xgb.plot.tree```
```
xgb.plot.tree(model = bst)
```
> if you provide a path to the `fname` parameter, you can save the trees to your hard drive.
Save and load models

@@ -1,12 +1,14 @@
XGBoost: eXtreme Gradient Boosting
DMLC/XGBoost
==================================
[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost)
An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version.
It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). XGBoost can also be distributed and scale to Terascale data
It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework, including [Generalized Linear Model](https://en.wikipedia.org/wiki/Generalized_linear_model) (GLM) and [Gradient Boosted Decision Trees](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) (GBDT). XGBoost can also be [distributed](#features) and scale to Terascale data
Contributors: https://github.com/dmlc/xgboost/graphs/contributors
Documentations: [Documentation of xgboost](doc/README.md)
Documentations: [Documentation of dmlc/xgboost](doc/README.md)
Issues Tracker: [https://github.com/dmlc/xgboost/issues](https://github.com/dmlc/xgboost/issues?q=is%3Aissue+label%3Aquestion)
@@ -24,11 +26,19 @@ XGBoost is part of [Distributed Machine Learning Common](http://dmlc.github.io/)
What's New
==========
* XGBoost helps Chenglong Chen to win [Kaggle CrowdFlower Competition](https://www.kaggle.com/c/crowdflower-search-relevance)
- Checkout the winning solution at [Highlight links](doc/README.md#highlight-links)
* XGBoost-0.4 release, see [CHANGES.md](CHANGES.md#xgboost-04)
* XGBoost wins [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing)
* XGBoost helps three champion teams to win [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing)
- Checkout the winning solution at [Highlight links](doc/README.md#highlight-links)
* [External Memory Version](doc/external_memory.md)
Contributing to XGBoost
=========
XGBoost has been developed and used by an active community. Everyone is more than welcome to contribute, which is a great way to make the project better and more accessible to more users.
* Checkout [Feature Wish List](https://github.com/dmlc/xgboost/labels/Wish-List) to see what can be improved, or open an issue if you want something.
* Contribute to the [documents and examples](https://github.com/dmlc/xgboost/blob/master/doc/) to share your experience with other users.
Features
========
* Easily accessible in python, R, Julia, CLI

@@ -147,7 +147,7 @@ Run the command again, we can find the log file becomes
```
The rule is eval[name-printed-in-log] = filename; the file will then be added to the monitoring process and evaluated each round.
xgboost also support monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
xgboost also supports monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
```
[0] test-error:0.016139 test-negllik:0.029795 trainname-error:0.014433 trainname-negllik:0.027023
[1] test-error:0.000000 test-negllik:0.000000 trainname-error:0.001228 trainname-negllik:0.002457
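For instance, a hedged sketch of the relevant configuration lines, with file names matching the log above but otherwise illustrative:

```
eval[test] = agaricus.txt.test
eval[trainname] = agaricus.txt.train
eval_metric = logloss
```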
@@ -162,11 +162,15 @@ If you want to continue boosting from existing model, say 0002.model, use
```
xgboost will load from 0002.model and continue boosting for 2 rounds, then save output to continue.model. However, beware that the training and evaluation data specified in mushroom.conf should not change when you use this function.
#### Use Multi-Threading
When you are working with a large dataset, you may want to take advantage of parallelism. If your compiler supports OpenMP, xgboost is naturally multi-threaded, to set number of parallel running threads to 10, add ```nthread=10``` to your configuration.
When you are working with a large dataset, you may want to take advantage of parallelism. If your compiler supports OpenMP, xgboost is naturally multi-threaded; to set the number of parallel threads, add the ```nthread``` parameter to your configuration.
Eg. ```nthread=10```
Set nthread to the number of your real CPU cores (on Unix, this can be found using ```lscpu```).
Some systems will have ```Thread(s) per core = 2```, for example a 4-core cpu with 8 threads; in such a case set ```nthread=4```, not 8.
#### Additional Notes
* What are ```agaricus.txt.test.buffer``` and ```agaricus.txt.train.buffer``` generated during runexp.sh?
- By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. When next time you run xgboost, it detects i
Demonstrating how to use XGBoost to accomplish binary classification tasks on the UCI mushroom dataset http://archive.ics.uci.edu/ml/datasets/Mushroom
- By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. Next time when you run xgboost, it will detect these binary files.

@@ -45,7 +45,7 @@ dim(train)
train[1:6,1:5, with =F]
# Test dataset dimensions
dim(train)
dim(test)
# Test content
test[1:6,1:5, with =F]
@@ -228,4 +228,4 @@ There are 4 documents you may also be interested in:
* [xgboostPresentation.Rmd](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd): general presentation
* [discoverYourData.Rmd](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/discoverYourData.Rmd): explaining feature analysis
* [Feature Importance Analysis with XGBoost in Tax audit](http://fr.slideshare.net/MichaelBENESTY/feature-importance-analysis-with-xgboost-in-tax-audit): use case
* [The Elements of Statistical Learning](http://statweb.stanford.edu/~tibs/ElemStatLearn/): very good book to have a good understanding of the model

@@ -20,7 +20,8 @@ How to get started
Highlight Links
====
This section is about blog posts, presentations and videos discussing how to use xgboost to solve your interesting problem. If you think something belongs here, send a pull request.
* [Kaggle Malware Prediction winning solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware)
* [Kaggle CrowdFlower winner's solution by Chenglong Chen](https://github.com/ChenglongChen/Kaggle_CrowdFlower)
* [Kaggle Malware Prediction winner's solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware)
* [Kaggle Tradeshift winning solution by daxiongshu](https://github.com/daxiongshu/kaggle-tradeshift-winning-solution)
* [Feature Importance Analysis with XGBoost in Tax audit](http://fr.slideshare.net/MichaelBENESTY/feature-importance-analysis-with-xgboost-in-tax-audit)
* Video tutorial: [Better Optimization with Repeated Cross Validation and the XGBoost model](https://www.youtube.com/watch?v=Og7CGAfSr_Y)
@@ -29,3 +30,7 @@ This section is about blogposts, presentation and videos discussing how to use x
Contribution
====
Contributions of documents and use-cases are welcome!
* This package uses the Google C++ style
* Code style check tool:
- clone https://github.com/dmlc/dmlc-core into root directory
- type ```make lint``` and fix possible errors.

@@ -17,13 +17,15 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
1. Obtain gcc with openmp support by `brew install gcc --without-multilib` **or** clang with openmp by `brew install clang-omp`. The clang one is recommended because the first method requires us compiling gcc inside the machine (more than an hour in mine)! (BTW, `brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.)
2. **if plaing to use clang-omp** in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to
2. **if you are planning to use clang-omp** - in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to
```C++
#include <libiomp/omp.h> /* instead of #include <omp.h> */`
```
to make it work, otherwise the following steps would show `src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`
to make it work, otherwise you might get this error
`src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`
@@ -41,13 +43,13 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
export CXX = clang-omp++
```
Remember to change `header` if using clang-omp.
Remember to change `header` (mentioned in step 2) if using clang-omp.
Then `cd xgboost` then `bash build.sh` to compile XGBoost. And go to `wrapper` sub-folder to install python version.
4. Set the `Makevars` file in highest priority for R.
The point is, there are three `Makevars` inside the machine: `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by runing `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has highest piority (surprise!).
The point is, there are three `Makevars` : `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by running `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has highest priority (surprise!).
So, **add** or **change** `~/.R/Makevars` to the following lines:

java/README.md (new file)
@@ -0,0 +1,28 @@
# xgboost4j
this is a java wrapper for xgboost
the structure of this wrapper is almost the same as the official python wrapper.
the core of this wrapper is two classes:
* DMatrix: for handling data
* Booster: for training and prediction
## usage:
please refer to [xgboost4j.md](doc/xgboost4j.md) for more information.
besides, simple examples could be found in [xgboost4j-demo](xgboost4j-demo/README.md)
## build native library
for windows: open xgboost.sln in the "../windows" folder; you will find the xgboostjavawrapper project. You should do the following steps to build the wrapper library:
* Select x64/win32 and Release in build
* (if you have set `JAVA_HOME` properly in your Windows environment variables, skip this step) right click on the xgboostjavawrapper project -> choose "Properties" -> click on "C/C++" in the window -> change the "Additional Include Directories" to fit your jdk install path.
* rebuild all
* double click "create_wrap.bat" to put the library in the proper place
for linux:
* make sure you have installed a jdk and `JAVA_HOME` has been set properly
* run "create_wrap.sh"

java/create_wrap.bat (new file)
@@ -0,0 +1,20 @@
echo "move native library"
set libsource=..\windows\x64\Release\xgboostjavawrapper.dll
if not exist %libsource% (
goto end
)
set libfolder=xgboost4j\src\main\resources\lib
set libpath=%libfolder%\xgboostjavawrapper.dll
if not exist %libfolder% (mkdir %libfolder%)
if exist %libpath% (del %libpath%)
move %libsource% %libfolder%
echo complete
pause
exit
:end
echo "source library not found, please build it first from ..\windows\xgboost.sln"
pause
exit

java/create_wrap.sh (new executable file)
@@ -0,0 +1,15 @@
echo "build java wrapper"
cd ..
make java
cd java
echo "move native lib"
libPath="xgboost4j/src/main/resources/lib"
if [ ! -d "$libPath" ]; then
mkdir -p "$libPath"
fi
rm -f xgboost4j/src/main/resources/lib/libxgboostjavawrapper.so
mv libxgboostjavawrapper.so xgboost4j/src/main/resources/lib/
echo "complete"

java/doc/xgboost4j.md (new file)
@@ -0,0 +1,156 @@
xgboost4j : java wrapper for xgboost
====
This page will introduce xgboost4j, the java wrapper for xgboost, including:
* [Building](#build-xgboost4j)
* [Data Interface](#data-interface)
* [Setting Parameters](#setting-parameters)
* [Train Model](#training-model)
* [Prediction](#prediction)
=
#### Build xgboost4j
* Build native library
first make sure you have installed a jdk and `JAVA_HOME` has been set properly, then simply run `./create_wrap.sh`.
* Package xgboost4j
to package xgboost4j, you can run `mvn package` in the xgboost4j folder, or just use an IDE (eclipse/netbeans) to open this maven project and build.
=
#### Data Interface
Like the xgboost python module, xgboost4j uses ```DMatrix``` to handle data; libsvm text format files, sparse matrices in CSR/CSC format, and dense matrices are supported.
* To import ```DMatrix``` :
```java
import org.dmlc.xgboost4j.DMatrix;
```
* To load libsvm text format file, the usage is like :
```java
DMatrix dmat = new DMatrix("train.svm.txt");
```
* Loading a sparse matrix in CSR/CSC format is a little more complicated; the usage is like :
suppose a sparse matrix :
1 0 2 0
4 0 0 3
3 1 2 0
for CSR format
```java
long[] rowHeaders = new long[] {0,2,4,7};
float[] data = new float[] {1f,2f,4f,3f,3f,1f,2f};
int[] colIndex = new int[] {0,2,0,3,0,1,2};
DMatrix dmat = new DMatrix(rowHeaders, colIndex, data, DMatrix.SparseType.CSR);
```
for CSC format
```java
long[] colHeaders = new long[] {0,3,4,6,7};
float[] data = new float[] {1f,4f,3f,1f,2f,2f,3f};
int[] rowIndex = new int[] {0,1,2,2,0,2,1};
DMatrix dmat = new DMatrix(colHeaders, rowIndex, data, DMatrix.SparseType.CSC);
```
* To load a 3*2 dense matrix, the usage is like :
suppose a matrix :
1 2
3 4
5 6
```java
float[] data = new float[] {1f,2f,3f,4f,5f,6f};
int nrow = 3;
int ncol = 2;
float missing = 0.0f;
DMatrix dmat = new DMatrix(data, nrow, ncol, missing);
```
* To set weight :
```java
float[] weights = new float[] {1f,2f,1f};
dmat.setWeight(weights);
```
#### Setting Parameters
* in xgboost4j any ```Iterable<Entry<String, Object>>``` object could be used as parameters.
* to set parameters, for non-multiple value params, you can simply use the entrySet of a Map:
```java
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
put("eval_metric", "logloss");
}
};
Iterable<Entry<String, Object>> params = paramMap.entrySet();
```
* for the situation where multiple values share the same param key, List<Entry<String, Object>> would be a good choice, e.g. :
```java
List<Entry<String, Object>> params = new ArrayList<Entry<String, Object>>() {
{
add(new SimpleEntry<String, Object>("eta", 1.0));
add(new SimpleEntry<String, Object>("max_depth", 2.0));
add(new SimpleEntry<String, Object>("silent", 1));
add(new SimpleEntry<String, Object>("objective", "binary:logistic"));
}
};
```
#### Training Model
With parameters and data, you are able to train a booster model.
* Import ```Trainer``` and ```Booster``` :
```java
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.util.Trainer;
```
* Training
```java
DMatrix trainMat = new DMatrix("train.svm.txt");
DMatrix validMat = new DMatrix("valid.svm.txt");
//specify a watchList to see the performance
//any Iterable<Entry<String, DMatrix>> object could be used as watchList
List<Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new SimpleEntry<>("train", trainMat));
watchs.add(new SimpleEntry<>("test", testMat));
int round = 2;
Booster booster = Trainer.train(params, trainMat, round, watchs, null, null);
```
* Saving model
After training, you can save model and dump it out.
```java
booster.saveModel("model.bin");
```
* Dump Model and Feature Map
```java
booster.dumpModel("modelInfo.txt", false)
//dump with featureMap
booster.dumpModel("modelInfo.txt", "featureMap.txt", false)
```
* Load a model
```java
Params param = new Params() {
{
put("silent", 1);
put("nthread", 6);
}
};
Booster booster = new Booster(param, "model.bin");
```
#### Prediction
After training and loading a model, you can use it to predict on other data. The prediction results are a two-dimensional float array of shape (nsample, nclass); for leaf prediction it is (nsample, nclass*ntrees).
```java
DMatrix dtest = new DMatrix("test.svm.txt");
//predict
float[][] predicts = booster.predict(dtest);
//predict leaf
float[][] leafPredicts = booster.predict(dtest, 0, true);
```

@@ -0,0 +1,15 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

@@ -0,0 +1,10 @@
xgboost4j examples
====
* [Basic walkthrough of wrappers](src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java)
* [Customize loss function, and evaluation metric](src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java)
* [Boosting from existing prediction](src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java)
* [Predicting using first n trees](src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java)
* [Generalized Linear Model](src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java)
* [Cross validation](src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java)
* [Predicting leaf indices](src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java)
* [External Memory](src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java)

@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.dmlc</groupId>
  <artifactId>xgboost4j-demo</artifactId>
  <version>1.0</version>
  <packaging>jar</packaging>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency>
      <groupId>org.dmlc</groupId>
      <artifactId>xgboost4j</artifactId>
      <version>1.1</version>
    </dependency>
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.4</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>

@@ -0,0 +1,164 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.AbstractMap;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.DataLoader;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* a simple example of java wrapper for xgboost
* @author hzx
*/
public class BasicWalkThrough {
public static boolean checkPredicts(float[][] fPredicts, float[][] sPredicts) {
if(fPredicts.length != sPredicts.length) {
return false;
}
for(int i=0; i<fPredicts.length; i++) {
if(!Arrays.equals(fPredicts[i], sPredicts[i])) {
return false;
}
}
return true;
}
public static void main(String[] args) throws UnsupportedEncodingException, IOException, XGBoostError {
// load file from text file, also binary buffer generated by xgboost4j
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//specify parameters
//note: any Iterable<Entry<String, Object>> object could be used as parameters
//e.g.
// Map<String, Object> paramMap = new HashMap<String, Object>() {
// {
// put("eta", 1.0);
// put("max_depth", 2);
// put("silent", 1);
// put("objective", "binary:logistic");
// }
// };
// Iterable<Entry<String, Object>> param = paramMap.entrySet();
//or
// List<Entry<String, Object>> param = new ArrayList<Entry<String, Object>>() {
// {
// add(new SimpleEntry<String, Object>("eta", 1.0));
// add(new SimpleEntry<String, Object>("max_depth", 2.0));
// add(new SimpleEntry<String, Object>("silent", 1));
// add(new SimpleEntry<String, Object>("objective", "binary:logistic"));
// }
// };
//we use a util class Params to handle parameters as example
Iterable<Entry<String, Object>> param = new Params() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
//specify watchList to set evaluation dmats
//note: any Iterable<Entry<String, DMatrix>> object would be used as watchList
//e.g.
//an entrySet of Map is good
// Map<String, DMatrix> watchMap = new HashMap<>();
// watchMap.put("train", trainMat);
// watchMap.put("test", testMat);
// Iterable<Entry<String, DMatrix>> watchs = watchMap.entrySet();
//we use a List of Entry<String, DMatrix> WatchList as example
List<Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new SimpleEntry<>("train", trainMat));
watchs.add(new SimpleEntry<>("test", testMat));
//set round
int round = 2;
//train a boost model
Booster booster = Trainer.train(param, trainMat, round, watchs, null, null);
//predict
float[][] predicts = booster.predict(testMat);
//save model to modelPath
File file = new File("./model");
if(!file.exists()) {
file.mkdirs();
}
String modelPath = "./model/xgb.model";
booster.saveModel(modelPath);
//dump model
booster.dumpModel("./model/dump.raw.txt", false);
//dump model with feature map
booster.dumpModel("./model/dump.nice.txt", "../../demo/data/featmap.txt", false);
//save dmatrix into binary buffer
testMat.saveBinary("./model/dtest.buffer");
//reload model and data
Booster booster2 = new Booster(param, "./model/xgb.model");
DMatrix testMat2 = new DMatrix("./model/dtest.buffer");
float[][] predicts2 = booster2.predict(testMat2);
//check the two predicts
System.out.println(checkPredicts(predicts, predicts2));
System.out.println("start build dmatrix from csr sparse data ...");
//build dmatrix from CSR Sparse Matrix
DataLoader.CSRSparseData spData = DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train");
DMatrix trainMat2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data, DMatrix.SparseType.CSR);
trainMat2.setLabel(spData.labels);
//specify watchList
List<Entry<String, DMatrix>> watchs2 = new ArrayList<>();
watchs2.add(new SimpleEntry<>("train", trainMat2));
watchs2.add(new SimpleEntry<>("test", testMat2));
Booster booster3 = Trainer.train(param, trainMat2, round, watchs2, null, null);
float[][] predicts3 = booster3.predict(testMat2);
//check predicts
System.out.println(checkPredicts(predicts, predicts3));
}
}

@@ -0,0 +1,67 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* example of starting from an initial base prediction
* @author hzx
*/
public class BoostFromPrediction {
public static void main(String[] args) throws XGBoostError {
System.out.println("start running example to start from a initial prediction");
// load file from text file, also binary buffer generated by xgboost4j
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//specify parameters
Params param = new Params() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
//specify watchList
List<Map.Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//train xgboost for 1 round
Booster booster = Trainer.train(param, trainMat, 1, watchs, null, null);
float[][] trainPred = booster.predict(trainMat, true);
float[][] testPred = booster.predict(testMat, true);
trainMat.setBaseMargin(trainPred);
testMat.setBaseMargin(testPred);
System.out.println("result of running from initial prediction");
Booster booster2 = Trainer.train(param, trainMat, 1, watchs, null, null);
}
}
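For reference, predict(mat, true) returns the raw, untransformed margin, which is exactly the form setBaseMargin expects. A minimal sketch, using only the evalSet API shown in Booster below, of inspecting the second booster; this could be appended at the end of main:

//evaluate the booster that was trained on top of the initial margins;
//booster2 and testMat are the variables from main above
String evalInfo = booster2.evalSet(new DMatrix[] {testMat}, new String[] {"test"}, 0);
System.out.println(evalInfo);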

View File

@ -0,0 +1,54 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.io.IOException;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* an example of cross validation
* @author hzx
*/
public class CrossValidation {
public static void main(String[] args) throws IOException, XGBoostError {
//load train mat
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
//set params
Params param = new Params() {
{
put("eta", 1.0);
put("max_depth", 3);
put("silent", 1);
put("nthread", 6);
put("objective", "binary:logistic");
put("gamma", 1.0);
put("eval_metric", "error");
}
};
//do 5-fold cross validation
int round = 2;
int nfold = 5;
//optionally set additional eval metrics (null means no extra metrics)
String[] metrics = null;
String[] evalHist = Trainer.crossValiation(param, trainMat, round, nfold, metrics, null, null);
}
}
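The metrics argument above is left null; it can also carry additional eval metrics beyond the eval_metric set in params. A small sketch, assuming the standard xgboost metric name "auc":

//request an additional evaluation metric during cross validation
String[] extraMetrics = new String[] {"auc"};
String[] evalHist2 = Trainer.crossValiation(param, trainMat, round, nfold, extraMetrics, null, null);
for (String line : evalHist2) {
    System.out.println(line);
}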

View File

@ -0,0 +1,175 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IObjective;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* an example of user-defined objective and eval functions
* NOTE: when you use a customized loss function, the default prediction value is the margin;
* this may keep the built-in evaluation metrics from working properly.
* For example, with logistic loss the prediction is the score before the logistic transformation,
* while the built-in evaluation error assumes the input is after the logistic transformation.
* Keep this in mind when you use the customization; you may need to write a customized evaluation function.
* @author hzx
*/
public class CustomObjective {
/**
* log-likelihood loss objective function
*/
public static class LogRegObj implements IObjective {
private static final Log logger = LogFactory.getLog(LogRegObj.class);
/**
* simple sigmoid function
* @param input
* @return sigmoid value of the input
* Note: this function does not care about numerical stability; it is only used as an example
*/
public float sigmoid(float input) {
float val = (float) (1/(1+Math.exp(-input)));
return val;
}
public float[][] transform(float[][] predicts) {
int nrow = predicts.length;
float[][] transPredicts = new float[nrow][1];
for(int i=0; i<nrow; i++) {
transPredicts[i][0] = sigmoid(predicts[i][0]);
}
return transPredicts;
}
@Override
public List<float[]> getGradient(float[][] predicts, DMatrix dtrain) {
int nrow = predicts.length;
List<float[]> gradients = new ArrayList<>();
float[] labels;
try {
labels = dtrain.getLabel();
} catch (XGBoostError ex) {
logger.error(ex);
return null;
}
float[] grad = new float[nrow];
float[] hess = new float[nrow];
float[][] transPredicts = transform(predicts);
for(int i=0; i<nrow; i++) {
float predict = transPredicts[i][0];
grad[i] = predict - labels[i];
hess[i] = predict * (1 - predict);
}
gradients.add(grad);
gradients.add(hess);
return gradients;
}
}
/**
* user defined eval function.
* NOTE: when you use a customized loss function, the default prediction value is the margin;
* this may keep the built-in evaluation metrics from working properly.
* For example, with logistic loss the prediction is the score before the logistic transformation,
* while the built-in evaluation error assumes the input is after the logistic transformation.
* Keep this in mind when you use the customization; you may need to write a customized evaluation function.
*/
public static class EvalError implements IEvaluation {
private static final Log logger = LogFactory.getLog(EvalError.class);
String evalMetric = "custom_error";
public EvalError() {
}
@Override
public String getMetric() {
return evalMetric;
}
@Override
public float eval(float[][] predicts, DMatrix dmat) {
float error = 0f;
float[] labels;
try {
labels = dmat.getLabel();
} catch (XGBoostError ex) {
logger.error(ex);
return -1f;
}
int nrow = predicts.length;
for(int i=0; i<nrow; i++) {
if(labels[i]==0f && predicts[i][0]>0) {
error++;
}
else if(labels[i]==1f && predicts[i][0]<=0) {
error++;
}
}
return error/labels.length;
}
}
public static void main(String[] args) throws XGBoostError {
//load train mat (svmlight format)
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
//load valid mat (svmlight format)
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//set params
Params param = new Params() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
}
};
//set round
int round = 2;
//specify watchList
List<Map.Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//user-defined obj and eval
IObjective obj = new LogRegObj();
IEvaluation eval = new EvalError();
//train a booster
System.out.println("begin to train the booster model");
Booster booster = Trainer.train(param, trainMat, round, watchs, obj, eval);
}
}
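The grad/hess values computed in LogRegObj.getGradient follow directly from the logistic loss; with margin m, prediction p = sigmoid(m), and label y:

L(m, y) = -\left[ y \log p + (1 - y) \log(1 - p) \right], \qquad p = \sigma(m) = \frac{1}{1 + e^{-m}}

\frac{\partial L}{\partial m} = p - y, \qquad \frac{\partial^2 L}{\partial m^2} = p\,(1 - p)

which matches grad[i] = predict - labels[i] and hess[i] = predict * (1 - predict) above.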

View File

@ -0,0 +1,65 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* simple example for using external memory version
* @author hzx
*/
public class ExternalMemory {
public static void main(String[] args) throws XGBoostError {
//this is the only difference: add a '#' followed by a cache prefix name
//several cache files with that prefix will be generated
//currently only conversion from a libsvm file is supported
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache");
//specify parameters
Params param = new Params() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
//performance notice: set nthread to the number of physical CPU cores
//some CPUs offer two threads per core; for example, on a 4-core CPU with 8 threads, set nthread=4
//param.put("nthread", num_real_cpu);
//specify watchList
List<Map.Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//set round
int round = 2;
//train a boosted model
Booster booster = Trainer.train(param, trainMat, round, watchs, null, null);
}
}

View File

@ -0,0 +1,74 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.CustomEval;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* this is an example of fitting a generalized linear model in xgboost
* basically, we are using a linear model instead of trees for our boosters
* @author hzx
*/
public class GeneralizedLinearModel {
public static void main(String[] args) throws XGBoostError {
// load file from text file, also binary buffer generated by xgboost4j
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//specify parameters
//change booster to gblinear, so that we are fitting a linear model
//alpha is the L1 regularizer
//lambda is the L2 regularizer
//you can also set lambda_bias, which is the L2 regularizer on the bias term
Params param = new Params() {
{
put("alpha", 0.0001);
put("silent", 1);
put("objective", "binary:logistic");
put("booster", "gblinear");
}
};
//normally, you do not need to set eta (step size)
//XGBoost uses a parallel coordinate descent algorithm (shotgun);
//parallelization can affect convergence in certain cases,
//so setting eta to a smaller value, e.g. 0.5, can make the optimization more stable
//param.put("eta", "0.5");
//specify watchList
List<Map.Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//train a booster
int round = 4;
Booster booster = Trainer.train(param, trainMat, round, watchs, null, null);
float[][] predicts = booster.predict(testMat);
CustomEval eval = new CustomEval();
System.out.println("error=" + eval.eval(predicts, testMat));
}
}
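The comments above mention lambda and lambda_bias as well as alpha; a sketch of a params block that sets all three regularizers explicitly (the numeric values are illustrative only):

Params regParam = new Params() {
    {
        put("alpha", 0.0001);    //L1 regularizer
        put("lambda", 1.0);      //L2 regularizer
        put("lambda_bias", 0.0); //L2 regularizer on the bias term
        put("silent", 1);
        put("objective", "binary:logistic");
        put("booster", "gblinear");
    }
};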

View File

@ -0,0 +1,69 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.demo.util.CustomEval;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* an example of predicting with only the first n trees
* @author hzx
*/
public class PredictFirstNtree {
public static void main(String[] args) throws XGBoostError {
// load file from text file, also binary buffer generated by xgboost4j
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//specify parameters
Params param = new Params() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
//specify watchList
List<Map.Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//train a booster
int round = 3;
Booster booster = Trainer.train(param, trainMat, round, watchs, null, null);
//predict using only 1 tree
float[][] predicts1 = booster.predict(testMat, false, 1);
//by default, all trees are used for prediction
float[][] predicts2 = booster.predict(testMat);
//use a simple evaluation class to check error result
CustomEval eval = new CustomEval();
System.out.println("error of predicts1: " + eval.eval(predicts1, testMat));
System.out.println("error of predicts2: " + eval.eval(predicts2, testMat));
}
}

View File

@ -0,0 +1,70 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* predict leaf indices
* @author hzx
*/
public class PredictLeafIndices {
public static void main(String[] args) throws XGBoostError {
// load file from text file, also binary buffer generated by xgboost4j
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//specify parameters
Params param = new Params() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
//specify watchList
List<Map.Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//train a booster
int round = 3;
Booster booster = Trainer.train(param, trainMat, round, watchs, null, null);
//predict using the first 2 trees
float[][] leafindex = booster.predict(testMat, 2, true);
for(float[] leafs : leafindex) {
System.out.println(Arrays.toString(leafs));
}
//predict using all trees
leafindex = booster.predict(testMat, 0, true);
for(float[] leafs : leafindex) {
System.out.println(Arrays.toString(leafs));
}
}
}
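As the Booster.predict javadoc below notes, the leaf-index output is a matrix of shape (nsample, ntrees used). A one-line check that could be added inside main:

//leafindex has one row per test sample and one column per tree used
System.out.println("samples: " + leafindex.length + ", trees: " + leafindex[0].length);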

View File

@ -0,0 +1,60 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
* a util evaluation class for examples
* @author hzx
*/
public class CustomEval implements IEvaluation {
private static final Log logger = LogFactory.getLog(CustomEval.class);
String evalMetric = "custom_error";
@Override
public String getMetric() {
return evalMetric;
}
@Override
public float eval(float[][] predicts, DMatrix dmat) {
float error = 0f;
float[] labels;
try {
labels = dmat.getLabel();
} catch (XGBoostError ex) {
logger.error(ex);
return -1f;
}
int nrow = predicts.length;
for(int i=0; i<nrow; i++) {
if(labels[i]==0f && predicts[i][0]>0.5) {
error++;
}
else if(labels[i]==1f && predicts[i][0]<=0.5) {
error++;
}
}
return error/labels.length;
}
}

View File

@ -0,0 +1,127 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.ArrayUtils;
/**
* util class for loading data
* @author hzx
*/
public class DataLoader {
public static class DenseData {
public float[] labels;
public float[] data;
public int nrow;
public int ncol;
}
public static class CSRSparseData {
public float[] labels;
public float[] data;
public long[] rowHeaders;
public int[] colIndex;
}
public static DenseData loadCSVFile(String filePath) throws FileNotFoundException, UnsupportedEncodingException, IOException {
DenseData denseData = new DenseData();
File f = new File(filePath);
FileInputStream in = new FileInputStream(f);
BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
denseData.nrow = 0;
denseData.ncol = -1;
String line;
List<Float> tlabels = new ArrayList<>();
List<Float> tdata = new ArrayList<>();
while((line=reader.readLine()) != null) {
String[] items = line.trim().split(",");
if(items.length==0) {
continue;
}
denseData.nrow++;
if(denseData.ncol == -1) {
denseData.ncol = items.length - 1;
}
tlabels.add(Float.valueOf(items[items.length-1]));
for(int i=0; i<items.length-1; i++) {
tdata.add(Float.valueOf(items[i]));
}
}
reader.close();
in.close();
denseData.labels = ArrayUtils.toPrimitive(tlabels.toArray(new Float[tlabels.size()]));
denseData.data = ArrayUtils.toPrimitive(tdata.toArray(new Float[tdata.size()]));
return denseData;
}
public static CSRSparseData loadSVMFile(String filePath) throws FileNotFoundException, UnsupportedEncodingException, IOException {
CSRSparseData spData = new CSRSparseData();
List<Float> tlabels = new ArrayList<>();
List<Float> tdata = new ArrayList<>();
List<Long> theaders = new ArrayList<>();
List<Integer> tindex = new ArrayList<>();
File f = new File(filePath);
FileInputStream in = new FileInputStream(f);
BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
String line;
long rowheader = 0;
theaders.add(rowheader);
while((line=reader.readLine()) != null) {
String[] items = line.trim().split(" ");
if(items.length==0) {
continue;
}
rowheader += items.length - 1;
theaders.add(rowheader);
tlabels.add(Float.valueOf(items[0]));
for(int i=1; i<items.length; i++) {
String[] tup = items[i].split(":");
assert tup.length == 2;
tdata.add(Float.valueOf(tup[1]));
tindex.add(Integer.valueOf(tup[0]));
}
}
spData.labels = ArrayUtils.toPrimitive(tlabels.toArray(new Float[tlabels.size()]));
spData.data = ArrayUtils.toPrimitive(tdata.toArray(new Float[tdata.size()]));
spData.colIndex = ArrayUtils.toPrimitive(tindex.toArray(new Integer[tindex.size()]));
spData.rowHeaders = ArrayUtils.toPrimitive(theaders.toArray(new Long[theaders.size()]));
return spData;
}
}
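DenseData pairs naturally with the dense DMatrix constructor shown later in this commit; a small usage sketch (the path "data.csv" is hypothetical, and the label comes from the last column, as loadCSVFile expects):

//load a dense CSV file and build a DMatrix from it
DataLoader.DenseData dense = DataLoader.loadCSVFile("data.csv");
DMatrix dmat = new DMatrix(dense.data, dense.nrow, dense.ncol);
dmat.setLabel(dense.labels);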

View File

@ -0,0 +1,54 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo.util;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.AbstractMap;
/**
* a util class for handling params
* @author hzx
*/
public class Params implements Iterable<Entry<String, Object>>{
List<Entry<String, Object>> params = new ArrayList<>();
/**
* put param key-value pair
* @param key
* @param value
*/
public void put(String key, Object value) {
params.add(new AbstractMap.SimpleEntry<>(key, value));
}
@Override
public String toString(){
StringBuilder paramsInfo = new StringBuilder();
for(Entry<String, Object> param : params) {
paramsInfo.append(param.getKey()).append(":").append(param.getValue()).append("\n");
}
return paramsInfo.toString();
}
@Override
public Iterator<Entry<String, Object>> iterator() {
return params.iterator();
}
}

15
java/xgboost4j/LICENSE Normal file
View File

@ -0,0 +1,15 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

23
java/xgboost4j/README.md Normal file
View File

@ -0,0 +1,23 @@
# xgboost4j
This is a Java wrapper for xgboost (https://github.com/dmlc/xgboost).
The structure of this wrapper is almost the same as that of the official Python wrapper.
The core of this wrapper consists of two classes:
* DMatrix: for handling data
* Booster: for training and prediction
## usage:
Simple examples can be found in the test package:
* Simple Train Example: org.dmlc.xgboost4j.TrainExample.java
* Simple Predict Example: org.dmlc.xgboost4j.PredictExample.java
* Cross Validation Example: org.dmlc.xgboost4j.example.CVExample.java
## native library:
Only 64-bit Linux/Windows is supported at present. If you want to build the native wrapper library yourself, please refer to
https://github.com/yanqingmen/xgboost-java, then put your native library into the "./src/main/resources/lib" folder, replacing the original (either "libxgboostjavawrapper.so" for Linux or "xgboostjavawrapper.dll" for Windows).

35
java/xgboost4j/pom.xml Normal file
View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.dmlc</groupId>
<artifactId>xgboost4j</artifactId>
<version>1.1</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<reporting>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.3</version>
</plugin>
</plugins>
</reporting>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,484 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.util.Initializer;
import org.dmlc.xgboost4j.util.ErrorHandle;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.dmlc.xgboost4j.wrapper.XgboostJNI;
/**
* Booster for xgboost, similar to the python wrapper xgboost.py;
* custom objective and eval functions are supported via the update/evalSet overloads that take IObjective/IEvaluation.
* @author hzx
*/
public final class Booster {
private static final Log logger = LogFactory.getLog(Booster.class);
long handle = 0;
//load native library
static {
try {
Initializer.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* initialize Booster from an array of DMatrix objects
* @param params parameters
* @param dMatrixs DMatrix array
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Booster(Iterable<Entry<String, Object>> params, DMatrix[] dMatrixs) throws XGBoostError {
init(dMatrixs);
setParam("seed","0");
setParams(params);
}
/**
* load model from modelPath
* @param params parameters
* @param modelPath booster modelPath (model generated by booster.saveModel)
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Booster(Iterable<Entry<String, Object>> params, String modelPath) throws XGBoostError {
init(null);
if(modelPath == null) {
throw new NullPointerException("modelPath : null");
}
loadModel(modelPath);
setParam("seed","0");
setParams(params);
}
private void init(DMatrix[] dMatrixs) throws XGBoostError {
long[] handles = null;
if(dMatrixs != null) {
handles = dMatrixs2handles(dMatrixs);
}
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGBoosterCreate(handles, out));
handle = out[0];
}
/**
* set parameter
* @param key param name
* @param value param value
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public final void setParam(String key, String value) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGBoosterSetParam(handle, key, value));
}
/**
* set parameters
* @param params parameters key-value map
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setParams(Iterable<Entry<String, Object>> params) throws XGBoostError {
if(params!=null) {
for(Map.Entry<String, Object> entry : params) {
setParam(entry.getKey(), entry.getValue().toString());
}
}
}
/**
* Update (one iteration)
* @param dtrain training data
* @param iter current iteration number
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void update(DMatrix dtrain, int iter) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle()));
}
/**
* update with a customized objective function
* @param dtrain training data
* @param iter current iteration number
* @param obj customized objective class
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void update(DMatrix dtrain, int iter, IObjective obj) throws XGBoostError {
float[][] predicts = predict(dtrain, true);
List<float[]> gradients = obj.getGradient(predicts, dtrain);
boost(dtrain, gradients.get(0), gradients.get(1));
}
/**
* update with given grad and hess
* @param dtrain training data
* @param grad first order gradient
* @param hess second order gradient
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void boost(DMatrix dtrain, float[] grad, float[] hess) throws XGBoostError {
if(grad.length != hess.length) {
throw new AssertionError(String.format("grad/hess length mismatch %s / %s", grad.length, hess.length));
}
ErrorHandle.checkCall(XgboostJNI.XGBoosterBoostOneIter(handle, dtrain.getHandle(), grad, hess));
}
/**
* evaluate with given dmatrixs.
* @param evalMatrixs dmatrixs for evaluation
* @param evalNames names for the eval dmatrixs, used to label results
* @param iter current eval iteration
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XGBoostError {
long[] handles = dMatrixs2handles(evalMatrixs);
String[] evalInfo = new String[1];
ErrorHandle.checkCall(XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames, evalInfo));
return evalInfo[0];
}
/**
* evaluate with given customized Evaluation class
* @param evalMatrixs
* @param evalNames
* @param iter
* @param eval
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, IEvaluation eval) throws XGBoostError {
String evalInfo = "";
for(int i=0; i<evalNames.length; i++) {
String evalName = evalNames[i];
DMatrix evalMat = evalMatrixs[i];
float evalResult = eval.eval(predict(evalMat), evalMat);
String evalMetric = eval.getMetric();
evalInfo += String.format("\t%s-%s:%f", evalName,evalMetric, evalResult);
}
return evalInfo;
}
/**
* evaluate with given dmatrix handles.
* @param dHandles evaluation data handles
* @param evalNames names for the eval dmatrixs, used to label results
* @param iter current eval iteration
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String evalSet(long[] dHandles, String[] evalNames, int iter) throws XGBoostError {
String[] evalInfo = new String[1];
ErrorHandle.checkCall(XgboostJNI.XGBoosterEvalOneIter(handle, iter, dHandles, evalNames, evalInfo));
return evalInfo[0];
}
/**
* evaluate with given dmatrix, similar to evalSet
* @param evalMat
* @param evalName
* @param iter
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String eval(DMatrix evalMat, String evalName, int iter) throws XGBoostError {
DMatrix[] evalMats = new DMatrix[] {evalMat};
String[] evalNames = new String[] {evalName};
return evalSet(evalMats, evalNames, iter);
}
/**
* base function for Predict
* @param data
* @param outPutMargin
* @param treeLimit
* @param predLeaf
* @return predict results
*/
private synchronized float[][] pred(DMatrix data, boolean outPutMargin, long treeLimit, boolean predLeaf) throws XGBoostError {
//the option mask is a bit field: 1 = output margin, 2 = predict leaf index
int optionMask = 0;
if(outPutMargin) {
optionMask |= 1;
}
if(predLeaf) {
optionMask |= 2;
}
float[][] rawPredicts = new float[1][];
ErrorHandle.checkCall(XgboostJNI.XGBoosterPredict(handle, data.getHandle(), optionMask, treeLimit, rawPredicts));
int row = (int) data.rowNum();
int col = (int) rawPredicts[0].length/row;
float[][] predicts = new float[row][col];
int r,c;
for(int i=0; i< rawPredicts[0].length; i++) {
r = i/col;
c = i%col;
predicts[r][c] = rawPredicts[0][i];
}
return predicts;
}
/**
* Predict with data
* @param data dmatrix storing the input
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data) throws XGBoostError {
return pred(data, false, 0, false);
}
/**
* Predict with data
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data, boolean outPutMargin) throws XGBoostError {
return pred(data, outPutMargin, 0, false);
}
/**
* Predict with data
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data, boolean outPutMargin, long treeLimit) throws XGBoostError {
return pred(data, outPutMargin, treeLimit, false);
}
/**
* Predict with data
* @param data dmatrix storing the input
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @param predLeaf When this option is on, the output will be a matrix of (nsample, ntrees), nsample = data.numRow,
* with each record indicating the predicted leaf index of each sample in each tree.
* Note that the leaf index of a tree is unique per tree, so you may find leaf 1
* in both tree 1 and tree 0.
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data , long treeLimit, boolean predLeaf) throws XGBoostError {
return pred(data, false, treeLimit, predLeaf);
}
/**
* save model to modelPath
* @param modelPath
*/
public void saveModel(String modelPath) {
XgboostJNI.XGBoosterSaveModel(handle, modelPath);
}
private void loadModel(String modelPath) {
XgboostJNI.XGBoosterLoadModel(handle, modelPath);
}
/**
* get the dump of the model as a string array
* @param withStats Controls whether the split statistics are output.
* @return dumped model information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String[] getDumpInfo(boolean withStats) throws XGBoostError {
int statsFlag = 0;
if(withStats) {
statsFlag = 1;
}
String[][] modelInfos = new String[1][];
ErrorHandle.checkCall(XgboostJNI.XGBoosterDumpModel(handle, "", statsFlag, modelInfos));
return modelInfos[0];
}
/**
* get the dump of the model as a string array
* @param featureMap featureMap file
* @param withStats Controls whether the split statistics are output.
* @return dumped model information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String[] getDumpInfo(String featureMap, boolean withStats) throws XGBoostError {
int statsFlag = 0;
if(withStats) {
statsFlag = 1;
}
String[][] modelInfos = new String[1][];
ErrorHandle.checkCall(XgboostJNI.XGBoosterDumpModel(handle, featureMap, statsFlag, modelInfos));
return modelInfos[0];
}
/**
* Dump model into a text file.
* @param modelPath file to save dumped model info
* @param withStats bool: controls whether the split statistics are output.
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
* @throws IOException
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void dumpModel(String modelPath, boolean withStats) throws FileNotFoundException, UnsupportedEncodingException, IOException, XGBoostError {
File tf = new File(modelPath);
FileOutputStream out = new FileOutputStream(tf);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
String[] modelInfos = getDumpInfo(withStats);
for(int i=0; i<modelInfos.length; i++) {
writer.write("booster [" + i +"]:\n");
writer.write(modelInfos[i]);
}
writer.close();
out.close();
}
/**
* Dump model into a text file.
* @param modelPath file to save dumped model info
* @param featureMap featureMap file
* @param withStats bool: controls whether the split statistics are output.
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
* @throws IOException
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void dumpModel(String modelPath, String featureMap, boolean withStats) throws FileNotFoundException, UnsupportedEncodingException, IOException, XGBoostError {
File tf = new File(modelPath);
FileOutputStream out = new FileOutputStream(tf);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
String[] modelInfos = getDumpInfo(featureMap, withStats);
for(int i=0; i<modelInfos.length; i++) {
writer.write("booster [" + i +"]:\n");
writer.write(modelInfos[i]);
}
writer.close();
out.close();
}
/**
* get importance of each feature
* @return featureMap key: feature index, value: feature importance score
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Map<String, Integer> getFeatureScore() throws XGBoostError {
String[] modelInfos = getDumpInfo(false);
Map<String, Integer> featureScore = new HashMap<>();
for(String tree : modelInfos) {
for(String node : tree.split("\n")) {
String[] array = node.split("\\[");
if(array.length == 1) {
continue;
}
String fid = array[1].split("\\]")[0];
fid = fid.split("<")[0];
if(featureScore.containsKey(fid)) {
featureScore.put(fid, 1 + featureScore.get(fid));
}
else {
featureScore.put(fid, 1);
}
}
}
return featureScore;
}
/**
* get importance of each feature
* @param featureMap file to save dumped model info
* @return featureMap key: feature index, value: feature importance score
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Map<String, Integer> getFeatureScore(String featureMap) throws XGBoostError {
String[] modelInfos = getDumpInfo(featureMap, false);
Map<String, Integer> featureScore = new HashMap<>();
for(String tree : modelInfos) {
for(String node : tree.split("\n")) {
String[] array = node.split("\\[");
if(array.length == 1) {
continue;
}
String fid = array[1].split("\\]")[0];
fid = fid.split("<")[0];
if(featureScore.containsKey(fid)) {
featureScore.put(fid, 1 + featureScore.get(fid));
}
else {
featureScore.put(fid, 1);
}
}
}
return featureScore;
}
/**
* convert a DMatrix array to a handle array (used by native functions)
* @param dmatrixs
* @return handle array for input dmatrixs
*/
private static long[] dMatrixs2handles(DMatrix[] dmatrixs) {
long[] handles = new long[dmatrixs.length];
for(int i=0; i<dmatrixs.length; i++) {
handles[i] = dmatrixs[i].getHandle();
}
return handles;
}
@Override
protected void finalize() {
delete();
}
public synchronized void delete() {
if(handle != 0L) {
XgboostJNI.XGBoosterFree(handle);
handle=0;
}
}
}
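getFeatureScore counts how often each feature appears as a split in the dumped trees; a short usage sketch:

//print feature importance as split counts
Map<String, Integer> scores = booster.getFeatureScore();
for (Map.Entry<String, Integer> e : scores.entrySet()) {
    System.out.println(e.getKey() + " -> " + e.getValue());
}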

View File

@ -0,0 +1,263 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.util.ErrorHandle;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.dmlc.xgboost4j.util.Initializer;
import org.dmlc.xgboost4j.wrapper.XgboostJNI;
/**
* DMatrix for xgboost, similar to the python wrapper xgboost.py
* @author hzx
*/
public class DMatrix {
private static final Log logger = LogFactory.getLog(DMatrix.class);
long handle = 0;
//load native library
static {
try {
Initializer.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* sparse matrix type (CSR or CSC)
*/
public static enum SparseType {
CSR,
CSC;
}
/**
* init DMatrix from file (svmlight format)
* @param dataPath
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix(String dataPath) throws XGBoostError {
if(dataPath == null) {
throw new NullPointerException("dataPath: null");
}
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1, out));
handle = out[0];
}
/**
* create DMatrix from sparse matrix
* @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)
* @param indices indices (column indices for CSR or row indices for CSC)
* @param data non zero values (sequence by row for CSR or by col for CSC)
* @param st sparse matrix type (CSR or CSC)
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) throws XGBoostError {
long[] out = new long[1];
if(st == SparseType.CSR) {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSR(headers, indices, data, out));
}
else if(st == SparseType.CSC) {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSC(headers, indices, data, out));
}
else {
throw new UnknownError("unknow sparsetype");
}
handle = out[0];
}
/**
* create DMatrix from dense matrix
* @param data data values
* @param nrow number of rows
* @param ncol number of columns
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix(float[] data, int nrow, int ncol) throws XGBoostError {
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, 0.0f, out));
handle = out[0];
}
/**
* used for DMatrix slice
* @param handle
*/
private DMatrix(long handle) {
this.handle = handle;
}
/**
* set label of dmatrix
* @param labels
*/
public void setLabel(float[] labels) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "label", labels));
}
/**
* set weight of each instance
* @param weights
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setWeight(float[] weights) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights));
}
/**
* if specified, xgboost will start from this initial margin;
* can be used to specify an initial prediction to boost from
* @param baseMargin
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setBaseMargin(float[] baseMargin) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin));
}
/**
* if specified, xgboost will start from this initial margin;
* can be used to specify an initial prediction to boost from
* @param baseMargin
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setBaseMargin(float[][] baseMargin) throws XGBoostError {
float[] flattenMargin = flatten(baseMargin);
setBaseMargin(flattenMargin);
}
/**
* Set group sizes of DMatrix (used for ranking)
* @param group
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setGroup(int[] group) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetGroup(handle, group));
}
private float[] getFloatInfo(String field) throws XGBoostError {
float[][] infos = new float[1][];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetFloatInfo(handle, field, infos));
return infos[0];
}
private int[] getIntInfo(String field) throws XGBoostError {
int[][] infos = new int[1][];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetUIntInfo(handle, field, infos));
return infos[0];
}
/**
* get label values
* @return label
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[] getLabel() throws XGBoostError {
return getFloatInfo("label");
}
/**
* get weight of the DMatrix
* @return weights
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[] getWeight() throws XGBoostError {
return getFloatInfo("weight");
}
/**
* get base margin of the DMatrix
* @return base margin
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[] getBaseMargin() throws XGBoostError {
return getFloatInfo("base_margin");
}
/**
* Slice the DMatrix and return a new DMatrix that only contains the rows in rowIndex.
* @param rowIndex
* @return sliced new DMatrix
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix slice(int[] rowIndex) throws XGBoostError {
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSliceDMatrix(handle, rowIndex, out));
long sHandle = out[0];
DMatrix sMatrix = new DMatrix(sHandle);
return sMatrix;
}
/**
* get the row number of DMatrix
* @return number of rows
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public long rowNum() throws XGBoostError {
long[] rowNum = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixNumRow(handle,rowNum));
return rowNum[0];
}
/**
* save DMatrix to filePath
* @param filePath
*/
public void saveBinary(String filePath) {
XgboostJNI.XGDMatrixSaveBinary(handle, filePath, 1);
}
public long getHandle() {
return handle;
}
/**
* flatten a mat to array
* @param mat
* @return
*/
private static float[] flatten(float[][] mat) {
int size = 0;
for (float[] array : mat) size += array.length;
float[] result = new float[size];
int pos = 0;
for (float[] ar : mat) {
System.arraycopy(ar, 0, result, pos, ar.length);
pos += ar.length;
}
return result;
}
@Override
protected void finalize() {
delete();
}
public synchronized void delete() {
if(handle != 0) {
XgboostJNI.XGDMatrixFree(handle);
handle = 0;
}
}
}
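A small usage sketch for slice, which returns a new DMatrix containing only the requested rows (the indices here are illustrative):

//take a 3-row slice of an existing DMatrix
int[] rows = new int[] {0, 2, 4};
DMatrix sub = trainMat.slice(rows);
System.out.println("rows in slice: " + sub.rowNum());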

View File

@ -0,0 +1,36 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
/**
* interface for customized evaluation
* @author hzx
*/
public interface IEvaluation {
/**
* get evaluate metric
* @return evalMetric
*/
public abstract String getMetric();
/**
* evaluate with predicts and data
* @param predicts
* @param dmat
* @return
*/
public abstract float eval(float[][] predicts, DMatrix dmat);
}

View File

@ -0,0 +1,32 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.util.List;
/**
* interface for customized objective functions
* @author hzx
*/
public interface IObjective {
/**
* user-defined objective function: returns the gradient and second order gradient
* @param predicts untransformed margin predicts
* @param dtrain training data
* @return List with two float arrays, corresponding to the first order grad and second order grad
*/
public abstract List<float[]> getGradient(float[][] predicts, DMatrix dtrain);
}

View File

@ -0,0 +1,89 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.util.Map;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IObjective;
/**
* cross validation package for xgb
* @author hzx
*/
public class CVPack {
DMatrix dtrain;
DMatrix dtest;
DMatrix[] dmats;
String[] names;
Booster booster;
/**
* create a cross validation pack
* @param dtrain train data
* @param dtest test data
* @param params parameters
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public CVPack(DMatrix dtrain, DMatrix dtest, Iterable<Map.Entry<String, Object>> params) throws XGBoostError {
dmats = new DMatrix[] {dtrain, dtest};
booster = new Booster(params, dmats);
names = new String[] {"train", "test"};
this.dtrain = dtrain;
this.dtest = dtest;
}
/**
* update one iteration
* @param iter iteration num
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void update(int iter) throws XGBoostError {
booster.update(dtrain, iter);
}
/**
* update one iteration
* @param iter iteration num
* @param obj customized objective
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void update(int iter, IObjective obj) throws XGBoostError {
booster.update(dtrain, iter, obj);
}
/**
* evaluation
* @param iter iteration num
* @return
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String eval(int iter) throws XGBoostError {
return booster.evalSet(dmats, names, iter);
}
/**
* evaluation
* @param iter iteration num
* @param eval customized eval
* @return
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String eval(int iter, IEvaluation eval) throws XGBoostError {
return booster.evalSet(dmats, names, iter, eval);
}
}
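A sketch of driving a CVPack by hand for a few rounds; the fold matrices dtrainFold/dtestFold are assumed to exist (Trainer.crossValiation normally builds the folds for you):

CVPack pack = new CVPack(dtrainFold, dtestFold, param);
for (int i = 0; i < 2; i++) {
    pack.update(i);                   //one boosting iteration on the train fold
    System.out.println(pack.eval(i)); //evaluate on both folds
}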

View File

@ -0,0 +1,50 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.wrapper.XgboostJNI;
/**
* error handler for xgboost
* @author hzx
*/
public class ErrorHandle {
private static final Log logger = LogFactory.getLog(ErrorHandle.class);
//load native library
static {
try {
Initializer.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* check the return value of C API
* @param ret return value of an xgboostJNI C API call
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public static void checkCall(int ret) throws XGBoostError {
if(ret != 0) {
throw new XGBoostError(XgboostJNI.XGBGetLastError());
}
}
}

View File

@ -0,0 +1,92 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.io.IOException;
import java.lang.reflect.Field;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* class to load native library
* @author hzx
*/
public class Initializer {
private static final Log logger = LogFactory.getLog(Initializer.class);
static boolean initialized = false;
public static final String nativePath = "./lib";
public static final String nativeResourcePath = "/lib/";
public static final String[] libNames = new String[] {"xgboostjavawrapper"};
public static synchronized void InitXgboost() throws IOException {
if(initialized == false) {
for(String libName: libNames) {
smartLoad(libName);
}
initialized = true;
}
}
/**
* load a native library; this method first tries to load the library from java.library.path, then from the jar package.
* @param libName
* @throws IOException
*/
private static void smartLoad(String libName) throws IOException {
addNativeDir(nativePath);
try {
System.loadLibrary(libName);
}
catch (UnsatisfiedLinkError e) {
NativeUtils.loadLibraryFromJar(nativeResourcePath + System.mapLibraryName(libName));
}
}
/**
* add libPath to java.library.path, so that native libraries in libPath can be loaded properly
* @param libPath
* @throws IOException
*/
public static void addNativeDir(String libPath) throws IOException {
try {
Field field = ClassLoader.class.getDeclaredField("usr_paths");
field.setAccessible(true);
String[] paths = (String[]) field.get(null);
for (String path : paths) {
if (libPath.equals(path)) {
return;
}
}
String[] tmp = new String[paths.length+1];
System.arraycopy(paths,0,tmp,0,paths.length);
tmp[paths.length] = libPath;
field.set(null, tmp);
} catch (IllegalAccessException e) {
logger.error(e.getMessage());
throw new IOException("Failed to get permissions to set library path");
} catch (NoSuchFieldException e) {
logger.error(e.getMessage());
throw new IOException("Failed to get field handle to set library path");
}
}
}

View File

@ -0,0 +1,109 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
* Simple library class for working with JNI (Java Native Interface)
*
* @see http://adamheinrich.com/2012/how-to-load-native-jni-library-from-jar
*
* @author Adam Heinrich &lt;adam@adamh.cz&gt;, http://www.adamh.cz
*/
public class NativeUtils {
/**
* Private constructor - this class will never be instanced
*/
private NativeUtils() {
}
/**
* Loads library from current JAR archive
*
* The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting.
* Method uses String as filename because the pathname is "abstract", not system-dependent.
*
* @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext
* @throws IOException If temporary file creation or read/write operation fails
* @throws IllegalArgumentException If source file (param path) does not exist
* @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@see File#createTempFile(java.lang.String, java.lang.String)}).
*/
public static void loadLibraryFromJar(String path) throws IOException {
if (!path.startsWith("/")) {
throw new IllegalArgumentException("The path has to be absolute (start with '/').");
}
// Obtain filename from path
String[] parts = path.split("/");
String filename = (parts.length > 1) ? parts[parts.length - 1] : null;
// Split filename into prefix and suffix (extension)
String prefix = "";
String suffix = null;
if (filename != null) {
parts = filename.split("\\.", 2);
prefix = parts[0];
suffix = (parts.length > 1) ? "."+parts[parts.length - 1] : null; // Thanks, davs! :-)
}
// Check if the filename is okay
if (filename == null || prefix.length() < 3) {
throw new IllegalArgumentException("The filename has to be at least 3 characters long.");
}
// Prepare temporary file
File temp = File.createTempFile(prefix, suffix);
temp.deleteOnExit();
if (!temp.exists()) {
throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist.");
}
// Prepare buffer for data copying
byte[] buffer = new byte[1024];
int readBytes;
// Open and check input stream
InputStream is = NativeUtils.class.getResourceAsStream(path);
if (is == null) {
throw new FileNotFoundException("File " + path + " was not found inside JAR.");
}
// Open output stream and copy data between source file in JAR and the temporary file
OutputStream os = new FileOutputStream(temp);
try {
while ((readBytes = is.read(buffer)) != -1) {
os.write(buffer, 0, readBytes);
}
} finally {
// If read/write fails, close streams safely before throwing an exception
os.close();
is.close();
}
// Finally, load the library
System.load(temp.getAbsolutePath());
}
}
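
A minimal usage sketch for loadLibraryFromJar, assuming the shared library is bundled inside the jar under /lib/ (the resource path and library name are assumptions, not fixed by this commit):

//copies the bundled library to a temp file and System.load()s it
NativeUtils.loadLibraryFromJar("/lib/" + System.mapLibraryName("xgboostjavawrapper"));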

View File

@ -0,0 +1,235 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IObjective;
/**
* trainer for xgboost
* @author hzx
*/
public class Trainer {
private static final Log logger = LogFactory.getLog(Trainer.class);
/**
* Train a booster with given parameters.
* @param params Booster params.
* @param dtrain Data to be trained.
* @param round Number of boosting iterations.
* @param watchs a group of items to be evaluated during training; this allows the user to watch performance on the validation set(s)
* @param obj customized objective (set to null if not used)
* @param eval customized evaluation (set to null if not used)
* @return trained booster
*/
public static Booster train(Iterable<Entry<String, Object>> params, DMatrix dtrain, int round,
Iterable<Entry<String, DMatrix>> watchs, IObjective obj, IEvaluation eval) throws XGBoostError {
//collect eval matrices
String[] evalNames;
DMatrix[] evalMats;
List<String> names = new ArrayList<>();
List<DMatrix> mats = new ArrayList<>();
for(Entry<String, DMatrix> evalEntry : watchs) {
names.add(evalEntry.getKey());
mats.add(evalEntry.getValue());
}
evalNames = names.toArray(new String[names.size()]);
evalMats = mats.toArray(new DMatrix[mats.size()]);
//collect all data matrices
DMatrix[] allMats;
if(evalMats!=null && evalMats.length>0) {
allMats = new DMatrix[evalMats.length+1];
allMats[0] = dtrain;
System.arraycopy(evalMats, 0, allMats, 1, evalMats.length);
}
else {
allMats = new DMatrix[1];
allMats[0] = dtrain;
}
//initialize booster
Booster booster = new Booster(params, allMats);
//begin to train
for(int iter=0; iter<round; iter++) {
if(obj != null) {
booster.update(dtrain, iter, obj);
} else {
booster.update(dtrain, iter);
}
//evaluation
if(evalMats!=null && evalMats.length>0) {
String evalInfo;
if(eval != null) {
evalInfo = booster.evalSet(evalMats, evalNames, iter, eval);
}
else {
evalInfo = booster.evalSet(evalMats, evalNames, iter);
}
logger.info(evalInfo);
}
}
return booster;
}
/**
* Cross-validation with given parameters.
* @param params Booster params.
* @param data Data to be trained.
* @param round Number of boosting iterations.
* @param nfold Number of folds in CV.
* @param metrics Evaluation metrics to be watched in CV.
* @param obj customized objective (set to null if not used)
* @param eval customized evaluation (set to null if not used)
* @return evaluation history
*/
public static String[] crossValiation(Iterable<Entry<String, Object>> params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) throws XGBoostError {
CVPack[] cvPacks = makeNFold(data, nfold, params, metrics);
String[] evalHist = new String[round];
String[] results = new String[cvPacks.length];
for(int i=0; i<round; i++) {
for(CVPack cvPack : cvPacks) {
if(obj != null) {
cvPack.update(i, obj);
}
else {
cvPack.update(i);
}
}
for(int j=0; j<cvPacks.length; j++) {
if(eval != null) {
results[j] = cvPacks[j].eval(i, eval);
}
else {
results[j] = cvPacks[j].eval(i);
}
}
evalHist[i] = aggCVResults(results);
logger.info(evalHist[i]);
}
return evalHist;
}
/**
* make an n-fold array of CVPack from random indices
* @param data original data
* @param nfold num of folds
* @param params booster parameters
* @param evalMetrics Evaluation metrics
* @return CV package array
*/
public static CVPack[] makeNFold(DMatrix data, int nfold, Iterable<Entry<String, Object>> params, String[] evalMetrics) throws XGBoostError {
List<Integer> samples = genRandPermutationNums(0, (int) data.rowNum());
int step = samples.size()/nfold;
int[] testSlice = new int[step];
int[] trainSlice = new int[samples.size()-step];
int testid, trainid;
CVPack[] cvPacks = new CVPack[nfold];
for(int i=0; i<nfold; i++) {
testid = 0;
trainid = 0;
for(int j=0; j<samples.size(); j++) {
//fold i owns the half-open index range [i*step, i*step+step); j>=(i*step) avoids skipping the first index of each fold
if(j>=(i*step) && j<(i*step+step) && testid<step) {
testSlice[testid] = samples.get(j);
testid++;
}
else {
if(trainid<samples.size()-step) {
trainSlice[trainid] = samples.get(j);
trainid++;
}
else {
testSlice[testid] = samples.get(j);
testid++;
}
}
}
DMatrix dtrain = data.slice(trainSlice);
DMatrix dtest = data.slice(testSlice);
CVPack cvPack = new CVPack(dtrain, dtest, params);
//set eval types
if(evalMetrics!=null) {
for(String type : evalMetrics) {
cvPack.booster.setParam("eval_metric", type);
}
}
cvPacks[i] = cvPack;
}
return cvPacks;
}
private static List<Integer> genRandPermutationNums(int start, int end) {
List<Integer> samples = new ArrayList<>();
for(int i=start; i<end; i++) {
samples.add(i);
}
Collections.shuffle(samples);
return samples;
}
/**
* Aggregate cross-validation results.
* @param results eval info from each data sample
* @return cross-validation eval info
*/
public static String aggCVResults(String[] results) {
Map<String, List<Float> > cvMap = new HashMap<>();
String aggResult = results[0].split("\t")[0];
for(String result : results) {
String[] items = result.split("\t");
for(int i=1; i<items.length; i++) {
String[] tup = items[i].split(":");
String key = tup[0];
Float value = Float.valueOf(tup[1]);
if(!cvMap.containsKey(key)) {
cvMap.put(key, new ArrayList<Float>());
}
cvMap.get(key).add(value);
}
}
for(String key : cvMap.keySet()) {
float value = 0f;
for(Float tvalue : cvMap.get(key)) {
value += tvalue;
}
value /= cvMap.get(key).size();
aggResult += String.format("\tcv-%s:%f", key, value);
}
return aggResult;
}
}
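
A usage sketch for Trainer (file path and parameter values are illustrative; imports as in BoosterTest later in this commit, which also gives a complete runnable variant). Both calls throw XGBoostError on native failures:

Map<String, Object> params = new HashMap<>();
params.put("eta", 0.3);
params.put("objective", "binary:logistic");
DMatrix dtrain = new DMatrix("train.libsvm.txt");  //assumed LibSVM-format training file
List<Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", dtrain));
//train for 10 rounds, no custom objective/evaluation
Booster booster = Trainer.train(params.entrySet(), dtrain, 10, watchs, null, null);
//5-fold cross-validation on the built-in "error" metric
String[] history = Trainer.crossValiation(params.entrySet(), dtrain, 10, 5, new String[]{"error"}, null, null);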

View File

@ -0,0 +1,26 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
/**
* custom error class for xgboost
* @author hzx
*/
public class XGBoostError extends Exception {
public XGBoostError(String message) {
super(message);
}
}
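
The wrapper convention throughout this commit is that each XgboostJNI call returns an int status code, with non-zero signalling failure and the message retrievable from the native side. A sketch of the check that callers presumably perform before wrapping failures in XGBoostError (the helper name is illustrative):

static void checkCall(int ret) throws XGBoostError {
if (ret != 0) {
//fetch the error message recorded by the last native call
throw new XGBoostError(XgboostJNI.XGBGetLastError());
}
}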

View File

@ -0,0 +1,50 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.wrapper;
/**
* xgboost jni wrapper functions for xgboost_wrapper.h
* change 2015-7-6: use a long[] (length = 1) as the container of a handle, to get the output DMatrix or Booster
* @author hzx
*/
public class XgboostJNI {
public final static native String XGBGetLastError();
public final static native int XGDMatrixCreateFromFile(String fname, int silent, long[] out);
public final static native int XGDMatrixCreateFromCSR(long[] indptr, int[] indices, float[] data, long[] out);
public final static native int XGDMatrixCreateFromCSC(long[] colptr, int[] indices, float[] data, long[] out);
public final static native int XGDMatrixCreateFromMat(float[] data, int nrow, int ncol, float missing, long[] out);
public final static native int XGDMatrixSliceDMatrix(long handle, int[] idxset, long[] out);
public final static native int XGDMatrixFree(long handle);
public final static native int XGDMatrixSaveBinary(long handle, String fname, int silent);
public final static native int XGDMatrixSetFloatInfo(long handle, String field, float[] array);
public final static native int XGDMatrixSetUIntInfo(long handle, String field, int[] array);
public final static native int XGDMatrixSetGroup(long handle, int[] group);
public final static native int XGDMatrixGetFloatInfo(long handle, String field, float[][] info);
public final static native int XGDMatrixGetUIntInfo(long handle, String field, int[][] info);
public final static native int XGDMatrixNumRow(long handle, long[] row);
public final static native int XGBoosterCreate(long[] handles, long[] out);
public final static native int XGBoosterFree(long handle);
public final static native int XGBoosterSetParam(long handle, String name, String value);
public final static native int XGBoosterUpdateOneIter(long handle, int iter, long dtrain);
public final static native int XGBoosterBoostOneIter(long handle, long dtrain, float[] grad, float[] hess);
public final static native int XGBoosterEvalOneIter(long handle, int iter, long[] dmats, String[] evnames, String[] eval_info);
public final static native int XGBoosterPredict(long handle, long dmat, int option_mask, long ntree_limit, float[][] predicts);
public final static native int XGBoosterLoadModel(long handle, String fname);
public final static native int XGBoosterSaveModel(long handle, String fname);
public final static native int XGBoosterLoadModelFromBuffer(long handle, long buf, long len);
public final static native int XGBoosterGetModelRaw(long handle, String[] out_string);
public final static native int XGBoosterDumpModel(long handle, String fmap, int with_stats, String[][] out_strings);
}
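
Per the class comment, native handles are returned as a single long packed into a length-1 array. A minimal sketch of the calling convention (file name illustrative; error codes left unchecked for brevity). Note the C++ side packs pointers through unsigned long, which assumes an LP64 platform:

long[] out = new long[1];
XgboostJNI.XGDMatrixCreateFromFile("train.libsvm.txt", 1, out);  //silent = 1
long dmatHandle = out[0];  //opaque native pointer
long[] nrow = new long[1];
XgboostJNI.XGDMatrixNumRow(dmatHandle, nrow);
XgboostJNI.XGDMatrixFree(dmatHandle);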

View File

@ -0,0 +1,108 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.junit.Test;
/**
* test cases for Booster
* @author hzx
*/
public class BoosterTest {
public static class EvalError implements IEvaluation {
private static final Log logger = LogFactory.getLog(EvalError.class);
String evalMetric = "custom_error";
public EvalError() {
}
@Override
public String getMetric() {
return evalMetric;
}
@Override
public float eval(float[][] predicts, DMatrix dmat) {
float error = 0f;
float[] labels;
try {
labels = dmat.getLabel();
} catch (XGBoostError ex) {
logger.error(ex);
return -1f;
}
int nrow = predicts.length;
for(int i=0; i<nrow; i++) {
if(labels[i]==0f && predicts[i][0]>0) {
error++;
}
else if(labels[i]==1f && predicts[i][0]<=0) {
error++;
}
}
return error/labels.length;
}
}
@Test
public void testBoosterBasic() throws XGBoostError {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//set params
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
Iterable<Entry<String, Object>> param = paramMap.entrySet();
//set watchList
List<Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//set round
int round = 2;
//train a boost model
Booster booster = Trainer.train(param, trainMat, round, watchs, null, null);
//predict raw output
float[][] predicts = booster.predict(testMat, true);
//eval
IEvaluation eval = new EvalError();
//error must be less than 0.1
TestCase.assertTrue(eval.eval(predicts, testMat)<0.1f);
}
}

View File

@ -0,0 +1,102 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.util.Arrays;
import java.util.Random;
import junit.framework.TestCase;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.junit.Test;
/**
* test cases for DMatrix
* @author hzx
*/
public class DMatrixTest {
@Test
public void testCreateFromFile() throws XGBoostError {
//create DMatrix from file
DMatrix dmat = new DMatrix("../../demo/data/agaricus.txt.test");
//get label
float[] labels = dmat.getLabel();
//check length
TestCase.assertTrue(dmat.rowNum()==labels.length);
//set weights
float[] weights = Arrays.copyOf(labels, labels.length);
dmat.setWeight(weights);
float[] dweights = dmat.getWeight();
TestCase.assertTrue(Arrays.equals(weights, dweights));
}
@Test
public void testCreateFromCSR() throws XGBoostError {
//create DMatrix from CSR format sparse matrix and labels
/**
* sparse matrix
* 1 0 2 3 0
* 4 0 2 3 5
* 3 1 2 5 0
*/
float[] data = new float[] {1, 2, 3, 4, 2, 3, 5, 3, 1, 2, 5};
int[] colIndex = new int[] {0, 2, 3, 0, 2, 3, 4, 0, 1, 2, 3};
long[] rowHeaders = new long[] {0, 3, 7, 11};
DMatrix dmat1 = new DMatrix(rowHeaders, colIndex, data, DMatrix.SparseType.CSR);
//check row num
System.out.println(dmat1.rowNum());
TestCase.assertTrue(dmat1.rowNum()==3);
//test set label
float[] label1 = new float[] {1, 0, 1};
dmat1.setLabel(label1);
float[] label2 = dmat1.getLabel();
TestCase.assertTrue(Arrays.equals(label1, label2));
}
@Test
public void testCreateFromDenseMatrix() throws XGBoostError {
//create DMatrix from 10*5 dense matrix
int nrow = 10;
int ncol = 5;
float[] data0 = new float[nrow*ncol];
//put random nums
Random random = new Random();
for(int i=0; i<nrow*ncol; i++) {
data0[i] = random.nextFloat();
}
//create label
float[] label0 = new float[nrow];
for(int i=0; i<nrow; i++) {
label0[i] = random.nextFloat();
}
DMatrix dmat0 = new DMatrix(data0, nrow, ncol);
dmat0.setLabel(label0);
//check
TestCase.assertTrue(dmat0.rowNum()==10);
TestCase.assertTrue(dmat0.getLabel().length==10);
//set weights for each instance
float[] weights = new float[nrow];
for(int i=0; i<nrow; i++) {
weights[i] = random.nextFloat();
}
dmat0.setWeight(weights);
TestCase.assertTrue(Arrays.equals(weights, dmat0.getWeight()));
}
}

680
java/xgboost4j_wrapper.cpp Normal file
View File

@ -0,0 +1,680 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <jni.h>
#include "../wrapper/xgboost_wrapper.h"
#include "xgboost4j_wrapper.h"
JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBGetLastError
(JNIEnv *jenv, jclass jcls) {
jstring jresult = 0 ;
char* result = 0;
result = (char *)XGBGetLastError();
if (result) jresult = jenv->NewStringUTF((const char *)result);
return jresult;
}
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile
(JNIEnv *jenv, jclass jcls, jstring jfname, jint jsilent, jlongArray jout) {
jint jresult = 0 ;
char *fname = (char *) 0 ;
int silent;
void* result[1];
unsigned long out[1];
fname = (char *)jenv->GetStringUTFChars(jfname, 0);
silent = (int)jsilent;
jresult = (jint) XGDMatrixCreateFromFile((char const *)fname, silent, result);
*(void **)&out[0] = *result;
if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname);
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromCSR
* Signature: ([J[J[F)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) {
jint jresult = 0 ;
bst_ulong nindptr ;
bst_ulong nelem;
void *result[1];
unsigned long out[1];
jlong* indptr = jenv->GetLongArrayElements(jindptr, 0);
jint* indices = jenv->GetIntArrayElements(jindices, 0);
jfloat* data = jenv->GetFloatArrayElements(jdata, 0);
nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
nelem = (bst_ulong)jenv->GetArrayLength(jdata);
jresult = (jint) XGDMatrixCreateFromCSR((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem, result);
*(void **)&out[0] = *result;
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out);
//release
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
jenv->ReleaseIntArrayElements(jindices, indices, 0);
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromCSC
* Signature: ([J[J[F)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) {
jint jresult = 0;
bst_ulong nindptr ;
bst_ulong nelem;
void *result[1];
unsigned long out[1];
jlong* indptr = jenv->GetLongArrayElements(jindptr, NULL);
jint* indices = jenv->GetIntArrayElements(jindices, 0);
jfloat* data = jenv->GetFloatArrayElements(jdata, NULL);
nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
nelem = (bst_ulong)jenv->GetArrayLength(jdata);
jresult = (jint) XGDMatrixCreateFromCSC((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem, result);
*(void **)&out[0] = *result;
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out);
//release
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
jenv->ReleaseIntArrayElements(jindices, indices, 0);
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromMat
* Signature: ([FIIF)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat
(JNIEnv *jenv, jclass jcls, jfloatArray jdata, jint jnrow, jint jncol, jfloat jmiss, jlongArray jout) {
jint jresult = 0 ;
bst_ulong nrow ;
bst_ulong ncol ;
float miss ;
void *result[1];
unsigned long out[1];
jfloat* data = jenv->GetFloatArrayElements(jdata, 0);
nrow = (bst_ulong)jnrow;
ncol = (bst_ulong)jncol;
miss = (float)jmiss;
jresult = (jint) XGDMatrixCreateFromMat((float const *)data, nrow, ncol, miss, result);
*(void **)&out[0] = *result;
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out);
//release
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSliceDMatrix
* Signature: (J[I)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix
(JNIEnv *jenv, jclass jcls, jlong jhandle, jintArray jindexset, jlongArray jout) {
jint jresult = 0 ;
void *handle = (void *) 0 ;
bst_ulong len;
void *result[1];
unsigned long out[1];
jint* indexset = jenv->GetIntArrayElements(jindexset, 0);
handle = *(void **)&jhandle;
len = (bst_ulong)jenv->GetArrayLength(jindexset);
jresult = (jint) XGDMatrixSliceDMatrix(handle, (int const *)indexset, len, result);
*(void **)&out[0] = *result;
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out);
//release
jenv->ReleaseIntArrayElements(jindexset, indexset, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixFree
* Signature: (J)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree
(JNIEnv *jenv, jclass jcls, jlong jhandle) {
jint jresult = 0;
void *handle = (void *) 0 ;
handle = *(void **)&jhandle;
jresult = (jint) XGDMatrixFree(handle);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSaveBinary
* Signature: (JLjava/lang/String;I)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname, jint jsilent) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *fname = (char *) 0 ;
int silent ;
handle = *(void **)&jhandle;
fname = 0;
fname = (char *)jenv->GetStringUTFChars(jfname, 0);
silent = (int)jsilent;
jresult = (jint) XGDMatrixSaveBinary(handle, (char const *)fname, silent);
if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetFloatInfo
* Signature: (JLjava/lang/String;[F)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jfloatArray jarray) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *field = (char *) 0 ;
bst_ulong len;
handle = *(void **)&jhandle;
field = (char *)jenv->GetStringUTFChars(jfield, 0);
jfloat* array = jenv->GetFloatArrayElements(jarray, NULL);
len = (bst_ulong)jenv->GetArrayLength(jarray);
jresult = (jint) XGDMatrixSetFloatInfo(handle, (char const *)field, (float const *)array, len);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
jenv->ReleaseFloatArrayElements(jarray, array, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetUIntInfo
* Signature: (JLjava/lang/String;[I)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jintArray jarray) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *field = (char *) 0 ;
bst_ulong len ;
handle = *(void **)&jhandle;
field = 0;
field = (char *)jenv->GetStringUTFChars(jfield, 0);
jint* array = jenv->GetIntArrayElements(jarray, NULL);
len = (bst_ulong)jenv->GetArrayLength(jarray);
jresult = (jint) XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
jenv->ReleaseIntArrayElements(jarray, array, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetGroup
* Signature: (J[I)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup
(JNIEnv * jenv, jclass jcls, jlong jhandle, jintArray jarray) {
jint jresult = 0;
void *handle = (void *) 0 ;
bst_ulong len ;
handle = *(void **)&jhandle;
jint* array = jenv->GetIntArrayElements(jarray, NULL);
len = (bst_ulong)jenv->GetArrayLength(jarray);
jresult = (jint) XGDMatrixSetGroup(handle, (unsigned int const *)array, len);
//release
jenv->ReleaseIntArrayElements(jarray, array, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixGetFloatInfo
* Signature: (JLjava/lang/String;)[F
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *field = (char *) 0 ;
bst_ulong len[1];
*len = 0;
float *result[1];
handle = *(void **)&jhandle;
field = 0;
if (jfield) {
field = (char *)jenv->GetStringUTFChars(jfield, 0);
if (!field) return 0;
}
jresult = (jint) XGDMatrixGetFloatInfo(handle, (char const *)field, len, (const float **) result);
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
jsize jlen = (jsize)*len;
jfloatArray jarray = jenv->NewFloatArray(jlen);
jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) *result);
jenv->SetObjectArrayElement(jout, 0, (jobject) jarray);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixGetUIntInfo
* Signature: (JLjava/lang/String;)[I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *field = (char *) 0 ;
bst_ulong len[1];
*len = 0;
unsigned int *result[1];
handle = *(void **)&jhandle;
field = (char *)jenv->GetStringUTFChars(jfield, 0);
jresult = (jint) XGDMatrixGetUIntInfo(handle, (char const *)field, len, (const unsigned int **) result);
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
jsize jlen = (jsize)*len;
jintArray jarray = jenv->NewIntArray(jlen);
jenv->SetIntArrayRegion(jarray, 0, jlen, (jint *) *result);
jenv->SetObjectArrayElement(jout, 0, jarray);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixNumRow
* Signature: (J)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlongArray jout) {
jint jresult = 0 ;
void *handle = (void *) 0 ;
bst_ulong result[1];
handle = *(void **)&jhandle;
jresult = (jint) XGDMatrixNumRow(handle, result);
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) result);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterCreate
* Signature: ([J)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate
(JNIEnv *jenv, jclass jcls, jlongArray jhandles, jlongArray jout) {
jint jresult = 0;
void **handles = 0;
bst_ulong len = 0;
void *result[1];
jlong* cjhandles = 0;
unsigned long out[1];
if(jhandles) {
len = (bst_ulong)jenv->GetArrayLength(jhandles);
handles = new void*[len];
//put handle from jhandles to chandles
cjhandles = jenv->GetLongArrayElements(jhandles, 0);
for(bst_ulong i=0; i<len; i++) {
handles[i] = *(void **)&cjhandles[i];
}
}
jresult = (jint) XGBoosterCreate(handles, len, result);
//release
if(jhandles) {
delete[] handles;
jenv->ReleaseLongArrayElements(jhandles, cjhandles, 0);
}
*(void **)&out[0] = *result;
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterFree
* Signature: (J)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree
(JNIEnv *jenv, jclass jcls, jlong jhandle) {
void *handle = (void *) 0 ;
handle = *(void **)&jhandle;
return (jint) XGBoosterFree(handle);
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterSetParam
* Signature: (JLjava/lang/String;Ljava/lang/String;)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jname, jstring jvalue) {
jint jresult = -1;
void *handle = (void *) 0 ;
char *name = (char *) 0 ;
char *value = (char *) 0 ;
handle = *(void **)&jhandle;
name = (char *)jenv->GetStringUTFChars(jname, 0);
value = (char *)jenv->GetStringUTFChars(jvalue, 0);
jresult = (jint) XGBoosterSetParam(handle, (char const *)name, (char const *)value);
if (name) jenv->ReleaseStringUTFChars(jname, (const char *)name);
if (value) jenv->ReleaseStringUTFChars(jvalue, (const char *)value);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterUpdateOneIter
* Signature: (JIJ)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter
(JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlong jdtrain) {
void *handle = (void *) 0 ;
int iter ;
void *dtrain = (void *) 0 ;
handle = *(void **)&jhandle;
iter = (int)jiter;
dtrain = *(void **)&jdtrain;
return (jint) XGBoosterUpdateOneIter(handle, iter, dtrain);
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterBoostOneIter
* Signature: (JJ[F[F)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdtrain, jfloatArray jgrad, jfloatArray jhess) {
jint jresult = 0;
void *handle = (void *) 0 ;
void *dtrain = (void *) 0 ;
bst_ulong len ;
handle = *(void **)&jhandle;
dtrain = *(void **)&jdtrain;
jfloat* grad = jenv->GetFloatArrayElements(jgrad, 0);
jfloat* hess = jenv->GetFloatArrayElements(jhess, 0);
len = (bst_ulong)jenv->GetArrayLength(jgrad);
jresult = (jint) XGBoosterBoostOneIter(handle, dtrain, grad, hess, len);
//release
jenv->ReleaseFloatArrayElements(jgrad, grad, 0);
jenv->ReleaseFloatArrayElements(jhess, hess, 0);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterEvalOneIter
* Signature: (JI[J[Ljava/lang/String;)Ljava/lang/String;
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter
(JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlongArray jdmats, jobjectArray jevnames, jobjectArray jout) {
jint jresult = 0 ;
void *handle = (void *) 0 ;
int iter ;
void **dmats = 0;
char **evnames = 0;
bst_ulong len ;
char *result[1];
handle = *(void **)&jhandle;
iter = (int)jiter;
len = (bst_ulong)jenv->GetArrayLength(jdmats);
if(len > 0) {
dmats = new void*[len];
evnames = new char*[len];
}
//put handle from jhandles to chandles
jlong* cjdmats = jenv->GetLongArrayElements(jdmats, 0);
for(bst_ulong i=0; i<len; i++) {
dmats[i] = *(void **)&cjdmats[i];
}
//transfer jObjectArray to char**
for(bst_ulong i=0; i<len; i++) {
jstring jevname = (jstring)jenv->GetObjectArrayElement(jevnames, i);
evnames[i] = (char *)jenv->GetStringUTFChars(jevname, 0);
}
jresult = (jint) XGBoosterEvalOneIter(handle, iter, dmats, (char const *(*))evnames, len, (const char **) result);
if(len > 0) {
delete[] dmats;
//release string chars
for(bst_ulong i=0; i<len; i++) {
jstring jevname = (jstring)jenv->GetObjectArrayElement(jevnames, i);
jenv->ReleaseStringUTFChars(jevname, (const char*)evnames[i]);
}
delete[] evnames;
jenv->ReleaseLongArrayElements(jdmats, cjdmats, 0);
}
jstring jinfo = 0;
if (*result) jinfo = jenv->NewStringUTF((const char *) *result);
jenv->SetObjectArrayElement(jout, 0, jinfo);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterPredict
* Signature: (JJIJ)[F
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdmat, jint joption_mask, jlong jntree_limit, jobjectArray jout) {
jint jresult = 0;
void *handle = (void *) 0 ;
void *dmat = (void *) 0 ;
int option_mask ;
unsigned int ntree_limit ;
bst_ulong len[1];
*len = 0;
float *result[1];
handle = *(void **)&jhandle;
dmat = *(void **)&jdmat;
option_mask = (int)joption_mask;
ntree_limit = (unsigned int)jntree_limit;
jresult = (jint) XGBoosterPredict(handle, dmat, option_mask, ntree_limit, len, (const float **) result);
jsize jlen = (jsize)*len;
jfloatArray jarray = jenv->NewFloatArray(jlen);
jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) *result);
jenv->SetObjectArrayElement(jout, 0, jarray);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterLoadModel
* Signature: (JLjava/lang/String;)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *fname = (char *) 0 ;
handle = *(void **)&jhandle;
fname = (char *)jenv->GetStringUTFChars(jfname, 0);
jresult = (jint) XGBoosterLoadModel(handle,(char const *)fname);
if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterSaveModel
* Signature: (JLjava/lang/String;)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *fname = (char *) 0 ;
handle = *(void **)&jhandle;
fname = 0;
fname = (char *)jenv->GetStringUTFChars(jfname, 0);
jresult = (jint) XGBoosterSaveModel(handle, (char const *)fname);
if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname);
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterLoadModelFromBuffer
* Signature: (JJJ)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jbuf, jlong jlen) {
void *handle = (void *) 0 ;
void *buf = (void *) 0 ;
bst_ulong len ;
handle = *(void **)&jhandle;
buf = *(void **)&jbuf;
len = (bst_ulong)jlen;
return (jint) XGBoosterLoadModelFromBuffer(handle, (void const *)buf, len);
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterGetModelRaw
* Signature: (J)Ljava/lang/String;
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw
(JNIEnv * jenv, jclass jcls, jlong jhandle, jobjectArray jout) {
jint jresult = 0 ;
jstring jinfo = 0;
void *handle = (void *) 0 ;
bst_ulong len[1];
*len = 0;
char *result[1];
handle = *(void **)&jhandle;
jresult = (jint)XGBoosterGetModelRaw(handle, len, (const char **) result);
if (*result){
jinfo = jenv->NewStringUTF((const char *) *result);
jenv->SetObjectArrayElement(jout, 0, jinfo);
}
return jresult;
}
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterDumpModel
* Signature: (JLjava/lang/String;I)[Ljava/lang/String;
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfmap, jint jwith_stats, jobjectArray jout) {
jint jresult = 0;
void *handle = (void *) 0 ;
char *fmap = (char *) 0 ;
int with_stats ;
bst_ulong len[1];
*len = 0;
char **result[1];
handle = *(void **)&jhandle;
fmap = 0;
if (jfmap) {
fmap = (char *)jenv->GetStringUTFChars(jfmap, 0);
if (!fmap) return 0;
}
with_stats = (int)jwith_stats;
jresult = (jint) XGBoosterDumpModel(handle, (const char *)fmap, with_stats, len, (const char ***) result);
jsize jlen = (jsize)*len;
jobjectArray jinfos = jenv->NewObjectArray(jlen, jenv->FindClass("java/lang/String"), jenv->NewStringUTF(""));
for(int i=0 ; i<jlen; i++) {
jenv->SetObjectArrayElement(jinfos, i, jenv->NewStringUTF((const char*) result[0][i]));
}
jenv->SetObjectArrayElement(jout, 0, jinfos);
if (fmap) jenv->ReleaseStringUTFChars(jfmap, (const char *)fmap);
return jresult;
}

221
java/xgboost4j_wrapper.h Normal file
View File

@ -0,0 +1,221 @@
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_dmlc_xgboost4j_wrapper_XgboostJNI */
#ifndef _Included_org_dmlc_xgboost4j_wrapper_XgboostJNI
#define _Included_org_dmlc_xgboost4j_wrapper_XgboostJNI
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBGetLastError
* Signature: ()Ljava/lang/String;
*/
JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBGetLastError
(JNIEnv *, jclass);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromFile
* Signature: (Ljava/lang/String;I[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile
(JNIEnv *, jclass, jstring, jint, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromCSR
* Signature: ([J[I[F[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromCSC
* Signature: ([J[I[F[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromMat
* Signature: ([FIIF[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat
(JNIEnv *, jclass, jfloatArray, jint, jint, jfloat, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSliceDMatrix
* Signature: (J[I[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix
(JNIEnv *, jclass, jlong, jintArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixFree
* Signature: (J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree
(JNIEnv *, jclass, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSaveBinary
* Signature: (JLjava/lang/String;I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary
(JNIEnv *, jclass, jlong, jstring, jint);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetFloatInfo
* Signature: (JLjava/lang/String;[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo
(JNIEnv *, jclass, jlong, jstring, jfloatArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetUIntInfo
* Signature: (JLjava/lang/String;[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo
(JNIEnv *, jclass, jlong, jstring, jintArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetGroup
* Signature: (J[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup
(JNIEnv *, jclass, jlong, jintArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixGetFloatInfo
* Signature: (JLjava/lang/String;[[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixGetUIntInfo
* Signature: (JLjava/lang/String;[[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixNumRow
* Signature: (J[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow
(JNIEnv *, jclass, jlong, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterCreate
* Signature: ([J[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate
(JNIEnv *, jclass, jlongArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterFree
* Signature: (J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree
(JNIEnv *, jclass, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterSetParam
* Signature: (JLjava/lang/String;Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam
(JNIEnv *, jclass, jlong, jstring, jstring);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterUpdateOneIter
* Signature: (JIJ)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter
(JNIEnv *, jclass, jlong, jint, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterBoostOneIter
* Signature: (JJ[F[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter
(JNIEnv *, jclass, jlong, jlong, jfloatArray, jfloatArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterEvalOneIter
* Signature: (JI[J[Ljava/lang/String;[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter
(JNIEnv *, jclass, jlong, jint, jlongArray, jobjectArray, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterPredict
* Signature: (JJIJ[[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict
(JNIEnv *, jclass, jlong, jlong, jint, jlong, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterLoadModel
* Signature: (JLjava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel
(JNIEnv *, jclass, jlong, jstring);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterSaveModel
* Signature: (JLjava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel
(JNIEnv *, jclass, jlong, jstring);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterLoadModelFromBuffer
* Signature: (JJJ)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer
(JNIEnv *, jclass, jlong, jlong, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterGetModelRaw
* Signature: (J[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw
(JNIEnv *, jclass, jlong, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterDumpModel
* Signature: (JLjava/lang/String;I[[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel
(JNIEnv *, jclass, jlong, jstring, jint, jobjectArray);
#ifdef __cplusplus
}
#endif
#endif

14
scripts/travis_R_script.sh Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
# Test R package of xgboost
set -e
export _R_CHECK_TIMINGS_=0
export R_BUILD_ARGS="--no-build-vignettes --no-manual"
export R_CHECK_ARGS="--no-vignettes --no-manual"
curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
chmod 755 ./travis-tool.sh
./travis-tool.sh bootstrap
make Rpack
cd ./xgboost
../travis-tool.sh install_deps
../travis-tool.sh run_tests

View File

@ -0,0 +1,5 @@
#!/bin/bash
if [ ${TASK} == "R-package" ]; then
cat R-package/xgboost.Rcheck/*.log
fi

7
scripts/travis_java_script.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Test java package of xgboost
set -e
cd java
./create_wrap.sh
cd xgboost4j
mvn clean install -DskipTests=true
mvn test

33
scripts/travis_script.sh Executable file
View File

@ -0,0 +1,33 @@
#!/bin/bash
# main script of travis
if [ ${TASK} == "lint" ]; then
make lint || exit -1
fi
if [ ${TASK} == "build" ]; then
make all CXX=${CXX} || exit -1
fi
if [ ${TASK} == "build-with-dmlc" ]; then
cd dmlc-core
cp make/config.mk .
echo "USE_S3=1" >> config.mk
make all CXX=${CXX}|| exit -1
cd ..
make dmlc=dmlc-core CXX=${CXX} || exit -1
fi
if [ ${TASK} == "R-package" ]; then
scripts/travis_R_script.sh || exit -1
fi
if [ ${TASK} == "python-package" ]; then
make all CXX=${CXX} || exit -1
nosetests tests/python || exit -1
fi
if [ ${TASK} == "java-package" ]; then
make java CXX=${CXX} || exit -1
scripts/travis_java_script.sh || exit -1
fi

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_DATA_H
#define XGBOOST_DATA_H
/*!
* Copyright (c) 2014 by Contributors
* \file data.h
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_H_
#define XGBOOST_DATA_H_
#include <cstdio>
#include <vector>
#include "utils/utils.h"
@ -32,7 +34,7 @@ struct bst_gpair {
bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {}
};
/*!
/*!
* \brief extra information that might needed by gbm and tree module
* these information are not necessarily presented, and can be empty
*/
@ -102,7 +104,7 @@ struct RowBatch : public SparseBatch {
return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i+1] - ind_ptr[i]));
}
};
/*!
/*!
* \brief read-only column batch, used to access columns,
* the columns are not required to be continuous
*/
@ -131,7 +133,7 @@ class IFMatrix {
/*!\brief get column iterator */
virtual utils::IIterator<ColBatch> *ColIterator(void) = 0;
/*!
* \brief get the column iterator associated with FMatrix with subset of column features
* \brief get the column iterator associated with FMatrix with subset of column features
* \param fset is the list of column index set that must be contained in the returning Column iterator
* \return the column iterator, initialized so that it reads the elements in fset
*/
@ -154,11 +156,11 @@ class IFMatrix {
/*! \brief get number of non-missing entries in column */
virtual size_t GetColSize(size_t cidx) const = 0;
/*! \brief get column density */
virtual float GetColDensity(size_t cidx) const = 0;
virtual float GetColDensity(size_t cidx) const = 0;
/*! \brief reference of buffered rowset */
virtual const std::vector<bst_uint> &buffered_rowset(void) const = 0;
// virtual destructor
virtual ~IFMatrix(void){}
};
} // namespace xgboost
#endif // XGBOOST_DATA_H
#endif // XGBOOST_DATA_H_

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
#define XGBOOST_GBM_GBLINEAR_INL_HPP_
/*!
* Copyright by Contributors
* \file gblinear-inl.hpp
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun)
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
#define XGBOOST_GBM_GBLINEAR_INL_HPP_
#include <vector>
#include <string>
#include <sstream>
@ -33,10 +35,10 @@ class GBLinear : public IGradBooster {
model.param.SetParam(name, val);
}
}
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) {
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
model.LoadModel(fi);
}
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
model.SaveModel(fo);
}
virtual void InitModel(void) {
@ -92,7 +94,8 @@ class GBLinear : public IGradBooster {
sum_hess += p.hess * v * v;
}
float &w = model[fid][gid];
bst_float dw = static_cast<bst_float>(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w));
bst_float dw = static_cast<bst_float>(param.learning_rate *
param.CalcDelta(sum_grad, sum_hess, w));
w += dw;
// update grad value
for (bst_uint j = 0; j < col.length; ++j) {
@ -258,12 +261,12 @@ class GBLinear : public IGradBooster {
std::fill(weight.begin(), weight.end(), 0.0f);
}
// save the model to file
inline void SaveModel(utils::IStream &fo) const {
inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
fo.Write(&param, sizeof(Param));
fo.Write(weight);
}
// load model from file
inline void LoadModel(utils::IStream &fi) {
inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
utils::Assert(fi.Read(&param, sizeof(Param)) != 0, "Load LinearBooster");
fi.Read(&weight);
}

View File

@ -1,3 +1,4 @@
// Copyright by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX

View File

@ -1,11 +1,14 @@
#ifndef XGBOOST_GBM_GBM_H_
#define XGBOOST_GBM_GBM_H_
/*!
* Copyright by Contributors
* \file gbm.h
* \brief interface of gradient booster, that learns through gradient statistics
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_GBM_H_
#define XGBOOST_GBM_GBM_H_
#include <vector>
#include <string>
#include "../data.h"
#include "../utils/io.h"
#include "../utils/fmap.h"
@ -13,7 +16,7 @@
namespace xgboost {
/*! \brief namespace for gradient booster */
namespace gbm {
/*!
/*!
* \brief interface of gradient boosting model
*/
class IGradBooster {
@ -29,26 +32,26 @@ class IGradBooster {
* \param fi input stream
* \param with_pbuffer whether the incoming data contains pbuffer
*/
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0;
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; // NOLINT(*)
/*!
* \brief save model to stream
* \param fo output stream
* \param with_pbuffer whether save out pbuffer
*/
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0;
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; // NOLINT(*)
/*!
* \brief initialize the model
*/
virtual void InitModel(void) = 0;
/*!
/*!
* \brief reset the predict buffer
* this will invalidate all the previous cached results
* and recalculate from scratch
*/
virtual void ResetPredBuffer(size_t num_pbuffer) {}
/*!
/*!
* \brief whether the model allow lazy checkpoint
* return true if model is only updated in DoBoost
* return true if model is only updated in DoBoost
* after all Allreduce calls
*/
virtual bool AllowLazyCheckPoint(void) const {
@ -76,20 +79,20 @@ class IGradBooster {
* the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
* \param info extra side information that may be needed for prediction
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
*/
virtual void Predict(IFMatrix *p_fmat,
int64_t buffer_offset,
const BoosterInfo &info,
std::vector<float> *out_preds,
unsigned ntree_limit = 0) = 0;
unsigned ntree_limit = 0) = 0;
/*!
* \brief online prediction funciton, predict score for one instance at a time
* NOTE: use the batch prediction interface if possible, batch prediction is usually
* more efficient than online prediction
* This function is NOT threadsafe, make sure you only call from one thread
*
*
* \param inst the instance you want to predict
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction
@ -106,7 +109,7 @@ class IGradBooster {
* \param p_fmat feature matrix
* \param info extra side information that may be needed for prediction
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
*/
virtual void PredictLeaf(IFMatrix *p_fmat,

View File

@ -1,13 +1,16 @@
#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
#define XGBOOST_GBM_GBTREE_INL_HPP_
/*!
* Copyright by Contributors
* \file gbtree-inl.hpp
* \brief gradient boosted tree implementation
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
#define XGBOOST_GBM_GBTREE_INL_HPP_
#include <vector>
#include <utility>
#include <string>
#include <limits>
#include "./gbm.h"
#include "../utils/omp.h"
#include "../tree/updater.h"
@ -39,7 +42,7 @@ class GBTree : public IGradBooster {
tparam.SetParam(name, val);
if (trees.size() == 0) mparam.SetParam(name, val);
}
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) {
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
this->Clear();
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
"GBTree: invalid model file");
@ -62,10 +65,10 @@ class GBTree : public IGradBooster {
"GBTree: invalid model file");
}
}
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
utils::Assert(mparam.num_trees == static_cast<int>(trees.size()), "GBTree");
if (with_pbuffer) {
fo.Write(&mparam, sizeof(ModelParam));
fo.Write(&mparam, sizeof(ModelParam));
} else {
ModelParam p = mparam;
p.num_pbuffer = 0;
@ -129,7 +132,7 @@ class GBTree : public IGradBooster {
int64_t buffer_offset,
const BoosterInfo &info,
std::vector<float> *out_preds,
unsigned ntree_limit = 0) {
unsigned ntree_limit = 0) {
int nthread;
#pragma omp parallel
{
@ -160,12 +163,12 @@ class GBTree : public IGradBooster {
this->Pred(batch[i],
buffer_offset < 0 ? -1 : buffer_offset + ridx,
gid, info.GetRoot(ridx), &feats,
&preds[ridx * mparam.num_output_group + gid], stride,
&preds[ridx * mparam.num_output_group + gid], stride,
ntree_limit);
}
}
}
}
}
virtual void Predict(const SparseBatch::Inst &inst,
std::vector<float> *out_preds,
unsigned ntree_limit,
@ -178,10 +181,10 @@ class GBTree : public IGradBooster {
// loop over output groups
for (int gid = 0; gid < mparam.num_output_group; ++gid) {
this->Pred(inst, -1, gid, root_index, &thread_temp[0],
&(*out_preds)[gid], mparam.num_output_group,
&(*out_preds)[gid], mparam.num_output_group,
ntree_limit);
}
}
}
virtual void PredictLeaf(IFMatrix *p_fmat,
const BoosterInfo &info,
std::vector<float> *out_preds,
@ -196,7 +199,6 @@ class GBTree : public IGradBooster {
thread_temp[i].Init(mparam.num_feature);
}
this->PredPath(p_fmat, info, out_preds, ntree_limit);
}
virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
std::vector<std::string> dump;
@ -260,7 +262,7 @@ class GBTree : public IGradBooster {
// update the trees
for (size_t i = 0; i < updaters.size(); ++i) {
updaters[i]->Update(gpair, p_fmat, info, new_trees);
}
}
// optimization, update buffer, if possible
// this is only under distributed column mode
// for safety check of lazy checkpoint
@ -287,7 +289,7 @@ class GBTree : public IGradBooster {
}
// update buffer by pre-cached position
inline void UpdateBufferByPosition(IFMatrix *p_fmat,
int64_t buffer_offset,
int64_t buffer_offset,
int bst_group,
const tree::RegTree &new_tree,
const int* leaf_position) {
@ -313,11 +315,11 @@ class GBTree : public IGradBooster {
int bst_group,
unsigned root_index,
tree::RegTree::FVec *p_feats,
float *out_pred, size_t stride,
float *out_pred, size_t stride,
unsigned ntree_limit) {
size_t itop = 0;
float psum = 0.0f;
// sum of leaf vector
// sum of leaf vector
std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
// number of valid trees
@ -339,7 +341,7 @@ class GBTree : public IGradBooster {
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
vec_psum[j] += trees[i]->leafvec(tid)[j];
}
if(--treeleft == 0) break;
if (--treeleft == 0) break;
}
}
p_feats->Drop(inst);
@ -365,7 +367,7 @@ class GBTree : public IGradBooster {
// number of valid trees
if (ntree_limit == 0 || ntree_limit > trees.size()) {
ntree_limit = static_cast<unsigned>(trees.size());
}
}
std::vector<float> &preds = *out_preds;
preds.resize(info.num_row * ntree_limit);
// start collecting the prediction
@ -389,7 +391,7 @@ class GBTree : public IGradBooster {
}
}
}
// --- data structure ---
/*! \brief training parameters */
struct TrainParam {
@ -442,10 +444,10 @@ class GBTree : public IGradBooster {
int num_feature;
/*! \brief size of predicton buffer allocated used for buffering */
int64_t num_pbuffer;
/*!
/*!
* \brief how many output group a single instance can produce
* this affects the behavior of number of output we have:
* suppose we have n instance and k group, output will be k*n
* suppose we have n instance and k group, output will be k*n
*/
int num_output_group;
/*! \brief size of leaf vector needed in tree */
@ -478,8 +480,8 @@ class GBTree : public IGradBooster {
inline size_t PredBufferSize(void) const {
return num_output_group * num_pbuffer * (size_leaf_vector + 1);
}
/*!
* \brief get the buffer offset given a buffer index and group id
* \return calculated buffer offset
*/
inline int64_t BufferOffset(int64_t buffer_index, int bst_group) const {
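
The indexing in the hunks above, preds[ridx * mparam.num_output_group + gid], lays multi-class output out row-major: each row owns one contiguous slot per output group, and Pred accumulates leaf values from at most ntree_limit trees. A standalone sketch of both conventions (toy values throughout; none of these names are the xgboost API):

#include <cstdio>
#include <vector>

int main() {
  const int num_output_group = 3;   // e.g. a 3-class model
  const size_t num_row = 2;
  std::vector<float> preds(num_row * num_output_group, 0.0f);
  // hypothetical per-tree leaf values; tree i contributes to group i % 3
  const float leaf[6] = {0.5f, -0.2f, 0.1f, 0.3f, 0.0f, -0.4f};
  unsigned ntrees = 6;
  unsigned ntree_limit = 4;         // 0 would mean "use all trees"
  if (ntree_limit == 0 || ntree_limit > ntrees) ntree_limit = ntrees;
  for (size_t ridx = 0; ridx < num_row; ++ridx) {
    for (unsigned i = 0; i < ntree_limit; ++i) {
      preds[ridx * num_output_group + (i % num_output_group)] += leaf[i];
    }
  }
  for (size_t ridx = 0; ridx < num_row; ++ridx)
    for (int gid = 0; gid < num_output_group; ++gid)
      std::printf("row %zu group %d -> %g\n", ridx, gid,
                  preds[ridx * num_output_group + gid]);
  return 0;
}
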

View File

@ -1,6 +1,8 @@
// Copyright by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX
#include <string>
#include "../utils/io.h"
// implements a single no split version of DMLC
@ -9,7 +11,7 @@
namespace xgboost {
namespace utils {
/*!
* \brief line split implementation from single FILE
* simply returns lines of files, used for stdin
*/
class SingleFileSplit : public dmlc::InputSplit {
@ -32,7 +34,7 @@ class SingleFileSplit : public dmlc::InputSplit {
}
virtual size_t Read(void *ptr, size_t size) {
return std::fread(ptr, 1, size, fp_);
}
virtual void Write(const void *ptr, size_t size) {
utils::Error("cannot do write in inputsplit");
}
@ -47,13 +49,13 @@ class SingleFileSplit : public dmlc::InputSplit {
chunk_end_);
out_rec->dptr = chunk_begin_;
out_rec->size = next - chunk_begin_;
chunk_begin_ = next;
return true;
}
virtual bool NextChunk(Blob *out_chunk) {
if (chunk_begin_ == chunk_end_) {
if (!LoadChunk()) return false;
}
out_chunk->dptr = chunk_begin_;
out_chunk->size = chunk_end_ - chunk_begin_;
chunk_begin_ = chunk_end_;
@ -64,8 +66,8 @@ class SingleFileSplit : public dmlc::InputSplit {
if (max_size <= overflow_.length()) {
*size = 0; return true;
}
if (overflow_.length() != 0) {
std::memcpy(buf, BeginPtr(overflow_), overflow_.length());
}
size_t olen = overflow_.length();
overflow_.resize(0);
@ -88,13 +90,13 @@ class SingleFileSplit : public dmlc::InputSplit {
return true;
}
}
protected:
inline const char* FindLastRecordBegin(const char *begin,
const char *end) {
if (begin == end) return begin;
for (const char *p = end - 1; p != begin; --p) {
if (*p == '\n' || *p == '\r') return p + 1;
}
return begin;
}
@ -143,7 +145,7 @@ class StdFile : public dmlc::Stream {
public:
explicit StdFile(std::FILE *fp, bool use_stdio)
: fp(fp), use_stdio(use_stdio) {
}
virtual ~StdFile(void) {
this->Close();
}
@ -154,7 +156,7 @@ class StdFile : public dmlc::Stream {
std::fwrite(ptr, size, 1, fp);
}
virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET);
std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
}
virtual size_t Tell(void) {
return std::ftell(fp);
@ -197,7 +199,7 @@ Stream *Stream::Create(const char *fname, const char * const mode, bool allow_nu
"to use hdfs, s3 or distributed version, compile with make dmlc=1";
utils::Check(strncmp(fname, "s3://", 5) != 0, msg);
utils::Check(strncmp(fname, "hdfs://", 7) != 0, msg);
std::FILE *fp = NULL;
bool use_stdio = false;
using namespace std;
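
The FindLastRecordBegin helper shown above is the key to safe chunking: the tail after the last newline is never handed out as a record, it is carried over to the next read, so a chunk boundary cannot split a line. A compilable restatement of the same logic:

#include <cassert>
#include <cstring>

const char* FindLastRecordBegin(const char* begin, const char* end) {
  if (begin == end) return begin;
  for (const char* p = end - 1; p != begin; --p) {
    if (*p == '\n' || *p == '\r') return p + 1;
  }
  return begin;
}

int main() {
  const char* chunk = "line1\nline2\nline3-partial";
  const char* last = FindLastRecordBegin(chunk, chunk + std::strlen(chunk));
  assert(std::strcmp(last, "line3-partial") == 0);  // tail kept for next read
  return 0;
}
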

View File

@ -1,3 +1,4 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX
@ -17,7 +18,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
const char *cache_file) {
using namespace std;
std::string fname_ = fname;
const char *dlm = strchr(fname, '#');
if (dlm != NULL) {
utils::Check(strchr(dlm + 1, '#') == NULL,
@ -29,7 +30,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
cache_file = dlm +1;
}
if (cache_file == NULL) {
if (!std::strcmp(fname, "stdin") ||
!std::strncmp(fname, "s3://", 5) ||
!std::strncmp(fname, "hdfs://", 7) ||
@ -42,7 +43,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
utils::FileStream fs(utils::FopenCheck(fname, "rb"));
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
fs.Seek(0);
if (magic == DMatrixSimple::kMagic) {
DMatrixSimple *dmat = new DMatrixSimple();
dmat->LoadBinary(fs, silent, fname);
fs.Close();
@ -81,7 +82,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
}
}
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
if (dmat.magic == DMatrixSimple::kMagic) {
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
p_dmat->SaveBinary(fname, silent);
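
LoadDataMatrix above accepts a "path#cachefile" URI: a single '#' splits the data source from the external-memory cache name, and a second '#' is rejected. A minimal sketch of that split using std::string instead of the C string functions:

#include <cstdio>
#include <string>

int main() {
  std::string uri = "hdfs://data/train.libsvm#train.cache";
  std::string::size_type dlm = uri.find('#');
  if (dlm != std::string::npos) {
    if (uri.find('#', dlm + 1) != std::string::npos) {
      std::fprintf(stderr, "only one `#' is allowed in the file path\n");
      return 1;
    }
    std::string fname = uri.substr(0, dlm);   // data source
    std::string cache = uri.substr(dlm + 1);  // cache file name
    std::printf("data: %s, cache: %s\n", fname.c_str(), cache.c_str());
  }
  return 0;
}
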

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_IO_IO_H_
#define XGBOOST_IO_IO_H_
/*!
* Copyright 2014 by Contributors
* \file io.h
* \brief handles input data format of xgboost
* I/O module handles a specific DMatrix format
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_IO_H_
#define XGBOOST_IO_IO_H_
#include "../data.h"
#include "../learner/dmatrix.h"
@ -32,7 +34,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
bool loadsplit,
const char *cache_file = NULL);
/*!
* \brief save DataMatrix into stream,
* note: the saved dmatrix format may not be exactly the same as the input
* SaveDMatrix will choose the best way to materialize the dmatrix.
* \param dmat the dmatrix to be saved
@ -40,7 +42,6 @@ DataMatrix* LoadDataMatrix(const char *fname,
* \param silent whether print message during saving
*/
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false);
} // namespace io
} // namespace xgboost
#endif // XGBOOST_IO_IO_H_

View File

@ -22,7 +22,7 @@ namespace io {
/*! \brief page returned by libsvm parser */
struct LibSVMPage : public SparsePage {
std::vector<float> label;
// overload clear
inline void Clear() {
SparsePage::Clear();
label.clear();
@ -35,7 +35,7 @@ struct LibSVMPage : public SparsePage {
*/
class LibSVMPageFactory {
public:
explicit LibSVMPageFactory()
LibSVMPageFactory()
: bytes_read_(0), at_head_(true) {
}
inline bool Init(void) {
@ -85,7 +85,7 @@ class LibSVMPageFactory {
data->resize(nthread);
bytes_read_ += chunk.size;
utils::Assert(chunk.size != 0, "LibSVMParser.FileData");
char *head = reinterpret_cast<char*>(chunk.dptr);
#pragma omp parallel num_threads(nthread_)
{
// threadid
@ -150,7 +150,7 @@ class LibSVMPageFactory {
}
return begin;
}
private:
// nthread
int nthread_;
@ -199,12 +199,13 @@ class LibSVMParser : public utils::IIterator<LibSVMPage> {
inline size_t bytes_read(void) const {
return itr.get_factory().bytes_read();
}
private:
bool at_end_;
size_t data_ptr_;
std::vector<LibSVMPage> *data_;
utils::ThreadBuffer<std::vector<LibSVMPage>*, LibSVMPageFactory> itr;
};
} // namespace io
} // namespace xgboost
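
Each chunk handed to LibSVMPageFactory holds lines of the form "label index:value index:value ...". The real class shards a chunk across OpenMP threads; this hypothetical single-threaded version shows just the per-line parse:

#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

struct Entry { unsigned index; float value; };

int main() {
  std::istringstream file("1 0:1.5 3:2.0\n0 1:0.5\n");
  std::string line;
  while (std::getline(file, line)) {
    std::istringstream ls(line);
    float label;
    ls >> label;                       // first token is the label
    std::vector<Entry> row;
    std::string tok;
    while (ls >> tok) {                // remaining tokens are index:value
      Entry e;
      std::sscanf(tok.c_str(), "%u:%f", &e.index, &e.value);
      row.push_back(e);
    }
    std::printf("label=%g with %zu features\n", label, row.size());
  }
  return 0;
}
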

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
/*!
* Copyright (c) 2014 by Contributors
* \file page_dmatrix-inl.hpp
* row iterator based on sparse page
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#include <vector>
#include <string>
#include <algorithm>
#include "../data.h"
#include "../utils/iterator.h"
#include "../utils/thread_buffer.h"
@ -94,12 +98,12 @@ class DMatrixPageBase : public DataMatrix {
fbin.Close();
if (!silent) {
utils::Printf("DMatrixPage: %lux%lu is saved to %s\n",
static_cast<unsigned long>(mat.info.num_row()),
static_cast<unsigned long>(mat.info.num_col()), fname_);
static_cast<unsigned long>(mat.info.num_row()), // NOLINT(*)
static_cast<unsigned long>(mat.info.num_col()), fname_); // NOLINT(*)
}
}
/*! \brief load and initialize the iterator with fi */
inline void LoadBinary(utils::FileStream &fi,
inline void LoadBinary(utils::FileStream &fi, // NOLINT(*)
bool silent,
const char *fname_) {
this->set_cache_file(fname_);
@ -114,8 +118,8 @@ class DMatrixPageBase : public DataMatrix {
iter_->Load(fs);
if (!silent) {
utils::Printf("DMatrixPage: %lux%lu matrix is loaded",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()));
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col())); // NOLINT(*)
if (fname_ != NULL) {
utils::Printf(" from %s\n", fname_);
} else {
@ -141,7 +145,7 @@ class DMatrixPageBase : public DataMatrix {
}
this->set_cache_file(cache_file);
std::string fname_row = std::string(cache_file) + ".row.blob";
utils::FileStream fo(utils::FopenCheck(fname_row.c_str(), "wb"));
utils::FileStream fo(utils::FopenCheck(fname_row.c_str(), "wb"));
SparsePage page;
size_t bytes_write = 0;
double tstart = rabit::utils::GetTime();
@ -178,8 +182,8 @@ class DMatrixPageBase : public DataMatrix {
if (page.data.size() != 0) {
page.Save(&fo);
}
fo.Close();
iter_->Load(utils::FileStream(utils::FopenCheck(fname_row.c_str(), "rb")));
// save data matrix
utils::FileStream fs(utils::FopenCheck(cache_file, "wb"));
int tmagic = kMagic;
@ -188,8 +192,8 @@ class DMatrixPageBase : public DataMatrix {
fs.Close();
if (!silent) {
utils::Printf("DMatrixPage: %lux%lu is parsed from %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
uri);
}
}
@ -241,12 +245,12 @@ class DMatrixHalfRAM : public DMatrixPageBase<0xffffab03> {
virtual IFMatrix *fmat(void) const {
return fmat_;
}
virtual void set_cache_file(const std::string &cache_file) {
}
virtual void CheckMagic(int tmagic) {
utils::Check(tmagic == DMatrixPageBase<0xffffab02>::kMagic ||
tmagic == DMatrixPageBase<0xffffab03>::kMagic,
"invalid format,magic number mismatch");
"invalid format,magic number mismatch");
}
/*! \brief the real fmatrix */
IFMatrix *fmat_;

View File

@ -1,10 +1,16 @@
#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
/*!
* Copyright (c) 2014 by Contributors
* \file page_fmatrix-inl.hpp
* col iterator based on sparse page
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#include <vector>
#include <string>
#include <algorithm>
namespace xgboost {
namespace io {
/*! \brief thread buffer iterator */
@ -42,9 +48,9 @@ class ThreadColPageIterator: public utils::IIterator<ColBatch> {
}
// set index set
inline void SetIndexSet(const std::vector<bst_uint> &fset, bool load_all) {
itr.get_factory().SetIndexSet(fset, load_all);
}
private:
// output data
ColBatch out_;
@ -96,7 +102,7 @@ struct ColConvertFactory {
return true;
}
}
if (tmp_.Size() != 0){
if (tmp_.Size() != 0) {
this->MakeColPage(tmp_, BeginPtr(*buffered_rowset_) + btop,
*enabled_, val);
return true;
@ -104,7 +110,7 @@ struct ColConvertFactory {
return false;
}
}
inline void Destroy(void) {}
inline void BeforeFirst(void) {}
inline void MakeColPage(const SparsePage &prow,
const bst_uint *ridx,
@ -115,7 +121,7 @@ struct ColConvertFactory {
#pragma omp parallel
{
nthread = omp_get_num_threads();
int max_nthread = std::max(omp_get_num_procs() / 2 - 4, 1);
if (nthread > max_nthread) {
nthread = max_nthread;
}
@ -130,10 +136,10 @@ struct ColConvertFactory {
int tid = omp_get_thread_num();
for (size_t j = prow.offset[i]; j < prow.offset[i+1]; ++j) {
const SparseBatch::Entry &e = prow.data[j];
if (enabled[e.index]) {
builder.AddBudget(e.index, tid);
}
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static) num_threads(nthread)
@ -169,7 +175,7 @@ struct ColConvertFactory {
// buffered rowset
std::vector<bst_uint> *buffered_rowset_;
// enabled marks
const std::vector<bool> *enabled_;
// internal temp cache
SparsePage tmp_;
/*! \brief page size 256 M */
@ -191,7 +197,7 @@ class FMatrixPage : public IFMatrix {
if (iter_ != NULL) delete iter_;
}
/*! \return whether column access is enabled */
virtual bool HaveColAccess(void) const {
return col_size_.size() != 0;
}
/*! \brief get number of columns */
@ -212,7 +218,7 @@ class FMatrixPage : public IFMatrix {
size_t nmiss = num_buffered_row_ - (col_size_[cidx]);
return 1.0f - (static_cast<float>(nmiss)) / num_buffered_row_;
}
virtual void InitColAccess(const std::vector<bool> &enabled,
float pkeep, size_t max_row_perbatch) {
if (this->HaveColAccess()) return;
if (TryLoadColData()) return;
@ -242,11 +248,11 @@ class FMatrixPage : public IFMatrix {
/*!
* \brief colmun based iterator
*/
virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {
size_t ncol = this->NumCol();
col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) col_index_.push_back(fset[i]);
}
col_iter_.SetIndexSet(col_index_, false);
col_iter_.BeforeFirst();
@ -255,13 +261,13 @@ class FMatrixPage : public IFMatrix {
// set the cache file name
inline void set_cache_file(const std::string &cache_file) {
col_data_name_ = std::string(cache_file) + ".col.blob";
col_meta_name_ = std::string(cache_file) + ".col.meta";
}
protected:
inline bool TryLoadColData(void) {
std::FILE *fi = fopen64(col_meta_name_.c_str(), "rb");
if (fi == NULL) return false;
utils::FileStream fs(fi);
LoadMeta(&fs);
fs.Close();
@ -306,12 +312,12 @@ class FMatrixPage : public IFMatrix {
SparsePage *pcol;
while (citer.Next(pcol)) {
for (size_t i = 0; i < pcol->Size(); ++i) {
col_size_[i] += pcol->offset[i + 1] - pcol->offset[i];
}
pcol->Save(&fo);
size_t spage = pcol->MemCostBytes();
bytes_write += spage;
double tnow = rabit::utils::GetTime();
double tdiff = tnow - tstart;
utils::Printf("Writting to %s in %g MB/s, %lu MB written current speed:%g MB/s\n",
col_data_name_.c_str(),
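
GetColDensity above computes 1 - missing/rows from col_size_, the per-column count of stored (non-missing) entries. A worked restatement of that formula:

#include <cstdio>
#include <vector>

int main() {
  size_t num_buffered_row = 100;
  std::vector<size_t> col_size;      // entries stored per column
  col_size.push_back(100);           // dense column
  col_size.push_back(37);            // mostly missing
  col_size.push_back(0);             // entirely missing
  for (size_t cidx = 0; cidx < col_size.size(); ++cidx) {
    size_t nmiss = num_buffered_row - col_size[cidx];
    float density = 1.0f - static_cast<float>(nmiss) / num_buffered_row;
    std::printf("column %zu density: %g\n", cidx, density);
  }
  return 0;
}
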

View File

@ -1,13 +1,15 @@
#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file simple_dmatrix-inl.hpp
* \brief simple implementation of DMatrixS that can be used
* the data format of xgboost is templatized, which means it can accept
* any data structure that implements the function defined by FMatrix
* this file is a specific implementation of input data structure that can be used by BoostLearner
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#include <string>
#include <cstring>
#include <vector>
@ -119,13 +121,13 @@ class DMatrixSimple : public DataMatrix {
for (size_t i = 0; i < batch.data.size(); ++i) {
info.info.num_col = std::max(info.info.num_col,
static_cast<size_t>(batch.data[i].index+1));
}
}
if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), uri);
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size()), uri); // NOLINT(*)
}
// try to load in additional file
if (!loadsplit) {
@ -141,7 +143,7 @@ class DMatrixSimple : public DataMatrix {
"DMatrix: weight data does not match the number of rows in features");
}
std::string mname = name + ".base_margin";
if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) {
if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) {
}
}
}
@ -165,10 +167,11 @@ class DMatrixSimple : public DataMatrix {
* \param silent whether print information during loading
* \param fname file name, used to print message
*/
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) { // NOLINT(*)
int tmagic;
utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch", fname == NULL ? "" : fname);
utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch",
fname == NULL ? "" : fname);
info.LoadBinary(fs);
LoadBinary(fs, &row_ptr_, &row_data_);
@ -176,9 +179,9 @@ class DMatrixSimple : public DataMatrix {
if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is loaded",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()));
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size())); // NOLINT(*)
if (fname != NULL) {
utils::Printf(" from %s\n", fname);
} else {
@ -205,9 +208,9 @@ class DMatrixSimple : public DataMatrix {
if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), fname);
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size()), fname); // NOLINT(*)
if (info.group_ptr.size() != 0) {
utils::Printf("data contains %u groups\n",
static_cast<unsigned>(info.group_ptr.size()-1));
@ -256,7 +259,7 @@ class DMatrixSimple : public DataMatrix {
* \param ptr pointer data
* \param data data content
*/
inline static void SaveBinary(utils::IStream &fo,
inline static void SaveBinary(utils::IStream &fo, // NOLINT(*)
const std::vector<size_t> &ptr,
const std::vector<RowBatch::Entry> &data) {
size_t nrow = ptr.size() - 1;
@ -272,7 +275,7 @@ class DMatrixSimple : public DataMatrix {
* \param out_ptr pointer data
* \param out_data data content
*/
inline static void LoadBinary(utils::IStream &fi,
inline static void LoadBinary(utils::IStream &fi, // NOLINT(*)
std::vector<size_t> *out_ptr,
std::vector<RowBatch::Entry> *out_data) {
size_t nrow;
@ -314,7 +317,7 @@ class DMatrixSimple : public DataMatrix {
DMatrixSimple *parent_;
// temporal space for batch
RowBatch batch_;
};
};
} // namespace io
} // namespace xgboost
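
The LoadBinary/SaveBinary pair above leads with a magic number so a loader can reject foreign files before trusting the payload. A minimal round-trip sketch of that check against an in-memory stream (the constant and layout here are illustrative, not the exact on-disk format):

#include <cassert>
#include <stdint.h>
#include <sstream>
#include <vector>

struct Entry { uint32_t index; float value; };

int main() {
  const uint32_t kMagic = 0xffffab01u;  // arbitrary constant for this sketch
  std::vector<size_t> ptr;              // CSR row pointer
  ptr.push_back(0); ptr.push_back(2); ptr.push_back(3);
  std::vector<Entry> data(3);
  data[0].index = 0; data[0].value = 1.5f;
  data[1].index = 3; data[1].value = 2.0f;
  data[2].index = 1; data[2].value = 0.5f;

  std::stringstream ss;                 // stand-in for utils::IStream
  ss.write(reinterpret_cast<const char*>(&kMagic), sizeof(kMagic));
  ss.write(reinterpret_cast<const char*>(&ptr[0]), ptr.size() * sizeof(size_t));
  ss.write(reinterpret_cast<const char*>(&data[0]), data.size() * sizeof(Entry));

  uint32_t tmagic = 0;                  // the loader verifies the magic first
  ss.read(reinterpret_cast<char*>(&tmagic), sizeof(tmagic));
  assert(tmagic == kMagic && "invalid format, magic number mismatch");
  return 0;
}
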

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file simple_fmatrix-inl.hpp
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#include <limits>
#include <algorithm>
#include <vector>
#include "../data.h"
#include "../utils/utils.h"
#include "../utils/random.h"
@ -30,7 +34,7 @@ class FMatrixS : public IFMatrix {
}
// destructor
virtual ~FMatrixS(void) {
if (iter_ != NULL) delete iter_;
}
/*! \return whether column access is enabled */
virtual bool HaveColAccess(void) const {
@ -39,7 +43,7 @@ class FMatrixS : public IFMatrix {
/*! \brief get number of columns */
virtual size_t NumCol(void) const {
utils::Check(this->HaveColAccess(), "NumCol:need column access");
return col_size_.size() - 1;
return col_size_.size();
}
/*! \brief get number of buffered rows */
virtual const std::vector<bst_uint> &buffered_rowset(void) const {
@ -54,7 +58,7 @@ class FMatrixS : public IFMatrix {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
}
virtual void InitColAccess(const std::vector<bool> &enabled,
float pkeep, size_t max_row_perbatch) {
if (this->HaveColAccess()) return;
this->InitColData(enabled, pkeep, max_row_perbatch);
@ -85,7 +89,7 @@ class FMatrixS : public IFMatrix {
size_t ncol = this->NumCol();
col_iter_.col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]);
}
col_iter_.BeforeFirst();
return &col_iter_;
@ -94,7 +98,7 @@ class FMatrixS : public IFMatrix {
* \brief save column access data into stream
* \param fo output stream to save to
*/
inline void SaveColAccess(utils::IStream &fo) const {
inline void SaveColAccess(utils::IStream &fo) const { // NOLINT(*)
size_t n = 0;
fo.Write(&n, sizeof(n));
}
@ -102,10 +106,10 @@ class FMatrixS : public IFMatrix {
* \brief load column access data from stream
* \param fo output stream to load from
*/
inline void LoadColAccess(utils::IStream &fi) {
inline void LoadColAccess(utils::IStream &fi) { // NOLINT(*)
// do nothing in load col access
}
protected:
/*!
* \brief initialize column data
@ -129,7 +133,7 @@ class FMatrixS : public IFMatrix {
for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) {
SparsePage *pcol = col_iter_.cpages_[i];
for (size_t j = 0; j < pcol->Size(); ++j) {
col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
}
}
}
@ -139,7 +143,7 @@ class FMatrixS : public IFMatrix {
* \param pcol the target column
*/
inline void MakeOneBatch(const std::vector<bool> &enabled,
float pkeep,
SparsePage *pcol) {
// clear rowset
buffered_rowset_.clear();
@ -153,14 +157,14 @@ class FMatrixS : public IFMatrix {
pcol->Clear();
utils::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(0, nthread);
builder.InitBudget(info_.num_col(), nthread);
// start working
iter_->BeforeFirst();
while (iter_->Next()) {
const RowBatch &batch = iter_->Value();
bmap.resize(bmap.size() + batch.size, true);
long batch_size = static_cast<long>(batch.size);
for (long i = 0; i < batch_size; ++i) {
long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
buffered_rowset_.push_back(ridx);
@ -169,13 +173,13 @@ class FMatrixS : public IFMatrix {
}
}
#pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) {
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]){
if (enabled[inst[j].index]) {
builder.AddBudget(inst[j].index, tid);
}
}
@ -183,18 +187,18 @@ class FMatrixS : public IFMatrix {
}
}
builder.InitStorage();
iter_->BeforeFirst();
while (iter_->Next()) {
const RowBatch &batch = iter_->Value();
#pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) {
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.Push(inst[j].index,
Entry((bst_uint)(batch.base_rowid+i),
inst[j].fvalue), tid);
@ -204,7 +208,8 @@ class FMatrixS : public IFMatrix {
}
}
utils::Assert(pcol->Size() == info_.num_col(), "inconsistent col data");
utils::Assert(pcol->Size() == info_.num_col(),
"inconsistent col data");
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
@ -260,7 +265,7 @@ class FMatrixS : public IFMatrix {
#pragma omp parallel
{
nthread = omp_get_num_threads();
int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
if (nthread > max_nthread) {
nthread = max_nthread;
}
@ -276,7 +281,7 @@ class FMatrixS : public IFMatrix {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
const SparseBatch::Entry &e = inst[j];
if (enabled[e.index]) {
builder.AddBudget(e.index, tid);
}
}
@ -329,10 +334,10 @@ class FMatrixS : public IFMatrix {
static_cast<bst_uint>(pcol->offset[ridx + 1] - pcol->offset[ridx]));
}
batch_.col_index = BeginPtr(col_index_);
batch_.col_data = BeginPtr(col_data_);
return true;
}
virtual const ColBatch &Value(void) const {
return batch_;
}
inline void Clear(void) {
@ -346,7 +351,7 @@ class FMatrixS : public IFMatrix {
// column content
std::vector<ColBatch::Inst> col_data_;
// column sparse pages
std::vector<SparsePage*> cpages_;
// data pointer
size_t data_ptr_;
// temporal space for batch
@ -356,7 +361,7 @@ class FMatrixS : public IFMatrix {
// column iterator
ColBatchIter col_iter_;
// shared meta info with DMatrix
const learner::MetaInfo &info_;
// row iterator
utils::IIterator<RowBatch> *iter_;
/*! \brief list of row index that are buffered */
@ -366,4 +371,4 @@ class FMatrixS : public IFMatrix {
};
} // namespace io
} // namespace xgboost
#endif // XGBOOST_IO_SLICE_FMATRIX_INL_HPP
#endif // XGBOOST_IO_SLICE_FMATRIX_INL_HPP_
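
MakeOneBatch above builds column-major data from row batches with ParallelGroupBuilder's two-pass protocol: AddBudget counts entries per column, InitStorage prefix-sums the counts into offsets, and Push drops each entry into its slot. A serial sketch of the same idea (the real builder keeps one budget array per OpenMP thread):

#include <cstdio>
#include <utility>
#include <vector>

struct Entry { unsigned index; float value; };

int main() {
  // two CSR rows: row0 = {0:1.5, 2:2.0}, row1 = {2:0.5}
  std::vector<std::vector<Entry> > rows(2);
  Entry e0 = {0, 1.5f}, e1 = {2, 2.0f}, e2 = {2, 0.5f};
  rows[0].push_back(e0); rows[0].push_back(e1); rows[1].push_back(e2);
  const size_t num_col = 3;

  // pass 1 (AddBudget): count how many entries land in each column
  std::vector<size_t> offset(num_col + 1, 0);
  for (size_t r = 0; r < rows.size(); ++r)
    for (size_t j = 0; j < rows[r].size(); ++j) ++offset[rows[r][j].index + 1];
  // InitStorage: prefix sum turns per-column counts into offsets
  for (size_t c = 0; c < num_col; ++c) offset[c + 1] += offset[c];

  // pass 2 (Push): drop each (row id, value) into its column slot
  std::vector<std::pair<unsigned, float> > col_data(offset[num_col]);
  std::vector<size_t> fill(offset.begin(), offset.end() - 1);
  for (size_t r = 0; r < rows.size(); ++r)
    for (size_t j = 0; j < rows[r].size(); ++j) {
      const Entry &e = rows[r][j];
      col_data[fill[e.index]++] = std::make_pair(static_cast<unsigned>(r), e.value);
    }
  for (size_t c = 0; c < num_col; ++c)
    std::printf("col %zu holds %zu entries\n", c, offset[c + 1] - offset[c]);
  return 0;
}
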

View File

@ -1,18 +1,22 @@
#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
/*!
* Copyright (c) 2014 by Contributors
* \file sparse_batch_page.h
* content holder of sparse batch that can be saved to disk
* the representation can be effectively
* used in external memory computation
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#include <vector>
#include <algorithm>
#include "../data.h"
namespace xgboost {
namespace io {
/*!
* \brief storage unit of sparse batch
*/
class SparsePage {
public:
@ -96,7 +100,7 @@ class SparsePage {
}
/*!
* \brief save the data to fo, when a page was written
* to disk it must contain all the elements in the
* \param fo output stream
*/
inline void Save(utils::IStream *fo) const {
@ -124,7 +128,7 @@ class SparsePage {
*/
inline bool PushLoad(utils::IStream *fi) {
if (!fi->Read(&disk_offset_)) return false;
data.resize(offset.back() + disk_offset_.back());
if (disk_offset_.back() != 0) {
utils::Check(fi->Read(BeginPtr(data) + offset.back(),
disk_offset_.back() * sizeof(SparseBatch::Entry)) != 0,
@ -138,7 +142,7 @@ class SparsePage {
}
return true;
}
/*!
* \brief Push row batch into the page
* \param batch the row batch
*/
@ -154,7 +158,7 @@ class SparsePage {
offset[i + begin] = top + batch.ind_ptr[i + 1] - batch.ind_ptr[0];
}
}
/*!
* \brief Push a sparse page
* \param batch the row page
*/
@ -170,7 +174,7 @@ class SparsePage {
offset[i + begin] = top + batch.offset[i + 1];
}
}
/*!
* \brief Push one instance into page
* \param row an instance row
*/
@ -202,7 +206,7 @@ class SparsePage {
};
/*!
* \brief factory class for SparsePage,
* used in threadbuffer template
*/
class SparsePageFactory {
public:
@ -217,7 +221,7 @@ class SparsePageFactory {
return action_index_set_;
}
// set index set, will be used after next before first
inline void SetIndexSet(const std::vector<bst_uint> &index_set,
bool load_all) {
set_load_all_ = load_all;
if (!set_load_all_) {
@ -229,7 +233,7 @@ class SparsePageFactory {
return true;
}
inline void SetParam(const char *name, const char *val) {}
inline bool LoadNext(SparsePage *val) {
if (!action_load_all_) {
if (action_index_set_.size() == 0) {
return false;
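
SparsePage above keeps the CSR invariant that offset is cumulative and offset.back() == data.size(); every Push variant appends entries and rebases the incoming offsets on top of the current back(). A minimal sketch of that invariant:

#include <cassert>
#include <vector>

struct Page {
  std::vector<size_t> offset;  // entries of row i live in [offset[i], offset[i+1])
  std::vector<float> data;     // payload (just values in this sketch)
  Page() : offset(1, 0) {}
  void PushRow(const std::vector<float> &row) {
    data.insert(data.end(), row.begin(), row.end());
    offset.push_back(offset.back() + row.size());  // rebase on current back()
  }
  size_t Size() const { return offset.size() - 1; }
};

int main() {
  Page page;
  std::vector<float> r1(2, 1.5f), r2(1, 0.5f);
  page.PushRow(r1);
  page.PushRow(r2);
  assert(page.Size() == 2);
  assert(page.offset.back() == page.data.size());  // the invariant Push keeps
  return 0;
}
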

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_LEARNER_DMATRIX_H_
#define XGBOOST_LEARNER_DMATRIX_H_
/*!
* Copyright 2014 by Contributors
* \file dmatrix.h
* \brief meta data and template data structure
* used for regression/classification/ranking
* \author Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_DMATRIX_H_
#define XGBOOST_LEARNER_DMATRIX_H_
#include <vector>
#include <cstring>
#include "../data.h"
@ -16,8 +18,8 @@ namespace learner {
* \brief meta information needed in training, including label, weight
*/
struct MetaInfo {
/*!
* \brief information needed by booster
* BoosterInfo does not implement save and load,
* all serialization is done in MetaInfo
*/
@ -31,7 +33,7 @@ struct MetaInfo {
std::vector<bst_uint> group_ptr;
/*! \brief weights of each instance, optional */
std::vector<float> weights;
/*!
* \brief initialized margins,
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from
@ -66,7 +68,7 @@ struct MetaInfo {
return 1.0f;
}
}
inline void SaveBinary(utils::IStream &fo) const {
inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*)
int version = kVersion;
fo.Write(&version, sizeof(version));
fo.Write(&info.num_row, sizeof(info.num_row));
@ -77,7 +79,7 @@ struct MetaInfo {
fo.Write(info.root_index);
fo.Write(base_margin);
}
inline void LoadBinary(utils::IStream &fi) {
inline void LoadBinary(utils::IStream &fi) { // NOLINT(*)
int version;
utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
@ -114,7 +116,7 @@ struct MetaInfo {
return labels;
}
inline const std::vector<float>& GetFloatInfo(const char *field) const {
return ((MetaInfo*)this)->GetFloatInfo(field);
return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*)
}
inline std::vector<unsigned> &GetUIntInfo(const char *field) {
using namespace std;
@ -124,7 +126,7 @@ struct MetaInfo {
return info.root_index;
}
inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
return ((MetaInfo*)this)->GetUIntInfo(field);
return ((MetaInfo*)this)->GetUIntInfo(field); // NOLINT(*)
}
// try to load weight information from file, if exists
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
@ -149,14 +151,14 @@ struct MetaInfo {
* \tparam FMatrix type of feature data source
*/
struct DMatrix {
/*!
* \brief magic number associated with this object
* used to check if it is specific instance
*/
const int magic;
/*! \brief meta information about the dataset */
MetaInfo info;
/*!
* \brief cache pointer to verify if the data structure is cached in some learner
* used to verify if DMatrix is cached
*/
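
group_ptr above works like a CSR offset array over instances: rows [group_ptr[k], group_ptr[k+1]) form ranking group k. A short illustration (group sizes made up for the example):

#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> group_ptr;       // 3 query groups over 9 rows
  group_ptr.push_back(0); group_ptr.push_back(3);
  group_ptr.push_back(5); group_ptr.push_back(9);
  for (size_t k = 0; k + 1 < group_ptr.size(); ++k) {
    std::printf("group %zu: rows [%u, %u), size %u\n", k,
                group_ptr[k], group_ptr[k + 1], group_ptr[k + 1] - group_ptr[k]);
  }
  return 0;
}
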

View File

@ -1,10 +1,12 @@
/*!
* Copyright 2014 by Contributors
* \file xgboost_evaluation-inl.hpp
* \brief evaluation metrics for regression and classification and rank
* \author Kailong Chen, Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
#define XGBOOST_LEARNER_EVALUATION_INL_HPP_
/*!
* \file xgboost_evaluation-inl.hpp
* \brief evaluation metrics for regression and classification and rank
* \author Kailong Chen, Tianqi Chen
*/
#include <vector>
#include <utility>
#include <string>
@ -18,8 +20,8 @@
namespace xgboost {
namespace learner {
/*!
* \brief base class of elementwise evaluation
* \tparam Derived the name of subclass
*/
template<typename Derived>
@ -47,15 +49,15 @@ struct EvalEWiseBase : public IEvaluator {
}
return Derived::GetFinal(dat[0], dat[1]);
}
/*!
* \brief to be implemented by subclass,
* get evaluation result from one row
* \param label label of current instance
* \param pred prediction value of current instance
*/
inline static float EvalRow(float label, float pred);
/*!
* \brief to be overridden by subclass, final transformation
* \param esum the sum statistics returned by EvalRow
* \param wsum sum of weight
*/
@ -87,9 +89,9 @@ struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
const float eps = 1e-16f;
const float pneg = 1.0f - py;
if (py < eps) {
return -y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps);
} else if (pneg < eps) {
return -y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps);
} else {
return -y * std::log(py) - (1.0f - y) * std::log(pneg);
}
@ -119,7 +121,7 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
}
};
/*!
/*!
* \brief base class of multi-class evaluation
* \tparam Derived the name of subclass
*/
@ -139,7 +141,7 @@ struct EvalMClassBase : public IEvaluator {
float sum = 0.0, wsum = 0.0;
int label_error = 0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const float wt = info.GetWeight(i);
int label = static_cast<int>(info.labels[i]);
if (label >= 0 && label < static_cast<int>(nclass)) {
@ -161,18 +163,18 @@ struct EvalMClassBase : public IEvaluator {
}
return Derived::GetFinal(dat[0], dat[1]);
}
/*!
* \brief to be implemented by subclass,
* get evaluation result from one row
* \param label label of current instance
* \param pred prediction value of current instance
* \param nclass number of classes in the prediction
*/
inline static float EvalRow(int label,
const float *pred,
size_t nclass);
/*!
* \brief to be overridden by subclass, final transformation
* \param esum the sum statistics returned by EvalRow
* \param wsum sum of weight
*/
@ -208,7 +210,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
} else {
return -std::log(eps);
}
}
};
/*! \brief ctest */
@ -240,7 +242,7 @@ struct EvalCTest: public IEvaluator {
tpred.push_back(preds[i + (k + 1) * ndata]);
tinfo.labels.push_back(info.labels[i]);
tinfo.weights.push_back(info.GetWeight(i));
}
}
wsum += base_->Eval(tpred, tinfo);
}
@ -328,7 +330,7 @@ struct EvalPrecisionRatio : public IEvaluator{
const MetaInfo &info,
bool distributed) const {
utils::Check(!distributed, "metric %s do not support distributed evaluation", Name());
utils::Check(info.labels.size() != 0, "label set cannot be empty");
utils::Check(info.labels.size() != 0, "label set cannot be empty");
utils::Assert(preds.size() % info.labels.size() == 0,
"label size predict size not match");
std::vector< std::pair<float, unsigned> > rec;
@ -344,7 +346,8 @@ struct EvalPrecisionRatio : public IEvaluator{
}
protected:
inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec, const MetaInfo &info) const {
inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec,
const MetaInfo &info) const {
size_t cutoff = static_cast<size_t>(ratio_ * rec.size());
double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0;
for (size_t j = 0; j < cutoff; ++j) {
@ -372,7 +375,7 @@ struct EvalAuc : public IEvaluator {
utils::Check(info.labels.size() != 0, "label set cannot be empty");
utils::Check(preds.size() % info.labels.size() == 0,
"label size predict size not match");
std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(info.labels.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
@ -417,8 +420,8 @@ struct EvalAuc : public IEvaluator {
}
if (distributed) {
float dat[2];
dat[0] = static_cast<float>(sum_auc);
dat[1] = static_cast<float>(ngroup);
// approximately estimate auc using mean
rabit::Allreduce<rabit::op::Sum>(dat, 2);
return dat[0] / dat[1];
@ -463,8 +466,8 @@ struct EvalRankList : public IEvaluator {
}
if (distributed) {
float dat[2];
dat[0] = static_cast<float>(sum_metric);
dat[1] = static_cast<float>(ngroup);
// approximately estimate auc using mean
rabit::Allreduce<rabit::op::Sum>(dat, 2);
return dat[0] / dat[1];
@ -489,7 +492,7 @@ struct EvalRankList : public IEvaluator {
}
}
/*! \return evaluation metric, given the pair_sort record, (pred,label) */
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0;
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0; // NOLINT(*)
protected:
unsigned topn_;
@ -524,13 +527,13 @@ struct EvalNDCG : public EvalRankList{
double sumdcg = 0.0;
for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) {
const unsigned rel = rec[i].second;
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / std::log(i + 2.0);
}
}
return static_cast<float>(sumdcg);
}
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const { // NOLINT(*)
std::stable_sort(rec.begin(), rec.end(), CmpFirst);
float dcg = this->CalcDCG(rec);
std::stable_sort(rec.begin(), rec.end(), CmpSecond);
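
EvalEWiseBase above is a CRTP base: Eval loops over rows and calls the derived class's static EvalRow/GetFinal, so each metric gets the elementwise plumbing without virtual dispatch. A minimal reconstruction with an illustrative RMSE metric (serial, unit weights; the real class also handles weights, OpenMP, and Allreduce):

#include <cmath>
#include <cstdio>
#include <vector>

template <typename Derived>
struct EvalEWiseBase {
  float Eval(const std::vector<float> &preds,
             const std::vector<float> &labels) const {
    double sum = 0.0, wsum = 0.0;
    for (size_t i = 0; i < preds.size(); ++i) {
      sum += Derived::EvalRow(labels[i], preds[i]);  // static dispatch
      wsum += 1.0;                                   // unit weights here
    }
    return Derived::GetFinal(sum, wsum);
  }
};

struct EvalToyRMSE : EvalEWiseBase<EvalToyRMSE> {
  inline static float EvalRow(float label, float pred) {
    float d = label - pred;
    return d * d;                                    // squared error per row
  }
  inline static float GetFinal(double esum, double wsum) {
    return static_cast<float>(std::sqrt(esum / wsum));
  }
};

int main() {
  std::vector<float> preds(2), labels(2);
  preds[0] = 0.9f; preds[1] = 0.1f; labels[0] = 1.0f; labels[1] = 0.0f;
  std::printf("rmse=%g\n", EvalToyRMSE().Eval(preds, labels));
  return 0;
}
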

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_EVALUATION_H_
#define XGBOOST_LEARNER_EVALUATION_H_
/*!
* Copyright 2014 by Contributors
* \file evaluation.h
* \brief interface of evaluation function supported in xgboost
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_LEARNER_EVALUATION_H_
#define XGBOOST_LEARNER_EVALUATION_H_
#include <string>
#include <vector>
#include <cstdio>
@ -19,7 +21,7 @@ struct IEvaluator{
* \brief evaluate a specific metric
* \param preds prediction
* \param info information, including label etc.
* \param distributed whether a call to Allreduce is needed to gather
* the average statistics across all the node,
* this is only supported by some metrics
*/

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
#define XGBOOST_LEARNER_HELPER_UTILS_H_
/*!
* Copyright 2014 by Contributors
* \file helper_utils.h
* \brief useful helper functions
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
#define XGBOOST_LEARNER_HELPER_UTILS_H_
#include <utility>
#include <vector>
#include <cmath>
@ -61,7 +63,7 @@ inline float LogSum(const float *rec, size_t size) {
for (size_t i = 0; i < size; ++i) {
sum += std::exp(rec[i] - mx);
}
return mx + std::log(sum);
}
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
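
LogSum above is the standard numerically stable log-sum-exp: subtracting the max first keeps every exp() argument at or below zero, while mx + log(sum exp(x_i - mx)) stays algebraically equal to log(sum exp(x_i)). A quick check:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>

double LogSumStable(const double *rec, size_t size) {
  double mx = rec[0];
  for (size_t i = 1; i < size; ++i) mx = std::max(mx, rec[i]);
  double sum = 0.0;
  for (size_t i = 0; i < size; ++i) sum += std::exp(rec[i] - mx);
  return mx + std::log(sum);
}

int main() {
  double rec[2] = {1000.0, 1001.0};
  // naive form overflows: exp(1000) is inf in double precision
  std::printf("naive:  %g\n", std::log(std::exp(rec[0]) + std::exp(rec[1])));
  std::printf("stable: %g\n", LogSumStable(rec, 2));  // ~1001.313
  return 0;
}
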

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file learner-inl.hpp
* \brief learning algorithm
* \author Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
#include <algorithm>
#include <vector>
#include <utility>
@ -19,7 +21,7 @@
namespace xgboost {
/*! \brief namespace for learning algorithm */
namespace learner {
/*!
* \brief learner that does gradient boosting on specific objective functions
* and does training and prediction
*/
@ -30,7 +32,7 @@ class BoostLearner : public rabit::Serializable {
gbm_ = NULL;
name_obj_ = "reg:linear";
name_gbm_ = "gbtree";
silent= 0;
silent = 0;
prob_buffer_row = 1.0f;
distributed_mode = 0;
updater_mode = 0;
@ -47,10 +49,10 @@ class BoostLearner : public rabit::Serializable {
* \brief add internal cache space for mat, this can speed up prediction for a matrix;
* please cache prediction for training and eval data
* warning: if the model is loaded from file from some previous training history,
* set cache data must be called with exactly the SAME
* data matrices to continue training, otherwise it will cause an error
* \param mats array of pointers to matrices whose prediction results need to be cached
*/
inline void SetCacheData(const std::vector<DMatrix*>& mats) {
utils::Assert(cache_.size() == 0, "can only call cache data once");
// assign buffer index
@ -67,10 +69,10 @@ class BoostLearner : public rabit::Serializable {
buffer_size += mats[i]->info.num_row();
}
char str_temp[25];
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
static_cast<unsigned long>(buffer_size));
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
static_cast<unsigned long>(buffer_size)); // NOLINT(*)
this->SetParam("num_pbuffer", str_temp);
this->pred_buffer_size = buffer_size;
}
/*!
* \brief set parameters from outside
@ -79,7 +81,7 @@ class BoostLearner : public rabit::Serializable {
*/
inline void SetParam(const char *name, const char *val) {
using namespace std;
// in this version, bst: prefix is no longer required
if (strncmp(name, "bst:", 4) != 0) {
std::string n = "bst:"; n += name;
this->SetParam(n.c_str(), val);
@ -119,7 +121,7 @@ class BoostLearner : public rabit::Serializable {
if (!strcmp(name, "objective")) name_obj_ = val;
if (!strcmp(name, "booster")) name_gbm_ = val;
mparam.SetParam(name, val);
}
if (gbm_ != NULL) gbm_->SetParam(name, val);
if (obj_ != NULL) obj_->SetParam(name, val);
if (gbm_ == NULL || obj_ == NULL) {
@ -133,16 +135,16 @@ class BoostLearner : public rabit::Serializable {
// estimate feature bound
unsigned num_feature = 0;
for (size_t i = 0; i < cache_.size(); ++i) {
num_feature = std::max(num_feature,
static_cast<unsigned>(cache_[i].mat_->info.num_col()));
}
// run allreduce on num_feature to find the maximum value
rabit::Allreduce<rabit::op::Max>(&num_feature, 1);
if (num_feature > mparam.num_feature) mparam.num_feature = num_feature;
}
char str_temp[25];
utils::SPrintf(str_temp, sizeof(str_temp), "%d", mparam.num_feature);
this->SetParam("bst:num_feature", str_temp);
this->SetParam("bst:num_feature", str_temp);
}
/*!
* \brief initialize the model
@ -161,13 +163,13 @@ class BoostLearner : public rabit::Serializable {
* \param fi input stream
* \param calc_num_feature whether call InitTrainer with calc_num_feature
*/
inline void LoadModel(utils::IStream &fi,
inline void LoadModel(utils::IStream &fi, // NOLINT(*)
bool calc_num_feature = true) {
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
"BoostLearner: wrong model format");
{
// backward compatibility code for compatible with old model type
// for new model, Read(&name_obj_) suffices
uint64_t len;
utils::Check(fi.Read(&len, sizeof(len)) != 0, "BoostLearner: wrong model format");
if (len >= std::numeric_limits<unsigned>::max()) {
@ -226,9 +228,9 @@ class BoostLearner : public rabit::Serializable {
fi = utils::IStream::Create(fname, "r");
this->LoadModel(*fi, true);
}
delete fi;
}
inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
ModelParam p = mparam;
p.saved_with_pbuffer = static_cast<int>(with_pbuffer);
fo.Write(&p, sizeof(ModelParam));
@ -247,7 +249,7 @@ class BoostLearner : public rabit::Serializable {
fo->Write("bs64\t", 5);
utils::Base64OutStream bout(fo);
this->SaveModel(bout, with_pbuffer);
bout.Finish('\n');
} else {
fo->Write("binf", 4);
this->SaveModel(*fo, with_pbuffer);
@ -260,7 +262,7 @@ class BoostLearner : public rabit::Serializable {
* \param p_train pointer to the matrix used by training
*/
inline void CheckInit(DMatrix *p_train) {
int ncol = static_cast<int>(p_train->info.info.num_col);
std::vector<bool> enabled(ncol, true);
// set max row per batch to limited value
// in distributed mode, use safe choice otherwise
@ -345,10 +347,9 @@ class BoostLearner : public rabit::Serializable {
bool output_margin,
std::vector<float> *out_preds,
unsigned ntree_limit = 0,
bool pred_leaf = false
) const {
bool pred_leaf = false) const {
if (pred_leaf) {
gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
} else {
this->PredictRaw(data, out_preds, ntree_limit);
if (!output_margin) {
@ -361,7 +362,7 @@ class BoostLearner : public rabit::Serializable {
* NOTE: use the batch prediction interface if possible, batch prediction is usually
* more efficient than online prediction
* This function is NOT threadsafe, make sure you only call from one thread
*
* \param inst the instance you want to predict
* \param output_margin whether to only predict margin value instead of transformed prediction
* \param out_preds output vector to hold the predictions
@ -387,8 +388,8 @@ class BoostLearner : public rabit::Serializable {
}
protected:
/*!
* \brief initialize the objective function and GBM,
/*!
* \brief initialize the objective function and GBM,
* if not yet done
*/
inline void InitObjGBM(void) {
@ -401,12 +402,12 @@ class BoostLearner : public rabit::Serializable {
for (size_t i = 0; i < cfg_.size(); ++i) {
obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
}
if (evaluator_.Size() == 0) {
evaluator_.AddEval(obj_->DefaultEvalMetric());
}
}
/*!
* \brief additional default value for specific objs
*/
inline void InitAdditionDefaultParam(void) {
@ -415,12 +416,12 @@ class BoostLearner : public rabit::Serializable {
gbm_->SetParam("max_delta_step", "0.7");
}
}
/*!
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
inline void PredictRaw(const DMatrix &data,
std::vector<float> *out_preds,
@ -517,7 +518,7 @@ class BoostLearner : public rabit::Serializable {
protected:
// magic number to transform random seed
const static int kRandSeedMagic = 127;
static const int kRandSeedMagic = 127;
// cache entry object that helps handle feature caching
struct CacheEntry {
const DMatrix *mat_;
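
SaveModel above tags the payload with "bs64\t" or "binf" so LoadModel can peek at the first bytes and branch between base64 and raw binary. A sketch of that dispatch against an in-memory stream (the two marker strings mirror the code above; everything else is simplified):

#include <cstdio>
#include <cstring>
#include <sstream>

int main() {
  std::stringstream model;
  bool save_base64 = false;
  if (save_base64) {
    model.write("bs64\t", 5);   // a base64-encoded body would follow
  } else {
    model.write("binf", 4);     // a raw binary body would follow
  }
  char head[5] = {0};
  model.read(head, 4);          // loader peeks at the marker
  if (std::memcmp(head, "binf", 4) == 0) {
    std::printf("binary model payload\n");
  } else if (std::memcmp(head, "bs64", 4) == 0) {
    std::printf("base64 model payload\n");
  }
  return 0;
}
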

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file objective-inl.hpp
* \brief objective function implementations
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#include <vector>
#include <algorithm>
#include <utility>
@ -176,14 +178,14 @@ class RegLossObj : public IObjFunction {
// poisson regression for count
class PoissonRegression : public IObjFunction {
public:
explicit PoissonRegression(void) {
PoissonRegression(void) {
max_delta_step = 0.0f;
}
virtual ~PoissonRegression(void) {}
virtual void SetParam(const char *name, const char *val) {
using namespace std;
if (!strcmp( "max_delta_step", name )) {
if (!strcmp("max_delta_step", name)) {
max_delta_step = static_cast<float>(atof(val));
}
}
@ -201,9 +203,9 @@ class PoissonRegression : public IObjFunction {
// check if label in range
bool label_correct = true;
// start calculating gradient
const long ndata = static_cast<bst_omp_uint>(preds.size());
const long ndata = static_cast<bst_omp_uint>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long i = 0; i < ndata; ++i) {
for (long i = 0; i < ndata; ++i) { // NOLINT(*)
float p = preds[i];
float w = info.GetWeight(i);
float y = info.labels[i];
@ -219,9 +221,9 @@ class PoissonRegression : public IObjFunction {
}
virtual void PredTransform(std::vector<float> *io_preds) {
std::vector<float> &preds = *io_preds;
const long ndata = static_cast<long>(preds.size());
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) {
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]);
}
}
@ -234,7 +236,7 @@ class PoissonRegression : public IObjFunction {
virtual const char* DefaultEvalMetric(void) const {
return "poisson-nloglik";
}
private:
float max_delta_step;
};
@ -467,7 +469,7 @@ class LambdaRankObj : public IObjFunction {
: pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
};
/*!
* \brief get lambda weight for existing pairs
* \param list a list that is sorted by pred score
* \param io_pairs record of pairs, containing the pairs to fill in weights
*/
@ -555,10 +557,10 @@ class LambdaRankObjMAP : public LambdaRankObj {
float ap_acc;
/*!
* \brief the accumulated precision,
* assuming a positive instance is missing
*/
float ap_acc_miss;
/*!
* \brief the accumulated precision,
* assuming that one more positive instance is inserted ahead
*/
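
The diff elides the loop body of PoissonRegression::GetGradient, but the gradient pair it feeds to the booster follows from the standard Poisson negative log-likelihood with a log link: for margin p and label y, nll(p) = exp(p) - y*p up to a constant, so grad = exp(p) - y and hess = exp(p). A standalone finite-difference check of that derivation (not the xgboost code itself):

#include <cmath>
#include <cstdio>

int main() {
  float p = 0.3f, y = 2.0f;        // margin (log-mean) and count label
  float grad = std::exp(p) - y;    // first derivative of the nll
  float hess = std::exp(p);        // second derivative, always positive
  // finite-difference sanity check of grad
  float eps = 1e-3f;
  float nll_hi = std::exp(p + eps) - y * (p + eps);
  float nll_lo = std::exp(p - eps) - y * (p - eps);
  std::printf("grad=%g, fd=%g, hess=%g\n",
              grad, (nll_hi - nll_lo) / (2 * eps), hess);
  return 0;
}
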

View File

@ -1,11 +1,14 @@
#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
#define XGBOOST_LEARNER_OBJECTIVE_H_
/*!
* Copyright 2014 by Contributors
* \file objective.h
* \brief interface of objective function used for gradient boosting
* \author Tianqi Chen, Kailong Chen
*/
#include "dmatrix.h"
#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
#define XGBOOST_LEARNER_OBJECTIVE_H_
#include <vector>
#include "./dmatrix.h"
namespace xgboost {
namespace learner {
@ -13,13 +16,13 @@ namespace learner {
class IObjFunction{
public:
/*! \brief virtual destructor */
virtual ~IObjFunction(void){}
virtual ~IObjFunction(void) {}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
virtual void SetParam(const char *name, const char *val) = 0;
/*!
* \brief get gradient over each of predictions, given existing information
* \param preds prediction of current round
@ -38,9 +41,9 @@ class IObjFunction{
* \brief transform prediction values, this is only called when Prediction is called
* \param io_preds prediction values, saves to this vector as well
*/
virtual void PredTransform(std::vector<float> *io_preds){}
virtual void PredTransform(std::vector<float> *io_preds) {}
/*!
* \brief transform prediction values, this is only called when Eval is called,
* usually it redirect to PredTransform
* \param io_preds prediction values, saves to this vector as well
*/
@ -49,7 +52,7 @@ class IObjFunction{
}
/*!
* \brief transform probability value back to margin
* this is used to transform user-set base_score back to margin
* used by gradient boosting
* \return transformed value
*/
@ -77,7 +80,7 @@ inline IObjFunction* CreateObjFunction(const char *name) {
if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1);
if (!strcmp("rank:pairwise", name )) return new PairwiseRankObj();
if (!strcmp("rank:ndcg", name)) return new LambdaRankObjNDCG();
if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();
if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();
utils::Error("unknown objective function type: %s", name);
return NULL;
}

View File

@ -1,13 +1,13 @@
#ifndef XGBOOST_SYNC_H_
#define XGBOOST_SYNC_H_
/*!
* Copyright 2014 by Contributors
* \file sync.h
* \brief the synchronization module of rabit
* redirects to subtree rabit header
* \author Tianqi Chen
*/
#ifndef XGBOOST_SYNC_SYNC_H_
#define XGBOOST_SYNC_SYNC_H_
#include "../../subtree/rabit/include/rabit.h"
#include "../../subtree/rabit/include/rabit/timer.h"
#endif // XGBOOST_SYNC_H_
#endif // XGBOOST_SYNC_SYNC_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_MODEL_H_
#define XGBOOST_TREE_MODEL_H_
/*!
* Copyright 2014 by Contributors
* \file model.h
* \brief model structure for tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_MODEL_H_
#define XGBOOST_TREE_MODEL_H_
#include <string>
#include <cstring>
#include <sstream>
@ -19,7 +21,7 @@
namespace xgboost {
namespace tree {
/*!
* \brief template class of TreeModel
* \tparam TSplitCond data type to indicate split condition
* \tparam TNodeStat auxiliary statistics of node to help tree building
*/
@ -42,7 +44,7 @@ class TreeModel {
int max_depth;
/*! \brief number of features used for tree construction */
int num_feature;
/*!
* \brief leaf vector size, used for vector tree
* used to store more than one dimensional information in tree
*/
@ -55,8 +57,8 @@ class TreeModel {
size_leaf_vector = 0;
std::memset(reserved, 0, sizeof(reserved));
}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
@ -70,7 +72,7 @@ class TreeModel {
/*! \brief tree node */
class Node {
public:
Node(void) : sindex_(0) {}
/*! \brief index of left child */
inline int cleft(void) const {
return this->cleft_;
@ -119,15 +121,15 @@ class TreeModel {
inline bool is_root(void) const {
return parent_ == -1;
}
/*!
* \brief set the right child
* \param nid node id of the right child
*/
inline void set_right_child(int nid) {
this->cright_ = nid;
}
/*!
* \brief set split condition of current node
* \param split_index feature index to split
* \param split_cond split condition
* \param default_left the default direction when feature is unknown
@ -138,10 +140,10 @@ class TreeModel {
this->sindex_ = split_index;
(this->info_).split_cond = split_cond;
}
/*!
* \brief set the leaf value of the node
* \param value leaf value
* \param right right index, could be used to store
* additional information
*/
inline void set_leaf(float value, int right = -1) {
@ -153,12 +155,12 @@ class TreeModel {
inline void mark_delete(void) {
this->sindex_ = std::numeric_limits<unsigned>::max();
}
private:
friend class TreeModel<TSplitCond, TNodeStat>;
/*!
* \brief in leaf node, we have weights, in non-leaf nodes,
* we have split condition
*/
union Info{
float leaf_value;
@ -203,7 +205,7 @@ class TreeModel {
"number of nodes in the tree exceed 2^31");
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
return nd;
}
// delete a tree node, keep the parent field to allow trace back
@ -215,7 +217,7 @@ class TreeModel {
}
public:
/*!
* \brief change a non leaf node to a leaf node, delete its children
* \param rid node id of the node
* \param new leaf value
@ -229,7 +231,7 @@ class TreeModel {
this->DeleteNode(nodes[rid].cright());
nodes[rid].set_leaf(value);
}
/*!
* \brief collapse a non leaf node to a leaf node, delete its children
* \param rid node id of the node
* \param new leaf value
@ -273,7 +275,7 @@ class TreeModel {
return &leaf_vector[nid * param.size_leaf_vector];
}
/*! \brief get leaf vector given nid */
inline const bst_float* leafvec(int nid) const{
inline const bst_float* leafvec(int nid) const {
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
}
@ -288,15 +290,15 @@ class TreeModel {
nodes[i].set_parent(-1);
}
}
/*!
* \brief load model from stream
* \param fi input stream
*/
inline void LoadModel(utils::IStream &fi) {
inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
utils::Check(fi.Read(&param, sizeof(Param)) > 0,
"TreeModel: wrong format");
nodes.resize(param.num_nodes); stats.resize(param.num_nodes);
utils::Assert(param.num_nodes != 0, "invalid model");
utils::Assert(param.num_nodes != 0, "invalid model");
utils::Check(fi.Read(BeginPtr(nodes), sizeof(Node) * nodes.size()) > 0,
"TreeModel: wrong format");
utils::Check(fi.Read(BeginPtr(stats), sizeof(NodeStat) * stats.size()) > 0,
@ -313,22 +315,22 @@ class TreeModel {
"number of deleted nodes do not match, num_deleted=%d, dnsize=%lu, num_nodes=%d",
param.num_deleted, deleted_nodes.size(), param.num_nodes);
}
/*!
* \brief save model to stream
* \param fo output stream
*/
inline void SaveModel(utils::IStream &fo) const {
inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
"Tree::SaveModel");
utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
"Tree::SaveModel");
fo.Write(&param, sizeof(Param));
utils::Assert(param.num_nodes != 0, "invalid model");
utils::Assert(param.num_nodes != 0, "invalid model");
fo.Write(BeginPtr(nodes), sizeof(Node) * nodes.size());
fo.Write(BeginPtr(stats), sizeof(NodeStat) * nodes.size());
if (param.size_leaf_vector != 0) fo.Write(leaf_vector);
}
/*!
* \brief add child nodes to node
* \param nid node id to add children to
*/
@ -340,8 +342,8 @@ class TreeModel {
nodes[nodes[nid].cleft() ].set_parent(nid, true);
nodes[nodes[nid].cright()].set_parent(nid, false);
}
/*!
* \brief only add a right child to a leaf node
* \param nid node id to add the right child to
*/
inline void AddRightChild(int nid) {
@ -385,7 +387,7 @@ class TreeModel {
inline int num_extra_nodes(void) const {
return param.num_nodes - param.num_roots - param.num_deleted;
}
/*!
* \brief dump model to text string
* \param fmap feature map of feature types
* \param with_stats whether to dump out statistics as well
@ -400,7 +402,7 @@ class TreeModel {
}
private:
void Dump(int nid, std::stringstream &fo,
void Dump(int nid, std::stringstream &fo, // NOLINT(*)
const utils::FeatMap& fmap, int depth, bool with_stats) {
for (int i = 0; i < depth; ++i) {
fo << '\t';
@ -469,7 +471,7 @@ struct RTreeNodeStat {
/*! \brief number of child that is leaf node known up to now */
int leaf_child_cnt;
/*! \brief print information of current stats to fo */
inline void Print(std::stringstream &fo, bool is_leaf) const {
inline void Print(std::stringstream &fo, bool is_leaf) const { // NOLINT(*)
if (!is_leaf) {
fo << ",gain=" << loss_chg << ",cover=" << sum_hess;
} else {
@ -481,13 +483,13 @@ struct RTreeNodeStat {
/*! \brief define regression tree to be the most common tree model */
class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
public:
/*!
* \brief dense feature vector that can be taken by RegTree
* to do traversal efficiently
* and can be constructed from a sparse feature vector
*/
struct FVec {
/*!
* \brief a union value of value and flag
* when flag == -1, this indicates the value is missing
*/
@ -510,7 +512,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
}
}
/*! \brief drop the trace after fill, must be called after fill */
inline void Drop(const RowBatch::Inst &inst) {
for (bst_uint i = 0; i < inst.length; ++i) {
if (inst[i].index >= data.size()) continue;
data[inst[i].index].flag = -1;
@ -526,10 +528,10 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
}
};
/*!
* \brief get the leaf index
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param root_id starting root index of the instance
* \return the leaf index of the given feature
*/
inline int GetLeafIndex(const FVec &feat, unsigned root_id = 0) const {
// start from groups that belong to current data
@ -545,7 +547,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
* \brief get the prediction of regression tree, only accepts dense feature vector
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param root_id starting root index of the instance
* \return the prediction value of the tree for the given feature vector
*/
inline float Predict(const FVec &feat, unsigned root_id = 0) const {
int pid = this->GetLeafIndex(feat, root_id);
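// Aside: a hedged sketch of the traversal GetLeafIndex performs. Starting
// from the root, a missing feature (NaN) follows the default child, a known
// feature compares against the split condition. SketchNode is a stand-in
// type, not the RegTree API.
#include <cmath>
#include <vector>

struct SketchNode {
  int cleft, cright;      // children; -1 marks a leaf
  unsigned split_index;   // feature tested at this node
  float split_cond;       // threshold
  bool default_left;      // direction taken when the feature is missing
};

inline int WalkToLeaf(const std::vector<SketchNode> &nodes,
                      const std::vector<float> &feat, int nid = 0) {
  while (nodes[nid].cleft != -1) {
    const float fvalue = feat[nodes[nid].split_index];
    if (std::isnan(fvalue)) {
      nid = nodes[nid].default_left ? nodes[nid].cleft : nodes[nid].cright;
    } else {
      nid = fvalue < nodes[nid].split_cond ? nodes[nid].cleft : nodes[nid].cright;
    }
  }
  return nid;  // Predict then just returns the leaf value stored at nid
}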

View File

@ -1,10 +1,13 @@
#ifndef XGBOOST_TREE_PARAM_H_
#define XGBOOST_TREE_PARAM_H_
/*!
* Copyright 2014 by Contributors
* \file param.h
* \brief training parameters, statistics used to support tree construction
* \author Tianqi Chen
*/
#include <vector>
#include <cstring>
#include "../data.h"
@ -27,7 +30,7 @@ struct TrainParam{
// L1 regularization factor
float reg_alpha;
// default direction choice
int default_direction;
// maximum delta update we can add in weight estimation
// this parameter can be used to stabilize the update
// default=0 means no constraint on weight delta
@ -45,7 +48,7 @@ struct TrainParam{
// accuracy of sketch
float sketch_ratio;
// leaf vector size
int size_leaf_vector;
// option for parallelization
int parallel_option;
// option to open cacheline optimization
@ -74,11 +77,11 @@ struct TrainParam{
sketch_ratio = 2.0f;
cache_opt = 1;
}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
inline void SetParam(const char *name, const char *val) {
using namespace std;
// sync-names
@ -116,7 +119,7 @@ struct TrainParam{
if (reg_alpha == 0.0f) {
return Sqr(sum_grad) / (sum_hess + reg_lambda);
} else {
return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda);
}
} else {
double w = CalcWeight(sum_grad, sum_hess);
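// Aside: an illustrative restatement of the gain computed above (assuming
// no max_delta_step constraint, which the surrounding code handles
// separately). The L1 term soft-thresholds the gradient sum before
// squaring, and the L2 term inflates the hessian sum:
//   gain = ThresholdL1(G, alpha)^2 / (H + lambda)
inline double ThresholdL1Sketch(double g, double alpha) {
  if (g > +alpha) return g - alpha;
  if (g < -alpha) return g + alpha;
  return 0.0;
}
inline double CalcGainSketch(double sum_grad, double sum_hess,
                             double reg_alpha, double reg_lambda) {
  const double g = ThresholdL1Sketch(sum_grad, reg_alpha);
  return (g * g) / (sum_hess + reg_lambda);
}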
@ -213,7 +216,7 @@ struct GradStats {
inline static void CheckInfo(const BoosterInfo &info) {
}
/*!
* \brief accumulate statistics
* \param p the gradient pair
*/
inline void Add(bst_gpair p) {
@ -222,7 +225,7 @@ struct GradStats {
/*!
* \brief accumulate statistics, more complicated version
* \param gpair the vector storing the gradient statistics
* \param info the additional information
* \param ridx instance index of this instance
*/
inline void Add(const std::vector<bst_gpair> &gpair,
@ -244,7 +247,7 @@ struct GradStats {
this->Add(b.sum_grad, b.sum_hess);
}
/*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(GradStats &a, const GradStats &b) {
inline static void Reduce(GradStats &a, const GradStats &b) { // NOLINT(*)
a.Add(b);
}
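// Aside: why Reduce is a static two-argument function. rabit's reducers
// combine per-worker buffers elementwise, so any POD statistics type
// exposing `static void Reduce(T &dst, const T &src)` can be aggregated
// across the cluster. A stand-in sketch of the pattern (StatsSketch is
// illustrative, not the real TStats):
struct StatsSketch {
  double sum_grad, sum_hess;
  inline void Add(const StatsSketch &b) {
    sum_grad += b.sum_grad; sum_hess += b.sum_hess;
  }
  inline static void Reduce(StatsSketch &a, const StatsSketch &b) { a.Add(b); }
};
// typical call site, mirroring the reducer members declared in these files:
//   rabit::Reducer<StatsSketch, StatsSketch::Reduce> reducer;
//   reducer.Allreduce(BeginPtr(stats), stats.size());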
/*! \brief set current value to a - b */
@ -257,8 +260,8 @@ struct GradStats {
return sum_hess == 0.0;
}
/*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const{
}
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
}
// constructor to allow inheritance
GradStats(void) {}
/*! \brief add statistics to the data */
@ -311,7 +314,7 @@ struct CVGradStats : public GradStats {
ret += param.CalcGain(train[i].sum_grad,
train[i].sum_hess,
vsize * valid[i].sum_grad,
vsize * valid[i].sum_hess);
}
return ret / vsize;
}
@ -324,7 +327,7 @@ struct CVGradStats : public GradStats {
}
}
/*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(CVGradStats &a, const CVGradStats &b) {
inline static void Reduce(CVGradStats &a, const CVGradStats &b) { // NOLINT(*)
a.Add(b);
}
/*! \brief set current value to a - b */
@ -344,8 +347,8 @@ struct CVGradStats : public GradStats {
}
};
/*!
* \brief statistics that are helpful to store
* and represent a split solution for the tree
*/
struct SplitEntry{
@ -357,12 +360,12 @@ struct SplitEntry{
float split_value;
/*! \brief constructor */
SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
/*!
* \brief decides whether we can replace the current entry with the given statistics
* This function gives priority to the lower index when loss_chg is equal;
* not the best way, but it helps to give consistent results during multi-threaded execution
* \param loss_chg the loss reduction obtained through the split
* \param split_index the feature index the split is on
*/
inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
if (this->split_index() <= split_index) {
@ -371,7 +374,7 @@ struct SplitEntry{
return !(this->loss_chg > new_loss_chg);
}
}
/*!
* \brief update the split entry, replace it if e is better
* \param e candidate split solution
* \return whether the proposed split is better and can replace current split
@ -386,7 +389,7 @@ struct SplitEntry{
return false;
}
}
/*!
* \brief update the split entry, replace it if e is better
* \param loss_chg loss reduction of new candidate
* \param split_index feature index to split on
@ -407,7 +410,7 @@ struct SplitEntry{
}
}
/*! \brief same as update, used by AllReduce*/
inline static void Reduce(SplitEntry &dst, const SplitEntry &src) {
inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { // NOLINT(*)
dst.Update(src);
}
/*!\return feature index to split on */
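// Aside: a simplified sketch of the tie-breaking rule documented above --
// a candidate replaces the current best if its loss reduction is strictly
// larger, or equal but with a lower feature index, which keeps
// multi-threaded enumeration deterministic. This condenses the
// NeedReplace/Update pair into one function; it is not the library code.
struct SplitSketch {
  float loss_chg;
  unsigned split_index;
  float split_value;
  SplitSketch(void) : loss_chg(0.0f), split_index(0), split_value(0.0f) {}
  inline bool Update(float new_loss_chg, unsigned new_index, float new_value) {
    const bool replace = (new_loss_chg > loss_chg) ||
        (new_loss_chg == loss_chg && new_index < split_index);
    if (replace) {
      loss_chg = new_loss_chg; split_index = new_index; split_value = new_value;
    }
    return replace;
  }
};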

View File

@ -1,3 +1,4 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_H_
#define XGBOOST_TREE_UPDATER_H_
/*!
* Copyright 2014 by Contributors
* \file updater.h
* \brief interface to update the tree
* \author Tianqi Chen
*/
#include <vector>
#include "../data.h"
@ -12,7 +14,7 @@
namespace xgboost {
namespace tree {
/*!
* \brief interface of tree update module, that performs update of a tree
*/
class IUpdater {
@ -21,7 +23,7 @@ class IUpdater {
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
virtual void SetParam(const char *name, const char *val) = 0;
/*!
* \brief perform update to the tree models
@ -29,8 +31,8 @@ class IUpdater {
* \param p_fmat feature matrix that provide access to features
* \param info extra side information that may be need, such as root index
* \param trees pointer to the trees to be updated, the updater will change the content of the tree
* note: all the trees in the vector are updated with the same statistics,
* but maybe different random seeds; usually one tree is passed in at a time,
* there can be multiple trees when we train random forest style model
*/
virtual void Update(const std::vector<bst_gpair> &gpair,
@ -38,7 +40,7 @@ class IUpdater {
const BoosterInfo &info,
const std::vector<RegTree*> &trees) = 0;
/*!
* \brief this is simply a function for optimizing performance
* it asks the updater to return the leaf position of each instance in p_fmat
* if it is cached in the updater; if it is not available, return NULL
@ -50,8 +52,8 @@ class IUpdater {
// destructor
virtual ~IUpdater(void) {}
};
/*!
* \brief create an updater based on name
* \param name name of updater
* \return return the updater instance
*/

View File

@ -1,12 +1,14 @@
#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_basemaker-inl.hpp
* \brief implement a common tree constructor
* \author Tianqi Chen
*/
#include <vector>
#include <algorithm>
#include <string>
#include <limits>
#include "../sync/sync.h"
#include "../utils/random.h"
@ -14,7 +16,7 @@
namespace xgboost {
namespace tree {
/*!
* \brief base tree maker class that defines common operation
* needed in tree making
*/
@ -26,7 +28,7 @@ class BaseMaker: public IUpdater {
virtual void SetParam(const char *name, const char *val) {
param.SetParam(name, val);
}
protected:
// helper to collect and query feature meta information
struct FMetaHelper {
@ -60,8 +62,11 @@ class BaseMaker: public IUpdater {
bst_float a = fminmax[fid * 2];
bst_float b = fminmax[fid * 2 + 1];
if (a == -std::numeric_limits<bst_float>::max()) return 0;
if (-a == b) return 1;
else return 2;
if (-a == b) {
return 1;
} else {
return 2;
}
}
inline bst_float MaxValue(bst_uint fid) const {
return fminmax[fid *2 + 1];
@ -70,7 +75,7 @@ class BaseMaker: public IUpdater {
std::vector<bst_uint> &findex = *p_findex;
findex.clear();
for (size_t i = 0; i < fminmax.size(); i += 2) {
const bst_uint fid = static_cast<bst_uint>(i / 2);
if (this->Type(fid) != 0) findex.push_back(fid);
}
unsigned n = static_cast<unsigned>(p * findex.size());
@ -86,7 +91,7 @@ class BaseMaker: public IUpdater {
rabit::Broadcast(&s_cache, 0);
fs.Read(&findex);
}
private:
std::vector<bst_float> fminmax;
};
@ -116,7 +121,7 @@ class BaseMaker: public IUpdater {
}
return nthread;
}
// ------class member helpers---------
/*! \brief initialize temp data structure */
inline void InitData(const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat,
@ -124,7 +129,8 @@ class BaseMaker: public IUpdater {
const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots,
"TreeMaker: can only grow new tree");
{// setup position
{
// setup position
position.resize(gpair.size());
if (root_index.size() == 0) {
std::fill(position.begin(), position.end(), 0);
@ -147,7 +153,8 @@ class BaseMaker: public IUpdater {
}
}
}
{// expand query
{
// expand query
qexpand.reserve(256); qexpand.clear();
for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand.push_back(i);
@ -170,7 +177,7 @@ class BaseMaker: public IUpdater {
this->UpdateNode2WorkIndex(tree);
}
// return decoded position
inline int DecodePosition(bst_uint ridx) const{
inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx];
return pid < 0 ? ~pid : pid;
}
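// Aside: a sketch of the position encoding read off DecodePosition above.
// A non-negative entry is an active node id, while the bitwise complement
// ~nid (always negative) parks a row on a non-fresh leaf so later
// statistics passes can skip it, yet the node id stays recoverable.
#include <cassert>

inline int EncodeInactiveSketch(int nid) { return ~nid; }
inline int DecodePositionSketch(int pid) { return pid < 0 ? ~pid : pid; }

inline void demo_position_encoding(void) {
  const int nid = 5;
  const int parked = EncodeInactiveSketch(nid);      // negative sentinel
  assert(parked < 0 && DecodePositionSketch(parked) == nid);
  assert(DecodePositionSketch(nid) == nid);          // active ids are identity
}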
@ -182,23 +189,24 @@ class BaseMaker: public IUpdater {
position[ridx] = nid;
}
}
/*!
* \brief this is a helper function that uses the column-based data structure
* to reset the positions to the latest one
* \param nodes the set of nodes that contains the split to be used
* \param p_fmat feature matrix needed for tree construction
* \param tree the regression tree structure
*/
inline void ResetPositionCol(const std::vector<int> &nodes, IFMatrix *p_fmat, const RegTree &tree) {
inline void ResetPositionCol(const std::vector<int> &nodes,
IFMatrix *p_fmat, const RegTree &tree) {
// set the positions in the nondefault
this->SetNonDefaultPositionCol(nodes, p_fmat, tree);
// set rest of instances to default position
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
// set default direct nodes to default
// for leaf nodes that are not fresh, mark them as ~nid,
// so that they are ignored in future statistics collection
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
@ -237,7 +245,7 @@ class BaseMaker: public IUpdater {
}
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
while (iter->Next()) {
const ColBatch &batch = iter->Value();
@ -252,7 +260,7 @@ class BaseMaker: public IUpdater {
const int nid = this->DecodePosition(ridx);
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if(fvalue < tree[nid].split_cond()) {
if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
@ -324,7 +332,7 @@ class BaseMaker: public IUpdater {
sketch->temp.size = 0;
}
/*!
* \brief push a new element to sketch
* \param fvalue feature value, comes in sorted ascending order
* \param w weight
* \param max_size
@ -337,31 +345,32 @@ class BaseMaker: public IUpdater {
return;
}
if (last_fvalue != fvalue) {
double rmax = rmin + wmin;
if (rmax >= next_goal && sketch->temp.size != max_size) {
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
if (sketch->temp.size == 0 ||
last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
// push to sketch
sketch->temp.data[sketch->temp.size] =
utils::WXQuantileSketch<bst_float, bst_float>::
Entry(static_cast<bst_float>(rmin),
static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue);
utils::Assert(sketch->temp.size < max_size,
"invalid maximum size max_size=%u, stemp.size=%lu\n",
max_size, sketch->temp.size);
++sketch->temp.size;
}
if (sketch->temp.size == max_size) {
next_goal = sum_total * 2.0f + 1e-5f;
} else {
next_goal = static_cast<bst_float>(sketch->temp.size * sum_total / max_size);
}
} else {
if (rmax >= next_goal) {
rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n",
rmax, sum_total, next_goal, sketch->temp.size);
}
}
}
rmin = rmax;
wmin = w;
last_fvalue = fvalue;
@ -375,13 +384,13 @@ class BaseMaker: public IUpdater {
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
utils::Assert(sketch->temp.size <= max_size,
"Finalize: invalid maximum size, max_size=%u, stemp.size=%lu",
max_size, sketch->temp.size);
// push to sketch
sketch->temp.data[sketch->temp.size] =
utils::WXQuantileSketch<bst_float, bst_float>::
Entry(static_cast<bst_float>(rmin),
static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue);
++sketch->temp.size;
}
sketch->PushTemp();
@ -415,4 +424,4 @@ class BaseMaker: public IUpdater {
};
} // namespace tree
} // namespace xgboost
#endif  // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_colmaker-inl.hpp
* \brief use columnwise update to construct a tree
* \author Tianqi Chen
*/
#include <vector>
#include <cmath>
#include <algorithm>
@ -114,10 +116,13 @@ class ColMaker: public IUpdater {
// initialize temp data structure
inline void InitData(const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat,
const std::vector<unsigned> &root_index, const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree");
const std::vector<unsigned> &root_index,
const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots,
"ColMaker: can only grow new tree");
const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
{// setup position
{
// setup position
position.resize(gpair.size());
if (root_index.size() == 0) {
for (size_t i = 0; i < rowset.size(); ++i) {
@ -127,7 +132,8 @@ class ColMaker: public IUpdater {
for (size_t i = 0; i < rowset.size(); ++i) {
const bst_uint ridx = rowset[i];
position[ridx] = root_index[ridx];
utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, "root index exceed setting");
utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots,
"root index exceed setting");
}
}
// mark delete for the deleted datas
@ -154,11 +160,12 @@ class ColMaker: public IUpdater {
}
unsigned n = static_cast<unsigned>(param.colsample_bytree * feat_index.size());
random::Shuffle(feat_index);
utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included",
param.colsample_bytree);
feat_index.resize(n);
}
{// setup temp space for each thread
{
// setup temp space for each thread
#pragma omp parallel
{
this->nthread = omp_get_num_threads();
@ -171,20 +178,25 @@ class ColMaker: public IUpdater {
}
snode.reserve(256);
}
{// expand query
{
// expand query
qexpand_.reserve(256); qexpand_.clear();
for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand_.push_back(i);
}
}
}
/*! \brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */
/*!
* \brief initialize the base_weight, root_gain,
* and NodeEntry for all the new nodes in qexpand
*/
inline void InitNewNode(const std::vector<int> &qexpand,
const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat,
const BoosterInfo &info,
const RegTree &tree) {
{// setup statistics space for each tree node
{
// setup statistics space for each tree node
for (size_t i = 0; i < stemp.size(); ++i) {
stemp[i].resize(tree.param.num_nodes, ThreadEntry(param));
}
@ -226,7 +238,7 @@ class ColMaker: public IUpdater {
}
// use new nodes for qexpand
qexpand = newnodes;
}
}
// parallel find the best split of current fid
// this function does not support nested functions
inline void ParallelFindSplit(const ColBatch::Inst &col,
@ -280,26 +292,30 @@ class ColMaker: public IUpdater {
ThreadEntry &e = stemp[tid][nid];
float fsplit;
if (tid != 0) {
if(std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
if (std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f;
} else {
continue;
}
} else {
fsplit = e.first_fvalue - rt_eps;
}
}
if (need_forward && tid != 0) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, fsplit, false);
}
}
if (need_backward) {
tmp.SetSubstract(sum, e.stats);
c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, fsplit, true);
}
}
@ -308,8 +324,10 @@ class ColMaker: public IUpdater {
tmp = sum;
ThreadEntry &e = stemp[nthread-1][nid];
c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true);
}
}
@ -335,25 +353,31 @@ class ColMaker: public IUpdater {
e.first_fvalue = fvalue;
} else {
// forward default right
if (std::abs(fvalue - e.first_fvalue) > rt_2eps){
if (need_forward) {
if (std::abs(fvalue - e.first_fvalue) > rt_2eps) {
if (need_forward) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) -
snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false);
}
}
if (need_backward) {
cright.SetSubstract(e.stats_extra, e.stats);
c.SetSubstract(snode[nid].stats, cright);
if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
cright.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) +
c.CalcGain(param) -
snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true);
}
}
}
}
e.stats.Add(gpair, info, ridx);
e.first_fvalue = fvalue;
}
}
}
@ -361,7 +385,7 @@ class ColMaker: public IUpdater {
// update enumeration solution
inline void UpdateEnumeration(int nid, bst_gpair gstats,
float fvalue, int d_step, bst_uint fid,
TStats &c, std::vector<ThreadEntry> &temp) {
TStats &c, std::vector<ThreadEntry> &temp) { // NOLINT(*)
// get the statistics of nid
ThreadEntry &e = temp[nid];
// test if first hit, this is fine, because we set 0 during init
@ -370,10 +394,12 @@ class ColMaker: public IUpdater {
e.last_fvalue = fvalue;
} else {
// try to find a split
if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
e.stats.sum_hess >= param.min_child_weight) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
}
}
@ -388,7 +414,7 @@ class ColMaker: public IUpdater {
int d_step,
bst_uint fid,
const std::vector<bst_gpair> &gpair,
std::vector<ThreadEntry> &temp) {
std::vector<ThreadEntry> &temp) { // NOLINT(*)
const std::vector<int> &qexpand = qexpand_;
// clear all the temp statistics
for (size_t j = 0; j < qexpand.size(); ++j) {
@ -423,7 +449,7 @@ class ColMaker: public IUpdater {
this->UpdateEnumeration(nid, buf_gpair[i],
p->fvalue, d_step,
fid, c, temp);
}
}
}
// finish up the ending piece
for (it = align_end, i = 0; it != end; ++i, it += d_step) {
@ -436,14 +462,15 @@ class ColMaker: public IUpdater {
this->UpdateEnumeration(nid, buf_gpair[i],
it->fvalue, d_step,
fid, c, temp);
}
}
// finish updating all statistics, check if it is possible to include all sum statistics
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
ThreadEntry &e = temp[nid];
c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@ -458,7 +485,7 @@ class ColMaker: public IUpdater {
bst_uint fid,
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
std::vector<ThreadEntry> &temp) {
std::vector<ThreadEntry> &temp) { // NOLINT(*)
// use cacheline aware optimization
if (TStats::kSimpleStats != 0 && param.cache_opt != 0) {
EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp);
@ -471,7 +498,7 @@ class ColMaker: public IUpdater {
}
// left statistics
TStats c(param);
for(const ColBatch::Entry *it = begin; it != end; it += d_step) {
for (const ColBatch::Entry *it = begin; it != end; it += d_step) {
const bst_uint ridx = it->index;
const int nid = position[ridx];
if (nid < 0) continue;
@ -485,10 +512,12 @@ class ColMaker: public IUpdater {
e.last_fvalue = fvalue;
} else {
// try to find a split
if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
e.stats.sum_hess >= param.min_child_weight) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
}
}
@ -503,7 +532,8 @@ class ColMaker: public IUpdater {
ThreadEntry &e = temp[nid];
c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@ -511,14 +541,14 @@ class ColMaker: public IUpdater {
}
}
// update the solution candidate
// update the solution candidate
virtual void UpdateSolution(const ColBatch &batch,
const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat,
const BoosterInfo &info) {
// start enumeration
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
#if defined(_OPENMP)
const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
#endif
int poption = param.parallel_option;
@ -533,11 +563,11 @@ class ColMaker: public IUpdater {
const ColBatch::Inst c = batch[i];
const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue;
if (param.need_forward_search(fmat.GetColDensity(fid), ind)) {
this->EnumerateSplit(c.data, c.data + c.length, +1,
fid, gpair, info, stemp[tid]);
}
if (param.need_backward_search(fmat.GetColDensity(fid), ind)) {
this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1,
fid, gpair, info, stemp[tid]);
}
}
@ -546,7 +576,7 @@ class ColMaker: public IUpdater {
this->ParallelFindSplit(batch[i], batch.col_index[i],
fmat, gpair, info);
}
}
}
}
// find splits at current level, do split per level
inline void FindSplit(int depth,
@ -571,7 +601,7 @@ class ColMaker: public IUpdater {
// get the best result, we can synchronize the solution
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
NodeEntry &e = snode[nid];
// now we know the solution in snode[nid], set split
if (e.best.loss_chg > rt_eps) {
p_tree->AddChilds(nid);
@ -582,19 +612,20 @@ class ColMaker: public IUpdater {
} else {
(*p_tree)[nid].set_leaf(e.weight * param.learning_rate);
}
}
}
}
// reset position of each data points after split is created in the tree
inline void ResetPosition(const std::vector<int> &qexpand, IFMatrix *p_fmat, const RegTree &tree) {
inline void ResetPosition(const std::vector<int> &qexpand,
IFMatrix *p_fmat, const RegTree &tree) {
// set the positions in the nondefault
this->SetNonDefaultPosition(qexpand, p_fmat, tree);
// set rest of instances to default position
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
// set default direct nodes to default
// for leaf nodes that are not fresh, mark them as ~nid,
// so that they are ignored in future statistics collection
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
@ -655,7 +686,7 @@ class ColMaker: public IUpdater {
const float fvalue = col[j].fvalue;
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if(fvalue < tree[nid].split_cond()) {
if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
@ -667,7 +698,7 @@ class ColMaker: public IUpdater {
}
// utils to get/set position, with encoded format
// return decoded position
inline int DecodePosition(bst_uint ridx) const{
inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx];
return pid < 0 ? ~pid : pid;
}
@ -679,7 +710,7 @@ class ColMaker: public IUpdater {
position[ridx] = nid;
}
}
//--data fields--
// --data fields--
const TrainParam &param;
// number of omp thread used during training
int nthread;

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_distcol-inl.hpp
* \brief beta distributed version that takes a sub-column
* and constructs a tree
* \author Tianqi Chen
*/
#include <vector>
#include <algorithm>
#include "../sync/sync.h"
#include "../utils/bitmap.h"
#include "../utils/io.h"
@ -27,7 +31,7 @@ class DistColMaker : public ColMaker<TStats> {
virtual void Update(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
const std::vector<RegTree*> &trees) {
TStats::CheckInfo(info);
utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time");
// build the tree
@ -39,11 +43,12 @@ class DistColMaker : public ColMaker<TStats> {
}
virtual const int* GetLeafPosition(void) const {
return builder.GetLeafPosition();
}
}
private:
struct Builder : public ColMaker<TStats>::Builder {
public:
Builder(const TrainParam &param)
explicit Builder(const TrainParam &param)
: ColMaker<TStats>::Builder(param) {
}
inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) {
@ -63,7 +68,8 @@ class DistColMaker : public ColMaker<TStats> {
virtual const int* GetLeafPosition(void) const {
return BeginPtr(this->position);
}
protected:
virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
IFMatrix *p_fmat, const RegTree &tree) {
// step 2, classify the non-default data into right places
@ -87,7 +93,7 @@ class DistColMaker : public ColMaker<TStats> {
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
boolmap[j] = 0;
}
}
}
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
while (iter->Next()) {
@ -111,7 +117,7 @@ class DistColMaker : public ColMaker<TStats> {
}
}
}
bitmap.InitFromBool(boolmap);
// communicate bitmap
rabit::Allreduce<rabit::op::BitOR>(BeginPtr(bitmap.data), bitmap.data.size());
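// Aside: why BitOR is the right reduction here (an inference from the
// surrounding code, not a spec). Each worker holds a column slice, so it
// can decide left/right only for rows whose split feature it stores, and
// leaves the other bits zero. OR-ing the packed words across workers
// therefore merges the partial decisions losslessly; per word the
// reduction amounts to:
#include <cstddef>
#include <cstdint>
#include <vector>

inline void MergeBitmapsSketch(std::vector<std::uint32_t> *mine,
                               const std::vector<std::uint32_t> &theirs) {
  for (std::size_t i = 0; i < mine->size(); ++i) (*mine)[i] |= theirs[i];
}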
@ -142,7 +148,7 @@ class DistColMaker : public ColMaker<TStats> {
}
vec.push_back(this->snode[nid].best);
}
// TODO, lazy version
// TODO(tqchen) lazy version
// communicate best solution
reducer.Allreduce(BeginPtr(vec), vec.size());
// assign solution back
@ -151,7 +157,7 @@ class DistColMaker : public ColMaker<TStats> {
this->snode[nid].best = vec[i];
}
}
private:
utils::BitMap bitmap;
std::vector<int> boolmap;
@ -162,8 +168,8 @@ class DistColMaker : public ColMaker<TStats> {
// training parameter
TrainParam param;
// pointer to the builder
Builder builder;
};
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_histmaker-inl.hpp
* \brief use histogram counting to construct a tree
* \author Tianqi Chen
*/
#include <vector>
#include <algorithm>
#include "../sync/sync.h"
@ -38,7 +40,7 @@ class HistMaker: public BaseMaker {
struct HistUnit {
/*! \brief cutting point of histogram, contains maximum point */
const bst_float *cut;
/*! \brief content of statistics data */
TStats *data;
/*! \brief size of histogram */
unsigned size;
@ -48,13 +50,13 @@ class HistMaker: public BaseMaker {
HistUnit(const bst_float *cut, TStats *data, unsigned size)
: cut(cut), data(data), size(size) {}
/*! \brief add a histogram to data */
inline void Add(bst_float fv,
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
const bst_uint ridx) {
unsigned i = std::upper_bound(cut, cut + size, fv) - cut;
utils::Assert(size != 0, "try insert into size=0");
utils::Assert(i < size,
"maximum value must be in cut, fv = %g, cutmax=%g", fv, cut[size-1]);
data[i].Add(gpair, info, ridx);
}
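// Aside: a minimal sketch of the binning step above. The cut points are
// sorted and end with the feature maximum, so std::upper_bound always
// yields a valid bin index for any in-range value.
#include <algorithm>
#include <vector>

inline unsigned FindBinSketch(const std::vector<float> &cut, float fvalue) {
  // index of the first cut strictly greater than fvalue
  return static_cast<unsigned>(
      std::upper_bound(cut.begin(), cut.end(), fvalue) - cut.begin());
}
// e.g. cut = {0.5f, 1.5f, 3.0f}: FindBinSketch(cut, 0.2f) == 0,
//      FindBinSketch(cut, 0.9f) == 1, FindBinSketch(cut, 2.9f) == 2.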
@ -74,7 +76,7 @@ class HistMaker: public BaseMaker {
rptr[fid+1] - rptr[fid]);
}
};
// thread workspace
struct ThreadWSpace {
/*! \brief actual unit pointer */
std::vector<unsigned> rptr;
@ -92,7 +94,7 @@ class HistMaker: public BaseMaker {
}
hset[tid].rptr = BeginPtr(rptr);
hset[tid].cut = BeginPtr(cut);
hset[tid].data.resize(cut.size(), TStats(param));
}
}
// aggregate all statistics to hset[0]
@ -147,7 +149,7 @@ class HistMaker: public BaseMaker {
}
// this function does two jobs
// (1) reset the position in array position, to be the latest leaf id
// (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
@ -171,8 +173,9 @@ class HistMaker: public BaseMaker {
const BoosterInfo &info,
const std::vector <bst_uint> &fset,
const RegTree &tree) = 0;
private:
inline void EnumerateSplit(const HistUnit &hist,
const TStats &node_sum,
bst_uint fid,
SplitEntry *best,
@ -187,7 +190,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update((float)loss_chg, fid, hist.cut[i], false)) {
if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i], false)) {
*left_sum = s;
}
}
@ -200,7 +203,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update((float)loss_chg, fid, hist.cut[i-1], true)) {
if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i-1], true)) {
*left_sum = c;
}
}
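// Aside: a hedged sketch of the histogram scan above -- accumulate
// left-child stats bin by bin, derive the right child by subtraction from
// the node total, and keep the best gain. GainOfSketch stands in for
// param.CalcGain; the plain L2-only form is an assumption for brevity.
#include <cstddef>
#include <vector>

struct BinStatsSketch { double grad, hess; };

inline double GainOfSketch(const BinStatsSketch &s, double lambda) {
  return (s.grad * s.grad) / (s.hess + lambda);
}

inline double BestForwardGainSketch(const std::vector<BinStatsSketch> &hist,
                                    const BinStatsSketch &node_sum,
                                    double lambda, double min_child_weight) {
  BinStatsSketch left = {0.0, 0.0};
  const double root_gain = GainOfSketch(node_sum, lambda);
  double best = 0.0;
  for (std::size_t i = 0; i + 1 < hist.size(); ++i) {
    left.grad += hist[i].grad;
    left.hess += hist[i].hess;
    const BinStatsSketch right = {node_sum.grad - left.grad,
                                  node_sum.hess - left.hess};
    if (left.hess >= min_child_weight && right.hess >= min_child_weight) {
      const double chg =
          GainOfSketch(left, lambda) + GainOfSketch(right, lambda) - root_gain;
      if (chg > best) best = chg;
    }
  }
  return best;
}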
@ -216,22 +219,22 @@ class HistMaker: public BaseMaker {
const size_t num_feature = fset.size();
// get the best split condition for each node
std::vector<SplitEntry> sol(qexpand.size());
std::vector<TStats> left_sum(qexpand.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
utils::Assert(node2workindex[nid] == static_cast<int>(wid),
"node2workindex inconsistent");
SplitEntry &best = sol[wid];
TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
for (size_t i = 0; i < fset.size(); ++ i) {
for (size_t i = 0; i < fset.size(); ++i) {
EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)],
node_sum, fset[i], &best, &left_sum[wid]);
}
}
// get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
const SplitEntry &best = sol[wid];
const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
@ -244,7 +247,7 @@ class HistMaker: public BaseMaker {
(*p_tree)[nid].set_split(best.split_index(),
best.split_value, best.default_left());
// mark right child as 0, to indicate fresh leaf
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
// right side sum
TStats right_sum;
@ -256,11 +259,11 @@ class HistMaker: public BaseMaker {
}
}
}
inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
p_tree->stat(nid).base_weight = static_cast<float>(node_sum.CalcWeight(param));
p_tree->stat(nid).sum_hess = static_cast<float>(node_sum.sum_hess);
node_sum.SetLeafVec(param, p_tree->leafvec(nid));
}
};
@ -270,7 +273,7 @@ class CQHistMaker: public HistMaker<TStats> {
struct HistEntry {
typename HistMaker<TStats>::HistUnit hist;
unsigned istart;
/*!
* \brief add a histogram to data,
* do linear scan, start from istart
*/
@ -282,7 +285,7 @@ class CQHistMaker: public HistMaker<TStats> {
utils::Assert(istart != hist.size, "the bound variable must be max");
hist.data[istart].Add(gpair, info, ridx);
}
/*!
* \brief add a histogram to data,
* do linear scan, start from istart
*/
@ -302,7 +305,7 @@ class CQHistMaker: public HistMaker<TStats> {
feat_helper.InitByCol(p_fmat, tree);
feat_helper.SampleCol(this->param.colsample_bytree, p_fset);
}
// code to create histogram
virtual void CreateHist(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
@ -313,7 +316,7 @@ class CQHistMaker: public HistMaker<TStats> {
std::fill(feat2workindex.begin(), feat2workindex.end(), -1);
for (size_t i = 0; i < fset.size(); ++i) {
feat2workindex[fset[i]] = static_cast<int>(i);
}
}
// start to work
this->wspace.Init(this->param, 1);
// if it is C++11, use lazy evaluation for Allreduce,
@ -350,11 +353,11 @@ class CQHistMaker: public HistMaker<TStats> {
// sync the histogram
// if it is C++11, use lazy evaluation for Allreduce
#if __cplusplus >= 201103L
this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data),
this->wspace.hset[0].data.size(), lazy_get_hist);
#else
this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size());
#endif
}
virtual void ResetPositionAfterSplit(IFMatrix *p_fmat,
const RegTree &tree) {
@ -374,11 +377,11 @@ class CQHistMaker: public HistMaker<TStats> {
feat2workindex[fset[i]] = static_cast<int>(freal_set.size());
freal_set.push_back(fset[i]);
} else {
feat2workindex[fset[i]] = -2;
}
}
this->GetNodeStats(gpair, *p_fmat, tree, info,
&thread_stats, &node_stats);
sketchs.resize(this->qexpand.size() * freal_set.size());
for (size_t i = 0; i < sketchs.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps);
@ -394,7 +397,8 @@ class CQHistMaker: public HistMaker<TStats> {
#if __cplusplus >= 201103L
auto lazy_get_summary = [&]()
#endif
{
// get summary
thread_sketch.resize(this->get_nthread());
// number of rows in the buffered rowset
const size_t nrows = p_fmat->buffered_rowset().size();
@ -457,9 +461,9 @@ class CQHistMaker: public HistMaker<TStats> {
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
} else {
utils::Assert(offset == -2, "BUG in mark");
bst_float cpt = feat_helper.MaxValue(fset[i]);
this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps);
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
}
}
// reserve last value for global statistics
@ -470,7 +474,7 @@ class CQHistMaker: public HistMaker<TStats> {
(fset.size() + 1) * this->qexpand.size() + 1,
"cut space inconsistent");
}
private:
inline void UpdateHistCol(const std::vector<bst_gpair> &gpair,
const ColBatch::Inst &c,
@ -554,9 +558,9 @@ class CQHistMaker: public HistMaker<TStats> {
}
} else {
for (size_t i = 0; i < this->qexpand.size(); ++i) {
const unsigned nid = this->qexpand[i];
sbuilder[nid].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);
}
}
}
// if only one value, no need to do second pass
if (c[0].fvalue == c[c.length-1].fvalue) {
@ -589,7 +593,7 @@ class CQHistMaker: public HistMaker<TStats> {
if (nid >= 0) {
sbuilder[nid].Push(c[j + i].fvalue, buf_hess[i], max_size);
}
}
}
}
for (bst_uint j = align_length; j < c.length; ++j) {
const bst_uint ridx = c[j].index;
@ -617,7 +621,7 @@ class CQHistMaker: public HistMaker<TStats> {
// temp space to map feature id to working index
std::vector<int> feat2workindex;
// set of index from fset that are real
std::vector<bst_uint> freal_set;
// thread temp data
std::vector< std::vector<BaseMaker::SketchEntry> > thread_sketch;
// used to hold statistics
@ -631,18 +635,18 @@ class CQHistMaker: public HistMaker<TStats> {
// reducer for summary
rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
// per node, per feature sketch
std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
};
template<typename TStats>
class QuantileHistMaker: public HistMaker<TStats> {
protected:
typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
const std::vector <bst_uint> &fset,
const RegTree &tree) {
// initialize the data structure
int nthread = BaseMaker::get_nthread();
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
@ -658,7 +662,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
utils::ParallelGroupBuilder<SparseBatch::Entry> builder(&col_ptr, &col_data, &thread_col_ptr);
builder.InitBudget(tree.param.num_feature, nthread);
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nbatch; ++i) {
RowBatch::Inst inst = batch[i];
@ -667,11 +671,11 @@ class QuantileHistMaker: public HistMaker<TStats> {
if (nid >= 0) {
if (!tree[nid].is_leaf()) {
this->position[ridx] = nid = HistMaker<TStats>::NextLevel(inst, tree, nid);
}
}
if (this->node2workindex[nid] < 0) {
this->position[ridx] = ~nid;
} else {
for (bst_uint j = 0; j < inst.length; ++j) {
builder.AddBudget(inst[j].index, omp_get_thread_num());
}
}
@ -712,8 +716,8 @@ class QuantileHistMaker: public HistMaker<TStats> {
summary_array[i].Reserve(max_size);
summary_array[i].SetPrune(out, max_size);
}
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
sreducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());
// now we get the final result of sketch, setup the cut
this->wspace.cut.clear();

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_prune-inl.hpp
* \brief prune a tree given the statistics
* \author Tianqi Chen
*/
#include <vector>
#include "./param.h"
#include "./updater.h"
@ -37,9 +39,10 @@ class TreePruner: public IUpdater {
param.learning_rate = lr;
syncher.Update(gpair, p_fmat, info, trees);
}
private:
// try to prune off current leaf
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) {
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
if (tree[nid].is_root()) return npruned;
int pid = tree[nid].parent();
RegTree::NodeStat &s = tree.stat(pid);
@ -51,10 +54,10 @@ class TreePruner: public IUpdater {
return this->TryPruneLeaf(tree, pid, depth - 1, npruned+2);
} else {
return npruned;
}
}
}
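// Aside: a simplified sketch of the recursion above -- when a node's
// children are both leaves and its recorded loss change is under the
// threshold, it is collapsed to a leaf and pruning is retried on its
// parent. PruneNode and its fields are stand-ins, not the RegTree API.
#include <vector>

struct PruneNode {
  int parent, cleft, cright;   // -1 children mark a leaf
  float loss_chg;              // gain recorded when the node was split
};

inline int TryPruneSketch(std::vector<PruneNode> *t, int nid,
                          float min_gain, int npruned) {
  std::vector<PruneNode> &tr = *t;
  if (tr[nid].parent == -1) return npruned;      // reached the root
  const int pid = tr[nid].parent;
  const bool both_leaves = tr[tr[pid].cleft].cleft == -1 &&
                           tr[tr[pid].cright].cleft == -1;
  if (both_leaves && tr[pid].loss_chg < min_gain) {
    tr[pid].cleft = tr[pid].cright = -1;         // collapse to a leaf
    return TryPruneSketch(t, pid, min_gain, npruned + 2);
  }
  return npruned;
}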
/*! \brief do pruning of a tree */
inline void DoPrune(RegTree &tree) {
inline void DoPrune(RegTree &tree) { // NOLINT(*)
int npruned = 0;
// initialize auxiliary statistics
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_refresh-inl.hpp
* \brief refresh the statistics and leaf value on the tree on the dataset
* \author Tianqi Chen
*/
#include <vector>
#include <limits>
#include "../sync/sync.h"
@ -27,7 +29,7 @@ class TreeRefresher: public IUpdater {
virtual void Update(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
const std::vector<RegTree*> &trees) {
if (trees.size() == 0) return;
// number of threads
// thread temporal space
@ -100,7 +102,7 @@ class TreeRefresher: public IUpdater {
float lr = param.learning_rate;
param.learning_rate = lr / trees.size();
int offset = 0;
for (size_t i = 0; i < trees.size(); ++i) {
for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) {
this->Refresh(BeginPtr(stemp[0]) + offset, rid, trees[i]);
}
@ -147,7 +149,7 @@ class TreeRefresher: public IUpdater {
// training parameter
TrainParam param;
// reducer
rabit::Reducer<TStats, TStats::Reduce> reducer;
};
} // namespace tree

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_skmaker-inl.hpp
* \brief use approximation sketch to construct a tree,
a refresh is needed to make the statistics exactly correct
* \author Tianqi Chen
*/
#include <vector>
#include <algorithm>
#include "../sync/sync.h"
@ -30,7 +32,7 @@ class SketchMaker: public BaseMaker {
}
param.learning_rate = lr;
}
protected:
inline void Update(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
@ -79,9 +81,9 @@ class SketchMaker: public BaseMaker {
double pos_grad;
/*! \brief sum of all negative gradient */
double neg_grad;
/*! \brief sum of hessian statistics */
double sum_hess;
explicit SKStats(void) {}
SKStats(void) {}
// constructor
explicit SKStats(const TrainParam &param) {
this->Clear();
@ -123,7 +125,7 @@ class SketchMaker: public BaseMaker {
sum_hess += b.sum_hess;
}
/*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(SKStats &a, const SKStats &b) {
inline static void Reduce(SKStats &a, const SKStats &b) { // NOLINT(*)
a.Add(b);
}
/*! \brief set leaf vector value based on statistics */
@ -139,7 +141,7 @@ class SketchMaker: public BaseMaker {
sketchs[i].Init(info.num_row, this->param.sketch_eps);
}
thread_sketch.resize(this->get_nthread());
// number of rows in the buffered rowset
const size_t nrows = p_fmat->buffered_rowset().size();
// start accumulating statistics
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator();
@ -156,7 +158,7 @@ class SketchMaker: public BaseMaker {
batch[i].length == nrows,
&thread_sketch[omp_get_thread_num()]);
}
}
}
// setup maximum size
unsigned max_size = param.max_sketch_size();
// synchronize sketch
@ -167,8 +169,8 @@ class SketchMaker: public BaseMaker {
summary_array[i].Reserve(max_size);
summary_array[i].SetPrune(out, max_size);
}
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());
}
// update sketch information in column fid
inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair,
@ -209,7 +211,7 @@ class SketchMaker: public BaseMaker {
const unsigned nid = this->qexpand[i];
sbuilder[3 * nid + 0].sum_total = static_cast<bst_float>(nstats[nid].pos_grad);
sbuilder[3 * nid + 1].sum_total = static_cast<bst_float>(nstats[nid].neg_grad);
sbuilder[3 * nid + 2].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);
}
}
// if only one value, no need to do second pass
@ -217,7 +219,9 @@ class SketchMaker: public BaseMaker {
for (size_t i = 0; i < this->qexpand.size(); ++i) {
const int nid = this->qexpand[i];
for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[3 * nid + k].sum_total));
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
static_cast<bst_float>(
sbuilder[3 * nid + k].sum_total));
}
}
return;
@ -250,7 +254,7 @@ class SketchMaker: public BaseMaker {
sbuilder[3 * nid + k].Finalize(max_size);
}
}
}
}
inline void SyncNodeStats(void) {
utils::Assert(qexpand.size() != 0, "qexpand must not be empty");
std::vector<SKStats> tmp(qexpand.size());
@ -272,12 +276,12 @@ class SketchMaker: public BaseMaker {
std::vector<SplitEntry> sol(qexpand.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
utils::Assert(node2workindex[nid] == static_cast<int>(wid),
"node2workindex inconsistent");
SplitEntry &best = sol[wid];
for (bst_uint fid = 0; fid < num_feature; ++ fid) {
for (bst_uint fid = 0; fid < num_feature; ++fid) {
unsigned base = (wid * p_tree->param.num_feature + fid) * 3;
EnumerateSplit(summary_array[base + 0],
summary_array[base + 1],
@ -286,7 +290,7 @@ class SketchMaker: public BaseMaker {
}
}
// get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
const SplitEntry &best = sol[wid];
// set up the values
@ -337,7 +341,7 @@ class SketchMaker: public BaseMaker {
feat_sum.neg_grad = neg_grad.data[neg_grad.size - 1].rmax;
feat_sum.sum_hess = sum_hess.data[sum_hess.size - 1].rmax;
size_t ipos = 0, ineg = 0, ihess = 0;
for (size_t i = 1; i < fsplits.size(); ++i) {
WXQSketch::Entry pos = pos_grad.Query(fsplits[i], ipos);
WXQSketch::Entry neg = neg_grad.Query(fsplits[i], ineg);
WXQSketch::Entry hess = sum_hess.Query(fsplits[i], ihess);
@ -345,11 +349,11 @@ class SketchMaker: public BaseMaker {
s.pos_grad = 0.5f * (pos.rmin + pos.rmax - pos.wmin);
s.neg_grad = 0.5f * (neg.rmin + neg.rmax - neg.wmin);
s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin);
c.SetSubstract(node_sum, s);
// forward
if (s.sum_hess >= param.min_child_weight &&
c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], false);
}
// backward
@ -357,22 +361,23 @@ class SketchMaker: public BaseMaker {
s.SetSubstract(node_sum, c);
if (s.sum_hess >= param.min_child_weight &&
c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true);
}
}
}
{
// all including
SKStats s = feat_sum, c;
c.SetSubstract(node_sum, s);
if (s.sum_hess >= param.min_child_weight &&
c.sum_hess >= param.min_child_weight) {
bst_float cpt = fsplits.back();
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
best->Update(static_cast<bst_float>(loss_chg), fid, cpt + fabsf(cpt) + 1.0f, false);
}
}
}
// thread temp data
// used to hold temporal sketch
std::vector< std::vector<SketchEntry> > thread_sketch;
@ -389,6 +394,6 @@ class SketchMaker: public BaseMaker {
// per node, per feature sketch
std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
};
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_

View File

@ -1,18 +1,21 @@
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_sync-inl.hpp
* \brief synchronize the tree in all distributed nodes
* \author Tianqi Chen
*/
#include <vector>
#include <string>
#include <limits>
#include "../sync/sync.h"
#include "./updater.h"
namespace xgboost {
namespace tree {
-/*!
+/*!
* \brief syncher that synchronize the tree in all distributed nodes
* can implement various strategies, so far it is always set to node 0's tree
*/
@@ -28,7 +31,7 @@ class TreeSyncher: public IUpdater {
const std::vector<RegTree*> &trees) {
this->SyncTrees(trees);
}
private:
// synchronize the trees in different nodes, take tree from rank 0
inline void SyncTrees(const std::vector<RegTree *> &trees) {
@@ -43,7 +46,7 @@ class TreeSyncher: public IUpdater {
}
fs.Seek(0);
rabit::Broadcast(&s_model, 0);
-for (size_t i = 0; i < trees.size(); ++i) {
+for (size_t i = 0; i < trees.size(); ++i) {
trees[i]->LoadModel(fs);
}
}
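The SyncTrees pattern above is the standard rabit idiom: serialize into an in-memory stream, let rank 0's bytes win via Broadcast, rewind, and deserialize on every worker. A minimal sketch with a plain string payload instead of a RegTree; it assumes only rabit::Broadcast(std::string*, int) and rabit::GetRank(), which the sync header re-exports:

#include <string>
#include <rabit.h>  // assumed include path; this file goes through ../sync/sync.h

// After this call every worker holds rank 0's payload.
inline void SyncPayload(std::string *payload) {
  if (rabit::GetRank() != 0) {
    payload->clear();            // non-root contents are overwritten anyway
  }
  rabit::Broadcast(payload, 0);  // ship rank 0's bytes to all workers
}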

src/utils/base64-inl.h
View File

@@ -1,13 +1,16 @@
-#ifndef XGBOOST_UTILS_BASE64_INL_H_
-#define XGBOOST_UTILS_BASE64_INL_H_
/*!
+* Copyright 2014 by Contributors
* \file base64.h
* \brief data stream support to input and output from/to base64 stream
* base64 is easier to store and pass as text format in mapreduce
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_BASE64_INL_H_
+#define XGBOOST_UTILS_BASE64_INL_H_
#include <cctype>
#include <cstdio>
#include <string>
#include "./io.h"
namespace xgboost {
@@ -15,7 +18,7 @@ namespace utils {
/*! \brief buffer reader of the stream that allows you to get */
class StreamBufferReader {
public:
-StreamBufferReader(size_t buffer_size)
+explicit StreamBufferReader(size_t buffer_size)
:stream_(NULL),
read_len_(1), read_ptr_(1) {
buffer_.resize(buffer_size);
@@ -45,7 +48,7 @@ class StreamBufferReader {
inline bool AtEnd(void) const {
return read_len_ == 0;
}
private:
/*! \brief the underlying stream */
IStream *stream_;
@@ -75,7 +78,7 @@ const char DecodeTable[] = {
};
static const char EncodeTable[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-} // namespace base64
+} // namespace base64
/*! \brief the stream that reads from base64, note we take from file pointers */
class Base64InStream: public IStream {
public:
@@ -83,8 +86,8 @@ class Base64InStream: public IStream {
reader_.set_stream(fs);
num_prev = 0; tmp_ch = 0;
}
-/*!
-* \brief initialize the stream position to beginning of next base64 stream
+/*!
+* \brief initialize the stream position to beginning of next base64 stream
* call this function before actually start read
*/
inline void InitPosition(void) {
@@ -132,19 +135,19 @@ class Base64InStream: public IStream {
{
// second byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
nvalue |= DecodeTable[tmp_ch] << 12;
*cptr++ = (nvalue >> 16) & 0xFF; --tlen;
}
{
// third byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
// handle termination
if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
break;
}
nvalue |= DecodeTable[tmp_ch] << 6;
@@ -157,10 +160,10 @@ class Base64InStream: public IStream {
{
// fourth byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
break;
}
nvalue |= DecodeTable[tmp_ch];
@@ -240,13 +243,13 @@ class Base64OutStream: public IStream {
if (endch != EOF) PutChar(endch);
this->Flush();
}
-private:
+private:
IStream *fp;
int buf_top;
unsigned char buf[4];
std::string out_buf;
-const static size_t kBufferSize = 256;
+static const size_t kBufferSize = 256;
inline void PutChar(char ch) {
out_buf += ch;
@@ -260,5 +263,5 @@ class Base64OutStream: public IStream {
}
};
} // namespace utils
-} // namespace rabit
-#endif // RABIT_LEARN_UTILS_BASE64_INL_H_
+} // namespace xgboost
+#endif // XGBOOST_UTILS_BASE64_INL_H_
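Base64OutStream and Base64InStream together round-trip arbitrary bytes through printable text, which is what lets models travel over text-only channels such as MapReduce records. A minimal sketch of the round trip, assuming the FileStream wrapper from src/utils/io.h later in this diff and a Finish() method matching the flush logic shown above; error handling omitted:

#include <cstdio>

using namespace xgboost::utils;

void RoundTrip(std::FILE *fo, std::FILE *fi) {
  // encode: binary int -> base64 text
  FileStream out(fo);
  Base64OutStream enc(&out);
  int value = 42;
  enc.Write(&value, sizeof(value));
  enc.Finish('\n');      // flush remaining bits, pad, end the record

  // decode: base64 text -> binary int
  FileStream in(fi);
  Base64InStream dec(&in);
  dec.InitPosition();    // skip whitespace to the start of the payload
  int readback = 0;
  dec.Read(&readback, sizeof(readback));
}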

src/utils/bitmap.h
View File

@@ -1,11 +1,13 @@
-#ifndef XGBOOST_UTILS_BITMAP_H_
-#define XGBOOST_UTILS_BITMAP_H_
/*!
+* Copyright 2014 by Contributors
* \file bitmap.h
* \brief a simple implement of bitmap
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_BITMAP_H_
+#define XGBOOST_UTILS_BITMAP_H_
#include <vector>
#include "./utils.h"
#include "./omp.h"
@@ -16,22 +18,22 @@ namespace utils {
struct BitMap {
/*! \brief internal data structure */
std::vector<uint32_t> data;
-/*!
-* \brief resize the bitmap to be certain size
+/*!
+* \brief resize the bitmap to be certain size
* \param size the size of bitmap
*/
inline void Resize(size_t size) {
data.resize((size + 31U) >> 5, 0);
}
-/*!
-* \brief query the i-th position of bitmap
-* \param i the position in
+/*!
+* \brief query the i-th position of bitmap
+* \param i the position in
*/
inline bool Get(size_t i) const {
return (data[i >> 5] >> (i & 31U)) & 1U;
}
-/*!
-* \brief set i-th position to true
+/*!
+* \brief set i-th position to true
* \param i position index
*/
inline void SetTrue(size_t i) {
@@ -63,4 +65,4 @@ struct BitMap {
};
} // namespace utils
} // namespace xgboost
-#endif
+#endif // XGBOOST_UTILS_BITMAP_H_
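A minimal usage sketch of the structure above: each 32-bit word covers 32 positions, so Resize rounds the requested size up to whole words, and, per the NOTE in the header comment, concurrent writers are only safe when they touch different words:

void MarkPositions() {
  xgboost::utils::BitMap mark;
  mark.Resize(100);          // allocates 4 zero-initialized 32-bit words
  mark.SetTrue(42);          // sets bit 10 of word 1
  bool hit = mark.Get(42);   // true
  bool miss = mark.Get(43);  // false
  (void)hit; (void)miss;
}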

src/utils/config.h
View File

@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_CONFIG_H_
-#define XGBOOST_UTILS_CONFIG_H_
/*!
+* Copyright 2014 by Contributors
* \file config.h
* \brief helper class to load in configures from file
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_CONFIG_H_
+#define XGBOOST_UTILS_CONFIG_H_
#include <cstdio>
#include <cstring>
#include <string>
@@ -14,26 +16,26 @@
namespace xgboost {
namespace utils {
/*!
/*!
* \brief base implementation of config reader
*/
class ConfigReaderBase {
public:
-/*!
+/*!
* \brief get current name, called after Next returns true
-* \return current parameter name
+* \return current parameter name
*/
inline const char *name(void) const {
return s_name.c_str();
}
-/*!
+/*!
* \brief get current value, called after Next returns true
-* \return current parameter value
+* \return current parameter value
*/
inline const char *val(void) const {
return s_val.c_str();
}
-/*!
+/*!
* \brief move iterator to next position
* \return true if there is value in next position
*/
@@ -55,7 +57,7 @@ class ConfigReaderBase {
protected:
/*!
* \brief to be implemented by subclass,
-* get next token, return EOF if end of file
+* get next token, return EOF if end of file
*/
virtual char GetChar(void) = 0;
/*! \brief to be implemented by child, check if end of stream */
@@ -144,9 +146,9 @@
*/
class ConfigStreamReader: public ConfigReaderBase {
public:
-/*!
-* \brief constructor
-* \param istream input stream
+/*!
+* \brief constructor
+* \param istream input stream
*/
explicit ConfigStreamReader(std::istream &fin) : fin(fin) {}
@@ -163,13 +165,13 @@ class ConfigStreamReader: public ConfigReaderBase {
std::istream &fin;
};
-/*!
+/*!
* \brief an iterator that iterates over a configure file and gets the configures
*/
class ConfigIterator: public ConfigStreamReader {
public:
-/*!
-* \brief constructor
+/*!
+* \brief constructor
* \param fname name of configure file
*/
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) {
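Driving the iterator above follows the ConfigReaderBase accessors: Next() advances to the following key/value pair until the file is exhausted, and name()/val() expose the current pair. A minimal sketch, assuming a configuration file in the usual xgboost key = value format ("mushroom.conf" is a hypothetical file name):

#include <cstdio>

void PrintConfig() {
  xgboost::utils::ConfigIterator itr("mushroom.conf");
  while (itr.Next()) {
    // prints each parameter as: name = value
    std::printf("%s = %s\n", itr.name(), itr.val());
  }
}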

src/utils/fmap.h
View File

@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_FMAP_H_
-#define XGBOOST_UTILS_FMAP_H_
/*!
+* Copyright 2014 by Contributors
* \file fmap.h
* \brief helper class that holds the feature names and interpretations
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_FMAP_H_
+#define XGBOOST_UTILS_FMAP_H_
#include <vector>
#include <string>
#include <cstring>
@@ -78,4 +80,4 @@ class FeatMap {
} // namespace utils
} // namespace xgboost
-#endif // XGBOOST_FMAP_H_
+#endif // XGBOOST_UTILS_FMAP_H_

src/utils/group_data.h
View File

@@ -1,6 +1,5 @@
-#ifndef XGBOOST_UTILS_GROUP_DATA_H_
-#define XGBOOST_UTILS_GROUP_DATA_H_
/*!
+* Copyright 2014 by Contributors
* \file group_data.h
* \brief this file defines utils to group data by integer keys
* Input: given input sequence (key,value), (k1,v1), (k2,v2)
@@ -12,6 +11,11 @@
* The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_GROUP_DATA_H_
+#define XGBOOST_UTILS_GROUP_DATA_H_
#include <vector>
namespace xgboost {
namespace utils {
/*!
@@ -32,10 +36,10 @@ struct ParallelGroupBuilder {
std::vector< std::vector<SizeType> > *p_thread_rptr)
: rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) {
}
public:
/*!
-* \brief step 1: initialize the helper, with hint of number keys
+* \brief step 1: initialize the helper, with hint of number keys
* and thread used in the construction
* \param nkeys number of keys in the matrix, can be smaller than expected
* \param nthread number of thread that will be used in construction
@@ -56,7 +60,7 @@ struct ParallelGroupBuilder {
inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) {
std::vector<SizeType> &trptr = thread_rptr[threadid];
if (trptr.size() < key + 1) {
-trptr.resize(key + 1, 0);
+trptr.resize(key + 1, 0);
}
trptr[key] += nelem;
}
@@ -84,13 +88,13 @@ struct ParallelGroupBuilder {
data.resize(start);
}
/*!
-* \brief step 4: add data to the allocated space,
+* \brief step 4: add data to the allocated space,
* the calls to this function should be exactly match previous call to AddBudget
*
-* \param key the key of
+* \param key the key of
* \param threadid the id of thread that calls this function
*/
-inline void Push(size_t key, ValueType value, int threadid) {
+inline void Push(size_t key, ValueType value, int threadid) {
SizeType &rp = thread_rptr[threadid][key];
data[rp++] = value;
}
@@ -107,5 +111,4 @@ struct ParallelGroupBuilder {
};
} // namespace utils
} // namespace xgboost
-#endif
+#endif // XGBOOST_UTILS_GROUP_DATA_H_
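The four numbered steps in the comments above map onto a two-pass CSR construction: announce budgets per key, prefix-sum them into row pointers, then push the values. A minimal single-threaded sketch, assuming the three-argument constructor partially shown above; the concrete ValueType/SizeType choices here are illustrative:

#include <vector>

void BuildGroups() {
  std::vector<unsigned> rptr;                      // row pointers, one slot per key
  std::vector<float> data;                         // grouped values
  std::vector< std::vector<unsigned> > tmp_rptr;   // per-thread scratch
  xgboost::utils::ParallelGroupBuilder<float, unsigned>
      builder(&rptr, &data, &tmp_rptr);

  builder.InitBudget(3, 1);   // step 1: up to 3 keys, 1 thread
  builder.AddBudget(2, 0);    // step 2: announce one value under key 2
  builder.AddBudget(0, 0);    //         and one under key 0
  builder.InitStorage();      // step 3: prefix-sum the budgets into rptr
  builder.Push(2, 0.5f, 0);   // step 4: pushes must mirror the budget calls
  builder.Push(0, 1.5f, 0);
  // data[rptr[k] .. rptr[k+1]) now holds the values grouped under key k.
}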

src/utils/io.h
View File

@@ -1,16 +1,19 @@
-#ifndef XGBOOST_UTILS_IO_H
-#define XGBOOST_UTILS_IO_H
+/*!
+* Copyright 2014 by Contributors
+* \file io.h
+* \brief general stream interface for serialization, I/O
+* \author Tianqi Chen
+*/
+#ifndef XGBOOST_UTILS_IO_H_
+#define XGBOOST_UTILS_IO_H_
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include "./utils.h"
#include "../sync/sync.h"
-/*!
-* \file io.h
-* \brief general stream interface for serialization, I/O
-* \author Tianqi Chen
-*/
namespace xgboost {
namespace utils {
// reuse the definitions of streams
@@ -23,7 +26,7 @@ typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
class FileStream : public ISeekStream {
public:
explicit FileStream(std::FILE *fp) : fp(fp) {}
-explicit FileStream(void) {
+FileStream(void) {
this->fp = NULL;
}
virtual size_t Read(void *ptr, size_t size) {
@@ -33,7 +36,7 @@ class FileStream : public ISeekStream {
std::fwrite(ptr, size, 1, fp);
}
virtual void Seek(size_t pos) {
-std::fseek(fp, static_cast<long>(pos), SEEK_SET);
+std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
}
virtual size_t Tell(void) {
return std::ftell(fp);
@@ -42,7 +45,7 @@ class FileStream : public ISeekStream {
return std::feof(fp) != 0;
}
inline void Close(void) {
-if (fp != NULL){
+if (fp != NULL) {
std::fclose(fp); fp = NULL;
}
}
@@ -52,6 +55,5 @@ class FileStream : public ISeekStream {
};
} // namespace utils
} // namespace xgboost
#include "./base64-inl.h"
-#endif
+#endif // XGBOOST_UTILS_IO_H_
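FileStream is a thin ISeekStream over std::FILE. A minimal sketch of the Write/Seek/Read cycle it supports, the same rewind-then-reload pattern TreeSyncher uses on its buffer stream earlier in this diff; error checks omitted:

#include <cstdio>

void WriteThenRead() {
  std::FILE *fp = std::fopen("tmp.bin", "wb+");
  xgboost::utils::FileStream fs(fp);
  double x = 3.14;
  fs.Write(&x, sizeof(x));
  fs.Seek(0);               // rewind to the beginning
  double y = 0.0;
  fs.Read(&y, sizeof(y));   // y == 3.14
  fs.Close();               // fclose, with the NULL guard shown above
}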

src/utils/iterator.h
View File

@@ -1,11 +1,13 @@
-#ifndef XGBOOST_UTILS_ITERATOR_H
-#define XGBOOST_UTILS_ITERATOR_H
-#include <cstdio>
/*!
+* Copyright 2014 by Contributors
* \file iterator.h
* \brief itertator interface
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_ITERATOR_H_
+#define XGBOOST_UTILS_ITERATOR_H_
+#include <cstdio>
namespace xgboost {
namespace utils {
/*!
@@ -16,7 +18,7 @@ template<typename DType>
class IIterator {
public:
/*!
-* \brief set the parameter
+* \brief set the parameter
* \param name name of parameter
* \param val value of parameter
*/
@@ -36,5 +38,5 @@ class IIterator {
} // namespace utils
} // namespace xgboost
-#endif
+#endif // XGBOOST_UTILS_ITERATOR_H_

src/utils/math.h
View File

@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_MATH_H_
-#define XGBOOST_UTILS_MATH_H_
/*!
+* Copyright 2014 by Contributors
* \file math.h
* \brief support additional math
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_MATH_H_
+#define XGBOOST_UTILS_MATH_H_
#include <cmath>
namespace xgboost {
@@ -28,7 +30,8 @@ inline T LogGamma(T v) {
#if _MSC_VER >= 1800
return lgamma(v);
#else
-#pragma message ("Warning: lgamma function was not available until VS2013, poisson regression will be disabled")
+#pragma message("Warning: lgamma function was not available until VS2013"\
+", poisson regression will be disabled")
utils::Error("lgamma function was not available until VS2013");
return static_cast<T>(1.0);
#endif

src/utils/omp.h
View File

@@ -1,16 +1,20 @@
-#ifndef XGBOOST_UTILS_OMP_H_
-#define XGBOOST_UTILS_OMP_H_
/*!
+* Copyright 2014 by Contributors
* \file omp.h
* \brief header to handle OpenMP compatibility issues
* \author Tianqi Chen
*/
+#ifndef XGBOOST_UTILS_OMP_H_
+#define XGBOOST_UTILS_OMP_H_
#if defined(_OPENMP)
#include <omp.h>
#else
#ifndef DISABLE_OPENMP
// use pragma message instead of warning
-#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
+#pragma message("Warning: OpenMP is not available,"\
+"xgboost will be compiled into single-thread code."\
+"Use OpenMP-enabled compiler to get benefit of multi-threading")
#endif
inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; }
@@ -25,6 +29,6 @@ typedef int bst_omp_uint;
#else
typedef unsigned bst_omp_uint;
#endif
-} // namespace xgboost
+} // namespace xgboost
#endif // XGBOOST_UTILS_OMP_H_
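The bst_omp_uint typedef exists because MSVC's OpenMP implementation insists on a signed loop index while GCC and Clang accept unsigned; parallel loops such as the SketchMaker one earlier in this diff are written once against the typedef and compile either way. A minimal sketch, assuming this header is reachable as utils/omp.h on the include path:

#include <cstddef>
#include "utils/omp.h"

// Doubles every element; the index type adapts to the compiler's OpenMP rules.
void ScaleInPlace(float *values, size_t n) {
  xgboost::bst_omp_uint len = static_cast<xgboost::bst_omp_uint>(n);
  #pragma omp parallel for schedule(static)
  for (xgboost::bst_omp_uint i = 0; i < len; ++i) {
    values[i] *= 2.0f;
  }
}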

Some files were not shown because too many files have changed in this diff.