Merge remote-tracking branch 'dmlc/master'

This commit is contained in:
El Potaeto 2015-07-15 16:00:21 +02:00
commit 86f9f707d8
114 changed files with 6723 additions and 1702 deletions

9
.gitignore vendored
View File

@ -58,3 +58,12 @@ R-package.Rproj
*.cache*
R-package/inst
R-package/src
#java
java/xgboost4j/target
java/xgboost4j/tmp
java/xgboost4j-demo/target
java/xgboost4j-demo/data/
java/xgboost4j-demo/tmp/
java/xgboost4j-demo/model/
nb-configuration*
dmlc-core

47
.travis.yml Normal file
View File

@ -0,0 +1,47 @@
sudo: true
# Use Build Matrix to do lint and build separately
env:
matrix:
- TASK=lint LINT_LANG=cpp
- TASK=lint LINT_LANG=python
- TASK=R-package CXX=g++
- TASK=python-package CXX=g++
- TASK=java-package CXX=g++
- TASK=build CXX=g++
- TASK=build-with-dmlc CXX=g++
# dependent apt packages
addons:
apt:
packages:
- doxygen
- libopenmpi-dev
- wget
- libcurl4-openssl-dev
- unzip
- python-numpy
- python-scipy
- python-nose
before_install:
- git clone https://github.com/dmlc/dmlc-core
- export TRAVIS=dmlc-core/scripts/travis/
- export PYTHONPATH=${PYTHONPATH}:${PWD}/wrapper
- source ${TRAVIS}/travis_setup_env.sh
install:
# `--user` is a flag that takes no argument; the trailing `whoami` output that
# used to follow it was parsed by pip as one more package name to install.
- pip install --user cpplint pylint
script: scripts/travis_script.sh
after_failure:
- scripts/travis_after_failure.sh
notifications:
email:
on_success: change
on_failure: always

View File

@ -3,6 +3,8 @@ export CXX = g++
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops
# java include path
export JAVAINCFLAGS = -I${JAVA_HOME}/include -I${JAVA_HOME}/include/linux -I./java
ifeq ($(OS), Windows_NT)
export CXX = g++ -m64
@ -53,6 +55,9 @@ else
SLIB = wrapper/libxgboostwrapper.so
endif
# java lib
JLIB = java/libxgboostjavawrapper.so
# specify tensor path
BIN = xgboost
MOCKBIN = xgboost.mock
@ -64,7 +69,11 @@ else
TARGET = $(BIN)
endif
.PHONY: clean all mpi python Rpack
ifndef LINT_LANG
LINT_LANG= "all"
endif
.PHONY: clean all mpi python Rpack lint
all: $(TARGET)
mpi: $(MPIBIN)
@ -79,6 +88,9 @@ main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner
xgboost: updater.o gbm.o io.o main.o $(LIBRABIT) $(LIBDMLC)
wrapper/xgboost_wrapper.dll wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
java: java/libxgboostjavawrapper.so
java/libxgboostjavawrapper.so: java/xgboost4j_wrapper.cpp wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
# dependency on rabit
subtree/rabit/lib/librabit.a: subtree/rabit/src/engine.cc
+ cd subtree/rabit;make lib/librabit.a; cd ../..
@ -98,6 +110,9 @@ $(MOCKBIN) :
$(SLIB) :
$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS)
$(JLIB) :
$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS)
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
@ -133,10 +148,23 @@ Rpack:
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
cp xgboost/src/Makevars xgboost/src/Makevars.win
# R CMD build --no-build-vignettes xgboost
# R CMD build xgboost
# rm -rf xgboost
# R CMD check --as-cran xgboost*.tar.gz
# Rbuild and Rcheck are commands, not files: declare them phony so a stray
# file of the same name cannot make them look up to date.
.PHONY: Rbuild Rcheck

# Stage the package with Rpack, build the source tarball, then remove the
# staged xgboost directory.
Rbuild:
	$(MAKE) Rpack
	R CMD build xgboost
	rm -rf xgboost

# Build the tarball via Rbuild, then run the CRAN checks on it.
# $(MAKE) (rather than a literal `make`) propagates -j/-n and command-line
# variables to the sub-make.
Rcheck:
	$(MAKE) Rbuild
	R CMD check --as-cran xgboost*.tar.gz
# lint requires dmlc-core to be cloned into the current folder: the recipe
# runs dmlc-core/scripts/lint.py from there.
# LINT_LANG is forwarded to the lint script; it defaults to "all" when unset.
lint:
dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package
# Remove the build products of this Makefile (including the Java wrapper
# library), then clean the rabit subtree.
clean:
	$(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) $(JLIB) *.o */*.o */*/*.o *~ */*~ */*/*~
	# Use -C instead of `cd dir; make clean`: if the cd failed, the old form
	# would run `make clean` in this directory again and recurse endlessly.
	$(MAKE) -C subtree/rabit clean

View File

@ -220,7 +220,8 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
stop("nfold must be bigger than 1")
}
if(is.null(folds)) {
if (exists('objective', where=param) && strtrim(param[['objective']], 5) == 'rank:') {
if (exists('objective', where=param) && is.character(param$objective) &&
strtrim(param[['objective']], 5) == 'rank:') {
stop("\tAutomatic creation of CV-folds is not implemented for ranking!\n",
"\tConsider providing pre-computed CV-folds through the folds parameter.")
}
@ -234,7 +235,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
# For classification, need to convert y labels to factor before making the folds,
# and then do stratification by factor levels.
# For regression, leave y numeric and do stratification by quantiles.
if (exists('objective', where=param)) {
if (exists('objective', where=param) && is.character(param$objective)) {
# If 'objective' provided in params, assume that y is a classification label
# unless objective is reg:linear
if (param[['objective']] != 'reg:linear') y <- factor(y)

View File

@ -95,152 +95,160 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
prediction = FALSE, showsd = TRUE, metrics=list(),
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
early.stop.round = NULL, maximize = NULL, ...) {
if (typeof(params) != "list") {
stop("xgb.cv: first argument params must be list")
}
if(!is.null(folds)) {
if(class(folds)!="list" | length(folds) < 2) {
stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
if (typeof(params) != "list") {
stop("xgb.cv: first argument params must be list")
}
nfold <- length(folds)
}
if (nfold <= 1) {
stop("nfold must be bigger than 1")
}
if (is.null(missing)) {
dtrain <- xgb.get.DMatrix(data, label)
} else {
dtrain <- xgb.get.DMatrix(data, label, missing)
}
params <- append(params, list(...))
params <- append(params, list(silent=1))
for (mc in metrics) {
params <- append(params, list("eval_metric"=mc))
}
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
stop("xgb.cv: cannot assign two different objectives")
if (!is.null(params$objective))
if (class(params$objective)=='function') {
obj = params$objective
params$objective = NULL
if(!is.null(folds)) {
if(class(folds)!="list" | length(folds) < 2) {
stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
}
nfold <- length(folds)
}
if (!is.null(params$eval_metric) && !is.null(feval))
stop("xgb.cv: cannot assign two different evaluation metrics")
if (!is.null(params$eval_metric))
if (class(params$eval_metric)=='function') {
feval = params$eval_metric
params$eval_metric = NULL
if (nfold <= 1) {
stop("nfold must be bigger than 1")
}
# Early Stopping
if (!is.null(early.stop.round)){
if (!is.null(feval) && is.null(maximize))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize) && is.null(params$eval_metric))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize))
{
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
maximize = FALSE
} else {
maximize = TRUE
}
}
if (maximize) {
bestScore = 0
if (is.null(missing)) {
dtrain <- xgb.get.DMatrix(data, label)
} else {
bestScore = Inf
dtrain <- xgb.get.DMatrix(data, label, missing)
}
dot.params = list(...)
nms.params = names(params)
nms.dot.params = names(dot.params)
if (length(intersect(nms.params,nms.dot.params))>0)
stop("Duplicated defined term in parameters. Please check your list of params.")
params <- append(params, dot.params)
params <- append(params, list(silent=1))
for (mc in metrics) {
params <- append(params, list("eval_metric"=mc))
}
bestInd = 0
earlyStopflag = FALSE
if (length(metrics)>1)
warning('Only the first metric is used for early stopping process.')
}
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
obj_type = params[['objective']]
mat_pred = FALSE
if (!is.null(obj_type) && obj_type=='multi:softprob')
{
num_class = params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
mat_pred = TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
history <- c()
print.every.n = max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
msg <- list()
for (k in 1:nfold) {
fd <- xgb_folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
if (i<nrounds) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
if (!prediction) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
res <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction)
if (mat_pred) {
pred_mat = matrix(res[[2]],num_class,length(fd$index))
predictValues[fd$index,] <- t(pred_mat)
} else {
predictValues[fd$index] <- res[[2]]
}
msg[[k]] <- res[[1]] %>% str_split("\t") %>% .[[1]]
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))
stop("xgb.cv: cannot assign two different objectives")
if (!is.null(params$objective))
if (class(params$objective)=='function') {
obj = params$objective
params[['objective']] = NULL
}
# if (!is.null(params$eval_metric) && !is.null(feval))
# stop("xgb.cv: cannot assign two different evaluation metrics")
if (!is.null(params$eval_metric))
if (class(params$eval_metric)=='function') {
feval = params$eval_metric
params[['eval_metric']] = NULL
}
}
}
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose)
if (0==(i-1L)%%print.every.n)
cat(ret, "\n", sep="")
# early_Stopping
# Early Stopping
if (!is.null(early.stop.round)){
score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
score = strsplit(score,'\\+|:')[[1]][[2]]
score = as.numeric(score)
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
bestScore = score
bestInd = i
} else {
if (i-bestInd>=early.stop.round) {
earlyStopflag = TRUE
cat('Stopping. Best iteration:',bestInd)
break
if (!is.null(feval) && is.null(maximize))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize) && is.null(params$eval_metric))
stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
if (is.null(maximize))
{
if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
maximize = FALSE
} else {
maximize = TRUE
}
}
}
if (maximize) {
bestScore = 0
} else {
bestScore = Inf
}
bestInd = 0
earlyStopflag = FALSE
if (length(metrics)>1)
warning('Only the first metric is used for early stopping process.')
}
}
xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
obj_type = params[['objective']]
mat_pred = FALSE
if (!is.null(obj_type) && obj_type=='multi:softprob')
{
num_class = params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
mat_pred = TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
history <- c()
print.every.n = max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {
msg <- list()
for (k in 1:nfold) {
fd <- xgb_folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
}
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose)
if (0==(i-1L)%%print.every.n)
cat(ret, "\n", sep="")
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
colnamesMean <- paste(colnames, "mean")
if(showsd) colnamesStd <- paste(colnames, "std")
# early_Stopping
if (!is.null(early.stop.round)){
score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
score = strsplit(score,'\\+|:')[[1]][[2]]
score = as.numeric(score)
if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
bestScore = score
bestInd = i
} else {
if (i-bestInd>=early.stop.round) {
earlyStopflag = TRUE
cat('Stopping. Best iteration:',bestInd)
break
}
}
}
colnames <- c()
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
else colnames <- colnamesMean
}
type <- rep(x = "numeric", times = length(colnames))
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t")
if (prediction) {
for (k in 1:nfold) {
fd = xgb_folds[[k]]
if (!is.null(early.stop.round) && earlyStopflag) {
res = xgb.iter.eval(fd$booster, fd$watchlist, bestInd - 1, feval, prediction)
} else {
res = xgb.iter.eval(fd$booster, fd$watchlist, nrounds - 1, feval, prediction)
}
if (mat_pred) {
pred_mat = matrix(res[[2]],num_class,length(fd$index))
predictValues[fd$index,] = t(pred_mat)
} else {
predictValues[fd$index] = res[[2]]
}
}
}
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
if (prediction) {
return(list(dt = dt,pred = predictValues))
}
return(dt)
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
colnamesMean <- paste(colnames, "mean")
if(showsd) colnamesStd <- paste(colnames, "std")
colnames <- c()
if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
else colnames <- colnamesMean
type <- rep(x = "numeric", times = length(colnames))
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split <- str_split(string = history, pattern = "\t")
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
if (prediction) {
return(list(dt = dt,pred = predictValues))
}
return(dt)
}
# Avoid error messages during CRAN check.

View File

@ -137,7 +137,13 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
if (length(watchlist) != 0 && verbose == 0) {
warning('watchlist is provided but verbose=0, no evaluation information will be printed')
}
params = append(params, list(...))
dot.params = list(...)
nms.params = names(params)
nms.dot.params = names(dot.params)
if (length(intersect(nms.params,nms.dot.params))>0)
stop("Duplicated term in parameters. Please check your list of params.")
params = append(params, dot.params)
# customized objective and evaluation metric interface
if (!is.null(params$objective) && !is.null(obj))

View File

@ -1,6 +1,8 @@
# R package for xgboost.
R package for xgboost
=====================
## Installation
Installation
------------
For the up-to-date version (which is recommended), please install from GitHub. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
@ -8,8 +10,26 @@ For up-to-date version (which is recommended), please install from github. Windo
devtools::install_github('dmlc/xgboost',subdir='R-package')
```
## Examples
Examples
--------
* Please visit [walk through example](demo).
* See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd).
Notes
-----
If you face an issue installing the package using ```devtools::install_github```, something like this (even after updating libxml and RCurl, as a lot of forums suggest) -
```
devtools::install_github('dmlc/xgboost',subdir='R-package')
Downloading github repo dmlc/xgboost@master
Error in function (type, msg, asError = TRUE) :
Peer certificate cannot be authenticated with given CA certificates
```
To get around this you can build the package locally as mentioned [here](https://github.com/dmlc/xgboost/issues/347) -
```
1. Clone the current repository and set your workspace to xgboost/R-package/
2. Run R CMD INSTALL --build . in terminal to get the tarball.
3. Run install.packages('path_to_the_tarball',repo=NULL) in R to install.
```

View File

@ -1,8 +1,10 @@
require(xgboost)
require(Matrix)
require(data.table)
if (!require(vcd)) install.packages('vcd') #Available in Cran. Used for its dataset with categorical values.
if (!require(vcd)) {
install.packages('vcd') #Available in Cran. Used for its dataset with categorical values.
require(vcd)
}
# According to its documentation, Xgboost works only on numbers.
# Sometimes the dataset we have to work on has categorical data.
# A categorical variable is one which has a fixed number of values. For example, if for each observation a variable called "Colour" can have only "red", "blue" or "green" as value, it is a categorical variable.

View File

@ -1,3 +1,4 @@
// Copyright (c) 2014 by Contributors
#include <vector>
#include <string>
#include <utility>
@ -34,7 +35,7 @@ bool CheckNAN(double v) {
bool LogGamma(double v) {
return lgammafn(v);
}
} // namespace utils
} // namespace utils
namespace random {
void Seed(unsigned seed) {
@ -58,6 +59,10 @@ inline void _WrapperEnd(void) {
PutRNGstate();
}
// Error hook for the C API calls wrapped in this file: receives the call's
// int return code and currently does nothing with it.
// NOTE(review): presumably a non-zero code signals failure — confirm against
// the C API before adding real error handling here.
inline void CheckErr(int ret) {
}
extern "C" {
SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
@ -69,7 +74,8 @@ extern "C" {
}
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
_WrapperBegin();
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -90,7 +96,8 @@ extern "C" {
data[i * ncol +j] = din[i + nrow * j];
}
}
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -118,8 +125,10 @@ extern "C" {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
}
void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata);
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
&handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -133,7 +142,10 @@ extern "C" {
for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1;
}
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
DMatrixHandle res;
CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -142,8 +154,8 @@ extern "C" {
}
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
_WrapperBegin();
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent));
CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent)));
_WrapperEnd();
}
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
@ -156,24 +168,27 @@ extern "C" {
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
}
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len));
} else {
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = REAL(array)[i];
}
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len);
CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len));
}
_WrapperEnd();
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
_WrapperBegin();
bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), &olen);
const float *res;
CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&olen,
&res));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
@ -183,23 +198,25 @@ extern "C" {
return ret;
}
SEXP XGDMatrixNumRow_R(SEXP handle) {
bst_ulong nrow = XGDMatrixNumRow(R_ExternalPtrAddr(handle));
bst_ulong nrow;
CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
return ScalarInteger(static_cast<int>(nrow));
}
// functions related to booster
void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
XGBoosterFree(R_ExternalPtrAddr(ext));
CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext)));
R_ClearExternalPtr(ext);
}
SEXP XGBoosterCreate_R(SEXP dmats) {
_WrapperBegin();
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i){
for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
BoosterHandle handle;
CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
@ -208,16 +225,16 @@ extern "C" {
}
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
_WrapperBegin();
XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val)));
CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val))));
_WrapperEnd();
}
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
_WrapperBegin();
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain));
CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain)));
_WrapperEnd();
}
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
@ -230,9 +247,10 @@ extern "C" {
tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j];
}
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess), len);
CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess),
len));
_WrapperEnd();
}
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
@ -249,21 +267,24 @@ extern "C" {
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
const char *ret =
XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len);
const char *ret;
CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
_WrapperEnd();
return mkString(ret);
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
_WrapperBegin();
bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
&olen);
const float *res;
CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
&olen, &res));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
@ -274,12 +295,12 @@ extern "C" {
}
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd();
}
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd();
}
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
@ -292,7 +313,8 @@ extern "C" {
SEXP XGBoosterModelToRaw_R(SEXP handle) {
bst_ulong olen;
_WrapperBegin();
const char *raw = XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen);
const char *raw;
CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) {
@ -304,16 +326,16 @@ extern "C" {
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
_WrapperBegin();
bst_ulong olen;
const char **res =
XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
asInteger(with_stats),
&olen);
const char **res;
CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
asInteger(with_stats),
&olen, &res));
_WrapperEnd();
SEXP out = PROTECT(allocVector(STRSXP, olen));
for (size_t i = 0; i < olen; ++i) {
stringstream stream;
stream << "booster["<<i<<"]\n" << res[i];
stream << "booster[" << i <<"]\n" << res[i];
SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
}
UNPROTECT(1);

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_WRAPPER_R_H_
#define XGBOOST_WRAPPER_R_H_
/*!
* Copyright 2014 (c) by Contributors
* \file xgboost_wrapper_R.h
* \author Tianqi Chen
* \brief R wrapper of xgboost
*/
#ifndef XGBOOST_WRAPPER_R_H_ // NOLINT(*)
#define XGBOOST_WRAPPER_R_H_ // NOLINT(*)
extern "C" {
#include <Rinternals.h>
#include <R_ext/Random.h>
@ -153,4 +155,4 @@ extern "C" {
*/
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats);
}
#endif // XGBOOST_WRAPPER_R_H_
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)

View File

@ -1,3 +1,4 @@
// Copyright (c) 2014 by Contributors
#include <stdio.h>
#include <stdarg.h>
#include <Rinternals.h>

View File

@ -337,6 +337,17 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))
```
View feature importance/influence from the learnt model
-------------------------------------------------------
Feature importance is similar to R gbm package's relative influence (rel.inf).
```
importance_matrix <- xgb.importance(model = bst)
print(importance_matrix)
xgb.plot.importance(importance_matrix)
```
View the trees from a model
---------------------------
@ -346,6 +357,12 @@ You can dump the tree you learned using `xgb.dump` into a text file.
xgb.dump(bst, with.stats = T)
```
You can plot the trees from your model using ```xgb.plot.tree```
```
xgb.plot.tree(model = bst)
```
> if you provide a path to `fname` parameter you can save the trees to your hard drive.
Save and load models

View File

@ -1,12 +1,14 @@
XGBoost: eXtreme Gradient Boosting
DMLC/XGBoost
==================================
[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost)
An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version.
It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). XGBoost can also be distributed and scale to Terascale data
It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework, including [Generalized Linear Model](https://en.wikipedia.org/wiki/Generalized_linear_model) (GLM) and [Gradient Boosted Decision Trees](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) (GBDT). XGBoost can also be [distributed](#features) and scale to Terascale data
Contributors: https://github.com/dmlc/xgboost/graphs/contributors
Documentations: [Documentation of xgboost](doc/README.md)
Documentations: [Documentation of dmlc/xgboost](doc/README.md)
Issues Tracker: [https://github.com/dmlc/xgboost/issues](https://github.com/dmlc/xgboost/issues?q=is%3Aissue+label%3Aquestion)
@ -24,11 +26,19 @@ XGBoost is part of [Distributed Machine Learning Common](http://dmlc.github.io/)
What's New
==========
* XGBoost helps Chenglong Chen to win [Kaggle CrowdFlower Competition](https://www.kaggle.com/c/crowdflower-search-relevance)
- Checkout the winning solution at [Highlight links](doc/README.md#highlight-links)
* XGBoost-0.4 release, see [CHANGES.md](CHANGES.md#xgboost-04)
* XGBoost wins [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing)
* XGBoost helps three champion teams to win [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing)
- Checkout the winning solution at [Highlight links](doc/README.md#highlight-links)
* [External Memory Version](doc/external_memory.md)
Contributing to XGBoost
=========
XGBoost has been developed and used by an active community. Everyone is more than welcome to contribute; it is a great way to make the project better and more accessible to more users.
* Checkout [Feature Wish List](https://github.com/dmlc/xgboost/labels/Wish-List) to see what can be improved, or open an issue if you want something.
* Contribute to the [documents and examples](https://github.com/dmlc/xgboost/blob/master/doc/) to share your experience with other users.
Features
========
* Easily accessible in python, R, Julia, CLI

View File

@ -147,7 +147,7 @@ Run the command again, we can find the log file becomes
```
The rule is eval[name-printed-in-log] = filename, then the file will be added to monitoring process, and evaluated each round.
xgboost also support monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
xgboost also supports monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
```
[0] test-error:0.016139 test-negllik:0.029795 trainname-error:0.014433 trainname-negllik:0.027023
[1] test-error:0.000000 test-negllik:0.000000 trainname-error:0.001228 trainname-negllik:0.002457
@ -162,11 +162,15 @@ If you want to continue boosting from existing model, say 0002.model, use
```
xgboost will load from 0002.model continue boosting for 2 rounds, and save output to continue.model. However, beware that the training and evaluation data specified in mushroom.conf should not change when you use this function.
#### Use Multi-Threading
When you are working with a large dataset, you may want to take advantage of parallelism. If your compiler supports OpenMP, xgboost is naturally multi-threaded, to set number of parallel running threads to 10, add ```nthread=10``` to your configuration.
When you are working with a large dataset, you may want to take advantage of parallelism. If your compiler supports OpenMP, xgboost is naturally multi-threaded; to set the number of parallel running threads, add the ```nthread``` parameter to your configuration.
E.g. ```nthread=10```
Set nthread to the number of real CPU cores (on Unix, this can be found using ```lscpu```).
Some systems will have ```Thread(s) per core = 2```, for example, a 4 core cpu with 8 threads, in such case set ```nthread=4``` and not 8.
#### Additional Notes
* What are ```agaricus.txt.test.buffer``` and ```agaricus.txt.train.buffer``` generated during runexp.sh?
- By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. When next time you run xgboost, it detects i
Demonstrating how to use XGBoost accomplish binary classification tasks on UCI mushroom dataset http://archive.ics.uci.edu/ml/datasets/Mushroom
- By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. The next time you run xgboost, it will detect these binary files.

View File

@ -45,7 +45,7 @@ dim(train)
train[1:6,1:5, with =F]
# Test dataset dimensions
dim(train)
dim(test)
# Test content
test[1:6,1:5, with =F]

View File

@ -20,7 +20,8 @@ How to get started
Highlight Links
====
This section is about blogposts, presentation and videos discussing how to use xgboost to solve your interesting problem. If you think something belongs to here, send a pull request.
* [Kaggle Malware Prediction winning solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware)
* [Kaggle CrowdFlower winner's solution by Chenglong Chen](https://github.com/ChenglongChen/Kaggle_CrowdFlower)
* [Kaggle Malware Prediction winner's solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware)
* [Kaggle Tradeshift winning solution by daxiongshu](https://github.com/daxiongshu/kaggle-tradeshift-winning-solution)
* [Feature Importance Analysis with XGBoost in Tax audit](http://fr.slideshare.net/MichaelBENESTY/feature-importance-analysis-with-xgboost-in-tax-audit)
* Video tutorial: [Better Optimization with Repeated Cross Validation and the XGBoost model](https://www.youtube.com/watch?v=Og7CGAfSr_Y)
@ -29,3 +30,7 @@ This section is about blogposts, presentation and videos discussing how to use x
Contribution
====
Contribution of documents and use-cases are welcomed!
* This package uses the Google C++ style
* Code style check tool
- clone https://github.com/dmlc/dmlc-core into root directory
- type ```make lint``` and fix possible errors.

View File

@ -17,13 +17,15 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
1. Obtain gcc with openmp support by `brew install gcc --without-multilib` **or** clang with openmp by `brew install clang-omp`. The clang one is recommended because the first method requires us compiling gcc inside the machine (more than an hour in mine)! (BTW, `brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.)
2. **if plaing to use clang-omp** in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to
2. **if you are planning to use clang-omp** - in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to
```C++
#include <libiomp/omp.h> /* instead of #include <omp.h> */
```
to make it work, otherwise the following steps would show `src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`
to make it work, otherwise you might get this error
`src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`
@ -41,13 +43,13 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
export CXX = clang-omp++
```
Remember to change `header` if using clang-omp.
Remember to change `header` (mentioned in step 2) if using clang-omp.
Then `cd xgboost` then `bash build.sh` to compile XGBoost. And go to `wrapper` sub-folder to install python version.
4. Set the `Makevars` file in highest priority for R.
The point is, there are three `Makevars` inside the machine: `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by runing `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has highest piority (surprise!).
The point is, there are three `Makevars` : `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by running `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has the highest priority (surprise!).
So, **add** or **change** `~/.R/Makevars` to the following lines:

28
java/README.md Normal file
View File

@ -0,0 +1,28 @@
# xgboost4j
This is a Java wrapper for xgboost.
The structure of this wrapper is almost the same as the official Python wrapper.
The core of this wrapper consists of two classes:
* DMatrix: for handling data
* Booster: for train and predict
## usage:
please refer to [xgboost4j.md](doc/xgboost4j.md) for more information.
besides, simple examples could be found in [xgboost4j-demo](xgboost4j-demo/README.md)
## build native library
for windows: open the xgboost.sln in the "../windows" folder, where you will find the xgboostjavawrapper project; follow these steps to build the wrapper library:
* Select x64/win32 and Release in build
* (if you have set `JAVA_HOME` properly in the Windows environment variables, skip this step) right click on the xgboostjavawrapper project -> choose "Properties" -> click on "C/C++" in the window -> change the "Additional Include Directories" to fit your jdk install path.
* rebuild all
* double click "create_wrap.bat" to move the library to the proper place
for linux:
* make sure you have installed the jdk and `JAVA_HOME` has been set properly
* run "create_wrap.sh"

20
java/create_wrap.bat Normal file
View File

@ -0,0 +1,20 @@
rem Moves the x64 Release build of xgboostjavawrapper.dll into the xgboost4j
rem resources folder. Meant to be run from the java directory, after the
rem library has been built from ..\windows\xgboost.sln.
echo "move native library"
set libsource=..\windows\x64\Release\xgboostjavawrapper.dll
rem Paths are quoted so the script keeps working if they ever contain spaces.
if not exist "%libsource%" (
goto end
)
set libfolder=xgboost4j\src\main\resources\lib
set libpath=%libfolder%\xgboostjavawrapper.dll
if not exist "%libfolder%" (mkdir "%libfolder%")
rem Remove a stale copy first so the move below cannot fail on an existing file.
if exist "%libpath%" (del "%libpath%")
move "%libsource%" "%libfolder%"
echo complete
pause
rem exit /b ends only this script; a plain exit would also close a calling cmd session.
exit /b 0
:end
echo "source library not found, please build it first from ..\windows\xgboost.sln"
pause
rem Report the failure to the caller with a non-zero exit code.
exit /b 1

15
java/create_wrap.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/sh
# Builds the native java wrapper by running `make java` in the parent
# directory of this script, then moves libxgboostjavawrapper.so into the
# xgboost4j resources folder.

# Stop at the first failing command, so that a broken build or a missing
# library is never followed by the "complete" message.
set -e

echo "build java wrapper"
# Resolve the repository root from the script location instead of the
# caller's working directory, so the script can be started from anywhere.
cd "$(dirname "$0")/.."
make java
cd java
echo "move native lib"

libPath="xgboost4j/src/main/resources/lib"
# mkdir -p is a no-op when the directory already exists.
mkdir -p "$libPath"

# Drop any stale copy before moving the freshly built library in place.
rm -f "$libPath/libxgboostjavawrapper.so"
mv libxgboostjavawrapper.so "$libPath/"

echo "complete"

156
java/doc/xgboost4j.md Normal file
View File

@ -0,0 +1,156 @@
xgboost4j : java wrapper for xgboost
====
This page will introduce xgboost4j, the java wrapper for xgboost, including:
* [Building](#build-xgboost4j)
* [Data Interface](#data-interface)
* [Setting Parameters](#setting-parameters)
* [Train Model](#training-model)
* [Prediction](#prediction)
=
#### Build xgboost4j
* Build native library
first make sure you have installed the jdk and `JAVA_HOME` has been set properly, then simply run `./create_wrap.sh`.
* Package xgboost4j
to package xgboost4j, you can run `mvn package` in xgboost4j folder or just use IDE(eclipse/netbeans) to open this maven project and build.
=
#### Data Interface
Like the xgboost python module, xgboost4j uses ```DMatrix``` to handle data; libsvm text format files, sparse matrices in CSR/CSC format, and dense matrices are supported.
* To import ```DMatrix``` :
```java
import org.dmlc.xgboost4j.DMatrix;
```
* To load libsvm text format file, the usage is like :
```java
DMatrix dmat = new DMatrix("train.svm.txt");
```
* Loading a sparse matrix in CSR/CSC format is a little more complicated, the usage is like :
suppose a sparse matrix :
1 0 2 0
4 0 0 3
3 1 2 0
for CSR format
```java
long[] rowHeaders = new long[] {0,2,4,7};
float[] data = new float[] {1f,2f,4f,3f,3f,1f,2f};
int[] colIndex = new int[] {0,2,0,3,0,1,2};
DMatrix dmat = new DMatrix(rowHeaders, colIndex, data, DMatrix.SparseType.CSR);
```
for CSC format
```java
long[] colHeaders = new long[] {0,3,4,6,7};
float[] data = new float[] {1f,4f,3f,1f,2f,2f,3f};
int[] rowIndex = new int[] {0,1,2,2,0,2,1};
DMatrix dmat = new DMatrix(colHeaders, rowIndex, data, DMatrix.SparseType.CSC);
```
* To load 3*2 dense matrix, the usage is like :
suppose a matrix :
1 2
3 4
5 6
```java
float[] data = new float[] {1f,2f,3f,4f,5f,6f};
int nrow = 3;
int ncol = 2;
float missing = 0.0f;
DMatrix dmat = new DMatrix(data, nrow, ncol, missing);
```
* To set weight :
```java
float[] weights = new float[] {1f,2f,1f};
dmat.setWeight(weights);
```
#### Setting Parameters
* in xgboost4j any ```Iterable<Entry<String, Object>>``` object could be used as parameters.
* to set parameters, for non-multiple value params, you can simply use the entrySet of a Map:
```java
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
put("eval_metric", "logloss");
}
};
Iterable<Entry<String, Object>> params = paramMap.entrySet();
```
* for the situation that multiple values with same param key, List<Entry<String, Object>> would be a good choice, e.g. :
```java
List<Entry<String, Object>> params = new ArrayList<Entry<String, Object>>() {
{
add(new SimpleEntry<String, Object>("eta", 1.0));
add(new SimpleEntry<String, Object>("max_depth", 2.0));
add(new SimpleEntry<String, Object>("silent", 1));
add(new SimpleEntry<String, Object>("objective", "binary:logistic"));
}
};
```
#### Training Model
With parameters and data, you are able to train a booster model.
* Import ```Trainer``` and ```Booster``` :
```java
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.util.Trainer;
```
* Training
```java
DMatrix trainMat = new DMatrix("train.svm.txt");
DMatrix validMat = new DMatrix("valid.svm.txt");
//specify a watchList to see the performance
//any Iterable<Entry<String, DMatrix>> object could be used as watchList
List<Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new SimpleEntry<>("train", trainMat));
watchs.add(new SimpleEntry<>("test", validMat));
int round = 2;
Booster booster = Trainer.train(params, trainMat, round, watchs, null, null);
```
* Saving model
After training, you can save model and dump it out.
```java
booster.saveModel("model.bin");
```
* Dump Model and Feature Map
```java
booster.dumpModel("modelInfo.txt", false);
//dump with featureMap
booster.dumpModel("modelInfo.txt", "featureMap.txt", false);
```
* Load a model
```java
Params param = new Params() {
{
put("silent", 1);
put("nthread", 6);
}
};
Booster booster = new Booster(param, "model.bin");
```
#### Prediction
After training or loading a model, you can use it to predict on other data. The prediction result is a two-dimensional float array of shape (nsample, nclass); for leaf prediction, the shape is (nsample, nclass*ntrees).
```java
DMatrix dtest = new DMatrix("test.svm.txt");
//predict
float[][] predicts = booster.predict(dtest);
//predict leaf
float[][] leafPredicts = booster.predict(dtest, 0, true);
```

View File

@ -0,0 +1,15 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

View File

@ -0,0 +1,10 @@
xgboost4j examples
====
* [Basic walkthrough of wrappers](src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java)
* [Customize loss function, and evaluation metric](src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java)
* [Boosting from existing prediction](src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java)
* [Predicting using first n trees](src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java)
* [Generalized Linear Model](src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java)
* [Cross validation](src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java)
* [Predicting leaf indices](src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java)
* [External Memory](src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java)

View File

@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build description of the xgboost4j-demo module (packaged as a jar). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.dmlc</groupId>
<artifactId>xgboost4j-demo</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Sources are compiled as Java 1.7 and target Java 1.7 bytecode. -->
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<!-- The xgboost4j wrapper artifact (org.dmlc:xgboost4j:1.1) that the demos depend on. -->
<dependency>
<groupId>org.dmlc</groupId>
<artifactId>xgboost4j</artifactId>
<version>1.1</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<!-- JUnit is limited to the test scope. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,164 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.AbstractMap;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.DataLoader;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Basic walkthrough of the xgboost java wrapper: trains a model, predicts,
 * saves and dumps the model, reloads model and data from disk, and finally
 * trains again on a DMatrix built from CSR sparse data.
 * @author hzx
 */
public class BasicWalkThrough {
    /**
     * Compares two prediction matrices row by row.
     * @param fPredicts first prediction matrix
     * @param sPredicts second prediction matrix
     * @return true when both matrices have the same number of rows and every
     *         pair of rows is equal, false otherwise
     */
    public static boolean checkPredicts(float[][] fPredicts, float[][] sPredicts) {
        final int rows = fPredicts.length;
        if (rows != sPredicts.length) {
            return false;
        }

        int idx = 0;
        while (idx < rows) {
            if (!Arrays.equals(fPredicts[idx], sPredicts[idx])) {
                return false;
            }
            idx++;
        }
        return true;
    }

    public static void main(String[] args) throws UnsupportedEncodingException, IOException, XGBoostError {
        final String trainFile = "../../demo/data/agaricus.txt.train";
        final String modelFile = "./model/xgb.model";
        final String testBuffer = "./model/dtest.buffer";

        // a DMatrix is created from a text file here; further down one is also
        // created from a binary buffer written by xgboost4j
        DMatrix dtrain = new DMatrix(trainFile);
        DMatrix dtest = new DMatrix("../../demo/data/agaricus.txt.test");

        // Parameters: any Iterable<Entry<String, Object>> object can be used, e.g.
        //   the entry set of a map:
        //     Map<String, Object> paramMap = new HashMap<String, Object>() {
        //       {
        //         put("eta", 1.0);
        //         put("max_depth", 2);
        //         put("silent", 1);
        //         put("objective", "binary:logistic");
        //       }
        //     };
        //     Iterable<Entry<String, Object>> param = paramMap.entrySet();
        //   or a list of entries:
        //     List<Entry<String, Object>> param = new ArrayList<Entry<String, Object>>() {
        //       {
        //         add(new SimpleEntry<String, Object>("eta", 1.0));
        //         add(new SimpleEntry<String, Object>("max_depth", 2.0));
        //         add(new SimpleEntry<String, Object>("silent", 1));
        //         add(new SimpleEntry<String, Object>("objective", "binary:logistic"));
        //       }
        //     };
        // This example uses the demo utility class Params instead.
        Iterable<Entry<String, Object>> param = new Params() {
            {
                put("eta", 1.0);
                put("max_depth", 2);
                put("silent", 1);
                put("objective", "binary:logistic");
            }
        };

        // Watch list (evaluation matrices): any Iterable<Entry<String, DMatrix>>
        // object can be used, e.g. the entry set of a map:
        //     Map<String, DMatrix> watchMap = new HashMap<>();
        //     watchMap.put("train", trainMat);
        //     watchMap.put("test", testMat);
        //     Iterable<Entry<String, DMatrix>> watchs = watchMap.entrySet();
        // This example uses a list of entries.
        List<Entry<String, DMatrix>> watchList = new ArrayList<>();
        watchList.add(new SimpleEntry<>("train", dtrain));
        watchList.add(new SimpleEntry<>("test", dtest));

        // number of boosting rounds
        final int round = 2;

        // train a booster model and predict on the test matrix
        Booster booster = Trainer.train(param, dtrain, round, watchList, null, null);
        float[][] predicts = booster.predict(dtest);

        // make sure the output directory exists, then save the model into it
        File modelDir = new File("./model");
        if (!modelDir.exists()) {
            modelDir.mkdirs();
        }
        booster.saveModel(modelFile);

        // dump the model, first raw and then with a feature map
        booster.dumpModel("./model/dump.raw.txt", false);
        booster.dumpModel("./model/dump.nice.txt", "../../demo/data/featmap.txt", false);

        // save the test matrix into a binary buffer
        dtest.saveBinary(testBuffer);

        // reload model and data, predict again and compare with the first predictions
        Booster reloadedBooster = new Booster(param, modelFile);
        DMatrix reloadedTest = new DMatrix(testBuffer);
        float[][] reloadedPredicts = reloadedBooster.predict(reloadedTest);
        System.out.println(checkPredicts(predicts, reloadedPredicts));

        System.out.println("start build dmatrix from csr sparse data ...");
        // build a training DMatrix from CSR sparse data and attach its labels
        DataLoader.CSRSparseData csr = DataLoader.loadSVMFile(trainFile);
        DMatrix csrTrain = new DMatrix(csr.rowHeaders, csr.colIndex, csr.data, DMatrix.SparseType.CSR);
        csrTrain.setLabel(csr.labels);

        // watch list for the second training run
        List<Entry<String, DMatrix>> csrWatchList = new ArrayList<>();
        csrWatchList.add(new SimpleEntry<>("train", csrTrain));
        csrWatchList.add(new SimpleEntry<>("test", reloadedTest));

        Booster csrBooster = Trainer.train(param, csrTrain, round, csrWatchList, null, null);
        float[][] csrPredicts = csrBooster.predict(reloadedTest);

        // compare with the first predictions
        System.out.println(checkPredicts(predicts, csrPredicts));
    }
}

View File

@ -0,0 +1,67 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Example of boosting that starts from an initial base prediction: the margin
 * predictions of a first model are set as base margin before training again.
 * @author hzx
 */
public class BoostFromPrediction {
    public static void main(String[] args) throws XGBoostError {
        System.out.println("start running example to start from a initial prediction");

        // training and test matrices, created from text files
        DMatrix dtrain = new DMatrix("../../demo/data/agaricus.txt.train");
        DMatrix dtest = new DMatrix("../../demo/data/agaricus.txt.test");

        // training parameters
        Params param = new Params() {
            {
                put("eta", 1.0);
                put("max_depth", 2);
                put("silent", 1);
                put("objective", "binary:logistic");
            }
        };

        // matrices to evaluate during training
        List<Map.Entry<String, DMatrix>> watchList = new ArrayList<>();
        watchList.add(new AbstractMap.SimpleEntry<>("train", dtrain));
        watchList.add(new AbstractMap.SimpleEntry<>("test", dtest));

        // first stage: train xgboost for 1 round
        Booster firstStage = Trainer.train(param, dtrain, 1, watchList, null, null);

        // predict on both matrices (second argument set to true) and use the
        // results as base margin of the respective matrix
        float[][] trainMargin = firstStage.predict(dtrain, true);
        float[][] testMargin = firstStage.predict(dtest, true);
        dtrain.setBaseMargin(trainMargin);
        dtest.setBaseMargin(testMargin);

        // second stage: train one more round on top of the base margin
        System.out.println("result of running from initial prediction");
        Trainer.train(param, dtrain, 1, watchList, null, null);
    }
}

View File

@ -0,0 +1,54 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.io.IOException;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Example of running cross validation through the Trainer utility.
 * @author hzx
 */
public class CrossValidation {
    public static void main(String[] args) throws IOException, XGBoostError {
        // 5-fold cross validation over 2 boosting rounds
        final int nfold = 5;
        final int round = 2;

        // no additional eval metrics; kept as a typed variable so the call
        // below passes a String[] argument
        String[] metrics = null;

        // training matrix
        DMatrix dtrain = new DMatrix("../../demo/data/agaricus.txt.train");

        // training parameters
        Params param = new Params() {
            {
                put("eta", 1.0);
                put("max_depth", 3);
                put("silent", 1);
                put("nthread", 6);
                put("objective", "binary:logistic");
                put("gamma", 1.0);
                put("eval_metric", "error");
            }
        };

        String[] evalHist = Trainer.crossValiation(param, dtrain, round, nfold, metrics, null, null);
    }
}

View File

@ -0,0 +1,175 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IObjective;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Example of a user defined objective and evaluation function.
 * NOTE: with a customized loss function the default prediction value is the margin,
 * which may make the builtin evaluation metrics not function properly.
 * For example, with logistic loss the prediction is the score before the logistic
 * transformation, while the builtin evaluation error assumes its input is after it.
 * Keep this in mind when customizing; you may need to write a customized evaluation function.
 * @author hzx
 */
public class CustomObjective {
    /**
     * log-likelihood loss objective function
     */
    public static class LogRegObj implements IObjective {
        private static final Log logger = LogFactory.getLog(LogRegObj.class);

        /**
         * simple sigmoid function
         * Note: no care is taken about numerical stability, this is only an example
         * @param input value to transform
         * @return 1/(1+exp(-input)) as a float
         */
        public float sigmoid(float input) {
            return (float) (1/(1+Math.exp(-input)));
        }

        /**
         * applies the sigmoid to the first column of every prediction row
         * @param predicts raw predictions, one row per sample
         * @return a new single-column matrix holding the transformed values
         */
        public float[][] transform(float[][] predicts) {
            final int rows = predicts.length;
            float[][] result = new float[rows][1];
            int r = 0;
            while (r < rows) {
                result[r][0] = sigmoid(predicts[r][0]);
                r++;
            }
            return result;
        }

        /**
         * @param predicts raw predictions, one row per sample
         * @param dtrain matrix the labels are read from
         * @return a list holding the gradient array followed by the hessian array,
         *         or null when the labels cannot be read
         */
        @Override
        public List<float[]> getGradient(float[][] predicts, DMatrix dtrain) {
            final int rows = predicts.length;

            float[] labels;
            try {
                labels = dtrain.getLabel();
            } catch (XGBoostError ex) {
                logger.error(ex);
                return null;
            }

            float[] grad = new float[rows];
            float[] hess = new float[rows];
            float[][] probs = transform(predicts);
            for (int r = 0; r < rows; r++) {
                float p = probs[r][0];
                grad[r] = p - labels[r];
                hess[r] = p * (1 - p);
            }

            List<float[]> gradients = new ArrayList<>();
            gradients.add(grad);
            gradients.add(hess);
            return gradients;
        }
    }

    /**
     * user defined evaluation function.
     * NOTE: with a customized loss function the default prediction value is the margin,
     * which may make the builtin evaluation metrics not function properly.
     * For example, with logistic loss the prediction is the score before the logistic
     * transformation, while the builtin evaluation error assumes its input is after it.
     * Keep this in mind when customizing; you may need to write a customized evaluation function.
     */
    public static class EvalError implements IEvaluation {
        private static final Log logger = LogFactory.getLog(EvalError.class);

        String evalMetric = "custom_error";

        public EvalError() {
        }

        @Override
        public String getMetric() {
            return evalMetric;
        }

        /**
         * @param predicts predictions, one row per sample; only the first column is read
         * @param dmat matrix the labels are read from
         * @return number of misclassified rows divided by the number of labels,
         *         or -1 when the labels cannot be read
         */
        @Override
        public float eval(float[][] predicts, DMatrix dmat) {
            float[] labels;
            try {
                labels = dmat.getLabel();
            } catch (XGBoostError ex) {
                logger.error(ex);
                return -1f;
            }

            float wrong = 0f;
            final int rows = predicts.length;
            for (int r = 0; r < rows; r++) {
                float label = labels[r];
                float score = predicts[r][0];
                // a positive score on label 0, or a non-positive score on label 1, is an error
                boolean falsePositive = label == 0f && score > 0;
                boolean falseNegative = label == 1f && score <= 0;
                if (falsePositive || falseNegative) {
                    wrong++;
                }
            }
            return wrong/labels.length;
        }
    }

    public static void main(String[] args) throws XGBoostError {
        // training and test matrices (svmlight format)
        DMatrix dtrain = new DMatrix("../../demo/data/agaricus.txt.train");
        DMatrix dtest = new DMatrix("../../demo/data/agaricus.txt.test");

        // training parameters
        Params param = new Params() {
            {
                put("eta", 1.0);
                put("max_depth", 2);
                put("silent", 1);
            }
        };

        // number of boosting rounds
        final int round = 2;

        // matrices to evaluate during training
        List<Map.Entry<String, DMatrix>> watchList = new ArrayList<>();
        watchList.add(new AbstractMap.SimpleEntry<>("train", dtrain));
        watchList.add(new AbstractMap.SimpleEntry<>("test", dtest));

        // user defined objective and evaluation
        IObjective obj = new LogRegObj();
        IEvaluation eval = new EvalError();

        // train a booster with them
        System.out.println("begin to train the booster model");
        Booster booster = Trainer.train(param, dtrain, round, watchList, obj, eval);
    }
}

View File

@ -0,0 +1,65 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Simple example of using the external memory version.
 * @author hzx
 */
public class ExternalMemory {
    public static void main(String[] args) throws XGBoostError {
        // number of boosting rounds
        final int round = 2;

        // The only difference to the in-memory usage: a '#' followed by a cache
        // prefix name is appended to the file name. Several cache files with
        // that prefix will be generated. Currently only conversion from libsvm
        // files is supported.
        DMatrix dtrain = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache");
        DMatrix dtest = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache");

        // training parameters
        Params param = new Params() {
            {
                put("eta", 1.0);
                put("max_depth", 2);
                put("silent", 1);
                put("objective", "binary:logistic");
            }
        };

        // Performance notice: set nthread to the number of real cpus. Some cpus
        // offer two threads per core, for example a 4 core cpu with 8 threads;
        // in such a case set nthread=4.
        //param.put("nthread", num_real_cpu);

        // matrices to evaluate during training
        List<Map.Entry<String, DMatrix>> watchList = new ArrayList<>();
        watchList.add(new AbstractMap.SimpleEntry<>("train", dtrain));
        watchList.add(new AbstractMap.SimpleEntry<>("test", dtest));

        // train a booster model
        Booster booster = Trainer.train(param, dtrain, round, watchList, null, null);
    }
}

View File

@ -0,0 +1,74 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.demo.util.CustomEval;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Example of fitting a generalized linear model in xgboost: a linear model
 * is used for the boosters instead of trees.
 * @author hzx
 */
public class GeneralizedLinearModel {
    public static void main(String[] args) throws XGBoostError {
        // number of boosting rounds
        final int round = 4;

        // training and test matrices, created from text files
        DMatrix dtrain = new DMatrix("../../demo/data/agaricus.txt.train");
        DMatrix dtest = new DMatrix("../../demo/data/agaricus.txt.test");

        // Parameters: the booster is changed to gblinear so that a linear model is fitted.
        //   alpha  is the L1 regularizer
        //   lambda is the L2 regularizer
        // lambda_bias, the L2 regularizer on the bias term, can be set as well.
        Params param = new Params() {
            {
                put("alpha", 0.0001);
                put("silent", 1);
                put("objective", "binary:logistic");
                put("booster", "gblinear");
            }
        };

        // Normally eta (step_size) does not need to be set.
        // XGBoost uses a parallel coordinate descent algorithm (shotgun); in certain
        // cases the parallelization could affect convergence. Setting eta to a
        // smaller value, e.g. 0.5, can make the optimization more stable.
        //param.put("eta", "0.5");

        // matrices to evaluate during training
        List<Map.Entry<String, DMatrix>> watchList = new ArrayList<>();
        watchList.add(new AbstractMap.SimpleEntry<>("train", dtrain));
        watchList.add(new AbstractMap.SimpleEntry<>("test", dtest));

        // train a booster and predict on the test matrix
        Booster booster = Trainer.train(param, dtrain, round, watchList, null, null);
        float[][] testPredicts = booster.predict(dtest);

        // report the error computed by the demo evaluation class
        CustomEval errorEval = new CustomEval();
        System.out.println("error=" + errorEval.eval(testPredicts, dtest));
    }
}

View File

@ -0,0 +1,69 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.demo.util.CustomEval;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Example of predicting with only the first tree of a trained model and
 * comparing the error against a prediction that uses all trees.
 * @author hzx
 */
public class PredictFirstNtree {
    public static void main(String[] args) throws XGBoostError {
        // number of boosting rounds
        final int round = 3;

        // training and test matrices, created from text files
        DMatrix dtrain = new DMatrix("../../demo/data/agaricus.txt.train");
        DMatrix dtest = new DMatrix("../../demo/data/agaricus.txt.test");

        // training parameters
        Params param = new Params() {
            {
                put("eta", 1.0);
                put("max_depth", 2);
                put("silent", 1);
                put("objective", "binary:logistic");
            }
        };

        // matrices to evaluate during training
        List<Map.Entry<String, DMatrix>> watchList = new ArrayList<>();
        watchList.add(new AbstractMap.SimpleEntry<>("train", dtrain));
        watchList.add(new AbstractMap.SimpleEntry<>("test", dtest));

        // train a booster
        Booster booster = Trainer.train(param, dtrain, round, watchList, null, null);

        // prediction that uses 1 tree only
        float[][] firstTreePredicts = booster.predict(dtest, false, 1);
        // by default all trees are used for prediction
        float[][] allTreePredicts = booster.predict(dtest);

        // a simple evaluation class reports the error of both results
        CustomEval errorEval = new CustomEval();
        System.out.println("error of predicts1: " + errorEval.eval(firstTreePredicts, dtest));
        System.out.println("error of predicts2: " + errorEval.eval(allTreePredicts, dtest));
    }
}

View File

@ -0,0 +1,70 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.demo.util.Params;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Demo: train a small tree booster on the agaricus data and print, for every test row,
 * the leaf-index prediction, first for a limit of two trees and then for all trees.
 * @author hzx
 */
public class PredictLeafIndices {
    /**
     * Runs the demo; data paths are relative to the working directory.
     * @param args unused
     * @throws XGBoostError if a call into xgboost4j reports an error
     */
    public static void main(String[] args) throws XGBoostError {
        // the inputs are text files here; binary buffers generated by xgboost4j can be loaded the same way
        DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
        DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");

        // booster parameters
        Params param = new Params();
        param.put("eta", 1.0);
        param.put("max_depth", 2);
        param.put("silent", 1);
        param.put("objective", "binary:logistic");

        // matrices that are evaluated while training, each paired with a display name
        Map.Entry<String, DMatrix> trainEntry = new AbstractMap.SimpleEntry<>("train", trainMat);
        Map.Entry<String, DMatrix> testEntry = new AbstractMap.SimpleEntry<>("test", testMat);
        List<Map.Entry<String, DMatrix>> watches = new ArrayList<>();
        watches.add(trainEntry);
        watches.add(testEntry);

        // train for three boosting rounds
        int numRounds = 3;
        Booster booster = Trainer.train(param, trainMat, numRounds, watches, null, null);

        // leaf indices with the tree limit set to 2
        printRows(booster.predict(testMat, 2, true));
        // leaf indices with tree limit 0, i.e. all trees
        printRows(booster.predict(testMat, 0, true));
    }

    /**
     * Prints every row of the matrix on its own line.
     * @param rows matrix to print
     */
    private static void printRows(float[][] rows) {
        for (int r = 0; r < rows.length; r++) {
            System.out.println(Arrays.toString(rows[r]));
        }
    }
}

View File

@ -0,0 +1,60 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.util.XGBoostError;
/**
 * Example evaluation metric for the demos: the fraction of rows whose binary label
 * disagrees with the prediction thresholded at 0.5.
 * @author hzx
 */
public class CustomEval implements IEvaluation {
    private static final Log logger = LogFactory.getLog(CustomEval.class);

    // name reported through getMetric()
    String evalMetric = "custom_error";

    /**
     * @return the name of this metric
     */
    @Override
    public String getMetric() {
        return evalMetric;
    }

    /**
     * Computes the classification error of {@code predicts} against the labels of {@code dmat}.
     * Only the first column of each prediction row is used. A row counts as an error when
     * its label is 0 and the prediction is greater than 0.5, or when its label is 1 and the
     * prediction is less than or equal to 0.5; rows with any other label never count.
     * @param predicts prediction matrix, one row per instance
     * @param dmat data matrix that supplies the labels
     * @return number of errors divided by the number of labels, or -1 if the labels
     *         could not be read from {@code dmat}
     */
    @Override
    public float eval(float[][] predicts, DMatrix dmat) {
        float[] labels;
        try {
            labels = dmat.getLabel();
        } catch (XGBoostError ex) {
            logger.error(ex);
            return -1f;
        }

        // Count in an int: a float counter stops increasing once it reaches 2^24,
        // which would under-report the error on very large evaluation sets.
        int errors = 0;
        int nrow = predicts.length;
        for (int i = 0; i < nrow; i++) {
            if (labels[i] == 0f && predicts[i][0] > 0.5) {
                errors++;
            } else if (labels[i] == 1f && predicts[i][0] <= 0.5) {
                errors++;
            }
        }
        return (float) errors / labels.length;
    }
}

View File

@ -0,0 +1,127 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.ArrayUtils;
/**
 * Utility class that loads demo data from CSV and svmlight-style text files
 * into plain arrays.
 * @author hzx
 */
public class DataLoader {
    /**
     * Dense data set as produced by {@link #loadCSVFile(String)}.
     */
    public static class DenseData {
        // one label per loaded row
        public float[] labels;
        // feature values of all rows, appended row after row
        public float[] data;
        // number of rows loaded
        public int nrow;
        // number of feature columns, taken from the first row (-1 if no row was loaded)
        public int ncol;
    }

    /**
     * Sparse data set in CSR layout as produced by {@link #loadSVMFile(String)}.
     */
    public static class CSRSparseData {
        // one label per loaded row
        public float[] labels;
        // values of all index:value pairs, appended row after row
        public float[] data;
        // cumulative entry counts: starts with 0, then one running total per row
        public long[] rowHeaders;
        // index part of every index:value pair, parallel to data
        public int[] colIndex;
    }

    /**
     * Loads a comma-separated file in which the last field of each line is the label
     * and all preceding fields are feature values. Blank lines are skipped.
     * @param filePath path of the CSV file (read as UTF-8)
     * @return the loaded dense data
     * @throws FileNotFoundException if the file cannot be opened
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws IOException if reading fails
     */
    public static DenseData loadCSVFile(String filePath) throws FileNotFoundException, UnsupportedEncodingException, IOException {
        DenseData denseData = new DenseData();
        denseData.nrow = 0;
        denseData.ncol = -1;

        List<Float> tlabels = new ArrayList<>();
        List<Float> tdata = new ArrayList<>();

        // try-with-resources closes both streams even when parsing throws
        try (FileInputStream in = new FileInputStream(new File(filePath));
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                // "".split(",") yields [""], so a blank line must be detected explicitly
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] items = trimmed.split(",");
                // a line made only of separators splits into an empty array
                if (items.length == 0) {
                    continue;
                }

                denseData.nrow++;
                if (denseData.ncol == -1) {
                    denseData.ncol = items.length - 1;
                }

                tlabels.add(Float.valueOf(items[items.length - 1]));
                for (int i = 0; i < items.length - 1; i++) {
                    tdata.add(Float.valueOf(items[i]));
                }
            }
        }

        denseData.labels = ArrayUtils.toPrimitive(tlabels.toArray(new Float[tlabels.size()]));
        denseData.data = ArrayUtils.toPrimitive(tdata.toArray(new Float[tdata.size()]));

        return denseData;
    }

    /**
     * Loads an svmlight-style file: each line is a label followed by
     * whitespace-separated {@code index:value} pairs. Blank lines are skipped.
     * @param filePath path of the file (read as UTF-8)
     * @return the loaded data in CSR layout
     * @throws FileNotFoundException if the file cannot be opened
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws IOException if reading fails
     */
    public static CSRSparseData loadSVMFile(String filePath) throws FileNotFoundException, UnsupportedEncodingException, IOException {
        CSRSparseData spData = new CSRSparseData();

        List<Float> tlabels = new ArrayList<>();
        List<Float> tdata = new ArrayList<>();
        List<Long> theaders = new ArrayList<>();
        List<Integer> tindex = new ArrayList<>();

        long rowheader = 0;
        theaders.add(rowheader);

        // try-with-resources closes both streams even when parsing throws
        try (FileInputStream in = new FileInputStream(new File(filePath));
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                // "".split(..) yields [""], so a blank line must be detected explicitly
                if (trimmed.isEmpty()) {
                    continue;
                }
                // split on any run of whitespace so repeated spaces or tabs do not create empty tokens
                String[] items = trimmed.split("\\s+");

                rowheader += items.length - 1;
                theaders.add(rowheader);
                tlabels.add(Float.valueOf(items[0]));

                for (int i = 1; i < items.length; i++) {
                    String[] tup = items[i].split(":");
                    assert tup.length == 2;

                    tdata.add(Float.valueOf(tup[1]));
                    tindex.add(Integer.valueOf(tup[0]));
                }
            }
        }

        spData.labels = ArrayUtils.toPrimitive(tlabels.toArray(new Float[tlabels.size()]));
        spData.data = ArrayUtils.toPrimitive(tdata.toArray(new Float[tdata.size()]));
        spData.colIndex = ArrayUtils.toPrimitive(tindex.toArray(new Integer[tindex.size()]));
        spData.rowHeaders = ArrayUtils.toPrimitive(theaders.toArray(new Long[theaders.size()]));

        return spData;
    }
}

View File

@ -0,0 +1,54 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.demo.util;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.AbstractMap;
/**
 * Ordered collection of parameter key-value pairs.
 * Pairs are kept in insertion order and a key may be added more than once.
 * @author hzx
 */
public class Params implements Iterable<Entry<String, Object>>{
    List<Entry<String, Object>> params = new ArrayList<>();

    /**
     * Appends a key-value pair to the end of the collection.
     * @param key parameter name
     * @param value parameter value
     */
    public void put(String key, Object value) {
        Entry<String, Object> pair = new AbstractMap.SimpleEntry<>(key, value);
        params.add(pair);
    }

    /**
     * @return every pair rendered as {@code key:value}, one pair per line,
     *         each line terminated by a newline
     */
    @Override
    public String toString(){
        StringBuilder text = new StringBuilder();
        Iterator<Entry<String, Object>> it = params.iterator();
        while (it.hasNext()) {
            Entry<String, Object> pair = it.next();
            text.append(pair.getKey()).append(":").append(pair.getValue()).append("\n");
        }
        return text.toString();
    }

    /**
     * @return iterator over the pairs in insertion order
     */
    @Override
    public Iterator<Entry<String, Object>> iterator() {
        return params.iterator();
    }
}

15
java/xgboost4j/LICENSE Normal file
View File

@ -0,0 +1,15 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

23
java/xgboost4j/README.md Normal file
View File

@ -0,0 +1,23 @@
# xgboost4j
This is a Java wrapper for xgboost (https://github.com/dmlc/xgboost).
The structure of this wrapper is almost the same as that of the official Python wrapper.
The core of this wrapper consists of two classes:
* DMatrix: for handling data
* Booster: for training and prediction
## usage:
Simple examples can be found in the test package:
* Simple Train Example: org.dmlc.xgboost4j.TrainExample.java
* Simple Predict Example: org.dmlc.xgboost4j.PredictExample.java
* Cross Validation Example: org.dmlc.xgboost4j.example.CVExample.java
## native library:
Only 64-bit Linux and Windows are supported at present. If you want to build the native wrapper library yourself, please refer to
https://github.com/yanqingmen/xgboost-java, then put your native library into the "./src/main/resources/lib" folder, replacing the originals (either "libxgboostjavawrapper.so" for Linux or "xgboostjavawrapper.dll" for Windows).

35
java/xgboost4j/pom.xml Normal file
View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Maven coordinates of the xgboost4j wrapper artifact, packaged as a jar -->
<groupId>org.dmlc</groupId>
<artifactId>xgboost4j</artifactId>
<version>1.1</version>
<packaging>jar</packaging>
<properties>
<!-- sources are UTF-8 and are compiled for Java 1.7 -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<reporting>
<plugins>
<!-- Javadoc report plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.3</version>
</plugin>
</plugins>
</reporting>
<dependencies>
<!-- JUnit is restricted to the test scope -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- logging API imported by the wrapper classes (org.apache.commons.logging) -->
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,484 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.util.Initializer;
import org.dmlc.xgboost4j.util.ErrorHandle;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.dmlc.xgboost4j.wrapper.XgboostJNI;
/**
* Booster for xgboost, similar to the python wrapper xgboost.py
* but custom obj function and eval function not supported at present.
* @author hzx
*/
public final class Booster {
private static final Log logger = LogFactory.getLog(Booster.class);
long handle = 0;
//load native library
static {
try {
Initializer.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* init Booster from dMatrixs
* @param params parameters
* @param dMatrixs DMatrix array
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Booster(Iterable<Entry<String, Object>> params, DMatrix[] dMatrixs) throws XGBoostError {
init(dMatrixs);
setParam("seed","0");
setParams(params);
}
/**
* load model from modelPath
* @param params parameters
* @param modelPath booster modelPath (model generated by booster.saveModel)
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Booster(Iterable<Entry<String, Object>> params, String modelPath) throws XGBoostError {
init(null);
if(modelPath == null) {
throw new NullPointerException("modelPath : null");
}
loadModel(modelPath);
setParam("seed","0");
setParams(params);
}
private void init(DMatrix[] dMatrixs) throws XGBoostError {
long[] handles = null;
if(dMatrixs != null) {
handles = dMatrixs2handles(dMatrixs);
}
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGBoosterCreate(handles, out));
handle = out[0];
}
/**
* set parameter
* @param key param name
* @param value param value
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public final void setParam(String key, String value) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGBoosterSetParam(handle, key, value));
}
/**
* set parameters
* @param params parameters key-value map
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setParams(Iterable<Entry<String, Object>> params) throws XGBoostError {
if(params!=null) {
for(Map.Entry<String, Object> entry : params) {
setParam(entry.getKey(), entry.getValue().toString());
}
}
}
/**
* Update (one iteration)
* @param dtrain training data
* @param iter current iteration number
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void update(DMatrix dtrain, int iter) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle()));
}
/**
* update with customize obj func
* @param dtrain training data
* @param iter current iteration number
* @param obj customized objective class
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void update(DMatrix dtrain, int iter, IObjective obj) throws XGBoostError {
float[][] predicts = predict(dtrain, true);
List<float[]> gradients = obj.getGradient(predicts, dtrain);
boost(dtrain, gradients.get(0), gradients.get(1));
}
/**
* update with give grad and hess
* @param dtrain training data
* @param grad first order of gradient
* @param hess seconde order of gradient
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void boost(DMatrix dtrain, float[] grad, float[] hess) throws XGBoostError {
if(grad.length != hess.length) {
throw new AssertionError(String.format("grad/hess length mismatch %s / %s", grad.length, hess.length));
}
ErrorHandle.checkCall(XgboostJNI.XGBoosterBoostOneIter(handle, dtrain.getHandle(), grad, hess));
}
/**
* evaluate with given dmatrixs.
* @param evalMatrixs dmatrixs for evaluation
* @param evalNames name for eval dmatrixs, used for check results
* @param iter current eval iteration
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XGBoostError {
long[] handles = dMatrixs2handles(evalMatrixs);
String[] evalInfo = new String[1];
ErrorHandle.checkCall(XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames, evalInfo));
return evalInfo[0];
}
/**
* evaluate with given customized Evaluation class
* @param evalMatrixs
* @param evalNames
* @param iter
* @param eval
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, IEvaluation eval) throws XGBoostError {
String evalInfo = "";
for(int i=0; i<evalNames.length; i++) {
String evalName = evalNames[i];
DMatrix evalMat = evalMatrixs[i];
float evalResult = eval.eval(predict(evalMat), evalMat);
String evalMetric = eval.getMetric();
evalInfo += String.format("\t%s-%s:%f", evalName,evalMetric, evalResult);
}
return evalInfo;
}
/**
* evaluate with given dmatrix handles;
* @param dHandles evaluation data handles
* @param evalNames name for eval dmatrixs, used for check results
* @param iter current eval iteration
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String evalSet(long[] dHandles, String[] evalNames, int iter) throws XGBoostError {
String[] evalInfo = new String[1];
ErrorHandle.checkCall(XgboostJNI.XGBoosterEvalOneIter(handle, iter, dHandles, evalNames, evalInfo));
return evalInfo[0];
}
/**
* evaluate with given dmatrix, similar to evalSet
* @param evalMat
* @param evalName
* @param iter
* @return eval information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String eval(DMatrix evalMat, String evalName, int iter) throws XGBoostError {
DMatrix[] evalMats = new DMatrix[] {evalMat};
String[] evalNames = new String[] {evalName};
return evalSet(evalMats, evalNames, iter);
}
/**
* base function for Predict
* @param data
* @param outPutMargin
* @param treeLimit
* @param predLeaf
* @return predict results
*/
private synchronized float[][] pred(DMatrix data, boolean outPutMargin, long treeLimit, boolean predLeaf) throws XGBoostError {
int optionMask = 0;
if(outPutMargin) {
optionMask = 1;
}
if(predLeaf) {
optionMask = 2;
}
float[][] rawPredicts = new float[1][];
ErrorHandle.checkCall(XgboostJNI.XGBoosterPredict(handle, data.getHandle(), optionMask, treeLimit, rawPredicts));
int row = (int) data.rowNum();
int col = (int) rawPredicts[0].length/row;
float[][] predicts = new float[row][col];
int r,c;
for(int i=0; i< rawPredicts[0].length; i++) {
r = i/col;
c = i%col;
predicts[r][c] = rawPredicts[0][i];
}
return predicts;
}
/**
* Predict with data
* @param data dmatrix storing the input
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data) throws XGBoostError {
return pred(data, false, 0, false);
}
/**
* Predict with data
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data, boolean outPutMargin) throws XGBoostError {
return pred(data, outPutMargin, 0, false);
}
/**
* Predict with data
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data, boolean outPutMargin, long treeLimit) throws XGBoostError {
return pred(data, outPutMargin, treeLimit, false);
}
/**
* Predict with data
* @param data dmatrix storing the input
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @param predLeaf When this option is on, the output will be a matrix of (nsample, ntrees), nsample = data.numRow
with each record indicating the predicted leaf index of each sample in each tree.
Note that the leaf index of a tree is unique per tree, so you may find leaf 1
in both tree 1 and tree 0.
* @return predict result
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[][] predict(DMatrix data , long treeLimit, boolean predLeaf) throws XGBoostError {
return pred(data, false, treeLimit, predLeaf);
}
/**
* save model to modelPath
* @param modelPath
*/
public void saveModel(String modelPath) {
XgboostJNI.XGBoosterSaveModel(handle, modelPath);
}
private void loadModel(String modelPath) {
XgboostJNI.XGBoosterLoadModel(handle, modelPath);
}
/**
* get the dump of the model as a string array
* @param withStats Controls whether the split statistics are output.
* @return dumped model information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String[] getDumpInfo(boolean withStats) throws XGBoostError {
int statsFlag = 0;
if(withStats) {
statsFlag = 1;
}
String[][] modelInfos = new String[1][];
ErrorHandle.checkCall(XgboostJNI.XGBoosterDumpModel(handle, "", statsFlag, modelInfos));
return modelInfos[0];
}
/**
* get the dump of the model as a string array
* @param featureMap featureMap file
* @param withStats Controls whether the split statistics are output.
* @return dumped model information
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public String[] getDumpInfo(String featureMap, boolean withStats) throws XGBoostError {
int statsFlag = 0;
if(withStats) {
statsFlag = 1;
}
String[][] modelInfos = new String[1][];
ErrorHandle.checkCall(XgboostJNI.XGBoosterDumpModel(handle, featureMap, statsFlag, modelInfos));
return modelInfos[0];
}
/**
* Dump model into a text file.
* @param modelPath file to save dumped model info
* @param withStats bool
Controls whether the split statistics are output.
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
* @throws IOException
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void dumpModel(String modelPath, boolean withStats) throws FileNotFoundException, UnsupportedEncodingException, IOException, XGBoostError {
File tf = new File(modelPath);
FileOutputStream out = new FileOutputStream(tf);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
String[] modelInfos = getDumpInfo(withStats);
for(int i=0; i<modelInfos.length; i++) {
writer.write("booster [" + i +"]:\n");
writer.write(modelInfos[i]);
}
writer.close();
out.close();
}
/**
* Dump model into a text file.
* @param modelPath file to save dumped model info
* @param featureMap featureMap file
* @param withStats bool
Controls whether the split statistics are output.
* @throws FileNotFoundException
* @throws UnsupportedEncodingException
* @throws IOException
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void dumpModel(String modelPath, String featureMap, boolean withStats) throws FileNotFoundException, UnsupportedEncodingException, IOException, XGBoostError {
File tf = new File(modelPath);
FileOutputStream out = new FileOutputStream(tf);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
String[] modelInfos = getDumpInfo(featureMap, withStats);
for(int i=0; i<modelInfos.length; i++) {
writer.write("booster [" + i +"]:\n");
writer.write(modelInfos[i]);
}
writer.close();
out.close();
}
/**
* get importance of each feature
* @return featureMap key: feature index, value: feature importance score
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Map<String, Integer> getFeatureScore() throws XGBoostError {
String[] modelInfos = getDumpInfo(false);
Map<String, Integer> featureScore = new HashMap<>();
for(String tree : modelInfos) {
for(String node : tree.split("\n")) {
String[] array = node.split("\\[");
if(array.length == 1) {
continue;
}
String fid = array[1].split("\\]")[0];
fid = fid.split("<")[0];
if(featureScore.containsKey(fid)) {
featureScore.put(fid, 1 + featureScore.get(fid));
}
else {
featureScore.put(fid, 1);
}
}
}
return featureScore;
}
/**
* get importance of each feature
* @param featureMap file to save dumped model info
* @return featureMap key: feature index, value: feature importance score
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public Map<String, Integer> getFeatureScore(String featureMap) throws XGBoostError {
String[] modelInfos = getDumpInfo(featureMap, false);
Map<String, Integer> featureScore = new HashMap<>();
for(String tree : modelInfos) {
for(String node : tree.split("\n")) {
String[] array = node.split("\\[");
if(array.length == 1) {
continue;
}
String fid = array[1].split("\\]")[0];
fid = fid.split("<")[0];
if(featureScore.containsKey(fid)) {
featureScore.put(fid, 1 + featureScore.get(fid));
}
else {
featureScore.put(fid, 1);
}
}
}
return featureScore;
}
/**
* transfer DMatrix array to handle array (used for native functions)
* @param dmatrixs
* @return handle array for input dmatrixs
*/
private static long[] dMatrixs2handles(DMatrix[] dmatrixs) {
long[] handles = new long[dmatrixs.length];
for(int i=0; i<dmatrixs.length; i++) {
handles[i] = dmatrixs[i].getHandle();
}
return handles;
}
@Override
protected void finalize() {
delete();
}
public synchronized void delete() {
if(handle != 0l) {
XgboostJNI.XGBoosterFree(handle);
handle=0;
}
}
}

View File

@ -0,0 +1,263 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.util.ErrorHandle;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.dmlc.xgboost4j.util.Initializer;
import org.dmlc.xgboost4j.wrapper.XgboostJNI;
/**
* DMatrix for xgboost, similar to the python wrapper xgboost.py
* @author hzx
*/
public class DMatrix {
private static final Log logger = LogFactory.getLog(DMatrix.class);
long handle = 0;
//load native library
static {
try {
Initializer.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* sparse matrix type (CSR or CSC)
*/
public static enum SparseType {
CSR,
CSC;
}
/**
* init DMatrix from file (svmlight format)
* @param dataPath
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix(String dataPath) throws XGBoostError {
if(dataPath == null) {
throw new NullPointerException("dataPath: null");
}
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1, out));
handle = out[0];
}
/**
* create DMatrix from sparse matrix
* @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)
* @param indices Indices (colIndexs for CSR or rowIndexs for CSC)
* @param data non zero values (sequence by row for CSR or by col for CSC)
* @param st sparse matrix type (CSR or CSC)
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) throws XGBoostError {
long[] out = new long[1];
if(st == SparseType.CSR) {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSR(headers, indices, data, out));
}
else if(st == SparseType.CSC) {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSC(headers, indices, data, out));
}
else {
throw new UnknownError("unknow sparsetype");
}
handle = out[0];
}
/**
* create DMatrix from dense matrix
* @param data data values
* @param nrow number of rows
* @param ncol number of columns
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix(float[] data, int nrow, int ncol) throws XGBoostError {
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, 0.0f, out));
handle = out[0];
}
/**
* used for DMatrix slice
* @param handle
*/
private DMatrix(long handle) {
this.handle = handle;
}
/**
* set label of dmatrix
* @param labels
*/
public void setLabel(float[] labels) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "label", labels));
}
/**
* set weight of each instance
* @param weights
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setWeight(float[] weights) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights));
}
/**
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from
* @param baseMargin
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setBaseMargin(float[] baseMargin) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin));
}
/**
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from
* @param baseMargin
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setBaseMargin(float[][] baseMargin) throws XGBoostError {
float[] flattenMargin = flatten(baseMargin);
setBaseMargin(flattenMargin);
}
/**
* Set group sizes of DMatrix (used for ranking)
* @param group
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public void setGroup(int[] group) throws XGBoostError {
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetGroup(handle, group));
}
private float[] getFloatInfo(String field) throws XGBoostError {
float[][] infos = new float[1][];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetFloatInfo(handle, field, infos));
return infos[0];
}
private int[] getIntInfo(String field) throws XGBoostError {
int[][] infos = new int[1][];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetUIntInfo(handle, field, infos));
return infos[0];
}
/**
* get label values
* @return label
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[] getLabel() throws XGBoostError {
return getFloatInfo("label");
}
/**
* get weight of the DMatrix
* @return weights
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[] getWeight() throws XGBoostError {
return getFloatInfo("weight");
}
/**
* get base margin of the DMatrix
* @return base margin
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public float[] getBaseMargin() throws XGBoostError {
return getFloatInfo("base_margin");
}
/**
* Slice the DMatrix and return a new DMatrix that only contains `rowIndex`.
* @param rowIndex
* @return sliced new DMatrix
* @throws org.dmlc.xgboost4j.util.XGBoostError
*/
public DMatrix slice(int[] rowIndex) throws XGBoostError {
long[] out = new long[1];
ErrorHandle.checkCall(XgboostJNI.XGDMatrixSliceDMatrix(handle, rowIndex, out));
long sHandle = out[0];
DMatrix sMatrix = new DMatrix(sHandle);
return sMatrix;
}
/**
 * Ask the native library how many rows this DMatrix has.
 * @return number of rows
 * @throws org.dmlc.xgboost4j.util.XGBoostError if the native call returns a non-zero status
 */
public long rowNum() throws XGBoostError {
    // One-slot holder: the row count is read back from index 0.
    final long[] countHolder = new long[1];
    final int status = XgboostJNI.XGDMatrixNumRow(handle, countHolder);
    ErrorHandle.checkCall(status);
    return countHolder[0];
}
/**
 * Save this DMatrix to filePath.
 * @param filePath destination path handed to the native library
 */
public void saveBinary(String filePath) {
    // The native call is always made with its "silent" argument set to 1.
    final int silent = 1;
    // NOTE(review): the status code returned by the native call is discarded here,
    // unlike the other DMatrix methods which route it through ErrorHandle.checkCall.
    XgboostJNI.XGDMatrixSaveBinary(handle, filePath, silent);
}
public long getHandle() {
    // Raw native handle of this matrix; delete() resets it to 0.
    return this.handle;
}
/**
 * Concatenate the rows of a two-dimensional array into one flat array, in row order.
 * @param mat rows to concatenate; the rows may differ in length
 * @return a new array holding every element of mat, row after row
 */
private static float[] flatten(float[][] mat) {
    // First pass: total number of elements across all rows.
    int total = 0;
    for (int r = 0; r < mat.length; r++) {
        total += mat[r].length;
    }
    // Second pass: copy element by element into the flat array.
    final float[] flat = new float[total];
    int offset = 0;
    for (int r = 0; r < mat.length; r++) {
        final float[] row = mat[r];
        for (int c = 0; c < row.length; c++) {
            flat[offset++] = row[c];
        }
    }
    return flat;
}
@Override
protected void finalize() {
    // Release the native matrix if the owner never called delete() explicitly.
    this.delete();
}
public synchronized void delete() {
    // A zero handle means the native matrix has already been freed.
    if (handle == 0) {
        return;
    }
    XgboostJNI.XGDMatrixFree(handle);
    // Reset so that a repeated delete() (e.g. from finalize()) does not free twice.
    handle = 0;
}
}

View File

@ -0,0 +1,36 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
/**
 * Contract for a user-supplied evaluation metric.
 * @author hzx
 */
public interface IEvaluation {
    /**
     * Name of this evaluation metric.
     * @return evalMetric
     */
    String getMetric();

    /**
     * Score a set of predictions against a data matrix.
     * @param predicts predictions to evaluate
     * @param dmat data matrix the predictions are evaluated against
     * @return the value of the metric
     */
    float eval(float[][] predicts, DMatrix dmat);
}

View File

@ -0,0 +1,32 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.util.List;
/**
 * Contract for a user-defined objective function.
 * @author hzx
 */
public interface IObjective {
    /**
     * Compute the gradient information of the user-defined objective.
     * @param predicts untransformed margin predicts
     * @param dtrain training data
     * @return List with two float arrays: the first order gradient and the second order gradient
     */
    List<float[]> getGradient(float[][] predicts, DMatrix dtrain);
}

View File

@ -0,0 +1,89 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.util.Map;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IObjective;
/**
 * One fold of a cross validation run: a train/test pair of matrices
 * together with the booster that is trained and evaluated on them.
 * @author hzx
 */
public class CVPack {
    DMatrix dtrain;
    DMatrix dtest;
    DMatrix[] dmats;
    String[] names;
    Booster booster;

    /**
     * Create a cross validation package.
     * @param dtrain train data
     * @param dtest test data
     * @param params parameters passed to the Booster constructor
     * @throws org.dmlc.xgboost4j.util.XGBoostError
     */
    public CVPack(DMatrix dtrain, DMatrix dtest, Iterable<Map.Entry<String, Object>> params) throws XGBoostError {
        this.dtrain = dtrain;
        this.dtest = dtest;
        // Evaluation always covers both matrices, reported under these names.
        this.names = new String[] {"train", "test"};
        this.dmats = new DMatrix[] {dtrain, dtest};
        this.booster = new Booster(params, this.dmats);
    }

    /**
     * Run one boosting iteration on the training matrix.
     * @param iter iteration num
     * @throws org.dmlc.xgboost4j.util.XGBoostError
     */
    public void update(int iter) throws XGBoostError {
        booster.update(dtrain, iter);
    }

    /**
     * Run one boosting iteration on the training matrix with a customized objective.
     * @param iter iteration num
     * @param obj customized objective
     * @throws org.dmlc.xgboost4j.util.XGBoostError
     */
    public void update(int iter, IObjective obj) throws XGBoostError {
        booster.update(dtrain, iter, obj);
    }

    /**
     * Evaluate the booster on the train and test matrices.
     * @param iter iteration num
     * @return the evaluation string produced by the booster
     * @throws org.dmlc.xgboost4j.util.XGBoostError
     */
    public String eval(int iter) throws XGBoostError {
        final String evalInfo = booster.evalSet(dmats, names, iter);
        return evalInfo;
    }

    /**
     * Evaluate the booster on the train and test matrices with a customized metric.
     * @param iter iteration num
     * @param eval customized eval
     * @return the evaluation string produced by the booster
     * @throws org.dmlc.xgboost4j.util.XGBoostError
     */
    public String eval(int iter, IEvaluation eval) throws XGBoostError {
        final String evalInfo = booster.evalSet(dmats, names, iter, eval);
        return evalInfo;
    }
}

View File

@ -0,0 +1,50 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.wrapper.XgboostJNI;
/**
 * Error handling for calls into the xgboost native library.
 * Loading this class also triggers loading of the native library.
 * @author hzx
 */
public class ErrorHandle {
    private static final Log logger = LogFactory.getLog(ErrorHandle.class);

    // load native library
    static {
        try {
            Initializer.InitXgboost();
        } catch (IOException ex) {
            // Pass the exception as the Throwable argument so that the stack trace
            // is logged, not just the exception's toString().
            logger.error("load native library failed.", ex);
        }
    }

    /**
     * Check the return value of a C API call.
     * @param ret return value of an XgboostJNI C API call; 0 means success
     * @throws org.dmlc.xgboost4j.util.XGBoostError if ret is non-zero; the message is
     *         the last error reported by the native library
     */
    public static void checkCall(int ret) throws XGBoostError {
        if (ret != 0) {
            throw new XGBoostError(XgboostJNI.XGBGetLastError());
        }
    }
}

View File

@ -0,0 +1,92 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.io.IOException;
import java.lang.reflect.Field;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Loads the xgboost native libraries.
 * @author hzx
 */
public class Initializer {
    private static final Log logger = LogFactory.getLog(Initializer.class);

    static boolean initialized = false;
    public static final String nativePath = "./lib";
    public static final String nativeResourcePath = "/lib/";
    public static final String[] libNames = new String[] {"xgboostjavawrapper"};

    /**
     * Load every library listed in {@link #libNames}, once per JVM.
     * The initialized flag is only set after all libraries loaded successfully,
     * so a failed attempt can be retried.
     * @throws IOException if a library could be loaded neither from
     *         java.library.path nor from the jar
     */
    public static synchronized void InitXgboost() throws IOException {
        if (initialized) {
            return;
        }
        for (String libName : libNames) {
            smartLoad(libName);
        }
        initialized = true;
    }

    /**
     * Load a native library: first try java.library.path (after trying to append
     * {@link #nativePath} to it), then fall back to the copy packaged in the jar.
     * @param libName platform independent library name
     * @throws IOException if the fallback copy cannot be extracted from the jar
     */
    private static void smartLoad(String libName) throws IOException {
        try {
            addNativeDir(nativePath);
        } catch (IOException e) {
            // Extending the search path relies on a JVM-internal field and is only a
            // convenience; failing to do so must not prevent the two real loading
            // strategies below from being tried.
            logger.warn("could not add " + nativePath + " to the native library path: " + e.getMessage());
        }
        try {
            System.loadLibrary(libName);
        } catch (UnsatisfiedLinkError e) {
            // Not on the library path: extract and load the copy bundled in the jar.
            NativeUtils.loadLibraryFromJar(nativeResourcePath + System.mapLibraryName(libName));
        }
    }

    /**
     * Append libPath to the class loader's cached library search path (the private
     * static "usr_paths" field of ClassLoader), unless it is already listed.
     * @param libPath directory to add
     * @throws IOException if the field does not exist or cannot be accessed
     */
    public static void addNativeDir(String libPath) throws IOException {
        try {
            Field field = ClassLoader.class.getDeclaredField("usr_paths");
            field.setAccessible(true);
            String[] paths = (String[]) field.get(null);
            for (String path : paths) {
                if (libPath.equals(path)) {
                    return;
                }
            }
            String[] extended = new String[paths.length + 1];
            System.arraycopy(paths, 0, extended, 0, paths.length);
            extended[paths.length] = libPath;
            field.set(null, extended);
        } catch (IllegalAccessException e) {
            logger.error(e.getMessage());
            throw new IOException("Failed to get permissions to set library path", e);
        } catch (NoSuchFieldException e) {
            logger.error(e.getMessage());
            throw new IOException("Failed to get field handle to set library path", e);
        }
    }
}

View File

@ -0,0 +1,109 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
 * Simple library class for working with JNI (Java Native Interface)
 *
 * @see http://adamheinrich.com/2012/how-to-load-native-jni-library-from-jar
 *
 * @author Adam Heirnich &lt;adam@adamh.cz&gt;, http://www.adamh.cz
 */
public class NativeUtils {
    /**
     * Private constructor - this class will never be instanced
     */
    private NativeUtils() {
    }

    /**
     * Loads library from current JAR archive
     *
     * The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting.
     * Method uses String as filename because the pathname is "abstract", not system-dependent.
     *
     * @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext
     * @throws IOException If temporary file creation or read/write operation fails
     * @throws FileNotFoundException If source file (param path) does not exist inside the JAR
     * @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@link File#createTempFile(java.lang.String, java.lang.String)}).
     */
    public static void loadLibraryFromJar(String path) throws IOException {
        if (!path.startsWith("/")) {
            throw new IllegalArgumentException("The path has to be absolute (start with '/').");
        }

        // Obtain filename from path
        String[] parts = path.split("/");
        String filename = (parts.length > 1) ? parts[parts.length - 1] : null;

        // Split filename to prefix and suffix (extension)
        String prefix = "";
        String suffix = null;
        if (filename != null) {
            parts = filename.split("\\.", 2);
            prefix = parts[0];
            suffix = (parts.length > 1) ? "." + parts[parts.length - 1] : null;
        }

        // Check if the filename is okay
        if (filename == null || prefix.length() < 3) {
            throw new IllegalArgumentException("The filename has to be at least 3 characters long.");
        }

        // Open and check the input stream first, so that no temporary file is
        // created for a resource that does not exist.
        final InputStream is = NativeUtils.class.getResourceAsStream(path);
        if (is == null) {
            throw new FileNotFoundException("File " + path + " was not found inside JAR.");
        }

        final File temp;
        try {
            // Prepare temporary file
            temp = File.createTempFile(prefix, suffix);
            temp.deleteOnExit();
            if (!temp.exists()) {
                throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist.");
            }

            // Copy data between source file in JAR and the temporary file
            final OutputStream os = new FileOutputStream(temp);
            try {
                final byte[] buffer = new byte[1024];
                int readBytes;
                while ((readBytes = is.read(buffer)) != -1) {
                    os.write(buffer, 0, readBytes);
                }
            } finally {
                os.close();
            }
        } finally {
            // Runs on every path after the stream was opened, including a failure
            // to create the temporary file or to close the output stream.
            is.close();
        }

        // Finally, load the library
        System.load(temp.getAbsolutePath());
    }
}

View File

@ -0,0 +1,235 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.IEvaluation;
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.IObjective;
/**
 * trainer for xgboost: boosting loop and n-fold cross-validation helpers
 * @author hzx
 */
public class Trainer {
    private static final Log logger = LogFactory.getLog(Trainer.class);

    /**
     * Train a booster with given parameters.
     * @param params Booster params.
     * @param dtrain Data to be trained.
     * @param round Number of boosting iterations.
     * @param watchs a group of items to be evaluated during training, this allows user to watch performance on the validation set
     *               (null is treated as an empty group).
     * @param obj customized objective (set to null if not used)
     * @param eval customized evaluation (set to null if not used)
     * @return trained booster
     * @throws XGBoostError if a call into the booster fails
     */
    public static Booster train(Iterable<Entry<String, Object>> params, DMatrix dtrain, int round,
            Iterable<Entry<String, DMatrix>> watchs, IObjective obj, IEvaluation eval) throws XGBoostError {
        // collect eval matrices
        List<String> names = new ArrayList<>();
        List<DMatrix> mats = new ArrayList<>();
        if (watchs != null) {
            for (Entry<String, DMatrix> evalEntry : watchs) {
                names.add(evalEntry.getKey());
                mats.add(evalEntry.getValue());
            }
        }
        String[] evalNames = names.toArray(new String[names.size()]);
        DMatrix[] evalMats = mats.toArray(new DMatrix[mats.size()]);

        // collect all data matrices: the training matrix followed by every watched matrix
        DMatrix[] allMats = new DMatrix[evalMats.length + 1];
        allMats[0] = dtrain;
        System.arraycopy(evalMats, 0, allMats, 1, evalMats.length);

        // initialize booster
        Booster booster = new Booster(params, allMats);

        // begin to train
        for (int iter = 0; iter < round; iter++) {
            if (obj != null) {
                booster.update(dtrain, iter, obj);
            } else {
                booster.update(dtrain, iter);
            }

            // evaluation, only when there is something to watch
            if (evalMats.length > 0) {
                String evalInfo;
                if (eval != null) {
                    evalInfo = booster.evalSet(evalMats, evalNames, iter, eval);
                } else {
                    evalInfo = booster.evalSet(evalMats, evalNames, iter);
                }
                logger.info(evalInfo);
            }
        }
        return booster;
    }

    /**
     * Cross-validation with given parameters.
     * @param params Booster params.
     * @param data Data to be trained.
     * @param round Number of boosting iterations.
     * @param nfold Number of folds in CV.
     * @param metrics Evaluation metrics to be watched in CV.
     * @param obj customized objective (set to null if not used)
     * @param eval customized evaluation (set to null if not used)
     * @return evaluation history, one aggregated entry per round
     * @throws XGBoostError if a call into the booster fails
     */
    public static String[] crossValiation(Iterable<Entry<String, Object>> params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) throws XGBoostError {
        CVPack[] cvPacks = makeNFold(data, nfold, params, metrics);
        String[] evalHist = new String[round];
        String[] results = new String[cvPacks.length];
        for (int i = 0; i < round; i++) {
            // advance every fold by one boosting iteration
            for (CVPack cvPack : cvPacks) {
                if (obj != null) {
                    cvPack.update(i, obj);
                } else {
                    cvPack.update(i);
                }
            }

            // evaluate every fold, then average the per-fold results
            for (int j = 0; j < cvPacks.length; j++) {
                if (eval != null) {
                    results[j] = cvPacks[j].eval(i, eval);
                } else {
                    results[j] = cvPacks[j].eval(i);
                }
            }

            evalHist[i] = aggCVResults(results);
            logger.info(evalHist[i]);
        }
        return evalHist;
    }

    /**
     * make an n-fold array of CVPack from random indices
     *
     * The row indices are shuffled once. Fold i uses the shuffled positions
     * [i*step, (i+1)*step) as its test rows, where step = rows / nfold, and all
     * remaining positions as its train rows, so the test sets of different folds
     * never overlap. When the row count is not a multiple of nfold, the trailing
     * rows % nfold positions are train rows in every fold.
     *
     * @param data original data
     * @param nfold num of folds, must be at least 1
     * @param params booster parameters
     * @param evalMetrics Evaluation metrics
     * @return CV package array
     * @throws IllegalArgumentException if nfold is smaller than 1
     * @throws XGBoostError if slicing the data or creating a booster fails
     */
    public static CVPack[] makeNFold(DMatrix data, int nfold, Iterable<Entry<String, Object>> params, String[] evalMetrics) throws XGBoostError {
        if (nfold < 1) {
            throw new IllegalArgumentException("nfold must be at least 1, got " + nfold);
        }
        List<Integer> samples = genRandPermutationNums(0, (int) data.rowNum());
        int step = samples.size() / nfold;
        int[] testSlice = new int[step];
        int[] trainSlice = new int[samples.size() - step];
        CVPack[] cvPacks = new CVPack[nfold];
        for (int i = 0; i < nfold; i++) {
            // Half-open window of shuffled positions forming the test set of fold i.
            // The lower bound is inclusive: excluding it would move the first sample
            // of the window into the train set and push an unrelated sample into the
            // test set instead.
            int testBegin = i * step;
            int testEnd = testBegin + step;
            int testid = 0;
            int trainid = 0;
            for (int j = 0; j < samples.size(); j++) {
                if (j >= testBegin && j < testEnd) {
                    testSlice[testid++] = samples.get(j);
                } else {
                    trainSlice[trainid++] = samples.get(j);
                }
            }

            DMatrix dtrain = data.slice(trainSlice);
            DMatrix dtest = data.slice(testSlice);
            CVPack cvPack = new CVPack(dtrain, dtest, params);
            // set eval types
            if (evalMetrics != null) {
                for (String type : evalMetrics) {
                    cvPack.booster.setParam("eval_metric", type);
                }
            }
            cvPacks[i] = cvPack;
        }
        return cvPacks;
    }

    /**
     * Generate the integers start (inclusive) to end (exclusive) in random order.
     */
    private static List<Integer> genRandPermutationNums(int start, int end) {
        List<Integer> samples = new ArrayList<>();
        for (int i = start; i < end; i++) {
            samples.add(i);
        }
        Collections.shuffle(samples);
        return samples;
    }

    /**
     * Aggregate cross-validation results.
     *
     * Every result is a tab separated line: a leading field followed by
     * "name:value" items. The returned line starts with the leading field of the
     * first result, followed by one "cv-name:mean" item per metric, in the order
     * in which the metrics first appear.
     *
     * @param results eval info from each data sample
     * @return cross-validation eval info
     */
    public static String aggCVResults(String[] results) {
        // Insertion-ordered map, so that the metrics are reported in a stable order.
        Map<String, List<Float>> cvMap = new java.util.LinkedHashMap<>();
        for (String result : results) {
            String[] items = result.split("\t");
            for (int i = 1; i < items.length; i++) {
                String[] tup = items[i].split(":");
                String key = tup[0];
                Float value = Float.valueOf(tup[1]);
                if (!cvMap.containsKey(key)) {
                    cvMap.put(key, new ArrayList<Float>());
                }
                cvMap.get(key).add(value);
            }
        }

        StringBuilder aggResult = new StringBuilder(results[0].split("\t")[0]);
        for (Entry<String, List<Float>> metric : cvMap.entrySet()) {
            float value = 0f;
            for (Float tvalue : metric.getValue()) {
                value += tvalue;
            }
            value /= metric.getValue().size();
            aggResult.append(String.format("\tcv-%s:%f", metric.getKey(), value));
        }
        return aggResult.toString();
    }
}

View File

@ -0,0 +1,26 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.util;
/**
 * Checked exception used to report xgboost failures.
 * @author hzx
 */
public class XGBoostError extends Exception {

    /**
     * @param message description of the failure, available through getMessage()
     */
    public XGBoostError(String message) {
        super(message);
    }
}

View File

@ -0,0 +1,50 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j.wrapper;
/**
 * xgboost jni wrapper functions for xgboost_wrapper.h
 * change 2015-7-6: *use a long[] (length=1) as container of handle to get the output DMatrix or Booster
 *
 * Apart from XGBGetLastError, every function returns an int status code; callers
 * pass it to ErrorHandle.checkCall, which treats any non-zero value as a failure
 * and reads the message through XGBGetLastError.
 * Results are returned through caller-allocated one-element arrays
 * (long[] for handles and counts, float[][] / int[][] / String[] / String[][] for data).
 * @author hzx
 */
public class XgboostJNI {
// message of the last error reported by the native library
public final static native String XGBGetLastError();
// DMatrix creation: the handle of the new matrix is written to out[0]
public final static native int XGDMatrixCreateFromFile(String fname, int silent, long[] out);
public final static native int XGDMatrixCreateFromCSR(long[] indptr, int[] indices, float[] data, long[] out);
public final static native int XGDMatrixCreateFromCSC(long[] colptr, int[] indices, float[] data, long[] out);
public final static native int XGDMatrixCreateFromMat(float[] data, int nrow, int ncol, float missing, long[] out);
public final static native int XGDMatrixSliceDMatrix(long handle, int[] idxset, long[] out);
// DMatrix lifetime and persistence
public final static native int XGDMatrixFree(long handle);
public final static native int XGDMatrixSaveBinary(long handle, String fname, int silent);
// DMatrix meta information setters and getters
public final static native int XGDMatrixSetFloatInfo(long handle, String field, float[] array);
public final static native int XGDMatrixSetUIntInfo(long handle, String field, int[] array);
public final static native int XGDMatrixSetGroup(long handle, int[] group);
public final static native int XGDMatrixGetFloatInfo(long handle, String field, float[][] info);
public final static native int XGDMatrixGetUIntInfo(long handle, String filed, int[][] info);
public final static native int XGDMatrixNumRow(long handle, long[] row);
// Booster lifetime and configuration
public final static native int XGBoosterCreate(long[] handles, long[] out);
public final static native int XGBoosterFree(long handle);
public final static native int XGBoosterSetParam(long handle, String name, String value);
// Booster training, evaluation and prediction
public final static native int XGBoosterUpdateOneIter(long handle, int iter, long dtrain);
public final static native int XGBoosterBoostOneIter(long handle, long dtrain, float[] grad, float[] hess);
public final static native int XGBoosterEvalOneIter(long handle, int iter, long[] dmats, String[] evnames, String[] eval_info);
public final static native int XGBoosterPredict(long handle, long dmat, int option_mask, long ntree_limit, float[][] predicts);
// Booster model persistence and dumping
public final static native int XGBoosterLoadModel(long handle, String fname);
public final static native int XGBoosterSaveModel(long handle, String fname);
// NOTE(review): buf is passed as a long, presumably a native buffer address - confirm against the C++ wrapper
public final static native int XGBoosterLoadModelFromBuffer(long handle, long buf, long len);
public final static native int XGBoosterGetModelRaw(long handle, String[] out_string);
public final static native int XGBoosterDumpModel(long handle, String fmap, int with_stats, String[][] out_strings);
}

View File

@ -0,0 +1,108 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.junit.Test;
/**
* test cases for Booster
* @author hzx
*/
public class BoosterTest {
public static class EvalError implements IEvaluation {
private static final Log logger = LogFactory.getLog(EvalError.class);
String evalMetric = "custom_error";
public EvalError() {
}
@Override
public String getMetric() {
return evalMetric;
}
@Override
public float eval(float[][] predicts, DMatrix dmat) {
float error = 0f;
float[] labels;
try {
labels = dmat.getLabel();
} catch (XGBoostError ex) {
logger.error(ex);
return -1f;
}
int nrow = predicts.length;
for(int i=0; i<nrow; i++) {
if(labels[i]==0f && predicts[i][0]>0) {
error++;
}
else if(labels[i]==1f && predicts[i][0]<=0) {
error++;
}
}
return error/labels.length;
}
}
@Test
public void testBoosterBasic() throws XGBoostError {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
//set params
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
Iterable<Entry<String, Object>> param = paramMap.entrySet();
//set watchList
List<Entry<String, DMatrix>> watchs = new ArrayList<>();
watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat));
watchs.add(new AbstractMap.SimpleEntry<>("test", testMat));
//set round
int round = 2;
//train a boost model
Booster booster = Trainer.train(param, trainMat, round, watchs, null, null);
//predict raw output
float[][] predicts = booster.predict(testMat, true);
//eval
IEvaluation eval = new EvalError();
//error must be less than 0.1
TestCase.assertTrue(eval.eval(predicts, testMat)<0.1f);
}
}

View File

@ -0,0 +1,102 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.util.Arrays;
import java.util.Random;
import junit.framework.TestCase;
import org.dmlc.xgboost4j.util.XGBoostError;
import org.junit.Test;
/**
 * test cases for DMatrix
 * @author hzx
 */
public class DMatrixTest {

    @Test
    public void testCreateFromFile() throws XGBoostError {
        // load the matrix from a file
        final DMatrix fromFile = new DMatrix("../../demo/data/agaricus.txt.test");

        // there must be exactly one label per row
        final float[] labels = fromFile.getLabel();
        TestCase.assertTrue(fromFile.rowNum() == labels.length);

        // weights written to the matrix must be read back unchanged
        final float[] expected = Arrays.copyOf(labels, labels.length);
        fromFile.setWeight(expected);
        final float[] actual = fromFile.getWeight();
        TestCase.assertTrue(Arrays.equals(expected, actual));
    }

    @Test
    public void testCreateFromCSR() throws XGBoostError {
        /**
         * csr representation of the sparse matrix
         * 1 0 2 3 0
         * 4 0 2 3 5
         * 3 1 2 5 0
         */
        final float[] values = new float[] {1, 2, 3, 4, 2, 3, 5, 3, 1, 2, 5};
        final int[] columns = new int[] {0, 2, 3, 0, 2, 3, 4, 0, 1, 2, 3};
        final long[] rowOffsets = new long[] {0, 3, 7, 11};
        final DMatrix fromCsr = new DMatrix(rowOffsets, columns, values, DMatrix.SparseType.CSR);

        // three row offsets ranges -> three rows
        System.out.println(fromCsr.rowNum());
        TestCase.assertTrue(fromCsr.rowNum() == 3);

        // labels written to the matrix must be read back unchanged
        final float[] expected = new float[] {1, 0, 1};
        fromCsr.setLabel(expected);
        final float[] actual = fromCsr.getLabel();
        TestCase.assertTrue(Arrays.equals(expected, actual));
    }

    @Test
    public void testCreateFromDenseMatrix() throws XGBoostError {
        // 10*5 dense matrix filled with random numbers
        final int nrow = 10;
        final int ncol = 5;
        final Random random = new Random();
        final float[] cells = new float[nrow * ncol];
        for (int k = 0; k < cells.length; k++) {
            cells[k] = random.nextFloat();
        }

        // one random label per row
        final float[] labels = new float[nrow];
        for (int r = 0; r < labels.length; r++) {
            labels[r] = random.nextFloat();
        }

        final DMatrix dense = new DMatrix(cells, nrow, ncol);
        dense.setLabel(labels);

        // row count and label count must match the input
        TestCase.assertTrue(dense.rowNum() == 10);
        TestCase.assertTrue(dense.getLabel().length == 10);

        // one random weight per instance, read back unchanged
        final float[] weights = new float[nrow];
        for (int r = 0; r < weights.length; r++) {
            weights[r] = random.nextFloat();
        }
        dense.setWeight(weights);
        TestCase.assertTrue(Arrays.equals(weights, dense.getWeight()));
    }
}

680
java/xgboost4j_wrapper.cpp Normal file
View File

@ -0,0 +1,680 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <jni.h>
#include "../wrapper/xgboost_wrapper.h"
#include "xgboost4j_wrapper.h"
JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBGetLastError
  (JNIEnv *jenv, jclass jcls) {
  // Convert the last error message of the C API into a Java string;
  // a null message yields a null Java reference.
  const char *message = (const char *) XGBGetLastError();
  jstring jmessage = 0;
  if (message) {
    jmessage = jenv->NewStringUTF(message);
  }
  return jmessage;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixCreateFromFile
 * Signature: (Ljava/lang/String;I[J)I
 *
 * Creates a DMatrix from the file jfname and stores its handle in jout[0].
 * Returns the status code of the C API call, or -1 if the file name could
 * not be converted (a Java exception is pending in that case).
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile
  (JNIEnv *jenv, jclass jcls, jstring jfname, jint jsilent, jlongArray jout) {
  const char *fname = jenv->GetStringUTFChars(jfname, 0);
  if (fname == 0) {
    // conversion failed; do not hand a null file name to the C API
    return -1;
  }

  // initialised so that a failing call never publishes an indeterminate pointer
  void *handle = 0;
  jint jresult = (jint) XGDMatrixCreateFromFile(fname, (int) jsilent, &handle);
  jenv->ReleaseStringUTFChars(jfname, fname);

  // The handle travels to Java inside a jlong. The buffer must be a real jlong
  // (always 64 bits): `unsigned long` is only 32 bits on some platforms, and
  // SetLongArrayRegion reads a full jlong from the buffer. The pointer is stored
  // in the leading bytes, matching how the other wrapper functions read it back
  // with *(void **)&jhandle.
  jlong out = 0;
  *(void **)&out = handle;
  jenv->SetLongArrayRegion(jout, 0, 1, &out);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixCreateFromCSR
 * Signature: ([J[I[F[J)I
 *
 * Creates a DMatrix from CSR arrays and stores its handle in jout[0].
 * Returns the status code of the C API call.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR
  (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) {
  jlong *indptr = jenv->GetLongArrayElements(jindptr, 0);
  jint *indices = jenv->GetIntArrayElements(jindices, 0);
  jfloat *data = jenv->GetFloatArrayElements(jdata, 0);
  bst_ulong nindptr = (bst_ulong) jenv->GetArrayLength(jindptr);
  bst_ulong nelem = (bst_ulong) jenv->GetArrayLength(jdata);

  // initialised so that a failing call never publishes an indeterminate pointer
  void *handle = 0;
  // NOTE(review): the jlong -> unsigned long pointer cast assumes both are 64 bits wide
  jint jresult = (jint) XGDMatrixCreateFromCSR((unsigned long const *) indptr, (unsigned int const *) indices,
                                               (float const *) data, nindptr, nelem, &handle);

  // The buffer handed to SetLongArrayRegion must be a real jlong (always 64 bits);
  // `unsigned long` is only 32 bits on some platforms. The pointer is stored in the
  // leading bytes, matching how the handle is read back with *(void **)&jhandle.
  jlong out = 0;
  *(void **)&out = handle;
  jenv->SetLongArrayRegion(jout, 0, 1, &out);

  // The arrays are only read (they are passed as const), so release them with
  // JNI_ABORT: the buffers are freed without copying anything back to Java.
  jenv->ReleaseLongArrayElements(jindptr, indptr, JNI_ABORT);
  jenv->ReleaseIntArrayElements(jindices, indices, JNI_ABORT);
  jenv->ReleaseFloatArrayElements(jdata, data, JNI_ABORT);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixCreateFromCSC
 * Signature: ([J[I[F[J)I
 *
 * Creates a DMatrix from CSC arrays and stores its handle in jout[0].
 * Returns the status code of the C API call.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC
  (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) {
  jlong *indptr = jenv->GetLongArrayElements(jindptr, 0);
  jint *indices = jenv->GetIntArrayElements(jindices, 0);
  jfloat *data = jenv->GetFloatArrayElements(jdata, 0);
  bst_ulong nindptr = (bst_ulong) jenv->GetArrayLength(jindptr);
  bst_ulong nelem = (bst_ulong) jenv->GetArrayLength(jdata);

  // initialised so that a failing call never publishes an indeterminate pointer
  void *handle = 0;
  // NOTE(review): the jlong -> unsigned long pointer cast assumes both are 64 bits wide
  jint jresult = (jint) XGDMatrixCreateFromCSC((unsigned long const *) indptr, (unsigned int const *) indices,
                                               (float const *) data, nindptr, nelem, &handle);

  // The buffer handed to SetLongArrayRegion must be a real jlong (always 64 bits);
  // `unsigned long` is only 32 bits on some platforms. The pointer is stored in the
  // leading bytes, matching how the handle is read back with *(void **)&jhandle.
  jlong out = 0;
  *(void **)&out = handle;
  jenv->SetLongArrayRegion(jout, 0, 1, &out);

  // The arrays are only read (they are passed as const), so release them with
  // JNI_ABORT: the buffers are freed without copying anything back to Java.
  jenv->ReleaseLongArrayElements(jindptr, indptr, JNI_ABORT);
  jenv->ReleaseIntArrayElements(jindices, indices, JNI_ABORT);
  jenv->ReleaseFloatArrayElements(jdata, data, JNI_ABORT);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixCreateFromMat
 * Signature: ([FIIF[J)I
 *
 * Creates a DMatrix from the float buffer jdata, with jnrow / jncol as its
 * dimensions and jmiss as the "missing" marker, and writes the new native
 * handle into jout[0].
 * Returns the status code reported by XGDMatrixCreateFromMat.
 * NOTE(review): the length of jdata is not checked against jnrow * jncol here;
 * confirm the Java caller guarantees it.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat
  (JNIEnv *jenv, jclass jcls, jfloatArray jdata, jint jnrow, jint jncol, jfloat jmiss, jlongArray jout) {
  jint jresult = 0;
  bst_ulong nrow = (bst_ulong)jnrow;
  bst_ulong ncol = (bst_ulong)jncol;
  float miss = (float)jmiss;
  // NULL-initialised so that jout[0] is a well-defined 0 if the native call fails
  void *result = NULL;
  // jlong (always 64-bit) replaces the `unsigned long out[1]` buffer, which is too
  // small to hold a 64-bit pointer / jlong on LLP64 targets
  jlong out = 0;
  jfloat *data = jenv->GetFloatArrayElements(jdata, NULL);
  jresult = (jint) XGDMatrixCreateFromMat((float const *)data, nrow, ncol, miss, &result);
  *(void **)&out = result;
  jenv->SetLongArrayRegion(jout, 0, 1, &out);
  // the input is only read (passed as const), so free it without copying back
  jenv->ReleaseFloatArrayElements(jdata, data, JNI_ABORT);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixSliceDMatrix
 * Signature: (J[I[J)I
 *
 * Slices the DMatrix behind jhandle using the indices in jindexset and writes
 * the handle of the resulting DMatrix into jout[0].
 * Returns the status code reported by XGDMatrixSliceDMatrix.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jintArray jindexset, jlongArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  // NULL-initialised so that jout[0] is a well-defined 0 if the native call fails
  void *result = NULL;
  // jlong (always 64-bit) replaces the `unsigned long out[1]` buffer, which is too
  // small to hold a 64-bit pointer / jlong on LLP64 targets
  jlong out = 0;
  jint *indexset = jenv->GetIntArrayElements(jindexset, NULL);
  bst_ulong len = (bst_ulong)jenv->GetArrayLength(jindexset);
  jresult = (jint) XGDMatrixSliceDMatrix(handle, (int const *)indexset, len, &result);
  *(void **)&out = result;
  jenv->SetLongArrayRegion(jout, 0, 1, &out);
  // the index set is only read (passed as const), so free it without copying back
  jenv->ReleaseIntArrayElements(jindexset, indexset, JNI_ABORT);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixFree
 * Signature: (J)I
 *
 * Decodes the native DMatrix pointer stored in jhandle and passes it to
 * XGDMatrixFree, returning that call's status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree
  (JNIEnv *jenv, jclass jcls, jlong jhandle) {
  // the Java side carries the native pointer inside a long
  void *dmat = *(void **)&jhandle;
  return (jint) XGDMatrixFree(dmat);
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixSaveBinary
 * Signature: (JLjava/lang/String;I)I
 *
 * Forwards the DMatrix behind jhandle, the file name jfname and the jsilent
 * flag to XGDMatrixSaveBinary and returns that call's status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname, jint jsilent) {
  void *dmat = *(void **)&jhandle;
  // UTF-8 view of the Java string; handed back to the JVM after the call
  const char *path = jenv->GetStringUTFChars(jfname, 0);
  const jint status = (jint) XGDMatrixSaveBinary(dmat, path, (int)jsilent);
  if (path) {
    jenv->ReleaseStringUTFChars(jfname, path);
  }
  return status;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixSetFloatInfo
 * Signature: (JLjava/lang/String;[F)I
 *
 * Passes the float array jarray, under the field name jfield, to
 * XGDMatrixSetFloatInfo for the DMatrix behind jhandle.
 * Returns the status code reported by XGDMatrixSetFloatInfo.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jfloatArray jarray) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  const char *field = jenv->GetStringUTFChars(jfield, 0);
  jfloat *array = jenv->GetFloatArrayElements(jarray, NULL);
  bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
  jresult = (jint) XGDMatrixSetFloatInfo(handle, field, (float const *)array, len);
  if (field) jenv->ReleaseStringUTFChars(jfield, field);
  // the array is only read (passed as const): JNI_ABORT frees the native buffer
  // without the copy-back into the Java array that mode 0 performs
  jenv->ReleaseFloatArrayElements(jarray, array, JNI_ABORT);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixSetUIntInfo
 * Signature: (JLjava/lang/String;[I)I
 *
 * Passes the int array jarray (reinterpreted as unsigned int), under the field
 * name jfield, to XGDMatrixSetUIntInfo for the DMatrix behind jhandle.
 * Returns the status code reported by XGDMatrixSetUIntInfo.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jintArray jarray) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  const char *field = jenv->GetStringUTFChars(jfield, 0);
  jint *array = jenv->GetIntArrayElements(jarray, NULL);
  bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
  jresult = (jint) XGDMatrixSetUIntInfo(handle, field, (unsigned int const *)array, len);
  if (field) jenv->ReleaseStringUTFChars(jfield, field);
  // the array is only read (passed as const): JNI_ABORT frees the native buffer
  // without the copy-back into the Java array that mode 0 performs
  jenv->ReleaseIntArrayElements(jarray, array, JNI_ABORT);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixSetGroup
 * Signature: (J[I)I
 *
 * Passes the int array jarray (reinterpreted as unsigned int) to
 * XGDMatrixSetGroup for the DMatrix behind jhandle.
 * Returns the status code reported by XGDMatrixSetGroup.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup
  (JNIEnv * jenv, jclass jcls, jlong jhandle, jintArray jarray) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  jint *array = jenv->GetIntArrayElements(jarray, NULL);
  bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
  jresult = (jint) XGDMatrixSetGroup(handle, (unsigned int const *)array, len);
  // the array is only read (passed as const): JNI_ABORT frees the native buffer
  // without the copy-back into the Java array that mode 0 performs
  jenv->ReleaseIntArrayElements(jarray, array, JNI_ABORT);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixGetFloatInfo
 * Signature: (JLjava/lang/String;[[F)I
 *
 * Queries XGDMatrixGetFloatInfo for the field jfield of the DMatrix behind
 * jhandle, copies the returned values into a new float[] and stores that array
 * in jout[0].
 * Returns the status code reported by XGDMatrixGetFloatInfo, or 0 if the field
 * name could not be converted (a Java exception is pending in that case).
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  const char *field = NULL;
  bst_ulong len = 0;
  // NULL-initialised: if the native call fails without touching its outputs we
  // must not hand an indeterminate pointer to SetFloatArrayRegion
  const float *result = NULL;
  if (jfield) {
    field = jenv->GetStringUTFChars(jfield, 0);
    if (!field) return 0;
  }
  jresult = (jint) XGDMatrixGetFloatInfo(handle, field, &len, &result);
  if (field) jenv->ReleaseStringUTFChars(jfield, field);
  jsize jlen = (jsize)len;
  jfloatArray jarray = jenv->NewFloatArray(jlen);
  // allocation failed: an exception is pending, no further JNI calls are safe
  if (jarray == NULL) return jresult;
  if (jlen > 0 && result != NULL) {
    jenv->SetFloatArrayRegion(jarray, 0, jlen, (const jfloat *)result);
  }
  jenv->SetObjectArrayElement(jout, 0, (jobject) jarray);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixGetUIntInfo
 * Signature: (JLjava/lang/String;[[I)I
 *
 * Queries XGDMatrixGetUIntInfo for the field jfield of the DMatrix behind
 * jhandle, copies the returned values into a new int[] and stores that array
 * in jout[0].
 * Returns the status code reported by XGDMatrixGetUIntInfo, or 0 if the field
 * name could not be converted (a Java exception is pending in that case).
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  const char *field = NULL;
  bst_ulong len = 0;
  // NULL-initialised: if the native call fails without touching its outputs we
  // must not hand an indeterminate pointer to SetIntArrayRegion
  const unsigned int *result = NULL;
  // same guard as XGDMatrixGetFloatInfo: never call GetStringUTFChars on a null
  // jstring, and bail out if the conversion itself fails
  if (jfield) {
    field = jenv->GetStringUTFChars(jfield, 0);
    if (!field) return 0;
  }
  jresult = (jint) XGDMatrixGetUIntInfo(handle, field, &len, &result);
  if (field) jenv->ReleaseStringUTFChars(jfield, field);
  jsize jlen = (jsize)len;
  jintArray jarray = jenv->NewIntArray(jlen);
  // allocation failed: an exception is pending, no further JNI calls are safe
  if (jarray == NULL) return jresult;
  if (jlen > 0 && result != NULL) {
    jenv->SetIntArrayRegion(jarray, 0, jlen, (const jint *)result);
  }
  jenv->SetObjectArrayElement(jout, 0, jarray);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGDMatrixNumRow
 * Signature: (J[J)I
 *
 * Asks XGDMatrixNumRow for the row count of the DMatrix behind jhandle and
 * writes it into jout[0].
 * Returns the status code reported by XGDMatrixNumRow.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jlongArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  // zero-initialised so jout[0] is well defined even if the native call fails
  bst_ulong nrow = 0;
  jresult = (jint) XGDMatrixNumRow(handle, &nrow);
  // convert by value: reinterpreting a bst_ulong buffer as jlong reads past its
  // end whenever bst_ulong is narrower than 64 bits
  jlong out = (jlong)nrow;
  jenv->SetLongArrayRegion(jout, 0, 1, &out);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterCreate
 * Signature: ([J[J)I
 *
 * Creates a booster from the DMatrix handles in jhandles (may be null, in which
 * case an empty handle list is passed) and writes the new booster handle into
 * jout[0].
 * Returns the status code reported by XGBoosterCreate.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate
  (JNIEnv *jenv, jclass jcls, jlongArray jhandles, jlongArray jout) {
  jint jresult = 0;
  void **handles = NULL;
  bst_ulong len = 0;
  // NULL-initialised so that jout[0] is a well-defined 0 if the native call fails
  void *result = NULL;
  // jlong (always 64-bit) replaces the `unsigned long out[1]` buffer, which is too
  // small to hold a 64-bit pointer / jlong on LLP64 targets
  jlong out = 0;
  if (jhandles) {
    len = (bst_ulong)jenv->GetArrayLength(jhandles);
    handles = new void*[len];
    // decode every Java long into the native pointer it carries
    jlong *cjhandles = jenv->GetLongArrayElements(jhandles, NULL);
    for (bst_ulong i = 0; i < len; i++) {
      handles[i] = *(void **)&cjhandles[i];
    }
    // the values are copied and were never modified: release without copy-back
    jenv->ReleaseLongArrayElements(jhandles, cjhandles, JNI_ABORT);
  }
  jresult = (jint) XGBoosterCreate(handles, len, &result);
  delete[] handles;  // no-op when jhandles was null
  *(void **)&out = result;
  jenv->SetLongArrayRegion(jout, 0, 1, &out);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterFree
 * Signature: (J)I
 *
 * Decodes the native booster pointer stored in jhandle and passes it to
 * XGBoosterFree, returning that call's status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree
  (JNIEnv *jenv, jclass jcls, jlong jhandle) {
  // the Java side carries the native pointer inside a long
  void *booster = *(void **)&jhandle;
  const jint status = (jint) XGBoosterFree(booster);
  return status;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterSetParam
 * Signature: (JLjava/lang/String;Ljava/lang/String;)I
 *
 * Forwards the (jname, jvalue) string pair to XGBoosterSetParam for the booster
 * behind jhandle and returns that call's status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jname, jstring jvalue) {
  void *booster = *(void **)&jhandle;
  // UTF-8 views of both Java strings; each is handed back to the JVM below
  const char *key = jenv->GetStringUTFChars(jname, 0);
  const char *val = jenv->GetStringUTFChars(jvalue, 0);
  const jint status = (jint) XGBoosterSetParam(booster, key, val);
  if (key) {
    jenv->ReleaseStringUTFChars(jname, key);
  }
  if (val) {
    jenv->ReleaseStringUTFChars(jvalue, val);
  }
  return status;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterUpdateOneIter
 * Signature: (JIJ)I
 *
 * Forwards the booster behind jhandle, the iteration number jiter and the
 * DMatrix behind jdtrain to XGBoosterUpdateOneIter and returns that call's
 * status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlong jdtrain) {
  // both longs carry native pointers
  void *booster = *(void **)&jhandle;
  void *train_mat = *(void **)&jdtrain;
  const int round = (int)jiter;
  return (jint) XGBoosterUpdateOneIter(booster, round, train_mat);
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterBoostOneIter
 * Signature: (JJ[F[F)I
 *
 * Forwards the jgrad / jhess arrays to XGBoosterBoostOneIter for the booster
 * behind jhandle and the DMatrix behind jdtrain.
 * Returns the status code reported by XGBoosterBoostOneIter. If jgrad and jhess
 * differ in length, throws IllegalArgumentException and returns -1 without
 * calling the native function.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdtrain, jfloatArray jgrad, jfloatArray jhess) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  void *dtrain = *(void **)&jdtrain;
  // A single length is passed for both buffers, so a shorter jhess would be read
  // past its end. Reject the mismatch before touching the array contents.
  const jsize ngrad = jenv->GetArrayLength(jgrad);
  if (jenv->GetArrayLength(jhess) != ngrad) {
    jclass iae = jenv->FindClass("java/lang/IllegalArgumentException");
    if (iae) jenv->ThrowNew(iae, "grad and hess must have the same length");
    return -1;
  }
  bst_ulong len = (bst_ulong)ngrad;
  jfloat *grad = jenv->GetFloatArrayElements(jgrad, NULL);
  jfloat *hess = jenv->GetFloatArrayElements(jhess, NULL);
  jresult = (jint) XGBoosterBoostOneIter(handle, dtrain, grad, hess, len);
  // mode 0 is kept here: the buffers are passed as non-const pointers, so any
  // native-side writes are still copied back exactly as before
  jenv->ReleaseFloatArrayElements(jgrad, grad, 0);
  jenv->ReleaseFloatArrayElements(jhess, hess, 0);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterEvalOneIter
 * Signature: (JI[J[Ljava/lang/String;[Ljava/lang/String;)I
 *
 * Calls XGBoosterEvalOneIter for iteration jiter with the DMatrix handles in
 * jdmats and the matching names in jevnames, then stores the returned text (or
 * null when none was produced) as a Java String in jout[0].
 * Returns the status code reported by XGBoosterEvalOneIter.
 * NOTE(review): jevnames is indexed up to the length of jdmats; confirm the
 * Java caller guarantees both arrays have the same length.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlongArray jdmats, jobjectArray jevnames, jobjectArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  int iter = (int)jiter;
  void **dmats = NULL;
  const char **evnames = NULL;
  // NULL-initialised: the original tested *result even when the native call
  // failed and had left it untouched
  const char *result = NULL;
  bst_ulong len = (bst_ulong)jenv->GetArrayLength(jdmats);
  if (len > 0) {
    dmats = new void*[len];
    evnames = new const char*[len];
  }
  // decode every Java long into the native pointer it carries
  jlong *cjdmats = jenv->GetLongArrayElements(jdmats, NULL);
  for (bst_ulong i = 0; i < len; i++) {
    dmats[i] = *(void **)&cjdmats[i];
  }
  // Released unconditionally: the original only released inside `if (len > 0)`,
  // so the buffer obtained for an empty array was never given back. The values
  // are already copied and unmodified, hence JNI_ABORT.
  jenv->ReleaseLongArrayElements(jdmats, cjdmats, JNI_ABORT);
  // transfer jobjectArray to const char**
  for (bst_ulong i = 0; i < len; i++) {
    jstring jevname = (jstring)jenv->GetObjectArrayElement(jevnames, i);
    evnames[i] = jenv->GetStringUTFChars(jevname, 0);
  }
  jresult = (jint) XGBoosterEvalOneIter(handle, iter, dmats, evnames, len, &result);
  // release string chars
  for (bst_ulong i = 0; i < len; i++) {
    jstring jevname = (jstring)jenv->GetObjectArrayElement(jevnames, i);
    jenv->ReleaseStringUTFChars(jevname, evnames[i]);
  }
  delete[] dmats;    // delete[] on NULL is a no-op
  delete[] evnames;
  jstring jinfo = 0;
  if (result) jinfo = jenv->NewStringUTF(result);
  jenv->SetObjectArrayElement(jout, 0, jinfo);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterPredict
 * Signature: (JJIJ[[F)I
 *
 * Calls XGBoosterPredict for the booster behind jhandle on the DMatrix behind
 * jdmat with joption_mask and jntree_limit, copies the returned values into a
 * new float[] and stores that array in jout[0].
 * Returns the status code reported by XGBoosterPredict.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdmat, jint joption_mask, jlong jntree_limit, jobjectArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  void *dmat = *(void **)&jdmat;
  int option_mask = (int)joption_mask;
  unsigned int ntree_limit = (unsigned int)jntree_limit;
  bst_ulong len = 0;
  // NULL-initialised: if the native call fails without touching its outputs we
  // must not hand an indeterminate pointer to SetFloatArrayRegion
  const float *result = NULL;
  jresult = (jint) XGBoosterPredict(handle, dmat, option_mask, ntree_limit, &len, &result);
  jsize jlen = (jsize)len;
  jfloatArray jarray = jenv->NewFloatArray(jlen);
  // allocation failed: an exception is pending, no further JNI calls are safe
  if (jarray == NULL) return jresult;
  if (jlen > 0 && result != NULL) {
    jenv->SetFloatArrayRegion(jarray, 0, jlen, (const jfloat *)result);
  }
  jenv->SetObjectArrayElement(jout, 0, jarray);
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterLoadModel
 * Signature: (JLjava/lang/String;)I
 *
 * Forwards the file name jfname to XGBoosterLoadModel for the booster behind
 * jhandle and returns that call's status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) {
  void *booster = *(void **)&jhandle;
  // UTF-8 view of the Java string; handed back to the JVM after the call
  const char *path = jenv->GetStringUTFChars(jfname, 0);
  const jint status = (jint) XGBoosterLoadModel(booster, path);
  if (path) {
    jenv->ReleaseStringUTFChars(jfname, path);
  }
  return status;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterSaveModel
 * Signature: (JLjava/lang/String;)I
 *
 * Forwards the file name jfname to XGBoosterSaveModel for the booster behind
 * jhandle and returns that call's status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) {
  void *booster = *(void **)&jhandle;
  // UTF-8 view of the Java string; handed back to the JVM after the call
  const char *path = jenv->GetStringUTFChars(jfname, 0);
  const jint status = (jint) XGBoosterSaveModel(booster, path);
  if (path) {
    jenv->ReleaseStringUTFChars(jfname, path);
  }
  return status;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterLoadModelFromBuffer
 * Signature: (JJJ)I
 *
 * Treats jbuf as a native buffer address and jlen as its length, and forwards
 * both to XGBoosterLoadModelFromBuffer for the booster behind jhandle.
 * Returns that call's status code.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jbuf, jlong jlen) {
  // both longs carry native pointers
  void *booster = *(void **)&jhandle;
  const void *buffer = *(void **)&jbuf;
  const bst_ulong nbytes = (bst_ulong)jlen;
  return (jint) XGBoosterLoadModelFromBuffer(booster, buffer, nbytes);
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterGetModelRaw
 * Signature: (J[Ljava/lang/String;)I
 *
 * Calls XGBoosterGetModelRaw for the booster behind jhandle and, when a buffer
 * is returned, stores it as a Java String in jout[0]; jout is left untouched
 * otherwise.
 * Returns the status code reported by XGBoosterGetModelRaw.
 * NOTE(review): the length reported by the native call is ignored and
 * NewStringUTF reads up to the first NUL byte. If the raw model can contain
 * arbitrary bytes this truncates/corrupts it; a byte[] based Java signature
 * would be needed to fix that - confirm with the Java side.
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw
  (JNIEnv * jenv, jclass jcls, jlong jhandle, jobjectArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  bst_ulong len = 0;
  // NULL-initialised: the original tested *result even when the native call
  // failed and had left it untouched
  const char *result = NULL;
  jresult = (jint) XGBoosterGetModelRaw(handle, &len, &result);
  if (result) {
    jstring jinfo = jenv->NewStringUTF(result);
    jenv->SetObjectArrayElement(jout, 0, jinfo);
  }
  return jresult;
}
/*
 * Class:     org_dmlc_xgboost4j_wrapper_XgboostJNI
 * Method:    XGBoosterDumpModel
 * Signature: (JLjava/lang/String;I[[Ljava/lang/String;)I
 *
 * Calls XGBoosterDumpModel for the booster behind jhandle with the optional
 * feature-map path jfmap and the jwith_stats flag, converts the returned C
 * strings into a new String[] and stores that array in jout[0].
 * Returns the status code reported by XGBoosterDumpModel, or 0 if jfmap could
 * not be converted (a Java exception is pending in that case).
 */
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel
  (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfmap, jint jwith_stats, jobjectArray jout) {
  jint jresult = 0;
  void *handle = *(void **)&jhandle;
  const char *fmap = NULL;
  int with_stats = (int)jwith_stats;
  bst_ulong len = 0;
  // NULL-initialised so a failed native call cannot leave a wild pointer behind
  const char **result = NULL;
  if (jfmap) {
    fmap = jenv->GetStringUTFChars(jfmap, 0);
    if (!fmap) return 0;
  }
  jresult = (jint) XGBoosterDumpModel(handle, fmap, with_stats, &len, &result);
  // fmap is not needed past the native call; release it before any early return
  if (fmap) jenv->ReleaseStringUTFChars(jfmap, fmap);
  jsize jlen = (result != NULL) ? (jsize)len : 0;
  jobjectArray jinfos = jenv->NewObjectArray(jlen, jenv->FindClass("java/lang/String"), jenv->NewStringUTF(""));
  // allocation / class lookup failed: an exception is pending
  if (jinfos == NULL) return jresult;
  for (jsize i = 0; i < jlen; i++) {
    jstring jdump = jenv->NewStringUTF(result[i]);
    jenv->SetObjectArrayElement(jinfos, i, jdump);
    // The array now holds the string. Dropping the local reference keeps the
    // local-reference table from growing by one entry per dumped string.
    jenv->DeleteLocalRef(jdump);
  }
  jenv->SetObjectArrayElement(jout, 0, jinfos);
  return jresult;
}

221
java/xgboost4j_wrapper.h Normal file
View File

@ -0,0 +1,221 @@
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_dmlc_xgboost4j_wrapper_XgboostJNI */
#ifndef _Included_org_dmlc_xgboost4j_wrapper_XgboostJNI
#define _Included_org_dmlc_xgboost4j_wrapper_XgboostJNI
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBGetLastError
* Signature: ()Ljava/lang/String;
*/
JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBGetLastError
(JNIEnv *, jclass);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromFile
* Signature: (Ljava/lang/String;I[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile
(JNIEnv *, jclass, jstring, jint, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromCSR
* Signature: ([J[I[F[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromCSC
* Signature: ([J[I[F[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixCreateFromMat
* Signature: ([FIIF[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat
(JNIEnv *, jclass, jfloatArray, jint, jint, jfloat, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSliceDMatrix
* Signature: (J[I[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix
(JNIEnv *, jclass, jlong, jintArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixFree
* Signature: (J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree
(JNIEnv *, jclass, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSaveBinary
* Signature: (JLjava/lang/String;I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary
(JNIEnv *, jclass, jlong, jstring, jint);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetFloatInfo
* Signature: (JLjava/lang/String;[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo
(JNIEnv *, jclass, jlong, jstring, jfloatArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetUIntInfo
* Signature: (JLjava/lang/String;[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo
(JNIEnv *, jclass, jlong, jstring, jintArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixSetGroup
* Signature: (J[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup
(JNIEnv *, jclass, jlong, jintArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixGetFloatInfo
* Signature: (JLjava/lang/String;[[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixGetUIntInfo
* Signature: (JLjava/lang/String;[[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGDMatrixNumRow
* Signature: (J[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow
(JNIEnv *, jclass, jlong, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterCreate
* Signature: ([J[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate
(JNIEnv *, jclass, jlongArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterFree
* Signature: (J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree
(JNIEnv *, jclass, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterSetParam
* Signature: (JLjava/lang/String;Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam
(JNIEnv *, jclass, jlong, jstring, jstring);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterUpdateOneIter
* Signature: (JIJ)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter
(JNIEnv *, jclass, jlong, jint, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterBoostOneIter
* Signature: (JJ[F[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter
(JNIEnv *, jclass, jlong, jlong, jfloatArray, jfloatArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterEvalOneIter
* Signature: (JI[J[Ljava/lang/String;[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter
(JNIEnv *, jclass, jlong, jint, jlongArray, jobjectArray, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterPredict
* Signature: (JJIJ[[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict
(JNIEnv *, jclass, jlong, jlong, jint, jlong, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterLoadModel
* Signature: (JLjava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel
(JNIEnv *, jclass, jlong, jstring);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterSaveModel
* Signature: (JLjava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel
(JNIEnv *, jclass, jlong, jstring);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterLoadModelFromBuffer
* Signature: (JJJ)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer
(JNIEnv *, jclass, jlong, jlong, jlong);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterGetModelRaw
* Signature: (J[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw
(JNIEnv *, jclass, jlong, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_wrapper_XgboostJNI
* Method: XGBoosterDumpModel
* Signature: (JLjava/lang/String;I[[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel
(JNIEnv *, jclass, jlong, jstring, jint, jobjectArray);
#ifdef __cplusplus
}
#endif
#endif

14
scripts/travis_R_script.sh Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
# Test R package of xgboost
# Abort on the first failing command.
set -e
export _R_CHECK_TIMINGS_=0
export R_BUILD_ARGS="--no-build-vignettes --no-manual"
export R_CHECK_ARGS="--no-vignettes --no-manual"
# The helper is executed below, so fetch it over HTTPS rather than plain HTTP,
# and let curl fail on an HTTP error (-f) instead of saving the error page as
# the script.
curl -fOL https://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
chmod 755 ./travis-tool.sh
./travis-tool.sh bootstrap
make Rpack
cd ./xgboost
../travis-tool.sh install_deps
../travis-tool.sh run_tests

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Print the *.log files under R-package/xgboost.Rcheck when TASK is "R-package".
# NOTE(review): travis_R_script.sh runs its checks from ./xgboost; confirm that
# the .Rcheck directory really ends up under R-package/.
# ${TASK} is quoted so that an unset or empty value compares as "" instead of
# turning the test into a syntax error.
if [ "${TASK}" == "R-package" ]; then
    cat R-package/xgboost.Rcheck/*.log
fi

7
scripts/travis_java_script.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Test java package of xgboost
# The shebang makes the interpreter explicit: this file is executed directly,
# like the sibling travis scripts, which all start with #!/bin/bash.
# Abort on the first failing command.
set -e
cd java
./create_wrap.sh
cd xgboost4j
mvn clean install -DskipTests=true
mvn test

33
scripts/travis_script.sh Executable file
View File

@ -0,0 +1,33 @@
#!/bin/bash
# main script of travis
# Dispatches on ${TASK} and runs the matching build / test step.
# ${TASK} is quoted in every test: unquoted, an unset or empty TASK turns each
# `[ ... ]` into a syntax error, every branch is skipped and the script still
# exits 0. ${CXX} is quoted so a compiler command containing spaces stays one
# make argument.
if [ "${TASK}" == "lint" ]; then
    make lint || exit -1
fi
if [ "${TASK}" == "build" ]; then
    make all CXX="${CXX}" || exit -1
fi
if [ "${TASK}" == "build-with-dmlc" ]; then
    # stop here if the checkout is missing instead of running make in the wrong directory
    cd dmlc-core || exit -1
    cp make/config.mk .
    echo "USE_S3=1" >> config.mk
    make all CXX="${CXX}" || exit -1
    cd ..
    make dmlc=dmlc-core CXX="${CXX}" || exit -1
fi
if [ "${TASK}" == "R-package" ]; then
    scripts/travis_R_script.sh || exit -1
fi
if [ "${TASK}" == "python-package" ]; then
    make all CXX="${CXX}" || exit -1
    nosetests tests/python || exit -1
fi
if [ "${TASK}" == "java-package" ]; then
    make java CXX="${CXX}" || exit -1
    scripts/travis_java_script.sh || exit -1
fi

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_DATA_H
#define XGBOOST_DATA_H
/*!
* Copyright (c) 2014 by Contributors
* \file data.h
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_H_
#define XGBOOST_DATA_H_
#include <cstdio>
#include <vector>
#include "utils/utils.h"
@ -161,4 +163,4 @@ class IFMatrix {
virtual ~IFMatrix(void){}
};
} // namespace xgboost
#endif // XGBOOST_DATA_H
#endif // XGBOOST_DATA_H_

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
#define XGBOOST_GBM_GBLINEAR_INL_HPP_
/*!
* Copyright by Contributors
* \file gblinear-inl.hpp
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun)
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
#define XGBOOST_GBM_GBLINEAR_INL_HPP_
#include <vector>
#include <string>
#include <sstream>
@ -33,10 +35,10 @@ class GBLinear : public IGradBooster {
model.param.SetParam(name, val);
}
}
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) {
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
model.LoadModel(fi);
}
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
model.SaveModel(fo);
}
virtual void InitModel(void) {
@ -92,7 +94,8 @@ class GBLinear : public IGradBooster {
sum_hess += p.hess * v * v;
}
float &w = model[fid][gid];
bst_float dw = static_cast<bst_float>(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w));
bst_float dw = static_cast<bst_float>(param.learning_rate *
param.CalcDelta(sum_grad, sum_hess, w));
w += dw;
// update grad value
for (bst_uint j = 0; j < col.length; ++j) {
@ -258,12 +261,12 @@ class GBLinear : public IGradBooster {
std::fill(weight.begin(), weight.end(), 0.0f);
}
// save the model to file
inline void SaveModel(utils::IStream &fo) const {
inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
fo.Write(&param, sizeof(Param));
fo.Write(weight);
}
// load model from file
inline void LoadModel(utils::IStream &fi) {
inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
utils::Assert(fi.Read(&param, sizeof(Param)) != 0, "Load LinearBooster");
fi.Read(&weight);
}

View File

@ -1,3 +1,4 @@
// Copyright by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX

View File

@ -1,11 +1,14 @@
#ifndef XGBOOST_GBM_GBM_H_
#define XGBOOST_GBM_GBM_H_
/*!
* Copyright by Contributors
* \file gbm.h
* \brief interface of gradient booster, that learns through gradient statistics
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_GBM_H_
#define XGBOOST_GBM_GBM_H_
#include <vector>
#include <string>
#include "../data.h"
#include "../utils/io.h"
#include "../utils/fmap.h"
@ -29,13 +32,13 @@ class IGradBooster {
* \param fi input stream
* \param with_pbuffer whether the incoming data contains pbuffer
*/
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0;
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; // NOLINT(*)
/*!
* \brief save model to stream
* \param fo output stream
* \param with_pbuffer whether save out pbuffer
*/
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0;
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; // NOLINT(*)
/*!
* \brief initialize the model
*/

View File

@ -1,13 +1,16 @@
#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
#define XGBOOST_GBM_GBTREE_INL_HPP_
/*!
* Copyright by Contributors
* \file gbtree-inl.hpp
* \brief gradient boosted tree implementation
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
#define XGBOOST_GBM_GBTREE_INL_HPP_
#include <vector>
#include <utility>
#include <string>
#include <limits>
#include "./gbm.h"
#include "../utils/omp.h"
#include "../tree/updater.h"
@ -39,7 +42,7 @@ class GBTree : public IGradBooster {
tparam.SetParam(name, val);
if (trees.size() == 0) mparam.SetParam(name, val);
}
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) {
virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
this->Clear();
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
"GBTree: invalid model file");
@ -62,7 +65,7 @@ class GBTree : public IGradBooster {
"GBTree: invalid model file");
}
}
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
utils::Assert(mparam.num_trees == static_cast<int>(trees.size()), "GBTree");
if (with_pbuffer) {
fo.Write(&mparam, sizeof(ModelParam));
@ -196,7 +199,6 @@ class GBTree : public IGradBooster {
thread_temp[i].Init(mparam.num_feature);
}
this->PredPath(p_fmat, info, out_preds, ntree_limit);
}
virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
std::vector<std::string> dump;
@ -339,7 +341,7 @@ class GBTree : public IGradBooster {
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
vec_psum[j] += trees[i]->leafvec(tid)[j];
}
if(--treeleft == 0) break;
if (--treeleft == 0) break;
}
}
p_feats->Drop(inst);

View File

@ -1,6 +1,8 @@
// Copyright by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX
#include <string>
#include "../utils/io.h"
// implements a single no split version of DMLC
@ -154,7 +156,7 @@ class StdFile : public dmlc::Stream {
std::fwrite(ptr, size, 1, fp);
}
virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET);
std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
}
virtual size_t Tell(void) {
return std::ftell(fp);

View File

@ -1,3 +1,4 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_IO_IO_H_
#define XGBOOST_IO_IO_H_
/*!
* Copyright 2014 by Contributors
* \file io.h
* \brief handles input data format of xgboost
* I/O module handles a specific DMatrix format
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_IO_H_
#define XGBOOST_IO_IO_H_
#include "../data.h"
#include "../learner/dmatrix.h"
@ -40,7 +42,6 @@ DataMatrix* LoadDataMatrix(const char *fname,
* \param silent whether print message during saving
*/
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false);
} // namespace io
} // namespace xgboost
#endif // XGBOOST_IO_IO_H_

View File

@ -35,7 +35,7 @@ struct LibSVMPage : public SparsePage {
*/
class LibSVMPageFactory {
public:
explicit LibSVMPageFactory()
LibSVMPageFactory()
: bytes_read_(0), at_head_(true) {
}
inline bool Init(void) {
@ -199,6 +199,7 @@ class LibSVMParser : public utils::IIterator<LibSVMPage> {
inline size_t bytes_read(void) const {
return itr.get_factory().bytes_read();
}
private:
bool at_end_;
size_t data_ptr_;

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
/*!
* Copyright (c) 2014 by Contributors
* \file page_dmatrix-inl.hpp
* row iterator based on sparse page
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
#include <vector>
#include <string>
#include <algorithm>
#include "../data.h"
#include "../utils/iterator.h"
#include "../utils/thread_buffer.h"
@ -94,12 +98,12 @@ class DMatrixPageBase : public DataMatrix {
fbin.Close();
if (!silent) {
utils::Printf("DMatrixPage: %lux%lu is saved to %s\n",
static_cast<unsigned long>(mat.info.num_row()),
static_cast<unsigned long>(mat.info.num_col()), fname_);
static_cast<unsigned long>(mat.info.num_row()), // NOLINT(*)
static_cast<unsigned long>(mat.info.num_col()), fname_); // NOLINT(*)
}
}
/*! \brief load and initialize the iterator with fi */
inline void LoadBinary(utils::FileStream &fi,
inline void LoadBinary(utils::FileStream &fi, // NOLINT(*)
bool silent,
const char *fname_) {
this->set_cache_file(fname_);
@ -114,8 +118,8 @@ class DMatrixPageBase : public DataMatrix {
iter_->Load(fs);
if (!silent) {
utils::Printf("DMatrixPage: %lux%lu matrix is loaded",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()));
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col())); // NOLINT(*)
if (fname_ != NULL) {
utils::Printf(" from %s\n", fname_);
} else {
@ -188,8 +192,8 @@ class DMatrixPageBase : public DataMatrix {
fs.Close();
if (!silent) {
utils::Printf("DMatrixPage: %lux%lu is parsed from %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
uri);
}
}

View File

@ -1,10 +1,16 @@
#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
/*!
* Copyright (c) 2014 by Contributors
* \file page_fmatrix-inl.hpp
* col iterator based on sparse page
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
#include <vector>
#include <string>
#include <algorithm>
namespace xgboost {
namespace io {
/*! \brief thread buffer iterator */
@ -96,7 +102,7 @@ struct ColConvertFactory {
return true;
}
}
if (tmp_.Size() != 0){
if (tmp_.Size() != 0) {
this->MakeColPage(tmp_, BeginPtr(*buffered_rowset_) + btop,
*enabled_, val);
return true;

View File

@ -1,6 +1,5 @@
#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file simple_dmatrix-inl.hpp
* \brief simple implementation of DMatrixS that can be used
* the data format of xgboost is templatized, which means it can accept
@ -8,6 +7,9 @@
* this file is a specific implementation of input data structure that can be used by BoostLearner
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
#include <string>
#include <cstring>
#include <vector>
@ -123,9 +125,9 @@ class DMatrixSimple : public DataMatrix {
}
if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), uri);
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size()), uri); // NOLINT(*)
}
// try to load in additional file
if (!loadsplit) {
@ -165,10 +167,11 @@ class DMatrixSimple : public DataMatrix {
* \param silent whether to print information during loading
* \param fname file name, used to print message
*/
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) { // NOLINT(*)
int tmagic;
utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch", fname == NULL ? "" : fname);
utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch",
fname == NULL ? "" : fname);
info.LoadBinary(fs);
LoadBinary(fs, &row_ptr_, &row_data_);
@ -176,9 +179,9 @@ class DMatrixSimple : public DataMatrix {
if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is loaded",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()));
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size())); // NOLINT(*)
if (fname != NULL) {
utils::Printf(" from %s\n", fname);
} else {
@ -205,9 +208,9 @@ class DMatrixSimple : public DataMatrix {
if (!silent) {
utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), fname);
static_cast<unsigned long>(info.num_row()), // NOLINT(*)
static_cast<unsigned long>(info.num_col()), // NOLINT(*)
static_cast<unsigned long>(row_data_.size()), fname); // NOLINT(*)
if (info.group_ptr.size() != 0) {
utils::Printf("data contains %u groups\n",
static_cast<unsigned>(info.group_ptr.size()-1));
@ -256,7 +259,7 @@ class DMatrixSimple : public DataMatrix {
* \param ptr pointer data
* \param data data content
*/
inline static void SaveBinary(utils::IStream &fo,
inline static void SaveBinary(utils::IStream &fo, // NOLINT(*)
const std::vector<size_t> &ptr,
const std::vector<RowBatch::Entry> &data) {
size_t nrow = ptr.size() - 1;
@ -272,7 +275,7 @@ class DMatrixSimple : public DataMatrix {
* \param out_ptr pointer data
* \param out_data data content
*/
inline static void LoadBinary(utils::IStream &fi,
inline static void LoadBinary(utils::IStream &fi, // NOLINT(*)
std::vector<size_t> *out_ptr,
std::vector<RowBatch::Entry> *out_data) {
size_t nrow;

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file simple_fmatrix-inl.hpp
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#include <limits>
#include <algorithm>
#include <vector>
#include "../data.h"
#include "../utils/utils.h"
#include "../utils/random.h"
@ -39,7 +43,7 @@ class FMatrixS : public IFMatrix {
/*!
 * \brief get number of columns
 *  column access must be available; this is enforced with utils::Check
 */
virtual size_t NumCol(void) const {
utils::Check(this->HaveColAccess(), "NumCol:need column access");
return col_size_.size() - 1;
return col_size_.size();
}
/*! \brief get number of buffered rows */
virtual const std::vector<bst_uint> &buffered_rowset(void) const {
@ -94,7 +98,7 @@ class FMatrixS : public IFMatrix {
* \brief save column access data into stream
* \param fo output stream to save to
*/
inline void SaveColAccess(utils::IStream &fo) const {
inline void SaveColAccess(utils::IStream &fo) const { // NOLINT(*)
size_t n = 0;
fo.Write(&n, sizeof(n));
}
@ -102,7 +106,7 @@ class FMatrixS : public IFMatrix {
* \brief load column access data from stream
* \param fi input stream to load from
*/
inline void LoadColAccess(utils::IStream &fi) {
inline void LoadColAccess(utils::IStream &fi) { // NOLINT(*)
// do nothing in load col access
}
@ -153,14 +157,14 @@ class FMatrixS : public IFMatrix {
pcol->Clear();
utils::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(0, nthread);
builder.InitBudget(info_.num_col(), nthread);
// start working
iter_->BeforeFirst();
while (iter_->Next()) {
const RowBatch &batch = iter_->Value();
bmap.resize(bmap.size() + batch.size, true);
long batch_size = static_cast<long>(batch.size);
for (long i = 0; i < batch_size; ++i) {
long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
buffered_rowset_.push_back(ridx);
@ -169,13 +173,13 @@ class FMatrixS : public IFMatrix {
}
}
#pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) {
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]){
if (enabled[inst[j].index]) {
builder.AddBudget(inst[j].index, tid);
}
}
@ -188,7 +192,7 @@ class FMatrixS : public IFMatrix {
while (iter_->Next()) {
const RowBatch &batch = iter_->Value();
#pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) {
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
@ -204,7 +208,8 @@ class FMatrixS : public IFMatrix {
}
}
utils::Assert(pcol->Size() == info_.num_col(), "inconsistent col data");
utils::Assert(pcol->Size() == info_.num_col(),
"inconsistent col data");
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
@ -366,4 +371,4 @@ class FMatrixS : public IFMatrix {
};
} // namespace io
} // namespace xgboost
#endif // XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP
#endif // XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_

View File

@ -1,12 +1,16 @@
#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
/*!
* Copyright (c) 2014 by Contributors
* \file sparse_batch_page.h
* content holder of sparse batch that can be saved to disk
* the representation can be effectively
* used in external memory computation
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#include <vector>
#include <algorithm>
#include "../data.h"
namespace xgboost {

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_LEARNER_DMATRIX_H_
#define XGBOOST_LEARNER_DMATRIX_H_
/*!
* Copyright 2014 by Contributors
* \file dmatrix.h
* \brief meta data and template data structure
* used for regression/classification/ranking
* \author Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_DMATRIX_H_
#define XGBOOST_LEARNER_DMATRIX_H_
#include <vector>
#include <cstring>
#include "../data.h"
@ -66,7 +68,7 @@ struct MetaInfo {
return 1.0f;
}
}
inline void SaveBinary(utils::IStream &fo) const {
inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*)
int version = kVersion;
fo.Write(&version, sizeof(version));
fo.Write(&info.num_row, sizeof(info.num_row));
@ -77,7 +79,7 @@ struct MetaInfo {
fo.Write(info.root_index);
fo.Write(base_margin);
}
inline void LoadBinary(utils::IStream &fi) {
inline void LoadBinary(utils::IStream &fi) { // NOLINT(*)
int version;
utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
@ -114,7 +116,7 @@ struct MetaInfo {
return labels;
}
/*!
 * \brief const overload of GetFloatInfo
 *  casts away the constness of this and forwards to the non-const overload
 * \param field name of the float field to look up
 * \return const reference to the vector returned by the non-const overload
 */
inline const std::vector<float>& GetFloatInfo(const char *field) const {
return ((MetaInfo*)this)->GetFloatInfo(field);
return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*)
}
inline std::vector<unsigned> &GetUIntInfo(const char *field) {
using namespace std;
@ -124,7 +126,7 @@ struct MetaInfo {
return info.root_index;
}
/*!
 * \brief const overload of GetUIntInfo
 *  casts away the constness of this and forwards to the non-const overload
 * \param field name of the unsigned field to look up
 * \return const reference to the vector returned by the non-const overload
 */
inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
return ((MetaInfo*)this)->GetUIntInfo(field);
return ((MetaInfo*)this)->GetUIntInfo(field); // NOLINT(*)
}
// try to load weight information from file, if exists
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {

View File

@ -1,10 +1,12 @@
/*!
* Copyright 2014 by Contributors
* \file xgboost_evaluation-inl.hpp
* \brief evaluation metrics for regression and classification and rank
* \author Kailong Chen, Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
#define XGBOOST_LEARNER_EVALUATION_INL_HPP_
/*!
* \file xgboost_evaluation-inl.hpp
* \brief evaluation metrics for regression and classification and rank
* \author Kailong Chen, Tianqi Chen
*/
#include <vector>
#include <utility>
#include <string>
@ -344,7 +346,8 @@ struct EvalPrecisionRatio : public IEvaluator{
}
protected:
inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec, const MetaInfo &info) const {
inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec,
const MetaInfo &info) const {
size_t cutoff = static_cast<size_t>(ratio_ * rec.size());
double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0;
for (size_t j = 0; j < cutoff; ++j) {
@ -417,8 +420,8 @@ struct EvalAuc : public IEvaluator {
}
if (distributed) {
float dat[2];
dat[0] = static_cast<float>(sum_auc);
dat[1] = static_cast<float>(ngroup);
dat[0] = static_cast<float>(sum_auc);
dat[1] = static_cast<float>(ngroup);
// approximately estimate auc using mean
rabit::Allreduce<rabit::op::Sum>(dat, 2);
return dat[0] / dat[1];
@ -463,8 +466,8 @@ struct EvalRankList : public IEvaluator {
}
if (distributed) {
float dat[2];
dat[0] = static_cast<float>(sum_metric);
dat[1] = static_cast<float>(ngroup);
dat[0] = static_cast<float>(sum_metric);
dat[1] = static_cast<float>(ngroup);
// approximately estimate the ranking metric using mean
rabit::Allreduce<rabit::op::Sum>(dat, 2);
return dat[0] / dat[1];
@ -489,7 +492,7 @@ struct EvalRankList : public IEvaluator {
}
}
/*! \return evaluation metric, given the pair_sort record, (pred,label) */
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0;
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0; // NOLINT(*)
protected:
unsigned topn_;
@ -530,7 +533,7 @@ struct EvalNDCG : public EvalRankList{
}
return static_cast<float>(sumdcg);
}
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const { // NOLINT(*)
std::stable_sort(rec.begin(), rec.end(), CmpFirst);
float dcg = this->CalcDCG(rec);
std::stable_sort(rec.begin(), rec.end(), CmpSecond);

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_EVALUATION_H_
#define XGBOOST_LEARNER_EVALUATION_H_
/*!
* Copyright 2014 by Contributors
* \file evaluation.h
* \brief interface of evaluation function supported in xgboost
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_LEARNER_EVALUATION_H_
#define XGBOOST_LEARNER_EVALUATION_H_
#include <string>
#include <vector>
#include <cstdio>

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
#define XGBOOST_LEARNER_HELPER_UTILS_H_
/*!
* Copyright 2014 by Contributors
* \file helper_utils.h
* \brief useful helper functions
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
#define XGBOOST_LEARNER_HELPER_UTILS_H_
#include <utility>
#include <vector>
#include <cmath>

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file learner-inl.hpp
* \brief learning algorithm
* \author Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
#include <algorithm>
#include <vector>
#include <utility>
@ -30,7 +32,7 @@ class BoostLearner : public rabit::Serializable {
gbm_ = NULL;
name_obj_ = "reg:linear";
name_gbm_ = "gbtree";
silent= 0;
silent = 0;
prob_buffer_row = 1.0f;
distributed_mode = 0;
updater_mode = 0;
@ -68,7 +70,7 @@ class BoostLearner : public rabit::Serializable {
}
char str_temp[25];
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
static_cast<unsigned long>(buffer_size));
static_cast<unsigned long>(buffer_size)); // NOLINT(*)
this->SetParam("num_pbuffer", str_temp);
this->pred_buffer_size = buffer_size;
}
@ -161,7 +163,7 @@ class BoostLearner : public rabit::Serializable {
* \param fi input stream
* \param calc_num_feature whether to call InitTrainer with calc_num_feature
*/
inline void LoadModel(utils::IStream &fi,
inline void LoadModel(utils::IStream &fi, // NOLINT(*)
bool calc_num_feature = true) {
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
"BoostLearner: wrong model format");
@ -228,7 +230,7 @@ class BoostLearner : public rabit::Serializable {
}
delete fi;
}
inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
ModelParam p = mparam;
p.saved_with_pbuffer = static_cast<int>(with_pbuffer);
fo.Write(&p, sizeof(ModelParam));
@ -345,8 +347,7 @@ class BoostLearner : public rabit::Serializable {
bool output_margin,
std::vector<float> *out_preds,
unsigned ntree_limit = 0,
bool pred_leaf = false
) const {
bool pred_leaf = false) const {
if (pred_leaf) {
gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
} else {
@ -517,7 +518,7 @@ class BoostLearner : public rabit::Serializable {
protected:
// magic number to transform random seed
const static int kRandSeedMagic = 127;
static const int kRandSeedMagic = 127;
// cache entry object that helps handle feature caching
struct CacheEntry {
const DMatrix *mat_;

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file objective-inl.hpp
* \brief objective function implementations
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
#include <vector>
#include <algorithm>
#include <utility>
@ -176,14 +178,14 @@ class RegLossObj : public IObjFunction {
// poisson regression for count
class PoissonRegression : public IObjFunction {
public:
explicit PoissonRegression(void) {
PoissonRegression(void) {
max_delta_step = 0.0f;
}
virtual ~PoissonRegression(void) {}
/*!
 * \brief set a parameter of the objective
 *  only "max_delta_step" is handled here; any other name is ignored
 * \param name name of the parameter
 * \param val textual value of the parameter, converted with atof
 */
virtual void SetParam(const char *name, const char *val) {
using namespace std;
if (!strcmp( "max_delta_step", name )) {
if (!strcmp("max_delta_step", name)) {
max_delta_step = static_cast<float>(atof(val));
}
}
@ -201,9 +203,9 @@ class PoissonRegression : public IObjFunction {
// check if label in range
bool label_correct = true;
// start calculating gradient
const long ndata = static_cast<bst_omp_uint>(preds.size());
const long ndata = static_cast<bst_omp_uint>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long i = 0; i < ndata; ++i) {
for (long i = 0; i < ndata; ++i) { // NOLINT(*)
float p = preds[i];
float w = info.GetWeight(i);
float y = info.labels[i];
@ -219,9 +221,9 @@ class PoissonRegression : public IObjFunction {
}
/*!
 * \brief replace every prediction with its exponential, in place
 * \param io_preds prediction values; each entry is overwritten with std::exp of itself
 */
virtual void PredTransform(std::vector<float> *io_preds) {
std::vector<float> &preds = *io_preds;
const long ndata = static_cast<long>(preds.size());
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
// every element is transformed independently, so the loop is shared among OpenMP threads
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) {
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]);
}
}

View File

@ -1,11 +1,14 @@
#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
#define XGBOOST_LEARNER_OBJECTIVE_H_
/*!
* Copyright 2014 by Contributors
* \file objective.h
* \brief interface of objective function used for gradient boosting
* \author Tianqi Chen, Kailong Chen
*/
#include "dmatrix.h"
#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
#define XGBOOST_LEARNER_OBJECTIVE_H_
#include <vector>
#include "./dmatrix.h"
namespace xgboost {
namespace learner {
@ -13,7 +16,7 @@ namespace learner {
class IObjFunction{
public:
/*! \brief virtual destructor */
virtual ~IObjFunction(void){}
virtual ~IObjFunction(void) {}
/*!
* \brief set parameters from outside
* \param name name of the parameter
@ -38,7 +41,7 @@ class IObjFunction{
* \brief transform prediction values, this is only called when Prediction is called
* \param io_preds prediction values, saves to this vector as well
*/
virtual void PredTransform(std::vector<float> *io_preds){}
virtual void PredTransform(std::vector<float> *io_preds) {}
/*!
* \brief transform prediction values, this is only called when Eval is called,
* usually it redirects to PredTransform

View File

@ -1,13 +1,13 @@
#ifndef XGBOOST_SYNC_H_
#define XGBOOST_SYNC_H_
/*!
* Copyright 2014 by Contributors
* \file sync.h
* \brief the synchronization module of rabit
* redirects to subtree rabit header
* \author Tianqi Chen
*/
#ifndef XGBOOST_SYNC_SYNC_H_
#define XGBOOST_SYNC_SYNC_H_
#include "../../subtree/rabit/include/rabit.h"
#include "../../subtree/rabit/include/rabit/timer.h"
#endif // XGBOOST_SYNC_H_
#endif // XGBOOST_SYNC_SYNC_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_MODEL_H_
#define XGBOOST_TREE_MODEL_H_
/*!
* Copyright 2014 by Contributors
* \file model.h
* \brief model structure for tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_MODEL_H_
#define XGBOOST_TREE_MODEL_H_
#include <string>
#include <cstring>
#include <sstream>
@ -70,7 +72,7 @@ class TreeModel {
/*! \brief tree node */
class Node {
public:
Node(void) : sindex_(0) {}
Node(void) : sindex_(0) {}
/*! \brief index of left child */
inline int cleft(void) const {
return this->cleft_;
@ -273,7 +275,7 @@ class TreeModel {
return &leaf_vector[nid * param.size_leaf_vector];
}
/*!
 * \brief get leaf vector given nid (const overload)
 * \param nid node id
 * \return NULL when leaf_vector is empty; otherwise the address of element
 *  nid * param.size_leaf_vector inside leaf_vector
 */
inline const bst_float* leafvec(int nid) const{
inline const bst_float* leafvec(int nid) const {
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
}
@ -292,7 +294,7 @@ class TreeModel {
* \brief load model from stream
* \param fi input stream
*/
inline void LoadModel(utils::IStream &fi) {
inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
utils::Check(fi.Read(&param, sizeof(Param)) > 0,
"TreeModel: wrong format");
nodes.resize(param.num_nodes); stats.resize(param.num_nodes);
@ -317,7 +319,7 @@ class TreeModel {
* \brief save model to stream
* \param fo output stream
*/
inline void SaveModel(utils::IStream &fo) const {
inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
"Tree::SaveModel");
utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
@ -400,7 +402,7 @@ class TreeModel {
}
private:
void Dump(int nid, std::stringstream &fo,
void Dump(int nid, std::stringstream &fo, // NOLINT(*)
const utils::FeatMap& fmap, int depth, bool with_stats) {
for (int i = 0; i < depth; ++i) {
fo << '\t';
@ -469,7 +471,7 @@ struct RTreeNodeStat {
/*! \brief number of children that are leaf nodes known up to now */
int leaf_child_cnt;
/*! \brief print information of current stats to fo */
inline void Print(std::stringstream &fo, bool is_leaf) const {
inline void Print(std::stringstream &fo, bool is_leaf) const { // NOLINT(*)
if (!is_leaf) {
fo << ",gain=" << loss_chg << ",cover=" << sum_hess;
} else {

View File

@ -1,10 +1,13 @@
#ifndef XGBOOST_TREE_PARAM_H_
#define XGBOOST_TREE_PARAM_H_
/*!
* Copyright 2014 by Contributors
* \file param.h
* \brief training parameters, statistics used to support tree construction
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_PARAM_H_
#define XGBOOST_TREE_PARAM_H_
#include <vector>
#include <cstring>
#include "../data.h"
@ -244,7 +247,7 @@ struct GradStats {
this->Add(b.sum_grad, b.sum_hess);
}
/*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(GradStats &a, const GradStats &b) {
inline static void Reduce(GradStats &a, const GradStats &b) { // NOLINT(*)
a.Add(b);
}
/*! \brief set current value to a - b */
@ -257,7 +260,7 @@ struct GradStats {
return sum_hess == 0.0;
}
/*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const{
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
}
// constructor to allow inheritance
GradStats(void) {}
@ -324,7 +327,7 @@ struct CVGradStats : public GradStats {
}
}
/*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(CVGradStats &a, const CVGradStats &b) {
inline static void Reduce(CVGradStats &a, const CVGradStats &b) { // NOLINT(*)
a.Add(b);
}
/*! \brief set current value to a - b */
@ -407,7 +410,7 @@ struct SplitEntry{
}
}
/*! \brief same as update, used by AllReduce*/
inline static void Reduce(SplitEntry &dst, const SplitEntry &src) {
inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { // NOLINT(*)
dst.Update(src);
}
/*!\return feature index to split on */

View File

@ -1,3 +1,4 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_H_
#define XGBOOST_TREE_UPDATER_H_
/*!
* Copyright 2014 by Contributors
* \file updater.h
* \brief interface to update the tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_H_
#define XGBOOST_TREE_UPDATER_H_
#include <vector>
#include "../data.h"

View File

@ -1,12 +1,14 @@
#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_basemaker-inl.hpp
* \brief implement a common tree constructor
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#include <vector>
#include <algorithm>
#include <string>
#include <limits>
#include "../sync/sync.h"
#include "../utils/random.h"
@ -60,8 +62,11 @@ class BaseMaker: public IUpdater {
bst_float a = fminmax[fid * 2];
bst_float b = fminmax[fid * 2 + 1];
if (a == -std::numeric_limits<bst_float>::max()) return 0;
if (-a == b) return 1;
else return 2;
if (-a == b) {
return 1;
} else {
return 2;
}
}
/*!
 * \brief read the second of the two fminmax slots kept for feature fid
 *  (index fid * 2 + 1); the first slot presumably holds the negated
 *  minimum, judging by the comparison in Type -- TODO confirm
 */
inline bst_float MaxValue(bst_uint fid) const {
  const bst_uint upper_slot = fid * 2 + 1;
  return fminmax[upper_slot];
}
std::vector<bst_uint> &findex = *p_findex;
findex.clear();
for (size_t i = 0; i < fminmax.size(); i += 2) {
const bst_uint fid = static_cast<bst_uint>(i / 2);
const bst_uint fid = static_cast<bst_uint>(i / 2);
if (this->Type(fid) != 0) findex.push_back(fid);
}
unsigned n = static_cast<unsigned>(p * findex.size());
@ -116,7 +121,7 @@ class BaseMaker: public IUpdater {
}
return nthread;
}
// ------class member helpers---------
// ------class member helpers---------
/*! \brief initialize temp data structure */
inline void InitData(const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat,
@ -124,7 +129,8 @@ class BaseMaker: public IUpdater {
const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots,
"TreeMaker: can only grow new tree");
{// setup position
{
// setup position
position.resize(gpair.size());
if (root_index.size() == 0) {
std::fill(position.begin(), position.end(), 0);
@ -147,7 +153,8 @@ class BaseMaker: public IUpdater {
}
}
}
{// expand query
{
// expand query
qexpand.reserve(256); qexpand.clear();
for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand.push_back(i);
@ -170,7 +177,7 @@ class BaseMaker: public IUpdater {
this->UpdateNode2WorkIndex(tree);
}
// return decoded position
// position[ridx] is either a node id (non-negative) or the bitwise
// complement of a node id (negative); the plain node id is returned in both cases
inline int DecodePosition(bst_uint ridx) const{
inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx];
return pid < 0 ? ~pid : pid;
}
@ -189,7 +196,8 @@ class BaseMaker: public IUpdater {
* \param p_fmat feature matrix needed for tree construction
* \param tree the regression tree structure
*/
inline void ResetPositionCol(const std::vector<int> &nodes, IFMatrix *p_fmat, const RegTree &tree) {
inline void ResetPositionCol(const std::vector<int> &nodes,
IFMatrix *p_fmat, const RegTree &tree) {
// set the positions in the nondefault
this->SetNonDefaultPositionCol(nodes, p_fmat, tree);
// set rest of instances to default position
@ -252,7 +260,7 @@ class BaseMaker: public IUpdater {
const int nid = this->DecodePosition(ridx);
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if(fvalue < tree[nid].split_cond()) {
if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
@ -337,15 +345,16 @@ class BaseMaker: public IUpdater {
return;
}
if (last_fvalue != fvalue) {
double rmax = rmin + wmin;
double rmax = rmin + wmin;
if (rmax >= next_goal && sketch->temp.size != max_size) {
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
if (sketch->temp.size == 0 ||
last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
// push to sketch
sketch->temp.data[sketch->temp.size] =
utils::WXQuantileSketch<bst_float, bst_float>::
Entry(static_cast<bst_float>(rmin),
static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue);
static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue);
utils::Assert(sketch->temp.size < max_size,
"invalid maximum size max_size=%u, stemp.size=%lu\n",
max_size, sketch->temp.size);
@ -353,15 +362,15 @@ class BaseMaker: public IUpdater {
}
if (sketch->temp.size == max_size) {
next_goal = sum_total * 2.0f + 1e-5f;
} else{
} else {
next_goal = static_cast<bst_float>(sketch->temp.size * sum_total / max_size);
}
} else {
if (rmax >= next_goal) {
rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n",
rmax, sum_total, next_goal, sketch->temp.size);
}
}
if (rmax >= next_goal) {
rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n",
rmax, sum_total, next_goal, sketch->temp.size);
}
}
rmin = rmax;
wmin = w;
last_fvalue = fvalue;
@ -375,13 +384,13 @@ class BaseMaker: public IUpdater {
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
utils::Assert(sketch->temp.size <= max_size,
"Finalize: invalid maximum size, max_size=%u, stemp.size=%lu",
sketch->temp.size, max_size );
sketch->temp.size, max_size);
// push to sketch
sketch->temp.data[sketch->temp.size] =
utils::WXQuantileSketch<bst_float, bst_float>::
Entry(static_cast<bst_float>(rmin),
static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue);
static_cast<bst_float>(rmax),
static_cast<bst_float>(wmin), last_fvalue);
++sketch->temp.size;
}
sketch->PushTemp();
@ -415,4 +424,4 @@ class BaseMaker: public IUpdater {
};
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_colmaker-inl.hpp
* \brief use columnwise update to construct a tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
#include <vector>
#include <cmath>
#include <algorithm>
@ -114,10 +116,13 @@ class ColMaker: public IUpdater {
// initialize temp data structure
inline void InitData(const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat,
const std::vector<unsigned> &root_index, const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree");
const std::vector<unsigned> &root_index,
const RegTree &tree) {
utils::Assert(tree.param.num_nodes == tree.param.num_roots,
"ColMaker: can only grow new tree");
const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
{// setup position
{
// setup position
position.resize(gpair.size());
if (root_index.size() == 0) {
for (size_t i = 0; i < rowset.size(); ++i) {
@ -127,7 +132,8 @@ class ColMaker: public IUpdater {
for (size_t i = 0; i < rowset.size(); ++i) {
const bst_uint ridx = rowset[i];
position[ridx] = root_index[ridx];
utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, "root index exceed setting");
utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots,
"root index exceed setting");
}
}
// mark delete for the deleted data
@ -154,11 +160,12 @@ class ColMaker: public IUpdater {
}
unsigned n = static_cast<unsigned>(param.colsample_bytree * feat_index.size());
random::Shuffle(feat_index);
//utils::Check(n > 0, "colsample_bytree is too small that no feature can be included");
utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included", param.colsample_bytree);
utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included",
param.colsample_bytree);
feat_index.resize(n);
}
{// setup temp space for each thread
{
// setup temp space for each thread
#pragma omp parallel
{
this->nthread = omp_get_num_threads();
@ -171,20 +178,25 @@ class ColMaker: public IUpdater {
}
snode.reserve(256);
}
{// expand query
{
// expand query
qexpand_.reserve(256); qexpand_.clear();
for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand_.push_back(i);
}
}
}
/*! \brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */
/*!
* \brief initialize the base_weight, root_gain,
* and NodeEntry for all the new nodes in qexpand
*/
inline void InitNewNode(const std::vector<int> &qexpand,
const std::vector<bst_gpair> &gpair,
const IFMatrix &fmat,
const BoosterInfo &info,
const RegTree &tree) {
{// setup statistics space for each tree node
{
// setup statistics space for each tree node
for (size_t i = 0; i < stemp.size(); ++i) {
stemp[i].resize(tree.param.num_nodes, ThreadEntry(param));
}
@ -280,7 +292,7 @@ class ColMaker: public IUpdater {
ThreadEntry &e = stemp[tid][nid];
float fsplit;
if (tid != 0) {
if(std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
if (std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
// The candidate threshold between two adjacent thread segments is the
// midpoint of the previous segment's last value and this segment's first
// value, i.e. their average. Subtracting them would yield half the gap,
// which is not a value lying between the two feature values.
fsplit = (stemp[tid - 1][nid].last_fvalue + e.first_fvalue) * 0.5f;
} else {
continue;
@ -290,16 +302,20 @@ class ColMaker: public IUpdater {
}
if (need_forward && tid != 0) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, fsplit, false);
}
}
if (need_backward) {
tmp.SetSubstract(sum, e.stats);
c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, fsplit, true);
}
}
@ -308,8 +324,10 @@ class ColMaker: public IUpdater {
tmp = sum;
ThreadEntry &e = stemp[nthread-1][nid];
c.SetSubstract(snode[nid].stats, tmp);
if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
tmp.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true);
}
}
@ -335,19 +353,25 @@ class ColMaker: public IUpdater {
e.first_fvalue = fvalue;
} else {
// forward default right
if (std::abs(fvalue - e.first_fvalue) > rt_2eps){
if (std::abs(fvalue - e.first_fvalue) > rt_2eps) {
if (need_forward) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
e.stats.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) -
snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false);
}
}
if (need_backward) {
cright.SetSubstract(e.stats_extra, e.stats);
c.SetSubstract(snode[nid].stats, cright);
if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
if (c.sum_hess >= param.min_child_weight &&
cright.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) +
c.CalcGain(param) -
snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true);
}
}
@ -361,7 +385,7 @@ class ColMaker: public IUpdater {
// update enumeration solution
inline void UpdateEnumeration(int nid, bst_gpair gstats,
float fvalue, int d_step, bst_uint fid,
TStats &c, std::vector<ThreadEntry> &temp) {
TStats &c, std::vector<ThreadEntry> &temp) { // NOLINT(*)
// get the statistics of nid
ThreadEntry &e = temp[nid];
// test if first hit, this is fine, because we set 0 during init
@ -370,10 +394,12 @@ class ColMaker: public IUpdater {
e.last_fvalue = fvalue;
} else {
// try to find a split
if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
e.stats.sum_hess >= param.min_child_weight) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
}
}
@ -388,7 +414,7 @@ class ColMaker: public IUpdater {
int d_step,
bst_uint fid,
const std::vector<bst_gpair> &gpair,
std::vector<ThreadEntry> &temp) {
std::vector<ThreadEntry> &temp) { // NOLINT(*)
const std::vector<int> &qexpand = qexpand_;
// clear all the temp statistics
for (size_t j = 0; j < qexpand.size(); ++j) {
@ -443,7 +469,8 @@ class ColMaker: public IUpdater {
ThreadEntry &e = temp[nid];
c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@ -458,7 +485,7 @@ class ColMaker: public IUpdater {
bst_uint fid,
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
std::vector<ThreadEntry> &temp) {
std::vector<ThreadEntry> &temp) { // NOLINT(*)
// use cacheline aware optimization
if (TStats::kSimpleStats != 0 && param.cache_opt != 0) {
EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp);
@ -471,7 +498,7 @@ class ColMaker: public IUpdater {
}
// left statistics
TStats c(param);
for(const ColBatch::Entry *it = begin; it != end; it += d_step) {
for (const ColBatch::Entry *it = begin; it != end; it += d_step) {
const bst_uint ridx = it->index;
const int nid = position[ridx];
if (nid < 0) continue;
@ -485,10 +512,12 @@ class ColMaker: public IUpdater {
e.last_fvalue = fvalue;
} else {
// try to find a split
if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
e.stats.sum_hess >= param.min_child_weight) {
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
}
}
@ -503,7 +532,8 @@ class ColMaker: public IUpdater {
ThreadEntry &e = temp[nid];
c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
c.CalcGain(param) - snode[nid].root_gain);
const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@ -585,7 +615,8 @@ class ColMaker: public IUpdater {
}
}
// reset position of each data point after split is created in the tree
inline void ResetPosition(const std::vector<int> &qexpand, IFMatrix *p_fmat, const RegTree &tree) {
inline void ResetPosition(const std::vector<int> &qexpand,
IFMatrix *p_fmat, const RegTree &tree) {
// set the positions in the nondefault
this->SetNonDefaultPosition(qexpand, p_fmat, tree);
// set rest of instances to default position
@ -655,7 +686,7 @@ class ColMaker: public IUpdater {
const float fvalue = col[j].fvalue;
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if(fvalue < tree[nid].split_cond()) {
if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
@ -667,7 +698,7 @@ class ColMaker: public IUpdater {
}
// utils to get/set position, with encoded format
// return decoded position
inline int DecodePosition(bst_uint ridx) const{
inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx];
return pid < 0 ? ~pid : pid;
}
@ -679,7 +710,7 @@ class ColMaker: public IUpdater {
position[ridx] = nid;
}
}
//--data fields--
// --data fields--
const TrainParam &param;
// number of omp thread used during training
int nthread;

View File

@ -1,11 +1,15 @@
#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_distcol-inl.hpp
* \brief beta distributed version that takes a sub-column
* and constructs a tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
#include <vector>
#include <algorithm>
#include "../sync/sync.h"
#include "../utils/bitmap.h"
#include "../utils/io.h"
@ -40,10 +44,11 @@ class DistColMaker : public ColMaker<TStats> {
virtual const int* GetLeafPosition(void) const {
return builder.GetLeafPosition();
}
private:
struct Builder : public ColMaker<TStats>::Builder {
public:
Builder(const TrainParam &param)
explicit Builder(const TrainParam &param)
: ColMaker<TStats>::Builder(param) {
}
inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) {
@ -63,6 +68,7 @@ class DistColMaker : public ColMaker<TStats> {
virtual const int* GetLeafPosition(void) const {
return BeginPtr(this->position);
}
protected:
virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
IFMatrix *p_fmat, const RegTree &tree) {
@ -142,7 +148,7 @@ class DistColMaker : public ColMaker<TStats> {
}
vec.push_back(this->snode[nid].best);
}
// TODO, lazy version
// TODO(tqchen) lazy version
// communicate best solution
reducer.Allreduce(BeginPtr(vec), vec.size());
// assign solution back
@ -166,4 +172,4 @@ class DistColMaker : public ColMaker<TStats> {
};
} // namespace tree
} // namespace xgboost
#endif
#endif // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_histmaker-inl.hpp
* \brief use histogram counting to construct a tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
#include <vector>
#include <algorithm>
#include "../sync/sync.h"
@ -171,6 +173,7 @@ class HistMaker: public BaseMaker {
const BoosterInfo &info,
const std::vector <bst_uint> &fset,
const RegTree &tree) = 0;
private:
inline void EnumerateSplit(const HistUnit &hist,
const TStats &node_sum,
@ -187,7 +190,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update((float)loss_chg, fid, hist.cut[i], false)) {
if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i], false)) {
*left_sum = s;
}
}
@ -200,7 +203,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update((float)loss_chg, fid, hist.cut[i-1], true)) {
if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i-1], true)) {
*left_sum = c;
}
}
@ -219,19 +222,19 @@ class HistMaker: public BaseMaker {
std::vector<TStats> left_sum(qexpand.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
utils::Assert(node2workindex[nid] == static_cast<int>(wid),
"node2workindex inconsistent");
SplitEntry &best = sol[wid];
TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
for (size_t i = 0; i < fset.size(); ++ i) {
for (size_t i = 0; i < fset.size(); ++i) {
EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)],
node_sum, fset[i], &best, &left_sum[wid]);
}
}
// get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
const SplitEntry &best = sol[wid];
const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
@ -394,7 +397,8 @@ class CQHistMaker: public HistMaker<TStats> {
#if __cplusplus >= 201103L
auto lazy_get_summary = [&]()
#endif
{// get smmary
{
// get summary
thread_sketch.resize(this->get_nthread());
// number of rows in
const size_t nrows = p_fmat->buffered_rowset().size();
@ -670,7 +674,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
}
if (this->node2workindex[nid] < 0) {
this->position[ridx] = ~nid;
} else{
} else {
for (bst_uint j = 0; j < inst.length; ++j) {
builder.AddBudget(inst[j].index, omp_get_thread_num());
}

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_prune-inl.hpp
* \brief prune a tree given the statistics
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
#include <vector>
#include "./param.h"
#include "./updater.h"
@ -37,9 +39,10 @@ class TreePruner: public IUpdater {
param.learning_rate = lr;
syncher.Update(gpair, p_fmat, info, trees);
}
private:
// try to prune off current leaf
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) {
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
if (tree[nid].is_root()) return npruned;
int pid = tree[nid].parent();
RegTree::NodeStat &s = tree.stat(pid);
@ -54,7 +57,7 @@ class TreePruner: public IUpdater {
}
}
/*! \brief do pruning of a tree */
inline void DoPrune(RegTree &tree) {
inline void DoPrune(RegTree &tree) { // NOLINT(*)
int npruned = 0;
// initialize auxiliary statistics
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_refresh-inl.hpp
* \brief refresh the statistics and leaf value on the tree on the dataset
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
#include <vector>
#include <limits>
#include "../sync/sync.h"

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_skmaker-inl.hpp
* \brief use approximation sketch to construct a tree,
a refresh is needed to make the statistics exactly correct
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
#include <vector>
#include <algorithm>
#include "../sync/sync.h"
@ -81,7 +83,7 @@ class SketchMaker: public BaseMaker {
double neg_grad;
/*! \brief sum of hessian statistics */
double sum_hess;
explicit SKStats(void) {}
SKStats(void) {}
// constructor
explicit SKStats(const TrainParam &param) {
this->Clear();
@ -123,7 +125,7 @@ class SketchMaker: public BaseMaker {
sum_hess += b.sum_hess;
}
/*! \brief same as add, reduce is used in All Reduce */
inline static void Reduce(SKStats &a, const SKStats &b) {
inline static void Reduce(SKStats &a, const SKStats &b) { // NOLINT(*)
a.Add(b);
}
/*! \brief set leaf vector value based on statistics */
@ -217,7 +219,9 @@ class SketchMaker: public BaseMaker {
for (size_t i = 0; i < this->qexpand.size(); ++i) {
const int nid = this->qexpand[i];
for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[3 * nid + k].sum_total));
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
static_cast<bst_float>(
sbuilder[3 * nid + k].sum_total));
}
}
return;
@ -272,12 +276,12 @@ class SketchMaker: public BaseMaker {
std::vector<SplitEntry> sol(qexpand.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
utils::Assert(node2workindex[nid] == static_cast<int>(wid),
"node2workindex inconsistent");
SplitEntry &best = sol[wid];
for (bst_uint fid = 0; fid < num_feature; ++ fid) {
for (bst_uint fid = 0; fid < num_feature; ++fid) {
unsigned base = (wid * p_tree->param.num_feature + fid) * 3;
EnumerateSplit(summary_array[base + 0],
summary_array[base + 1],
@ -286,7 +290,7 @@ class SketchMaker: public BaseMaker {
}
}
// get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid];
const SplitEntry &best = sol[wid];
// set up the values
@ -361,7 +365,8 @@ class SketchMaker: public BaseMaker {
best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true);
}
}
{// all including
{
// all including
SKStats s = feat_sum, c;
c.SetSubstract(node_sum, s);
if (s.sum_hess >= param.min_child_weight &&
@ -389,6 +394,6 @@ class SketchMaker: public BaseMaker {
// per node, per feature sketch
std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
};
} // tree
} // xgboost
#endif
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_

View File

@ -1,11 +1,14 @@
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
/*!
* Copyright 2014 by Contributors
* \file updater_sync-inl.hpp
* \brief synchronize the tree in all distributed nodes
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
#include <vector>
#include <string>
#include <limits>
#include "../sync/sync.h"
#include "./updater.h"

View File

@ -1,13 +1,16 @@
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
/*!
* Copyright 2014 by Contributors
* \file base64-inl.h
* \brief data stream support to input and output from/to base64 stream
* base64 is easier to store and pass as text format in mapreduce
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
#include <cctype>
#include <cstdio>
#include <string>
#include "./io.h"
namespace xgboost {
@ -15,7 +18,7 @@ namespace utils {
/*! \brief buffer reader of the stream that allows you to get */
class StreamBufferReader {
public:
StreamBufferReader(size_t buffer_size)
explicit StreamBufferReader(size_t buffer_size)
:stream_(NULL),
read_len_(1), read_ptr_(1) {
buffer_.resize(buffer_size);
@ -75,7 +78,7 @@ const char DecodeTable[] = {
};
static const char EncodeTable[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
} // namespace base64
} // namespace base64
/*! \brief the stream that reads from base64, note we take from file pointers */
class Base64InStream: public IStream {
public:
@ -132,19 +135,19 @@ class Base64InStream: public IStream {
{
// second byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
nvalue |= DecodeTable[tmp_ch] << 12;
*cptr++ = (nvalue >> 16) & 0xFF; --tlen;
}
{
// third byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
// handle termination
if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
break;
}
nvalue |= DecodeTable[tmp_ch] << 6;
@ -157,10 +160,10 @@ class Base64InStream: public IStream {
{
// fourth byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
"invalid base64 format");
break;
}
nvalue |= DecodeTable[tmp_ch];
@ -246,7 +249,7 @@ class Base64OutStream: public IStream {
int buf_top;
unsigned char buf[4];
std::string out_buf;
const static size_t kBufferSize = 256;
static const size_t kBufferSize = 256;
inline void PutChar(char ch) {
out_buf += ch;
@ -260,5 +263,5 @@ class Base64OutStream: public IStream {
}
};
} // namespace utils
} // namespace rabit
#endif // RABIT_LEARN_UTILS_BASE64_INL_H_
} // namespace xgboost
#endif // XGBOOST_UTILS_BASE64_INL_H_

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_UTILS_BITMAP_H_
#define XGBOOST_UTILS_BITMAP_H_
/*!
* Copyright 2014 by Contributors
* \file bitmap.h
* \brief a simple implementation of bitmap
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_BITMAP_H_
#define XGBOOST_UTILS_BITMAP_H_
#include <vector>
#include "./utils.h"
#include "./omp.h"
@ -63,4 +65,4 @@ struct BitMap {
};
} // namespace utils
} // namespace xgboost
#endif
#endif // XGBOOST_UTILS_BITMAP_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_CONFIG_H_
#define XGBOOST_UTILS_CONFIG_H_
/*!
* Copyright 2014 by Contributors
* \file config.h
* \brief helper class to load configurations from file
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_CONFIG_H_
#define XGBOOST_UTILS_CONFIG_H_
#include <cstdio>
#include <cstring>
#include <string>

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_FMAP_H_
#define XGBOOST_UTILS_FMAP_H_
/*!
* Copyright 2014 by Contributors
* \file fmap.h
* \brief helper class that holds the feature names and interpretations
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_FMAP_H_
#define XGBOOST_UTILS_FMAP_H_
#include <vector>
#include <string>
#include <cstring>
@ -78,4 +80,4 @@ class FeatMap {
} // namespace utils
} // namespace xgboost
#endif // XGBOOST_FMAP_H_
#endif // XGBOOST_UTILS_FMAP_H_

View File

@ -1,6 +1,5 @@
#ifndef XGBOOST_UTILS_GROUP_DATA_H_
#define XGBOOST_UTILS_GROUP_DATA_H_
/*!
* Copyright 2014 by Contributors
* \file group_data.h
* \brief this file defines utils to group data by integer keys
* Input: given input sequence (key,value), (k1,v1), (k2,v2)
@ -12,6 +11,11 @@
* The major algorithm is a two-pass linear scan that requires two scans over the data
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_GROUP_DATA_H_
#define XGBOOST_UTILS_GROUP_DATA_H_
#include <vector>
namespace xgboost {
namespace utils {
/*!
@ -107,5 +111,4 @@ struct ParallelGroupBuilder {
};
} // namespace utils
} // namespace xgboost
#endif
#endif // XGBOOST_UTILS_GROUP_DATA_H_

View File

@ -1,16 +1,19 @@
#ifndef XGBOOST_UTILS_IO_H
#define XGBOOST_UTILS_IO_H
/*!
* Copyright 2014 by Contributors
* \file io.h
* \brief general stream interface for serialization, I/O
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_IO_H_
#define XGBOOST_UTILS_IO_H_
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include "./utils.h"
#include "../sync/sync.h"
/*!
* \file io.h
* \brief general stream interface for serialization, I/O
* \author Tianqi Chen
*/
namespace xgboost {
namespace utils {
// reuse the definitions of streams
@ -23,7 +26,7 @@ typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
class FileStream : public ISeekStream {
public:
explicit FileStream(std::FILE *fp) : fp(fp) {}
explicit FileStream(void) {
FileStream(void) {
this->fp = NULL;
}
virtual size_t Read(void *ptr, size_t size) {
@ -33,7 +36,7 @@ class FileStream : public ISeekStream {
std::fwrite(ptr, size, 1, fp);
}
virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET);
std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
}
virtual size_t Tell(void) {
return std::ftell(fp);
@ -42,7 +45,7 @@ class FileStream : public ISeekStream {
return std::feof(fp) != 0;
}
inline void Close(void) {
if (fp != NULL){
if (fp != NULL) {
std::fclose(fp); fp = NULL;
}
}
@ -52,6 +55,5 @@ class FileStream : public ISeekStream {
};
} // namespace utils
} // namespace xgboost
#include "./base64-inl.h"
#endif
#endif // XGBOOST_UTILS_IO_H_

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_UTILS_ITERATOR_H
#define XGBOOST_UTILS_ITERATOR_H
#include <cstdio>
/*!
* Copyright 2014 by Contributors
* \file iterator.h
* \brief iterator interface
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_ITERATOR_H_
#define XGBOOST_UTILS_ITERATOR_H_
#include <cstdio>
namespace xgboost {
namespace utils {
/*!
@ -36,5 +38,5 @@ class IIterator {
} // namespace utils
} // namespace xgboost
#endif
#endif // XGBOOST_UTILS_ITERATOR_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_MATH_H_
#define XGBOOST_UTILS_MATH_H_
/*!
* Copyright 2014 by Contributors
* \file math.h
* \brief support additional math
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_MATH_H_
#define XGBOOST_UTILS_MATH_H_
#include <cmath>
namespace xgboost {
@ -28,7 +30,8 @@ inline T LogGamma(T v) {
#if _MSC_VER >= 1800
return lgamma(v);
#else
#pragma message ("Warning: lgamma function was not available until VS2013, poisson regression will be disabled")
#pragma message("Warning: lgamma function was not available until VS2013"\
", poisson regression will be disabled")
utils::Error("lgamma function was not available until VS2013");
return static_cast<T>(1.0);
#endif

View File

@ -1,16 +1,20 @@
#ifndef XGBOOST_UTILS_OMP_H_
#define XGBOOST_UTILS_OMP_H_
/*!
* Copyright 2014 by Contributors
* \file omp.h
* \brief header to handle OpenMP compatibility issues
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_OMP_H_
#define XGBOOST_UTILS_OMP_H_
#if defined(_OPENMP)
#include <omp.h>
#else
#ifndef DISABLE_OPENMP
// use pragma message instead of warning
#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
// Adjacent string literals are concatenated verbatim, so each piece carries
// its own trailing space to keep the emitted message readable.
#pragma message("Warning: OpenMP is not available, "\
"xgboost will be compiled into single-thread code. "\
"Use OpenMP-enabled compiler to get benefit of multi-threading")
#endif
inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; }
@ -25,6 +29,6 @@ typedef int bst_omp_uint;
#else
typedef unsigned bst_omp_uint;
#endif
} // namespace xgboost
} // namespace xgboost
#endif // XGBOOST_UTILS_OMP_H_

Some files were not shown because too many files have changed in this diff Show More