Merge branch 'master' into unity

tqchen 2014-09-02 11:22:57 -07:00
commit 76c513b191
58 changed files with 481 additions and 289 deletions

.gitignore vendored

@ -6,12 +6,15 @@
# Compiled Dynamic libraries
*.so
*.dylib
*.page
# Compiled Static libraries
*.lai
*.la
*.a
*~
*.Rcheck
*.rds
*.tar.gz
*txt*
*conf
*buffer


@ -15,7 +15,7 @@ BIN = xgboost
OBJ = updater.o gbm.o io.o
SLIB = wrapper/libxgboostwrapper.so
.PHONY: clean all python
.PHONY: clean all python Rpack
all: $(BIN) $(OBJ) $(SLIB)
@ -40,19 +40,25 @@ $(OBJ) :
install:
cp -f -r $(BIN) $(INSTALL_PATH)
R-package.tar.gz:
rm -rf xgboost-R
cp -r R-package xgboost-R
rm -rf xgboost-R/src/*.o xgboost-R/src/*.so xgboost-R/src/*.dll
cp -r src xgboost-R/src/src
mkdir xgboost-R/src/wrapper
cp wrapper/xgboost_wrapper.h xgboost-R/src/wrapper
cp wrapper/xgboost_wrapper.cpp xgboost-R/src/wrapper
cp ./LICENSE xgboost-R
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost-R/src/Makevars.win
tar czf $@ xgboost-R
rm -rf xgboost-R
Rpack:
make clean
rm -rf xgboost xgboost*.tar.gz
cp -r R-package xgboost
rm -rf xgboost/inst/examples/*.buffer
rm -rf xgboost/inst/examples/*.model
rm -rf xgboost/inst/examples/dump*
rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll
rm -rf xgboost/demo/*.model xgboost/demo/*.buffer
cp -r src xgboost/src/src
mkdir xgboost/src/wrapper
cp wrapper/xgboost_wrapper.h xgboost/src/wrapper
cp wrapper/xgboost_wrapper.cpp xgboost/src/wrapper
cp ./LICENSE xgboost
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win
R CMD build xgboost
rm -rf xgboost
R CMD check --as-cran xgboost*.tar.gz
clean:
$(RM) $(OBJ) $(BIN) $(SLIB) *.o *~ */*~ */*/*~
$(RM) $(OBJ) $(BIN) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~


@ -1,12 +1,20 @@
Package: xgboost
Type: Package
Title: R wrapper of xgboost
Title: eXtreme Gradient Boosting
Version: 0.3-0
Date: 2014-08-23
Author: Tianqi Chen, Tong He
Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
Description: xgboost
License: file LICENSE
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
Maintainer: Tong He <hetong007@gmail.com>
Description: This package is an R wrapper of xgboost, which is short for eXtreme
Gradient Boosting. It is an efficient and scalable implementation of the
gradient boosting framework. The package includes an efficient linear model
solver and a tree learning algorithm. It can automatically do
parallel computation with OpenMP, and it can be more than 10 times faster
than existing gradient boosting packages such as gbm. It supports various
objective functions, including regression, classification and ranking. The
package is designed to be extensible, so that users can easily define
their own objectives.
License: Apache License (== 2.0) | file LICENSE
URL: https://github.com/tqchen/xgboost
BugReports: https://github.com/tqchen/xgboost/issues
Depends:


@ -10,5 +10,6 @@ export(xgb.save)
export(xgb.train)
export(xgboost)
exportMethods(predict)
import(methods)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgeMatrix)


@ -4,20 +4,23 @@ setClass('xgb.DMatrix')
#'
#' Get information of an xgb.DMatrix object
#'
#' @param object Object of class "xgb.DMatrix"
#' @param name the name of the field to get
#'
#' @examples
#' data(iris)
#' iris[,5] <- as.numeric(iris[,5])
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
#' labels <- getinfo(dtrain, "label")
#' @rdname getinfo
#' @export
#'
getinfo <- function(object, ...){
UseMethod("getinfo")
}
#' @param object Object of class "xgb.DMatrix"
#' @param name the name of the field to get
#' @param ... other parameters
#' @rdname getinfo
#' @method getinfo xgb.DMatrix
setMethod("getinfo", signature = "xgb.DMatrix",
definition = function(object, name) {
if (typeof(name) != "character") {


@ -11,7 +11,8 @@ setClass("xgb.Booster")
#' value of sum of functions, when outputmargin=TRUE, the prediction is
#' untransformed margin value. In logistic regression, outputmargin=T will
#' output value before logistic transformation.
#'
#' @param ntreelimit limit the number of trees used in prediction; this parameter is only valid for gbtree, not gblinear.
#' Set it to a value greater than 0 to limit the number of trees used; all trees are used by default.
#' @examples
#' data(iris)
#' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
@ -19,11 +20,18 @@ setClass("xgb.Booster")
#' @export
#'
setMethod("predict", signature = "xgb.Booster",
definition = function(object, newdata, outputmargin = FALSE) {
definition = function(object, newdata, outputmargin = FALSE, ntreelimit = NULL) {
if (class(newdata) != "xgb.DMatrix") {
newdata <- xgb.DMatrix(newdata)
}
ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost")
if (is.null(ntreelimit)) {
ntreelimit <- 0
} else {
if (ntreelimit < 1){
stop("predict: ntreelimit must be equal to or greater than 1")
}
}
ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost")
return(ret)
})
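As a usage note, here is a minimal sketch of calling the new `ntreelimit` argument from R, reusing the iris-based model from the roxygen examples above; the number of rounds is illustrative:

```r
# hedged sketch: cap the number of trees used at prediction time
data(iris)
bst <- xgboost(as.matrix(iris[, 1:4]), as.numeric(iris[, 5]), nrounds = 4)
p_all <- predict(bst, as.matrix(iris[, 1:4]))                  # default: all trees
p_one <- predict(bst, as.matrix(iris[, 1:4]), ntreelimit = 1)  # only the first tree
```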


@ -6,22 +6,25 @@ setClass('xgb.DMatrix')
#' Get a new DMatrix containing the specified rows of
#' orginal xgb.DMatrix object
#'
#' @param object Object of class "xgb.DMatrix"
#' @param idxset an integer vector of indices of rows needed
#'
#' @examples
#' data(iris)
#' iris[,5] <- as.numeric(iris[,5])
#' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5])
#' dsub <- slice(dtrain, 1:3)
#' @rdname slice
#' @export
#'
slice <- function(object, ...){
UseMethod("slice")
}
#' @param object Object of class "xgb.DMatrix"
#' @param idxset an integer vector of indices of rows needed
#' @param ... other parameters
#' @rdname slice
#' @method slice xgb.DMatrix
setMethod("slice", signature = "xgb.DMatrix",
definition = function(object, idxset) {
definition = function(object, idxset, ...) {
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
}
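To make the cross-validation use case concrete, a small sketch of slicing fold subsets out of a DMatrix; the fold construction is illustrative, not part of the package:

```r
# hedged sketch: take row subsets with slice for manual cross-validation
data(iris)
iris[, 5] <- as.numeric(iris[, 5])
dtrain <- xgb.DMatrix(as.matrix(iris[, 1:4]), label = iris[, 5])
folds <- split(sample(nrow(iris)), rep(1:5, length.out = nrow(iris)))  # 5 random folds
dsub <- slice(dtrain, folds[[1]])  # DMatrix restricted to the first fold's rows
```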


@ -1,4 +1,5 @@
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
#' @import methods
# depends on matrix
.onLoad <- function(libname, pkgname) {
@ -48,7 +49,6 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
}
}
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
.Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE = "xgboost")
if (length(params) != 0) {
for (i in 1:length(params)) {
p <- params[i]
@ -121,8 +121,8 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
stop("xgb.eval: watch list can only contain xgb.DMatrix")
}
}
evnames <- list()
if (length(watchlist) != 0) {
evnames <- list()
for (i in 1:length(watchlist)) {
w <- watchlist[i]
if (length(names(w)) == 0) {
@ -130,8 +130,10 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
}
evnames <- append(evnames, names(w))
}
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
evnames, PACKAGE = "xgboost")
} else {
msg <- ""
}
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
evnames, PACKAGE = "xgboost")
return(msg)
}


@ -2,7 +2,7 @@
#'
#' Save xgb.DMatrix object to binary file
#'
#' @param model the model object.
#' @param DMatrix the \code{xgb.DMatrix} object to save.
#' @param fname the name of the binary file.
#'
#' @examples
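A minimal sketch of the save/reload round trip this function enables; the file name is illustrative:

```r
# hedged sketch: persist a DMatrix to a binary buffer file and reload it
data(iris)
dtrain <- xgb.DMatrix(as.matrix(iris[, 1:4]), label = as.numeric(iris[, 5]))
xgb.DMatrix.save(dtrain, "dtrain.buffer")  # write the binary file
dtrain2 <- xgb.DMatrix("dtrain.buffer")    # reload it later
```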


@ -4,9 +4,12 @@
#'
#' @param model the model object.
#' @param fname the name of the binary file.
#' @param fmap feature map file representing the type of feature, to make it
#' look nice, run demo/demo.R for result and demo/featmap.txt for example
#' Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model
#' @param fmap feature map file describing the type of each feature.
#' A detailed description can be found at
#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
#' See inst/examples/demo.R for the result and inst/examples/featmap.txt
#' for an example of the format.
#'
#'
#' @examples
#' data(iris)
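A hedged sketch of how xgb.dump would be called with the optional feature map; the file paths are illustrative:

```r
# hedged sketch: dump a fitted model to a text file, optionally with a feature map
data(iris)
bst <- xgboost(as.matrix(iris[, 1:4]), as.numeric(iris[, 5]), nrounds = 2)
xgb.dump(bst, "model.dump.txt")                          # plain text dump
# xgb.dump(bst, "model.dump.txt", fmap = "featmap.txt")  # with a feature map file
```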


@ -15,7 +15,7 @@
#' }
#'
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
#' further details. See also demo/demo.R for walkthrough example in R.
further details. See also inst/examples/demo.R for a walkthrough example in R.
#' @param dtrain takes an \code{xgb.DMatrix} as the input.
#' @param nrounds the max number of iterations
#' @param watchlist what information should be printed when \code{verbose=1} or
@ -24,10 +24,11 @@
#' watchlist=list(validation1=mat1, validation2=mat2) to watch
#' the performance of each round's model on mat1 and mat2
#'
#' @param obj customized objective function. Given prediction and dtrain,
#' return gradient and second order gradient.
#' @param feval customized evaluation function. Given prediction and dtrain,
#' return a \code{list(metric='metric-name', value='metric-value')}.
#' @param obj customized objective function. Returns the gradient and second order
#' gradient with given prediction and dtrain.
#' @param feval customized evaluation function. Returns
#' \code{list(metric='metric-name', value='metric-value')} with given
#' prediction and dtrain.
#' @param ... other parameters to pass to \code{params}.
#'
#' @details
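To make the obj/feval contract concrete, a minimal sketch of a custom logistic objective and error metric passed to xgb.train; the function names and training parameters are illustrative, not part of the package:

```r
# hedged sketch: custom objective and evaluation for xgb.train
data(iris)
labels <- as.numeric(iris[, 5] == "setosa")                  # a binary target
dtrain <- xgb.DMatrix(as.matrix(iris[, 1:4]), label = labels)
logregobj <- function(preds, dtrain) {                       # gradient pair
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))                             # sigmoid of the raw margin
  list(grad = preds - labels, hess = preds * (1 - preds))
}
evalerror <- function(preds, dtrain) {                       # metric pair
  labels <- getinfo(dtrain, "label")
  list(metric = "error", value = mean(as.numeric(preds > 0) != labels))
}
bst <- xgb.train(list(max_depth = 2, eta = 1), dtrain, nrounds = 2,
                 watchlist = list(train = dtrain),
                 obj = logregobj, feval = evalerror)
```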


@ -19,7 +19,7 @@
#' }
#'
#' See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
#' further details. See also demo/demo.R for walkthrough example in R.
further details. See also inst/examples/demo.R for a walkthrough example in R.
#' @param nrounds the max number of iterations
#' @param verbose If 0, xgboost will stay silent. If 1, xgboost will print
#' information of performance. If 2, xgboost will print information of both


@ -1,10 +1,21 @@
This is subfolder for experimental version of R package.
# R package for xgboost.
Installation:
## Installation
For the up-to-date version (recommended), please install from GitHub. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
```r
require(devtools)
install_github('xgboost','tqchen',subdir='R-package')
```
Please visit [demo](https://github.com/tqchen/xgboost/blob/master/R-package/demo/demo.R) for more details.
For the stable version on CRAN, please run
```r
install.packages('xgboost')
```
## Examples
* Please visit the [demo](https://github.com/tqchen/xgboost/blob/master/R-package/inst/examples/demo.R) for a walkthrough example.
* See also the [example scripts](https://github.com/tqchen/xgboost/tree/master/demo/kaggle-higgs) for the Kaggle Higgs Challenge, including a [speedtest script](https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R) on this dataset.
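A minimal quick-start sketch after installation, mirroring the roxygen examples elsewhere in this commit:

```r
require(xgboost)
data(iris)
bst <- xgboost(as.matrix(iris[, 1:4]), as.numeric(iris[, 5]), nrounds = 2)
pred <- predict(bst, as.matrix(iris[, 1:4]))
```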


@ -1 +0,0 @@
demo R code for xgboost usage on agaricus data


@ -1,14 +1,20 @@
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{methods}
\name{getinfo}
\alias{getinfo}
\alias{getinfo,xgb.DMatrix-method}
\title{Get information of an xgb.DMatrix object}
\usage{
getinfo(object, ...)
\S4method{getinfo}{xgb.DMatrix}(object, name)
}
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{name}{the name of the field to get}
\item{...}{other parameters}
}
\description{
Get information of an xgb.DMatrix object


@ -4,7 +4,8 @@
\alias{predict,xgb.Booster-method}
\title{Predict method for eXtreme Gradient Boosting model}
\usage{
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE)
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE,
ntreelimit = NULL)
}
\arguments{
\item{object}{Object of class "xgb.Boost"}
@ -13,9 +14,12 @@
\code{xgb.DMatrix}.}
\item{outputmargin}{whether the prediction should be shown in the original
value of sum of functions, when outputmargin=TRUE, the prediction is
untransformed margin value. In logistic regression, outputmargin=T will
output value before logistic transformation.}
value of sum of functions, when outputmargin=TRUE, the prediction is
untransformed margin value. In logistic regression, outputmargin=T will
output value before logistic transformation.}
\item{ntreelimit}{limit the number of trees used in prediction; this parameter is only valid for gbtree, not gblinear.
Set it to a value greater than 0 to limit the number of trees used; all trees are used by default.}
}
\description{
Predicted values based on xgboost model object.


@ -1,15 +1,21 @@
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{methods}
\name{slice}
\alias{slice}
\alias{slice,xgb.DMatrix-method}
\title{Get a new DMatrix containing the specified rows of
orginal xgb.DMatrix object}
\usage{
slice(object, ...)
\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
}
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{idxset}{an integer vector of indices of rows needed}
\item{...}{other parameters}
}
\description{
Get a new DMatrix containing the specified rows of


@ -6,7 +6,7 @@
xgb.DMatrix.save(DMatrix, fname)
}
\arguments{
\item{model}{the model object.}
\item{DMatrix}{the \code{xgb.DMatrix} object to save.}
\item{fname}{the name of the binary file.}
}


@ -10,9 +10,11 @@ xgb.dump(model, fname, fmap = "")
\item{fname}{the name of the binary file.}
\item{fmap}{feature map file representing the type of feature, to make it
look nice, run demo/demo.R for result and demo/featmap.txt for example
Format: https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}
\item{fmap}{feature map file describing the type of each feature.
A detailed description can be found at
\url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
See inst/examples/demo.R for the result and inst/examples/featmap.txt
for an example of the format.}
}
\description{
Save a xgboost model to text file. Could be parsed later.


@ -20,7 +20,7 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
}
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
further details. See also demo/demo.R for walkthrough example in R.}
further details. See also inst/examples/demo.R for a walkthrough example in R.}
\item{dtrain}{takes an \code{xgb.DMatrix} as the input.}
@ -32,11 +32,12 @@ xgb.train(params = list(), dtrain, nrounds, watchlist = list(),
watchlist=list(validation1=mat1, validation2=mat2) to watch
the performance of each round's model on mat1 and mat2}
\item{obj}{customized objective function. Given prediction and dtrain,
return gradient and second order gradient.}
\item{obj}{customized objective function. Returns the gradient and second order
gradient with given prediction and dtrain.}
\item{feval}{customized evaluation function. Given prediction and dtrain,
return a \code{list(metric='metric-name', value='metric-value')}.}
\item{feval}{customized evaluation function. Returns
\code{list(metric='metric-name', value='metric-value')} with given
prediction and dtrain.}
\item{...}{other parameters to pass to \code{params}.}
}


@ -25,7 +25,7 @@ xgboost(data = NULL, label = NULL, params = list(), nrounds,
}
See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for
further details. See also demo/demo.R for walkthrough example in R.}
further details. See also inst/examples/demo.R for a walkthrough example in R.}
\item{nrounds}{the max number of iterations}


@ -1,32 +1,7 @@
# package root
PKGROOT=../../
# _*_ mode: Makefile; _*_
CXX=`R CMD config CXX`
TCFLAGS=`R CMD config CFLAGS`
# expose these flags to R CMD SHLIB
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
ifeq ($(no_omp),1)
PKG_CPPFLAGS += -DDISABLE_OPENMP
endif
CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
OBJECTS= xgboost_R.o $(CXXOBJ)
.PHONY: all clean
all: $(SHLIB)
$(SHLIB): $(OBJECTS)
xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
xgboost_io.o: $(PKGROOT)/src/io/io.cpp
xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
$(CXXOBJ) :
$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
clean:
rm -rf *.so *.o *~ *.dll
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o


@ -1,33 +1,7 @@
# package root
PKGROOT=../../
# _*_ mode: Makefile; _*_
CXX=`Rcmd config CXX`
TCFLAGS=`Rcmd config CFLAGS`
# expose these flags to R CMD SHLIB
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS)
PKG_CPPFLAGS+= $(SHLIB_PTHREAD_FLAGS)
XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
ifeq ($(no_omp),1)
PKG_CPPFLAGS += -DDISABLE_OPENMP
endif
CXXOBJ= xgboost_wrapper.o xgboost_io.o xgboost_gbm.o xgboost_updater.o
OBJECTS= xgboost_R.o $(CXXOBJ)
.PHONY: all clean
all: $(SHLIB)
$(SHLIB): $(OBJECTS)
xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp
xgboost_io.o: $(PKGROOT)/src/io/io.cpp
xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp
xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp
$(CXXOBJ) :
$(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) )
clean:
rm -rf *.so *.o *~ *.dll
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -I$(PKGROOT)
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o


@ -2,25 +2,55 @@
#include <string>
#include <utility>
#include <cstring>
#include <cstdio>
#include "xgboost_R.h"
#include "wrapper/xgboost_wrapper.h"
#include "src/utils/utils.h"
#include "src/utils/omp.h"
#include "src/utils/matrix_csr.h"
using namespace std;
using namespace xgboost;
extern "C" {
void XGBoostAssert_R(int exp, const char *fmt, ...);
void XGBoostCheck_R(int exp, const char *fmt, ...);
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...);
}
// implements error handling
namespace xgboost {
namespace utils {
void HandleAssertError(const char *msg) {
error("%s", msg);
}
void HandleCheckError(const char *msg) {
error("%s", msg);
extern "C" {
void (*Printf)(const char *fmt, ...) = Rprintf;
int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R;
void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R;
void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R;
void (*Error)(const char *fmt, ...) = error;
}
} // namespace utils
namespace random {
void Seed(unsigned seed) {
warning("parameter seed is ignored, please set random seed using set.seed");
}
double Uniform(void) {
return unif_rand();
}
double Normal(void) {
return norm_rand();
}
} // namespace random
} // namespace xgboost
// call before wrapper starts
inline void _WrapperBegin(void) {
GetRNGstate();
}
// call after the wrapper finishes
inline void _WrapperEnd(void) {
PutRNGstate();
}
extern "C" {
void _DMatrixFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
@ -28,14 +58,17 @@ extern "C" {
R_ClearExternalPtr(ext);
}
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
_WrapperBegin();
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
_WrapperEnd();
return ret;
}
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing) {
_WrapperBegin();
SEXP dim = getAttrib(mat, R_DimSymbol);
int nrow = INTEGER(dim)[0];
int ncol = INTEGER(dim)[1];
@ -47,15 +80,17 @@ extern "C" {
data[i * ncol +j] = din[i + nrow * j];
}
}
void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing));
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
_WrapperEnd();
return ret;
}
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data) {
_WrapperBegin();
const int *col_ptr = INTEGER(indptr);
const int *row_index = INTEGER(indices);
const double *col_data = REAL(data);
@ -85,29 +120,36 @@ extern "C" {
col_index[i] = csr_data[i].first;
row_data[i] = csr_data[i].second;
}
void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata );
void *handle = XGDMatrixCreateFromCSR(BeginPtr(row_ptr), BeginPtr(col_index),
BeginPtr(row_data), row_ptr.size(), ndata );
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
_WrapperEnd();
return ret;
}
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
_WrapperBegin();
int len = length(idxset);
std::vector<int> idxvec(len);
for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1;
}
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), &idxvec[0], len);
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
_WrapperEnd();
return ret;
}
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
_WrapperBegin();
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent));
_WrapperEnd();
}
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
_WrapperBegin();
int len = length(array);
const char *name = CHAR(asChar(field));
if (!strcmp("group", name)) {
@ -116,7 +158,8 @@ extern "C" {
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
}
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
_WrapperEnd();
return;
}
{
@ -127,10 +170,12 @@ extern "C" {
}
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&vec[0], len);
BeginPtr(vec), len);
}
_WrapperEnd();
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
_WrapperBegin();
bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), &olen);
@ -139,6 +184,7 @@ extern "C" {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
_WrapperEnd();
return ret;
}
// functions related to booster
@ -148,28 +194,35 @@ extern "C" {
R_ClearExternalPtr(ext);
}
SEXP XGBoosterCreate_R(SEXP dmats) {
_WrapperBegin();
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i){
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
void *handle = XGBoosterCreate(&dvec[0], dvec.size());
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1);
_WrapperEnd();
return ret;
}
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
_WrapperBegin();
XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val)));
_WrapperEnd();
}
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
_WrapperBegin();
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain));
_WrapperEnd();
}
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
_WrapperBegin();
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
int len = length(grad);
std::vector<float> tgrad(len), thess(len);
@ -180,9 +233,11 @@ extern "C" {
}
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
&tgrad[0], &thess[0], len);
BeginPtr(tgrad), BeginPtr(thess), len);
_WrapperEnd();
}
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
_WrapperBegin();
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
int len = length(dmats);
std::vector<void*> vec_dmats;
@ -197,28 +252,37 @@ extern "C" {
}
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
&vec_dmats[0], &vec_sptr[0], len));
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len));
_WrapperEnd();
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) {
_WrapperBegin();
bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(output_margin),
asInteger(ntree_limit),
&olen);
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
_WrapperEnd();
return ret;
}
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
_WrapperEnd();
}
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
_WrapperEnd();
}
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
_WrapperBegin();
bst_ulong olen;
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
@ -229,5 +293,6 @@ extern "C" {
fprintf(fo, "%s", res[i]);
}
fclose(fo);
_WrapperEnd();
}
}


@ -7,6 +7,7 @@
*/
extern "C" {
#include <Rinternals.h>
#include <R_ext/Random.h>
}
extern "C" {
@ -106,8 +107,9 @@ extern "C" {
* \param handle handle
* \param dmat data matrix
* \param output_margin whether only output raw margin value
* \param ntree_limit limit number of trees used in prediction
*/
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit);
/*!
* \brief load model from existing file
* \param handle handle


@ -0,0 +1,33 @@
#include <stdio.h>
#include <stdarg.h>
#include <Rinternals.h>
// implements error handling
void XGBoostAssert_R(int exp, const char *fmt, ...) {
char buf[1024];
if (exp == 0) {
va_list args;
va_start(args, fmt);
vsprintf(buf, fmt, args);
va_end(args);
error("AssertError:%s\n", buf);
}
}
void XGBoostCheck_R(int exp, const char *fmt, ...) {
char buf[1024];
if (exp == 0) {
va_list args;
va_start(args, fmt);
vsprintf(buf, fmt, args);
va_end(args);
error("%s\n", buf);
}
}
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {
int ret;
va_list args;
va_start(args, fmt);
ret = vsnprintf(buf, size, fmt, args);
va_end(args);
return ret;
}


@ -173,7 +173,7 @@ objective function.
We also have \verb@slice@ for row extraction. It is useful in
cross-validation.
For a walkthrough demo, please see \verb@R-package/demo/demo.R@ for further
For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@ for further
details.
\section{The Higgs Boson competition}


@ -35,11 +35,11 @@ Version
======
* This is version xgboost-0.3; the code has been refactored from 0.2x to be cleaner and more flexible
* This version of xgboost is not compatible with 0.2x, due to the huge amount of changes in code structure
- This means the model and buffer file of previous version can not be loaded in xgboost-unity
- This means the model and buffer files of previous versions cannot be loaded in xgboost-0.3
* For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22)
* Change log in [CHANGES.md](CHANGES.md)
XGBoost in Graphlab Create
======
* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to data manipulation, graph processing, hyper-parameter search, and visualization of big data in one framework. Try the Graphlab Create in http://graphlab.com/products/create/quick-start-guide.html
* XGBoost is adopted as part of the boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to do data manipulation, graph processing, hyper-parameter search, and visualization of terabyte-scale data in one framework. Try Graphlab Create at http://graphlab.com/products/create/quick-start-guide.html
* A nice blog post by Jay Gu on using GLC boosted trees to solve the Kaggle bike sharing challenge: http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand


@ -105,7 +105,10 @@ class GBLinear : public IGradBooster {
virtual void Predict(IFMatrix *p_fmat,
int64_t buffer_offset,
const BoosterInfo &info,
std::vector<float> *out_preds) {
std::vector<float> *out_preds,
unsigned ntree_limit = 0) {
utils::Check(ntree_limit == 0,
"GBLinear::Predict ntrees is only valid for gbtree predictor");
std::vector<float> &preds = *out_preds;
preds.resize(0);
// start collecting the prediction


@ -1,6 +1,7 @@
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#include <cstring>
using namespace std;
#include "./gbm.h"
#include "./gbtree-inl.hpp"
#include "./gblinear-inl.hpp"


@ -57,11 +57,14 @@ class IGradBooster {
* the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
* \param info extra side information that may be needed for prediction
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction; when it equals 0,
* all trees are used. This parameter is only valid for gbtree, not gblinear
*/
virtual void Predict(IFMatrix *p_fmat,
int64_t buffer_offset,
const BoosterInfo &info,
std::vector<float> *out_preds) = 0;
std::vector<float> *out_preds,
unsigned ntree_limit = 0) = 0;
/*!
* \brief dump the model in text format
* \param fmap feature map that may help give interpretations of feature


@ -105,7 +105,8 @@ class GBTree : public IGradBooster {
virtual void Predict(IFMatrix *p_fmat,
int64_t buffer_offset,
const BoosterInfo &info,
std::vector<float> *out_preds) {
std::vector<float> *out_preds,
unsigned ntree_limit = 0) {
int nthread;
#pragma omp parallel
{
@ -137,7 +138,8 @@ class GBTree : public IGradBooster {
this->Pred(batch[i],
buffer_offset < 0 ? -1 : buffer_offset + ridx,
gid, info.GetRoot(ridx), &feats,
&preds[ridx * mparam.num_output_group + gid], stride);
&preds[ridx * mparam.num_output_group + gid], stride,
ntree_limit);
}
}
}
@ -212,14 +214,16 @@ class GBTree : public IGradBooster {
int bst_group,
unsigned root_index,
tree::RegTree::FVec *p_feats,
float *out_pred, size_t stride) {
float *out_pred, size_t stride, unsigned ntree_limit) {
size_t itop = 0;
float psum = 0.0f;
// sum of leaf vector
std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
// number of valid trees
unsigned treeleft = ntree_limit == 0 ? std::numeric_limits<unsigned>::max() : ntree_limit;
// load buffered results if any
if (bid >= 0) {
if (bid >= 0 && ntree_limit == 0) {
itop = pred_counter[bid];
psum = pred_buffer[bid];
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
@ -235,12 +239,13 @@ class GBTree : public IGradBooster {
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
vec_psum[j] += trees[i]->leafvec(tid)[j];
}
if(--treeleft == 0) break;
}
}
p_feats->Drop(inst);
}
// updated the buffered results
if (bid >= 0) {
if (bid >= 0 && ntree_limit == 0) {
pred_counter[bid] = static_cast<unsigned>(trees.size());
pred_buffer[bid] = psum;
for (int i = 0; i < mparam.size_leaf_vector; ++i) {


@ -1,6 +1,7 @@
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#include <string>
using namespace std;
#include "./io.h"
#include "../utils/io.h"
#include "../utils/utils.h"


@ -54,8 +54,10 @@ class DMatrixSimple : public DataMatrix {
for (size_t i = 0; i < batch.size; ++i) {
RowBatch::Inst inst = batch[i];
row_data_.resize(row_data_.size() + inst.length);
memcpy(&row_data_[row_ptr_.back()], inst.data,
sizeof(RowBatch::Entry) * inst.length);
if (inst.length != 0) {
memcpy(&row_data_[row_ptr_.back()], inst.data,
sizeof(RowBatch::Entry) * inst.length);
}
row_ptr_.push_back(row_ptr_.back() + inst.length);
}
}
@ -104,10 +106,10 @@ class DMatrixSimple : public DataMatrix {
this->AddRow(feats);
if (!silent) {
printf("%lux%lu matrix with %lu entries is loaded from %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), fname);
utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), fname);
}
fclose(file);
// try to load in additional file
@ -147,26 +149,26 @@ class DMatrixSimple : public DataMatrix {
* \param fname file name, used to print message
*/
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
int magic;
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
utils::Check(magic == kMagic, "invalid format,magic number mismatch");
int tmagic;
utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
utils::Check(tmagic == kMagic, "invalid format,magic number mismatch");
info.LoadBinary(fs);
FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
fmat_->LoadColAccess(fs);
if (!silent) {
printf("%lux%lu matrix with %lu entries is loaded",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()));
utils::Printf("%lux%lu matrix with %lu entries is loaded",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()));
if (fname != NULL) {
printf(" from %s\n", fname);
utils::Printf(" from %s\n", fname);
} else {
printf("\n");
utils::Printf("\n");
}
if (info.group_ptr.size() != 0) {
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
}
}
}
@ -177,8 +179,8 @@ class DMatrixSimple : public DataMatrix {
*/
inline void SaveBinary(const char* fname, bool silent = false) const {
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
int magic = kMagic;
fs.Write(&magic, sizeof(magic));
int tmagic = kMagic;
fs.Write(&tmagic, sizeof(tmagic));
info.SaveBinary(fs);
FMatrixS::SaveBinary(fs, row_ptr_, row_data_);
@ -186,13 +188,13 @@ class DMatrixSimple : public DataMatrix {
fs.Close();
if (!silent) {
printf("%lux%lu matrix with %lu entries is saved to %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), fname);
utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
static_cast<unsigned long>(info.num_row()),
static_cast<unsigned long>(info.num_col()),
static_cast<unsigned long>(row_data_.size()), fname);
if (info.group_ptr.size() != 0) {
printf("data contains %u groups\n",
static_cast<unsigned>(info.group_ptr.size()-1));
utils::Printf("data contains %u groups\n",
static_cast<unsigned>(info.group_ptr.size()-1));
}
}
}
@ -244,8 +246,8 @@ class DMatrixSimple : public DataMatrix {
at_first_ = false;
batch_.size = parent_->row_ptr_.size() - 1;
batch_.base_rowid = 0;
batch_.ind_ptr = &parent_->row_ptr_[0];
batch_.data_ptr = &parent_->row_data_[0];
batch_.ind_ptr = BeginPtr(parent_->row_ptr_);
batch_.data_ptr = BeginPtr(parent_->row_data_);
return true;
}
virtual const RowBatch &Value(void) const {


@ -110,9 +110,9 @@ class FMatrixS : public IFMatrix{
const std::vector<RowBatch::Entry> &data) {
size_t nrow = ptr.size() - 1;
fo.Write(&nrow, sizeof(size_t));
fo.Write(&ptr[0], ptr.size() * sizeof(size_t));
fo.Write(BeginPtr(ptr), ptr.size() * sizeof(size_t));
if (data.size() != 0) {
fo.Write(&data[0], data.size() * sizeof(RowBatch::Entry));
fo.Write(BeginPtr(data), data.size() * sizeof(RowBatch::Entry));
}
}
/*!
@ -127,11 +127,11 @@ class FMatrixS : public IFMatrix{
size_t nrow;
utils::Check(fi.Read(&nrow, sizeof(size_t)) != 0, "invalid input file format");
out_ptr->resize(nrow + 1);
utils::Check(fi.Read(&(*out_ptr)[0], out_ptr->size() * sizeof(size_t)) != 0,
utils::Check(fi.Read(BeginPtr(*out_ptr), out_ptr->size() * sizeof(size_t)) != 0,
"invalid input file format");
out_data->resize(out_ptr->back());
if (out_data->size() != 0) {
utils::Assert(fi.Read(&(*out_data)[0], out_data->size() * sizeof(RowBatch::Entry)) != 0,
utils::Assert(fi.Read(BeginPtr(*out_data), out_data->size() * sizeof(RowBatch::Entry)) != 0,
"invalid input file format");
}
}
@ -213,8 +213,8 @@ class FMatrixS : public IFMatrix{
col_data_[i] = SparseBatch::Inst(&data[0] + ptr[ridx],
static_cast<bst_uint>(ptr[ridx+1] - ptr[ridx]));
}
batch_.col_index = &col_index_[0];
batch_.col_data = &col_data_[0];
batch_.col_index = BeginPtr(col_index_);
batch_.col_data = BeginPtr(col_data_);
this->BeforeFirst();
}
// data content


@ -98,8 +98,8 @@ struct MetaInfo {
group_ptr.push_back(group_ptr.back()+nline);
}
if (!silent) {
printf("%u groups are loaded from %s\n",
static_cast<unsigned>(group_ptr.size()-1), fname);
utils::Printf("%u groups are loaded from %s\n",
static_cast<unsigned>(group_ptr.size()-1), fname);
}
fclose(fi);
return true;
@ -125,15 +125,15 @@ struct MetaInfo {
}
// try to load weight information from file, if exists
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
std::vector<float> &weights = this->GetFloatInfo(field);
std::vector<float> &data = this->GetFloatInfo(field);
FILE *fi = fopen64(fname, "r");
if (fi == NULL) return false;
float wt;
while (fscanf(fi, "%f", &wt) == 1) {
weights.push_back(wt);
data.push_back(wt);
}
if (!silent) {
printf("loading %s from %s\n", field, fname);
utils::Printf("loading %s from %s\n", field, fname);
}
fclose(fi);
return true;


@ -8,8 +8,8 @@
#include <vector>
#include <utility>
#include <string>
#include <climits>
#include <cmath>
#include <climits>
#include <algorithm>
#include "./evaluation.h"
#include "./helper_utils.h"
@ -183,7 +183,7 @@ struct EvalAMS : public IEvaluator {
}
}
if (ntop == ndata) {
fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
utils::Printf("\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
return static_cast<float>(tams);
} else {
return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));


@ -73,7 +73,7 @@ class EvalSet{
for (size_t i = 0; i < evals_.size(); ++i) {
float res = evals_[i]->Eval(preds, info);
char tmp[1024];
snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
utils::SPrintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
result += tmp;
}
return result;


@ -7,6 +7,7 @@
*/
#include <utility>
#include <vector>
#include <cmath>
#include <algorithm>
namespace xgboost {
namespace learner {


@ -63,14 +63,14 @@ class BoostLearner {
}
char str_temp[25];
if (num_feature > mparam.num_feature) {
snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
utils::SPrintf(str_temp, sizeof(str_temp), "%u", num_feature);
this->SetParam("bst:num_feature", str_temp);
}
snprintf(str_temp, sizeof(str_temp), "%lu",
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
static_cast<unsigned long>(buffer_size));
this->SetParam("num_pbuffer", str_temp);
if (!silent) {
printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
utils::Printf("buffer_size=%ld\n", static_cast<long>(buffer_size));
}
}
/*!
@ -183,7 +183,7 @@ class BoostLearner {
const std::vector<std::string> &evname) {
std::string res;
char tmp[256];
snprintf(tmp, sizeof(tmp), "[%d]", iter);
utils::SPrintf(tmp, sizeof(tmp), "[%d]", iter);
res = tmp;
for (size_t i = 0; i < evals.size(); ++i) {
this->PredictRaw(*evals[i], &preds_);
@ -212,11 +212,14 @@ class BoostLearner {
* \param data input data
* \param output_margin whether to only predict margin value instead of transformed prediction
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit the number of trees used by the boosted tree
* predictor; when it equals 0, all the trees are used
*/
inline void Predict(const DMatrix &data,
bool output_margin,
std::vector<float> *out_preds) const {
this->PredictRaw(data, out_preds);
std::vector<float> *out_preds,
unsigned ntree_limit = 0) const {
this->PredictRaw(data, out_preds, ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
@ -246,11 +249,14 @@ class BoostLearner {
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit the number of trees used by the boosted tree
* predictor; when it equals 0, all the trees are used
*/
inline void PredictRaw(const DMatrix &data,
std::vector<float> *out_preds) const {
std::vector<float> *out_preds,
unsigned ntree_limit = 0) const {
gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
data.info.info, out_preds);
data.info.info, out_preds, ntree_limit);
// add base margin
std::vector<float> &preds = *out_preds;
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());


@ -6,9 +6,9 @@
* \author Tianqi Chen, Kailong Chen
*/
#include <vector>
#include <cmath>
#include <algorithm>
#include <utility>
#include <cmath>
#include <functional>
#include "../data.h"
#include "./objective.h"
@ -37,7 +37,7 @@ struct LossType {
case kLogisticRaw:
case kLinearSquare: return x;
case kLogisticClassify:
case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
case kLogisticNeglik: return 1.0f / (1.0f + std::exp(-x));
default: utils::Error("unknown loss_type"); return 0.0f;
}
}
@ -50,7 +50,7 @@ struct LossType {
inline float FirstOrderGradient(float predt, float label) const {
switch (loss_type) {
case kLinearSquare: return predt - label;
case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
case kLogisticClassify:
case kLogisticNeglik: return predt - label;
default: utils::Error("unknown loss_type"); return 0.0f;
@ -65,7 +65,7 @@ struct LossType {
inline float SecondOrderGradient(float predt, float label) const {
switch (loss_type) {
case kLinearSquare: return 1.0f;
case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));
case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
case kLogisticClassify:
case kLogisticNeglik: return predt * (1 - predt);
default: utils::Error("unknown loss_type"); return 0.0f;
@ -80,7 +80,7 @@ struct LossType {
loss_type == kLogisticNeglik ) {
utils::Check(base_score > 0.0f && base_score < 1.0f,
"base_score must be in (0,1) for logistic loss");
base_score = -logf(1.0f / base_score - 1.0f);
base_score = -std::log(1.0f / base_score - 1.0f);
}
return base_score;
}
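For reference, the logistic branches above implement the usual gradients of the negative log-likelihood, and the base score transform is the logit; in math form (with $x$ the raw prediction, $y$ the label, and $s$ the base_score):

$$p = \frac{1}{1 + e^{-x}}, \qquad \frac{\partial L}{\partial x} = p - y, \qquad \frac{\partial^2 L}{\partial x^2} = p\,(1 - p), \qquad b = -\ln\left(\frac{1}{s} - 1\right)$$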
@ -419,8 +419,8 @@ class LambdaRankObjNDCG : public LambdaRankObj {
for (size_t i = 0; i < pairs.size(); ++i) {
unsigned pos_idx = pairs[i].pos_index;
unsigned neg_idx = pairs[i].neg_index;
float pos_loginv = 1.0f / logf(pos_idx + 2.0f);
float neg_loginv = 1.0f / logf(neg_idx + 2.0f);
float pos_loginv = 1.0f / std::log(pos_idx + 2.0f);
float neg_loginv = 1.0f / std::log(neg_idx + 2.0f);
int pos_label = static_cast<int>(sorted_list[pos_idx].label);
int neg_label = static_cast<int>(sorted_list[neg_idx].label);
float original =
@ -438,7 +438,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
for (size_t i = 0; i < labels.size(); ++i) {
const unsigned rel = static_cast<unsigned>(labels[i]);
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
sumdcg += ((1 << rel) - 1) / std::log(static_cast<float>(i + 2));
}
}
return static_cast<float>(sumdcg);
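The loop above computes the standard DCG sum with a natural logarithm, i.e. for zero-based rank position $i$ and relevance $rel_i$:

$$\mathrm{DCG} = \sum_{i} \frac{2^{rel_i} - 1}{\ln(i + 2)}$$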


@ -302,11 +302,11 @@ struct SplitEntry{
* \param loss_chg the loss reduction get through the split
* \param split_index the feature index where the split is on
*/
inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const {
inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
if (this->split_index() <= split_index) {
return loss_chg > this->loss_chg;
return new_loss_chg > this->loss_chg;
} else {
return !(this->loss_chg > loss_chg);
return !(this->loss_chg > new_loss_chg);
}
}
/*!
@ -332,13 +332,13 @@ struct SplitEntry{
* \param default_left whether the missing value goes to left
* \return whether the proposed split is better and can replace current split
*/
inline bool Update(bst_float loss_chg, unsigned split_index,
float split_value, bool default_left) {
if (this->NeedReplace(loss_chg, split_index)) {
this->loss_chg = loss_chg;
inline bool Update(bst_float new_loss_chg, unsigned split_index,
float new_split_value, bool default_left) {
if (this->NeedReplace(new_loss_chg, split_index)) {
this->loss_chg = new_loss_chg;
if (default_left) split_index |= (1U << 31);
this->sindex = split_index;
this->split_value = split_value;
this->split_value = new_split_value;
return true;
} else {
return false;


@ -1,6 +1,7 @@
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#include <cstring>
using namespace std;
#include "./updater.h"
#include "./updater_prune-inl.hpp"
#include "./updater_refresh-inl.hpp"


@ -63,8 +63,8 @@ class TreePruner: public IUpdater {
}
}
if (silent == 0) {
printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
}
}


@ -26,14 +26,14 @@ class TreeRefresher: public IUpdater {
virtual void Update(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
const std::vector<RegTree*> &trees) {
const std::vector<RegTree*> &trees) {
if (trees.size() == 0) return;
// number of threads
int nthread;
// thread temporal space
std::vector< std::vector<TStats> > stemp;
std::vector<RegTree::FVec> fvec_temp;
// setup temp space for each thread
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
@ -127,8 +127,6 @@ class TreeRefresher: public IUpdater {
this->Refresh(gstats, tree[nid].cright(), p_tree);
}
}
// number of thread in the data
int nthread;
// training parameter
TrainParam param;
};


@ -100,12 +100,10 @@ class ISeekStream: public IStream {
/*! \brief implementation of file i/o stream */
class FileStream : public ISeekStream {
public:
explicit FileStream(FILE *fp) : fp(fp) {}
explicit FileStream(void) {
this->fp = NULL;
}
explicit FileStream(FILE *fp) {
this->fp = fp;
}
virtual size_t Read(void *ptr, size_t size) {
return fread(ptr, size, 1, fp);
}


@ -163,7 +163,7 @@ struct SparseCSRFileBuilder {
fo->Write(rptr);
// setup buffer space
buffer_rptr.resize(rptr.size());
buffer.reserve(buffer_size);
buffer_temp.reserve(buffer_size);
buffer_data.resize(buffer_size);
saved_offset.clear();
saved_offset.resize(rptr.size() - 1, 0);


@ -9,13 +9,8 @@
#include <omp.h>
#else
#ifndef DISABLE_OPENMP
#ifndef _MSC_VER
#warning "OpenMP is not available, compile to single thread code."\
"You may want to ungrade your compiler to enable OpenMP support,"\
"to get benefit of multi-threading."
#else
// TODO add warning for msvc
#endif
// use pragma message instead of warning
#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
#endif
inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; }


@ -16,30 +16,21 @@
/*! namespace of PRNG */
namespace xgboost {
namespace random {
#ifndef XGBOOST_CUSTOMIZE_PRNG_
/*! \brief seed the PRNG */
inline void Seed(uint32_t seed) {
inline void Seed(unsigned seed) {
srand(seed);
}
/*! \brief return a real number uniform in [0,1) */
inline double NextDouble(void) {
/*! \brief basic function, uniform */
inline double Uniform(void) {
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
}
/*! \brief return a real number uniform in (0,1) */
inline double NextDouble2(void) {
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
}
/*! \brief return a random number */
inline uint32_t NextUInt32(void) {
return (uint32_t)rand();
}
/*! \brief return a random number in n */
inline uint32_t NextUInt32(uint32_t n) {
return (uint32_t)floor(NextDouble() * n);
}
/*! \brief return x~N(0,1) */
inline double SampleNormal() {
inline double Normal(void) {
double x, y, s;
do {
x = 2 * NextDouble2() - 1.0;
@ -49,22 +40,24 @@ inline double SampleNormal() {
return x * sqrt(-2.0 * log(s) / s);
}
#else
// include declarations, to be implemented
void Seed(unsigned seed);
double Uniform(void);
double Normal(void);
#endif
/*! \brief return iid x,y ~N(0,1) */
inline void SampleNormal2D(double &xx, double &yy) {
double x, y, s;
do {
x = 2 * NextDouble2() - 1.0;
y = 2 * NextDouble2() - 1.0;
s = x*x + y*y;
} while (s >= 1.0 || s == 0.0);
double t = sqrt(-2.0 * log(s) / s);
xx = x * t;
yy = y * t;
/*! \brief return a real number uniform in [0,1) */
inline double NextDouble(void) {
return Uniform();
}
/*! \brief return a random number in n */
inline uint32_t NextUInt32(uint32_t n) {
return (uint32_t)floor(NextDouble() * n);
}
/*! \brief return x~N(mu,sigma^2) */
inline double SampleNormal(double mu, double sigma) {
return SampleNormal() * sigma + mu;
return Normal() * sigma + mu;
}
/*! \brief return 1 with probability p, coin flip */
inline int SampleBinary(double p) {
@ -90,7 +83,7 @@ struct Random{
inline void Seed(unsigned sd) {
this->rseed = sd;
#if defined(_MSC_VER)||defined(_WIN32)
srand(rseed);
::xgboost::random::Seed(sd);
#endif
}
/*! \brief return a real number uniform in [0,1) */
@ -98,8 +91,8 @@ struct Random{
// use rand instead of rand_r on Windows; for MSVC this is fine since rand is threadsafe
// For cygwin and mingw, this can slow down parallelism, but rand_r is only used in objective-inl.hpp, so it won't affect speed in general
// todo, replace with another PRNG
#if defined(_MSC_VER)||defined(_WIN32)
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_)
return Uniform();
#else
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
#endif


@ -7,11 +7,18 @@
*/
#define _CRT_SECURE_NO_WARNINGS
#include <cstdio>
#include <cstdarg>
#include <string>
#include <cstdlib>
#include <vector>
#ifndef XGBOOST_STRICT_CXX98_
#include <cstdarg>
#endif
#if !defined(__GNUC__)
#define fopen64 std::fopen
#endif
#ifdef _MSC_VER
#define fopen64 fopen
// NOTE: sprintf_s is not fully equivalent to snprintf;
// they behave the same on success, which is sufficient for our case
#define snprintf sprintf_s
@ -19,16 +26,15 @@
#else
#ifdef _FILE_OFFSET_BITS
#if _FILE_OFFSET_BITS == 32
#warning "FILE OFFSET BITS defined to be 32 bit"
#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit")
#endif
#endif
#ifdef __APPLE__
#ifdef __APPLE__
#define off64_t off_t
#define fopen64 fopen
#define fopen64 std::fopen
#endif
#define _FILE_OFFSET_BITS 64
extern "C" {
#include <sys/types.h>
}
@ -47,10 +53,11 @@ typedef long int64_t;
namespace xgboost {
/*! \brief namespace for helper utils of the project */
namespace utils {
/*! \brief error message buffer length */
const int kErrorBuffer = 1 << 12;
#ifndef XGBOOST_CUSTOMIZE_ERROR_
/*! \brief error message buffer length */
const int kPrintBuffer = 1 << 12;
#ifndef XGBOOST_CUSTOMIZE_MSG_
/*!
* \brief handling of Assert error, caused by inappropriate input
* \param msg error message
@ -67,19 +74,50 @@ inline void HandleCheckError(const char *msg) {
fprintf(stderr, "%s\n", msg);
exit(-1);
}
inline void HandlePrint(const char *msg) {
printf("%s", msg);
}
#else
#ifndef XGBOOST_STRICT_CXX98_
// include declarations; someone must implement these
void HandleAssertError(const char *msg);
void HandleCheckError(const char *msg);
void HandlePrint(const char *msg);
#endif
#endif
#ifdef XGBOOST_STRICT_CXX98_
// these function pointers are to be assigned
extern "C" void (*Printf)(const char *fmt, ...);
extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...);
extern "C" void (*Assert)(int exp, const char *fmt, ...);
extern "C" void (*Check)(int exp, const char *fmt, ...);
extern "C" void (*Error)(const char *fmt, ...);
#else
/*! \brief printf, print message to the console */
inline void Printf(const char *fmt, ...) {
std::string msg(kPrintBuffer, '\0');
va_list args;
va_start(args, fmt);
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
va_end(args);
HandlePrint(msg.c_str());
}
/*! \brief portable version of snprintf */
inline int SPrintf(char *buf, size_t size, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
int ret = vsnprintf(buf, size, fmt, args);
va_end(args);
return ret;
}
/*! \brief assert a condition is true; use this to handle debug information */
inline void Assert(bool exp, const char *fmt, ...) {
if (!exp) {
std::string msg(kErrorBuffer, '\0');
std::string msg(kPrintBuffer, '\0');
va_list args;
va_start(args, fmt);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
va_end(args);
HandleAssertError(msg.c_str());
}
@ -88,10 +126,10 @@ inline void Assert(bool exp, const char *fmt, ...) {
/*!\brief same as assert, but this is intended to be used as a message for the user*/
inline void Check(bool exp, const char *fmt, ...) {
if (!exp) {
std::string msg(kErrorBuffer, '\0');
std::string msg(kPrintBuffer, '\0');
va_list args;
va_start(args, fmt);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
va_end(args);
HandleCheckError(msg.c_str());
}
@ -100,14 +138,15 @@ inline void Check(bool exp, const char *fmt, ...) {
/*! \brief report error message, same as check */
inline void Error(const char *fmt, ...) {
{
std::string msg(kErrorBuffer, '\0');
std::string msg(kPrintBuffer, '\0');
va_list args;
va_start(args, fmt);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
vsnprintf(&msg[0], kPrintBuffer, fmt, args);
va_end(args);
HandleCheckError(msg.c_str());
}
}
#endif
/*! \brief replace fopen, report error when the file open fails */
inline FILE *FopenCheck(const char *fname, const char *flag) {
@ -115,7 +154,25 @@ inline FILE *FopenCheck(const char *fname, const char *flag) {
Check(fp != NULL, "can not open file \"%s\"\n", fname);
return fp;
}
} // namespace utils
} // namespace utils
// easy utils that can be directly accessed in xgboost
/*! \brief get the beginning address of a vector */
template<typename T>
inline T *BeginPtr(std::vector<T> &vec) {
if (vec.size() == 0) {
return NULL;
} else {
return &vec[0];
}
}
/*! \brief get the beginning address of a vector */
template<typename T>
inline const T *BeginPtr(const std::vector<T> &vec) {
if (vec.size() == 0) {
return NULL;
} else {
return &vec[0];
}
}
} // namespace xgboost
#endif // XGBOOST_UTILS_UTILS_H_


@ -50,6 +50,7 @@ class BoostLearnTask{
if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
if (!strcmp("num_round", name)) num_round = atoi(val);
if (!strcmp("pred_margin", name)) pred_margin = atoi(val);
if (!strcmp("ntree_limit", name)) ntree_limit = atoi(val);
if (!strcmp("save_period", name)) save_period = atoi(val);
if (!strcmp("eval_train", name)) eval_train = atoi(val);
if (!strcmp("task", name)) task = val;
@ -79,6 +80,7 @@ class BoostLearnTask{
save_period = 0;
eval_train = 0;
pred_margin = 0;
ntree_limit = 0;
dump_model_stats = 0;
task = "train";
model_in = "NULL";
@ -186,7 +188,7 @@ class BoostLearnTask{
inline void TaskPred(void) {
std::vector<float> preds;
if (!silent) printf("start prediction...\n");
learner.Predict(*data, pred_margin != 0, &preds);
learner.Predict(*data, pred_margin != 0, &preds, ntree_limit);
if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
for (size_t i = 0; i < preds.size(); i++) {
@ -217,6 +219,8 @@ class BoostLearnTask{
std::string task;
/*! \brief name of predict file */
std::string name_pred;
/*!\brief limit number of trees in prediction */
int ntree_limit;
/*!\brief whether to directly output margin value */
int pred_margin;
/*! \brief whether dump statistics along with model */


@ -365,7 +365,7 @@ class Booster:
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
def eval(self, mat, name = 'eval', it = 0):
return self.eval_set( [(mat,name)], it)
def predict(self, data, output_margin=False):
def predict(self, data, output_margin=False, ntree_limit=0):
"""
predict with data
Args:
@ -373,12 +373,14 @@ class Booster:
the dmatrix storing the input
output_margin: bool
whether output raw margin value that is untransformed
ntree_limit: limit the number of trees used in prediction; defaults to 0, which means using all the trees
Returns:
numpy array of prediction
"""
length = ctypes.c_ulong()
preds = xglib.XGBoosterPredict(self.handle, data.handle,
int(output_margin), ctypes.byref(length))
int(output_margin), ntree_limit, ctypes.byref(length))
return ctypes2numpy(preds, length.value, 'float32')
def save_model(self, fname):
""" save model to file


@ -6,10 +6,14 @@
#include <string>
#include <cstring>
#include <algorithm>
// include all std functions
using namespace std;
#include "./xgboost_wrapper.h"
#include "../src/data.h"
#include "../src/learner/learner-inl.hpp"
#include "../src/io/io.h"
#include "../src/utils/utils.h"
#include "../src/io/simple_dmatrix-inl.hpp"
using namespace xgboost;
@ -25,11 +29,11 @@ class Booster: public learner::BoostLearner {
this->init_model = false;
this->SetCacheData(mats);
}
const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
this->CheckInitModel();
this->Predict(dmat, output_margin != 0, &this->preds_);
this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
*len = static_cast<bst_ulong>(this->preds_.size());
return &this->preds_[0];
return BeginPtr(this->preds_);
}
inline void BoostOneIter(const DataMatrix &train,
float *grad, float *hess, bst_ulong len) {
@ -57,7 +61,7 @@ class Booster: public learner::BoostLearner {
model_dump_cptr[i] = model_dump[i].c_str();
}
*len = static_cast<bst_ulong>(model_dump.size());
return &model_dump_cptr[0];
return BeginPtr(model_dump_cptr);
}
// temporal fields
// temporal data to save evaluation dump
@ -174,13 +178,13 @@ extern "C"{
std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len);
memcpy(&vec[0], info, sizeof(float) * len);
memcpy(BeginPtr(vec), info, sizeof(float) * len);
}
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len);
memcpy(&vec[0], info, sizeof(unsigned) * len);
memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
}
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
@ -194,13 +198,13 @@ extern "C"{
const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*len = static_cast<bst_ulong>(vec.size());
return &vec[0];
return BeginPtr(vec);
}
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*len = static_cast<bst_ulong>(vec.size());
return &vec[0];
return BeginPtr(vec);
}
bst_ulong XGDMatrixNumRow(const void *handle) {
return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
@ -249,8 +253,8 @@ extern "C"{
bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str();
}
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
static_cast<Booster*>(handle)->LoadModel(fname);


@ -165,9 +165,11 @@ extern "C" {
* \param handle handle
* \param dmat data matrix
* \param output_margin whether only output raw margin value
* \param ntree_limit limit the number of trees used for prediction; this is only valid for boosted trees.
* When the parameter is set to 0, all the trees are used
* \param len used to store length of returning result
*/
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
/*!
* \brief load model from existing file
* \param handle handle