Merge pull request #1 from tqchen/master
updating fork to current master
This commit is contained in:
commit ce1803a40c

.gitignore (vendored): 18 changes
@@ -16,7 +16,6 @@
*conf
*buffer
*model
xgboost
*pyc
*train
*test
@@ -24,3 +23,20 @@ xgboost
*rar
*vali
*data
*sdf
Release
*exe*
*exp
ipch
*.filters
*.user
*log
Debug
*suo
*test*
.Rhistory
*.dll
*i386
*x64
*dump
*save

Makefile: 14 changes
@@ -3,18 +3,21 @@ export CXX = g++
export LDFLAGS= -pthread -lm
# note for R module
# add include path to Rinternals.h here
export CPLUS_INCLUDE_PATH=/usr/share/R/include

ifeq ($(no_omp),1)
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP
else
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
endif

# expose these flags to R CMD SHLIB
export PKG_CPPFLAGS = $(CFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_

# specify tensor path
BIN = xgboost
OBJ =
-SLIB = wrapper/libxgboostwrapper.so wrapper/libxgboostR.so
+SLIB = wrapper/libxgboostwrapper.so
+RLIB = wrapper/libxgboostR.so
.PHONY: clean all R

all: $(BIN) wrapper/libxgboostwrapper.so
@@ -31,6 +34,9 @@ $(BIN) :
$(SLIB) :
	$(CXX) $(CFLAGS) -fPIC $(LDFLAGS) -shared -o $@ $(filter %.cpp %.o %.c, $^)

+$(RLIB) :
+	R CMD SHLIB -c -o $@ $(filter %.cpp %.o %.c, $^)
+
$(OBJ) :
	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )

@@ -38,4 +44,4 @@ install:
	cp -f -r $(BIN) $(INSTALL_PATH)

clean:
-	$(RM) $(OBJ) $(BIN) $(SLIB) *~ */*~ */*/*~
+	$(RM) $(OBJ) $(BIN) $(SLIB) $(RLIB) *~ */*~ */*/*~

R-package/DESCRIPTION (new file): 16 lines
@@ -0,0 +1,16 @@
Package: xgboost
Type: Package
Title: R wrapper of xgboost
Version: 0.3-0
Date: 2014-08-23
Author: Tianqi Chen
Maintainer: Tianqi Chen <tianqi.tchen@gmail.com>
Description: xgboost
License: See LICENSE file
URL: https://github.com/tqchen/xgboost
BugReports: https://github.com/tqchen/xgboost/issues
Depends:
    R (>= 2.0.2)
Imports:
    Matrix (>= 1.1-0),
    methods

R-package/LICENSE (new file): 13 lines
@@ -0,0 +1,13 @@
Copyright (c) 2014 by Tianqi Chen and Contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

R-package/NAMESPACE (new file): 10 lines
@@ -0,0 +1,10 @@
importClassesFrom("Matrix", dgCMatrix, dgeMatrix)

export(xgboost)
export(xgb.DMatrix)
export(xgb.getinfo)
exportMethods(predict)
export(xgb.train)
export(xgb.save)
export(xgb.load)
export(xgb.dump)

R-package/R/predict.xgb.Booster.R (new file): 16 lines
@@ -0,0 +1,16 @@
#' @export
setClass("xgb.Booster")

#' @export
setMethod("predict",
          signature = "xgb.Booster",
          definition = function(object, newdata, outputmargin = FALSE) {
            if (class(newdata) != "xgb.DMatrix") {
              newdata = xgb.DMatrix(newdata)
            }
            ret <- .Call("XGBoosterPredict_R", object, newdata,
                         as.integer(outputmargin), PACKAGE="xgboost")
            return(ret)
          })

R-package/R/utils.R (new file): 128 lines
@@ -0,0 +1,128 @@
# depends on Matrix
.onLoad <- function(libname, pkgname) {
  library.dynam("xgboost", pkgname, libname);
}
.onUnload <- function(libpath) {
  library.dynam.unload("xgboost", libpath);
}

# set information into dmatrix; this mutates the dmatrix
xgb.setinfo <- function(dmat, name, info) {
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
  }
  if (name == "label") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "weight") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "base_margin") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), PACKAGE="xgboost")
    return(TRUE)
  }
  if (name == "group") {
    .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost")
    return(TRUE)
  }
  stop(paste("xgb.setinfo: unknown info name", name))
  return(FALSE)
}

# construct a Booster from cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
  if (typeof(cachelist) != "list") {
    stop("xgb.Booster: only accepts list of DMatrix as cachelist")
  }
  for (dm in cachelist) {
    if (class(dm) != "xgb.DMatrix") {
      stop("xgb.Booster: only accepts list of DMatrix as cachelist")
    }
  }
  handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE="xgboost")
  .Call("XGBoosterSetParam_R", handle, "seed", "0", PACKAGE="xgboost")
  if (length(params) != 0) {
    for (i in 1:length(params)) {
      p <- params[i]
      .Call("XGBoosterSetParam_R", handle, names(p), as.character(p), PACKAGE="xgboost")
    }
  }
  if (!is.null(modelfile)) {
    if (typeof(modelfile) != "character") {
      stop("xgb.Booster: modelfile must be character");
    }
    .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE="xgboost")
  }
  return(structure(handle, class="xgb.Booster"))
}

# predict, deprecated
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.predict: first argument must be type xgb.Booster")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.predict: second argument must be type xgb.DMatrix")
  }
  ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost")
  return(ret)
}

##--------------------------------------
# the following are low-level iteration functions,
# not needed unless you want step-by-step control
#---------------------------------------

# iteratively update booster with dtrain
xgb.iter.update <- function(booster, dtrain, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.update: first argument must be type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.update: second argument must be type xgb.DMatrix")
  }
  .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost")
  return(TRUE)
}

# iteratively update booster with customized statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.boost: first argument must be type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.boost: second argument must be type xgb.DMatrix")
  }
  .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost")
  return(TRUE)
}

# evaluate one iteration
xgb.iter.eval <- function(booster, watchlist, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.eval: first argument must be type xgb.Booster")
  }
  if (typeof(watchlist) != "list") {
    stop("xgb.iter.eval: only accepts list of DMatrix as watchlist")
  }
  for (w in watchlist) {
    if (class(w) != "xgb.DMatrix") {
      stop("xgb.iter.eval: watchlist can only contain xgb.DMatrix")
    }
  }
  evnames <- list()
  if (length(watchlist) != 0) {
    for (i in 1:length(watchlist)) {
      w <- watchlist[i]
      if (length(names(w)) == 0) {
        stop("xgb.iter.eval: a name tag must be present for every element in watchlist")
      }
      evnames <- append(evnames, names(w))
    }
  }
  msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames, PACKAGE="xgboost")
  return(msg)
}
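
Taken together, these primitives are enough to write a training loop by hand. A minimal sketch, assuming `dtrain` is an existing `xgb.DMatrix` and the functions above are loaded (parameter values are illustrative only):

```r
# hand-rolled training loop built on the low-level primitives above
params <- list(max_depth = 2, eta = 1, objective = "binary:logistic")
bst <- xgb.Booster(params, list(dtrain))
for (i in 1:10) {
  xgb.iter.update(bst, dtrain, i - 1)              # one boosting round
  msg <- xgb.iter.eval(bst, list(train = dtrain), i - 1)
  cat(msg, "\n")                                   # print the evaluation string
}
```

This is the same loop that `xgb.train` below runs internally when no custom objective is supplied.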

R-package/R/xgb.DMatrix.R (new file): 22 lines
@@ -0,0 +1,22 @@
# constructing DMatrix
xgb.DMatrix <- function(data, missing = 0.0, ...) {
  if (typeof(data) == "character") {
    handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
  } else if (is.matrix(data)) {
    handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost")
  } else if (class(data) == "dgCMatrix") {
    handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost")
  } else {
    stop(paste("xgb.DMatrix: cannot construct DMatrix from", typeof(data)))
  }
  dmat <- structure(handle, class="xgb.DMatrix")

  info = list(...)
  if (length(info) == 0)
    return(dmat)
  for (i in 1:length(info)) {
    p = info[i]
    xgb.setinfo(dmat, names(p), p[[1]])
  }
  return(dmat)
}
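
Any extra named arguments are forwarded to `xgb.setinfo`, so labels, weights, or margins can be attached at construction time. A small usage sketch with made-up data:

```r
# dense 10x2 matrix with random 0/1 labels, attached via the ... mechanism
x <- matrix(rnorm(20), nrow = 10)
y <- sample(0:1, 10, replace = TRUE)
dsmall <- xgb.DMatrix(x, label = y)
# the labels round-trip through xgb.getinfo
stopifnot(all(xgb.getinfo(dsmall, "label") == y))
```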

R-package/R/xgb.dump.R (new file): 11 lines
@@ -0,0 +1,11 @@
# dump model
xgb.dump <- function(booster, fname, fmap = "") {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.dump: first argument must be type xgb.Booster")
  }
  if (typeof(fname) != "character") {
    stop("xgb.dump: second argument must be type character")
  }
  .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost")
  return(TRUE)
}

R-package/R/xgb.getinfo.R (new file): 16 lines
@@ -0,0 +1,16 @@
# get information from dmatrix
xgb.getinfo <- function(dmat, name) {
  if (typeof(name) != "character") {
    stop("xgb.getinfo: name must be character")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.getinfo: first argument dmat must be xgb.DMatrix");
  }
  if (name != "label" &&
      name != "weight" &&
      name != "base_margin") {
    stop(paste("xgb.getinfo: unknown info name", name))
  }
  ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost")
  return(ret)
}

R-package/R/xgb.load.R (new file): 5 lines
@@ -0,0 +1,5 @@
xgb.load <- function(modelfile) {
  if (is.null(modelfile))
    stop('xgb.load: modelfile cannot be NULL')
  xgb.Booster(modelfile = modelfile)
}

R-package/R/xgb.save.R (new file): 16 lines
@@ -0,0 +1,16 @@
# save model or DMatrix to file
xgb.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.save: fname must be character")
  }
  if (class(handle) == "xgb.Booster") {
    .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
    return(TRUE)
  }
  if (class(handle) == "xgb.DMatrix") {
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
    return(TRUE)
  }
  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
  return(FALSE)
}

R-package/R/xgb.train.R (new file): 38 lines
@@ -0,0 +1,38 @@
# train a model using given parameters
xgb.train <- function(params, dtrain, nrounds = 10, watchlist = list(), obj = NULL, feval = NULL) {
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list");
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix");
  }
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  for (i in 1:nrounds) {
    if (is.null(obj)) {
      succ <- xgb.iter.update(bst, dtrain, i - 1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      succ <- xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
      if (is.null(feval)) {
        msg <- xgb.iter.eval(bst, watchlist, i - 1)
        cat(msg); cat("\n")
      } else {
        cat("["); cat(i); cat("]");
        for (j in 1:length(watchlist)) {
          w <- watchlist[j]
          if (length(names(w)) == 0) {
            stop("xgb.train: a name tag must be present for every element in watchlist")
          }
          ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
          cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
          cat(":"); cat(ret$value)
        }
        cat("\n")
      }
    }
  }
  return(bst)
}
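
Both hooks receive `(preds, dtrain)`: `obj` must return `list(grad, hess)` and `feval` must return `list(metric, value)`. A sketch of a call that exercises the `feval` branch of the loop above; the helper mirrors the ones defined in the demo scripts below:

```r
# custom evaluation: classification error on 0.5-thresholded probabilities
evalerror <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  list(metric = "error", value = mean(labels != (preds > 0.5)))
}
param <- list(max_depth = 2, eta = 1, silent = 1,
              objective = "binary:logistic")
bst <- xgb.train(param, dtrain, nrounds = 2,
                 watchlist = list(train = dtrain), feval = evalerror)
```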

R-package/R/xgboost.R (new file): 49 lines
@@ -0,0 +1,49 @@
# Main function for xgboost-package

xgboost = function(x = NULL, y = NULL, DMatrix = NULL, file = NULL, validation = NULL,
                   nrounds = 10, obj = NULL, feval = NULL, margin = NULL, verbose = T, ...)
{
  if (!is.null(DMatrix)) {
    dtrain = DMatrix
  } else {
    if (is.null(x) && is.null(y)) {
      if (is.null(file))
        stop('xgboost needs input data: an R object, a local file, or a DMatrix object.')
      dtrain = xgb.DMatrix(file)
    } else {
      dtrain = xgb.DMatrix(x, label = y)
    }
    if (!is.null(margin)) {
      succ <- xgb.setinfo(dtrain, "base_margin", margin)
      if (!succ)
        warning('Attempt to use margin failed.')
    }
  }

  params = list(...)

  watchlist = list()
  if (verbose) {
    if (!is.null(validation)) {
      if (class(validation) != 'xgb.DMatrix')
        dtest = xgb.DMatrix(validation)
      else
        dtest = validation
      watchlist = list(eval = dtest, train = dtrain)
    } else {
      watchlist = list(train = dtrain)
    }
  }

  bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)

  return(bst)
}

R-package/README.md (new file): 10 lines
@@ -0,0 +1,10 @@
This is the subfolder for the experimental version of the R package.

Not yet ready.

Installation:

```r
require(devtools)
install_github('xgboost','tqchen',subdir='R-package')
```
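
Once installed, a minimal run might look like the following (a sketch assuming the agaricus example files shipped in `inst/examples` are in the working directory):

```r
require(xgboost)
bst <- xgboost(file = 'agaricus.txt.train',
               max_depth = 2, eta = 1, objective = 'binary:logistic')
pred <- predict(bst, 'agaricus.txt.test')
```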

R-package/inst/examples/agaricus.txt.test (new file): 1611 lines (diff too large to show)
R-package/inst/examples/agaricus.txt.train (new file): 6513 lines (diff too large to show)

R-package/inst/examples/demo-new.R (new file): 133 lines
@@ -0,0 +1,133 @@
require(xgboost)
require(methods)

# helper function to read libsvm format
# this is very badly written: it loads dense, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm = function(fname, maxcol) {
  content = readLines(fname)
  nline = length(content)
  label = numeric(nline)
  mat = matrix(0, nline, maxcol + 1)
  for (i in 1:nline) {
    arr = as.vector(strsplit(content[i], " ")[[1]])
    label[i] = as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv = strsplit(arr[j], ":")[[1]]
      # shift by one to avoid 0 index
      findex = as.integer(kv[1]) + 1
      fvalue = as.numeric(kv[2])
      mat[i, findex] = fvalue
    }
  }
  mat = as(mat, "sparseMatrix")
  return(list(label = label, data = mat))
}

############################
# Test xgb.DMatrix with local file, sparse matrix and dense matrix in R.
############################

# Directly read in local file
dtrain = xgb.DMatrix('agaricus.txt.train')
class(dtrain)

# read file in R
csc = read.libsvm("agaricus.txt.train", 126)
y = csc$label
x = csc$data

# x as sparse matrix
class(x)
dtrain = xgb.DMatrix(x, label = y)

# x as dense matrix
dense.x = as.matrix(x)
dtrain = xgb.DMatrix(dense.x, label = y)

############################
# Test xgboost with local file, sparse matrix and dense matrix in R.
############################

# Test with DMatrix object
bst = xgboost(DMatrix = dtrain, max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')

# Test with local file
bst = xgboost(file = 'agaricus.txt.train', max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')

# Test with sparse matrix
bst = xgboost(x = x, y = y, max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')

# Test with dense matrix
bst = xgboost(x = dense.x, y = y, max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')

# Test with validation set
bst = xgboost(file = 'agaricus.txt.train', validation = 'agaricus.txt.test',
              max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic')

############################
# Test predict
############################

# Prediction with DMatrix object
dtest = xgb.DMatrix('agaricus.txt.test')
pred = predict(bst, dtest)

# Prediction with local test file
pred = predict(bst, 'agaricus.txt.test')

# Prediction with sparse matrix
csc = read.libsvm("agaricus.txt.test", 126)
test.y = csc$label
test.x = csc$data
pred = predict(bst, test.x)

# Extract labels with xgb.getinfo
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

############################
# Save and load model to hard disk
############################

# save model to binary local file
xgb.save(bst, 'model.save')

# load binary model into R
bst = xgb.load('model.save')
pred = predict(bst, test.x)

# save model to text file
xgb.dump(bst, 'model.dump')

############################
# Customized objective and evaluation function
############################

# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  preds = 1.0 / (1.0 + exp(-preds))
  grad = preds - labels
  hess = preds * (1.0 - preds)
  return(list(grad = grad, hess = hess))
}
# user-defined evaluation function: return a list(metric="metric-name", value="metric-value")
# NOTE: with a customized objective function, the default prediction value is the margin,
# which can make built-in evaluation metrics misbehave;
# for example, with logistic loss the prediction is the score before the logistic transformation,
# while the built-in error metric assumes input after the logistic transformation.
# Keep this in mind when you customize, and consider writing a customized evaluation function too.
evalerror = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric = "error", value = err))
}

bst = xgboost(x = x, y = y, max_depth = 2, eta = 1, silent = 1, objective = 'binary:logistic',
              obj = logregobj, feval = evalerror)

R-package/inst/examples/demo-old.R (new file): 127 lines
@@ -0,0 +1,127 @@
# load xgboost library
require(xgboost)
require(methods)

# helper function to read libsvm format
# this is very badly written: it loads dense, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm <- function(fname, maxcol) {
  content <- readLines(fname)
  nline <- length(content)
  label <- numeric(nline)
  mat <- matrix(0, nline, maxcol + 1)
  for (i in 1:nline) {
    arr <- as.vector(strsplit(content[i], " ")[[1]])
    label[i] <- as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv <- strsplit(arr[j], ":")[[1]]
      # shift by one to avoid 0 index
      findex <- as.integer(kv[1]) + 1
      fvalue <- as.numeric(kv[2])
      mat[i, findex] <- fvalue
    }
  }
  mat <- as(mat, "sparseMatrix")
  return(list(label = label, data = mat))
}

# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist <- list("eval"=dtest, "train"=dtrain)
# train xgboost model
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=", err))

# dump model
xgb.dump(bst, "dump.raw.txt")
# dump model with feature map
xgb.dump(bst, "dump.nice.txt", "featmap.txt")

# save dmatrix into binary buffer
succ <- xgb.save(dtest, "dtest.buffer")
# save model into file
succ <- xgb.save(bst, "xgb.model")
# load model and data back in
bst2 <- xgb.Booster(modelfile="xgb.model")
dtest2 <- xgb.DMatrix("dtest.buffer")
preds2 <- xgb.predict(bst2, dtest2)
# assert they are the same
stopifnot(sum(abs(preds2 - preds)) == 0)

###
# build dmatrix from sparseMatrix
###
print('start running example of building DMatrix from R sparseMatrix')
csc <- read.libsvm("agaricus.txt.train", 126)
label <- csc$label
data <- csc$data
dtrain <- xgb.DMatrix(data, label = label)
watchlist <- list("eval"=dtest, "train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)

###
# build dmatrix from dense matrix
###
print('start running example of building DMatrix from R matrix')
mat = as.matrix(data)
dtrain <- xgb.DMatrix(mat, label = label)
watchlist <- list("eval"=dtest, "train"=dtrain)
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)

###
# advanced: customized loss function
#
print("start running example of using a customized objective function")
# note: for a customized objective function, we leave objective as default
# note: what we get in prediction is the margin value
# you must know what you are doing
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" = 1)
# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  preds <- 1.0 / (1.0 + exp(-preds))
  grad <- preds - labels
  hess <- preds * (1.0 - preds)
  return(list(grad = grad, hess = hess))
}
# user-defined evaluation function: return a list(metric="metric-name", value="metric-value")
# NOTE: with a customized objective function, the default prediction value is the margin,
# which can make built-in evaluation metrics misbehave;
# for example, with logistic loss the prediction is the score before the logistic transformation,
# while the built-in error metric assumes input after the logistic transformation.
# Keep this in mind when you customize, and consider writing a customized evaluation function too.
evalerror <- function(preds, dtrain) {
  labels <- xgb.getinfo(dtrain, "label")
  err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric = "error", value = err))
}

# training with a customized objective; we can also do step-by-step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)

###
# advanced: start from an initial base prediction
#
print("start running example of starting from an initial prediction")
# specify parameters via map; definitions are the same as in the C++ version
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
# train xgboost for 1 round
bst <- xgb.train(param, dtrain, 1, watchlist)
# Note: we need the margin value instead of the transformed prediction in set_base_margin;
# predicting with outputmargin=TRUE always gives margin values before the logistic transformation
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
succ <- xgb.setinfo(dtest, "base_margin", ptest)
print("this is the result of running from the initial prediction")
bst <- xgb.train(param, dtrain, 1, watchlist)

R-package/inst/examples/demo.R (new file): 103 lines
@@ -0,0 +1,103 @@
require(xgboost)
require(methods)

# helper function to read libsvm format
# this is very badly written: it loads dense, then converts to sparse
# use this only for demo purposes
# adapted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
read.libsvm = function(fname, maxcol) {
  content = readLines(fname)
  nline = length(content)
  label = numeric(nline)
  mat = matrix(0, nline, maxcol + 1)
  for (i in 1:nline) {
    arr = as.vector(strsplit(content[i], " ")[[1]])
    label[i] = as.numeric(arr[[1]])
    for (j in 2:length(arr)) {
      kv = strsplit(arr[j], ":")[[1]]
      # shift by one to avoid 0 index
      findex = as.integer(kv[1]) + 1
      fvalue = as.numeric(kv[2])
      mat[i, findex] = fvalue
    }
  }
  mat = as(mat, "sparseMatrix")
  return(list(label = label, data = mat))
}

# Parameter setting
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
watchlist = list("eval"=dtest, "train"=dtrain)

###########################
# Train from local file
###########################

# Training
bst = xgboost(file='agaricus.txt.train', params=param, watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

###########################
# Train from R object
###########################

csc = read.libsvm("agaricus.txt.train", 126)
y = csc$label
x = csc$data
# x as sparse matrix
class(x)

# Training
bst = xgboost(x, y, params=param, watchlist=watchlist)
# Prediction
pred = predict(bst, 'agaricus.txt.test')
# Performance
labels = xgb.getinfo(dtest, "label")
err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels)
print(paste("error=", err))

# Training with dense matrix
x = as.matrix(x)
bst = xgboost(x, y, params=param, watchlist=watchlist)

###########################
# Train with customization
###########################

# user-defined objective function: given predictions, return gradient and second-order gradient
# this is log-likelihood loss
logregobj = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  preds = 1.0 / (1.0 + exp(-preds))
  grad = preds - labels
  hess = preds * (1.0 - preds)
  return(list(grad = grad, hess = hess))
}
# user-defined evaluation function: return a list(metric="metric-name", value="metric-value")
# NOTE: with a customized objective function, the default prediction value is the margin,
# which can make built-in evaluation metrics misbehave;
# for example, with logistic loss the prediction is the score before the logistic transformation,
# while the built-in error metric assumes input after the logistic transformation.
# Keep this in mind when you customize, and consider writing a customized evaluation function too.
evalerror = function(preds, dtrain) {
  labels = xgb.getinfo(dtrain, "label")
  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
  return(list(metric = "error", value = err))
}

bst = xgboost(x, y, params=param, watchlist=watchlist, obj=logregobj, feval=evalerror)

############################
# Train with previous result
############################

bst = xgboost(x, y, params=param, watchlist=watchlist)
pred = predict(bst, 'agaricus.txt.train', outputmargin=TRUE)
bst2 = xgboost(x, y, params=param, watchlist=watchlist, margin=pred)

R-package/inst/examples/featmap.txt (new file): 126 lines
@@ -0,0 +1,126 @@
0	cap-shape=bell	i
1	cap-shape=conical	i
2	cap-shape=convex	i
3	cap-shape=flat	i
4	cap-shape=knobbed	i
5	cap-shape=sunken	i
6	cap-surface=fibrous	i
7	cap-surface=grooves	i
8	cap-surface=scaly	i
9	cap-surface=smooth	i
10	cap-color=brown	i
11	cap-color=buff	i
12	cap-color=cinnamon	i
13	cap-color=gray	i
14	cap-color=green	i
15	cap-color=pink	i
16	cap-color=purple	i
17	cap-color=red	i
18	cap-color=white	i
19	cap-color=yellow	i
20	bruises?=bruises	i
21	bruises?=no	i
22	odor=almond	i
23	odor=anise	i
24	odor=creosote	i
25	odor=fishy	i
26	odor=foul	i
27	odor=musty	i
28	odor=none	i
29	odor=pungent	i
30	odor=spicy	i
31	gill-attachment=attached	i
32	gill-attachment=descending	i
33	gill-attachment=free	i
34	gill-attachment=notched	i
35	gill-spacing=close	i
36	gill-spacing=crowded	i
37	gill-spacing=distant	i
38	gill-size=broad	i
39	gill-size=narrow	i
40	gill-color=black	i
41	gill-color=brown	i
42	gill-color=buff	i
43	gill-color=chocolate	i
44	gill-color=gray	i
45	gill-color=green	i
46	gill-color=orange	i
47	gill-color=pink	i
48	gill-color=purple	i
49	gill-color=red	i
50	gill-color=white	i
51	gill-color=yellow	i
52	stalk-shape=enlarging	i
53	stalk-shape=tapering	i
54	stalk-root=bulbous	i
55	stalk-root=club	i
56	stalk-root=cup	i
57	stalk-root=equal	i
58	stalk-root=rhizomorphs	i
59	stalk-root=rooted	i
60	stalk-root=missing	i
61	stalk-surface-above-ring=fibrous	i
62	stalk-surface-above-ring=scaly	i
63	stalk-surface-above-ring=silky	i
64	stalk-surface-above-ring=smooth	i
65	stalk-surface-below-ring=fibrous	i
66	stalk-surface-below-ring=scaly	i
67	stalk-surface-below-ring=silky	i
68	stalk-surface-below-ring=smooth	i
69	stalk-color-above-ring=brown	i
70	stalk-color-above-ring=buff	i
71	stalk-color-above-ring=cinnamon	i
72	stalk-color-above-ring=gray	i
73	stalk-color-above-ring=orange	i
74	stalk-color-above-ring=pink	i
75	stalk-color-above-ring=red	i
76	stalk-color-above-ring=white	i
77	stalk-color-above-ring=yellow	i
78	stalk-color-below-ring=brown	i
79	stalk-color-below-ring=buff	i
80	stalk-color-below-ring=cinnamon	i
81	stalk-color-below-ring=gray	i
82	stalk-color-below-ring=orange	i
83	stalk-color-below-ring=pink	i
84	stalk-color-below-ring=red	i
85	stalk-color-below-ring=white	i
86	stalk-color-below-ring=yellow	i
87	veil-type=partial	i
88	veil-type=universal	i
89	veil-color=brown	i
90	veil-color=orange	i
91	veil-color=white	i
92	veil-color=yellow	i
93	ring-number=none	i
94	ring-number=one	i
95	ring-number=two	i
96	ring-type=cobwebby	i
97	ring-type=evanescent	i
98	ring-type=flaring	i
99	ring-type=large	i
100	ring-type=none	i
101	ring-type=pendant	i
102	ring-type=sheathing	i
103	ring-type=zone	i
104	spore-print-color=black	i
105	spore-print-color=brown	i
106	spore-print-color=buff	i
107	spore-print-color=chocolate	i
108	spore-print-color=green	i
109	spore-print-color=orange	i
110	spore-print-color=purple	i
111	spore-print-color=white	i
112	spore-print-color=yellow	i
113	population=abundant	i
114	population=clustered	i
115	population=numerous	i
116	population=scattered	i
117	population=several	i
118	population=solitary	i
119	habitat=grasses	i
120	habitat=leaves	i
121	habitat=meadows	i
122	habitat=paths	i
123	habitat=urban	i
124	habitat=waste	i
125	habitat=woods	i

R-package/src/Makevars (new file): 28 lines
@@ -0,0 +1,28 @@
# _*_ mode: Makefile; _*_
export CC = gcc
export CXX = g++

# expose these flags to R CMD SHLIB
PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)

ifeq ($(no_omp),1)
PKG_CPPFLAGS += -DDISABLE_OPENMP
endif

CXXOBJ= xgboost_wrapper.o xgboost_io.o
OBJECTS= xgboost_R.o $(CXXOBJ)

.PHONY: all clean
all: $(SHLIB)
$(SHLIB): $(OBJECTS)

xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp
xgboost_io.o: ../../src/io/io.cpp

$(CXXOBJ) :
	$(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )

clean:
	rm -rf *.so *.o *~ *.dll

R-package/src/Makevars.win (new file): 32 lines
@@ -0,0 +1,32 @@
# _*_ mode: Makefile; _*_
export CC = gcc
export CXX = g++

# expose these flags to R CMD SHLIB
PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fopenmp -fPIC $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)

# add flag to build native code even in a cross compiler
ifeq "$(WIN)" "64"
PKG_CPPFLAGS += -m64
endif

ifeq ($(no_omp),1)
PKG_CPPFLAGS += -DDISABLE_OPENMP
endif

CXXOBJ= xgboost_wrapper.o xgboost_io.o
OBJECTS= xgboost_R.o $(CXXOBJ)

.PHONY: all clean
all: $(SHLIB)
$(SHLIB): $(OBJECTS)

xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp
xgboost_io.o: ../../src/io/io.cpp

$(CXXOBJ) :
	$(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )

clean:
	rm -rf *.so *.o *~ *.dll

R-package/src/xgboost_R.cpp (new file): 221 lines
@@ -0,0 +1,221 @@
#include <vector>
#include <string>
#include <utility>
#include <cstring>
#include "xgboost_R.h"
#include "../../wrapper/xgboost_wrapper.h"
#include "../../src/utils/utils.h"
#include "../../src/utils/omp.h"
#include "../../src/utils/matrix_csr.h"

using namespace xgboost;
// implements error handling
namespace xgboost {
namespace utils {
void HandleAssertError(const char *msg) {
  error("%s", msg);
}
void HandleCheckError(const char *msg) {
  error("%s", msg);
}
}  // namespace utils
}  // namespace xgboost

extern "C" {
  void _DMatrixFinalizer(SEXP ext) {
    if (R_ExternalPtrAddr(ext) == NULL) return;
    XGDMatrixFree(R_ExternalPtrAddr(ext));
    R_ClearExternalPtr(ext);
  }
  SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
    void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
    return ret;
  }
  SEXP XGDMatrixCreateFromMat_R(SEXP mat,
                                SEXP missing) {
    SEXP dim = getAttrib(mat, R_DimSymbol);
    int nrow = INTEGER(dim)[0];
    int ncol = INTEGER(dim)[1];
    double *din = REAL(mat);
    std::vector<float> data(nrow * ncol);
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < nrow; ++i) {
      for (int j = 0; j < ncol; ++j) {
        data[i * ncol + j] = din[i + nrow * j];
      }
    }
    void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing));
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
    return ret;
  }
  SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
                                SEXP indices,
                                SEXP data) {
    const int *col_ptr = INTEGER(indptr);
    const int *row_index = INTEGER(indices);
    const double *col_data = REAL(data);
    int ncol = length(indptr) - 1;
    int ndata = length(data);
    // transform into CSR format
    std::vector<bst_ulong> row_ptr;
    std::vector< std::pair<unsigned, float> > csr_data;
    utils::SparseCSRMBuilder<std::pair<unsigned,float>, false, bst_ulong> builder(row_ptr, csr_data);
    builder.InitBudget();
    for (int i = 0; i < ncol; ++i) {
      for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
        builder.AddBudget(row_index[j]);
      }
    }
    builder.InitStorage();
    for (int i = 0; i < ncol; ++i) {
      for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
        builder.PushElem(row_index[j], std::make_pair(i, col_data[j]));
      }
    }
    utils::Assert(csr_data.size() == static_cast<size_t>(ndata), "BUG CreateFromCSC");
    std::vector<float> row_data(ndata);
    std::vector<unsigned> col_index(ndata);
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < ndata; ++i) {
      col_index[i] = csr_data[i].first;
      row_data[i] = csr_data[i].second;
    }
    void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata);
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
    UNPROTECT(1);
    return ret;
  }
  void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
    XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
                        CHAR(asChar(fname)), asInteger(silent));
  }
  void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
    int len = length(array);
    const char *name = CHAR(asChar(field));
    if (!strcmp("group", name)) {
      std::vector<unsigned> vec(len);
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < len; ++i) {
        vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
      }
      XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
      return;
    }
    {
      std::vector<float> vec(len);
      #pragma omp parallel for schedule(static)
      for (int i = 0; i < len; ++i) {
        vec[i] = REAL(array)[i];
      }
      XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
                            CHAR(asChar(field)),
                            &vec[0], len);
    }
  }
  SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
    bst_ulong olen;
    const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
                                             CHAR(asChar(field)), &olen);
    SEXP ret = PROTECT(allocVector(REALSXP, olen));
    for (size_t i = 0; i < olen; ++i) {
      REAL(ret)[i] = res[i];
    }
    UNPROTECT(1);
    return ret;
  }
  // functions related to booster
  void _BoosterFinalizer(SEXP ext) {
    if (R_ExternalPtrAddr(ext) == NULL) return;
    XGBoosterFree(R_ExternalPtrAddr(ext));
    R_ClearExternalPtr(ext);
  }
  SEXP XGBoosterCreate_R(SEXP dmats) {
    int len = length(dmats);
    std::vector<void*> dvec;
    for (int i = 0; i < len; ++i) {
      dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
    }
    void *handle = XGBoosterCreate(&dvec[0], dvec.size());
    SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
    R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
    UNPROTECT(1);
    return ret;
  }
  void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
    XGBoosterSetParam(R_ExternalPtrAddr(handle),
                      CHAR(asChar(name)),
                      CHAR(asChar(val)));
  }
  void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
    XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
                           asInteger(iter),
                           R_ExternalPtrAddr(dtrain));
  }
  void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
    utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
    int len = length(grad);
    std::vector<float> tgrad(len), thess(len);
    #pragma omp parallel for schedule(static)
    for (int j = 0; j < len; ++j) {
      tgrad[j] = REAL(grad)[j];
      thess[j] = REAL(hess)[j];
    }
    XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
                          R_ExternalPtrAddr(dtrain),
                          &tgrad[0], &thess[0], len);
  }
  SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
    utils::Check(length(dmats) == length(evnames), "dmats and evnames must have same length");
    int len = length(dmats);
    std::vector<void*> vec_dmats;
    std::vector<std::string> vec_names;
    std::vector<const char*> vec_sptr;
    for (int i = 0; i < len; ++i) {
      vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
      vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
    }
    for (int i = 0; i < len; ++i) {
      vec_sptr.push_back(vec_names[i].c_str());
    }
    return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
                                         asInteger(iter),
                                         &vec_dmats[0], &vec_sptr[0], len));
  }
  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
    bst_ulong olen;
    const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                        R_ExternalPtrAddr(dmat),
                                        asInteger(output_margin),
                                        &olen);
    SEXP ret = PROTECT(allocVector(REALSXP, olen));
    for (size_t i = 0; i < olen; ++i) {
      REAL(ret)[i] = res[i];
    }
    UNPROTECT(1);
    return ret;
  }
  void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
    XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
  }
  void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
    XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
  }
  void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
    bst_ulong olen;
    const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                          CHAR(asChar(fmap)),
                                          &olen);
    FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
    for (size_t i = 0; i < olen; ++i) {
      fprintf(fo, "booster[%u]:\n", static_cast<unsigned>(i));
      fprintf(fo, "%s", res[i]);
    }
    fclose(fo);
  }
}

R-package/src/xgboost_R.h (new file): 124 lines
@@ -0,0 +1,124 @@
#ifndef XGBOOST_WRAPPER_R_H_
#define XGBOOST_WRAPPER_R_H_
/*!
 * \file xgboost_wrapper_R.h
 * \author Tianqi Chen
 * \brief R wrapper of xgboost
 */
extern "C" {
#include <Rinternals.h>
}

extern "C" {
  /*!
   * \brief load a data matrix
   * \param fname name of the file
   * \param silent whether to print messages
   * \return a loaded data matrix
   */
  SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
  /*!
   * \brief create matrix content from a dense matrix;
   *        this assumes the matrix is stored in column-major format
   * \param mat R matrix object
   * \param missing which value represents a missing value
   * \return created dmatrix
   */
  SEXP XGDMatrixCreateFromMat_R(SEXP mat,
                                SEXP missing);
  /*!
   * \brief create matrix content from CSC format
   * \param indptr pointer to column headers
   * \param indices row indices
   * \param data content of the data
   * \return created dmatrix
   */
  SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
                                SEXP indices,
                                SEXP data);
  /*!
   * \brief save a data matrix into a binary file
   * \param handle an instance of data matrix
   * \param fname file name
   * \param silent print statistics when saving
   */
  void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
  /*!
   * \brief set information into dmatrix
   * \param handle an instance of data matrix
   * \param field field name, can be label, weight
   * \param array pointer to float vector
   */
  void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array);
  /*!
   * \brief get info vector from matrix
   * \param handle an instance of data matrix
   * \param field field name
   * \return info vector
   */
  SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
  /*!
   * \brief create xgboost learner
   * \param dmats a list of dmatrix handles that will be cached
   */
  SEXP XGBoosterCreate_R(SEXP dmats);
  /*!
   * \brief set parameters
   * \param handle handle
   * \param name parameter name
   * \param val value of parameter
   */
  void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
  /*!
   * \brief update the model in one round using dtrain
   * \param handle handle
   * \param iter current iteration round
   * \param dtrain training data
   */
  void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain);
  /*!
   * \brief update the model by directly specifying the gradient and second-order gradient;
   *        this can be used to replace UpdateOneIter, to support customized loss functions
   * \param handle handle
   * \param dtrain training data
   * \param grad gradient statistics
   * \param hess second-order gradient statistics
   */
  void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
  /*!
   * \brief get evaluation statistics for xgboost
   * \param handle handle
   * \param iter current iteration round
   * \param dmats list of handles to dmatrices
   * \param evnames names of the evaluation sets
   * \return the string containing the evaluation statistics
   */
  SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
  /*!
   * \brief make prediction based on dmat
   * \param handle handle
   * \param dmat data matrix
   * \param output_margin whether to only output the raw margin value
   */
  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
  /*!
   * \brief load model from an existing file
   * \param handle handle
   * \param fname file name
   */
  void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
  /*!
   * \brief save model into a file
   * \param handle handle
   * \param fname file name
   */
  void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
  /*!
   * \brief dump model into a text file
   * \param handle handle
   * \param fname name of the text file the model is dumped into
   * \param fmap name of the feature map file, can be an empty string
   */
  void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap);
};
#endif  // XGBOOST_WRAPPER_R_H_
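
Every entry point above is reached from R via `.Call`; the R functions earlier in this diff are thin wrappers around them. A sketch of the direct correspondence, normally hidden behind `xgb.DMatrix()` and friends (assumes the xgboost shared library is already loaded):

```r
# direct .Call usage against the C entry points declared above
handle <- .Call("XGDMatrixCreateFromFile_R", "agaricus.txt.train",
                as.integer(TRUE), PACKAGE = "xgboost")
labels <- .Call("XGDMatrixGetInfo_R", handle, "label", PACKAGE = "xgboost")
```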
@@ -6,13 +6,13 @@ objective = binary:logistic

# Tree Booster Parameters
# step size shrinkage
-bst:eta = 1.0
+eta = 1.0
# minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
# minimum sum of instance weight (hessian) needed in a child
-bst:min_child_weight = 1
+min_child_weight = 1
# maximum depth of a tree
-bst:max_depth = 3
+max_depth = 3

# Task Parameters
# the number of rounds for boosting
@@ -42,8 +42,8 @@ param = {}
param['objective'] = 'binary:logitraw'
# scale weight of positive examples
param['scale_pos_weight'] = sum_wneg/sum_wpos
-param['bst:eta'] = 0.1
-param['bst:max_depth'] = 6
+param['eta'] = 0.1
+param['max_depth'] = 6
param['eval_metric'] = 'auc'
param['silent'] = 1
param['nthread'] = 16
@@ -25,8 +25,8 @@ param = {}
# use softmax multi-class classification
param['objective'] = 'multi:softmax'
# scale weight of positive examples
-param['bst:eta'] = 0.1
-param['bst:max_depth'] = 6
+param['eta'] = 0.1
+param['max_depth'] = 6
param['silent'] = 1
param['nthread'] = 4
param['num_class'] = 6
@@ -5,13 +5,13 @@ objective="rank:pairwise"

# Tree Booster Parameters
# step size shrinkage
-bst:eta = 0.1
+eta = 0.1
# minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
# minimum sum of instance weight (hessian) needed in a child
-bst:min_child_weight = 0.1
+min_child_weight = 0.1
# maximum depth of a tree
-bst:max_depth = 6
+max_depth = 6

# Task parameters
# the number of rounds for boosting
@@ -7,13 +7,13 @@ objective = reg:linear

# Tree Booster Parameters
# step size shrinkage
-bst:eta = 1.0
+eta = 1.0
# minimum loss reduction required to make a further partition
-bst:gamma = 1.0
+gamma = 1.0
# minimum sum of instance weight (hessian) needed in a child
-bst:min_child_weight = 1
+min_child_weight = 1
# maximum depth of a tree
-bst:max_depth = 3
+max_depth = 3

# Task parameters
# the number of rounds for boosting

src/data.h: 20 changes
@@ -12,6 +12,7 @@
#include <cstring>
#include <algorithm>
#include "utils/io.h"
#include "utils/omp.h"
#include "utils/utils.h"
#include "utils/iterator.h"
#include "utils/random.h"
@@ -44,6 +45,10 @@ struct bst_gpair {
 * this information is not necessarily present, and can be empty
 */
struct BoosterInfo {
+  /*! \brief number of rows in the data */
+  size_t num_row;
+  /*! \brief number of columns in the data */
+  size_t num_col;
  /*!
   * \brief specified root index of each instance,
   *        can be used for multi-task setting
@@ -51,6 +56,9 @@ struct BoosterInfo {
  std::vector<unsigned> root_index;
  /*! \brief set fold indicator */
  std::vector<unsigned> fold_index;
+  /*! \brief number of rows, number of columns */
+  BoosterInfo(void) : num_row(0), num_col(0) {
+  }
  /*! \brief get root of ith instance */
  inline unsigned GetRoot(size_t i) const {
    return root_index.size() == 0 ? 0 : root_index[i];
@@ -96,7 +104,7 @@ struct SparseBatch {
  const Entry *data_ptr;
  /*! \brief get i-th row from the batch */
  inline Inst operator[](size_t i) const {
-    return Inst(data_ptr + row_ptr[i], row_ptr[i+1] - row_ptr[i]);
+    return Inst(data_ptr + row_ptr[i], static_cast<bst_uint>(row_ptr[i+1] - row_ptr[i]));
  }
};

@@ -334,7 +342,7 @@ class FMatrixS : public FMatrixInterface<FMatrixS>{
  const SparseBatch &batch = iter_->Value();
  for (size_t i = 0; i < batch.size; ++i) {
    if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
-      buffered_rowset_.push_back(batch.base_rowid+i);
+      buffered_rowset_.push_back(static_cast<bst_uint>(batch.base_rowid+i));
      SparseBatch::Inst inst = batch[i];
      for (bst_uint j = 0; j < inst.length; ++j) {
        builder.AddBudget(inst[j].findex);
@@ -363,11 +371,11 @@ class FMatrixS : public FMatrixInterface<FMatrixS>{
  }

  // sort columns
-  unsigned ncol = static_cast<unsigned>(this->NumCol());
+  bst_omp_uint ncol = static_cast<bst_omp_uint>(this->NumCol());
  #pragma omp parallel for schedule(static)
-  for (unsigned i = 0; i < ncol; ++i) {
-    std::sort(&col_data_[col_ptr_[i]],
-              &col_data_[col_ptr_[i + 1]], Entry::CmpValue);
+  for (bst_omp_uint i = 0; i < ncol; ++i) {
+    std::sort(&col_data_[0] + col_ptr_[i],
+              &col_data_[0] + col_ptr_[i + 1], Entry::CmpValue);
  }
}
@@ -51,20 +51,21 @@ class GBLinear : public IGradBooster<FMatrix> {
  // for all the output groups
  for (int gid = 0; gid < ngroup; ++gid) {
    double sum_grad = 0.0, sum_hess = 0.0;
-    const unsigned ndata = static_cast<unsigned>(rowset.size());
+    const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
    #pragma omp parallel for schedule(static) reduction(+: sum_grad, sum_hess)
-    for (unsigned i = 0; i < ndata; ++i) {
+    for (bst_omp_uint i = 0; i < ndata; ++i) {
      bst_gpair &p = gpair[rowset[i] * ngroup + gid];
      if (p.hess >= 0.0f) {
        sum_grad += p.grad; sum_hess += p.hess;
      }
    }
    // remove bias effect
-    double dw = param.learning_rate * param.CalcDeltaBias(sum_grad, sum_hess, model.bias()[gid]);
+    bst_float dw = static_cast<bst_float>(
+        param.learning_rate * param.CalcDeltaBias(sum_grad, sum_hess, model.bias()[gid]));
    model.bias()[gid] += dw;
    // update grad value
    #pragma omp parallel for schedule(static)
-    for (unsigned i = 0; i < ndata; ++i) {
+    for (bst_omp_uint i = 0; i < ndata; ++i) {
      bst_gpair &p = gpair[rowset[i] * ngroup + gid];
      if (p.hess >= 0.0f) {
        p.grad += p.hess * dw;
@@ -72,9 +73,9 @@ class GBLinear : public IGradBooster<FMatrix> {
    }
  }
  // number of features
-  const unsigned nfeat = static_cast<unsigned>(feat_index.size());
+  const bst_omp_uint nfeat = static_cast<bst_omp_uint>(feat_index.size());
  #pragma omp parallel for schedule(static)
-  for (unsigned i = 0; i < nfeat; ++i) {
+  for (bst_omp_uint i = 0; i < nfeat; ++i) {
    const bst_uint fid = feat_index[i];
    for (int gid = 0; gid < ngroup; ++gid) {
      double sum_grad = 0.0, sum_hess = 0.0;
@@ -86,7 +87,7 @@ class GBLinear : public IGradBooster<FMatrix> {
      sum_hess += p.hess * v * v;
    }
    float &w = model[fid][gid];
-    double dw = param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w);
+    bst_float dw = static_cast<bst_float>(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w));
    w += dw;
    // update grad value
    for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) {
@@ -116,9 +117,9 @@ class GBLinear : public IGradBooster<FMatrix> {
  // k is number of groups
  preds.resize(preds.size() + batch.size * ngroup);
  // parallel over local batch
-  const unsigned nsize = static_cast<unsigned>(batch.size);
+  const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
  #pragma omp parallel for schedule(static)
-  for (unsigned i = 0; i < nsize; ++i) {
+  for (bst_omp_uint i = 0; i < nsize; ++i) {
    const size_t ridx = batch.base_rowid + i;
    // loop over output groups
    for (int gid = 0; gid < ngroup; ++gid) {
@ -94,8 +94,9 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
"must have exactly ngroup*nrow gpairs");
|
||||
std::vector<bst_gpair> tmp(gpair.size()/ngroup);
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (size_t i = 0; i < tmp.size(); ++i) {
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
tmp[i] = gpair[i * ngroup + gid];
|
||||
}
|
||||
this->BoostNewTrees(tmp, fmat, info, gid);
|
||||
@ -129,13 +130,13 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
// k is number of group
|
||||
preds.resize(preds.size() + batch.size * mparam.num_output_group);
|
||||
// parallel over local batch
|
||||
const unsigned nsize = static_cast<unsigned>(batch.size);
|
||||
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (unsigned i = 0; i < nsize; ++i) {
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
const int tid = omp_get_thread_num();
|
||||
tree::RegTree::FVec &feats = thread_temp[tid];
|
||||
const size_t ridx = batch.base_rowid + i;
|
||||
const unsigned root_idx = info.GetRoot(i);
|
||||
int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
|
||||
const unsigned root_idx = info.GetRoot(ridx);
|
||||
// loop over output groups
|
||||
for (int gid = 0; gid < mparam.num_output_group; ++gid) {
|
||||
preds[ridx * mparam.num_output_group + gid] =
|
||||
@ -172,15 +173,15 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
}
|
||||
updaters.clear();
|
||||
std::string tval = tparam.updater_seq;
|
||||
char *saveptr, *pstr;
|
||||
pstr = strtok_r(&tval[0], ",", &saveptr);
|
||||
char *pstr;
|
||||
pstr = strtok(&tval[0], ",");
|
||||
while (pstr != NULL) {
|
||||
updaters.push_back(tree::CreateUpdater<FMatrix>(pstr));
|
||||
for (size_t j = 0; j < cfg.size(); ++j) {
|
||||
// set parameters
|
||||
updaters.back()->SetParam(cfg[j].first.c_str(), cfg[j].second.c_str());
|
||||
}
|
||||
pstr = strtok_r(NULL, ",", &saveptr);
|
||||
pstr = strtok(NULL, ",");
|
||||
}
|
||||
tparam.updater_initialized = 1;
|
||||
}
|
||||
@ -218,7 +219,7 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
tree::RegTree::FVec *p_feats) {
|
||||
size_t itop = 0;
|
||||
float psum = 0.0f;
|
||||
const int bid = mparam.BufferOffset(buffer_index, bst_group);
|
||||
const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
|
||||
// load buffered results if any
|
||||
if (bid >= 0) {
|
||||
itop = pred_counter[bid];
|
||||
@ -320,7 +321,7 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
* \brief get the buffer offset given a buffer index and group id
|
||||
* \return calculated buffer offset
|
||||
*/
|
||||
inline size_t BufferOffset(int64_t buffer_index, int bst_group) const {
|
||||
inline int64_t BufferOffset(int64_t buffer_index, int bst_group) const {
|
||||
if (buffer_index < 0) return -1;
|
||||
utils::Check(buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer");
|
||||
return buffer_index + num_pbuffer * bst_group;
|
||||
|
||||
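Widening `BufferOffset` from `size_t` to `int64_t` is not cosmetic: the function returns -1 to mean "no prediction buffer", and under an unsigned return type that sentinel wraps to a huge positive index that the later `bid >= 0` check can never filter out. A simplified sketch of the signed-sentinel pattern (a free function standing in for the member):

    #include <cassert>
    #include <cstdint>

    // Simplified: -1 means "no prediction buffer assigned".
    inline int64_t BufferOffset(int64_t buffer_index, int bst_group, int64_t num_pbuffer) {
      if (buffer_index < 0) return -1;       // sentinel survives only in a signed type
      assert(buffer_index < num_pbuffer);
      return buffer_index + num_pbuffer * bst_group;
    }

    int main() {
      assert(BufferOffset(-1, 0, 100) == -1);  // with size_t this would wrap around
      assert(BufferOffset(5, 2, 100) == 205);
      return 0;
    }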
@ -2,6 +2,7 @@
#define _CRT_SECURE_NO_DEPRECATE
#include <string>
#include "./io.h"
#include "../utils/io.h"
#include "../utils/utils.h"
#include "simple_dmatrix-inl.hpp"
// implements data loads using dmatrix simple for now

@ -9,6 +10,19 @@
namespace xgboost {
namespace io {
DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
int magic;
utils::FileStream fs(utils::FopenCheck(fname, "rb"));
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
fs.Seek(0);

if (magic == DMatrixSimple::kMagic) {
DMatrixSimple *dmat = new DMatrixSimple();
dmat->LoadBinary(fs, silent, fname);
fs.Close();
return dmat;
}
fs.Close();

DMatrixSimple *dmat = new DMatrixSimple();
dmat->CacheLoad(fname, silent, savebuffer);
return dmat;

@ -62,10 +62,10 @@ class DMatrixSimple : public DataMatrix {
inline size_t AddRow(const std::vector<SparseBatch::Entry> &feats) {
for (size_t i = 0; i < feats.size(); ++i) {
row_data_.push_back(feats[i]);
info.num_col = std::max(info.num_col, static_cast<size_t>(feats[i].findex+1));
info.info.num_col = std::max(info.info.num_col, static_cast<size_t>(feats[i].findex+1));
}
row_ptr_.push_back(row_ptr_.back() + feats.size());
info.num_row += 1;
info.info.num_row += 1;
return row_ptr_.size() - 2;
}
/*!

@ -99,19 +99,19 @@ class DMatrixSimple : public DataMatrix {

if (!silent) {
printf("%lux%lu matrix with %lu entries is loaded from %s\n",
info.num_row, info.num_col, row_data_.size(), fname);
info.num_row(), info.num_col(), row_data_.size(), fname);
}
fclose(file);
// try to load in additional file
std::string name = fname;
std::string gname = name + ".group";
if (info.TryLoadGroup(gname.c_str(), silent)) {
utils::Check(info.group_ptr.back() == info.num_row,
utils::Check(info.group_ptr.back() == info.num_row(),
"DMatrix: group data does not match the number of rows in features");
}
std::string wname = name + ".weight";
if (info.TryLoadFloatInfo("weight", wname.c_str(), silent)) {
utils::Check(info.weights.size() == info.num_row,
utils::Check(info.weights.size() == info.num_row(),
"DMatrix: weight data does not match the number of rows in features");
}
std::string mname = name + ".base_margin";

@ -128,6 +128,17 @@ class DMatrixSimple : public DataMatrix {
FILE *fp = fopen64(fname, "rb");
if (fp == NULL) return false;
utils::FileStream fs(fp);
this->LoadBinary(fs, silent, fname);
fs.Close();
return true;
}
/*!
* \brief load from binary stream
* \param fs input file stream
* \param silent whether print information during loading
* \param fname file name, used to print message
*/
inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
int magic;
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
utils::Check(magic == kMagic, "invalid format,magic number mismatch");

@ -135,16 +146,19 @@ class DMatrixSimple : public DataMatrix {
info.LoadBinary(fs);
FMatrixS::LoadBinary(fs, &row_ptr_, &row_data_);
fmat.LoadColAccess(fs);
fs.Close();

if (!silent) {
printf("%lux%lu matrix with %lu entries is loaded from %s\n",
info.num_row, info.num_col, row_data_.size(), fname);
printf("%lux%lu matrix with %lu entries is loaded",
info.num_row(), info.num_col(), row_data_.size());
if (fname != NULL) {
printf(" from %s\n", fname);
} else {
printf("\n");
}
if (info.group_ptr.size() != 0) {
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
}
}
return true;
}
/*!
* \brief save to binary file

@ -163,7 +177,7 @@ class DMatrixSimple : public DataMatrix {

if (!silent) {
printf("%lux%lu matrix with %lu entries is saved to %s\n",
info.num_row, info.num_col, row_data_.size(), fname);
info.num_row(), info.num_col(), row_data_.size(), fname);
if (info.group_ptr.size() != 0) {
printf("data contains %lu groups\n", info.group_ptr.size()-1);
}

@ -179,7 +193,7 @@ class DMatrixSimple : public DataMatrix {
* \param savebuffer whether do save binary buffer if it is text
*/
inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true) {
int len = strlen(fname);
size_t len = strlen(fname);
if (len > 8 && !strcmp(fname + len - 7, ".buffer")) {
if (!this->LoadBinary(fname, silent)) {
utils::Error("can not open file \"%s\"", fname);
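The new `LoadDataMatrix` decides between the binary buffer format and the text parser by sniffing a magic number and rewinding with `fs.Seek(0)` before handing the stream on. Roughly the same technique with plain stdio, using a hypothetical magic value rather than xgboost's `DMatrixSimple::kMagic`:

    #include <cstdio>

    const int kMagic = 0xabcd;  // hypothetical; the real code defines its own constant

    bool IsBinaryBuffer(const char *fname) {
      FILE *fp = std::fopen(fname, "rb");
      if (fp == NULL) return false;
      int magic = 0;
      size_t n = std::fread(&magic, sizeof(magic), 1, fp);
      std::fclose(fp);
      return n == 1 && magic == kMagic;  // otherwise fall back to the text parser
    }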
@ -15,10 +15,12 @@ namespace learner {
* \brief meta information needed in training, including label, weight
*/
struct MetaInfo {
/*! \brief number of rows in the data */
size_t num_row;
/*! \brief number of columns in the data */
size_t num_col;
/*!
* \brief information needed by booster
* BoosterInfo does not implement save and load,
* all serialization is done in MetaInfo
*/
BoosterInfo info;
/*! \brief label of each instance */
std::vector<float> labels;
/*!

@ -28,8 +30,6 @@ struct MetaInfo {
std::vector<bst_uint> group_ptr;
/*! \brief weights of each instance, optional */
std::vector<float> weights;
/*! \brief information needed by booster */
BoosterInfo info;
/*!
* \brief initialized margins,
* if specified, xgboost will start from this init margin

@ -39,7 +39,15 @@ struct MetaInfo {
/*! \brief version flag, used to check version of this info */
static const int kVersion = 0;
// constructor
MetaInfo(void) : num_row(0), num_col(0) {}
MetaInfo(void) {}
/*! \return number of rows in dataset */
inline size_t num_row(void) const {
return info.num_row;
}
/*! \return number of columns in dataset */
inline size_t num_col(void) const {
return info.num_col;
}
/*! \brief clear all the information */
inline void Clear(void) {
labels.clear();

@ -47,7 +55,7 @@ struct MetaInfo {
weights.clear();
info.root_index.clear();
base_margin.clear();
num_row = num_col = 0;
info.num_row = info.num_col = 0;
}
/*! \brief get weight of each instances */
inline float GetWeight(size_t i) const {

@ -60,8 +68,8 @@ struct MetaInfo {
inline void SaveBinary(utils::IStream &fo) const {
int version = kVersion;
fo.Write(&version, sizeof(version));
fo.Write(&num_row, sizeof(num_row));
fo.Write(&num_col, sizeof(num_col));
fo.Write(&info.num_row, sizeof(info.num_row));
fo.Write(&info.num_col, sizeof(info.num_col));
fo.Write(labels);
fo.Write(group_ptr);
fo.Write(weights);

@ -70,9 +78,9 @@ struct MetaInfo {
}
inline void LoadBinary(utils::IStream &fi) {
int version;
utils::Check(fi.Read(&version, sizeof(version)), "MetaInfo: invalid format");
utils::Check(fi.Read(&num_row, sizeof(num_row)), "MetaInfo: invalid format");
utils::Check(fi.Read(&num_col, sizeof(num_col)), "MetaInfo: invalid format");
utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_col, sizeof(info.num_col)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&labels), "MetaInfo: invalid format");
utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format");
utils::Check(fi.Read(&weights), "MetaInfo: invalid format");

@ -94,19 +102,28 @@ struct MetaInfo {
fclose(fi);
return true;
}
inline std::vector<float>& GetInfo(const char *field) {
inline std::vector<float>& GetFloatInfo(const char *field) {
if (!strcmp(field, "label")) return labels;
if (!strcmp(field, "weight")) return weights;
if (!strcmp(field, "base_margin")) return base_margin;
utils::Error("unknown field %s", field);
return labels;
}
inline const std::vector<float>& GetInfo(const char *field) const {
return ((MetaInfo*)this)->GetInfo(field);
inline const std::vector<float>& GetFloatInfo(const char *field) const {
return ((MetaInfo*)this)->GetFloatInfo(field);
}
inline std::vector<unsigned> &GetUIntInfo(const char *field) {
if (!strcmp(field, "root_index")) return info.root_index;
if (!strcmp(field, "fold_index")) return info.fold_index;
utils::Error("unknown field %s", field);
return info.root_index;
}
inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
return ((MetaInfo*)this)->GetUIntInfo(field);
}
// try to load weight information from file, if exists
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
std::vector<float> &weights = this->GetInfo(field);
std::vector<float> &weights = this->GetFloatInfo(field);
FILE *fi = fopen64(fname, "r");
if (fi == NULL) return false;
float wt;
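Splitting the old `GetInfo` into `GetFloatInfo` and `GetUIntInfo` keeps the float fields (label, weight, base_margin) and the unsigned fields (root_index, fold_index) behind separate, type-correct lookups instead of one stringly-typed accessor. A condensed sketch of the dispatch shape (error handling swapped for an exception to keep it standalone):

    #include <cstring>
    #include <stdexcept>
    #include <vector>

    struct Info {
      std::vector<float> labels, weights;
      std::vector<unsigned> root_index;
      std::vector<float>& GetFloatInfo(const char *field) {
        if (!std::strcmp(field, "label")) return labels;
        if (!std::strcmp(field, "weight")) return weights;
        throw std::invalid_argument("unknown float field");
      }
      std::vector<unsigned>& GetUIntInfo(const char *field) {
        if (!std::strcmp(field, "root_index")) return root_index;
        throw std::invalid_argument("unknown uint field");
      }
    };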
@ -26,10 +26,10 @@ struct EvalEWiseBase : public IEvaluator {
const MetaInfo &info) const {
utils::Check(preds.size() == info.labels.size(),
"label and prediction size not match");
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
float sum = 0.0, wsum = 0.0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const float wt = info.GetWeight(i);
sum += Derived::EvalRow(info.labels[i], preds[i]) * wt;
wsum += wt;

@ -109,12 +109,12 @@ struct EvalAMS : public IEvaluator {
}
virtual float Eval(const std::vector<float> &preds,
const MetaInfo &info) const {
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams");
std::vector< std::pair<float, unsigned> > rec(ndata);

#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
rec[i] = std::make_pair(preds[i], i);
}
std::sort(rec.begin(), rec.end(), CmpFirst);

@ -123,7 +123,7 @@ struct EvalAMS : public IEvaluator {
const double br = 10.0;
unsigned thresindex = 0;
double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) {
for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
const unsigned ridx = rec[i].second;
const float wt = info.weights[ridx];
if (info.labels[ridx] > 0.5f) {

@ -132,7 +132,7 @@ struct EvalAMS : public IEvaluator {
b_fp += wt;
}
if (rec[i].first != rec[i+1].first) {
double ams = sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
double ams = sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
if (tams < ams) {
thresindex = i;
tams = ams;

@ -141,9 +141,9 @@ struct EvalAMS : public IEvaluator {
}
if (ntop == ndata) {
fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
return tams;
return static_cast<float>(tams);
} else {
return sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
return static_cast<float>(sqrt(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp)));
}
}
virtual const char *Name(void) const {

@ -171,7 +171,7 @@ struct EvalPrecisionRatio : public IEvaluator{
utils::Assert(preds.size() == info.labels.size(), "label size predict size not match");
std::vector< std::pair<float, unsigned> > rec;
for (size_t j = 0; j < preds.size(); ++j) {
rec.push_back(std::make_pair(preds[j], j));
rec.push_back(std::make_pair(preds[j], static_cast<unsigned>(j)));
}
std::sort(rec.begin(), rec.end(), CmpFirst);
double pratio = CalcPRatio(rec, info);

@ -207,11 +207,11 @@ struct EvalAuc : public IEvaluator {
virtual float Eval(const std::vector<float> &preds,
const MetaInfo &info) const {
utils::Check(preds.size() == info.labels.size(), "label size predict size not match");
std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(preds.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Check(gptr.back() == preds.size(),
"EvalAuc: group structure must match number of prediction");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statictis
double sum_auc = 0.0f;
#pragma omp parallel reduction(+:sum_auc)

@ -219,7 +219,7 @@ struct EvalAuc : public IEvaluator {
// each thread takes a local rec
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], j));

@ -264,12 +264,12 @@ struct EvalRankList : public IEvaluator {
utils::Check(preds.size() == info.labels.size(),
"label size predict size not match");
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(preds.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
utils::Assert(gptr.back() == preds.size(),
"EvalRanklist: group structure must match number of prediction");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_metric = 0.0f;
#pragma omp parallel reduction(+:sum_metric)

@ -277,7 +277,7 @@ struct EvalRankList : public IEvaluator {
// each thread takes a local rec
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], static_cast<int>(info.labels[j])));

@ -339,7 +339,7 @@ struct EvalNDCG : public EvalRankList{
for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) {
const unsigned rel = rec[i].second;
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / logf(i + 2);
sumdcg += ((1 << rel) - 1) / log(i + 2.0);
}
}
return static_cast<float>(sumdcg);
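Both the old `logf(i + 2)` and the new `log(i + 2.0)` in EvalNDCG implement the exponential-gain DCG, with the new form computing the discount in double precision; in this code's convention (natural log, rather than the log base 2 seen in some texts) that is

    \mathrm{DCG@}n = \sum_{i=0}^{n-1} \frac{2^{\mathrm{rel}_i} - 1}{\ln(i + 2)}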
@ -7,6 +7,7 @@
*/
#include <string>
#include <vector>
#include <cstdio>
#include "../utils/utils.h"
#include "./dmatrix.h"

@ -58,9 +58,9 @@ class BoostLearner {
if (dupilicate) continue;
// set mats[i]'s cache learner pointer to this
mats[i]->cache_learner_ptr_ = this;
cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->info.num_row));
buffer_size += mats[i]->info.num_row;
num_feature = std::max(num_feature, static_cast<unsigned>(mats[i]->info.num_col));
cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->info.num_row()));
buffer_size += mats[i]->info.num_row();
num_feature = std::max(num_feature, static_cast<unsigned>(mats[i]->info.num_col()));
}
char str_temp[25];
if (num_feature > mparam.num_feature) {

@ -79,6 +79,11 @@ class BoostLearner {
* \param val value of the parameter
*/
inline void SetParam(const char *name, const char *val) {
// in this version, bst: prefix is no longer required
if (strncmp(name, "bst:", 4) != 0) {
std::string n = "bst:"; n += name;
this->SetParam(n.c_str(), val);
}
if (!strcmp(name, "silent")) silent = atoi(val);
if (!strcmp(name, "prob_buffer_row")) prob_buffer_row = static_cast<float>(atof(val));
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);

@ -91,7 +96,7 @@ class BoostLearner {
if (!strcmp(name, "objective")) name_obj_ = val;
if (!strcmp(name, "booster")) name_gbm_ = val;
mparam.SetParam(name, val);
}
}
if (gbm_ != NULL) gbm_->SetParam(name, val);
if (obj_ != NULL) obj_->SetParam(name, val);
if (gbm_ == NULL || obj_ == NULL) {

@ -248,17 +253,17 @@ class BoostLearner {
data.info.info, out_preds);
// add base margin
std::vector<float> &preds = *out_preds;
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
if (data.info.base_margin.size() != 0) {
utils::Check(preds.size() == data.info.base_margin.size(),
"base_margin.size does not match with prediction size");
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] += data.info.base_margin[j];
}
} else {
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] += mparam.base_score;
}
}

@ -329,8 +334,8 @@ class BoostLearner {
inline int64_t FindBufferOffset(const DMatrix<FMatrix> &mat) const {
for (size_t i = 0; i < cache_.size(); ++i) {
if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
if (cache_[i].num_row_ == mat.info.num_row) {
return cache_[i].buffer_offset_;
if (cache_[i].num_row_ == mat.info.num_row()) {
return static_cast<int64_t>(cache_[i].buffer_offset_);
}
}
}
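The new `SetParam` block makes the `bst:` prefix optional: any name that does not already carry it is forwarded once to its prefixed spelling, and processing then continues with the original name, so old configuration files and new prefix-free ones both work. The recursion terminates because the forwarded name always starts with `bst:`. A sketch of the idiom:

    #include <cstdio>
    #include <cstring>
    #include <string>

    void SetParam(const char *name, const char *val) {
      if (std::strncmp(name, "bst:", 4) != 0) {
        std::string n = "bst:"; n += name;
        SetParam(n.c_str(), val);  // recurse exactly once with the canonical name
      }
      std::printf("set %s=%s\n", name, val);
    }

    int main() { SetParam("max_depth", "6"); return 0; }  // sets bst:max_depth, then max_depth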
@ -116,9 +116,9 @@ class RegLossObj : public IObjFunction{
gpair.resize(preds.size());
// start calculating gradient
const unsigned nstep = static_cast<unsigned>(info.labels.size());
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const unsigned j = i % nstep;
float p = loss.PredTransform(preds[i]);
float w = info.GetWeight(j);

@ -132,9 +132,9 @@ class RegLossObj : public IObjFunction{
}
virtual void PredTransform(std::vector<float> *io_preds) {
std::vector<float> &preds = *io_preds;
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] = loss.PredTransform(preds[j]);
}
}

@ -169,12 +169,12 @@ class SoftmaxMultiClassObj : public IObjFunction {
std::vector<bst_gpair> &gpair = *out_gpair;
gpair.resize(preds.size());
const unsigned nstep = static_cast<unsigned>(info.labels.size() * nclass);
const unsigned ndata = static_cast<unsigned>(preds.size() / nclass);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size() / nclass);
#pragma omp parallel
{
std::vector<float> rec(nclass);
#pragma omp for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
for (int k = 0; k < nclass; ++k) {
rec[k] = preds[i * nclass + k];
}

@ -210,18 +210,18 @@ class SoftmaxMultiClassObj : public IObjFunction {
utils::Check(nclass != 0, "must set num_class to use softmax");
std::vector<float> &preds = *io_preds;
std::vector<float> tmp;
const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size()/nclass);
if (prob == 0) tmp.resize(ndata);
#pragma omp parallel
{
std::vector<float> rec(nclass);
#pragma omp for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
for (int k = 0; k < nclass; ++k) {
rec[k] = preds[j * nclass + k];
}
if (prob == 0) {
tmp[j] = FindMaxIndex(rec);
tmp[j] = static_cast<float>(FindMaxIndex(rec));
} else {
Softmax(&rec);
for (int k = 0; k < nclass; ++k) {

@ -259,11 +259,11 @@ class LambdaRankObj : public IObjFunction {
std::vector<bst_gpair> &gpair = *out_gpair;
gpair.resize(preds.size());
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = info.labels.size();
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Check(gptr.size() != 0 && gptr.back() == info.labels.size(),
"group structure not consistent with #rows");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
#pragma omp parallel
{
// parall construct, declare random number generator here, so that each

@ -273,7 +273,7 @@ class LambdaRankObj : public IObjFunction {
std::vector<ListEntry> lst;
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear();
for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
lst.push_back(ListEntry(preds[j], info.labels[j], j));

@ -290,7 +290,7 @@ class LambdaRankObj : public IObjFunction {
unsigned j = i + 1;
while (j < rec.size() && rec[j].first == rec[i].first) ++j;
// bucket in [i,j), get a sample outside bucket
unsigned nleft = i, nright = rec.size() - j;
unsigned nleft = i, nright = static_cast<unsigned>(rec.size() - j);
if (nleft + nright != 0) {
int nsample = num_pairsample;
while (nsample --) {

@ -436,9 +436,9 @@ class LambdaRankObjNDCG : public LambdaRankObj {
inline static float CalcDCG(const std::vector<float> &labels) {
double sumdcg = 0.0;
for (size_t i = 0; i < labels.size(); ++i) {
const unsigned rel = labels[i];
const unsigned rel = static_cast<unsigned>(labels[i]);
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / logf(i + 2);
sumdcg += ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
}
}
return static_cast<float>(sumdcg);

@ -42,11 +42,17 @@ class TreeModel {
int max_depth;
/*! \brief number of features used for tree construction */
int num_feature;
/*!
* \brief leaf vector size, used for vector tree
* used to store more than one dimensional information in tree
*/
int size_leaf_vector;
/*! \brief reserved part */
int reserved[32];
int reserved[31];
/*! \brief constructor */
Param(void) {
max_depth = 0;
size_leaf_vector = 0;
memset(reserved, 0, sizeof(reserved));
}
/*!

@ -57,6 +63,7 @@ class TreeModel {
inline void SetParam(const char *name, const char *val) {
if (!strcmp("num_roots", name)) num_roots = atoi(val);
if (!strcmp("num_feature", name)) num_feature = atoi(val);
if (!strcmp("size_leaf_vector", name)) size_leaf_vector = atoi(val);
}
};
/*! \brief tree node */

@ -166,10 +173,12 @@ class TreeModel {
protected:
// vector of nodes
std::vector<Node> nodes;
// stats of nodes
std::vector<TNodeStat> stats;
// free node space, used during training process
std::vector<int> deleted_nodes;
// stats of nodes
std::vector<TNodeStat> stats;
// leaf vector, that is used to store additional information
std::vector<bst_float> leaf_vector;
// allocate a new node,
// !!!!!! NOTE: may cause BUG here, nodes.resize
inline int AllocNode(void) {

@ -184,6 +193,7 @@ class TreeModel {
"number of nodes in the tree exceed 2^31");
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
return nd;
}
// delete a tree node

@ -247,6 +257,16 @@ class TreeModel {
inline NodeStat &stat(int nid) {
return stats[nid];
}
/*! \brief get leaf vector given nid */
inline bst_float* leafvec(int nid) {
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
}
/*! \brief get leaf vector given nid */
inline const bst_float* leafvec(int nid) const{
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
}
/*! \brief initialize the model */
inline void InitModel(void) {
param.num_nodes = param.num_roots;
105
src/tree/param.h

@ -11,45 +11,6 @@
namespace xgboost {
namespace tree {

/*! \brief core statistics used for tree construction */
struct GradStats {
/*! \brief sum gradient statistics */
double sum_grad;
/*! \brief sum hessian statistics */
double sum_hess;
/*! \brief constructor */
GradStats(void) {
this->Clear();
}
/*! \brief clear the statistics */
inline void Clear(void) {
sum_grad = sum_hess = 0.0f;
}
/*! \brief add statistics to the data */
inline void Add(double grad, double hess) {
sum_grad += grad; sum_hess += hess;
}
/*! \brief add statistics to the data */
inline void Add(const bst_gpair& b) {
this->Add(b.grad, b.hess);
}
/*! \brief add statistics to the data */
inline void Add(const GradStats &b) {
this->Add(b.sum_grad, b.sum_hess);
}
/*! \brief substract the statistics by b */
inline GradStats Substract(const GradStats &b) const {
GradStats res;
res.sum_grad = this->sum_grad - b.sum_grad;
res.sum_hess = this->sum_hess - b.sum_hess;
return res;
}
/*! \return whether the statistics is not used yet */
inline bool Empty(void) const {
return sum_hess == 0.0;
}
};

/*! \brief training parameters for regression tree */
struct TrainParam{
// learning step size for a time

@ -106,7 +67,7 @@ struct TrainParam{
if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
if (!strcmp(name, "reg_method")) reg_method = static_cast<float>(atof(val));
if (!strcmp(name, "reg_method")) reg_method = atoi(val);
if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
if (!strcmp(name, "colsample_bytree")) colsample_bytree = static_cast<float>(atof(val));

@ -165,13 +126,6 @@ struct TrainParam{
inline bool cannot_split(double sum_hess, int depth) const {
return sum_hess < this->min_child_weight * 2.0;
}
// code support for template data
inline double CalcWeight(const GradStats &d) const {
return this->CalcWeight(d.sum_grad, d.sum_hess);
}
inline double CalcGain(const GradStats &d) const {
return this->CalcGain(d.sum_grad, d.sum_hess);
}

protected:
// functions for L1 cost

@ -185,6 +139,63 @@ struct TrainParam{
}
};

/*! \brief core statistics used for tree construction */
struct GradStats {
/*! \brief sum gradient statistics */
double sum_grad;
/*! \brief sum hessian statistics */
double sum_hess;
/*! \brief constructor, the object must be cleared during construction */
explicit GradStats(const TrainParam &param) {
this->Clear();
}
/*! \brief clear the statistics */
inline void Clear(void) {
sum_grad = sum_hess = 0.0f;
}
/*!
* \brief accumulate statistics,
* \param gpair the vector storing the gradient statistics
* \param info the additional information
* \param ridx instance index of this instance
*/
inline void Add(const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
bst_uint ridx) {
const bst_gpair &b = gpair[ridx];
this->Add(b.grad, b.hess);
}
/*! \brief caculate leaf weight */
inline double CalcWeight(const TrainParam &param) const {
return param.CalcWeight(sum_grad, sum_hess);
}
/*! \brief calculate gain of the solution */
inline double CalcGain(const TrainParam &param) const {
return param.CalcGain(sum_grad, sum_hess);
}
/*! \brief add statistics to the data */
inline void Add(const GradStats &b) {
this->Add(b.sum_grad, b.sum_hess);
}
/*! \brief set current value to a - b */
inline void SetSubstract(const GradStats &a, const GradStats &b) {
sum_grad = a.sum_grad - b.sum_grad;
sum_hess = a.sum_hess - b.sum_hess;
}
/*! \return whether the statistics is not used yet */
inline bool Empty(void) const {
return sum_hess == 0.0;
}
/*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const{
}
protected:
/*! \brief add statistics to the data */
inline void Add(double grad, double hess) {
sum_grad += grad; sum_hess += hess;
}
};

/*!
* \brief statistics that is helpful to store
* and represent a split solution for the tree
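The net effect of this param.h reshuffle is that GradStats now fronts all gain and weight math itself (`stats.CalcGain(param)` instead of `param.CalcGain(stats)`), which is what lets the updaters take the statistics type as a template parameter (`ColMaker<FMatrix, GradStats>`). Any alternative TStats only has to expose the same small surface. A reduced sketch of that contract, with a toy gain matching the usual L2-regularized structure score (assumption: the real CalcGain also folds in L1 and other regularization terms):

    #include <cstdio>

    struct TrainParam { double reg_lambda = 1.0; };

    // Any TStats used by the updaters needs roughly this surface.
    struct GradStats {
      double sum_grad = 0.0, sum_hess = 0.0;
      explicit GradStats(const TrainParam &) {}
      void Add(double g, double h) { sum_grad += g; sum_hess += h; }
      double CalcGain(const TrainParam &p) const {
        return sum_grad * sum_grad / (sum_hess + p.reg_lambda);  // G^2 / (H + lambda)
      }
    };

    int main() {
      TrainParam p;
      GradStats s(p);
      s.Add(0.5, 1.0); s.Add(-0.2, 1.0);
      std::printf("gain=%g\n", s.CalcGain(p));
      return 0;
    }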
@ -60,7 +60,7 @@ namespace tree {
template<typename FMatrix>
inline IUpdater<FMatrix>* CreateUpdater(const char *name) {
if (!strcmp(name, "prune")) return new TreePruner<FMatrix>();
if (!strcmp(name, "refresh")) return new TreeRefresher<FMatrix>();
if (!strcmp(name, "refresh")) return new TreeRefresher<FMatrix, GradStats>();
if (!strcmp(name, "grow_colmaker")) return new ColMaker<FMatrix, GradStats>();
utils::Error("unknown updater:%s", name);
return NULL;

@ -51,8 +51,8 @@ class ColMaker: public IUpdater<FMatrix> {
/*! \brief current best solution */
SplitEntry best;
// constructor
ThreadEntry(void) {
stats.Clear();
explicit ThreadEntry(const TrainParam &param)
: stats(param) {
}
};
struct NodeEntry {

@ -65,8 +65,8 @@ class ColMaker: public IUpdater<FMatrix> {
/*! \brief current best solution */
SplitEntry best;
// constructor
NodeEntry(void) : root_gain(0.0f), weight(0.0f){
stats.Clear();
explicit NodeEntry(const TrainParam &param)
: stats(param), root_gain(0.0f), weight(0.0f){
}
};
// actual builder that runs the algorithm

@ -80,13 +80,13 @@ class ColMaker: public IUpdater<FMatrix> {
const BoosterInfo &info,
RegTree *p_tree) {
this->InitData(gpair, fmat, info.root_index, *p_tree);
this->InitNewNode(qexpand, gpair, fmat, *p_tree);
this->InitNewNode(qexpand, gpair, fmat, info, *p_tree);

for (int depth = 0; depth < param.max_depth; ++depth) {
this->FindSplit(depth, this->qexpand, gpair, fmat, p_tree);
this->FindSplit(depth, this->qexpand, gpair, fmat, info, p_tree);
this->ResetPosition(this->qexpand, fmat, *p_tree);
this->UpdateQueueExpand(*p_tree, &this->qexpand);
this->InitNewNode(qexpand, gpair, fmat, *p_tree);
this->InitNewNode(qexpand, gpair, fmat, info, *p_tree);
// if nothing left to be expand, break
if (qexpand.size() == 0) break;
}

@ -100,6 +100,7 @@ class ColMaker: public IUpdater<FMatrix> {
p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
p_tree->stat(nid).base_weight = snode[nid].weight;
p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid));
}
}

@ -175,34 +176,35 @@ class ColMaker: public IUpdater<FMatrix> {
inline void InitNewNode(const std::vector<int> &qexpand,
const std::vector<bst_gpair> &gpair,
const FMatrix &fmat,
const BoosterInfo &info,
const RegTree &tree) {
{// setup statistics space for each tree node
for (size_t i = 0; i < stemp.size(); ++i) {
stemp[i].resize(tree.param.num_nodes, ThreadEntry());
stemp[i].resize(tree.param.num_nodes, ThreadEntry(param));
}
snode.resize(tree.param.num_nodes, NodeEntry());
snode.resize(tree.param.num_nodes, NodeEntry(param));
}
const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
// setup position
const unsigned ndata = static_cast<unsigned>(rowset.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
const int tid = omp_get_thread_num();
if (position[ridx] < 0) continue;
stemp[tid][position[ridx]].stats.Add(gpair[ridx]);
stemp[tid][position[ridx]].stats.Add(gpair, info, ridx);
}
// sum the per thread statistics together
for (size_t j = 0; j < qexpand.size(); ++j) {
const int nid = qexpand[j];
TStats stats; stats.Clear();
TStats stats(param);
for (size_t tid = 0; tid < stemp.size(); ++tid) {
stats.Add(stemp[tid][nid].stats);
}
// update node statistics
snode[nid].stats = stats;
snode[nid].root_gain = param.CalcGain(stats);
snode[nid].weight = param.CalcWeight(stats);
snode[nid].root_gain = static_cast<float>(stats.CalcGain(param));
snode[nid].weight = static_cast<float>(stats.CalcWeight(param));
}
}
/*! \brief update queue expand add in new leaves */

@ -223,12 +225,15 @@ class ColMaker: public IUpdater<FMatrix> {
template<typename Iter>
inline void EnumerateSplit(Iter it, unsigned fid,
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
std::vector<ThreadEntry> &temp,
bool is_forward_search) {
// clear all the temp statistics
for (size_t j = 0; j < qexpand.size(); ++j) {
temp[qexpand[j]].stats.Clear();
}
// left statistics
TStats c(param);
while (it.Next()) {
const bst_uint ridx = it.rindex();
const int nid = position[ridx];

@ -239,19 +244,19 @@ class ColMaker: public IUpdater<FMatrix> {
ThreadEntry &e = temp[nid];
// test if first hit, this is fine, because we set 0 during init
if (e.stats.Empty()) {
e.stats.Add(gpair[ridx]);
e.stats.Add(gpair, info, ridx);
e.last_fvalue = fvalue;
} else {
// try to find a split
if (fabsf(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
TStats c = snode[nid].stats.Substract(e.stats);
c.SetSubstract(snode[nid].stats, e.stats);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, !is_forward_search);
}
}
// update the statistics
e.stats.Add(gpair[ridx]);
e.stats.Add(gpair, info, ridx);
e.last_fvalue = fvalue;
}
}

@ -259,9 +264,9 @@ class ColMaker: public IUpdater<FMatrix> {
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
ThreadEntry &e = temp[nid];
TStats c = snode[nid].stats.Substract(e.stats);
c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
const double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
const float delta = is_forward_search ? rt_eps : -rt_eps;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, !is_forward_search);
}

@ -269,7 +274,9 @@ class ColMaker: public IUpdater<FMatrix> {
}
// find splits at current level, do split per level
inline void FindSplit(int depth, const std::vector<int> &qexpand,
const std::vector<bst_gpair> &gpair, const FMatrix &fmat,
const std::vector<bst_gpair> &gpair,
const FMatrix &fmat,
const BoosterInfo &info,
RegTree *p_tree) {
std::vector<unsigned> feat_set = feat_index;
if (param.colsample_bylevel != 1.0f) {

@ -279,19 +286,19 @@ class ColMaker: public IUpdater<FMatrix> {
feat_set.resize(n);
}
// start enumeration
const unsigned nsize = static_cast<unsigned>(feat_set.size());
const bst_omp_uint nsize = static_cast<bst_omp_uint>(feat_set.size());
#if defined(_OPENMP)
const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
#endif
#pragma omp parallel for schedule(dynamic, batch_size)
for (unsigned i = 0; i < nsize; ++i) {
for (bst_omp_uint i = 0; i < nsize; ++i) {
const unsigned fid = feat_set[i];
const int tid = omp_get_thread_num();
if (param.need_forward_search(fmat.GetColDensity(fid))) {
this->EnumerateSplit(fmat.GetSortedCol(fid), fid, gpair, stemp[tid], true);
this->EnumerateSplit(fmat.GetSortedCol(fid), fid, gpair, info, stemp[tid], true);
}
if (param.need_backward_search(fmat.GetColDensity(fid))) {
this->EnumerateSplit(fmat.GetReverseSortedCol(fid), fid, gpair, stemp[tid], false);
this->EnumerateSplit(fmat.GetReverseSortedCol(fid), fid, gpair, info, stemp[tid], false);
}
}
// after this each thread's stemp will get the best candidates, aggregate results

@ -314,9 +321,9 @@ class ColMaker: public IUpdater<FMatrix> {
inline void ResetPosition(const std::vector<int> &qexpand, const FMatrix &fmat, const RegTree &tree) {
const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
// step 1, set default direct nodes to default, and leaf nodes to -1
const unsigned ndata = static_cast<unsigned>(rowset.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
const int nid = position[ridx];
if (nid >= 0) {

@ -337,9 +344,9 @@ class ColMaker: public IUpdater<FMatrix> {
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
// start put things into right place
const unsigned nfeats = static_cast<unsigned>(fsplits.size());
const bst_omp_uint nfeats = static_cast<bst_omp_uint>(fsplits.size());
#pragma omp parallel for schedule(dynamic, 1)
for (unsigned i = 0; i < nfeats; ++i) {
for (bst_omp_uint i = 0; i < nfeats; ++i) {
const unsigned fid = fsplits[i];
for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) {
const bst_uint ridx = it.rindex();
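In EnumerateSplit, `loss_chg` is the structure-score improvement of the candidate split: the gain of the accumulated left statistics `e.stats` plus the gain of the complement `c` (now formed in place with `SetSubstract`) minus the parent's `root_gain`. With the standard L2-regularized gain this reads

    \text{loss\_chg} = \frac{G_L^2}{H_L + \lambda} + \frac{G_R^2}{H_R + \lambda} - \frac{(G_L + G_R)^2}{H_L + H_R + \lambda}

with the complexity penalty applied separately through the min_split_loss threshold.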
@ -13,7 +13,7 @@
namespace xgboost {
namespace tree {
/*! \brief pruner that prunes a tree after growing finishs */
template<typename FMatrix>
template<typename FMatrix, typename TStats>
class TreeRefresher: public IUpdater<FMatrix> {
public:
virtual ~TreeRefresher(void) {}

@ -30,7 +30,7 @@ class TreeRefresher: public IUpdater<FMatrix> {
// number of threads
int nthread;
// thread temporal space
std::vector< std::vector<GradStats> > stemp;
std::vector< std::vector<TStats> > stemp;
std::vector<RegTree::FVec> fvec_temp;
// setup temp space for each thread
#pragma omp parallel

@ -38,14 +38,14 @@ class TreeRefresher: public IUpdater<FMatrix> {
nthread = omp_get_num_threads();
}
fvec_temp.resize(nthread, RegTree::FVec());
stemp.resize(trees.size() * nthread, std::vector<GradStats>());
stemp.resize(trees.size() * nthread, std::vector<TStats>());
#pragma omp parallel
{
int tid = omp_get_thread_num();
for (size_t i = 0; i < trees.size(); ++i) {
std::vector<GradStats> &vec = stemp[tid * trees.size() + i];
vec.resize(trees[i]->param.num_nodes);
std::fill(vec.begin(), vec.end(), GradStats());
std::vector<TStats> &vec = stemp[tid * trees.size() + i];
vec.resize(trees[i]->param.num_nodes, TStats(param));
std::fill(vec.begin(), vec.end(), TStats(param));
}
fvec_temp[tid].Init(trees[0]->param.num_feature);
}

@ -56,17 +56,16 @@ class TreeRefresher: public IUpdater<FMatrix> {
const SparseBatch &batch = iter->Value();
utils::Check(batch.size < std::numeric_limits<unsigned>::max(),
"too large batch size ");
const unsigned nbatch = static_cast<unsigned>(batch.size);
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < nbatch; ++i) {
for (bst_omp_uint i = 0; i < nbatch; ++i) {
SparseBatch::Inst inst = batch[i];
const int tid = omp_get_thread_num();
const size_t ridx = batch.base_rowid + i;
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
RegTree::FVec &feats = fvec_temp[tid];
feats.Fill(inst);
for (size_t j = 0; j < trees.size(); ++j) {
AddStats(*trees[j], feats, gpair[ridx],
info.GetRoot(j),
AddStats(*trees[j], feats, gpair, info, ridx,
&stemp[tid * trees.size() + j]);
}
feats.Drop(inst);

@ -95,31 +94,34 @@ class TreeRefresher: public IUpdater<FMatrix> {
private:
inline static void AddStats(const RegTree &tree,
const RegTree::FVec &feat,
const bst_gpair &gpair, unsigned root_id,
std::vector<GradStats> *p_gstats) {
std::vector<GradStats> &gstats = *p_gstats;
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
const bst_uint ridx,
std::vector<TStats> *p_gstats) {
std::vector<TStats> &gstats = *p_gstats;
// start from groups that belongs to current data
int pid = static_cast<int>(root_id);
gstats[pid].Add(gpair);
int pid = static_cast<int>(info.GetRoot(ridx));
gstats[pid].Add(gpair, info, ridx);
// tranverse tree
while (!tree[pid].is_leaf()) {
unsigned split_index = tree[pid].split_index();
pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
gstats[pid].Add(gpair);
gstats[pid].Add(gpair, info, ridx);
}
}
inline void Refresh(const std::vector<GradStats> &gstats,
inline void Refresh(const std::vector<TStats> &gstats,
int nid, RegTree *p_tree) {
RegTree &tree = *p_tree;
tree.stat(nid).base_weight = param.CalcWeight(gstats[nid]);
tree.stat(nid).base_weight = static_cast<float>(gstats[nid].CalcWeight(param));
tree.stat(nid).sum_hess = static_cast<float>(gstats[nid].sum_hess);
gstats[nid].SetLeafVec(param, tree.leafvec(nid));
if (tree[nid].is_leaf()) {
tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate);
} else {
tree.stat(nid).loss_chg =
param.CalcGain(gstats[tree[nid].cleft()]) +
param.CalcGain(gstats[tree[nid].cright()]) -
param.CalcGain(gstats[nid]);
tree.stat(nid).loss_chg = static_cast<float>(
gstats[tree[nid].cleft()].CalcGain(param) +
gstats[tree[nid].cright()].CalcGain(param) -
gstats[nid].CalcGain(param));
this->Refresh(gstats, tree[nid].cleft(), p_tree);
this->Refresh(gstats, tree[nid].cright(), p_tree);
}
@ -40,7 +40,7 @@ class IStream {
*/
template<typename T>
inline void Write(const std::vector<T> &vec) {
uint64_t sz = vec.size();
uint64_t sz = static_cast<uint64_t>(vec.size());
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&vec[0], sizeof(T) * sz);

@ -66,7 +66,7 @@ class IStream {
* \param str the string to be serialized
*/
inline void Write(const std::string &str) {
uint64_t sz = str.length();
uint64_t sz = static_cast<uint64_t>(str.length());
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&str[0], sizeof(char) * sz);

@ -102,6 +102,9 @@ class FileStream : public IStream {
virtual void Write(const void *ptr, size_t size) {
fwrite(ptr, size, 1, fp);
}
inline void Seek(size_t pos) {
fseek(fp, 0, SEEK_SET);
}
inline void Close(void) {
fclose(fp);
}
@ -17,26 +17,26 @@ namespace utils {
* \tparam IndexType type of index used to store the index position, usually unsigned or size_t
* \tparam whether enabling the usage of aclist, this option must be enabled manually
*/
template<typename IndexType, bool UseAcList = false>
template<typename IndexType, bool UseAcList = false, typename SizeType = size_t>
struct SparseCSRMBuilder {
private:
/*! \brief dummy variable used in the indicator matrix construction */
std::vector<size_t> dummy_aclist;
/*! \brief pointer to each of the row */
std::vector<size_t> &rptr;
std::vector<SizeType> &rptr;
/*! \brief index of nonzero entries in each row */
std::vector<IndexType> &findex;
/*! \brief a list of active rows, used when many rows are empty */
std::vector<size_t> &aclist;

public:
SparseCSRMBuilder(std::vector<size_t> &p_rptr,
SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
std::vector<IndexType> &p_findex)
:rptr(p_rptr), findex(p_findex), aclist(dummy_aclist) {
Assert(!UseAcList, "enabling bug");
}
/*! \brief use with caution! rptr must be cleaned before use */
SparseCSRMBuilder(std::vector<size_t> &p_rptr,
SparseCSRMBuilder(std::vector<SizeType> &p_rptr,
std::vector<IndexType> &p_findex,
std::vector<size_t> &p_aclist)
:rptr(p_rptr), findex(p_findex), aclist(p_aclist) {

@ -62,7 +62,7 @@ struct SparseCSRMBuilder {
* \param row_id the id of the row
* \param nelem number of element budget add to this row
*/
inline void AddBudget(size_t row_id, size_t nelem = 1) {
inline void AddBudget(size_t row_id, SizeType nelem = 1) {
if (rptr.size() < row_id + 2) {
rptr.resize(row_id + 2, 0);
}

@ -101,7 +101,7 @@ struct SparseCSRMBuilder {
* element to each row, the number of calls shall be exactly same as add_budget
*/
inline void PushElem(size_t row_id, IndexType col_id) {
size_t &rp = rptr[row_id + 1];
SizeType &rp = rptr[row_id + 1];
findex[rp++] = col_id;
}
/*!
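SparseCSRMBuilder is the classic two-pass CSR construction that the FMatrixS code earlier in this commit relies on: AddBudget counts entries per row, a prefix-sum pass converts counts to offsets, and PushElem drops each column index into its reserved slot. A freestanding sketch of the three phases (the real builder folds the fill cursor into rptr itself and has the extra aclist path):

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // entries as (row, col) pairs, in arbitrary order
      std::vector<std::pair<size_t, unsigned> > elems = {{0,3},{2,1},{0,0},{2,4}};
      std::vector<size_t> rptr(4, 0);                                  // nrow + 1
      for (auto &e : elems) ++rptr[e.first + 1];                       // pass 1: budget
      for (size_t i = 1; i < rptr.size(); ++i) rptr[i] += rptr[i-1];   // prefix sum
      std::vector<unsigned> findex(elems.size());
      std::vector<size_t> fill = rptr;                                 // fill cursors
      for (auto &e : elems) findex[fill[e.first]++] = e.second;        // pass 2: push
      for (size_t i = 0; i + 1 < rptr.size(); ++i)
        std::printf("row %zu: %zu entries\n", i, rptr[i+1] - rptr[i]);
      return 0;
    }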
@ -9,10 +9,26 @@
#include <omp.h>
#else
#ifndef DISABLE_OPENMP
#warning "OpenMP is not available, compile to single thread code"
#ifndef _MSC_VER
#warning "OpenMP is not available, compile to single thread code."\
"You may want to ungrade your compiler to enable OpenMP support,"\
"to get benefit of multi-threading."
#else
// TODO add warning for msvc
#endif
#endif
inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; }
inline void omp_set_num_threads(int nthread) {}
#endif

// loop variable used in openmp
namespace xgboost {
#ifdef _MSC_VER
typedef int bst_omp_uint;
#else
typedef unsigned bst_omp_uint;
#endif
}  // namespace xgboost

#endif  // XGBOOST_UTILS_OMP_H_
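`bst_omp_uint` exists because MSVC only ships OpenMP 2.0, whose `#pragma omp parallel for` requires a signed integral loop index, while GCC and Clang accept `unsigned`; this is why nearly every parallel loop in this commit is rewritten from `unsigned` to the typedef. Usage in miniature:

    #include <cstdio>
    #include <vector>
    #ifdef _MSC_VER
    typedef int bst_omp_uint;      // MSVC (OpenMP 2.0) needs a signed index
    #else
    typedef unsigned bst_omp_uint;
    #endif

    int main() {
      std::vector<double> v(1000, 1.0);
      const bst_omp_uint n = static_cast<bst_omp_uint>(v.size());
      double sum = 0.0;
      #pragma omp parallel for reduction(+: sum) schedule(static)
      for (bst_omp_uint i = 0; i < n; ++i) sum += v[i];
      std::printf("%g\n", sum);
      return 0;
    }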
@ -88,11 +88,21 @@ inline void Shuffle(std::vector<T> &data) {
struct Random{
/*! \brief set random number seed */
inline void Seed(unsigned sd) {
this->rseed = sd;
this->rseed = sd;
#if defined(_MSC_VER)||defined(_WIN32)
srand(rseed);
#endif
}
/*! \brief return a real number uniform in [0,1) */
inline double RandDouble(void) {
return static_cast<double>( rand_r( &rseed ) ) / (static_cast<double>( RAND_MAX )+1.0);
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
// For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
// todo, replace with another PRNG
#if defined(_MSC_VER)||defined(_WIN32)
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1.0);
#else
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
#endif
}
// random number seed
unsigned rseed;
@@ -6,8 +6,15 @@
* \author Tianqi Chen
*/
#define _CRT_SECURE_NO_WARNINGS
#include <cstdio>
#include <cstdarg>
#include <cstdlib>
#ifdef _MSC_VER
#define fopen64 fopen
// NOTE: sprintf_s is not equivalent to snprintf;
// they behave the same on success, which is sufficient for our case
#define snprintf sprintf_s
#define vsnprintf vsprintf_s
#else
#ifdef _FILE_OFFSET_BITS
#if _FILE_OFFSET_BITS == 32
@@ -36,49 +43,68 @@ typedef long int64_t;
#include <inttypes.h>
#endif


#include <cstdio>
#include <cstdarg>
#include <cstdlib>

namespace xgboost {
/*! \brief namespace for helper utils of the project */
namespace utils {
/*! \brief error message buffer length */
const int kErrorBuffer = 1 << 12;

#ifndef XGBOOST_CUSTOMIZE_ERROR_
/*!
* \brief handling of Assert error, caused by inappropriate input
* \param msg error message
*/
inline void HandleAssertError(const char *msg) {
fprintf(stderr, "AssertError:%s\n", msg);
exit(-1);
}
/*!
* \brief handling of Check error, caused by inappropriate input
* \param msg error message
*/
inline void HandleCheckError(const char *msg) {
fprintf(stderr, "%s\n", msg);
exit(-1);
}
#else
// declarations only; someone must implement these
void HandleAssertError(const char *msg);
void HandleCheckError(const char *msg);
#endif

/*! \brief assert a condition is true; use this to handle debug information */
inline void Assert(bool exp, const char *fmt, ...) {
if (!exp) {
std::string msg(kErrorBuffer, '\0');
va_list args;
va_start(args, fmt);
fprintf(stderr, "AssertError:");
vfprintf(stderr, fmt, args);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
va_end(args);
fprintf(stderr, "\n");
exit(-1);
HandleAssertError(msg.c_str());
}
}

/*! \brief same as Assert, but intended for messages shown to the user */
inline void Check(bool exp, const char *fmt, ...) {
if (!exp) {
std::string msg(kErrorBuffer, '\0');
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
va_end(args);
fprintf(stderr, "\n");
exit(-1);
HandleCheckError(msg.c_str());
}
}

/*! \brief report error message, same as Check */
inline void Error(const char *fmt, ...) {
{
std::string msg(kErrorBuffer, '\0');
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
vsnprintf(&msg[0], kErrorBuffer, fmt, args);
va_end(args);
fprintf(stderr, "\n");
exit(-1);
HandleCheckError(msg.c_str());
}
}


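Under XGBOOST_CUSTOMIZE_ERROR_ the two handlers are only declared, so the embedding host decides what a failure means. A sketch, not part of this commit's code, of handlers that throw instead of calling exit(-1) so the host can catch and recover; the R wrapper later in this commit does the same thing by routing both handlers to R's error():

#include <stdexcept>
#include <string>

namespace xgboost {
namespace utils {
// called when an internal Assert fails
void HandleAssertError(const char *msg) {
  throw std::logic_error(std::string("AssertError:") + msg);
}
// called when a user-facing Check or Error fires
void HandleCheckError(const char *msg) {
  throw std::runtime_error(msg);
}
}  // namespace utils
}  // namespace xgboost
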
1
windows/README.md
Normal file
@@ -0,0 +1 @@
This is a test of the minimal set of files needed for the Windows version.
26
windows/xgboost.sln
Normal file
@@ -0,0 +1,26 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xgboost", "xgboost\xgboost.vcxproj", "{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|Win32.ActiveCfg = Debug|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|Win32.Build.0 = Debug|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|x64.ActiveCfg = Debug|x64
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Debug|x64.Build.0 = Debug|x64
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|Win32.ActiveCfg = Release|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|Win32.Build.0 = Release|Win32
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|x64.ActiveCfg = Release|x64
{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
117
windows/xgboost/xgboost.vcxproj
Normal file
@@ -0,0 +1,117 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{1D6A56A5-5557-4D20-9D50-3DE4C30BE00C}</ProjectGuid>
<RootNamespace>xgboost</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\src\io\io.cpp" />
<ClCompile Include="..\..\src\xgboost_main.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
@@ -35,7 +35,7 @@ bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
# make prediction
preds <- xgb.predict(bst, dtest)
labels <- xgb.getinfo(dtest, "label")
err <- as.real(sum(as.integer(preds > 0.5) != labels)) / length(labels)
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
# print error rate
print(paste("error=",err))

@@ -100,7 +100,7 @@ logregobj <- function(preds, dtrain) {
# Keep this in mind when you use the customization; you may also need to write a customized evaluation function
evalerror <- function(preds, dtrain) {
labels <- xgb.getinfo(dtrain, "label")
err <- as.real(sum(labels != (preds > 0.0))) / length(labels)
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
return(list(metric="error", value=err))
}


@@ -13,7 +13,7 @@ dtrain = xgb.DMatrix('agaricus.txt.train')
dtest = xgb.DMatrix('agaricus.txt.test')

# specify parameters via map; definitions are the same as in the C++ version
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }

# specify validation sets to watch performance
evallist = [(dtest,'eval'), (dtrain,'train')]
@@ -75,7 +75,7 @@ print ('start running example to use customized objective function')
# note: for customized objective function, we leave objective as default
# note: what we get is the margin value of the prediction
# you must know what you are doing
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
param = {'max_depth':2, 'eta':1, 'silent':1 }

# user-defined objective function: given prediction, return gradient and second-order gradient
# this is log-likelihood loss
@@ -107,7 +107,7 @@ bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
#
print ('start running example to start from an initial prediction')
# specify parameters via map; definitions are the same as in the C++ version
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
# train xgboost for 1 round
bst = xgb.train( param, dtrain, 1, evallist )
# Note: we need the margin value instead of transformed prediction in set_base_margin

@@ -48,15 +48,15 @@ xgb.setinfo <- function(dmat, name, info) {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
}
if (name == "label") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info))
return(TRUE)
}
if (name == "weight") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info))
return(TRUE)
}
if (name == "base_margin") {
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info))
return(TRUE)
}
if (name == "group") {
@@ -214,7 +214,7 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
if (length(names(w)) == 0) {
stop("xgb.eval: a name tag must be present for every element in watchlist")
}
evnames <- append(evnames, names(w))
evnames <- append(evnames, names(w))
}
}
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)

@@ -19,6 +19,7 @@ xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
xglib.XGDMatrixGetUIntInfo.restype = ctypes.POINTER(ctypes.c_uint)
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong

xglib.XGBoosterCreate.restype = ctypes.c_void_p
@@ -27,10 +28,10 @@ xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)


def ctypes2numpy(cptr, length):
def ctypes2numpy(cptr, length, dtype):
# convert a ctypes pointer array to numpy
assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
res = numpy.zeros(length, dtype='float32')
res = numpy.zeros(length, dtype=dtype)
assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0])
return res

@@ -44,7 +45,7 @@ class DMatrix:
return
if isinstance(data, str):
self.handle = ctypes.c_void_p(
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1))
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 0))
elif isinstance(data, scp.csr_matrix):
self.__init_from_csr(data)
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
@@ -76,23 +77,31 @@ class DMatrix:
# destructor
def __del__(self):
xglib.XGDMatrixFree(self.handle)
def __get_float_info(self, field):
def get_float_info(self, field):
length = ctypes.c_ulong()
ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
ctypes.byref(length))
return ctypes2numpy(ret, length.value)
def __set_float_info(self, field, data):
xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')),
return ctypes2numpy(ret, length.value, 'float32')
def get_uint_info(self, field):
length = ctypes.c_ulong()
ret = xglib.XGDMatrixGetUIntInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
ctypes.byref(length))
return ctypes2numpy(ret, length.value, 'uint32')
def set_float_info(self, field, data):
xglib.XGDMatrixSetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
(ctypes.c_float*len(data))(*data), len(data))
def set_uint_info(self, field, data):
xglib.XGDMatrixSetUIntInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
(ctypes.c_uint*len(data))(*data), len(data))
# load data from file
def save_binary(self, fname, silent=True):
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
# set label of dmatrix
def set_label(self, label):
self.__set_float_info('label', label)
self.set_float_info('label', label)
# set weight of each instance
def set_weight(self, weight):
self.__set_float_info('weight', weight)
self.set_float_info('weight', weight)
# set initialized margin prediction
def set_base_margin(self, margin):
"""
@@ -103,19 +112,19 @@ class DMatrix:
e.g. for logistic regression: need to put in value before logistic transformation
see also example/demo.py
"""
self.__set_float_info('base_margin', margin)
self.set_float_info('base_margin', margin)
# set group size of dmatrix, used for ranking
def set_group(self, group):
xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group))
# get label from dmatrix
def get_label(self):
return self.__get_float_info('label')
return self.get_float_info('label')
# get weight from dmatrix
def get_weight(self):
return self.__get_float_info('weight')
return self.get_float_info('weight')
# get base_margin from dmatrix
def get_base_margin(self):
return self.__get_float_info('base_margin')
return self.get_float_info('base_margin')
def num_row(self):
return xglib.XGDMatrixNumRow(self.handle)
# slice the DMatrix to return a new DMatrix that only contains rindex
@@ -189,7 +198,7 @@ class Booster:
length = ctypes.c_ulong()
preds = xglib.XGBoosterPredict(self.handle, data.handle,
int(output_margin), ctypes.byref(length))
return ctypes2numpy(preds, length.value)
return ctypes2numpy(preds, length.value, 'float32')
def save_model(self, fname):
""" save model to file """
xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))

@@ -2,13 +2,24 @@
#include <string>
#include <utility>
#include <cstring>
#include "xgboost_wrapper.h"
#include "xgboost_R.h"
#include "xgboost_wrapper.h"
#include "../src/utils/utils.h"
#include "../src/utils/omp.h"
#include "../src/utils/matrix_csr.h"

using namespace xgboost;
// implements error handling
namespace xgboost {
namespace utils {
void HandleAssertError(const char *msg) {
error("%s", msg);
}
void HandleCheckError(const char *msg) {
error("%s", msg);
}
} // namespace utils
} // namespace xgboost

extern "C" {
void _DMatrixFinalizer(SEXP ext) {
@@ -51,9 +62,9 @@ extern "C" {
int ncol = length(indptr) - 1;
int ndata = length(data);
// transform into CSR format
std::vector<size_t> row_ptr;
std::vector<bst_ulong> row_ptr;
std::vector< std::pair<unsigned, float> > csr_data;
utils::SparseCSRMBuilder< std::pair<unsigned,float> > builder(row_ptr, csr_data);
utils::SparseCSRMBuilder<std::pair<unsigned,float>, false, bst_ulong> builder(row_ptr, csr_data);
builder.InitBudget();
for (int i = 0; i < ncol; ++i) {
for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
@@ -108,7 +119,7 @@ extern "C" {
}
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
size_t olen;
bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), &olen);
SEXP ret = PROTECT(allocVector(REALSXP, olen));
@@ -165,17 +176,19 @@ extern "C" {
std::vector<void*> vec_dmats;
std::vector<std::string> vec_names;
std::vector<const char*> vec_sptr;
for (int i = 0; i < len; ++i){
for (int i = 0; i < len; ++i) {
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
vec_sptr.push_back(vec_names.back().c_str());
}
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
&vec_dmats[0], &vec_sptr[0], len));
&vec_dmats[0], &vec_sptr[0], len));
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
size_t olen;
bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(output_margin),
@@ -194,13 +207,13 @@ extern "C" {
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
}
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
size_t olen;
bst_ulong olen;
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
&olen);
FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
for (size_t i = 0; i < olen; ++i) {
fprintf(fo, "booster[%lu]:\n", i);
fprintf(fo, "booster[%u]:\n", static_cast<unsigned>(i));
fprintf(fo, "%s", res[i]);
}
fclose(fo);

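The two-loop change in XGBoosterEvalOneIter_R above most plausibly fixes a lifetime hazard: taking c_str() of the most recently pushed string while the vector is still growing leaves dangling pointers once push_back reallocates and moves the strings. A minimal sketch of the hazard and of the fixed pattern:

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  const char *raw[] = {"train", "eval", "test"};
  std::vector<std::string> names;
  std::vector<const char*> sptr;
  for (int i = 0; i < 3; ++i) {
    names.push_back(raw[i]);
    // UNSAFE here: sptr.push_back(names.back().c_str());
    // the next push_back may reallocate `names` and move its strings,
    // invalidating every c_str() pointer collected so far
  }
  for (size_t i = 0; i < names.size(); ++i) {
    sptr.push_back(names[i].c_str());  // safe: `names` has stopped growing
  }
  for (size_t i = 0; i < sptr.size(); ++i) std::printf("%s\n", sptr[i]);
  return 0;
}
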
@@ -23,18 +23,18 @@ class Booster: public learner::BoostLearner<FMatrixS> {
this->init_model = false;
this->SetCacheData(mats);
}
const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) {
const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
this->CheckInitModel();
this->Predict(dmat, output_margin, &this->preds_);
*len = this->preds_.size();
return &this->preds_[0];
}
inline void BoostOneIter(const DataMatrix &train,
float *grad, float *hess, size_t len) {
float *grad, float *hess, bst_ulong len) {
this->gpair_.resize(len);
const unsigned ndata = static_cast<unsigned>(len);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(len);
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
gpair_[j] = bst_gpair(grad[j], hess[j]);
}
gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
@@ -48,7 +48,7 @@ class Booster: public learner::BoostLearner<FMatrixS> {
learner::BoostLearner<FMatrixS>::LoadModel(fname);
this->init_model = true;
}
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, bst_ulong *len) {
model_dump = this->DumpModel(fmap, with_stats);
model_dump_cptr.resize(model_dump.size());
for (size_t i = 0; i < model_dump.size(); ++i) {
@@ -76,35 +76,37 @@ extern "C"{
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
return LoadDataMatrix(fname, silent, false);
}
void* XGDMatrixCreateFromCSR(const size_t *indptr,
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem) {
bst_ulong nindptr,
bst_ulong nelem) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr);
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
mat.row_data_.resize(nelem);
for (size_t i = 0; i < nelem; ++i) {
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
mat.info.num_col = std::max(mat.info.num_col,
static_cast<size_t>(indices[i]+1));
for (bst_ulong i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.info.num_row = nindptr - 1;
mat.row_data_.resize(nelem);
for (bst_ulong i = 0; i < nelem; ++i) {
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
mat.info.info.num_col = std::max(mat.info.info.num_col,
static_cast<size_t>(indices[i]+1));
}
mat.info.info.num_row = nindptr - 1;
return p_mat;
}
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
bst_ulong nrow,
bst_ulong ncol,
float missing) {
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.info.num_row = nrow;
mat.info.num_col = ncol;
for (size_t i = 0; i < nrow; ++i, data += ncol) {
size_t nelem = 0;
for (size_t j = 0; j < ncol; ++j) {
mat.info.info.num_row = nrow;
mat.info.info.num_col = ncol;
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
bst_ulong nelem = 0;
for (bst_ulong j = 0; j < ncol; ++j) {
if (data[j] != missing) {
mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
++nelem;
@@ -116,7 +118,7 @@ extern "C"{
}
void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
size_t len) {
bst_ulong len) {
DMatrixSimple tmp;
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
if (dsrc.magic != DMatrixSimple::kMagic) {
@@ -130,17 +132,17 @@ extern "C"{
utils::Check(src.info.group_ptr.size() == 0,
"slice does not support group structure");
ret.Clear();
ret.info.num_row = len;
ret.info.num_col = src.info.num_col;
ret.info.info.num_row = len;
ret.info.info.num_col = src.info.num_col();

utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
iter->BeforeFirst();
utils::Assert(iter->Next(), "slice");
const SparseBatch &batch = iter->Value();
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
const int ridx = idxset[i];
SparseBatch::Inst inst = batch[ridx];
utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceeds number of rows");
utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceeds number of rows");
ret.row_data_.resize(ret.row_data_.size() + inst.length);
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
sizeof(SparseBatch::Entry) * inst.length);
@@ -163,34 +165,46 @@ extern "C"{
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
}
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) {
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) {
std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetInfo(field);
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len);
memcpy(&vec[0], info, sizeof(float) * len);
}
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len);
memcpy(&vec[0], info, sizeof(unsigned) * len);
}
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
pmat->info.group_ptr.resize(len + 1);
pmat->info.group_ptr[0] = 0;
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
}
}
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) {
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) {
const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetInfo(field);
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*len = vec.size();
return &vec[0];
}
size_t XGDMatrixNumRow(const void *handle) {
return static_cast<const DataMatrix*>(handle)->info.num_row;
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*len = vec.size();
return &vec[0];
}
bst_ulong XGDMatrixNumRow(const void *handle) {
return static_cast<const DataMatrix*>(handle)->info.num_row();
}

// xgboost implementation
void *XGBoosterCreate(void *dmats[], size_t len) {
void *XGBoosterCreate(void *dmats[], bst_ulong len) {
std::vector<DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
mats.push_back(dtr);
}
@@ -210,7 +224,7 @@ extern "C"{
bst->UpdateOneIter(iter, *dtr);
}
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len) {
float *grad, float *hess, bst_ulong len) {
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
@@ -218,11 +232,11 @@ extern "C"{
bst->BoostOneIter(*dtr, grad, hess, len);
}
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len) {
const char *evnames[], bst_ulong len) {
Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names;
std::vector<const DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) {
for (bst_ulong i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i]));
}
@@ -230,7 +244,7 @@ extern "C"{
bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str();
}
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) {
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
@@ -239,7 +253,7 @@ extern "C"{
void XGBoosterSaveModel(const void *handle, const char *fname) {
static_cast<const Booster*>(handle)->SaveModel(fname);
}
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
const char** XGBoosterDumpModel(void *handle, const char *fmap, bst_ulong *len){
utils::FeatMap featmap;
if (strlen(fmap) != 0) {
featmap.LoadText(fmap);

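BoostOneIter is what makes customized loss functions possible through the C API: the caller predicts raw margins, differentiates its own loss, and feeds the gradient pair back in. A hedged sketch against the declarations in the header at the end of this diff; the log-loss derivatives are standard, and the labels pointer is assumed to come from XGDMatrixGetFloatInfo with field "label":

#include <cmath>
#include <vector>
#include "xgboost_wrapper.h"

void CustomLogisticRound(void *booster, void *dtrain, const float *labels) {
  bst_ulong len = 0;
  // output_margin = 1: raw margins, not transformed probabilities
  const float *margin = XGBoosterPredict(booster, dtrain, 1, &len);
  std::vector<float> grad(len), hess(len);
  for (bst_ulong i = 0; i < len; ++i) {
    float p = 1.0f / (1.0f + std::exp(-margin[i]));  // sigmoid(margin)
    grad[i] = p - labels[i];                         // d loss / d margin
    hess[i] = p * (1.0f - p);                        // d2 loss / d margin2
  }
  XGBoosterBoostOneIter(booster, dtrain, &grad[0], &hess[0], len);
}
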
@@ -7,13 +7,16 @@
* can be used to create wrappers for other languages
*/
#include <cstdio>
#define XGB_DLL
// manually define unsigned long
typedef unsigned long bst_ulong;

extern "C" {
/*!
* \brief load a data matrix
* \return a loaded data matrix
*/
void* XGDMatrixCreateFromFile(const char *fname, int silent);
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
* \brief create a matrix content from csr format
* \param indptr pointer to row headers
@@ -23,11 +26,11 @@ extern "C" {
* \param nelem number of nonzero elements in the matrix
* \return created dmatrix
*/
void* XGDMatrixCreateFromCSR(const size_t *indptr,
const unsigned *indices,
const float *data,
size_t nindptr,
size_t nelem);
XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem);
/*!
* \brief create matrix content from dense matrix
* \param data pointer to the data space
@@ -36,10 +39,10 @@ extern "C" {
* \param missing which value represents a missing value
* \return created dmatrix
*/
void* XGDMatrixCreateFromMat(const float *data,
size_t nrow,
size_t ncol,
float missing);
XGB_DLL void* XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
@@ -47,20 +50,20 @@ extern "C" {
* \param len length of index set
* \return a sliced new matrix
*/
void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
size_t len);
XGB_DLL void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset,
bst_ulong len);
/*!
* \brief free space in data matrix
*/
void XGDMatrixFree(void *handle);
XGB_DLL void XGDMatrixFree(void *handle);
/*!
* \brief save a data matrix into a binary file
* \param handle an instance of data matrix
* \param fname file name
* \param silent whether to suppress printed statistics when saving
*/
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
* \brief set float vector to a content in info
* \param handle an instance of data matrix
@@ -68,52 +71,68 @@ extern "C" {
* \param array pointer to float vector
* \param len length of array
*/
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len);
XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, bst_ulong len);
/*!
* \brief set uint32 vector to a content in info
* \param handle an instance of data matrix
* \param field field name
* \param array pointer to unsigned int vector
* \param len length of array
*/
XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, bst_ulong len);
/*!
* \brief set group sizes of the training matrix, used for ranking
* \param handle an instance of data matrix
* \param group pointer to group size
* \param len length of array
*/
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len);
/*!
* \brief get float info vector from matrix
* \param handle an instance of data matrix
* \param field field name
* \param out_len used to set result length
* \return pointer to the label
* \return pointer to the result
*/
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* out_len);
/*!
* \brief get uint32 info vector from matrix
* \param handle an instance of data matrix
* \param field field name
* \param out_len used to set result length
* \return pointer to the result
*/
XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* out_len);
/*!
* \brief return number of rows
*/
size_t XGDMatrixNumRow(const void *handle);
XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle);
// --- start XGBoost class
/*!
* \brief create xgboost learner
* \param dmats matrices that are set to be cached
* \param len length of dmats
*/
void *XGBoosterCreate(void* dmats[], size_t len);
XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len);
/*!
* \brief free obj in handle
* \param handle handle to be freed
*/
void XGBoosterFree(void* handle);
XGB_DLL void XGBoosterFree(void* handle);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param value value of parameter
*/
void XGBoosterSetParam(void *handle, const char *name, const char *value);
XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration round
* \param dtrain training data
*/
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
/*!
* \brief update the model by directly specifying the gradient and second-order gradient;
* this can be used to replace UpdateOneIter, to support customized loss functions
@@ -123,8 +142,8 @@ extern "C" {
* \param hess second order gradient statistics
* \param len length of grad/hess array
*/
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len);
XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, bst_ulong len);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
@@ -134,8 +153,8 @@ extern "C" {
* \param len length of dmats
* \return the string containing evaluation statistics
*/
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len);
XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], bst_ulong len);
/*!
* \brief make prediction based on dmat
* \param handle handle
@@ -143,19 +162,19 @@ extern "C" {
* \param output_margin whether to output only the raw margin value
* \param len used to store the length of the returned result
*/
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len);
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel(void *handle, const char *fname);
XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel(const void *handle, const char *fname);
XGB_DLL void XGBoosterSaveModel(const void *handle, const char *fname);
/*!
* \brief dump model, return array of strings representing model dump
* \param handle handle
@@ -163,7 +182,7 @@ extern "C" {
* \param out_len length of output array
* \return char *data[], representing dump of each model
*/
const char **XGBoosterDumpModel(void *handle, const char *fmap,
size_t *out_len);
XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
bst_ulong *out_len);
};
#endif  // XGBOOST_WRAPPER_H_

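A minimal end-to-end sketch of a caller against this header: build a dense DMatrix, train two rounds, and read back predictions. The data, labels, and parameter values are illustrative only:

#include <cstdio>
#include "xgboost_wrapper.h"

int main() {
  float data[8] = {1, 0, 0, 1, 1, 1, 0, 0};  // 4 rows x 2 cols, row-major
  float label[4] = {1, 0, 1, 0};
  void *dtrain = XGDMatrixCreateFromMat(data, 4, 2, -1.0f /* missing */);
  XGDMatrixSetFloatInfo(dtrain, "label", label, 4);

  void *mats[1] = {dtrain};
  void *booster = XGBoosterCreate(mats, 1);
  XGBoosterSetParam(booster, "objective", "binary:logistic");
  XGBoosterSetParam(booster, "max_depth", "2");
  for (int iter = 0; iter < 2; ++iter) {
    XGBoosterUpdateOneIter(booster, iter, dtrain);  // one boosting round
  }
  bst_ulong len = 0;
  const float *preds = XGBoosterPredict(booster, dtrain, 0, &len);
  for (bst_ulong i = 0; i < len; ++i) {
    std::printf("pred[%lu] = %f\n", i, preds[i]);
  }
  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}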