From 0130be4acce7c94284c3e6f57112c5f53bdb33ac Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 26 Aug 2014 23:41:03 -0700 Subject: [PATCH] major change in the design of R interface --- R-package/NAMESPACE | 16 +- ...redict.xgboost.R => predict.xgb.Booster.R} | 0 R-package/R/{xgb.Utils.R => utils.R} | 112 +-------- R-package/R/xgb.DMatrix.R | 22 ++ R-package/R/xgb.dump.R | 11 + R-package/R/xgb.getinfo.R | 16 ++ R-package/R/xgb.load.R | 5 + R-package/R/xgb.save.R | 16 ++ R-package/R/xgb.train.R | 38 +++ R-package/R/xgboost.R | 51 ++-- R-package/inst/examples/demo-new.R | 133 +++++++++++ R-package/inst/examples/demo.R | 1 + R-package/inst/examples/model.dump | 72 ++++++ R-package/src-i386/Makevars | 28 +++ R-package/src-i386/Makevars.win | 32 +++ R-package/src-i386/xgboost_R.cpp | 221 ++++++++++++++++++ R-package/src-i386/xgboost_R.h | 124 ++++++++++ R-package/src-x64/Makevars | 28 +++ R-package/src-x64/Makevars.win | 32 +++ R-package/src-x64/xgboost_R.cpp | 221 ++++++++++++++++++ R-package/src-x64/xgboost_R.h | 124 ++++++++++ 21 files changed, 1175 insertions(+), 128 deletions(-) rename R-package/R/{predict.xgboost.R => predict.xgb.Booster.R} (100%) rename R-package/R/{xgb.Utils.R => utils.R} (53%) create mode 100644 R-package/R/xgb.DMatrix.R create mode 100644 R-package/R/xgb.dump.R create mode 100644 R-package/R/xgb.getinfo.R create mode 100644 R-package/R/xgb.load.R create mode 100644 R-package/R/xgb.save.R create mode 100644 R-package/R/xgb.train.R create mode 100644 R-package/inst/examples/demo-new.R create mode 100644 R-package/inst/examples/model.dump create mode 100644 R-package/src-i386/Makevars create mode 100644 R-package/src-i386/Makevars.win create mode 100644 R-package/src-i386/xgboost_R.cpp create mode 100644 R-package/src-i386/xgboost_R.h create mode 100644 R-package/src-x64/Makevars create mode 100644 R-package/src-x64/Makevars.win create mode 100644 R-package/src-x64/xgboost_R.cpp create mode 100644 R-package/src-x64/xgboost_R.h diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index ce2d7d901..9dd4eaac0 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -3,16 +3,8 @@ importClassesFrom("Matrix", dgCMatrix, dgeMatrix) export(xgboost) export(xgb.DMatrix) export(xgb.getinfo) -export(xgb.setinfo) - -# exportClasses(xgb.Boost) exportMethods(predict) - -# export(xgb.Booster) -# export(xgb.train) -# export(xgb.save) -# export(xgb.predict) -# export(xgb.dump) - - - +export(xgb.train) +export(xgb.save) +export(xgb.load) +export(xgb.dump) diff --git a/R-package/R/predict.xgboost.R b/R-package/R/predict.xgb.Booster.R similarity index 100% rename from R-package/R/predict.xgboost.R rename to R-package/R/predict.xgb.Booster.R diff --git a/R-package/R/xgb.Utils.R b/R-package/R/utils.R similarity index 53% rename from R-package/R/xgb.Utils.R rename to R-package/R/utils.R index d9710121b..5ed4c8979 100644 --- a/R-package/R/xgb.Utils.R +++ b/R-package/R/utils.R @@ -6,42 +6,6 @@ library.dynam.unload("xgboost", libpath); } -# constructing DMatrix -xgb.DMatrix <- function(data, info=list(), missing=0.0) { - if (typeof(data) == "character") { - handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost") - } else if(is.matrix(data)) { - handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost") - } else if(class(data) == "dgCMatrix") { - handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost") - } else { - stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data))) - } - dmat <- structure(handle, class="xgb.DMatrix") - if (length(info) != 0) { - for (i in 1:length(info)) { - p <- info[i] - xgb.setinfo(dmat, names(p), p[[1]]) - } - } - return(dmat) -} -# get information from dmatrix -xgb.getinfo <- function(dmat, name) { - if (typeof(name) != "character") { - stop("xgb.getinfo: name must be character") - } - if (class(dmat) != "xgb.DMatrix") { - stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix"); - } - if (name != "label" && - name != "weight" && - name != "base_margin" ) { - stop(paste("xgb.getinfo: unknown info name", name)) - } - ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost") - return(ret) -} # set information into dmatrix, this mutate dmatrix xgb.setinfo <- function(dmat, name, info) { if (class(dmat) != "xgb.DMatrix") { @@ -63,9 +27,10 @@ xgb.setinfo <- function(dmat, name, info) { .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), PACKAGE="xgboost") return(TRUE) } - stop(pase("xgb.setinfo: unknown info name", name)) + stop(paste("xgb.setinfo: unknown info name", name)) return(FALSE) } + # construct a Booster from cachelist xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { if (typeof(cachelist) != "list") { @@ -92,61 +57,9 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { } return(structure(handle, class="xgb.Booster")) } -# train a model using given parameters -xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) { - if (typeof(params) != "list") { - stop("xgb.train: first argument params must be list"); - } - if (class(dtrain) != "xgb.DMatrix") { - stop("xgb.train: second argument dtrain must be xgb.DMatrix"); - } - bst <- xgb.Booster(params, append(watchlist,dtrain)) - for (i in 1:nrounds) { - if (is.null(obj)) { - succ <- xgb.iter.update(bst, dtrain, i-1) - } else { - pred <- xgb.predict(bst, dtrain) - gpair <- obj(pred, dtrain) - succ <- xgb.iter.boost(bst, dtrain, gpair) - } - if (length(watchlist) != 0) { - if (is.null(feval)) { - msg <- xgb.iter.eval(bst, watchlist, i-1) - cat(msg); cat("\n") - } else { - cat("["); cat(i); cat("]"); - for (j in 1:length(watchlist)) { - w <- watchlist[j] - if (length(names(w)) == 0) { - stop("xgb.eval: name tag must be presented for every elements in watchlist") - } - ret <- feval(xgb.predict(bst, w[[1]]), w[[1]]) - cat("\t"); cat(names(w)); cat("-"); cat(ret$metric); - cat(":"); cat(ret$value) - } - cat("\n") - } - } - } - return(bst) -} -# save model or DMatrix to file -xgb.save <- function(handle, fname) { - if (typeof(fname) != "character") { - stop("xgb.save: fname must be character") - } - if (class(handle) == "xgb.Booster") { - .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost") - return(TRUE) - } - if (class(handle) == "xgb.DMatrix") { - .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost") - return(TRUE) - } - stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster") - return(FALSE) -} -# predict + + +# predict, depreciated xgb.predict <- function(booster, dmat, outputmargin = FALSE) { if (class(booster) != "xgb.Booster") { stop("xgb.predict: first argument must be type xgb.Booster") @@ -157,21 +70,12 @@ xgb.predict <- function(booster, dmat, outputmargin = FALSE) { ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin), PACKAGE="xgboost") return(ret) } -# dump model -xgb.dump <- function(booster, fname, fmap = "") { - if (class(booster) != "xgb.Booster") { - stop("xgb.dump: first argument must be type xgb.Booster") - } - if (typeof(fname) != "character"){ - stop("xgb.dump: second argument must be type character") - } - .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost") - return(TRUE) -} + ##-------------------------------------- # the following are low level iteratively function, not needed # if you do not want to use them #--------------------------------------- + # iteratively update booster with dtrain xgb.iter.update <- function(booster, dtrain, iter) { if (class(booster) != "xgb.Booster") { @@ -183,6 +87,7 @@ xgb.iter.update <- function(booster, dtrain, iter) { .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, PACKAGE="xgboost") return(TRUE) } + # iteratively update booster with customized statistics xgb.iter.boost <- function(booster, dtrain, gpair) { if (class(booster) != "xgb.Booster") { @@ -194,6 +99,7 @@ xgb.iter.boost <- function(booster, dtrain, gpair) { .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE="xgboost") return(TRUE) } + # iteratively evaluate one iteration xgb.iter.eval <- function(booster, watchlist, iter) { if (class(booster) != "xgb.Booster") { diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R new file mode 100644 index 000000000..b5835a4ae --- /dev/null +++ b/R-package/R/xgb.DMatrix.R @@ -0,0 +1,22 @@ +# constructing DMatrix +xgb.DMatrix <- function(data, missing=0.0, ...) { + if (typeof(data) == "character") { + handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost") + } else if(is.matrix(data)) { + handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE="xgboost") + } else if(class(data) == "dgCMatrix") { + handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE="xgboost") + } else { + stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data))) + } + dmat <- structure(handle, class="xgb.DMatrix") + + info = list(...) + if (length(info)==0) + return(dmat) + for (i in 1:length(info)) { + p = info[i] + xgb.setinfo(dmat, names(p), p[[1]]) + } + return(dmat) +} diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R new file mode 100644 index 000000000..b53cd8b46 --- /dev/null +++ b/R-package/R/xgb.dump.R @@ -0,0 +1,11 @@ +# dump model +xgb.dump <- function(booster, fname, fmap = "") { + if (class(booster) != "xgb.Booster") { + stop("xgb.dump: first argument must be type xgb.Booster") + } + if (typeof(fname) != "character"){ + stop("xgb.dump: second argument must be type character") + } + .Call("XGBoosterDumpModel_R", booster, fname, fmap, PACKAGE="xgboost") + return(TRUE) +} diff --git a/R-package/R/xgb.getinfo.R b/R-package/R/xgb.getinfo.R new file mode 100644 index 000000000..a8952826d --- /dev/null +++ b/R-package/R/xgb.getinfo.R @@ -0,0 +1,16 @@ +# get information from dmatrix +xgb.getinfo <- function(dmat, name) { + if (typeof(name) != "character") { + stop("xgb.getinfo: name must be character") + } + if (class(dmat) != "xgb.DMatrix") { + stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix"); + } + if (name != "label" && + name != "weight" && + name != "base_margin" ) { + stop(paste("xgb.getinfo: unknown info name", name)) + } + ret <- .Call("XGDMatrixGetInfo_R", dmat, name, PACKAGE="xgboost") + return(ret) +} diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R new file mode 100644 index 000000000..2d89620ff --- /dev/null +++ b/R-package/R/xgb.load.R @@ -0,0 +1,5 @@ +xgb.load <- function(modelfile) { + if (is.null(modelfile)) + stop('xgb.load: modelfile cannot be NULL') + xgb.Booster(modelfile=modelfile) +} diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R new file mode 100644 index 000000000..355c20f12 --- /dev/null +++ b/R-package/R/xgb.save.R @@ -0,0 +1,16 @@ +# save model or DMatrix to file +xgb.save <- function(handle, fname) { + if (typeof(fname) != "character") { + stop("xgb.save: fname must be character") + } + if (class(handle) == "xgb.Booster") { + .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost") + return(TRUE) + } + if (class(handle) == "xgb.DMatrix") { + .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost") + return(TRUE) + } + stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster") + return(FALSE) +} diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R new file mode 100644 index 000000000..ba595f801 --- /dev/null +++ b/R-package/R/xgb.train.R @@ -0,0 +1,38 @@ +# train a model using given parameters +xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) { + if (typeof(params) != "list") { + stop("xgb.train: first argument params must be list"); + } + if (class(dtrain) != "xgb.DMatrix") { + stop("xgb.train: second argument dtrain must be xgb.DMatrix"); + } + bst <- xgb.Booster(params, append(watchlist,dtrain)) + for (i in 1:nrounds) { + if (is.null(obj)) { + succ <- xgb.iter.update(bst, dtrain, i-1) + } else { + pred <- xgb.predict(bst, dtrain) + gpair <- obj(pred, dtrain) + succ <- xgb.iter.boost(bst, dtrain, gpair) + } + if (length(watchlist) != 0) { + if (is.null(feval)) { + msg <- xgb.iter.eval(bst, watchlist, i-1) + cat(msg); cat("\n") + } else { + cat("["); cat(i); cat("]"); + for (j in 1:length(watchlist)) { + w <- watchlist[j] + if (length(names(w)) == 0) { + stop("xgb.eval: name tag must be presented for every elements in watchlist") + } + ret <- feval(xgb.predict(bst, w[[1]]), w[[1]]) + cat("\t"); cat(names(w)); cat("-"); cat(ret$metric); + cat(":"); cat(ret$value) + } + cat("\n") + } + } + } + return(bst) +} diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index d67fb5880..c452dfde0 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -1,23 +1,48 @@ # Main function for xgboost-package -xgboost = function(x=NULL,y=NULL,file=NULL,nrounds=10,params,watchlist=list(), - obj=NULL, feval=NULL, margin=NULL) +xgboost = function(x=NULL,y=NULL,DMatrix=NULL, file=NULL, validation=NULL, + nrounds=10, obj=NULL, feval=NULL, margin=NULL, verbose = T, ...) { - if (is.null(x) && is.null(y)) - { - if (is.null(file)) - stop('xgboost need input data, either R objects or local files.') - dtrain = xgb.DMatrix(file) - } + if (!is.null(DMatrix)) + dtrain = DMatrix else - dtrain = xgb.DMatrix(x, info=list(label=y)) - if (!is.null(margin)) { - succ <- xgb.setinfo(dtrain, "base_margin", margin) - if (!succ) - warning('Attemp to use margin failed.') + if (is.null(x) && is.null(y)) + { + if (is.null(file)) + stop('xgboost need input data, either R objects, local files or DMatrix object.') + dtrain = xgb.DMatrix(file) + } + else + dtrain = xgb.DMatrix(x, label=y) + if (!is.null(margin)) + { + succ <- xgb.setinfo(dtrain, "base_margin", margin) + if (!succ) + warning('Attemp to use margin failed.') + } } + + params = list(...) + + watchlist=list() + if (verbose) + { + if (!is.null(validation)) + { + if (class(validation)!='xgb.DMatrix') + dtest = xgb.DMatrix(validation) + else + dtest = validation + watchlist = list(eval=dtest,train=dtrain) + } + + else + watchlist = list(train=dtrain) + } + bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval) + return(bst) } diff --git a/R-package/inst/examples/demo-new.R b/R-package/inst/examples/demo-new.R new file mode 100644 index 000000000..01f44ee9f --- /dev/null +++ b/R-package/inst/examples/demo-new.R @@ -0,0 +1,133 @@ +require(xgboost) +require(methods) + +# helper function to read libsvm format +# this is very badly written, load in dense, and convert to sparse +# use this only for demo purpose +# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r +read.libsvm = function(fname, maxcol) { + content = readLines(fname) + nline = length(content) + label = numeric(nline) + mat = matrix(0, nline, maxcol+1) + for (i in 1:nline) { + arr = as.vector(strsplit(content[i], " ")[[1]]) + label[i] = as.numeric(arr[[1]]) + for (j in 2:length(arr)) { + kv = strsplit(arr[j], ":")[[1]] + # to avoid 0 index + findex = as.integer(kv[1]) + 1 + fvalue = as.numeric(kv[2]) + mat[i,findex] = fvalue + } + } + mat = as(mat, "sparseMatrix") + return(list(label=label, data=mat)) +} + +############################ +# Test xgb.DMatrix with local file, sparse matrix and dense matrix in R. +############################ + +# Directly read in local file +dtrain = xgb.DMatrix('agaricus.txt.train') +class(dtrain) + +# read file in R +csc = read.libsvm("agaricus.txt.train", 126) +y = csc$label +x = csc$data + +# x as Sparse Matrix +class(x) +dtrain = xgb.DMatrix(x, label=y) + +# x as dense matrix +dense.x = as.matrix(x) +dtrain = xgb.DMatrix(dense.x, label=y) + +############################ +# Test xgboost with local file, sparse matrix and dense matrix in R. +############################ + +# Test with DMatrix object +bst = xgboost(DMatrix=dtrain, max_depth=2, eta=1, silent=1, objective='binary:logistic') + +# Test with local file +bst = xgboost(file='agaricus.txt.train', max_depth=2, eta=1, silent=1, objective='binary:logistic') + +# Test with Sparse Matrix +bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic') + +# Test with dense Matrix +bst = xgboost(x = dense.x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic') + +# Test with validation set +bst = xgboost(file='agaricus.txt.train', validation='agaricus.txt.test', + max_depth=2, eta=1, silent=1, objective='binary:logistic') + +############################ +# Test predict +############################ + +# Prediction with DMatrix object +dtest = xgb.DMatrix('agaricus.txt.test') +pred = predict(bst, dtest) + +# Prediction with local test file +pred = predict(bst, 'agaricus.txt.test') + +# Prediction with Sparse Matrix +csc = read.libsvm("agaricus.txt.test", 126) +test.y = csc$label +test.x = csc$data +pred = predict(bst, test.x) + +# Extrac label with xgb.getinfo +labels = xgb.getinfo(dtest, "label") +err = as.numeric(sum(as.integer(pred > 0.5) != labels)) / length(labels) +print(paste("error=",err)) + +############################ +# Save and load model to hard disk +############################ + +# save model to binary local file +xgb.save(bst, 'model.save') + +# load binary model to R +bst = xgb.load('model.save') +pred = predict(bst, test.x) + +# save model to text file +xgb.dump(bst, 'model.dump') + +############################ +# Customized objective and evaluation function +############################ + +# user define objective function, given prediction, return gradient and second order gradient +# this is loglikelihood loss +logregobj = function(preds, dtrain) { + labels = xgb.getinfo(dtrain, "label") + preds = 1.0 / (1.0 + exp(-preds)) + grad = preds - labels + hess = preds * (1.0-preds) + return(list(grad=grad, hess=hess)) +} +# user defined evaluation function, return a list(metric="metric-name", value="metric-value") +# NOTE: when you do customized loss function, the default prediction value is margin +# this may make buildin evalution metric not function properly +# for example, we are doing logistic loss, the prediction is score before logistic transformation +# the buildin evaluation error assumes input is after logistic transformation +# Take this in mind when you use the customization, and maybe you need write customized evaluation function +evalerror = function(preds, dtrain) { + labels = xgb.getinfo(dtrain, "label") + err = as.numeric(sum(labels != (preds > 0.0))) / length(labels) + return(list(metric="error", value=err)) +} + +bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic', + obj=logregobj, feval=evalerror) + + diff --git a/R-package/inst/examples/demo.R b/R-package/inst/examples/demo.R index 8b85c4cc0..d4a8698a3 100644 --- a/R-package/inst/examples/demo.R +++ b/R-package/inst/examples/demo.R @@ -1,4 +1,5 @@ require(xgboost) +require(methods) # helper function to read libsvm format # this is very badly written, load in dense, and convert to sparse diff --git a/R-package/inst/examples/model.dump b/R-package/inst/examples/model.dump new file mode 100644 index 000000000..ddeb6546d --- /dev/null +++ b/R-package/inst/examples/model.dump @@ -0,0 +1,72 @@ +booster[0]: +0:[f28<1.00001] yes=1,no=2,missing=2 + 1:[f108<1.00001] yes=3,no=4,missing=4 + 3:leaf=1.85965 + 4:leaf=-1.94071 + 2:[f55<1.00001] yes=5,no=6,missing=6 + 5:leaf=-1.70044 + 6:leaf=1.71218 +booster[1]: +0:[f59<1.00001] yes=1,no=2,missing=2 + 1:leaf=-6.23624 + 2:[f28<1.00001] yes=3,no=4,missing=4 + 3:leaf=-0.96853 + 4:leaf=0.784718 +booster[2]: +0:[f101<1.00001] yes=1,no=2,missing=2 + 1:[f110<1.00001] yes=3,no=4,missing=4 + 3:leaf=-9.42142 + 4:leaf=-0.791407 + 2:[f66<1.00001] yes=5,no=6,missing=6 + 5:leaf=5.77229 + 6:leaf=0.658725 +booster[3]: +0:[f26<1.00001] yes=1,no=2,missing=2 + 1:leaf=1.07748 + 2:[f38<1.00001] yes=3,no=4,missing=4 + 3:leaf=-0.877906 + 4:leaf=0.614153 +booster[4]: +0:[f108<1.00001] yes=1,no=2,missing=2 + 1:leaf=2.92191 + 2:[f35<1.00001] yes=3,no=4,missing=4 + 3:leaf=0.152607 + 4:leaf=-1.26934 +booster[5]: +0:[f22<1.00001] yes=1,no=2,missing=2 + 1:[f35<1.00001] yes=3,no=4,missing=4 + 3:leaf=-1.02315 + 4:leaf=-3.02414 + 2:[f23<1.00001] yes=5,no=6,missing=6 + 5:leaf=-1.53846 + 6:leaf=0.431742 +booster[6]: +0:[f28<1.00001] yes=1,no=2,missing=2 + 1:[f108<1.00001] yes=3,no=4,missing=4 + 3:leaf=0.836115 + 4:leaf=-0.912605 + 2:[f23<1.00001] yes=5,no=6,missing=6 + 5:leaf=-1.1971 + 6:leaf=0.777142 +booster[7]: +0:[f38<1.00001] yes=1,no=2,missing=2 + 1:[f26<1.00001] yes=3,no=4,missing=4 + 3:leaf=0.890623 + 4:leaf=-0.908312 + 2:[f111<1.00001] yes=5,no=6,missing=6 + 5:leaf=1.43619 + 6:leaf=-0.0180106 +booster[8]: +0:[f22<1.00001] yes=1,no=2,missing=2 + 1:leaf=-1.01502 + 2:[f101<1.00001] yes=3,no=4,missing=4 + 3:leaf=0.568838 + 4:leaf=-0.515293 +booster[9]: +0:[f114<1.00001] yes=1,no=2,missing=2 + 1:[f60<1.00001] yes=3,no=4,missing=4 + 3:leaf=-0.609475 + 4:leaf=3.63443 + 2:[f28<1.00001] yes=5,no=6,missing=6 + 5:leaf=-0.734556 + 6:leaf=0.217203 diff --git a/R-package/src-i386/Makevars b/R-package/src-i386/Makevars new file mode 100644 index 000000000..10b2661d0 --- /dev/null +++ b/R-package/src-i386/Makevars @@ -0,0 +1,28 @@ +# _*_ mode: Makefile; _*_ +export CC = gcc +export CXX = g++ + +# expose these flags to R CMD SHLIB +PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) + +ifeq ($(no_omp),1) + PKG_CPPFLAGS += -DDISABLE_OPENMP +endif + +CXXOBJ= xgboost_wrapper.o xgboost_io.o +OBJECTS= xgboost_R.o $(CXXOBJ) + +.PHONY: all clean +all: $(SHLIB) +$(SHLIB): $(OBJECTS) + +xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp +xgboost_io.o: ../../src/io/io.cpp + +$(CXXOBJ) : + $(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + +clean: + rm -rf *.so *.o *~ *.dll + diff --git a/R-package/src-i386/Makevars.win b/R-package/src-i386/Makevars.win new file mode 100644 index 000000000..5d2261230 --- /dev/null +++ b/R-package/src-i386/Makevars.win @@ -0,0 +1,32 @@ +# _*_ mode: Makefile; _*_ +export CC = gcc +export CXX = g++ + +# expose these flags to R CMD SHLIB +PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fopenmp -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) + +# add flag to build native code even in cross compiler +ifeq "$(WIN)" "64" + PKG_CPPFLAGS += -m64 +endif + +ifeq ($(no_omp),1) + PKG_CPPFLAGS += -DDISABLE_OPENMP +endif + +CXXOBJ= xgboost_wrapper.o xgboost_io.o +OBJECTS= xgboost_R.o $(CXXOBJ) + +.PHONY: all clean +all: $(SHLIB) +$(SHLIB): $(OBJECTS) + +xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp +xgboost_io.o: ../../src/io/io.cpp + +$(CXXOBJ) : + $(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + +clean: + rm -rf *.so *.o *~ *.dll diff --git a/R-package/src-i386/xgboost_R.cpp b/R-package/src-i386/xgboost_R.cpp new file mode 100644 index 000000000..1ca232509 --- /dev/null +++ b/R-package/src-i386/xgboost_R.cpp @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include "xgboost_R.h" +#include "../../wrapper/xgboost_wrapper.h" +#include "../../src/utils/utils.h" +#include "../../src/utils/omp.h" +#include "../../src/utils/matrix_csr.h" + +using namespace xgboost; +// implements error handling +namespace xgboost { +namespace utils { +void HandleAssertError(const char *msg) { + error("%s", msg); +} +void HandleCheckError(const char *msg) { + error("%s", msg); +} +} // namespace utils +} // namespace xgboost + +extern "C" { + void _DMatrixFinalizer(SEXP ext) { + if (R_ExternalPtrAddr(ext) == NULL) return; + XGDMatrixFree(R_ExternalPtrAddr(ext)); + R_ClearExternalPtr(ext); + } + SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { + void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent)); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP missing) { + SEXP dim = getAttrib(mat, R_DimSymbol); + int nrow = INTEGER(dim)[0]; + int ncol = INTEGER(dim)[1]; + double *din = REAL(mat); + std::vector data(nrow * ncol); + #pragma omp parallel for schedule(static) + for (int i = 0; i < nrow; ++i) { + for (int j = 0; j < ncol; ++j) { + data[i * ncol +j] = din[i + nrow * j]; + } + } + void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing)); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, + SEXP indices, + SEXP data) { + const int *col_ptr = INTEGER(indptr); + const int *row_index = INTEGER(indices); + const double *col_data = REAL(data); + int ncol = length(indptr) - 1; + int ndata = length(data); + // transform into CSR format + std::vector row_ptr; + std::vector< std::pair > csr_data; + utils::SparseCSRMBuilder, false, bst_ulong> builder(row_ptr, csr_data); + builder.InitBudget(); + for (int i = 0; i < ncol; ++i) { + for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) { + builder.AddBudget(row_index[j]); + } + } + builder.InitStorage(); + for (int i = 0; i < ncol; ++i) { + for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) { + builder.PushElem(row_index[j], std::make_pair(i, col_data[j])); + } + } + utils::Assert(csr_data.size() == static_cast(ndata), "BUG CreateFromCSC"); + std::vector row_data(ndata); + std::vector col_index(ndata); + #pragma omp parallel for schedule(static) + for (int i = 0; i < ndata; ++i) { + col_index[i] = csr_data[i].first; + row_data[i] = csr_data[i].second; + } + void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata ); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { + XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), + CHAR(asChar(fname)), asInteger(silent)); + } + void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { + int len = length(array); + const char *name = CHAR(asChar(field)); + if (!strcmp("group", name)) { + std::vector vec(len); + #pragma omp parallel for schedule(static) + for (int i = 0; i < len; ++i) { + vec[i] = static_cast(INTEGER(array)[i]); + } + XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len); + return; + } + { + std::vector vec(len); + #pragma omp parallel for schedule(static) + for (int i = 0; i < len; ++i) { + vec[i] = REAL(array)[i]; + } + XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + &vec[0], len); + } + } + SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { + bst_ulong olen; + const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), &olen); + SEXP ret = PROTECT(allocVector(REALSXP, olen)); + for (size_t i = 0; i < olen; ++i) { + REAL(ret)[i] = res[i]; + } + UNPROTECT(1); + return ret; + } + // functions related to booster + void _BoosterFinalizer(SEXP ext) { + if (R_ExternalPtrAddr(ext) == NULL) return; + XGBoosterFree(R_ExternalPtrAddr(ext)); + R_ClearExternalPtr(ext); + } + SEXP XGBoosterCreate_R(SEXP dmats) { + int len = length(dmats); + std::vector dvec; + for (int i = 0; i < len; ++i){ + dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + } + void *handle = XGBoosterCreate(&dvec[0], dvec.size()); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { + XGBoosterSetParam(R_ExternalPtrAddr(handle), + CHAR(asChar(name)), + CHAR(asChar(val))); + } + void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { + XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + R_ExternalPtrAddr(dtrain)); + } + void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { + utils::Check(length(grad) == length(hess), "gradient and hess must have same length"); + int len = length(grad); + std::vector tgrad(len), thess(len); + #pragma omp parallel for schedule(static) + for (int j = 0; j < len; ++j) { + tgrad[j] = REAL(grad)[j]; + thess[j] = REAL(hess)[j]; + } + XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dtrain), + &tgrad[0], &thess[0], len); + } + SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { + utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length"); + int len = length(dmats); + std::vector vec_dmats; + std::vector vec_names; + std::vector vec_sptr; + for (int i = 0; i < len; ++i) { + vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i))))); + } + for (int i = 0; i < len; ++i) { + vec_sptr.push_back(vec_names[i].c_str()); + } + return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + &vec_dmats[0], &vec_sptr[0], len)); + } + SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { + bst_ulong olen; + const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dmat), + asInteger(output_margin), + &olen); + SEXP ret = PROTECT(allocVector(REALSXP, olen)); + for (size_t i = 0; i < olen; ++i) { + REAL(ret)[i] = res[i]; + } + UNPROTECT(1); + return ret; + } + void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { + XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + } + void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { + XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + } + void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { + bst_ulong olen; + const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), + CHAR(asChar(fmap)), + &olen); + FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w"); + for (size_t i = 0; i < olen; ++i) { + fprintf(fo, "booster[%u]:\n", static_cast(i)); + fprintf(fo, "%s", res[i]); + } + fclose(fo); + } +} diff --git a/R-package/src-i386/xgboost_R.h b/R-package/src-i386/xgboost_R.h new file mode 100644 index 000000000..8e8b2728b --- /dev/null +++ b/R-package/src-i386/xgboost_R.h @@ -0,0 +1,124 @@ +#ifndef XGBOOST_WRAPPER_R_H_ +#define XGBOOST_WRAPPER_R_H_ +/*! + * \file xgboost_wrapper_R.h + * \author Tianqi Chen + * \brief R wrapper of xgboost + */ +extern "C" { +#include +} + +extern "C" { + /*! + * \brief load a data matrix + * \param fname name of the content + * \param silent whether print messages + * \return a loaded data matrix + */ + SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent); + /*! + * \brief create matrix content from dense matrix + * This assumes the matrix is stored in column major format + * \param data R Matrix object + * \param missing which value to represent missing value + * \return created dmatrix + */ + SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP missing); + /*! + * \brief create a matrix content from CSC format + * \param indptr pointer to column headers + * \param indices row indices + * \param data content of the data + * \return created dmatrix + */ + SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, + SEXP indices, + SEXP data); + /*! + * \brief load a data matrix into binary file + * \param handle a instance of data matrix + * \param fname file name + * \param silent print statistics when saving + */ + void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent); + /*! + * \brief set information to dmatrix + * \param handle a instance of data matrix + * \param field field name, can be label, weight + * \param array pointer to float vector + */ + void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array); + /*! + * \brief get info vector from matrix + * \param handle a instance of data matrix + * \param field field name + * \return info vector + */ + SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field); + /*! + * \brief create xgboost learner + * \param dmats a list of dmatrix handles that will be cached + */ + SEXP XGBoosterCreate_R(SEXP dmats); + /*! + * \brief set parameters + * \param handle handle + * \param name parameter name + * \param val value of parameter + */ + void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val); + /*! + * \brief update the model in one round using dtrain + * \param handle handle + * \param iter current iteration rounds + * \param dtrain training data + */ + void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain); + /*! + * \brief update the model, by directly specify gradient and second order gradient, + * this can be used to replace UpdateOneIter, to support customized loss function + * \param handle handle + * \param dtrain training data + * \param grad gradient statistics + * \param hess second order gradient statistics + */ + void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess); + /*! + * \brief get evaluation statistics for xgboost + * \param handle handle + * \param iter current iteration rounds + * \param dmats list of handles to dmatrices + * \param evname name of evaluation + * \return the string containing evaluation stati + */ + SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames); + /*! + * \brief make prediction based on dmat + * \param handle handle + * \param dmat data matrix + * \param output_margin whether only output raw margin value + */ + SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin); + /*! + * \brief load model from existing file + * \param handle handle + * \param fname file name + */ + void XGBoosterLoadModel_R(SEXP handle, SEXP fname); + /*! + * \brief save model into existing file + * \param handle handle + * \param fname file name + */ + void XGBoosterSaveModel_R(SEXP handle, SEXP fname); + /*! + * \brief dump model into text file + * \param handle handle + * \param fname file name of model that can be dumped into + * \param fmap name to fmap can be empty string + */ + void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap); +}; +#endif // XGBOOST_WRAPPER_R_H_ diff --git a/R-package/src-x64/Makevars b/R-package/src-x64/Makevars new file mode 100644 index 000000000..10b2661d0 --- /dev/null +++ b/R-package/src-x64/Makevars @@ -0,0 +1,28 @@ +# _*_ mode: Makefile; _*_ +export CC = gcc +export CXX = g++ + +# expose these flags to R CMD SHLIB +PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) + +ifeq ($(no_omp),1) + PKG_CPPFLAGS += -DDISABLE_OPENMP +endif + +CXXOBJ= xgboost_wrapper.o xgboost_io.o +OBJECTS= xgboost_R.o $(CXXOBJ) + +.PHONY: all clean +all: $(SHLIB) +$(SHLIB): $(OBJECTS) + +xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp +xgboost_io.o: ../../src/io/io.cpp + +$(CXXOBJ) : + $(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + +clean: + rm -rf *.so *.o *~ *.dll + diff --git a/R-package/src-x64/Makevars.win b/R-package/src-x64/Makevars.win new file mode 100644 index 000000000..5d2261230 --- /dev/null +++ b/R-package/src-x64/Makevars.win @@ -0,0 +1,32 @@ +# _*_ mode: Makefile; _*_ +export CC = gcc +export CXX = g++ + +# expose these flags to R CMD SHLIB +PKG_CPPFLAGS = -O3 -Wno-unknown-pragmas -DXGBOOST_CUSTOMIZE_ERROR_ -fopenmp -fPIC $(SHLIB_OPENMP_CFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) + +# add flag to build native code even in cross compiler +ifeq "$(WIN)" "64" + PKG_CPPFLAGS += -m64 +endif + +ifeq ($(no_omp),1) + PKG_CPPFLAGS += -DDISABLE_OPENMP +endif + +CXXOBJ= xgboost_wrapper.o xgboost_io.o +OBJECTS= xgboost_R.o $(CXXOBJ) + +.PHONY: all clean +all: $(SHLIB) +$(SHLIB): $(OBJECTS) + +xgboost_wrapper.o: ../../wrapper/xgboost_wrapper.cpp +xgboost_io.o: ../../src/io/io.cpp + +$(CXXOBJ) : + $(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + +clean: + rm -rf *.so *.o *~ *.dll diff --git a/R-package/src-x64/xgboost_R.cpp b/R-package/src-x64/xgboost_R.cpp new file mode 100644 index 000000000..1ca232509 --- /dev/null +++ b/R-package/src-x64/xgboost_R.cpp @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include "xgboost_R.h" +#include "../../wrapper/xgboost_wrapper.h" +#include "../../src/utils/utils.h" +#include "../../src/utils/omp.h" +#include "../../src/utils/matrix_csr.h" + +using namespace xgboost; +// implements error handling +namespace xgboost { +namespace utils { +void HandleAssertError(const char *msg) { + error("%s", msg); +} +void HandleCheckError(const char *msg) { + error("%s", msg); +} +} // namespace utils +} // namespace xgboost + +extern "C" { + void _DMatrixFinalizer(SEXP ext) { + if (R_ExternalPtrAddr(ext) == NULL) return; + XGDMatrixFree(R_ExternalPtrAddr(ext)); + R_ClearExternalPtr(ext); + } + SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { + void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent)); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP missing) { + SEXP dim = getAttrib(mat, R_DimSymbol); + int nrow = INTEGER(dim)[0]; + int ncol = INTEGER(dim)[1]; + double *din = REAL(mat); + std::vector data(nrow * ncol); + #pragma omp parallel for schedule(static) + for (int i = 0; i < nrow; ++i) { + for (int j = 0; j < ncol; ++j) { + data[i * ncol +j] = din[i + nrow * j]; + } + } + void *handle = XGDMatrixCreateFromMat(&data[0], nrow, ncol, asReal(missing)); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, + SEXP indices, + SEXP data) { + const int *col_ptr = INTEGER(indptr); + const int *row_index = INTEGER(indices); + const double *col_data = REAL(data); + int ncol = length(indptr) - 1; + int ndata = length(data); + // transform into CSR format + std::vector row_ptr; + std::vector< std::pair > csr_data; + utils::SparseCSRMBuilder, false, bst_ulong> builder(row_ptr, csr_data); + builder.InitBudget(); + for (int i = 0; i < ncol; ++i) { + for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) { + builder.AddBudget(row_index[j]); + } + } + builder.InitStorage(); + for (int i = 0; i < ncol; ++i) { + for (int j = col_ptr[i]; j < col_ptr[i+1]; ++j) { + builder.PushElem(row_index[j], std::make_pair(i, col_data[j])); + } + } + utils::Assert(csr_data.size() == static_cast(ndata), "BUG CreateFromCSC"); + std::vector row_data(ndata); + std::vector col_index(ndata); + #pragma omp parallel for schedule(static) + for (int i = 0; i < ndata; ++i) { + col_index[i] = csr_data[i].first; + row_data[i] = csr_data[i].second; + } + void *handle = XGDMatrixCreateFromCSR(&row_ptr[0], &col_index[0], &row_data[0], row_ptr.size(), ndata ); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { + XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), + CHAR(asChar(fname)), asInteger(silent)); + } + void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { + int len = length(array); + const char *name = CHAR(asChar(field)); + if (!strcmp("group", name)) { + std::vector vec(len); + #pragma omp parallel for schedule(static) + for (int i = 0; i < len; ++i) { + vec[i] = static_cast(INTEGER(array)[i]); + } + XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len); + return; + } + { + std::vector vec(len); + #pragma omp parallel for schedule(static) + for (int i = 0; i < len; ++i) { + vec[i] = REAL(array)[i]; + } + XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + &vec[0], len); + } + } + SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { + bst_ulong olen; + const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), &olen); + SEXP ret = PROTECT(allocVector(REALSXP, olen)); + for (size_t i = 0; i < olen; ++i) { + REAL(ret)[i] = res[i]; + } + UNPROTECT(1); + return ret; + } + // functions related to booster + void _BoosterFinalizer(SEXP ext) { + if (R_ExternalPtrAddr(ext) == NULL) return; + XGBoosterFree(R_ExternalPtrAddr(ext)); + R_ClearExternalPtr(ext); + } + SEXP XGBoosterCreate_R(SEXP dmats) { + int len = length(dmats); + std::vector dvec; + for (int i = 0; i < len; ++i){ + dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + } + void *handle = XGBoosterCreate(&dvec[0], dvec.size()); + SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); + UNPROTECT(1); + return ret; + } + void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { + XGBoosterSetParam(R_ExternalPtrAddr(handle), + CHAR(asChar(name)), + CHAR(asChar(val))); + } + void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { + XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + R_ExternalPtrAddr(dtrain)); + } + void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { + utils::Check(length(grad) == length(hess), "gradient and hess must have same length"); + int len = length(grad); + std::vector tgrad(len), thess(len); + #pragma omp parallel for schedule(static) + for (int j = 0; j < len; ++j) { + tgrad[j] = REAL(grad)[j]; + thess[j] = REAL(hess)[j]; + } + XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dtrain), + &tgrad[0], &thess[0], len); + } + SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { + utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length"); + int len = length(dmats); + std::vector vec_dmats; + std::vector vec_names; + std::vector vec_sptr; + for (int i = 0; i < len; ++i) { + vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i))))); + } + for (int i = 0; i < len; ++i) { + vec_sptr.push_back(vec_names[i].c_str()); + } + return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + &vec_dmats[0], &vec_sptr[0], len)); + } + SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { + bst_ulong olen; + const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dmat), + asInteger(output_margin), + &olen); + SEXP ret = PROTECT(allocVector(REALSXP, olen)); + for (size_t i = 0; i < olen; ++i) { + REAL(ret)[i] = res[i]; + } + UNPROTECT(1); + return ret; + } + void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { + XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + } + void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { + XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + } + void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { + bst_ulong olen; + const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), + CHAR(asChar(fmap)), + &olen); + FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w"); + for (size_t i = 0; i < olen; ++i) { + fprintf(fo, "booster[%u]:\n", static_cast(i)); + fprintf(fo, "%s", res[i]); + } + fclose(fo); + } +} diff --git a/R-package/src-x64/xgboost_R.h b/R-package/src-x64/xgboost_R.h new file mode 100644 index 000000000..8e8b2728b --- /dev/null +++ b/R-package/src-x64/xgboost_R.h @@ -0,0 +1,124 @@ +#ifndef XGBOOST_WRAPPER_R_H_ +#define XGBOOST_WRAPPER_R_H_ +/*! + * \file xgboost_wrapper_R.h + * \author Tianqi Chen + * \brief R wrapper of xgboost + */ +extern "C" { +#include +} + +extern "C" { + /*! + * \brief load a data matrix + * \param fname name of the content + * \param silent whether print messages + * \return a loaded data matrix + */ + SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent); + /*! + * \brief create matrix content from dense matrix + * This assumes the matrix is stored in column major format + * \param data R Matrix object + * \param missing which value to represent missing value + * \return created dmatrix + */ + SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP missing); + /*! + * \brief create a matrix content from CSC format + * \param indptr pointer to column headers + * \param indices row indices + * \param data content of the data + * \return created dmatrix + */ + SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, + SEXP indices, + SEXP data); + /*! + * \brief load a data matrix into binary file + * \param handle a instance of data matrix + * \param fname file name + * \param silent print statistics when saving + */ + void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent); + /*! + * \brief set information to dmatrix + * \param handle a instance of data matrix + * \param field field name, can be label, weight + * \param array pointer to float vector + */ + void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array); + /*! + * \brief get info vector from matrix + * \param handle a instance of data matrix + * \param field field name + * \return info vector + */ + SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field); + /*! + * \brief create xgboost learner + * \param dmats a list of dmatrix handles that will be cached + */ + SEXP XGBoosterCreate_R(SEXP dmats); + /*! + * \brief set parameters + * \param handle handle + * \param name parameter name + * \param val value of parameter + */ + void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val); + /*! + * \brief update the model in one round using dtrain + * \param handle handle + * \param iter current iteration rounds + * \param dtrain training data + */ + void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain); + /*! + * \brief update the model, by directly specify gradient and second order gradient, + * this can be used to replace UpdateOneIter, to support customized loss function + * \param handle handle + * \param dtrain training data + * \param grad gradient statistics + * \param hess second order gradient statistics + */ + void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess); + /*! + * \brief get evaluation statistics for xgboost + * \param handle handle + * \param iter current iteration rounds + * \param dmats list of handles to dmatrices + * \param evname name of evaluation + * \return the string containing evaluation stati + */ + SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames); + /*! + * \brief make prediction based on dmat + * \param handle handle + * \param dmat data matrix + * \param output_margin whether only output raw margin value + */ + SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin); + /*! + * \brief load model from existing file + * \param handle handle + * \param fname file name + */ + void XGBoosterLoadModel_R(SEXP handle, SEXP fname); + /*! + * \brief save model into existing file + * \param handle handle + * \param fname file name + */ + void XGBoosterSaveModel_R(SEXP handle, SEXP fname); + /*! + * \brief dump model into text file + * \param handle handle + * \param fname file name of model that can be dumped into + * \param fmap name to fmap can be empty string + */ + void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap); +}; +#endif // XGBOOST_WRAPPER_R_H_