complete R example
This commit is contained in:
parent
8bf758c63b
commit
de83ac72ea
@ -6,9 +6,72 @@ dtrain <- xgb.DMatrix("agaricus.txt.train")
|
|||||||
dtest <- xgb.DMatrix("agaricus.txt.test")
|
dtest <- xgb.DMatrix("agaricus.txt.test")
|
||||||
param = list('bst:max_depth'=2, 'bst:eta'=1, 'silent'=1, 'objective'='binary:logistic')
|
param = list('bst:max_depth'=2, 'bst:eta'=1, 'silent'=1, 'objective'='binary:logistic')
|
||||||
watchlist <- list('train'=dtrain,'test'=dtest)
|
watchlist <- list('train'=dtrain,'test'=dtest)
|
||||||
bst <- xgb.train(param, dtrain, watchlist=watchlist, nround=3)
|
# training xgboost model
|
||||||
|
bst <- xgb.train(param, dtrain, nround=3, watchlist=watchlist)
|
||||||
|
# make prediction
|
||||||
|
preds <- xgb.predict(bst, dtest)
|
||||||
|
labels <- xgb.getinfo(dtest, "label")
|
||||||
|
err <- as.real(sum(as.integer(preds > 0.5) != labels)) / length(labels)
|
||||||
|
# print error rate
|
||||||
|
print(err)
|
||||||
|
|
||||||
succ <- xgb.save(bst, "iter.model")
|
# save dmatrix into binary buffer
|
||||||
print('finsih save model')
|
succ <- xgb.save(dtest, "dtest.buffer")
|
||||||
bst2 <- xgb.Booster(modelfile="iter.model")
|
# save model into file
|
||||||
pred = xgb.predict(bst2, dtest)
|
succ <- xgb.save(bst, "xgb.model")
|
||||||
|
# load model in
|
||||||
|
bst2 <- xgb.Booster(modelfile="xgb.model")
|
||||||
|
dtest2 <- xgb.DMatrix("dtest.buffer")
|
||||||
|
preds2 <- xgb.predict(bst2, dtest2)
|
||||||
|
# print difference
|
||||||
|
print(sum(abs(preds2-preds)))
|
||||||
|
|
||||||
|
###
|
||||||
|
# advanced: cutomsized loss function
|
||||||
|
#
|
||||||
|
print("start running example to used cutomized objective function")
|
||||||
|
# note: for customized objective function, we leave objective as default
|
||||||
|
# note: what we are getting is margin value in prediction
|
||||||
|
# you must know what you are doing
|
||||||
|
param <- list('bst:max_depth' = 2, 'bst:eta' = 1, 'silent' =1)
|
||||||
|
# user define objective function, given prediction, return gradient and second order gradient
|
||||||
|
# this is loglikelihood loss
|
||||||
|
logregobj <- function(preds, dtrain) {
|
||||||
|
labels <- xgb.getinfo(dtrain, "label")
|
||||||
|
preds <- 1.0 / (1.0 + exp(-preds))
|
||||||
|
grad <- preds - labels
|
||||||
|
hess <- preds * (1.0-preds)
|
||||||
|
return(list(grad=grad, hess=hess))
|
||||||
|
}
|
||||||
|
# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
|
||||||
|
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||||
|
# this may make buildin evalution metric not function properly
|
||||||
|
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||||
|
# the buildin evaluation error assumes input is after logistic transformation
|
||||||
|
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||||
|
evalerror <- function(preds, dtrain) {
|
||||||
|
labels <- xgb.getinfo(dtrain, "label")
|
||||||
|
err <- as.real(sum(labels != (preds > 0.0))) / length(labels)
|
||||||
|
return(list(metric="error", value=err))
|
||||||
|
}
|
||||||
|
|
||||||
|
# training with customized objective, we can also do step by step training
|
||||||
|
# simply look at xgboost.py's implementation of train
|
||||||
|
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
|
||||||
|
|
||||||
|
###
|
||||||
|
# advanced: start from a initial base prediction
|
||||||
|
#
|
||||||
|
print ('start running example to start from a initial prediction')
|
||||||
|
# specify parameters via map, definition are same as c++ version
|
||||||
|
param = list('bst:max_depth'=2, 'bst:eta'=1, 'silent'=1, 'objective'='binary:logistic')
|
||||||
|
# train xgboost for 1 round
|
||||||
|
bst <- xgb.train( param, dtrain, 1, watchlist )
|
||||||
|
# Note: we need the margin value instead of transformed prediction in set_base_margin
|
||||||
|
# do predict with output_margin=True, will always give you margin values before logistic transformation
|
||||||
|
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
|
||||||
|
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
|
||||||
|
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
|
||||||
|
succ <- xgb.setinfo(dtest, "base_margin", ptest)
|
||||||
|
print ('this is result of running from initial prediction')
|
||||||
|
bst <- xgb.train( param, dtrain, 1, watchlist )
|
||||||
|
|||||||
@ -58,7 +58,7 @@ evallist = [(dtest,'eval'), (dtrain,'train')]
|
|||||||
bst = xgb.train( param, dtrain, num_round, evallist )
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
###
|
###
|
||||||
# advanced: cutomsized loss function, set loss_type to 0, so that predict get untransformed score
|
# advanced: cutomsized loss function
|
||||||
#
|
#
|
||||||
print ('start running example to used cutomized objective function')
|
print ('start running example to used cutomized objective function')
|
||||||
|
|
||||||
|
|||||||
@ -2,13 +2,60 @@
|
|||||||
dyn.load("./libxgboostR.so")
|
dyn.load("./libxgboostR.so")
|
||||||
|
|
||||||
# constructing DMatrix
|
# constructing DMatrix
|
||||||
xgb.DMatrix <- function(data) {
|
xgb.DMatrix <- function(data, info=list()) {
|
||||||
if (typeof(data) == "character") {
|
if (typeof(data) == "character") {
|
||||||
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE))
|
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE))
|
||||||
}else {
|
}else {
|
||||||
stop("xgb.DMatrix cannot recognize data type")
|
stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data)))
|
||||||
}
|
}
|
||||||
return(structure(handle, class="xgb.DMatrix"))
|
dmat = structure(handle, class="xgb.DMatrix")
|
||||||
|
if (length(info) != 0) {
|
||||||
|
for (i in 1:length(info)) {
|
||||||
|
p = info[i]
|
||||||
|
xgb.setinfo(dmat, names(p), p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(dmat)
|
||||||
|
}
|
||||||
|
# get information from dmatrix
|
||||||
|
xgb.getinfo <- function(dmat, name) {
|
||||||
|
if (typeof(name) != "character") {
|
||||||
|
stop("xgb.getinfo: name must be character")
|
||||||
|
}
|
||||||
|
if (class(dmat) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
|
||||||
|
}
|
||||||
|
if (name != "label" &&
|
||||||
|
name != "weight" &&
|
||||||
|
name != "base_margin" ) {
|
||||||
|
stop(paste("xgb.getinfo: unknown info name", name))
|
||||||
|
}
|
||||||
|
ret <- .Call("XGDMatrixGetInfo_R", dmat, name)
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
# set information into dmatrix, this mutate dmatrix
|
||||||
|
xgb.setinfo <- function(dmat, name, info) {
|
||||||
|
if (class(dmat) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix");
|
||||||
|
}
|
||||||
|
if (name == "label") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (name == "weight") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (name == "base_margin") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.real(info))
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (name == "group") {
|
||||||
|
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info))
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
stop(pase("xgb.setinfo: unknown info name", name))
|
||||||
|
return(FALSE)
|
||||||
}
|
}
|
||||||
# construct a Booster from cachelist
|
# construct a Booster from cachelist
|
||||||
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||||
@ -21,9 +68,11 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
handle <- .Call("XGBoosterCreate_R", cachelist)
|
handle <- .Call("XGBoosterCreate_R", cachelist)
|
||||||
for (i in 1:length(params)) {
|
if (length(params) != 0) {
|
||||||
p = params[i]
|
for (i in 1:length(params)) {
|
||||||
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p))
|
p = params[i]
|
||||||
|
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!is.null(modelfile)) {
|
if (!is.null(modelfile)) {
|
||||||
if (typeof(modelfile) != "character"){
|
if (typeof(modelfile) != "character"){
|
||||||
@ -34,7 +83,7 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
|||||||
return(structure(handle, class="xgb.Booster"))
|
return(structure(handle, class="xgb.Booster"))
|
||||||
}
|
}
|
||||||
# train a model using given parameters
|
# train a model using given parameters
|
||||||
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL) {
|
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL, feval=NULL) {
|
||||||
if (typeof(params) != "list") {
|
if (typeof(params) != "list") {
|
||||||
stop("xgb.train: first argument params must be list");
|
stop("xgb.train: first argument params must be list");
|
||||||
}
|
}
|
||||||
@ -49,10 +98,24 @@ xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL) {
|
|||||||
pred = xgb.predict(bst, dtrain)
|
pred = xgb.predict(bst, dtrain)
|
||||||
gpair = obj(pred, dtrain)
|
gpair = obj(pred, dtrain)
|
||||||
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
||||||
}
|
}
|
||||||
if (length(watchlist) != 0) {
|
if (length(watchlist) != 0) {
|
||||||
msg <- xgb.iter.eval(bst, watchlist, i-1)
|
if (is.null(feval)) {
|
||||||
|
msg <- xgb.iter.eval(bst, watchlist, i-1)
|
||||||
cat(msg); cat("\n")
|
cat(msg); cat("\n")
|
||||||
|
} else {
|
||||||
|
cat("["); cat(i); cat("]");
|
||||||
|
for (j in 1:length(watchlist)) {
|
||||||
|
w <- watchlist[j]
|
||||||
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
|
||||||
|
cat("\t"); cat(names(w)); cat("-"); cat(ret$metric);
|
||||||
|
cat(":"); cat(ret$value)
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(bst)
|
return(bst)
|
||||||
@ -124,12 +187,14 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
evnames <- list()
|
evnames <- list()
|
||||||
for (i in 1:length(watchlist)) {
|
if (length(watchlist) != 0) {
|
||||||
w <- watchlist[i]
|
for (i in 1:length(watchlist)) {
|
||||||
if (length(names(w)) == 0) {
|
w <- watchlist[i]
|
||||||
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
evnames <- append(evnames, names(w))
|
||||||
}
|
}
|
||||||
evnames <- append(evnames, names(w))
|
|
||||||
}
|
}
|
||||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)
|
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)
|
||||||
return(msg)
|
return(msg)
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
#include "xgboost_wrapper.h"
|
#include "xgboost_wrapper.h"
|
||||||
#include "xgboost_R.h"
|
#include "xgboost_R.h"
|
||||||
#include "../src/utils/utils.h"
|
#include "../src/utils/utils.h"
|
||||||
@ -24,7 +25,40 @@ extern "C" {
|
|||||||
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(fname)), asInteger(silent));
|
CHAR(asChar(fname)), asInteger(silent));
|
||||||
}
|
}
|
||||||
|
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
|
||||||
|
int len = length(array);
|
||||||
|
const char *name = CHAR(asChar(field));
|
||||||
|
if (!strcmp("group", name)) {
|
||||||
|
std::vector<unsigned> vec(len);
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for (int i = 0; i < len; ++i) {
|
||||||
|
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
|
||||||
|
}
|
||||||
|
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), &vec[0], len);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
std::vector<float> vec(len);
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for (int i = 0; i < len; ++i) {
|
||||||
|
vec[i] = REAL(array)[i];
|
||||||
|
}
|
||||||
|
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
|
||||||
|
CHAR(asChar(field)),
|
||||||
|
&vec[0], len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
|
||||||
|
size_t olen;
|
||||||
|
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
|
||||||
|
CHAR(asChar(field)), &olen);
|
||||||
|
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||||
|
for (size_t i = 0; i < olen; ++i) {
|
||||||
|
REAL(ret)[i] = res[i];
|
||||||
|
}
|
||||||
|
UNPROTECT(1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
// functions related to booster
|
// functions related to booster
|
||||||
void _BoosterFinalizer(SEXP ext) {
|
void _BoosterFinalizer(SEXP ext) {
|
||||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
|
|||||||
@ -24,6 +24,20 @@ extern "C" {
|
|||||||
* \param silent print statistics when saving
|
* \param silent print statistics when saving
|
||||||
*/
|
*/
|
||||||
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
|
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
|
||||||
|
/*!
|
||||||
|
* \brief set information to dmatrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param field field name, can be label, weight
|
||||||
|
* \param array pointer to float vector
|
||||||
|
*/
|
||||||
|
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array);
|
||||||
|
/*!
|
||||||
|
* \brief get info vector from matrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param field field name
|
||||||
|
* \return info vector
|
||||||
|
*/
|
||||||
|
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
|
||||||
/*!
|
/*!
|
||||||
* \brief create xgboost learner
|
* \brief create xgboost learner
|
||||||
* \param dmats a list of dmatrix handles that will be cached
|
* \param dmats a list of dmatrix handles that will be cached
|
||||||
|
|||||||
@ -81,8 +81,8 @@ extern "C" {
|
|||||||
/*!
|
/*!
|
||||||
* \brief get float info vector from matrix
|
* \brief get float info vector from matrix
|
||||||
* \param handle a instance of data matrix
|
* \param handle a instance of data matrix
|
||||||
* \param len used to set result length
|
|
||||||
* \param field field name
|
* \param field field name
|
||||||
|
* \param out_len used to set result length
|
||||||
* \return pointer to the label
|
* \return pointer to the label
|
||||||
*/
|
*/
|
||||||
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
|
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user