NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks are restored; indentation and the position of blank context
lines are reconstructed so that each hunk agrees with its @@ line counts,
but they cannot be verified from the collapsed text. Check against the
target tree before applying, or use `git apply --ignore-whitespace`.
NOTE(review): the "+" side was amended during review, so the post-image
blob ids on the "index" lines are stale:
  * xgboost(): label=y referenced an undefined variable -> label=label
  * xgboost(): class tests use inherits()/is.matrix()/is.character()
    instead of comparing the whole class() vector with == inside ||
  * xgb.DMatrix.save() / xgb.save(): error messages name the right
    function and the types each one actually accepts
  * demo-new.R: comment typos

diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index 9dd4eaac0..21b6c9ace 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -8,3 +8,4 @@ export(xgb.train)
 export(xgb.save)
 export(xgb.load)
 export(xgb.dump)
+export(xgb.DMatrix.save)
diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R
index b5835a4ae..e076de18f 100644
--- a/R-package/R/xgb.DMatrix.R
+++ b/R-package/R/xgb.DMatrix.R
@@ -1,5 +1,5 @@
 # constructing DMatrix
-xgb.DMatrix <- function(data, missing=0.0, ...) {
+xgb.DMatrix <- function(data, info=list(), missing=0.0, ...) {
   if (typeof(data) == "character") {
     handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE="xgboost")
   } else if(is.matrix(data)) {
@@ -11,7 +11,7 @@ xgb.DMatrix <- function(data, missing=0.0, ...) {
   }
   dmat <- structure(handle, class="xgb.DMatrix")
 
-  info = list(...)
+  info = append(info,list(...))
   if (length(info)==0)
     return(dmat)
   for (i in 1:length(info)) {
diff --git a/R-package/R/xgb.DMatrix.save.R b/R-package/R/xgb.DMatrix.save.R
new file mode 100644
index 000000000..98400e2f2
--- /dev/null
+++ b/R-package/R/xgb.DMatrix.save.R
@@ -0,0 +1,12 @@
+# save DMatrix to file
+xgb.DMatrix.save <- function(handle, fname) {
+  if (typeof(fname) != "character") {
+    stop("xgb.DMatrix.save: fname must be character")
+  }
+  if (inherits(handle, "xgb.DMatrix")) {
+    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
+    return(TRUE)
+  }
+  stop("xgb.DMatrix.save: the input must be xgb.DMatrix")
+  return(FALSE)
+}
diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R
index 355c20f12..c4269a250 100644
--- a/R-package/R/xgb.save.R
+++ b/R-package/R/xgb.save.R
@@ -7,10 +7,6 @@ xgb.save <- function(handle, fname) {
     .Call("XGBoosterSaveModel_R", handle, fname, PACKAGE="xgboost")
     return(TRUE)
   }
-  if (class(handle) == "xgb.DMatrix") {
-    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE), PACKAGE="xgboost")
-    return(TRUE)
-  }
-  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
+  stop("xgb.save: the input must be xgb.Booster")
   return(FALSE)
 }
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index c452dfde0..c5299e941 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -1,49 +1,40 @@
 # Main function for xgboost-package
 
-xgboost = function(x=NULL,y=NULL,DMatrix=NULL, file=NULL, validation=NULL,
-                   nrounds=10, obj=NULL, feval=NULL, margin=NULL, verbose = T, ...)
+xgboost = function(data=NULL, label = NULL, params=list(), nrounds=10,
+                   verbose = 1, ...)
 {
-    if (!is.null(DMatrix))
-        dtrain = DMatrix
+    if (inherits(data, 'dgCMatrix') || is.matrix(data))
+    {
+        if (is.null(label))
+            stop('xgboost: need label when data is a matrix')
+        dtrain = xgb.DMatrix(data, label=label)
+    }
     else
     {
-        if (is.null(x) && is.null(y))
-        {
-            if (is.null(file))
-                stop('xgboost need input data, either R objects, local files or DMatrix object.')
-            dtrain = xgb.DMatrix(file)
-        }
+        if (!is.null(label))
+            warning('xgboost: label will be ignored.')
+        if (is.character(data))
+            dtrain = xgb.DMatrix(data)
+        else if (inherits(data, 'xgb.DMatrix'))
+            dtrain = data
         else
-            dtrain = xgb.DMatrix(x, label=y)
-        if (!is.null(margin))
-        {
-            succ <- xgb.setinfo(dtrain, "base_margin", margin)
-            if (!succ)
-                warning('Attemp to use margin failed.')
-        }
+            stop('xgboost: Invalid input of data')
     }
 
-    params = list(...)
+    if (verbose>1)
+        silent = 0
+    else
+        silent = 1
 
-    watchlist=list()
-    if (verbose)
-    {
-        if (!is.null(validation))
-        {
-            if (class(validation)!='xgb.DMatrix')
-                dtest = xgb.DMatrix(validation)
-            else
-                dtest = validation
-            watchlist = list(eval=dtest,train=dtrain)
-        }
-
-        else
-            watchlist = list(train=dtrain)
-    }
+    params = append(params, list(silent=silent))
+    params = append(params, list(...))
 
-    bst <- xgb.train(params, dtrain, nrounds, watchlist, obj, feval)
+    if (verbose>0)
+        watchlist = list(train=dtrain)
+    else
+        watchlist = list()
+
+    bst <- xgb.train(params, dtrain, nrounds, watchlist)
 
     return(bst)
 }
-
-
diff --git a/R-package/inst/examples/demo-new.R b/R-package/inst/examples/demo-new.R
index 01f44ee9f..5cde74320 100644
--- a/R-package/inst/examples/demo-new.R
+++ b/R-package/inst/examples/demo-new.R
@@ -51,20 +51,25 @@ dtrain = xgb.DMatrix(dense.x, label=y)
 
 ############################
 # Test with DMatrix object
-bst = xgboost(DMatrix=dtrain, max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic')
+
+# Verbose = 0,1,2
+bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic',
+              verbose = 0)
+bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic',
+              verbose = 1)
+bst = xgboost(data=dtrain, max_depth=2, eta=1, objective='binary:logistic',
+              verbose = 2)
 
 # Test with local file
-bst = xgboost(file='agaricus.txt.train', max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data='agaricus.txt.train', max_depth=2, eta=1, objective='binary:logistic')
 
 # Test with Sparse Matrix
-bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data = x, label = y, max_depth=2, eta=1, objective='binary:logistic')
 
 # Test with dense Matrix
-bst = xgboost(x = dense.x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic')
+bst = xgboost(data = dense.x, label = y, max_depth=2, eta=1, objective='binary:logistic')
 
-# Test with validation set
-bst = xgboost(file='agaricus.txt.train', validation='agaricus.txt.test',
-              max_depth=2, eta=1, silent=1, objective='binary:logistic')
 
 ############################
 # Test predict
@@ -102,17 +107,39 @@ pred = predict(bst, test.x)
 # save model to text file
 xgb.dump(bst, 'model.dump')
 
+# save a DMatrix object to hard disk
+xgb.DMatrix.save(dtrain,'dtrain.save')
+
+# load a DMatrix object to R
+dtrain = xgb.DMatrix('dtrain.save')
+
 ############################
-# Customized objective and evaluation function
+# More flexible training function xgb.train
 ############################
 
+param = list(max_depth=2, eta=1, silent = 1, objective="binary:logistic")
+watchlist <- list("eval"=dtest,"train"=dtrain)
+
+# training xgboost model
+bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
+
+############################
+# customized loss function
+############################
+
+param <- list(max_depth = 2, eta = 1, silent =1)
+
+# note: for customized objective function, we leave objective as default
+# note: what we are getting is margin value in prediction
+# you must know what you are doing
+
 # user define objective function, given prediction, return gradient and second order gradient
 # this is loglikelihood loss
-logregobj = function(preds, dtrain) {
-  labels = xgb.getinfo(dtrain, "label")
-  preds = 1.0 / (1.0 + exp(-preds))
-  grad = preds - labels
-  hess = preds * (1.0-preds)
+logregobj <- function(preds, dtrain) {
+  labels <- xgb.getinfo(dtrain, "label")
+  preds <- 1.0 / (1.0 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1.0-preds)
   return(list(grad=grad, hess=hess))
 }
 # user defined evaluation function, return a list(metric="metric-name", value="metric-value")
@@ -121,13 +148,14 @@ logregobj = function(preds, dtrain) {
 # for example, we are doing logistic loss, the prediction is score before logistic transformation
 # the buildin evaluation error assumes input is after logistic transformation
 # Take this in mind when you use the customization, and maybe you need write customized evaluation function
-evalerror = function(preds, dtrain) {
-  labels = xgb.getinfo(dtrain, "label")
-  err = as.numeric(sum(labels != (preds > 0.0))) / length(labels)
+evalerror <- function(preds, dtrain) {
+  labels <- xgb.getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
   return(list(metric="error", value=err))
 }
 
-bst = xgboost(x = x, y = y, max_depth=2, eta=1, silent=1, objective='binary:logistic',
-              obj=logregobj, feval=evalerror)
+# training with customized objective, we can also do step by step training
+# simply look at xgboost.py's implementation of train
+bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
 
 