diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 2a1b79578..79d438917 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -65,6 +65,15 @@ check.booster.params <- function(params, ...) {
     stop("'num_class' > 1 parameter must be set for multiclass classification")
   }
 
+  # monotone_constraints parser: collapse a numeric vector such as c(1, -1)
+  # into the "(1,-1)" string form expected by the core library
+  if (!is.null(params[['monotone_constraints']]) &&
+      typeof(params[['monotone_constraints']]) != "character") {
+    vec2str = paste(params[['monotone_constraints']], collapse = ',')
+    vec2str = paste0('(', vec2str, ')')
+    params[['monotone_constraints']] = vec2str
+  }
+
   return(params)
 }
 
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 08cb71b84..f9e7e9d89 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -25,6 +25,7 @@
 #'   \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
 #'   \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
 #'   \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
+#'   \item \code{monotone_constraints} A numerical vector consisting of \code{1}, \code{0} and \code{-1}, with length equal to the number of features in the training data. \code{1} enforces an increasing constraint, \code{-1} a decreasing constraint, and \code{0} no constraint.
 #' }
 #'
 #' 2.2. Parameter for Linear Booster
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index a464c04e7..0772d9e5a 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -39,6 +39,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
   \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
   \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
   \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
+  \item \code{monotone_constraints} A numerical vector consisting of \code{1}, \code{0} and \code{-1}, with length equal to the number of features in the training data. \code{1} enforces an increasing constraint, \code{-1} a decreasing constraint, and \code{0} no constraint.
 }
 
 2.2. Parameter for Linear Booster
diff --git a/R-package/tests/testthat/test_monotone.R b/R-package/tests/testthat/test_monotone.R
new file mode 100644
index 000000000..822fefb65
--- /dev/null
+++ b/R-package/tests/testthat/test_monotone.R
@@ -0,0 +1,24 @@
+require(xgboost)
+
+context("monotone constraints")
+
+set.seed(1024)
+x = rnorm(1000, 10)
+y = -1*x + rnorm(1000, 0.001) + 3*sin(x)
+train = matrix(x, ncol = 1)
+
+
+test_that("monotone constraints for regression", {
+  bst = xgboost(data = train, label = y, max_depth = 2,
+                eta = 0.1, nthread = 2, nrounds = 100,
+                monotone_constraints = -1)
+
+  pred = predict(bst, train)
+
+  ind = order(train[,1])
+  pred.ord = pred[ind]
+  expect_true({
+    !any(diff(pred.ord) > 0)
+  }, "Monotone Constraint Satisfied")
+
+})
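
For reference, a minimal usage sketch of the vector form enabled by the utils.R change; the synthetic two-feature data and object names below are illustrative only and not part of the patch. check.booster.params() collapses c(1, -1) into the "(1,-1)" string expected by the core library.

library(xgboost)

set.seed(42)
n  <- 1000
x1 <- runif(n)                              # constrained to be increasing
x2 <- runif(n)                              # constrained to be decreasing
y  <- 2 * x1 - 3 * x2 + rnorm(n, sd = 0.1)
train <- cbind(x1, x2)

# Constraints are passed as a plain numeric vector, one entry per feature;
# the parser in check.booster.params() turns c(1, -1) into "(1,-1)".
bst <- xgboost(data = train, label = y, max_depth = 2, eta = 0.1,
               nthread = 2, nrounds = 100,
               monotone_constraints = c(1, -1))

# Sanity check in the spirit of the new test: with x2 held fixed,
# predictions should be non-decreasing along x1.
grid <- cbind(x1 = sort(runif(200)), x2 = 0.5)
stopifnot(!any(diff(predict(bst, grid)) < 0))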