[R] Monotonic Constraints in Tree Construction (#1557)
* fix cran check * change required R version because of utils::globalVariables * temporary commit, monotone not working * fix test * fix doc * fix doc
This commit is contained in:
parent
fb02797e2a
commit
4733357278
@ -65,6 +65,15 @@ check.booster.params <- function(params, ...) {
|
|||||||
stop("'num_class' > 1 parameter must be set for multiclass classification")
|
stop("'num_class' > 1 parameter must be set for multiclass classification")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# monotone_constraints parser
|
||||||
|
|
||||||
|
if (!is.null(params[['monotone_constraints']]) &&
|
||||||
|
typeof(params[['monotone_constraints']]) != "character") {
|
||||||
|
vec2str = paste(params[['monotone_constraints']], collapse = ',')
|
||||||
|
vec2str = paste0('(', vec2str, ')')
|
||||||
|
params[['monotone_constraints']] = vec2str
|
||||||
|
}
|
||||||
|
|
||||||
return(params)
|
return(params)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -25,6 +25,7 @@
|
|||||||
#' \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
|
#' \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
|
||||||
#' \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
|
#' \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
|
||||||
#' \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
|
#' \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
|
||||||
|
#' \item \code{monotone_constraints} A numeric vector of \code{1}, \code{0} and \code{-1} values whose length equals the number of features in the training data. \code{1} enforces an increasing constraint, \code{-1} a decreasing constraint, and \code{0} no constraint.
|
||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' 2.2. Parameter for Linear Booster
|
#' 2.2. Parameter for Linear Booster
|
||||||
|
|||||||
@ -39,6 +39,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
|
|||||||
\item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
|
\item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
|
||||||
\item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
|
\item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
|
||||||
\item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
|
\item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
|
||||||
|
\item \code{monotone_constraints} A numeric vector of \code{1}, \code{0} and \code{-1} values whose length equals the number of features in the training data. \code{1} enforces an increasing constraint, \code{-1} a decreasing constraint, and \code{0} no constraint.
|
||||||
}
|
}
|
||||||
|
|
||||||
2.2. Parameter for Linear Booster
|
2.2. Parameter for Linear Booster
|
||||||
|
|||||||
24
R-package/tests/testthat/test_monotone.R
Normal file
24
R-package/tests/testthat/test_monotone.R
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# Checks that a decreasing monotone constraint on the single training feature
# is honoured by the predictions of the fitted booster.
library(xgboost)

context("monotone constraints")

# Fixed seed so the simulated data is reproducible across runs.
set.seed(1024)
x <- rnorm(1000, 10)
# Decreasing linear trend plus noise and a sine term: the raw response is not
# monotone in x, so an unconstrained fit would be free to follow the wiggles.
y <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x)
train <- matrix(x, ncol = 1)

test_that("monotone constraints for regression", {
  bst <- xgboost(data = train, label = y, max_depth = 2,
                 eta = 0.1, nthread = 2, nrounds = 100,
                 monotone_constraints = -1)

  pred <- predict(bst, train)

  # Sort predictions by feature value; with a decreasing constraint no
  # consecutive difference may be positive.
  ind <- order(train[, 1])
  pred_ord <- pred[ind]
  expect_true(
    !any(diff(pred_ord) > 0),
    # `info` is only displayed when the expectation fails, so describe the failure.
    info = "Monotone constraint violated: predictions increase with x"
  )
})
|
||||||
Loading…
x
Reference in New Issue
Block a user