parent
808f61081b
commit
5199b86126
@ -5,8 +5,8 @@
|
||||
#' @export
|
||||
xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||
params = list(), nrounds,
|
||||
verbose = 1, print_every_n = 1L,
|
||||
early_stopping_rounds = NULL, maximize = NULL,
|
||||
verbose = 1, print_every_n = 1L,
|
||||
early_stopping_rounds = NULL, maximize = NULL,
|
||||
save_period = NULL, save_name = "xgboost.model",
|
||||
xgb_model = NULL, callbacks = list(), ...) {
|
||||
|
||||
@ -18,16 +18,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||
early_stopping_rounds = early_stopping_rounds, maximize = maximize,
|
||||
save_period = save_period, save_name = save_name,
|
||||
xgb_model = xgb_model, callbacks = callbacks, ...)
|
||||
return(bst)
|
||||
return (bst)
|
||||
}
|
||||
|
||||
#' Training part from Mushroom Data Set
|
||||
#'
|
||||
#'
|
||||
#' This data set is originally from the Mushroom data set,
|
||||
#' UCI Machine Learning Repository.
|
||||
#'
|
||||
#'
|
||||
#' This data set includes the following fields:
|
||||
#'
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{label} the label for each record
|
||||
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
|
||||
@ -35,16 +35,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||
#'
|
||||
#' @references
|
||||
#' https://archive.ics.uci.edu/ml/datasets/Mushroom
|
||||
#'
|
||||
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
|
||||
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
|
||||
#'
|
||||
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
|
||||
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
|
||||
#' School of Information and Computer Science.
|
||||
#'
|
||||
#'
|
||||
#' @docType data
|
||||
#' @keywords datasets
|
||||
#' @name agaricus.train
|
||||
#' @usage data(agaricus.train)
|
||||
#' @format A list containing a label vector, and a dgCMatrix object with 6513
|
||||
#' @format A list containing a label vector, and a dgCMatrix object with 6513
|
||||
#' rows and 126 variables
|
||||
NULL
|
||||
|
||||
@ -52,9 +52,9 @@ NULL
|
||||
#'
|
||||
#' This data set is originally from the Mushroom data set,
|
||||
#' UCI Machine Learning Repository.
|
||||
#'
|
||||
#'
|
||||
#' This data set includes the following fields:
|
||||
#'
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{label} the label for each record
|
||||
#' \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
|
||||
@ -62,16 +62,16 @@ NULL
|
||||
#'
|
||||
#' @references
|
||||
#' https://archive.ics.uci.edu/ml/datasets/Mushroom
|
||||
#'
|
||||
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
|
||||
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
|
||||
#'
|
||||
#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
|
||||
#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
|
||||
#' School of Information and Computer Science.
|
||||
#'
|
||||
#'
|
||||
#' @docType data
|
||||
#' @keywords datasets
|
||||
#' @name agaricus.test
|
||||
#' @usage data(agaricus.test)
|
||||
#' @format A list containing a label vector, and a dgCMatrix object with 1611
|
||||
#' @format A list containing a label vector, and a dgCMatrix object with 1611
|
||||
#' rows and 126 variables
|
||||
NULL
|
||||
|
||||
@ -107,7 +107,7 @@ NULL
|
||||
#' @importFrom graphics par
|
||||
#' @importFrom graphics title
|
||||
#' @importFrom grDevices rgb
|
||||
#'
|
||||
#'
|
||||
#' @import methods
|
||||
#' @useDynLib xgboost, .registration = TRUE
|
||||
NULL
|
||||
|
||||
@ -313,7 +313,7 @@ SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
|
||||
R_ExternalPtrAddr(dmat),
|
||||
asInteger(option_mask),
|
||||
asInteger(ntree_limit),
|
||||
0,
|
||||
asInteger(training),
|
||||
&olen, &res));
|
||||
ret = PROTECT(allocVector(REALSXP, olen));
|
||||
for (size_t i = 0; i < olen; ++i) {
|
||||
|
||||
@ -27,7 +27,7 @@ test_that("train and predict binary classification", {
|
||||
|
||||
pred <- predict(bst, test$data)
|
||||
expect_length(pred, 1611)
|
||||
|
||||
|
||||
pred1 <- predict(bst, train$data, ntreelimit = 1)
|
||||
expect_length(pred1, 6513)
|
||||
err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label)
|
||||
@ -35,6 +35,54 @@ test_that("train and predict binary classification", {
|
||||
expect_lt(abs(err_pred1 - err_log), 10e-6)
|
||||
})
|
||||
|
||||
# Checks DART prediction: predictions with `ntreelimit = 0` and
# `ntreelimit = nrounds` must match, predictions made with `training = TRUE`
# must differ from them, and models fitted through xgboost() and xgb.train()
# must produce the same predictions.
test_that("dart prediction works", {
  nrounds <- 32

  # TRUE only when both prediction vectors agree element for element.
  same_preds <- function(lhs, rhs) {
    all(matrix(lhs, byrow = TRUE) == matrix(rhs, byrow = TRUE))
  }

  # Seeded synthetic regression data: three normal features, noisy target.
  set.seed(1994)
  d <- cbind(
    x1 = rnorm(100),
    x2 = rnorm(100),
    x3 = rnorm(100))
  x3 <- d[, "x3"]
  signal <- d[, "x1"] + d[, "x2"]^2 + ifelse(x3 > .5, x3^2, 2^x3)
  y <- signal + rnorm(100)

  # Model fitted through the xgboost() convenience wrapper.
  set.seed(1994)
  fit_wrapper <- xgboost(
    data = d, label = y, max_depth = 2, booster = "dart",
    rate_drop = 0.5, one_drop = TRUE,
    eta = 1, nthread = 2, nrounds = nrounds, objective = "reg:squarederror")
  wrapper_all <- predict(fit_wrapper, newdata = d, ntreelimit = 0)
  wrapper_limit <- predict(fit_wrapper, newdata = d, ntreelimit = nrounds)
  expect_true(same_preds(wrapper_all, wrapper_limit))

  wrapper_training <- predict(fit_wrapper, newdata = d, training = TRUE)
  expect_false(same_preds(wrapper_all, wrapper_training))

  # Same model fitted through xgb.train() on an xgb.DMatrix.
  set.seed(1994)
  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
  dart_params <- list(
    booster = "dart",
    max_depth = 2,
    eta = 1,
    rate_drop = 0.5,
    one_drop = TRUE,
    nthread = 1,
    tree_method = "exact",
    verbosity = 3,
    objective = "reg:squarederror"
  )
  fit_train <- xgb.train(
    params = dart_params,
    data = dtrain,
    nrounds = nrounds
  )
  train_all <- predict(fit_train, newdata = dtrain, ntreelimit = 0)
  train_limit <- predict(fit_train, newdata = dtrain, ntreelimit = nrounds)
  train_training <- predict(fit_train, newdata = dtrain, training = TRUE)

  # Both training entry points must agree for every prediction mode.
  expect_true(same_preds(train_all, wrapper_all))
  expect_true(same_preds(train_limit, wrapper_limit))
  expect_true(same_preds(train_training, wrapper_training))
})
|
||||
|
||||
test_that("train and predict softprob", {
|
||||
lb <- as.numeric(iris$Species) - 1
|
||||
set.seed(11)
|
||||
@ -74,7 +122,7 @@ test_that("train and predict softmax", {
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(bst$niter * 3, xgb.ntree(bst))
|
||||
|
||||
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris))
|
||||
err <- sum(pred != lb)/length(lb)
|
||||
@ -90,12 +138,12 @@ test_that("train and predict RF", {
|
||||
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
|
||||
expect_equal(bst$niter, 1)
|
||||
expect_equal(xgb.ntree(bst), 20)
|
||||
|
||||
|
||||
pred <- predict(bst, train$data)
|
||||
pred_err <- sum((pred > 0.5) != lb)/length(lb)
|
||||
expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
|
||||
#expect_lt(pred_err, 0.03)
|
||||
|
||||
|
||||
pred <- predict(bst, train$data, ntreelimit = 20)
|
||||
pred_err_20 <- sum((pred > 0.5) != lb)/length(lb)
|
||||
expect_equal(pred_err_20, pred_err)
|
||||
@ -211,7 +259,7 @@ test_that("train and predict with non-strict classes", {
|
||||
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||
pr0 <- predict(bst, train_dense)
|
||||
|
||||
|
||||
# dense matrix-like input of non-matrix class
|
||||
class(train_dense) <- 'shmatrix'
|
||||
expect_true(is.matrix(train_dense))
|
||||
@ -221,7 +269,7 @@ test_that("train and predict with non-strict classes", {
|
||||
, regexp = NA)
|
||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||
expect_equal(pr0, pr)
|
||||
|
||||
|
||||
# dense matrix-like input of non-matrix class with some inheritance
|
||||
class(train_dense) <- c('pphmatrix','shmatrix')
|
||||
expect_true(is.matrix(train_dense))
|
||||
@ -231,7 +279,7 @@ test_that("train and predict with non-strict classes", {
|
||||
, regexp = NA)
|
||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||
expect_equal(pr0, pr)
|
||||
|
||||
|
||||
# when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
|
||||
class(bst) <- c('super.Booster', 'xgb.Booster')
|
||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||
|
||||
@ -157,7 +157,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
|
||||
params = c(
|
||||
list(
|
||||
booster = booster,
|
||||
objective = "reg:linear",
|
||||
objective = "reg:squarederror",
|
||||
eval_metric = "rmse"),
|
||||
if (booster == "dart")
|
||||
list(rate_drop = .01, one_drop = T)),
|
||||
|
||||
@ -435,9 +435,9 @@ class Dart : public GBTree {
|
||||
std::fill(out_preds.begin(), out_preds.end(),
|
||||
model_.learner_model_param_->base_score);
|
||||
}
|
||||
|
||||
PredLoopSpecalize(p_fmat, &out_preds, num_group, 0,
|
||||
ntree_limit, training);
|
||||
const int nthread = omp_get_max_threads();
|
||||
InitThreadTemp(nthread);
|
||||
PredLoopSpecalize(p_fmat, &out_preds, num_group, 0, ntree_limit);
|
||||
}
|
||||
|
||||
void PredictInstance(const SparsePage::Inst &inst,
|
||||
@ -489,11 +489,8 @@ class Dart : public GBTree {
|
||||
std::vector<bst_float>* out_preds,
|
||||
int num_group,
|
||||
unsigned tree_begin,
|
||||
unsigned tree_end,
|
||||
bool training) {
|
||||
const int nthread = omp_get_max_threads();
|
||||
unsigned tree_end) {
|
||||
CHECK_EQ(num_group, model_.learner_model_param_->num_output_group);
|
||||
InitThreadTemp(nthread);
|
||||
std::vector<bst_float>& preds = *out_preds;
|
||||
CHECK_EQ(model_.param.size_leaf_vector, 0)
|
||||
<< "size_leaf_vector is enforced to 0 so far";
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user