commit 5199b86126
parent 808f61081b
@@ -5,8 +5,8 @@
 #' @export
 xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                     params = list(), nrounds,
                     verbose = 1, print_every_n = 1L,
                     early_stopping_rounds = NULL, maximize = NULL,
                     save_period = NULL, save_name = "xgboost.model",
                     xgb_model = NULL, callbacks = list(), ...) {

@@ -18,16 +18,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                    early_stopping_rounds = early_stopping_rounds, maximize = maximize,
                    save_period = save_period, save_name = save_name,
                    xgb_model = xgb_model, callbacks = callbacks, ...)
-  return(bst)
+  return (bst)
 }

 #' Training part from Mushroom Data Set
 #'
 #' This data set is originally from the Mushroom data set,
 #' UCI Machine Learning Repository.
 #'
 #' This data set includes the following fields:
 #'
 #' \itemize{
 #'  \item \code{label} the label for each record
 #'  \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
@@ -35,16 +35,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
 #'
 #' @references
 #' https://archive.ics.uci.edu/ml/datasets/Mushroom
 #'
 #' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
 #' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 #' School of Information and Computer Science.
 #'
 #' @docType data
 #' @keywords datasets
 #' @name agaricus.train
 #' @usage data(agaricus.train)
 #' @format A list containing a label vector, and a dgCMatrix object with 6513
 #' rows and 127 variables
 NULL

@@ -52,9 +52,9 @@ NULL
 #'
 #' This data set is originally from the Mushroom data set,
 #' UCI Machine Learning Repository.
 #'
 #' This data set includes the following fields:
 #'
 #' \itemize{
 #'  \item \code{label} the label for each record
 #'  \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
@@ -62,16 +62,16 @@ NULL
 #'
 #' @references
 #' https://archive.ics.uci.edu/ml/datasets/Mushroom
 #'
 #' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
 #' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 #' School of Information and Computer Science.
 #'
 #' @docType data
 #' @keywords datasets
 #' @name agaricus.test
 #' @usage data(agaricus.test)
 #' @format A list containing a label vector, and a dgCMatrix object with 1611
 #' rows and 126 variables
 NULL

@@ -107,7 +107,7 @@ NULL
 #' @importFrom graphics par
 #' @importFrom graphics title
 #' @importFrom grDevices rgb
 #'
 #' @import methods
 #' @useDynLib xgboost, .registration = TRUE
 NULL
@@ -313,7 +313,7 @@ SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
                                 R_ExternalPtrAddr(dmat),
                                 asInteger(option_mask),
                                 asInteger(ntree_limit),
-                                0,
+                                asInteger(training),
                                 &olen, &res));
   ret = PROTECT(allocVector(REALSXP, olen));
   for (size_t i = 0; i < olen; ++i) {
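Not part of the diff: a minimal R sketch of what the new `training` argument to `XGBoosterPredict_R` means at the user level, assuming the exported `predict()` method forwards its own `training` flag down to this C entry point (the DART test added in the hunks below exercises exactly that flag). With a DART booster, the training path applies tree dropout, so its output can legitimately differ from the plain inference path.

# Hedged usage sketch (illustrative only, not taken from the commit)
library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               booster = "dart", rate_drop = 0.5, one_drop = TRUE,
               max_depth = 2, eta = 1, nrounds = 8,
               objective = "binary:logistic", verbose = 0)
p_infer <- predict(bst, agaricus.train$data)                   # inference path: all trees, deterministic
p_train <- predict(bst, agaricus.train$data, training = TRUE)  # training path: dropout may be applied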
@@ -27,7 +27,7 @@ test_that("train and predict binary classification", {

   pred <- predict(bst, test$data)
   expect_length(pred, 1611)

   pred1 <- predict(bst, train$data, ntreelimit = 1)
   expect_length(pred1, 6513)
   err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label)
@@ -35,6 +35,54 @@ test_that("train and predict binary classification", {
   expect_lt(abs(err_pred1 - err_log), 10e-6)
 })

+test_that("dart prediction works", {
+  nrounds = 32
+  set.seed(1994)
+
+  d <- cbind(
+    x1 = rnorm(100),
+    x2 = rnorm(100),
+    x3 = rnorm(100))
+  y <- d[,"x1"] + d[,"x2"]^2 +
+    ifelse(d[,"x3"] > .5, d[,"x3"]^2, 2^d[,"x3"]) +
+    rnorm(100)
+
+  set.seed(1994)
+  booster_by_xgboost <- xgboost(data = d, label = y, max_depth = 2, booster = "dart",
+                                rate_drop = 0.5, one_drop = TRUE,
+                                eta = 1, nthread = 2, nrounds = nrounds, objective = "reg:squarederror")
+  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
+  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
+  expect_true(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
+
+  pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
+  expect_false(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
+
+  set.seed(1994)
+  dtrain <- xgb.DMatrix(data=d, info = list(label=y))
+  booster_by_train <- xgb.train( params = list(
+                                   booster = "dart",
+                                   max_depth = 2,
+                                   eta = 1,
+                                   rate_drop = 0.5,
+                                   one_drop = TRUE,
+                                   nthread = 1,
+                                   tree_method= "exact",
+                                   verbosity = 3,
+                                   objective = "reg:squarederror"
+                                 ),
+                                 data = dtrain,
+                                 nrounds = nrounds
+                                 )
+  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
+  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
+  pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
+
+  expect_true(all(matrix(pred_by_train_0, byrow=TRUE) == matrix(pred_by_xgboost_0, byrow=TRUE)))
+  expect_true(all(matrix(pred_by_train_1, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
+  expect_true(all(matrix(pred_by_train_2, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
+})
+
 test_that("train and predict softprob", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
@@ -74,7 +122,7 @@ test_that("train and predict softmax", {
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))

   pred <- predict(bst, as.matrix(iris[, -5]))
   expect_length(pred, nrow(iris))
   err <- sum(pred != lb)/length(lb)
@@ -90,12 +138,12 @@ test_that("train and predict RF", {
                  num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
   expect_equal(bst$niter, 1)
   expect_equal(xgb.ntree(bst), 20)

   pred <- predict(bst, train$data)
   pred_err <- sum((pred > 0.5) != lb)/length(lb)
   expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
   #expect_lt(pred_err, 0.03)

   pred <- predict(bst, train$data, ntreelimit = 20)
   pred_err_20 <- sum((pred > 0.5) != lb)/length(lb)
   expect_equal(pred_err_20, pred_err)
@@ -211,7 +259,7 @@ test_that("train and predict with non-strict classes", {
   bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
                  eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
   pr0 <- predict(bst, train_dense)

   # dense matrix-like input of non-matrix class
   class(train_dense) <- 'shmatrix'
   expect_true(is.matrix(train_dense))
@@ -221,7 +269,7 @@ test_that("train and predict with non-strict classes", {
                , regexp = NA)
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)

   # dense matrix-like input of non-matrix class with some inheritance
   class(train_dense) <- c('pphmatrix','shmatrix')
   expect_true(is.matrix(train_dense))
@@ -231,7 +279,7 @@ test_that("train and predict with non-strict classes", {
                , regexp = NA)
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)

   # when someone inhertis from xgb.Booster, it should still be possible to use it as xgb.Booster
   class(bst) <- c('super.Booster', 'xgb.Booster')
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
@@ -157,7 +157,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
   params = c(
     list(
       booster = booster,
-      objective = "reg:linear",
+      objective = "reg:squarederror",
       eval_metric = "rmse"),
     if (booster == "dart")
       list(rate_drop = .01, one_drop = T)),
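For context, the invariant this test guards is that per-feature SHAP contributions plus the bias column sum to the raw margin prediction; `reg:linear` is the older alias of the squared-error objective, which is why the parameter is now spelled `reg:squarederror`. A small self-contained sketch of that property follows (illustrative data and tolerance, not taken from the commit):

library(xgboost)
set.seed(1)
x <- matrix(rnorm(300), ncol = 3)
y <- x[, 1] - 2 * x[, 2] + rnorm(100, sd = 0.1)
fit <- xgboost(data = x, label = y, nrounds = 10, max_depth = 2,
               objective = "reg:squarederror", verbose = 0)
contrib <- predict(fit, x, predcontrib = TRUE)   # n x (ncol(x) + 1); last column is the bias term
margin  <- predict(fit, x, outputmargin = TRUE)
stopifnot(max(abs(rowSums(contrib) - margin)) < 1e-4)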
@@ -435,9 +435,9 @@ class Dart : public GBTree {
       std::fill(out_preds.begin(), out_preds.end(),
                 model_.learner_model_param_->base_score);
     }
-    PredLoopSpecalize(p_fmat, &out_preds, num_group, 0,
-                      ntree_limit, training);
+    const int nthread = omp_get_max_threads();
+    InitThreadTemp(nthread);
+    PredLoopSpecalize(p_fmat, &out_preds, num_group, 0, ntree_limit);
   }

   void PredictInstance(const SparsePage::Inst &inst,
@@ -489,11 +489,8 @@ class Dart : public GBTree {
                          std::vector<bst_float>* out_preds,
                          int num_group,
                          unsigned tree_begin,
-                         unsigned tree_end,
-                         bool training) {
-    const int nthread = omp_get_max_threads();
+                         unsigned tree_end) {
     CHECK_EQ(num_group, model_.learner_model_param_->num_output_group);
-    InitThreadTemp(nthread);
     std::vector<bst_float>& preds = *out_preds;
     CHECK_EQ(model_.param.size_leaf_vector, 0)
         << "size_leaf_vector is enforced to 0 so far";