parent
b3d2e7644a
commit
8b1afce316
52
.github/workflows/main.yml
vendored
Normal file
52
.github/workflows/main.yml
vendored
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# This is a basic workflow to help you get started with Actions
|
||||||
|
|
||||||
|
name: XGoost-CI
|
||||||
|
|
||||||
|
# Controls when the action will run. Triggers the workflow on push or pull request
|
||||||
|
# events but only for the master branch
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||||
|
jobs:
|
||||||
|
test-with-R:
|
||||||
|
runs-on: ${{ matrix.config.os }}
|
||||||
|
|
||||||
|
name: Test R on OS ${{ matrix.config.os }}, R (${{ matrix.config.r }})
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
config:
|
||||||
|
- {os: windows-latest, r: 'release'}
|
||||||
|
env:
|
||||||
|
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
|
||||||
|
RSPM: ${{ matrix.config.rspm }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
submodules: 'true'
|
||||||
|
|
||||||
|
- uses: r-lib/actions/setup-r@master
|
||||||
|
with:
|
||||||
|
r-version: ${{ matrix.config.r }}
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
shell: Rscript {0}
|
||||||
|
run: |
|
||||||
|
install.packages(c('XML','igraph'))
|
||||||
|
install.packages(c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','knitr','rmarkdown'))
|
||||||
|
|
||||||
|
- name: Config R
|
||||||
|
run: |
|
||||||
|
mkdir build && cd build
|
||||||
|
cmake .. -DCMAKE_CONFIGURATION_TYPES="Release" -DR_LIB=ON
|
||||||
|
|
||||||
|
- name: Build R
|
||||||
|
run: |
|
||||||
|
cmake --build build --target install --config Release
|
||||||
|
|
||||||
|
- name: Test R
|
||||||
|
run: |
|
||||||
|
cd R-package
|
||||||
|
R.exe -q -e "library(testthat); setwd('tests'); source('testthat.R')"
|
||||||
@ -351,13 +351,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
|||||||
|
|
||||||
finalizer <- function(env) {
|
finalizer <- function(env) {
|
||||||
if (!is.null(env$bst)) {
|
if (!is.null(env$bst)) {
|
||||||
attr_best_score = as.numeric(xgb.attr(env$bst$handle, 'best_score'))
|
attr_best_score <- as.numeric(xgb.attr(env$bst$handle, 'best_score'))
|
||||||
if (best_score != attr_best_score)
|
if (best_score != attr_best_score)
|
||||||
stop("Inconsistent 'best_score' values between the closure state: ", best_score,
|
stop("Inconsistent 'best_score' values between the closure state: ", best_score,
|
||||||
" and the xgb.attr: ", attr_best_score)
|
" and the xgb.attr: ", attr_best_score)
|
||||||
env$bst$best_iteration = best_iteration
|
env$bst$best_iteration <- best_iteration
|
||||||
env$bst$best_ntreelimit = best_ntreelimit
|
env$bst$best_ntreelimit <- best_ntreelimit
|
||||||
env$bst$best_score = best_score
|
env$bst$best_score <- best_score
|
||||||
} else {
|
} else {
|
||||||
env$basket$best_iteration <- best_iteration
|
env$basket$best_iteration <- best_iteration
|
||||||
env$basket$best_ntreelimit <- best_ntreelimit
|
env$basket$best_ntreelimit <- best_ntreelimit
|
||||||
@ -372,7 +372,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
|||||||
return(finalizer(env))
|
return(finalizer(env))
|
||||||
|
|
||||||
i <- env$iteration
|
i <- env$iteration
|
||||||
score = env$bst_evaluation[metric_idx]
|
score <- env$bst_evaluation[metric_idx]
|
||||||
|
|
||||||
if ((maximize && score > best_score) ||
|
if ((maximize && score > best_score) ||
|
||||||
(!maximize && score < best_score)) {
|
(!maximize && score < best_score)) {
|
||||||
@ -613,9 +613,7 @@ cb.gblinear.history <- function(sparse=FALSE) {
|
|||||||
|
|
||||||
init <- function(env) {
|
init <- function(env) {
|
||||||
if (!is.null(env$bst)) { # xgb.train:
|
if (!is.null(env$bst)) { # xgb.train:
|
||||||
coef_path <- list()
|
|
||||||
} else if (!is.null(env$bst_folds)) { # xgb.cv:
|
} else if (!is.null(env$bst_folds)) { # xgb.cv:
|
||||||
coef_path <- rep(list(), length(env$bst_folds))
|
|
||||||
} else stop("Parent frame has neither 'bst' nor 'bst_folds'")
|
} else stop("Parent frame has neither 'bst' nor 'bst_folds'")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -69,9 +69,9 @@ check.booster.params <- function(params, ...) {
|
|||||||
|
|
||||||
if (!is.null(params[['monotone_constraints']]) &&
|
if (!is.null(params[['monotone_constraints']]) &&
|
||||||
typeof(params[['monotone_constraints']]) != "character") {
|
typeof(params[['monotone_constraints']]) != "character") {
|
||||||
vec2str = paste(params[['monotone_constraints']], collapse = ',')
|
vec2str <- paste(params[['monotone_constraints']], collapse = ',')
|
||||||
vec2str = paste0('(', vec2str, ')')
|
vec2str <- paste0('(', vec2str, ')')
|
||||||
params[['monotone_constraints']] = vec2str
|
params[['monotone_constraints']] <- vec2str
|
||||||
}
|
}
|
||||||
|
|
||||||
# interaction constraints parser (convert from list of column indices to string)
|
# interaction constraints parser (convert from list of column indices to string)
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
# Construct an internal xgboost Booster and return a handle to it.
|
# Construct an internal xgboost Booster and return a handle to it.
|
||||||
# internal utility function
|
# internal utility function
|
||||||
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
xgb.Booster.handle <- function(params = list(), cachelist = list(),
|
||||||
|
modelfile = NULL) {
|
||||||
if (typeof(cachelist) != "list" ||
|
if (typeof(cachelist) != "list" ||
|
||||||
!all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
|
!all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
|
||||||
stop("cachelist must be a list of xgb.DMatrix objects")
|
stop("cachelist must be a list of xgb.DMatrix objects")
|
||||||
@ -62,8 +63,8 @@ is.null.handle <- function(handle) {
|
|||||||
return(FALSE)
|
return(FALSE)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
|
# Return a verified to be valid handle out of either xgb.Booster.handle or
|
||||||
# internal utility function
|
# xgb.Booster internal utility function
|
||||||
xgb.get.handle <- function(object) {
|
xgb.get.handle <- function(object) {
|
||||||
if (inherits(object, "xgb.Booster")) {
|
if (inherits(object, "xgb.Booster")) {
|
||||||
handle <- object$handle
|
handle <- object$handle
|
||||||
|
|||||||
@ -83,5 +83,5 @@ xgb.create.features <- function(model, data, ...){
|
|||||||
check.deprecation(...)
|
check.deprecation(...)
|
||||||
pred_with_leaf <- predict(model, data, predleaf = TRUE)
|
pred_with_leaf <- predict(model, data, predleaf = TRUE)
|
||||||
cols <- lapply(as.data.frame(pred_with_leaf), factor)
|
cols <- lapply(as.data.frame(pred_with_leaf), factor)
|
||||||
cbind(data, sparse.model.matrix( ~ . -1, cols))
|
cbind(data, sparse.model.matrix(~ . -1, cols)) # nolint
|
||||||
}
|
}
|
||||||
|
|||||||
@ -143,9 +143,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
} else if (inherits(data, 'xgb.DMatrix')) {
|
} else if (inherits(data, 'xgb.DMatrix')) {
|
||||||
if (!is.null(label))
|
if (!is.null(label))
|
||||||
warning("xgb.cv: label will be ignored, since data is of type xgb.DMatrix")
|
warning("xgb.cv: label will be ignored, since data is of type xgb.DMatrix")
|
||||||
cv_label = getinfo(data, 'label')
|
cv_label <- getinfo(data, 'label')
|
||||||
} else {
|
} else {
|
||||||
cv_label = label
|
cv_label <- label
|
||||||
}
|
}
|
||||||
|
|
||||||
# CV folds
|
# CV folds
|
||||||
@ -208,8 +208,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
basket <- list()
|
basket <- list()
|
||||||
|
|
||||||
# extract parameters that can affect the relationship b/w #trees and #iterations
|
# extract parameters that can affect the relationship b/w #trees and #iterations
|
||||||
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1)
|
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
|
||||||
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1)
|
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
|
||||||
|
|
||||||
# those are fixed for CV (no training continuation)
|
# those are fixed for CV (no training continuation)
|
||||||
begin_iteration <- 1
|
begin_iteration <- 1
|
||||||
@ -226,7 +226,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
})
|
})
|
||||||
msg <- simplify2array(msg)
|
msg <- simplify2array(msg)
|
||||||
bst_evaluation <- rowMeans(msg)
|
bst_evaluation <- rowMeans(msg)
|
||||||
bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2)
|
bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2) # nolint
|
||||||
|
|
||||||
for (f in cb$post_iter) f()
|
for (f in cb$post_iter) f()
|
||||||
|
|
||||||
|
|||||||
@ -105,7 +105,7 @@ xgb.ggplot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med
|
|||||||
# internal utility function
|
# internal utility function
|
||||||
multiplot <- function(..., cols = 1) {
|
multiplot <- function(..., cols = 1) {
|
||||||
plots <- list(...)
|
plots <- list(...)
|
||||||
num_plots = length(plots)
|
num_plots <- length(plots)
|
||||||
|
|
||||||
layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
|
layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
|
||||||
ncol = cols, nrow = ceiling(num_plots / cols))
|
ncol = cols, nrow = ceiling(num_plots / cols))
|
||||||
|
|||||||
@ -117,8 +117,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
|||||||
Weight = weights,
|
Weight = weights,
|
||||||
Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
|
Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
|
||||||
}
|
}
|
||||||
} else {
|
} else { # tree model
|
||||||
# tree model
|
|
||||||
result <- xgb.model.dt.tree(feature_names = feature_names,
|
result <- xgb.model.dt.tree(feature_names = feature_names,
|
||||||
text = model_text_dump,
|
text = model_text_dump,
|
||||||
trees = trees)[
|
trees = trees)[
|
||||||
|
|||||||
@ -331,9 +331,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
if (is_update && nrounds > niter_init)
|
if (is_update && nrounds > niter_init)
|
||||||
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
|
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
|
||||||
|
|
||||||
# TODO: distributed code
|
|
||||||
rank <- 0
|
|
||||||
|
|
||||||
niter_skip <- ifelse(is_update, 0, niter_init)
|
niter_skip <- ifelse(is_update, 0, niter_init)
|
||||||
begin_iteration <- niter_skip + 1
|
begin_iteration <- niter_skip + 1
|
||||||
end_iteration <- niter_skip + nrounds
|
end_iteration <- niter_skip + nrounds
|
||||||
@ -345,7 +342,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
|
|
||||||
xgb.iter.update(bst$handle, dtrain, iteration - 1, obj)
|
xgb.iter.update(bst$handle, dtrain, iteration - 1, obj)
|
||||||
|
|
||||||
bst_evaluation <- numeric(0)
|
|
||||||
if (length(watchlist) > 0)
|
if (length(watchlist) > 0)
|
||||||
bst_evaluation <- xgb.iter.eval(bst$handle, watchlist, iteration - 1, feval)
|
bst_evaluation <- xgb.iter.eval(bst$handle, watchlist, iteration - 1, feval)
|
||||||
|
|
||||||
@ -360,7 +356,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
|
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
|
||||||
|
|
||||||
# store the total number of boosting iterations
|
# store the total number of boosting iterations
|
||||||
bst$niter = end_iteration
|
bst$niter <- end_iteration
|
||||||
|
|
||||||
# store the evaluation results
|
# store the evaluation results
|
||||||
if (length(evaluation_log) > 0 &&
|
if (length(evaluation_log) > 0 &&
|
||||||
|
|||||||
@ -9,12 +9,12 @@ test <- agaricus.test
|
|||||||
set.seed(1994)
|
set.seed(1994)
|
||||||
|
|
||||||
# disable some tests for Win32
|
# disable some tests for Win32
|
||||||
windows_flag = .Platform$OS.type == "windows" &&
|
windows_flag <- .Platform$OS.type == "windows" &&
|
||||||
.Machine$sizeof.pointer != 8
|
.Machine$sizeof.pointer != 8
|
||||||
solaris_flag = (Sys.info()['sysname'] == "SunOS")
|
solaris_flag <- (Sys.info()['sysname'] == "SunOS")
|
||||||
|
|
||||||
test_that("train and predict binary classification", {
|
test_that("train and predict binary classification", {
|
||||||
nrounds = 2
|
nrounds <- 2
|
||||||
expect_output(
|
expect_output(
|
||||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||||
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
|
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
|
||||||
@ -37,7 +37,7 @@ test_that("train and predict binary classification", {
|
|||||||
|
|
||||||
test_that("parameter validation works", {
|
test_that("parameter validation works", {
|
||||||
p <- list(foo = "bar")
|
p <- list(foo = "bar")
|
||||||
nrounds = 1
|
nrounds <- 1
|
||||||
set.seed(1994)
|
set.seed(1994)
|
||||||
|
|
||||||
d <- cbind(
|
d <- cbind(
|
||||||
@ -70,7 +70,7 @@ test_that("parameter validation works", {
|
|||||||
|
|
||||||
|
|
||||||
test_that("dart prediction works", {
|
test_that("dart prediction works", {
|
||||||
nrounds = 32
|
nrounds <- 32
|
||||||
set.seed(1994)
|
set.seed(1994)
|
||||||
|
|
||||||
d <- cbind(
|
d <- cbind(
|
||||||
@ -223,7 +223,7 @@ test_that("use of multiple eval metrics works", {
|
|||||||
|
|
||||||
test_that("training continuation works", {
|
test_that("training continuation works", {
|
||||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
||||||
watchlist = list(train=dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
|
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
|
||||||
|
|
||||||
# for the reference, use 4 iterations at once:
|
# for the reference, use 4 iterations at once:
|
||||||
@ -255,7 +255,7 @@ test_that("training continuation works", {
|
|||||||
test_that("model serialization works", {
|
test_that("model serialization works", {
|
||||||
out_path <- "model_serialization"
|
out_path <- "model_serialization"
|
||||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
||||||
watchlist = list(train=dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
param <- list(objective = "binary:logistic")
|
param <- list(objective = "binary:logistic")
|
||||||
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
|
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
|
||||||
raw <- xgb.serialize(booster)
|
raw <- xgb.serialize(booster)
|
||||||
@ -338,7 +338,7 @@ test_that("max_delta_step works", {
|
|||||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||||
watchlist <- list(train = dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
|
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
|
||||||
nrounds = 5
|
nrounds <- 5
|
||||||
# model with no restriction on max_delta_step
|
# model with no restriction on max_delta_step
|
||||||
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
|
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
|
||||||
# model with restricted max_delta_step
|
# model with restricted max_delta_step
|
||||||
|
|||||||
@ -21,7 +21,7 @@ ltrain <- add.noise(train$label, 0.2)
|
|||||||
ltest <- add.noise(test$label, 0.2)
|
ltest <- add.noise(test$label, 0.2)
|
||||||
dtrain <- xgb.DMatrix(train$data, label = ltrain)
|
dtrain <- xgb.DMatrix(train$data, label = ltrain)
|
||||||
dtest <- xgb.DMatrix(test$data, label = ltest)
|
dtest <- xgb.DMatrix(test$data, label = ltest)
|
||||||
watchlist = list(train=dtrain, test=dtest)
|
watchlist <- list(train = dtrain, test = dtest)
|
||||||
|
|
||||||
|
|
||||||
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
|
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
|
||||||
@ -267,7 +267,7 @@ test_that("early stopping xgb.cv works", {
|
|||||||
|
|
||||||
test_that("prediction in xgb.cv works", {
|
test_that("prediction in xgb.cv works", {
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
nrounds = 4
|
nrounds <- 4
|
||||||
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
|
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
|
||||||
expect_false(is.null(cv$evaluation_log))
|
expect_false(is.null(cv$evaluation_log))
|
||||||
expect_false(is.null(cv$pred))
|
expect_false(is.null(cv$pred))
|
||||||
|
|||||||
@ -54,14 +54,14 @@ test_that("custom objective using DMatrix attr works", {
|
|||||||
hess <- preds * (1 - preds)
|
hess <- preds * (1 - preds)
|
||||||
return(list(grad = grad, hess = hess))
|
return(list(grad = grad, hess = hess))
|
||||||
}
|
}
|
||||||
param$objective = logregobjattr
|
param$objective <- logregobjattr
|
||||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
||||||
expect_equal(class(bst), "xgb.Booster")
|
expect_equal(class(bst), "xgb.Booster")
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("custom objective with multi-class works", {
|
test_that("custom objective with multi-class works", {
|
||||||
data = as.matrix(iris[, -5])
|
data <- as.matrix(iris[, -5])
|
||||||
label = as.numeric(iris$Species) - 1
|
label <- as.numeric(iris$Species) - 1
|
||||||
dtrain <- xgb.DMatrix(data = data, label = label)
|
dtrain <- xgb.DMatrix(data = data, label = label)
|
||||||
nclasses <- 3
|
nclasses <- 3
|
||||||
|
|
||||||
@ -72,6 +72,6 @@ test_that("custom objective with multi-class works", {
|
|||||||
hess <- rnorm(dim(as.matrix(preds))[1])
|
hess <- rnorm(dim(as.matrix(preds))[1])
|
||||||
return (list(grad = grad, hess = hess))
|
return (list(grad = grad, hess = hess))
|
||||||
}
|
}
|
||||||
param$objective = fake_softprob
|
param$objective <- fake_softprob
|
||||||
bst <- xgb.train(param, dtrain, 1, num_class = nclasses)
|
bst <- xgb.train(param, dtrain, 1, num_class = nclasses)
|
||||||
})
|
})
|
||||||
|
|||||||
@ -16,7 +16,7 @@ test_that("gblinear works", {
|
|||||||
ERR_UL <- 0.005 # upper limit for the test set error
|
ERR_UL <- 0.005 # upper limit for the test set error
|
||||||
VERB <- 0 # chatterbox switch
|
VERB <- 0 # chatterbox switch
|
||||||
|
|
||||||
param$updater = 'shotgun'
|
param$updater <- 'shotgun'
|
||||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
|
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
|
||||||
ypred <- predict(bst, dtest)
|
ypred <- predict(bst, dtest)
|
||||||
expect_equal(length(getinfo(dtest, 'label')), 1611)
|
expect_equal(length(getinfo(dtest, 'label')), 1611)
|
||||||
@ -29,7 +29,7 @@ test_that("gblinear works", {
|
|||||||
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
|
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
|
||||||
expect_is(h, "matrix")
|
expect_is(h, "matrix")
|
||||||
|
|
||||||
param$updater = 'coord_descent'
|
param$updater <- 'coord_descent'
|
||||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
|
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
|
||||||
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
|
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
|
||||||
|
|
||||||
|
|||||||
@ -5,10 +5,10 @@ require(data.table)
|
|||||||
require(Matrix)
|
require(Matrix)
|
||||||
require(vcd, quietly = TRUE)
|
require(vcd, quietly = TRUE)
|
||||||
|
|
||||||
float_tolerance = 5e-6
|
float_tolerance <- 5e-6
|
||||||
|
|
||||||
# disable some tests for 32-bit environment
|
# disable some tests for 32-bit environment
|
||||||
flag_32bit = .Machine$sizeof.pointer != 8
|
flag_32bit <- .Machine$sizeof.pointer != 8
|
||||||
|
|
||||||
set.seed(1982)
|
set.seed(1982)
|
||||||
data(Arthritis)
|
data(Arthritis)
|
||||||
@ -16,7 +16,7 @@ df <- data.table(Arthritis, keep.rownames = FALSE)
|
|||||||
df[, AgeDiscret := as.factor(round(Age / 10, 0))]
|
df[, AgeDiscret := as.factor(round(Age / 10, 0))]
|
||||||
df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
|
df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
|
||||||
df[, ID := NULL]
|
df[, ID := NULL]
|
||||||
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
|
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) # nolint
|
||||||
label <- df[, ifelse(Improved == "Marked", 1, 0)]
|
label <- df[, ifelse(Improved == "Marked", 1, 0)]
|
||||||
|
|
||||||
# binary
|
# binary
|
||||||
@ -46,7 +46,7 @@ mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
|
|||||||
test_that("xgb.dump works", {
|
test_that("xgb.dump works", {
|
||||||
if (!flag_32bit)
|
if (!flag_32bit)
|
||||||
expect_length(xgb.dump(bst.Tree), 200)
|
expect_length(xgb.dump(bst.Tree), 200)
|
||||||
dump_file = file.path(tempdir(), 'xgb.model.dump')
|
dump_file <- file.path(tempdir(), 'xgb.model.dump')
|
||||||
expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
|
expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
|
||||||
expect_true(file.exists(dump_file))
|
expect_true(file.exists(dump_file))
|
||||||
expect_gt(file.size(dump_file), 8000)
|
expect_gt(file.size(dump_file), 8000)
|
||||||
@ -170,7 +170,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
|
|||||||
pred <- pr()
|
pred <- pr()
|
||||||
shap <- pr(predcontrib = TRUE)
|
shap <- pr(predcontrib = TRUE)
|
||||||
shapi <- pr(predinteraction = TRUE)
|
shapi <- pr(predinteraction = TRUE)
|
||||||
tol = 1e-5
|
tol <- 1e-5
|
||||||
|
|
||||||
expect_equal(rowSums(shap), pred, tol = tol)
|
expect_equal(rowSums(shap), pred, tol = tol)
|
||||||
expect_equal(apply(shapi, 1, sum), pred, tol = tol)
|
expect_equal(apply(shapi, 1, sum), pred, tol = tol)
|
||||||
|
|||||||
@ -26,7 +26,7 @@ test_that("predict feature interactions works", {
|
|||||||
param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
|
param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
|
||||||
b <- xgb.train(param, dm, 100)
|
b <- xgb.train(param, dm, 100)
|
||||||
|
|
||||||
pred = predict(b, dm, outputmargin=TRUE)
|
pred <- predict(b, dm, outputmargin = TRUE)
|
||||||
|
|
||||||
# SHAP contributions:
|
# SHAP contributions:
|
||||||
cont <- predict(b, dm, predcontrib = TRUE)
|
cont <- predict(b, dm, predcontrib = TRUE)
|
||||||
@ -73,9 +73,9 @@ test_that("predict feature interactions works", {
|
|||||||
gt_intr[, 2, 3] <- 1. * X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element
|
gt_intr[, 2, 3] <- 1. * X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element
|
||||||
gt_intr[, 3, 2] <- gt_intr[, 2, 3]
|
gt_intr[, 3, 2] <- gt_intr[, 2, 3]
|
||||||
# merge-in the diagonal based on 'ground truth' feature contributions
|
# merge-in the diagonal based on 'ground truth' feature contributions
|
||||||
intr_diag = gt_cont - apply(gt_intr, c(1,2), sum)
|
intr_diag <- gt_cont - apply(gt_intr, c(1, 2), sum)
|
||||||
for (j in seq_len(P)) {
|
for (j in seq_len(P)) {
|
||||||
gt_intr[,j,j] = intr_diag[,j]
|
gt_intr[, j, j] <- intr_diag[, j]
|
||||||
}
|
}
|
||||||
# These should be relatively close:
|
# These should be relatively close:
|
||||||
expect_lt(max(abs(intr - gt_intr)), 0.1)
|
expect_lt(max(abs(intr - gt_intr)), 0.1)
|
||||||
@ -119,7 +119,7 @@ test_that("multiclass feature interactions work", {
|
|||||||
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
|
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
|
||||||
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
|
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
|
||||||
b <- xgb.train(param, dm, 40)
|
b <- xgb.train(param, dm, 40)
|
||||||
pred = predict(b, dm, outputmargin=TRUE) %>% array(c(3, 150)) %>% t
|
pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t
|
||||||
|
|
||||||
# SHAP contributions:
|
# SHAP contributions:
|
||||||
cont <- predict(b, dm, predcontrib = TRUE)
|
cont <- predict(b, dm, predcontrib = TRUE)
|
||||||
|
|||||||
@ -3,22 +3,21 @@ require(xgboost)
|
|||||||
context("monotone constraints")
|
context("monotone constraints")
|
||||||
|
|
||||||
set.seed(1024)
|
set.seed(1024)
|
||||||
x = rnorm(1000, 10)
|
x <- rnorm(1000, 10)
|
||||||
y = -1*x + rnorm(1000, 0.001) + 3*sin(x)
|
y <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x)
|
||||||
train = matrix(x, ncol = 1)
|
train <- matrix(x, ncol = 1)
|
||||||
|
|
||||||
|
|
||||||
test_that("monotone constraints for regression", {
|
test_that("monotone constraints for regression", {
|
||||||
bst = xgboost(data = train, label = y, max_depth = 2,
|
bst <- xgboost(data = train, label = y, max_depth = 2,
|
||||||
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
|
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
|
||||||
monotone_constraints = -1)
|
monotone_constraints = -1)
|
||||||
|
|
||||||
pred = predict(bst, train)
|
pred <- predict(bst, train)
|
||||||
|
|
||||||
ind = order(train[,1])
|
ind <- order(train[, 1])
|
||||||
pred.ord = pred[ind]
|
pred.ord <- pred[ind]
|
||||||
expect_true({
|
expect_true({
|
||||||
!any(diff(pred.ord) > 0)
|
!any(diff(pred.ord) > 0)
|
||||||
}, "Monotone Contraint Satisfied")
|
}, "Monotone Contraint Satisfied")
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|||||||
@ -9,10 +9,10 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
|||||||
|
|
||||||
# Disable flaky tests for 32-bit Windows.
|
# Disable flaky tests for 32-bit Windows.
|
||||||
# See https://github.com/dmlc/xgboost/issues/3720
|
# See https://github.com/dmlc/xgboost/issues/3720
|
||||||
win32_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
|
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
|
||||||
|
|
||||||
test_that("updating the model works", {
|
test_that("updating the model works", {
|
||||||
watchlist = list(train = dtrain, test = dtest)
|
watchlist <- list(train = dtrain, test = dtest)
|
||||||
|
|
||||||
# no-subsampling
|
# no-subsampling
|
||||||
p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2)
|
p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user