parent b3d2e7644a
commit 8b1afce316
.github/workflows/main.yml (vendored, new file, +52)
@@ -0,0 +1,52 @@
+# This is a basic workflow to help you get started with Actions
+
+name: XGBoost-CI
+
+# Controls when the action will run. Triggers the workflow on push or pull request
+# events but only for the master branch
+on: [push, pull_request]
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  test-with-R:
+    runs-on: ${{ matrix.config.os }}
+
+    name: Test R on OS ${{ matrix.config.os }}, R (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: windows-latest, r: 'release'}
+    env:
+      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      RSPM: ${{ matrix.config.rspm }}
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+
+      - uses: r-lib/actions/setup-r@master
+        with:
+          r-version: ${{ matrix.config.r }}
+
+      - name: Install dependencies
+        shell: Rscript {0}
+        run: |
+          install.packages(c('XML','igraph'))
+          install.packages(c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','knitr','rmarkdown'))
+
+      - name: Config R
+        run: |
+          mkdir build && cd build
+          cmake .. -DCMAKE_CONFIGURATION_TYPES="Release" -DR_LIB=ON
+
+      - name: Build R
+        run: |
+          cmake --build build --target install --config Release
+
+      - name: Test R
+        run: |
+          cd R-package
+          R.exe -q -e "library(testthat); setwd('tests'); source('testthat.R')"
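Note on the final step: it drives the whole suite through the package's testthat entry point. A conventional `R-package/tests/testthat.R` for this layout looks roughly like the sketch below (an assumed, typical entry point; not a file touched by this commit):

    # Sketch of a standard testthat entry point (assumption, not from this diff):
    # load the test framework and the package, then run every test file under
    # tests/testthat/.
    library(testthat)
    library(xgboost)
    test_check("xgboost")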
@@ -351,13 +351,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,

   finalizer <- function(env) {
     if (!is.null(env$bst)) {
-      attr_best_score = as.numeric(xgb.attr(env$bst$handle, 'best_score'))
+      attr_best_score <- as.numeric(xgb.attr(env$bst$handle, 'best_score'))
       if (best_score != attr_best_score)
         stop("Inconsistent 'best_score' values between the closure state: ", best_score,
              " and the xgb.attr: ", attr_best_score)
-      env$bst$best_iteration = best_iteration
-      env$bst$best_ntreelimit = best_ntreelimit
-      env$bst$best_score = best_score
+      env$bst$best_iteration <- best_iteration
+      env$bst$best_ntreelimit <- best_ntreelimit
+      env$bst$best_score <- best_score
     } else {
       env$basket$best_iteration <- best_iteration
       env$basket$best_ntreelimit <- best_ntreelimit
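The R hunks in this commit are mostly one mechanical fix repeated across files: `=` used for assignment is replaced by `<-`, the operator R style guides prefer and lintr's default assignment linter enforces (the workflow above installs `lintr` for exactly this purpose). A minimal sketch of running the same check locally (the file path is illustrative, not part of the diff):

    # Assumes the lintr package is installed; lint() reports style violations,
    # including '=' assignments, with file and line positions.
    library(lintr)
    lint("R-package/R/callbacks.R")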
@@ -372,7 +372,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
       return(finalizer(env))

     i <- env$iteration
-    score = env$bst_evaluation[metric_idx]
+    score <- env$bst_evaluation[metric_idx]

     if ((maximize && score > best_score) ||
         (!maximize && score < best_score)) {
@@ -613,9 +613,7 @@ cb.gblinear.history <- function(sparse=FALSE) {

   init <- function(env) {
     if (!is.null(env$bst)) { # xgb.train:
-      coef_path <- list()
     } else if (!is.null(env$bst_folds)) { # xgb.cv:
-      coef_path <- rep(list(), length(env$bst_folds))
     } else stop("Parent frame has neither 'bst' nor 'bst_folds'")
   }

@@ -69,9 +69,9 @@ check.booster.params <- function(params, ...) {

   if (!is.null(params[['monotone_constraints']]) &&
       typeof(params[['monotone_constraints']]) != "character") {
-    vec2str = paste(params[['monotone_constraints']], collapse = ',')
-    vec2str = paste0('(', vec2str, ')')
-    params[['monotone_constraints']] = vec2str
+    vec2str <- paste(params[['monotone_constraints']], collapse = ',')
+    vec2str <- paste0('(', vec2str, ')')
+    params[['monotone_constraints']] <- vec2str
   }

   # interaction constraints parser (convert from list of column indices to string)
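For context, this block converts a numeric constraint vector into the parenthesised string form the native library expects. A worked example of the same three lines (hypothetical input values, not from the commit):

    # c(1, -1) means: monotonically increasing in feature 1, decreasing in feature 2.
    params <- list(monotone_constraints = c(1, -1))
    vec2str <- paste(params[['monotone_constraints']], collapse = ',')  # "1,-1"
    params[['monotone_constraints']] <- paste0('(', vec2str, ')')       # "(1,-1)"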
@@ -1,6 +1,7 @@
 # Construct an internal xgboost Booster and return a handle to it.
 # internal utility function
-xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
+xgb.Booster.handle <- function(params = list(), cachelist = list(),
+                               modelfile = NULL) {
   if (typeof(cachelist) != "list" ||
       !all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
     stop("cachelist must be a list of xgb.DMatrix objects")
@@ -62,8 +63,8 @@ is.null.handle <- function(handle) {
   return(FALSE)
 }

-# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
-# internal utility function
+# Return a verified to be valid handle out of either xgb.Booster.handle or
+# xgb.Booster internal utility function
 xgb.get.handle <- function(object) {
   if (inherits(object, "xgb.Booster")) {
     handle <- object$handle
@@ -83,5 +83,5 @@ xgb.create.features <- function(model, data, ...){
   check.deprecation(...)
   pred_with_leaf <- predict(model, data, predleaf = TRUE)
   cols <- lapply(as.data.frame(pred_with_leaf), factor)
-  cbind(data, sparse.model.matrix( ~ . -1, cols))
+  cbind(data, sparse.model.matrix(~ . -1, cols)) # nolint
 }
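The other recurring edit appends `# nolint` to lines lintr would otherwise flag; the trailing marker suppresses lintr diagnostics for that single line while the rest of the file is still linted. For example (illustrative, mirroring the line above):

    # Without the marker, lintr flags the non-standard formula spacing;
    # with it, this one line is exempted from all linters.
    cbind(data, sparse.model.matrix(~ . -1, cols)) # nolint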
@@ -143,9 +143,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   } else if (inherits(data, 'xgb.DMatrix')) {
     if (!is.null(label))
       warning("xgb.cv: label will be ignored, since data is of type xgb.DMatrix")
-    cv_label = getinfo(data, 'label')
+    cv_label <- getinfo(data, 'label')
   } else {
-    cv_label = label
+    cv_label <- label
   }

   # CV folds
@@ -208,8 +208,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   basket <- list()

   # extract parameters that can affect the relationship b/w #trees and #iterations
-  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1)
-  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1)
+  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
+  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint

   # those are fixed for CV (no training continuation)
   begin_iteration <- 1
@@ -226,7 +226,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     })
     msg <- simplify2array(msg)
     bst_evaluation <- rowMeans(msg)
-    bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2)
+    bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2) # nolint

     for (f in cb$post_iter) f()

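A note on the arithmetic in the exempted line: `sqrt(rowMeans(msg^2) - bst_evaluation^2)` is the population standard deviation of each metric across CV folds, applying Var(x) = E[x^2] - (E[x])^2 row by row. A quick check with made-up fold values (illustrative numbers, not from the tests):

    x <- c(0.10, 0.12, 0.14)        # three folds of one metric
    sqrt(mean(x^2) - mean(x)^2)     # ~0.01633, the population sd
    sqrt(mean((x - mean(x))^2))     # same quantity written directly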
@@ -105,7 +105,7 @@ xgb.ggplot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med
 # internal utility function
 multiplot <- function(..., cols = 1) {
   plots <- list(...)
-  num_plots = length(plots)
+  num_plots <- length(plots)

   layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
                    ncol = cols, nrow = ceiling(num_plots / cols))
@@ -117,8 +117,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
                          Weight = weights,
                          Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
     }
-  } else {
-    # tree model
+  } else { # tree model
     result <- xgb.model.dt.tree(feature_names = feature_names,
                                 text = model_text_dump,
                                 trees = trees)[
@@ -331,9 +331,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   if (is_update && nrounds > niter_init)
     stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")

-  # TODO: distributed code
-  rank <- 0
-
   niter_skip <- ifelse(is_update, 0, niter_init)
   begin_iteration <- niter_skip + 1
   end_iteration <- niter_skip + nrounds
@@ -345,7 +342,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),

     xgb.iter.update(bst$handle, dtrain, iteration - 1, obj)
-
     bst_evaluation <- numeric(0)
     if (length(watchlist) > 0)
       bst_evaluation <- xgb.iter.eval(bst$handle, watchlist, iteration - 1, feval)

@@ -360,7 +356,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
   bst <- xgb.Booster.complete(bst, saveraw = TRUE)

   # store the total number of boosting iterations
-  bst$niter = end_iteration
+  bst$niter <- end_iteration

   # store the evaluation results
   if (length(evaluation_log) > 0 &&
@@ -9,12 +9,12 @@ test <- agaricus.test
 set.seed(1994)

 # disable some tests for Win32
-windows_flag = .Platform$OS.type == "windows" &&
+windows_flag <- .Platform$OS.type == "windows" &&
   .Machine$sizeof.pointer != 8
-solaris_flag = (Sys.info()['sysname'] == "SunOS")
+solaris_flag <- (Sys.info()['sysname'] == "SunOS")

 test_that("train and predict binary classification", {
-  nrounds = 2
+  nrounds <- 2
   expect_output(
     bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
                    eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
@@ -37,7 +37,7 @@ test_that("train and predict binary classification", {

 test_that("parameter validation works", {
   p <- list(foo = "bar")
-  nrounds = 1
+  nrounds <- 1
   set.seed(1994)

   d <- cbind(
@@ -70,7 +70,7 @@ test_that("parameter validation works", {


 test_that("dart prediction works", {
-  nrounds = 32
+  nrounds <- 32
   set.seed(1994)

   d <- cbind(
@@ -223,7 +223,7 @@ test_that("use of multiple eval metrics works", {

 test_that("training continuation works", {
   dtrain <- xgb.DMatrix(train$data, label = train$label)
-  watchlist = list(train=dtrain)
+  watchlist <- list(train = dtrain)
   param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)

   # for the reference, use 4 iterations at once:
@@ -255,7 +255,7 @@ test_that("training continuation works", {
 test_that("model serialization works", {
   out_path <- "model_serialization"
   dtrain <- xgb.DMatrix(train$data, label = train$label)
-  watchlist = list(train=dtrain)
+  watchlist <- list(train = dtrain)
   param <- list(objective = "binary:logistic")
   booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
   raw <- xgb.serialize(booster)
@@ -338,7 +338,7 @@ test_that("max_delta_step works", {
   dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
   watchlist <- list(train = dtrain)
   param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
-  nrounds = 5
+  nrounds <- 5
   # model with no restriction on max_delta_step
   bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
   # model with restricted max_delta_step
@@ -21,7 +21,7 @@ ltrain <- add.noise(train$label, 0.2)
 ltest <- add.noise(test$label, 0.2)
 dtrain <- xgb.DMatrix(train$data, label = ltrain)
 dtest <- xgb.DMatrix(test$data, label = ltest)
-watchlist = list(train=dtrain, test=dtest)
+watchlist <- list(train = dtrain, test = dtest)


 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@@ -267,7 +267,7 @@ test_that("early stopping xgb.cv works", {

 test_that("prediction in xgb.cv works", {
   set.seed(11)
-  nrounds = 4
+  nrounds <- 4
   cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
   expect_false(is.null(cv$evaluation_log))
   expect_false(is.null(cv$pred))
@@ -54,14 +54,14 @@ test_that("custom objective using DMatrix attr works", {
     hess <- preds * (1 - preds)
     return(list(grad = grad, hess = hess))
   }
-  param$objective = logregobjattr
+  param$objective <- logregobjattr
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
 })

 test_that("custom objective with multi-class works", {
-  data = as.matrix(iris[, -5])
-  label = as.numeric(iris$Species) - 1
+  data <- as.matrix(iris[, -5])
+  label <- as.numeric(iris$Species) - 1
   dtrain <- xgb.DMatrix(data = data, label = label)
   nclasses <- 3

@@ -72,6 +72,6 @@ test_that("custom objective with multi-class works", {
     hess <- rnorm(dim(as.matrix(preds))[1])
     return (list(grad = grad, hess = hess))
   }
-  param$objective = fake_softprob
+  param$objective <- fake_softprob
   bst <- xgb.train(param, dtrain, 1, num_class = nclasses)
 })
@@ -16,7 +16,7 @@ test_that("gblinear works", {
   ERR_UL <- 0.005 # upper limit for the test set error
   VERB <- 0 # chatterbox switch

-  param$updater = 'shotgun'
+  param$updater <- 'shotgun'
   bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
   ypred <- predict(bst, dtest)
   expect_equal(length(getinfo(dtest, 'label')), 1611)
@@ -29,7 +29,7 @@ test_that("gblinear works", {
   expect_equal(dim(h), c(n, ncol(dtrain) + 1))
   expect_is(h, "matrix")

-  param$updater = 'coord_descent'
+  param$updater <- 'coord_descent'
   bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
   expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)

@@ -5,10 +5,10 @@ require(data.table)
 require(Matrix)
 require(vcd, quietly = TRUE)

-float_tolerance = 5e-6
+float_tolerance <- 5e-6

 # disable some tests for 32-bit environment
-flag_32bit = .Machine$sizeof.pointer != 8
+flag_32bit <- .Machine$sizeof.pointer != 8

 set.seed(1982)
 data(Arthritis)
@@ -16,7 +16,7 @@ df <- data.table(Arthritis, keep.rownames = FALSE)
 df[, AgeDiscret := as.factor(round(Age / 10, 0))]
 df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
 df[, ID := NULL]
-sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
+sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) # nolint
 label <- df[, ifelse(Improved == "Marked", 1, 0)]

 # binary
@@ -46,7 +46,7 @@ mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
 test_that("xgb.dump works", {
   if (!flag_32bit)
     expect_length(xgb.dump(bst.Tree), 200)
-  dump_file = file.path(tempdir(), 'xgb.model.dump')
+  dump_file <- file.path(tempdir(), 'xgb.model.dump')
   expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
   expect_true(file.exists(dump_file))
   expect_gt(file.size(dump_file), 8000)
@@ -170,7 +170,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
   pred <- pr()
   shap <- pr(predcontrib = TRUE)
   shapi <- pr(predinteraction = TRUE)
-  tol = 1e-5
+  tol <- 1e-5

   expect_equal(rowSums(shap), pred, tol = tol)
   expect_equal(apply(shapi, 1, sum), pred, tol = tol)
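The property this test leans on: the per-row SHAP contributions returned by `predcontrib = TRUE` (including the trailing BIAS column) sum to the model's margin prediction, and the interaction array from `predinteraction = TRUE` sums to the same value. A minimal sketch of the identity (assumes a trained booster `bst` and feature matrix `X`; the names are illustrative):

    # predcontrib returns an n x (p + 1) matrix whose last column is BIAS;
    # row sums should match the raw margin up to floating-point tolerance.
    shap <- predict(bst, X, predcontrib = TRUE)
    marg <- predict(bst, X, outputmargin = TRUE)
    all.equal(rowSums(shap), marg, tolerance = 1e-5)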
@@ -26,7 +26,7 @@ test_that("predict feature interactions works", {
   param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
   b <- xgb.train(param, dm, 100)

-  pred = predict(b, dm, outputmargin=TRUE)
+  pred <- predict(b, dm, outputmargin = TRUE)

   # SHAP contributions:
   cont <- predict(b, dm, predcontrib = TRUE)
@@ -73,9 +73,9 @@ test_that("predict feature interactions works", {
   gt_intr[, 2, 3] <- 1. * X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element
   gt_intr[, 3, 2] <- gt_intr[, 2, 3]
   # merge-in the diagonal based on 'ground truth' feature contributions
-  intr_diag = gt_cont - apply(gt_intr, c(1,2), sum)
+  intr_diag <- gt_cont - apply(gt_intr, c(1, 2), sum)
   for (j in seq_len(P)) {
-    gt_intr[,j,j] = intr_diag[,j]
+    gt_intr[, j, j] <- intr_diag[, j]
   }
   # These should be relatively close:
   expect_lt(max(abs(intr - gt_intr)), 0.1)
@@ -119,7 +119,7 @@ test_that("multiclass feature interactions work", {
   dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
   param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
   b <- xgb.train(param, dm, 40)
-  pred = predict(b, dm, outputmargin=TRUE) %>% array(c(3, 150)) %>% t
+  pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t

   # SHAP contributions:
   cont <- predict(b, dm, predcontrib = TRUE)
@@ -3,22 +3,21 @@ require(xgboost)
 context("monotone constraints")

 set.seed(1024)
-x = rnorm(1000, 10)
-y = -1*x + rnorm(1000, 0.001) + 3*sin(x)
-train = matrix(x, ncol = 1)
+x <- rnorm(1000, 10)
+y <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x)
+train <- matrix(x, ncol = 1)


 test_that("monotone constraints for regression", {
-  bst = xgboost(data = train, label = y, max_depth = 2,
+  bst <- xgboost(data = train, label = y, max_depth = 2,
                  eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
                  monotone_constraints = -1)
-
-  pred = predict(bst, train)
+  pred <- predict(bst, train)

-  ind = order(train[,1])
-  pred.ord = pred[ind]
+  ind <- order(train[, 1])
+  pred.ord <- pred[ind]
   expect_true({
     !any(diff(pred.ord) > 0)
   }, "Monotone Contraint Satisfied")

 })
@@ -9,10 +9,10 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

 # Disable flaky tests for 32-bit Windows.
 # See https://github.com/dmlc/xgboost/issues/3720
-win32_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
+win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8

 test_that("updating the model works", {
-  watchlist = list(train = dtrain, test = dtest)
+  watchlist <- list(train = dtrain, test = dtest)

   # no-subsampling
   p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2)