0.6-4 submission (#1935)

This commit is contained in:
Tong He 2017-01-04 23:31:05 -08:00 committed by GitHub
parent 8b827425b2
commit ce84af7923
5 changed files with 40 additions and 34 deletions

View File

@ -1,17 +1,18 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 0.6-3
Date: 2016-12-28
Version: 0.6-4
Date: 2017-01-04
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
Yuan Tang <terrytangyuan@gmail.com>
Maintainer: Tong He <hetong007@gmail.com>
Description: Extreme Gradient Boosting, which is an efficient implementation
of gradient boosting framework. This package is its R interface. The package
includes efficient linear model solver and tree learning algorithms. The package
can automatically do parallel computation on a single machine which could be
more than 10 times faster than existing gradient boosting packages. It supports
of the gradient boosting framework from Chen & Guestrin (2016) <doi:10.1145/2939672.2939785>.
This package is its R interface. The package includes efficient linear
model solver and tree learning algorithms. The package can automatically
do parallel computation on a single machine which could be more than 10
times faster than existing gradient boosting packages. It supports
various objective functions, including regression, classification and ranking.
The package is made to be extensible, so that users are also allowed to define
their own objectives easily.
@ -23,7 +24,7 @@ Suggests:
knitr,
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.8.1),
DiagrammeR (>= 0.9.0),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
testthat,

View File

@ -78,7 +78,7 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
nodes.dt <- tree.matrix[,.(Quality = sum(Quality)),by = .(abs.node.position, Feature)][,.(Text =paste0(Feature[1:min(length(Feature), features_keep)], " (", Quality[1:min(length(Quality), features_keep)], ")") %>% paste0(collapse = "\n")), by=abs.node.position]
edges.dt <- tree.matrix[Feature != "Leaf",.(abs.node.position, Yes)] %>% list(tree.matrix[Feature != "Leaf",.(abs.node.position, No)]) %>% rbindlist() %>% setnames(c("From", "To")) %>% .[,.N,.(From, To)] %>% .[,N:=NULL]
nodes <- DiagrammeR::create_nodes(nodes = nodes.dt[,abs.node.position],
nodes <- DiagrammeR::create_node_df(n = nrow(nodes.dt),
label = nodes.dt[,Text],
style = "filled",
color = "DimGray",
@ -87,8 +87,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
fontname = "Helvetica"
)
edges <- DiagrammeR::create_edges(from = edges.dt[,From],
to = edges.dt[,To],
edges <- DiagrammeR::create_edge_df(from = match(edges.dt[,From], nodes.dt[,abs.node.position]),
to = match(edges.dt[,To], nodes.dt[,abs.node.position]),
color = "DimGray",
arrowsize = "1.5",
arrowhead = "vee",
@ -96,8 +96,7 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
rel = "leading_to")
graph <- DiagrammeR::create_graph(nodes_df = nodes,
edges_df = edges,
graph_attrs = "rankdir = LR")
edges_df = edges)
DiagrammeR::render_graph(graph, width = plot_width, height = plot_height)
}

View File

@ -49,7 +49,7 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, n_first_tree = NUL
allTrees[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
# rev is used to put the first tree on top.
nodes <- DiagrammeR::create_nodes(nodes = allTrees[,ID] %>% rev,
nodes <- DiagrammeR::create_node_df(n = length(allTrees[,ID] %>% rev),
label = allTrees[,label] %>% rev,
style = "filled",
color = "DimGray",
@ -59,8 +59,8 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, n_first_tree = NUL
fontname = "Helvetica"
)
edges <- DiagrammeR::create_edges(from = allTrees[Feature != "Leaf", c(ID)] %>% rep(2),
to = allTrees[Feature != "Leaf", c(Yes, No)],
edges <- DiagrammeR::create_edge_df(from = match(allTrees[Feature != "Leaf", c(ID)] %>% rep(2), allTrees[,ID] %>% rev),
to = match(allTrees[Feature != "Leaf", c(Yes, No)],allTrees[,ID] %>% rev),
label = allTrees[Feature != "Leaf", paste("<",Split)] %>% c(rep("",nrow(allTrees[Feature != "Leaf"]))),
color = "DimGray",
arrowsize = "1.5",
@ -69,8 +69,7 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, n_first_tree = NUL
rel = "leading_to")
graph <- DiagrammeR::create_graph(nodes_df = nodes,
edges_df = edges,
graph_attrs = "rankdir = LR")
edges_df = edges)
DiagrammeR::render_graph(graph, width = plot_width, height = plot_height)
}

View File

@ -8,6 +8,8 @@ train <- agaricus.train
test <- agaricus.test
set.seed(1994)
# TRUE when the 'sysname' field reported by Sys.info() contains "Windows".
# The continuation test uses this flag to skip the raw-model equality
# expectations on that platform.
windows_flag = grepl('Windows', Sys.info()[['sysname']])
test_that("train and predict binary classification", {
nrounds = 2
expect_output(
@ -148,18 +150,21 @@ test_that("training continuation works", {
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist)
# continue for two more:
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1)
expect_equal(bst$raw, bst2$raw)
if (!windows_flag)
expect_equal(bst$raw, bst2$raw)
expect_false(is.null(bst2$evaluation_log))
expect_equal(dim(bst2$evaluation_log), c(4, 2))
expect_equal(bst2$evaluation_log, bst$evaluation_log)
# test continuing from raw model data
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1$raw)
expect_equal(bst$raw, bst2$raw)
if (!windows_flag)
expect_equal(bst$raw, bst2$raw)
expect_equal(dim(bst2$evaluation_log), c(2, 2))
# test continuing from a model in file
xgb.save(bst1, "xgboost.model")
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = "xgboost.model")
expect_equal(bst$raw, bst2$raw)
if (!windows_flag)
expect_equal(bst$raw, bst2$raw)
expect_equal(dim(bst2$evaluation_log), c(2, 2))
})

View File

@ -83,20 +83,22 @@ test_that("xgb-attribute functionality", {
expect_null(xgb.attributes(bst))
})
# Checks that numeric values written to a booster as attributes are read back
# unchanged. Relies on `bst.Tree`, which is defined outside this excerpt.
test_that("xgb-attribute numeric precision", {
# check that lossless conversion works with 17 digits
# numeric -> character -> numeric
# 100 random magnitudes with exponents drawn uniformly from [-20, 20]
X <- 10^runif(100, -20, 20)
X2X <- as.numeric(format(X, digits = 17))
expect_identical(X, X2X)
# retrieved attributes to be the same as written
for (x in X) {
# write one attribute through `xgb.attr<-`, then read it back
xgb.attr(bst.Tree, "x") <- x
expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
# write several attributes at once through `xgb.attributes<-`, then read "b" back
xgb.attributes(bst.Tree) <- list(a = "A", b = x)
expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
}
})
# Register the precision test only when the reported sysname matches Windows,
# Linux or Darwin; on any other system the test is not defined at all.
# NOTE(review): presumably the exact round trip fails on some other platform
# (e.g. Solaris) -- confirm the motivation against the commit discussion.
if (grepl('Windows', Sys.info()[['sysname']]) || grepl('Linux', Sys.info()[['sysname']]) || grepl('Darwin', Sys.info()[['sysname']])) {
# Checks that numeric values written to a booster as attributes are read back
# unchanged. Relies on `bst.Tree`, which is defined outside this excerpt.
test_that("xgb-attribute numeric precision", {
# check that lossless conversion works with 17 digits
# numeric -> character -> numeric
# 100 random magnitudes with exponents drawn uniformly from [-20, 20]
X <- 10^runif(100, -20, 20)
X2X <- as.numeric(format(X, digits = 17))
expect_identical(X, X2X)
# retrieved attributes to be the same as written
for (x in X) {
# write one attribute through `xgb.attr<-`, then read it back
xgb.attr(bst.Tree, "x") <- x
expect_identical(as.numeric(xgb.attr(bst.Tree, "x")), x)
# write several attributes at once through `xgb.attributes<-`, then read "b" back
xgb.attributes(bst.Tree) <- list(a = "A", b = x)
expect_identical(as.numeric(xgb.attr(bst.Tree, "b")), x)
}
})
}
test_that("xgb.model.dt.tree works with and without feature names", {
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")