Generate new features based on tree leaves
Commit c1b2d9cb86 (parent 115c63bcde)
R-package/NAMESPACE
@@ -5,6 +5,7 @@ export(setinfo)
 export(slice)
 export(xgb.DMatrix)
 export(xgb.DMatrix.save)
+export(xgb.create.features)
 export(xgb.cv)
 export(xgb.dump)
 export(xgb.importance)
@@ -25,6 +26,7 @@ importClassesFrom(Matrix,dgCMatrix)
 importClassesFrom(Matrix,dgeMatrix)
 importFrom(Matrix,cBind)
 importFrom(Matrix,colSums)
+importFrom(Matrix,sparse.model.matrix)
 importFrom(Matrix,sparseVector)
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)
R-package/R/xgb.create.features.R (new file, 91 lines)
@@ -0,0 +1,91 @@
#' Create new features from a previously learned model
#'
#' May improve the learning by adding new features to the training data based on the decision trees from a previously learned model.
#'
#' @importFrom magrittr %>%
#' @importFrom Matrix cBind
#' @importFrom Matrix sparse.model.matrix
#'
#' @param model decision tree boosting model learned on the original data
#' @param training.data original data (usually provided as a \code{dgCMatrix} matrix)
#'
#' @return \code{dgCMatrix} matrix including both the original data and the new features.
#'
#' @details
#' This function is inspired by Section 3.1 of the paper:
#'
#' \strong{"Practical Lessons from Predicting Clicks on Ads at Facebook"}
#'
#' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yanxin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
#' Joaquin Quiñonero Candela)}
#'
#' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
#'
#' \url{https://research.facebook.com/publications/758569837499391/practical-lessons-from-predicting-clicks-on-ads-at-facebook/}.
#'
#' Extract explaining the method:
#'
#' "\emph{We found that boosted decision trees are a powerful and very
#' convenient way to implement non-linear and tuple transformations
#' of the kind we just described. We treat each individual
#' tree as a categorical feature that takes as value the
#' index of the leaf an instance ends up falling in. We use
#' 1-of-K coding of this type of features.
#'
#' For example, consider the boosted tree model in Figure 1 with 2 subtrees,
#' where the first subtree has 3 leafs and the second 2 leafs. If an
#' instance ends up in leaf 2 in the first subtree and leaf 1 in
#' second subtree, the overall input to the linear classifier will
#' be the binary vector \code{[0, 1, 0, 1, 0]}, where the first 3 entries
#' correspond to the leaves of the first subtree and last 2 to
#' those of the second subtree.
#'
#' [...]
#'
#' We can understand boosted decision tree
#' based transformation as a supervised feature encoding that
#' converts a real-valued vector into a compact binary-valued
#' vector. A traversal from root node to a leaf node represents
#' a rule on certain features.}"
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
#' dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
#'
#' param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
#' nround <- 4
#'
#' bst <- xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
#'
#' # Model accuracy without new features
#' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
#'
#' # Convert previous features to one hot encoding
#' new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
#' new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
#'
#' # Learning with new features
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
#' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
#'
#' # Model accuracy with new features
#' accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
#'
#' # Here the accuracy was already good and is now perfect.
#' cat(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now", accuracy.after, "!\n"))
#'
#' @export
xgb.create.features <- function(model, training.data){
  pred_with_leaf <- predict(model, training.data, predleaf = TRUE)
  cols <- list()
  for(i in 1:ncol(pred_with_leaf)){
    # max is not the real max but it's not important for the purpose of adding features
    leaf.id <- sort(unique(pred_with_leaf[,i]))
    cols[[i]] <- factor(x = pred_with_leaf[,i], levels = leaf.id)
  }
  cBind(training.data, sparse.model.matrix( ~ . -1, as.data.frame(cols)))
}
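As an aside for readers of this diff: below is a minimal standalone sketch of the transformation the new function performs, assuming only the agaricus data shipped with xgboost (the booster, variable names, and parameter choices are illustrative and not part of the commit). Each tree contributes one column of leaf indices; each column becomes a factor, sparse.model.matrix expands the factors into 1-of-K indicator columns, and those are column-bound to the original features.

# Illustrative sketch only: what xgb.create.features() does with the leaf indices.
library(xgboost)
library(Matrix)

data(agaricus.train, package = "xgboost")
X <- agaricus.train$data                      # original dgCMatrix of features
bst <- xgboost(data = X, label = agaricus.train$label, max.depth = 2, eta = 1,
               nrounds = 4, objective = "binary:logistic", verbose = 0)

leaf_idx <- predict(bst, X, predleaf = TRUE)  # one column of leaf indices per tree

# One factor per tree, with the leaf indices reached in that tree as its levels
leaf_factors <- lapply(seq_len(ncol(leaf_idx)), function(i)
  factor(leaf_idx[, i], levels = sort(unique(leaf_idx[, i]))))
names(leaf_factors) <- paste0("tree", seq_along(leaf_factors))

# 1-of-K indicator columns for every (tree, leaf) pair, bound to the original data
new_features <- sparse.model.matrix(~ . - 1, data = as.data.frame(leaf_factors))
X_new <- cBind(X, new_features)
dim(X_new)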
R-package/R/xgb.importance.R
@@ -1,7 +1,6 @@
 #' Show importance of features in a model
 #'
-#' Read a xgboost model text dump.
-#' Can be tree or linear model (text dump of linear model are only supported in dev version of \code{Xgboost} for now).
+#' Create a \code{data.table} of the most important features of a model.
 #'
 #' @importFrom data.table data.table
 #' @importFrom data.table setnames
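A brief usage note on the re-documented function, continuing from the illustrative sketch above and reusing its bst and agaricus.train objects (the exact columns of the returned table depend on the package version):

# Importance of the original features for the illustrative booster above
importance <- xgb.importance(feature_names = colnames(agaricus.train$data), model = bst)
head(importance)  # a data.table with one row per feature used by the model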
@@ -25,7 +25,7 @@ pred_with_leaf = predict(bst, dtest, predleaf = TRUE)
 head(pred_with_leaf)
 
 create.new.tree.features <- function(model, original.features){
-  pred_with_leaf = predict(model, original.features, predleaf = TRUE)
+  pred_with_leaf <- predict(model, original.features, predleaf = TRUE)
   cols <- list()
   for(i in 1:length(trees)){
     # max is not the real max but it s not important for the purpose of adding features
@@ -49,4 +49,4 @@ bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread =
 accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
 
 # Here the accuracy was already good and is now perfect.
-print(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now", accuracy.after, "!"))
+cat(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now", accuracy.after, "!\n"))
R-package/man/xgb.create.features.Rd (new file, 88 lines)
@@ -0,0 +1,88 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.create.features.R
\name{xgb.create.features}
\alias{xgb.create.features}
\title{Create new features from a previously learned model}
\usage{
xgb.create.features(model, training.data)
}
\arguments{
\item{model}{decision tree boosting model learned on the original data}

\item{training.data}{original data (usually provided as a \code{dgCMatrix} matrix)}
}
\value{
\code{dgCMatrix} matrix including both the original data and the new features.
}
\description{
May improve the learning by adding new features to the training data based on the decision trees from a previously learned model.
}
\details{
This function is inspired by Section 3.1 of the paper:

\strong{"Practical Lessons from Predicting Clicks on Ads at Facebook"}

\emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yanxin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
Joaquin Quiñonero Candela)}

International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014

\url{https://research.facebook.com/publications/758569837499391/practical-lessons-from-predicting-clicks-on-ads-at-facebook/}.

Extract explaining the method:

"\emph{We found that boosted decision trees are a powerful and very
convenient way to implement non-linear and tuple transformations
of the kind we just described. We treat each individual
tree as a categorical feature that takes as value the
index of the leaf an instance ends up falling in. We use
1-of-K coding of this type of features.

For example, consider the boosted tree model in Figure 1 with 2 subtrees,
where the first subtree has 3 leafs and the second 2 leafs. If an
instance ends up in leaf 2 in the first subtree and leaf 1 in
second subtree, the overall input to the linear classifier will
be the binary vector \code{[0, 1, 0, 1, 0]}, where the first 3 entries
correspond to the leaves of the first subtree and last 2 to
those of the second subtree.

[...]

We can understand boosted decision tree
based transformation as a supervised feature encoding that
converts a real-valued vector into a compact binary-valued
vector. A traversal from root node to a leaf node represents
a rule on certain features.}"
}
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)

param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
nround <- 4

bst <- xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)

# Model accuracy without new features
accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)

# Convert previous features to one hot encoding
new.features.train <- xgb.create.features(model = bst, agaricus.train$data)
new.features.test <- xgb.create.features(model = bst, agaricus.test$data)

# Learning with new features
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)

# Model accuracy with new features
accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)

# Here the accuracy was already good and is now perfect.
cat(paste("The accuracy was", accuracy.before, "before adding leaf features and it is now", accuracy.after, "!\\n"))

}
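To make the quoted [0, 1, 0, 1, 0] example concrete, here is a tiny base-R illustration (not part of the commit; the leaf positions are chosen to match the quoted paragraph):

# Two subtrees: the first has 3 leaves, the second has 2. An instance falls in
# leaf 2 of the first subtree and leaf 1 of the second.
leaf_tree1 <- factor(2, levels = 1:3)
leaf_tree2 <- factor(1, levels = 1:2)
# 1-of-K coding of each leaf index, concatenated into one binary vector
as.integer(c(leaf_tree1 == levels(leaf_tree1), leaf_tree2 == levels(leaf_tree2)))
# [1] 0 1 0 1 0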
R-package/man/xgb.importance.Rd
@@ -22,8 +22,7 @@ xgb.importance(feature_names = NULL, model = NULL, data = NULL,
 A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
 }
 \description{
-Read a xgboost model text dump.
-Can be tree or linear model (text dump of linear model are only supported in dev version of \code{Xgboost} for now).
+Create a \code{data.table} of the most important features of a model.
 }
 \details{
 This is the function to understand the model trained (and through your model, your data).