Merge pull request #670 from pommedeterresautee/master
Add code in demo to use the pred leaf in R
This commit is contained in:
commit
375192efa1
@ -1,4 +1,9 @@
|
|||||||
require(xgboost)
|
require(xgboost)
|
||||||
|
require(data.table)
|
||||||
|
require(Matrix)
|
||||||
|
|
||||||
|
set.seed(1982)
|
||||||
|
|
||||||
# load in the agaricus dataset
|
# load in the agaricus dataset
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
@ -6,16 +11,42 @@ dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
|
|||||||
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
|
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
|
||||||
|
|
||||||
param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
|
param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||||
watchlist <- list(eval = dtest, train = dtrain)
|
nround = 4
|
||||||
nround = 5
|
|
||||||
|
|
||||||
# training the model for `nround` rounds
|
# training the model for `nround` rounds
|
||||||
bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2, watchlist = watchlist)
|
bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
|
||||||
cat('start testing prediction from first n trees\n')
|
|
||||||
|
# Model accuracy without new features
|
||||||
|
accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
|
||||||
|
|
||||||
### predict using first 2 tree
|
|
||||||
pred_with_leaf = predict(bst, dtest, ntreelimit = 2, predleaf = TRUE)
|
|
||||||
head(pred_with_leaf)
|
|
||||||
# by default, we predict using all the trees
|
# by default, we predict using all the trees
|
||||||
|
|
||||||
pred_with_leaf = predict(bst, dtest, predleaf = TRUE)
|
pred_with_leaf = predict(bst, dtest, predleaf = TRUE)
|
||||||
head(pred_with_leaf)
|
head(pred_with_leaf)
|
||||||
|
|
||||||
|
# Append one-hot encoded leaf indices to a feature matrix.
#
# `predict(model, original.features, predleaf = TRUE)` is used to obtain, for
# every observation, the index of the leaf it reaches in each tree (one column
# per tree). Each column is converted to a factor, the factors are expanded
# with `sparse.model.matrix( ~ ., ...)`, and the resulting columns are bound
# to the right of `original.features`.
#
# Args:
#   model: fitted model handed to `predict()`.
#   original.features: feature matrix the model can predict on; it is also the
#     left-hand part of the returned matrix.
#
# Returns:
#   `original.features` with the encoded leaf columns appended.
create.new.tree.features <- function(model, original.features){
  # as.matrix() is a no-op on a matrix; it only guarantees a column dimension
  # should a single-tree prediction come back as a plain vector.
  pred_with_leaf <- as.matrix(predict(model, original.features, predleaf = TRUE))
  cols <- list()
  # Iterate over the trees actually present in the prediction. The previous
  # loop bound, `length(trees)`, referred to an object that is never defined.
  for(i in seq_len(ncol(pred_with_leaf))){
    # max.leaf is not the real max but it's not important for the purpose of
    # adding features: unused levels only yield all-zero columns.
    max.leaf <- max(pred_with_leaf[,i])
    cols[[i]] <- factor(x = pred_with_leaf[,i], levels = seq(to = max.leaf))
  }
  # NOTE(review): `cBind` is reported as deprecated in recent Matrix releases
  # in favour of `cbind` -- confirm against the Matrix version in use.
  cBind(original.features, sparse.model.matrix( ~ ., as.data.frame(cols)))
}
|
||||||
|
|
||||||
|
# Augment both data sets with the one-hot encoded leaf indices of `bst`
new.features.train <- create.new.tree.features(model = bst, original.features = agaricus.train$data)
new.features.test <- create.new.tree.features(model = bst, original.features = agaricus.test$data)

# Wrap the augmented matrices for training and evaluation
new.dtrain <- xgb.DMatrix(label = agaricus.train$label, data = new.features.train)
new.dtest <- xgb.DMatrix(label = agaricus.test$label, data = new.features.test)
watchlist <- list(train = new.dtrain)

# Refit with the same parameters and number of rounds on the new features
bst <- xgb.train(
  params = param,
  data = new.dtrain,
  nrounds = nround,
  nthread = 2
)

# Share of test rows whose thresholded prediction (>= 0.5) equals the label
accuracy.after <- local({
  is.correct <- (predict(bst, new.dtest) >= 0.5) == agaricus.test$label
  sum(is.correct) / length(agaricus.test$label)
})

# Here the accuracy was already good and is now perfect.
print(
  paste(
    "The accuracy was", accuracy.before,
    "before adding leaf features and it is now", accuracy.after, "!"
  )
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user