[R] On-demand serialization + standardization of attributes (#9924)
--------- Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
basic_walkthrough Basic feature walkthrough
|
||||
caret_wrapper Use xgboost to train in caret library
|
||||
custom_objective Customize loss function, and evaluation metric
|
||||
boost_from_prediction Boosting from existing prediction
|
||||
predict_first_ntree Predicting using first n trees
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
XGBoost R Feature Walkthrough
|
||||
====
|
||||
* [Basic walkthrough of wrappers](basic_walkthrough.R)
|
||||
* [Train a xgboost model from caret library](caret_wrapper.R)
|
||||
* [Customize loss function, and evaluation metric](custom_objective.R)
|
||||
* [Boosting from existing prediction](boost_from_prediction.R)
|
||||
* [Predicting using first n trees](predict_first_ntree.R)
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
# install development version of caret library that contains xgboost models
|
||||
require(caret)
|
||||
require(xgboost)
|
||||
require(data.table)
|
||||
require(vcd)
|
||||
require(e1071)
|
||||
|
||||
# Load Arthritis dataset in memory.
|
||||
data(Arthritis)
|
||||
# Create a copy of the dataset with data.table package
|
||||
# (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent
|
||||
# and its performance are really good).
|
||||
df <- data.table(Arthritis, keep.rownames = FALSE)
|
||||
|
||||
# Let's add some new categorical features to see if it helps.
|
||||
# Of course these feature are highly correlated to the Age feature.
|
||||
# Usually it's not a good thing in ML, but Tree algorithms (including boosted trees) are able to select the best features,
|
||||
# even in case of highly correlated features.
|
||||
# For the first feature we create groups of age by rounding the real age.
|
||||
# Note that we transform it to factor (categorical data) so the algorithm treat them as independant values.
|
||||
df[, AgeDiscret := as.factor(round(Age / 10, 0))]
|
||||
|
||||
# Here is an even stronger simplification of the real age with an arbitrary split at 30 years old.
|
||||
# I choose this value based on nothing.
|
||||
# We will see later if simplifying the information based on arbitrary values is a good strategy
|
||||
# (I am sure you already have an idea of how well it will work!).
|
||||
df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
|
||||
|
||||
# We remove ID as there is nothing to learn from this feature (it will just add some noise as the dataset is small).
|
||||
df[, ID := NULL]
|
||||
|
||||
#-------------Basic Training using XGBoost in caret Library-----------------
|
||||
# Set up control parameters for caret::train
|
||||
# Here we use 10-fold cross-validation, repeating twice, and using random search for tuning hyper-parameters.
|
||||
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 2, search = "random")
|
||||
# train a xgbTree model using caret::train
|
||||
model <- train(factor(Improved) ~ ., data = df, method = "xgbTree", trControl = fitControl)
|
||||
|
||||
# Instead of tree for our boosters, you can also fit a linear regression or logistic regression model
|
||||
# using xgbLinear
|
||||
# model <- train(factor(Improved)~., data = df, method = "xgbLinear", trControl = fitControl)
|
||||
|
||||
# See model results
|
||||
print(model)
|
||||
@@ -27,7 +27,7 @@ head(pred_with_leaf)
|
||||
create.new.tree.features <- function(model, original.features) {
|
||||
pred_with_leaf <- predict(model, original.features, predleaf = TRUE)
|
||||
cols <- list()
|
||||
for (i in 1:model$niter) {
|
||||
for (i in 1:xgb.get.num.boosted.rounds(model)) {
|
||||
# max is not the real max but it s not important for the purpose of adding features
|
||||
leaf.id <- sort(unique(pred_with_leaf[, i]))
|
||||
cols[[i]] <- factor(x = pred_with_leaf[, i], level = leaf.id)
|
||||
|
||||
@@ -9,6 +9,5 @@ demo(create_sparse_matrix, package = 'xgboost')
|
||||
demo(predict_leaf_indices, package = 'xgboost')
|
||||
demo(early_stopping, package = 'xgboost')
|
||||
demo(poisson_regression, package = 'xgboost')
|
||||
demo(caret_wrapper, package = 'xgboost')
|
||||
demo(tweedie_regression, package = 'xgboost')
|
||||
#demo(gpu_accelerated, package = 'xgboost') # can only run when built with GPU support
|
||||
|
||||
Reference in New Issue
Block a user