Fix spelling in documents (#6948)
* Update roxygen2 doc. Co-authored-by: fis <jm.yuan@outlook.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
basic_walkthrough Basic feature walkthrough
|
||||
caret_wrapper Use xgboost to train in caret library
|
||||
custom_objective Cutomize loss function, and evaluation metric
|
||||
custom_objective Customize loss function, and evaluation metric
|
||||
boost_from_prediction Boosting from existing prediction
|
||||
predict_first_ntree Predicting using first n trees
|
||||
generalized_linear_model Generalized Linear Model
|
||||
@@ -8,8 +8,8 @@ cross_validation Cross validation
|
||||
create_sparse_matrix Create Sparse Matrix
|
||||
predict_leaf_indices Predicting the corresponding leaves
|
||||
early_stopping Early Stop in training
|
||||
poisson_regression Poisson Regression on count data
|
||||
tweedie_regression Tweddie Regression
|
||||
poisson_regression Poisson regression on count data
|
||||
tweedie_regression Tweedie regression
|
||||
gpu_accelerated GPU-accelerated tree building algorithms
|
||||
interaction_constraints Interaction constraints among features
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ XGBoost R Feature Walkthrough
|
||||
====
|
||||
* [Basic walkthrough of wrappers](basic_walkthrough.R)
|
||||
* [Train a xgboost model from caret library](caret_wrapper.R)
|
||||
* [Cutomize loss function, and evaluation metric](custom_objective.R)
|
||||
* [Customize loss function, and evaluation metric](custom_objective.R)
|
||||
* [Boosting from existing prediction](boost_from_prediction.R)
|
||||
* [Predicting using first n trees](predict_first_ntree.R)
|
||||
* [Generalized Linear Model](generalized_linear_model.R)
|
||||
|
||||
@@ -40,7 +40,7 @@ print("Train xgboost with verbose 2, also print information about tree")
|
||||
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
|
||||
nthread = 2, objective = "binary:logistic", verbose = 2)
|
||||
|
||||
# you can also specify data as file path to a LibSVM format input
|
||||
# you can also specify data as file path to a LIBSVM format input
|
||||
# since we do not have this file with us, the following line is just for illustration
|
||||
# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nrounds = 2,objective = "binary:logistic")
|
||||
|
||||
|
||||
@@ -2,17 +2,17 @@ require(xgboost)
|
||||
require(Matrix)
|
||||
require(data.table)
|
||||
if (!require(vcd)) {
|
||||
install.packages('vcd') #Available in Cran. Used for its dataset with categorical values.
|
||||
install.packages('vcd') #Available in CRAN. Used for its dataset with categorical values.
|
||||
require(vcd)
|
||||
}
|
||||
# According to its documentation, Xgboost works only on numbers.
|
||||
# According to its documentation, XGBoost works only on numbers.
|
||||
# Sometimes the dataset we have to work on have categorical data.
|
||||
# A categorical variable is one which have a fixed number of values. By example, if for each observation a variable called "Colour" can have only "red", "blue" or "green" as value, it is a categorical variable.
|
||||
#
|
||||
# In R, categorical variable is called Factor.
|
||||
# Type ?factor in console for more information.
|
||||
#
|
||||
# In this demo we will see how to transform a dense dataframe with categorical variables to a sparse matrix before analyzing it in Xgboost.
|
||||
# In this demo we will see how to transform a dense dataframe with categorical variables to a sparse matrix before analyzing it in XGBoost.
|
||||
# The method we are going to see is usually called "one hot encoding".
|
||||
|
||||
#load Arthritis dataset in memory.
|
||||
@@ -25,13 +25,13 @@ df <- data.table(Arthritis, keep.rownames = FALSE)
|
||||
cat("Print the dataset\n")
|
||||
print(df)
|
||||
|
||||
# 2 columns have factor type, one has ordinal type (ordinal variable is a categorical variable with values wich can be ordered, here: None > Some > Marked).
|
||||
# 2 columns have factor type, one has ordinal type (ordinal variable is a categorical variable with values which can be ordered, here: None > Some > Marked).
|
||||
cat("Structure of the dataset\n")
|
||||
str(df)
|
||||
|
||||
# Let's add some new categorical features to see if it helps. Of course these feature are highly correlated to the Age feature. Usually it's not a good thing in ML, but Tree algorithms (including boosted trees) are able to select the best features, even in case of highly correlated features.
|
||||
|
||||
# For the first feature we create groups of age by rounding the real age. Note that we transform it to factor (categorical data) so the algorithm treat them as independant values.
|
||||
# For the first feature we create groups of age by rounding the real age. Note that we transform it to factor (categorical data) so the algorithm treat them as independent values.
|
||||
df[, AgeDiscret := as.factor(round(Age / 10, 0))]
|
||||
|
||||
# Here is an even stronger simplification of the real age with an arbitrary split at 30 years old. I choose this value based on nothing. We will see later if simplifying the information based on arbitrary values is a good strategy (I am sure you already have an idea of how well it will work!).
|
||||
|
||||
@@ -22,10 +22,10 @@ xgb.cv(param, dtrain, nrounds, nfold = 5,
|
||||
metrics = 'error', showsd = FALSE)
|
||||
|
||||
###
|
||||
# you can also do cross validation with cutomized loss function
|
||||
# you can also do cross validation with customized loss function
|
||||
# See custom_objective.R
|
||||
##
|
||||
print ('running cross validation, with cutomsized loss function')
|
||||
print ('running cross validation, with customized loss function')
|
||||
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
|
||||
@@ -12,7 +12,7 @@ watchlist <- list(eval = dtest, train = dtrain)
|
||||
num_round <- 2
|
||||
|
||||
# user define objective function, given prediction, return gradient and second order gradient
|
||||
# this is loglikelihood loss
|
||||
# this is log likelihood loss
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
preds <- 1 / (1 + exp(-preds))
|
||||
@@ -23,9 +23,9 @@ logregobj <- function(preds, dtrain) {
|
||||
|
||||
# user defined evaluation function, return a pair metric_name, result
|
||||
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||
# this may make buildin evalution metric not function properly
|
||||
# this may make builtin evaluation metric not function properly
|
||||
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||
# the buildin evaluation error assumes input is after logistic transformation
|
||||
# the builtin evaluation error assumes input is after logistic transformation
|
||||
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||
evalerror <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
|
||||
@@ -11,7 +11,7 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
|
||||
watchlist <- list(eval = dtest)
|
||||
num_round <- 20
|
||||
# user define objective function, given prediction, return gradient and second order gradient
|
||||
# this is loglikelihood loss
|
||||
# this is log likelihood loss
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
preds <- 1 / (1 + exp(-preds))
|
||||
@@ -21,9 +21,9 @@ logregobj <- function(preds, dtrain) {
|
||||
}
|
||||
# user defined evaluation function, return a pair metric_name, result
|
||||
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||
# this may make buildin evalution metric not function properly
|
||||
# this may make builtin evaluation metric not function properly
|
||||
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||
# the buildin evaluation error assumes input is after logistic transformation
|
||||
# the builtin evaluation error assumes input is after logistic transformation
|
||||
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||
evalerror <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
|
||||
Reference in New Issue
Block a user