[R] adopt demos and vignettes to a more consistent parameter style
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
require(xgboost)
|
||||
require(methods)
|
||||
|
||||
# we load in the agaricus dataset
|
||||
# In this example, we are aiming to predict whether a mushroom can be eaten
|
||||
# In this example, we are aiming to predict whether a mushroom is edible
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
train <- agaricus.train
|
||||
@@ -15,33 +16,33 @@ class(train$data)
|
||||
# note: we are putting in sparse matrix here, xgboost naturally handles sparse input
|
||||
# use a sparse matrix when your features are sparse (e.g. when you are using one-hot encoded vectors)
|
||||
print("Training xgboost with sparseMatrix")
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1, nrounds = 2,
|
||||
nthread = 2, objective = "binary:logistic")
|
||||
# alternatively, you can put in dense matrix, i.e. basic R-matrix
|
||||
print("Training xgboost with Matrix")
|
||||
bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
|
||||
bst <- xgboost(data = as.matrix(train$data), label = train$label, max_depth = 2, eta = 1, nrounds = 2,
|
||||
nthread = 2, objective = "binary:logistic")
|
||||
|
||||
# you can also put in an xgb.DMatrix object, which stores the label, data and other metadata needed for advanced features
|
||||
print("Training xgboost with xgb.DMatrix")
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2,
|
||||
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, nthread = 2,
|
||||
objective = "binary:logistic")
|
||||
|
||||
# Verbose = 0,1,2
|
||||
print("Train xgboost with verbose 0, no message")
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
|
||||
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
|
||||
nthread = 2, objective = "binary:logistic", verbose = 0)
|
||||
print("Train xgboost with verbose 1, print evaluation metric")
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
|
||||
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
|
||||
nthread = 2, objective = "binary:logistic", verbose = 1)
|
||||
print("Train xgboost with verbose 2, also print information about tree")
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
|
||||
bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
|
||||
nthread = 2, objective = "binary:logistic", verbose = 2)
|
||||
|
||||
# you can also specify data as file path to a LibSVM format input
|
||||
# since we do not have this file with us, the following line is just for illustration
|
||||
# bst <- xgboost(data = 'agaricus.train.svm', max.depth = 2, eta = 1, nround = 2,objective = "binary:logistic")
|
||||
# bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nrounds = 2,objective = "binary:logistic")
|
||||
|
||||
#--------------------basic prediction using xgboost--------------
|
||||
# you can do prediction using the following line
|
||||
@@ -77,19 +78,19 @@ watchlist <- list(train=dtrain, test=dtest)
|
||||
# to train with watchlist, use xgb.train, which contains more advanced features
|
||||
# watchlist allows us to monitor the evaluation result on all data in the list
|
||||
print("Train xgboost using xgb.train with watchlist")
|
||||
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
|
||||
bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
|
||||
nthread = 2, objective = "binary:logistic")
|
||||
# we can change evaluation metrics, or use multiple evaluation metrics
|
||||
print("train xgboost using xgb.train with watchlist, watch logloss and error")
|
||||
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
|
||||
eval.metric = "error", eval.metric = "logloss",
|
||||
bst <- xgb.train(data=dtrain, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
|
||||
eval_metric = "error", eval_metric = "logloss",
|
||||
nthread = 2, objective = "binary:logistic")
|
||||
|
||||
# xgb.DMatrix can also be saved using xgb.DMatrix.save
|
||||
xgb.DMatrix.save(dtrain, "dtrain.buffer")
|
||||
# to load it in, simply call xgb.DMatrix
|
||||
dtrain2 <- xgb.DMatrix("dtrain.buffer")
|
||||
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nround=2, watchlist=watchlist,
|
||||
bst <- xgb.train(data=dtrain2, max_depth=2, eta=1, nrounds=2, watchlist=watchlist,
|
||||
nthread = 2, objective = "binary:logistic")
|
||||
# information can be extracted from xgb.DMatrix using getinfo
|
||||
label = getinfo(dtest, "label")
|
||||
@@ -98,11 +99,11 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
|
||||
print(paste("test-error=", err))
|
||||
|
||||
# You can dump the tree you learned using xgb.dump into a text file
|
||||
xgb.dump(bst, "dump.raw.txt", with.stats = T)
|
||||
xgb.dump(bst, "dump.raw.txt", with_stats = T)
|
||||
|
||||
# Finally, you can check which features are the most important.
|
||||
print("Most important features (look at column Gain):")
|
||||
imp_matrix <- xgb.importance(feature_names = train$data@Dimnames[[2]], model = bst)
|
||||
imp_matrix <- xgb.importance(feature_names = colnames(train$data), model = bst)
|
||||
print(imp_matrix)
|
||||
|
||||
# Feature importance bar plot by gain
|
||||
|
||||
@@ -11,8 +11,8 @@ watchlist <- list(eval = dtest, train = dtrain)
|
||||
#
|
||||
print('start running example to start from a initial prediction')
|
||||
# train xgboost for 1 round
|
||||
param <- list(max.depth=2,eta=1,nthread = 2, silent=1,objective='binary:logistic')
|
||||
bst <- xgb.train( param, dtrain, 1, watchlist )
|
||||
param <- list(max_depth=2, eta=1, nthread = 2, silent=1, objective='binary:logistic')
|
||||
bst <- xgb.train(param, dtrain, 1, watchlist)
|
||||
# Note: we need the margin value instead of transformed prediction in set_base_margin
|
||||
# predicting with outputmargin=TRUE will always give you margin values before the logistic transformation
|
||||
ptrain <- predict(bst, dtrain, outputmargin=TRUE)
|
||||
|
||||
@@ -65,11 +65,10 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]
|
||||
|
||||
# Following is the same process as other demo
|
||||
cat("Learning...\n")
|
||||
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
|
||||
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
|
||||
bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 9,
|
||||
eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic")
|
||||
|
||||
# colnames(sparse_matrix) gives the column names of the sparse matrix (equivalent to sparse_matrix@Dimnames[[2]]).
|
||||
importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst)
|
||||
importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst)
|
||||
print(importance)
|
||||
# According to the matrix below, the most important feature in this dataset to predict if the treatment will work is the Age. The second most important feature is having received a placebo or not. The sex is third. Then we see our generated features (AgeDiscret). We can see that their contribution is very low (Gain column).
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
nround <- 2
|
||||
param <- list(max.depth=2,eta=1,silent=1,nthread = 2, objective='binary:logistic')
|
||||
param <- list(max_depth=2, eta=1, silent=1, nthread=2, objective='binary:logistic')
|
||||
|
||||
cat('running cross validation\n')
|
||||
# do cross validation, this will print result out as
|
||||
@@ -19,7 +19,7 @@ cat('running cross validation, disable standard deviation display\n')
|
||||
# [iteration] metric_name:mean_value+std_value
|
||||
# std_value is standard deviation of the metric
|
||||
xgb.cv(param, dtrain, nround, nfold=5,
|
||||
metrics={'error'}, showsd = FALSE)
|
||||
metrics='error', showsd = FALSE)
|
||||
|
||||
###
|
||||
# you can also do cross validation with a customized loss function
|
||||
@@ -40,12 +40,12 @@ evalerror <- function(preds, dtrain) {
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
|
||||
param <- list(max.depth=2,eta=1,silent=1,
|
||||
param <- list(max_depth=2, eta=1, silent=1,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
# train with customized objective
|
||||
xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5)
|
||||
|
||||
# do cross validation with prediction values for each fold
|
||||
res <- xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5, prediction = TRUE)
|
||||
res$dt
|
||||
res$evaluation_log
|
||||
length(res$pred)
|
||||
|
||||
@@ -33,7 +33,7 @@ evalerror <- function(preds, dtrain) {
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
|
||||
param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
|
||||
param <- list(max_depth=2, eta=1, nthread = 2, silent=1,
|
||||
objective=logregobj, eval_metric=evalerror)
|
||||
print ('start training with user customized objective')
|
||||
# training with customized objective, we can also do step by step training
|
||||
@@ -57,7 +57,7 @@ logregobjattr <- function(preds, dtrain) {
|
||||
hess <- preds * (1 - preds)
|
||||
return(list(grad = grad, hess = hess))
|
||||
}
|
||||
param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
|
||||
param <- list(max_depth=2, eta=1, nthread = 2, silent=1,
|
||||
objective=logregobjattr, eval_metric=evalerror)
|
||||
print ('start training with user customized objective, with additional attributes in DMatrix')
|
||||
# training with customized objective, we can also do step by step training
|
||||
|
||||
@@ -7,7 +7,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
# note: for customized objective function, we leave objective as default
|
||||
# note: what we are getting is margin value in prediction
|
||||
# you must know what you are doing
|
||||
param <- list(max.depth=2,eta=1,nthread = 2, silent=1)
|
||||
param <- list(max_depth=2, eta=1, nthread = 2, silent=1)
|
||||
watchlist <- list(eval = dtest)
|
||||
num_round <- 20
|
||||
# user-defined objective function: given predictions, return the gradient and second-order gradient
|
||||
@@ -34,7 +34,7 @@ print ('start training with early Stopping setting')
|
||||
|
||||
bst <- xgb.train(param, dtrain, num_round, watchlist,
|
||||
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
|
||||
early.stop.round = 3)
|
||||
early_stopping_round = 3)
|
||||
bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
|
||||
objective = logregobj, eval_metric = evalerror,
|
||||
maximize = FALSE, early.stop.round = 3)
|
||||
maximize = FALSE, early_stopping_rounds = 3)
|
||||
|
||||
@@ -5,7 +5,7 @@ data(agaricus.test, package='xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
param <- list(max.depth=2,eta=1,silent=1,objective='binary:logistic')
|
||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
nround = 2
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ data(agaricus.test, package='xgboost')
|
||||
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
nround = 4
|
||||
|
||||
# training the model for two rounds
|
||||
|
||||
Reference in New Issue
Block a user