From fe7cdcefb40a4849422b4d4dc61155e0987249af Mon Sep 17 00:00:00 2001
From: pommedeterresautee
Date: Mon, 23 Nov 2015 18:19:59 +0100
Subject: [PATCH] Implement #431 PR

---
 R-package/demo/basic_walkthrough.R | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R
index 532c5d873..0b1e5b817 100644
--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
@@ -14,28 +14,28 @@ class(train$data)
 # this is the basic usage of xgboost you can put matrix in data field
 # note: we are putting in sparse matrix here, xgboost naturally handles sparse input
 # use sparse matrix when your feature is sparse(e.g. when you are using one-hot encoding vector)
-print("training xgboost with sparseMatrix")
+print("Training xgboost with sparseMatrix")
 bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
                nthread = 2, objective = "binary:logistic")
 # alternatively, you can put in dense matrix, i.e. basic R-matrix
-print("training xgboost with Matrix")
+print("Training xgboost with Matrix")
 bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
                nthread = 2, objective = "binary:logistic")
 
 # you can also put in xgb.DMatrix object, which stores label, data and other meta datas needed for advanced features
-print("training xgboost with xgb.DMatrix")
+print("Training xgboost with xgb.DMatrix")
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
                nthread = 2, objective = "binary:logistic")
 
 # Verbose = 0,1,2
-print ('train xgboost with verbose 0, no message')
+print("Train xgboost with verbose 0, no message")
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
                nthread = 2, objective = "binary:logistic", verbose = 0)
-print ('train xgboost with verbose 1, print evaluation metric')
+print("Train xgboost with verbose 1, print evaluation metric")
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
                nthread = 2, objective = "binary:logistic", verbose = 1)
-print ('train xgboost with verbose 2, also print information about tree')
+print("Train xgboost with verbose 2, also print information about tree")
 bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
                nthread = 2, objective = "binary:logistic", verbose = 2)
 
@@ -76,11 +76,11 @@ dtest <- xgb.DMatrix(data = test$data, label=test$label)
 watchlist <- list(train=dtrain, test=dtest)
 # to train with watchlist, use xgb.train, which contains more advanced features
 # watchlist allows us to monitor the evaluation result on all data in the list
-print ('train xgboost using xgb.train with watchlist')
+print("Train xgboost using xgb.train with watchlist")
 bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
                  nthread = 2, objective = "binary:logistic")
 # we can change evaluation metrics, or use multiple evaluation metrics
-print ('train xgboost using xgb.train with watchlist, watch logloss and error')
+print("Train xgboost using xgb.train with watchlist, watch logloss and error")
 bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
                  eval.metric = "error", eval.metric = "logloss",
                  nthread = 2, objective = "binary:logistic")
@@ -102,4 +102,9 @@ xgb.dump(bst, "dump.raw.txt", with.stats = T)
 
 # Finally, you can check which features are the most important.
 print("Most important features (look at column Gain):")
-print(xgb.importance(feature_names = train$data@Dimnames[[2]], filename_dump = "dump.raw.txt"))
+imp_matrix <- xgb.importance(feature_names = train$data@Dimnames[[2]], filename_dump = "dump.raw.txt")
+print(imp_matrix)
+
+# Feature importance bar plot by gain
+print("Feature importance plot:")
+print(xgb.plot.importance(imp_matrix))