Implement #431 PR

This commit is contained in:
pommedeterresautee 2015-11-23 18:19:59 +01:00
parent 13829329bd
commit fe7cdcefb4

View File

@ -14,28 +14,28 @@ class(train$data)
# this is the basic usage of xgboost you can put matrix in data field
# note: we are putting in sparse matrix here, xgboost naturally handles sparse input
# use sparse matrix when your feature is sparse(e.g. when you are using one-hot encoding vector)
print("training xgboost with sparseMatrix")
print("Training xgboost with sparseMatrix")
bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
nthread = 2, objective = "binary:logistic")
# alternatively, you can put in dense matrix, i.e. basic R-matrix
print("training xgboost with Matrix")
print("Training xgboost with Matrix")
bst <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nround = 2,
nthread = 2, objective = "binary:logistic")
# you can also put in xgb.DMatrix object, which stores label, data and other meta datas needed for advanced features
print("training xgboost with xgb.DMatrix")
print("Training xgboost with xgb.DMatrix")
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2, nthread = 2,
objective = "binary:logistic")
# Verbose = 0,1,2
print ('train xgboost with verbose 0, no message')
print("Train xgboost with verbose 0, no message")
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
nthread = 2, objective = "binary:logistic", verbose = 0)
print ('train xgboost with verbose 1, print evaluation metric')
print("Train xgboost with verbose 1, print evaluation metric")
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
nthread = 2, objective = "binary:logistic", verbose = 1)
print ('train xgboost with verbose 2, also print information about tree')
print("Train xgboost with verbose 2, also print information about tree")
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nround = 2,
nthread = 2, objective = "binary:logistic", verbose = 2)
@ -76,11 +76,11 @@ dtest <- xgb.DMatrix(data = test$data, label=test$label)
watchlist <- list(train=dtrain, test=dtest)
# to train with watchlist, use xgb.train, which contains more advanced features
# watchlist allows us to monitor the evaluation result on all data in the list
print ('train xgboost using xgb.train with watchlist')
print("Train xgboost using xgb.train with watchlist")
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print ('train xgboost using xgb.train with watchlist, watch logloss and error')
print("train xgboost using xgb.train with watchlist, watch logloss and error")
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nround=2, watchlist=watchlist,
eval.metric = "error", eval.metric = "logloss",
nthread = 2, objective = "binary:logistic")
@ -102,4 +102,9 @@ xgb.dump(bst, "dump.raw.txt", with.stats = T)
# Finally, you can check which features are the most important.
print("Most important features (look at column Gain):")
print(xgb.importance(feature_names = train$data@Dimnames[[2]], filename_dump = "dump.raw.txt"))
imp_matrix <- xgb.importance(feature_names = train$data@Dimnames[[2]], filename_dump = "dump.raw.txt")
print(imp_matrix)
# Feature importance bar plot by gain
print("Feature importance Plot : ")
print(xgb.plot.importance(imp_matrix))