Cleaning of demo

This commit is contained in:
pommedeterresautee 2015-12-02 15:47:45 +01:00
parent e57043ce62
commit 7479cc68a7
5 changed files with 10 additions and 11 deletions

View File

@ -102,9 +102,9 @@ xgb.dump(bst, "dump.raw.txt", with.stats = T)
# Finally, you can check which features are the most important.
print("Most important features (look at column Gain):")
imp_matrix <- xgb.importance(feature_names = train$data@Dimnames[[2]], model = bst)
print(imp_matrix)
# Feature importance bar plot by gain
print("Feature importance Plot : ")
print(xgb.plot.importance(importance_matrix = imp_matrix))

View File

@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
setinfo(dtest, "base_margin", ptest)
print('this is result of boost from initial prediction')
bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)

View File

@ -67,10 +67,9 @@ output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]
cat("Learning...\n")
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
               eta = 1, nthread = 2, nround = 10, objective = "binary:logistic")
xgb.dump(bst, 'xgb.model.dump', with.stats = T)
# sparse_matrix@Dimnames[[2]] represents the column names of the sparse matrix.
importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst)
print(importance)
# According to the matrix below, the most important feature in this dataset to predict if the treatment will work is the Age. The second most important feature is having received a placebo or not. The sex is third. Then we see our generated features (AgeDiscret). We can see that their contribution is very low (Gain column).

View File

@ -43,9 +43,9 @@ evalerror <- function(preds, dtrain) {
param <- list(max.depth = 2, eta = 1, silent = 1,
              objective = logregobj, eval_metric = evalerror)
# train with customized objective
xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5)
# do cross validation with prediction values for each fold
res <- xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5, prediction = TRUE)
res$dt
length(res$pred)

View File

@ -2,15 +2,15 @@ require(xgboost)
# load in the agaricus dataset
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
param <- list(max.depth=2, eta=1, silent=1, objective='binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)
nround = 5
# training the model for two rounds
bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2, watchlist = watchlist)
cat('start testing prediction from first n trees\n')
### predict using first 2 tree