Remove silent from R demos. (#5675)

* Remove silent from R demos.
* Vignettes.
This commit is contained in:
parent dd9aeb60ae
commit 7903286961
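For context on the change itself: `silent` was deprecated upstream in favour of the `verbosity` parameter, so the demos simply drop it. A minimal sketch of the post-change pattern (assuming an xgboost build that accepts `verbosity`; `verbose` is the R training argument that controls per-round printing):

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)

# No `silent` any more; omit it, or set verbosity explicitly if needed
# (0 = silent, 1 = warning, 2 = info, 3 = debug).
param <- list(max_depth = 2, eta = 1, nthread = 2,
              objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, nrounds = 2,
                 verbose = 0)  # suppress per-round evaluation output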
@@ -11,7 +11,7 @@ watchlist <- list(eval = dtest, train = dtrain)
 #
 print('start running example to start from a initial prediction')
 # train xgboost for 1 round
-param <- list(max_depth=2, eta=1, nthread = 2, silent=1, objective='binary:logistic')
+param <- list(max_depth=2, eta=1, nthread = 2, objective='binary:logistic')
 bst <- xgb.train(param, dtrain, 1, watchlist)
 # Note: we need the margin value instead of transformed prediction in set_base_margin
 # do predict with output_margin=TRUE, will always give you margin values before logistic transformation
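This hunk edits the boost-from-prediction demo. For readers following along, a sketch of the workflow the surrounding demo implements (the parts beyond this hunk are assumptions based on the standard agaricus demos):

library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(eval = dtest, train = dtrain)
param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')

# Round one: train briefly, then extract margins (pre-logistic values),
# which is why outputmargin = TRUE rather than transformed probabilities.
bst <- xgb.train(param, dtrain, nrounds = 1, watchlist)
ptrain <- predict(bst, dtrain, outputmargin = TRUE)
ptest  <- predict(bst, dtest,  outputmargin = TRUE)

# Seed the next run with those margins as the base margin.
setinfo(dtrain, 'base_margin', ptrain)
setinfo(dtest,  'base_margin', ptest)
bst2 <- xgb.train(param, dtrain, nrounds = 1, watchlist)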
@@ -6,7 +6,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

 nrounds <- 2
-param <- list(max_depth=2, eta=1, silent=1, nthread=2, objective='binary:logistic')
+param <- list(max_depth=2, eta=1, nthread=2, objective='binary:logistic')

 cat('running cross validation\n')
 # do cross validation, this will print result out as
@@ -40,7 +40,7 @@ evalerror <- function(preds, dtrain) {
   return(list(metric = "error", value = err))
 }

-param <- list(max_depth=2, eta=1, silent=1,
+param <- list(max_depth=2, eta=1,
               objective = logregobj, eval_metric = evalerror)
 # train with customized objective
 xgb.cv(params = param, data = dtrain, nrounds = nrounds, nfold = 5)
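This hunk comes from the custom-objective cross-validation demo. A self-contained sketch of the pieces it references; the names `logregobj` and `evalerror` follow the demo, and their bodies here are the standard logistic-regression versions, stated as an assumption:

library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)

# Logistic-loss objective: return per-instance gradient and hessian.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))   # preds arrive as margins
  grad <- preds - labels
  hess <- preds * (1 - preds)
  list(grad = grad, hess = hess)
}

# Custom evaluation: classification error on the margin scale.
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- mean(as.numeric(preds > 0) != labels)
  list(metric = "error", value = err)
}

param <- list(max_depth = 2, eta = 1,
              objective = logregobj, eval_metric = evalerror)
xgb.cv(params = param, data = dtrain, nrounds = 2, nfold = 5)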
@@ -5,7 +5,7 @@ data(agaricus.test, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

-param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
+param <- list(max_depth=2, eta=1, objective='binary:logistic')
 watchlist <- list(eval = dtest, train = dtrain)
 nrounds = 2

@@ -10,7 +10,7 @@ data(agaricus.test, package='xgboost')
 dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)

-param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
+param <- list(max_depth=2, eta=1, objective='binary:logistic')
 nrounds = 4

 # training the model for two rounds
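The two hunks above set up identical train/test pipelines. For reference, a sketch of the complete loop they feed into (watchlist evaluation plus a held-out error check; everything past the param list is assumed from the standard demos):

library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest  <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)

# Train while printing eval/train metrics each round via the watchlist.
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)

# Held-out error: predictions are probabilities under binary:logistic.
pred <- predict(bst, dtest)
err <- mean(as.numeric(pred > 0.5) != getinfo(dtest, 'label'))
cat('test error:', err, '\n')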
@@ -17,8 +17,8 @@ Load xgboost model from the binary model file.
 }
 \details{
 The input file is expected to contain a model saved in an xgboost-internal binary format
 using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
 appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
 saved from there in xgboost format, could be loaded from R.

 Note: a model saved as an R object has to be loaded using corresponding R methods,
@@ -29,7 +29,7 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
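The help page this hunk touches documents binary model persistence. A short round-trip check of that contract (a sketch; it assumes the `bst` and `test` objects from the example above):

# A model reloaded with xgb.load() should reproduce the original
# model's predictions exactly.
xgb.save(bst, 'xgb.model')
bst2 <- xgb.load('xgb.model')
stopifnot(identical(predict(bst, test$data), predict(bst2, test$data)))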
@@ -47,15 +47,15 @@ xgboost.version <- packageDescription("xgboost")$Version

 \section{Introduction}

 This is an introductory document of using the \verb@xgboost@ package in R.

 \verb@xgboost@ is short for eXtreme Gradient Boosting package. It is an efficient
 and scalable implementation of the gradient boosting framework by \citep{friedman2001greedy} \citep{friedman2000additive}.
 The package includes an efficient linear model solver and a tree learning algorithm.
 It supports various objective functions, including regression, classification
 and ranking. The package is made to be extensible, so that users can easily define their own objectives. It has several features:
 \begin{enumerate}
   \item{Speed: }{\verb@xgboost@ can automatically do parallel computation on
   Windows and Linux, with OpenMP. It is generally over 10 times faster than
   \verb@gbm@.}
   \item{Input Type: }{\verb@xgboost@ takes several types of input data:}
@@ -65,9 +65,9 @@ and ranking. The package is made to be extensible, so that users can easily defi
   \item{Data File: }{Local data files}
   \item{xgb.DMatrix: }{\verb@xgboost@'s own class. Recommended.}
   \end{itemize}
   \item{Sparsity: }{\verb@xgboost@ accepts sparse input for both tree booster
   and linear booster, and is optimized for sparse input.}
   \item{Customization: }{\verb@xgboost@ supports customized objective functions
   and evaluation functions.}
   \item{Performance: }{\verb@xgboost@ has better performance on several different
   datasets.}
@@ -76,8 +76,8 @@ and ranking. The package is made to be extensible, so that users can easily defi

 \section{Example with Mushroom data}

 In this section, we will illustrate some common usage of \verb@xgboost@. The
 Mushroom data is cited from the UCI Machine Learning Repository. \citep{Bache+Lichman:2013}

 <<Training and prediction with iris>>=
 library(xgboost)
@@ -85,7 +85,7 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
                nrounds = 2, objective = "binary:logistic")
 xgb.save(bst, 'model.save')
 bst = xgb.load('model.save')
@@ -97,12 +97,12 @@ pred <- predict(bst, test$data)

 Here we can save the model to a binary local file, and load it when needed.
 We can't inspect the trees inside. However, we have another function to save the
 model in plain text.
 <<Dump Model>>=
 xgb.dump(bst, 'model.dump')
 @

 The output looks like

 \begin{verbatim}
 booster[0]:
@@ -122,8 +122,8 @@ booster[1]:
 \end{verbatim}

 It is important to know \verb@xgboost@'s own data type: \verb@xgb.DMatrix@.
 It speeds up \verb@xgboost@, and is needed for advanced features such as
 training from an initial prediction value and weighted training instances.

 We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
 <<xgb.DMatrix>>=
@@ -132,7 +132,7 @@ class(dtrain)
 head(getinfo(dtrain, 'label'))
 @

 We can also save the matrix to a binary file and then load it simply with
 \verb@xgb.DMatrix@:
 <<save model>>=
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix')
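A compact sketch of the full DMatrix lifecycle the vignette walks through here (construct, inspect, persist, reload); the file name follows the vignette:

library(xgboost)
data(agaricus.train, package = 'xgboost')

# Construct from a sparse matrix plus a label vector.
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
class(dtrain)
head(getinfo(dtrain, 'label'))

# Persist to xgboost's binary DMatrix format and reload by path.
xgb.DMatrix.save(dtrain, 'xgb.DMatrix')
dtrain2 <- xgb.DMatrix('xgb.DMatrix')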
@@ -163,51 +163,51 @@ evalerror <- function(preds, dtrain) {

 dtest <- xgb.DMatrix(test$data, label = test$label)
 watchlist <- list(eval = dtest, train = dtrain)
-param <- list(max_depth = 2, eta = 1, silent = 1)
+param <- list(max_depth = 2, eta = 1)

 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
 @

 The gradient and second-order gradient are required as the output of a customized
 objective function.

 We also have \verb@slice@ for row extraction. It is useful in
 cross-validation.

 For a walkthrough demo, please see \verb@R-package/demo/@ for further
 details.

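On the \verb@slice@ mention just above: it takes a DMatrix and a vector of row indices. A one-liner sketch of that extraction (assumes the vignette's `dtrain`):

# Extract the first 100 rows of dtrain as a new DMatrix, e.g. one CV fold.
dsub <- slice(dtrain, 1:100)
dim(dsub)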
 \section{The Higgs Boson competition}

 We have made a demo for \href{http://www.kaggle.com/c/higgs-boson}{the Higgs
 Boson Machine Learning Challenge}.

 Here are the instructions to make a submission:
 \begin{enumerate}
   \item Download the \href{http://www.kaggle.com/c/higgs-boson/data}{datasets}
   and extract them to \verb@data/@.
   \item Run scripts under \verb@xgboost/demo/kaggle-higgs/@:
   \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R}{higgs-train.R}
   and \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-pred.R}{higgs-pred.R}.
   The computation will take less than a minute on an Intel i7.
   \item Go to the \href{http://www.kaggle.com/c/higgs-boson/submissions/attach}{submission page}
   and submit your result.
 \end{enumerate}

 We provide \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R}{a script}
 to compare the time cost on the Higgs dataset with \verb@gbm@ and \verb@xgboost@.
 The training set contains 350000 records and 30 features.

 \verb@xgboost@ can automatically do parallel computation. On a machine with an Intel
 i7-4700MQ and 24GB memory, we found that \verb@xgboost@ takes about 35 seconds, which is about 20 times faster
 than \verb@gbm@. When we limited \verb@xgboost@ to use only one thread, it was
 still about two times faster than \verb@gbm@.

 Meanwhile, the result from \verb@xgboost@ reaches
 \href{http://www.kaggle.com/c/higgs-boson/details/evaluation}{3.60@AMS} with a
 single model. This result stands in the
 \href{http://www.kaggle.com/c/higgs-boson/leaderboard}{top 30\%} of the
 competition.

 \bibliographystyle{jss}
 \nocite{*} % list uncited references
@@ -21,7 +21,6 @@ param <- list("objective" = "binary:logitraw",
               "bst:max_depth" = 6,
               "eval_metric" = "auc",
               "eval_metric" = "ams@0.15",
-              "silent" = 1,
               "nthread" = 16)
 watchlist <- list("train" = xgmat)
 nrounds = 120
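One detail worth flagging in this param list: the repeated "eval_metric" entries are intentional. An R list may carry duplicate names, and xgboost reports every supplied metric for each watchlist entry on every round. A minimal sketch (assumes the demo's `xgmat` and `watchlist`):

# Both AUC and AMS@0.15 are printed per round for the watchlist.
param <- list("objective" = "binary:logitraw",
              "eval_metric" = "auc",
              "eval_metric" = "ams@0.15",
              "nthread" = 16)
bst <- xgb.train(param, xgmat, nrounds = 120, watchlist)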
@@ -30,4 +29,3 @@ bst = xgb.train(param, xgmat, nrounds, watchlist );
 # save out model
 xgb.save(bst, "higgs.model")
 print ('finish training')
-
@@ -36,7 +36,6 @@ for (i in 1:length(threads)){
               "bst:max_depth" = 6,
               "eval_metric" = "auc",
               "eval_metric" = "ams@0.15",
-              "silent" = 1,
               "nthread" = thread)
 watchlist <- list("train" = xgmat)
 nrounds = 120
@@ -71,7 +71,7 @@ Scala

 import ml.dmlc.xgboost4j.scala.DMatrix
 import ml.dmlc.xgboost4j.scala.XGBoost

 object XGBoostScalaExample {
   def main(args: Array[String]) {
     // read training data, available at xgboost/demo/data
@@ -41,19 +41,11 @@ TEST(Logging, Basic) {
   output = testing::internal::GetCapturedStderr();
   ASSERT_EQ(output.size(), 0);

-  args["silent"] = "True";
-  ConsoleLogger::Configure({args.cbegin(), args.cend()});
-  testing::internal::CaptureStderr();
-  LOG(INFO) << "Test silent parameter.";
-  output = testing::internal::GetCapturedStderr();
-  ASSERT_EQ(output.length(), 0);
-
   testing::internal::CaptureStderr();
   LOG(CONSOLE) << "Test Log Console";  // ignore global setting.
   output = testing::internal::GetCapturedStderr();
   ASSERT_NE(output.find("Test Log Console"), std::string::npos);

-  args["silent"] = "False";
   args["verbosity"] = "2";  // restore
   ConsoleLogger::Configure({args.cbegin(), args.cend()});
 }
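This C++ test exercises the logger's verbosity configuration that replaces silent. On the R side, the corresponding global switch is the verbosity config; a hedged sketch (xgb.set.config/xgb.get.config exist only in reasonably recent xgboost R releases, so treat their availability as an assumption):

library(xgboost)
# Global logging level: 0 = silent, 1 = warning, 2 = info, 3 = debug.
xgb.set.config(verbosity = 2)
xgb.get.config()$verbosity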
@@ -9,10 +9,10 @@ dtrain = xgb.DMatrix('../../demo/data/agaricus.txt.train')
 dtest = xgb.DMatrix('../../demo/data/agaricus.txt.test')

 # Specify parameters via map; definitions are the same as the C++ version
-param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic' }
+param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}

 # Specify a validation set to watch performance
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 20

 # Run training; all the features in the training API are available.
Loading…
x
Reference in New Issue
Block a user