remove temp files
parent 801a17fa02
commit de08c5a3da
@ -1,28 +0,0 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{friedman2001greedy}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Example with iris}{1}{section.2}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Advanced Examples}{2}{section.3}}
\bibstyle{jss}
\citation{*}
\bibdata{xgboost}
\bibcite{friedman2000additive}{{1}{2000}{{Friedman \emph {et~al.}}}{{Friedman, Hastie, Tibshirani \emph {et~al.}}}}
\bibcite{friedman2001greedy}{{2}{2001}{{Friedman}}{{}}}
\@writefile{toc}{\contentsline {section}{\numberline {4}The Higgs Boson competition}{3}{section.4}}
@ -1,24 +0,0 @@
\begin{thebibliography}{2}
\newcommand{\enquote}[1]{``#1''}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\providecommand{\urlprefix}{URL }
\expandafter\ifx\csname urlstyle\endcsname\relax
\providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else
\providecommand{\doi}{doi:\discretionary{}{}{}\begingroup
\urlstyle{rm}\Url}\fi
\providecommand{\eprint}[2][]{\url{#2}}

\bibitem[{Friedman \emph{et~al.}(2000)Friedman, Hastie, Tibshirani
\emph{et~al.}}]{friedman2000additive}
Friedman J, Hastie T, Tibshirani R, \emph{et~al.} (2000).
\newblock \enquote{Additive logistic regression: a statistical view of boosting
(with discussion and a rejoinder by the authors).}
\newblock \emph{The Annals of Statistics}, \textbf{28}(2), 337--407.

\bibitem[{Friedman(2001)}]{friedman2001greedy}
Friedman JH (2001).
\newblock \enquote{Greedy function approximation: a gradient boosting machine.}
\newblock \emph{Annals of Statistics}, pp. 1189--1232.

\end{thebibliography}
@ -1,47 +0,0 @@
This is BibTeX, Version 0.99d (TeX Live 2013/Debian)
Capacity: max_strings=35307, hash_size=35307, hash_prime=30011
The top-level auxiliary file: xgboost.aux
The style file: jss.bst
Database file #1: xgboost.bib
Reallocated wiz_functions (elt_size=4) to 6000 items from 3000.
You've used 2 entries,
3140 wiz_defined-function locations,
641 strings with 5430 characters,
and the built_in function-call counts, 1920 in all, are:
= -- 162
> -- 44
< -- 2
+ -- 17
- -- 15
* -- 149
:= -- 256
add.period$ -- 8
call.type$ -- 2
change.case$ -- 12
chr.to.int$ -- 2
cite$ -- 2
duplicate$ -- 171
empty$ -- 175
format.name$ -- 19
if$ -- 395
int.to.chr$ -- 1
int.to.str$ -- 1
missing$ -- 24
newline$ -- 21
num.names$ -- 8
pop$ -- 51
preamble$ -- 1
purify$ -- 12
quote$ -- 0
skip$ -- 53
stack$ -- 0
substring$ -- 181
swap$ -- 65
text.length$ -- 1
text.prefix$ -- 0
top$ -- 0
type$ -- 18
warning$ -- 0
while$ -- 16
width$ -- 0
write$ -- 36
@ -1,4 +0,0 @@
\BOOKMARK [1][-]{section.1}{Introduction}{}% 1
\BOOKMARK [1][-]{section.2}{Example with iris}{}% 2
\BOOKMARK [1][-]{section.3}{Advanced Examples}{}% 3
\BOOKMARK [1][-]{section.4}{The Higgs Boson competition}{}% 4
@ -1,319 +0,0 @@
\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color}
%% maxwidth is the original width if it is less than linewidth
%% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother

\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%

\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother

\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX

\usepackage{alltt}
\RequirePackage{url}
\usepackage{hyperref}
\RequirePackage{amsmath}
\RequirePackage{natbib}
\RequirePackage[a4paper,lmargin={1.25in},rmargin={1.25in},tmargin={1in},bmargin={1in}]{geometry}

\makeatletter
% \VignetteIndexEntry{xgboost: eXtreme Gradient Boosting}
%\VignetteKeywords{xgboost, gbm, gradient boosting machines}
%\VignettePackage{xgboost}
% \VignetteEngine{knitr::knitr}
\makeatother
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
%\SweaveOpts{concordance=TRUE}

\begin{center}
\vspace*{6\baselineskip}
\rule{\textwidth}{1.6pt}\vspace*{-\baselineskip}\vspace*{2pt}
\rule{\textwidth}{0.4pt}\\[2\baselineskip]
{\LARGE \textbf{xgboost: eXtreme Gradient Boosting}}\\[1.2\baselineskip]
\rule{\textwidth}{0.4pt}\vspace*{-\baselineskip}\vspace{3.2pt}
\rule{\textwidth}{1.6pt}\\[2\baselineskip]
{\Large Tianqi Chen, Tong He}\\[\baselineskip]
{\large Package Version: 0.3-0}\\[\baselineskip]
{\large \today}\par
\vfill
\end{center}

\thispagestyle{empty}

\clearpage

\setcounter{page}{1}

\section{Introduction}

This is an introduction to using the \verb@xgboost@ package in R.

\verb@xgboost@ is short for eXtreme Gradient Boosting. It is an efficient
and scalable implementation of the gradient boosting framework of
\citet{friedman2001greedy}. The package includes an efficient linear model
solver and a tree learning algorithm. It supports various objective
functions, including regression, classification and ranking. The package is
designed to be extensible, so users can easily define their own objectives.
It has several features:
\begin{enumerate}
\item{Speed: }{\verb@xgboost@ can automatically do parallel computation on
Windows and Linux, with OpenMP. It is generally over 10 times faster than
\verb@gbm@.}
\item{Input Type: }{\verb@xgboost@ takes several types of input data, as
shown in the sketch after this list:}
\begin{itemize}
\item{Dense Matrix: }{R's dense matrix, i.e.\ \verb@matrix@}
\item{Sparse Matrix: }{R's sparse matrix \verb@Matrix::dgCMatrix@}
\item{Data File: }{Local data files}
\item{xgb.DMatrix: }{\verb@xgboost@'s own class. Recommended.}
\end{itemize}
\item{Sparsity: }{\verb@xgboost@ accepts sparse input for both the tree
booster and the linear booster, and is optimized for sparse input.}
\item{Customization: }{\verb@xgboost@ supports customized objective and
evaluation functions.}
\item{Performance: }{\verb@xgboost@ has shown better performance on several
different datasets.}
\end{enumerate}
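
As a quick sketch of these input types, reusing the iris data from the next
section (a minimal illustration; the sparse coercion via the \verb@Matrix@
package is our own addition, not taken from the original text):

\begin{verbatim}
library(xgboost)
library(Matrix)

x <- as.matrix(iris[, 1:4])             # dense matrix input
y <- as.numeric(iris[, 5] == 'setosa')  # binary label

bst.dense  <- xgboost(x, y, nrounds = 2)                  # dense matrix
bst.sparse <- xgboost(as(x, 'dgCMatrix'), y, nrounds = 2) # sparse matrix
dtrain     <- xgb.DMatrix(x, label = y)                   # xgb.DMatrix
bst.dmat   <- xgboost(dtrain, nrounds = 2)                # recommended
\end{verbatim}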

\section{Example with iris}

In this section, we illustrate some common uses of \verb@xgboost@.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{library}\hlstd{(xgboost)}
\hlkwd{data}\hlstd{(iris)}
\hlstd{bst} \hlkwb{<-} \hlkwd{xgboost}\hlstd{(}\hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{]),}\hlkwd{as.numeric}\hlstd{(iris[,}\hlnum{5}\hlstd{]}\hlopt{==}\hlstr{'setosa'}\hlstd{),}
\hlkwc{nrounds} \hlstd{=} \hlnum{5}\hlstd{)}
\end{alltt}
\begin{verbatim}
## [0] train-rmse:0.351971
## [1] train-rmse:0.247769
## [2] train-rmse:0.174418
## [3] train-rmse:0.122783
## [4] train-rmse:0.086435
\end{verbatim}
\begin{alltt}
\hlkwd{xgb.save}\hlstd{(bst,} \hlstr{'model.save'}\hlstd{)}
\end{alltt}
\begin{verbatim}
## [1] TRUE
\end{verbatim}
\begin{alltt}
\hlstd{bst} \hlkwb{=} \hlkwd{xgb.load}\hlstd{(}\hlstr{'model.save'}\hlstd{)}
\hlstd{pred} \hlkwb{<-} \hlkwd{predict}\hlstd{(bst,} \hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{]))}
\end{alltt}
\end{kframe}
\end{knitrout}

\verb@xgboost@ is the main function for training a \verb@Booster@, i.e.\ a
model, and \verb@predict@ makes predictions with it.

Here we save the model to a local binary file and load it back when needed.
The binary file does not let us inspect the trees inside, but another
function saves the model in plain text.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{xgb.dump}\hlstd{(bst,} \hlstr{'model.dump'}\hlstd{)}
\end{alltt}
\begin{verbatim}
## [1] TRUE
\end{verbatim}
\end{kframe}
\end{knitrout}

The output looks like this:

\begin{verbatim}
booster[0]:
0:[f2<2.45] yes=1,no=2,missing=1
1:leaf=0.147059
2:[f3<1.65] yes=3,no=4,missing=3
3:leaf=0.464151
4:leaf=0.722449
booster[1]:
0:[f2<2.45] yes=1,no=2,missing=1
1:leaf=0.103806
2:[f2<4.85] yes=3,no=4,missing=3
3:leaf=0.316341
4:leaf=0.510365
\end{verbatim}

It is important to know \verb@xgboost@'s own data type: \verb@xgb.DMatrix@.
It speeds up \verb@xgboost@, and is needed for advanced features such as
training from an initial prediction value or weighting training instances
(a sketch follows the construction example below).

We can use the \verb@xgb.DMatrix@ function to construct such an object:
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{iris.mat} \hlkwb{<-} \hlkwd{as.matrix}\hlstd{(iris[,}\hlnum{1}\hlopt{:}\hlnum{4}\hlstd{])}
\hlstd{iris.label} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(iris[,}\hlnum{5}\hlstd{]}\hlopt{==}\hlstr{'setosa'}\hlstd{)}
\hlstd{diris} \hlkwb{<-} \hlkwd{xgb.DMatrix}\hlstd{(iris.mat,} \hlkwc{label} \hlstd{= iris.label)}
\hlkwd{class}\hlstd{(diris)}
\end{alltt}
\begin{verbatim}
## [1] "xgb.DMatrix"
\end{verbatim}
\begin{alltt}
\hlkwd{getinfo}\hlstd{(diris,}\hlstr{'label'}\hlstd{)}
\end{alltt}
\begin{verbatim}
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [71] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [106] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [141] 0 0 0 0 0 0 0 0 0 0
\end{verbatim}
\end{kframe}
\end{knitrout}
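
As a hedged sketch of the two advanced features mentioned above, using
\verb@setinfo@ (the field names \verb@weight@ and \verb@base_margin@ are
assumptions based on the package documentation, not taken from the original
text):

\begin{verbatim}
# attach per-instance weights to diris ('weight' field name is assumed)
w <- rep(1, length(iris.label))
w[iris.label == 1] <- 2
setinfo(diris, 'weight', w)

# start boosting from an initial prediction ('base_margin' is assumed)
setinfo(diris, 'base_margin', rep(0.5, length(iris.label)))
bst <- xgboost(diris, nrounds = 2)
\end{verbatim}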

We can also save the matrix to a binary file and then load it back simply
with \verb@xgb.DMatrix@:
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{xgb.DMatrix.save}\hlstd{(diris,} \hlstr{'iris.xgb.DMatrix'}\hlstd{)}
\end{alltt}
\begin{verbatim}
## 150x4 matrix with 600 entries is saved to iris.xgb.DMatrix
## [1] TRUE
\end{verbatim}
\begin{alltt}
\hlstd{diris} \hlkwb{=} \hlkwd{xgb.DMatrix}\hlstd{(}\hlstr{'iris.xgb.DMatrix'}\hlstd{)}
\end{alltt}
\begin{verbatim}
## 150x4 matrix with 600 entries is loaded from iris.xgb.DMatrix
\end{verbatim}
\end{kframe}
\end{knitrout}

\section{Advanced Examples}

The function \verb@xgboost@ is a simple wrapper with fewer parameters, in
order to be R-friendly. The core training function is wrapped in
\verb@xgb.train@. It is more flexible than \verb@xgboost@, but it requires
users to read the documentation a bit more carefully.

\verb@xgb.train@ only accepts an \verb@xgb.DMatrix@ object as its input,
but it supports advanced features such as custom objective and evaluation
functions.

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{logregobj} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{preds}\hlstd{,} \hlkwc{dtrain}\hlstd{) \{}
\hlstd{labels} \hlkwb{<-} \hlkwd{getinfo}\hlstd{(dtrain,} \hlstr{"label"}\hlstd{)}
\hlstd{preds} \hlkwb{<-} \hlnum{1}\hlopt{/}\hlstd{(}\hlnum{1} \hlopt{+} \hlkwd{exp}\hlstd{(}\hlopt{-}\hlstd{preds))}
\hlstd{grad} \hlkwb{<-} \hlstd{preds} \hlopt{-} \hlstd{labels}
\hlstd{hess} \hlkwb{<-} \hlstd{preds} \hlopt{*} \hlstd{(}\hlnum{1} \hlopt{-} \hlstd{preds)}
\hlkwd{return}\hlstd{(}\hlkwd{list}\hlstd{(}\hlkwc{grad} \hlstd{= grad,} \hlkwc{hess} \hlstd{= hess))}
\hlstd{\}}

\hlstd{evalerror} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{preds}\hlstd{,} \hlkwc{dtrain}\hlstd{) \{}
\hlstd{labels} \hlkwb{<-} \hlkwd{getinfo}\hlstd{(dtrain,} \hlstr{"label"}\hlstd{)}
\hlstd{err} \hlkwb{<-} \hlkwd{sqrt}\hlstd{(}\hlkwd{mean}\hlstd{((preds}\hlopt{-}\hlstd{labels)}\hlopt{^}\hlnum{2}\hlstd{))}
\hlkwd{return}\hlstd{(}\hlkwd{list}\hlstd{(}\hlkwc{metric} \hlstd{=} \hlstr{"MSE"}\hlstd{,} \hlkwc{value} \hlstd{= err))}
\hlstd{\}}

\hlstd{dtest} \hlkwb{<-} \hlkwd{slice}\hlstd{(diris,}\hlnum{1}\hlopt{:}\hlnum{100}\hlstd{)}
\hlstd{watchlist} \hlkwb{<-} \hlkwd{list}\hlstd{(}\hlkwc{eval} \hlstd{= dtest,} \hlkwc{train} \hlstd{= diris)}
\hlstd{param} \hlkwb{<-} \hlkwd{list}\hlstd{(}\hlkwc{max_depth} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{eta} \hlstd{=} \hlnum{1}\hlstd{,} \hlkwc{silent} \hlstd{=} \hlnum{1}\hlstd{)}

\hlstd{bst} \hlkwb{<-} \hlkwd{xgb.train}\hlstd{(param, diris,} \hlkwc{nround} \hlstd{=} \hlnum{2}\hlstd{, watchlist, logregobj, evalerror)}
\end{alltt}
\begin{verbatim}
## [1] eval-MSE:1.601 train-MSE:1.76
## [2] eval-MSE:2.567 train-MSE:2.745
\end{verbatim}
\end{kframe}
\end{knitrout}

A customized objective function must return both the gradient and the
second-order gradient, as \verb@grad@ and \verb@hess@ above. (Note that
\verb@evalerror@ actually computes the root mean squared error, although it
labels the metric \verb@MSE@.)

We also have \verb@slice@ for row extraction. It is useful in
cross-validation, as the sketch below shows.
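
As a minimal sketch of one cross-validation fold built with \verb@slice@
(the fold construction is our own illustration; \verb@param@,
\verb@logregobj@ and \verb@evalerror@ are reused from the example above):

\begin{verbatim}
# hold out rows 1..50 as the evaluation fold
test.idx  <- 1:50
train.idx <- setdiff(1:150, test.idx)

dtrain <- slice(diris, train.idx)
dtest  <- slice(diris, test.idx)

bst <- xgb.train(param, dtrain, nround = 2,
                 list(eval = dtest, train = dtrain),
                 logregobj, evalerror)
\end{verbatim}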

For a walkthrough demo, please see \verb@R-package/inst/examples/demo.R@.

\section{The Higgs Boson competition}

We have made a demo for \href{http://www.kaggle.com/c/higgs-boson}{the Higgs
Boson Machine Learning Challenge}.

Here are the instructions for making a submission:
\begin{enumerate}
\item Download the \href{http://www.kaggle.com/c/higgs-boson/data}{datasets}
and extract them to \verb@data/@.
\item Run the scripts under \verb@xgboost/demo/kaggle-higgs/@:
\href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R}{higgs-train.R}
and \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/higgs-pred.R}{higgs-pred.R}.
The computation will take less than a minute on an Intel i7.
\item Go to the \href{http://www.kaggle.com/c/higgs-boson/submissions/attach}{submission page}
and submit your result.
\end{enumerate}

We provide \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R}{a script}
to compare the time cost on the Higgs dataset between \verb@gbm@ and
\verb@xgboost@. The training set contains 350,000 records and 30 features.

\verb@xgboost@ can automatically do parallel computation. On a machine with
an Intel i7-4700MQ and 24GB of memory, \verb@xgboost@ takes about 35
seconds, which is about 20 times faster than \verb@gbm@. When we limited
\verb@xgboost@ to use only one thread, it was still about twice as fast as
\verb@gbm@.
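
The thread count can also be limited explicitly; a minimal sketch (the
\verb@nthread@ parameter is an assumption based on the package
documentation, not part of the speed test above):

\begin{verbatim}
# force single-threaded training ('nthread' is assumed here)
bst <- xgboost(as.matrix(iris[, 1:4]),
               as.numeric(iris[, 5] == 'setosa'),
               nrounds = 5, nthread = 1)
\end{verbatim}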

Meanwhile, the result from \verb@xgboost@ reaches
\href{http://www.kaggle.com/c/higgs-boson/details/evaluation}{3.60@AMS} with
a single model. This result stands in the
\href{http://www.kaggle.com/c/higgs-boson/leaderboard}{top 30\%} of the
competition.

\bibliographystyle{jss}
\nocite{*} % list uncited references
\bibliography{xgboost}

\end{document}