From 84607a34a5d5f5e35c4b00ebc594c46e18d69893 Mon Sep 17 00:00:00 2001 From: hetong Date: Fri, 29 Aug 2014 22:40:07 -0700 Subject: [PATCH 01/16] refine vignette --- R-package/R/xgb.save.R | 3 +- R-package/inst/doc/xgboost.Rnw | 56 +++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index a3a3ca6a9..64add0ca9 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -21,6 +21,7 @@ xgb.save <- function(model, fname) { .Call("XGBoosterSaveModel_R", model, fname, PACKAGE = "xgboost") return(TRUE) } - stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster") + stop("xgb.save: the input must be xgb.Booster. Use xgb.DMatrix.save to save + xgb.DMatrix object.") return(FALSE) } diff --git a/R-package/inst/doc/xgboost.Rnw b/R-package/inst/doc/xgboost.Rnw index 8fabff2ab..acdbbde25 100644 --- a/R-package/inst/doc/xgboost.Rnw +++ b/R-package/inst/doc/xgboost.Rnw @@ -7,9 +7,6 @@ \usepackage{indentfirst} \usepackage[utf8]{inputenc} -\DeclareMathOperator{\var}{var} -\DeclareMathOperator{\cov}{cov} - % \VignetteIndexEntry{xgboost} \begin{document} @@ -25,15 +22,17 @@ foo <- packageDescription("xgboost") \section{Introduction} -This is an example of using the \verb@xgboost@ package in R. +This is an introductory document of using the \verb@xgboost@ package in R. -\verb@xgboost@ is short for eXtreme Gradient Boosting (Tree). It supports -regression and classification analysis on different types of input datasets. +\verb@xgboost@ is short for eXtreme Gradient Boosting (Tree). It is an efficient + and scalable implementation of \cite{gbm}. It supports regression and +classification analysis on different types of input datasets. -Comparing to \verb@gbm@ in R, it has several features: +It has several features: \begin{enumerate} \item{Speed: }{\verb@xgboost@ can automatically do parallel computation on - Windows and Linux, with openmp.} + Windows and Linux, with openmp. It is generally over 10 times faster than + \verb@gbm@.} \item{Input Type: }{\verb@xgboost@ takes several types of input data:} \begin{itemize} \item{Dense Matrix: }{R's dense matrix, i.e. \verb@matrix@} @@ -41,8 +40,8 @@ Comparing to \verb@gbm@ in R, it has several features: \item{Data File: }{Local data files} \item{xgb.DMatrix: }{\verb@xgboost@'s own class. Recommended.} \end{itemize} - \item{Regularization: }{\verb@xgboost@ supports regularization for - $L_1,L_2$ term on weights and $L_2$ term on bias.} + \item{Sparsity: }{\verb@xgboost@ accepts sparse input for both tree booster + and linear booster.} \item{Customization: }{\verb@xgboost@ supports customized objective function and evaluation function} \item{Performance: }{\verb@xgboost@ has better performance on several different @@ -62,7 +61,6 @@ bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), xgb.save(bst, 'model.save') bst = xgb.load('model.save') pred <- predict(bst, as.matrix(iris[,1:4])) -hist(pred) @ \verb@xgboost@ is the main function to train a \verb@Booster@, i.e. a model. @@ -149,14 +147,14 @@ objective function. We also have \verb@slice@ for row extraction. It is useful in cross-validation. +For a walkthrough demo, please see \verb@R-package/demo/demo.R@ for further +details. + \section{The Higgs Boson competition} We have made a demo for \href{http://www.kaggle.com/c/higgs-boson}{the Higgs Boson Machine Learning Challenge}. -Our result reaches 3.60 with a single model. This results stands in the top 30% -of the competition. - Here are the instructions to make a submission \begin{enumerate} \item Download the \href{http://www.kaggle.com/c/higgs-boson/data}{datasets} @@ -169,5 +167,35 @@ Here are the instructions to make a submission and submit your result. \end{enumerate} +We provide \href{https://github.com/tqchen/xgboost/blob/master/demo/kaggle-higgs/speedtest.R}{a script} +to compare the time cost on the higgs dataset with \verb@gbm@ and \verb@xgboost@. +The training set contains 350000 records and 30 features. + +\verb@xgboost@ can automatically do parallel computation. On a machine with Intel +i7-4700MQ and 24GB memories, we found that \verb@xgboost@ costs about 35 seconds, which is about 20 times faster +than \verb@gbm@. When we limited \verb@xgboost@ to use only one thread, it was +still about two times faster than \verb@gbm@. + +Meanwhile, the result from \verb@xgboost@ reaches +\href{http://www.kaggle.com/c/higgs-boson/details/evaluation}{3.60@AMS} with a +single model. This results stands in the +\href{http://www.kaggle.com/c/higgs-boson/leaderboard}{top 30\%} of the +competition. + + +\begin{thebibliography}{} + +\bibitem[Friedman et al.(2001)Friedman, Jerome H.]{gbm} +Friedman, Jerome H. (2001). +\newblock Greedy function approximation: a gradient boosting machine. +\newblock In \emph{ Annals of Statistics} (2001): 1189-1232. + +\bibitem[Friedman(2000)]{logitboost} +Friedman, Jerome, Trevor Hastie, and Robert Tibshirani. (2000). +\newblock Additive logistic regression: a statistical view of boosting (with discussion and a rejoinder by the authors). +\newblock \emph{The annals of statistics} 28.2 (2000):337-407. + +\end{thebibliography} + \end{document} From 86e852d1da963af76e542231c3d7e4c6432cc4cd Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 30 Aug 2014 09:31:14 -0700 Subject: [PATCH 02/16] edit the doc --- R-package/inst/doc/xgboost.Rnw | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/R-package/inst/doc/xgboost.Rnw b/R-package/inst/doc/xgboost.Rnw index acdbbde25..fde6181e6 100644 --- a/R-package/inst/doc/xgboost.Rnw +++ b/R-package/inst/doc/xgboost.Rnw @@ -24,11 +24,12 @@ foo <- packageDescription("xgboost") This is an introductory document of using the \verb@xgboost@ package in R. -\verb@xgboost@ is short for eXtreme Gradient Boosting (Tree). It is an efficient - and scalable implementation of \cite{gbm}. It supports regression and -classification analysis on different types of input datasets. - -It has several features: +\verb@xgboost@ is short for eXtreme Gradient Boosting package. It is an efficient + and scalable implementation of gradient boosting framework by \cite{gbm}. +The package includes efficient linear model solver and tree learning algorithm. +It supports various objective functions, including regression, classification +and ranking. The package is made to be extendible, so that user are also allowed +to define there own objectives easily. It has several features: \begin{enumerate} \item{Speed: }{\verb@xgboost@ can automatically do parallel computation on Windows and Linux, with openmp. It is generally over 10 times faster than @@ -41,12 +42,11 @@ It has several features: \item{xgb.DMatrix: }{\verb@xgboost@'s own class. Recommended.} \end{itemize} \item{Sparsity: }{\verb@xgboost@ accepts sparse input for both tree booster - and linear booster.} + and linear booster, and is optimized for sparse input.} \item{Customization: }{\verb@xgboost@ supports customized objective function and evaluation function} \item{Performance: }{\verb@xgboost@ has better performance on several different - datasets. Its rising popularity and fame in different Kaggle competitions - is the evidence.} + datasets.} \end{enumerate} \section{Example with iris} @@ -91,7 +91,8 @@ booster[1]: \end{verbatim} It is important to know \verb@xgboost@'s own data type: \verb@xgb.DMatrix@. -It speeds up \verb@xgboost@. +It speeds up \verb@xgboost@, and is needed for advanced features such as +training from initial prediction value, weighted training instance. We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object: <>= @@ -117,7 +118,7 @@ is more flexible than \verb@xgboost@, but it requires users to read the document a bit more carefully. \verb@xgb.train@ only accept a \verb@xgb.DMatrix@ object as its input, while it -supports some additional features as custom objective and evaluation functions. +supports advanced features as custom objective and evaluation functions. <>= logregobj <- function(preds, dtrain) { From 784ab8d02c2ec1a14369329e54d644077c713ad6 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 30 Aug 2014 09:58:14 -0700 Subject: [PATCH 03/16] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c81059773..ba4b08bfd 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Build Version ====== -* This version is named xgboost-unity, the code has been refactored from 0.2x to be cleaner and more flexibility +* This version xgboost-0.3, the code has been refactored from 0.2x to be cleaner and more flexibility * This version of xgboost is not compatible with 0.2x, due to huge amount of changes in code structure - This means the model and buffer file of previous version can not be loaded in xgboost-unity * For legacy 0.2x code, refer to [Here](https://github.com/tqchen/xgboost/releases/tag/v0.22) From 7845ee0c85d72cc1e392d5ed517149b2d82e1583 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 30 Aug 2014 09:58:35 -0700 Subject: [PATCH 04/16] Update CHANGES.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 62d21c21a..027a077c6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,7 +11,7 @@ xgboost-0.2x * Weighted samples instances * Initial version of pairwise rank -xgboost-unity +xgboost-0.3 ===== * Faster tree construction module - Allows subsample columns during tree construction via ```bst:col_samplebytree=ratio``` From 5e839f6fe7a8bbe0c43730135bab521f839b1355 Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 10:55:13 -0700 Subject: [PATCH 05/16] change location and template of vignette --- R-package/{inst/doc => vignettes}/xgboost.Rnw | 75 ++++++++++++------- R-package/vignettes/xgboost.bib | 20 +++++ 2 files changed, 66 insertions(+), 29 deletions(-) rename R-package/{inst/doc => vignettes}/xgboost.Rnw (81%) create mode 100644 R-package/vignettes/xgboost.bib diff --git a/R-package/inst/doc/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw similarity index 81% rename from R-package/inst/doc/xgboost.Rnw rename to R-package/vignettes/xgboost.Rnw index fde6181e6..be2cfea22 100644 --- a/R-package/inst/doc/xgboost.Rnw +++ b/R-package/vignettes/xgboost.Rnw @@ -1,31 +1,59 @@ \documentclass{article} - -\usepackage{natbib} -\usepackage{graphics} -\usepackage{amsmath} +\RequirePackage{url} \usepackage{hyperref} -\usepackage{indentfirst} -\usepackage[utf8]{inputenc} +\RequirePackage{amsmath} +\RequirePackage{natbib} +\RequirePackage[a4paper,lmargin={1.25in},rmargin={1.25in},tmargin={1in},bmargin={1in}]{geometry} -% \VignetteIndexEntry{xgboost} +\makeatletter +% \VignetteIndexEntry{xgboost: eXtreme Gradient Boosting} +%\VignetteKeywords{xgboost, gbm, gradient boosting machines} +%\VignettePackage{xgboost} +% \VignetteEngine{knitr::knitr} +\makeatother \begin{document} +%\SweaveOpts{concordance=TRUE} -<>= -options(keep.source = TRUE, width = 60) -foo <- packageDescription("xgboost") +<>= +if (require('knitr')) opts_chunk$set(fig.width = 5, fig.height = 5, fig.align = 'center', tidy = FALSE, warning = FALSE, cache = TRUE) @ -\title{xgboost Package Example (Version \Sexpr{foo$Version})} -\author{Tong He} -\maketitle +% +<>= +xgboost.version = '0.3-0' +@ +% + + \begin{center} + \vspace*{6\baselineskip} + \rule{\textwidth}{1.6pt}\vspace*{-\baselineskip}\vspace*{2pt} + \rule{\textwidth}{0.4pt}\\[2\baselineskip] + {\LARGE \textbf{xgboost: eXtreme Gradient Boosting}}\\[1.2\baselineskip] + \rule{\textwidth}{0.4pt}\vspace*{-\baselineskip}\vspace{3.2pt} + \rule{\textwidth}{1.6pt}\\[2\baselineskip] + {\Large Tianqi Chen, Tong He}\\[\baselineskip] + {\large Package Version: \Sexpr{xgboost.version}}\\[\baselineskip] + {\large \today}\par + \vfill + \begin{figure}[h] + \centering + \includegraphics[width=0.4\textwidth]{fig/sfu-logo.pdf} + \end{figure} + \end{center} + +\thispagestyle{empty} + +\clearpage + +\setcounter{page}{1} \section{Introduction} This is an introductory document of using the \verb@xgboost@ package in R. \verb@xgboost@ is short for eXtreme Gradient Boosting package. It is an efficient - and scalable implementation of gradient boosting framework by \cite{gbm}. + and scalable implementation of gradient boosting framework by \citep{friedman2001greedy}. The package includes efficient linear model solver and tree learning algorithm. It supports various objective functions, including regression, classification and ranking. The package is made to be extendible, so that user are also allowed @@ -49,6 +77,7 @@ to define there own objectives easily. It has several features: datasets.} \end{enumerate} + \section{Example with iris} In this section, we will illustrate some common usage of \verb@xgboost@. @@ -183,20 +212,8 @@ single model. This results stands in the \href{http://www.kaggle.com/c/higgs-boson/leaderboard}{top 30\%} of the competition. - -\begin{thebibliography}{} - -\bibitem[Friedman et al.(2001)Friedman, Jerome H.]{gbm} -Friedman, Jerome H. (2001). -\newblock Greedy function approximation: a gradient boosting machine. -\newblock In \emph{ Annals of Statistics} (2001): 1189-1232. - -\bibitem[Friedman(2000)]{logitboost} -Friedman, Jerome, Trevor Hastie, and Robert Tibshirani. (2000). -\newblock Additive logistic regression: a statistical view of boosting (with discussion and a rejoinder by the authors). -\newblock \emph{The annals of statistics} 28.2 (2000):337-407. - -\end{thebibliography} - +\bibliographystyle{jss} +\nocite{*} % list uncited references +\bibliography{xgboost} \end{document} diff --git a/R-package/vignettes/xgboost.bib b/R-package/vignettes/xgboost.bib new file mode 100644 index 000000000..f69866f04 --- /dev/null +++ b/R-package/vignettes/xgboost.bib @@ -0,0 +1,20 @@ +@article{friedman2001greedy, + title={Greedy function approximation: a gradient boosting machine}, + author={Friedman, Jerome H}, + journal={Annals of Statistics}, + pages={1189--1232}, + year={2001}, + publisher={JSTOR} +} + +@article{friedman2000additive, + title={Additive logistic regression: a statistical view of boosting (with discussion and a rejoinder by the authors)}, + author={Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert and others}, + journal={The annals of statistics}, + volume={28}, + number={2}, + pages={337--407}, + year={2000}, + publisher={Institute of Mathematical Statistics} +} + From efe8b38a35bcfbb5652f3970f68d44417ec34935 Mon Sep 17 00:00:00 2001 From: Tong He Date: Sat, 30 Aug 2014 11:24:15 -0700 Subject: [PATCH 06/16] fix error in demo --- R-package/R/getinfo.xgb.DMatrix.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R index 4fa8d58fa..5b438049c 100644 --- a/R-package/R/getinfo.xgb.DMatrix.R +++ b/R-package/R/getinfo.xgb.DMatrix.R @@ -11,7 +11,7 @@ setClass('xgb.DMatrix') #' data(iris) #' iris[,5] <- as.numeric(iris[,5]) #' dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) -#' labels <- getinfo(dtest, "label") +#' labels <- getinfo(dtrain, "label") #' @export #' getinfo <- function(object, ...){ From 6d36e8460d2f215f713c895c5dee2173523060b2 Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 11:28:10 -0700 Subject: [PATCH 07/16] change getinfo Rd --- R-package/man/getinfo.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index beee4f850..4f63b5e92 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -17,6 +17,6 @@ Get information of an xgb.DMatrix object data(iris) iris[,5] <- as.numeric(iris[,5]) dtrain <- xgb.DMatrix(as.matrix(iris[,1:4]), label=iris[,5]) -labels <- getinfo(dtest, "label") +labels <- getinfo(dtrain, "label") } From 1b7de855e96d0e01fdadff8e71f3f0ea7a0d183e Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 11:53:58 -0700 Subject: [PATCH 08/16] remove logo --- R-package/vignettes/xgboost.Rnw | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw index be2cfea22..ed4447d57 100644 --- a/R-package/vignettes/xgboost.Rnw +++ b/R-package/vignettes/xgboost.Rnw @@ -36,10 +36,6 @@ xgboost.version = '0.3-0' {\large Package Version: \Sexpr{xgboost.version}}\\[\baselineskip] {\large \today}\par \vfill - \begin{figure}[h] - \centering - \includegraphics[width=0.4\textwidth]{fig/sfu-logo.pdf} - \end{figure} \end{center} \thispagestyle{empty} From 70cdd2787c690b516fd3b5a4cc884418e813bbf9 Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 12:02:01 -0700 Subject: [PATCH 09/16] add 00Index --- R-package/demo/00Index | 1 + 1 file changed, 1 insertion(+) create mode 100644 R-package/demo/00Index diff --git a/R-package/demo/00Index b/R-package/demo/00Index new file mode 100644 index 000000000..2ca4abd32 --- /dev/null +++ b/R-package/demo/00Index @@ -0,0 +1 @@ +demo R code for xgboost usages on agaricus data From 51ef32d73ad9b65e9787e3dab2be0a2419ea4667 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 30 Aug 2014 12:03:32 -0700 Subject: [PATCH 10/16] chg makefile --- R-package/src/Makevars | 7 +++---- R-package/src/Makevars.win | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/R-package/src/Makevars b/R-package/src/Makevars index 7dfda4d57..969571290 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -2,11 +2,10 @@ PKGROOT=../../ # _*_ mode: Makefile; _*_ CXX=`R CMD config CXX` -CFLAGS=`R CMD config CFLAGS` +TCFLAGS=`R CMD config CFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(CFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) - +XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) ifeq ($(no_omp),1) @@ -25,7 +24,7 @@ xgboost_io.o: $(PKGROOT)/src/io/io.cpp xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp -$(CXXOBJ) : +$(CXXOBJ) : $(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) ) clean: diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 3df9891fc..3015a1982 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -2,10 +2,10 @@ PKGROOT=../../ # _*_ mode: Makefile; _*_ CXX=`Rcmd config CXX` -CFLAGS=`Rcmd config CFLAGS` +TCFLAGS=`Rcmd config CFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(CFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) ifeq ($(no_omp),1) From 202a17f148cbbcdadf1a333e2e2845b0f7005783 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 30 Aug 2014 12:10:50 -0700 Subject: [PATCH 11/16] fix windows --- R-package/src/Makevars.win | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 3015a1982..b8e5a8089 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -5,7 +5,7 @@ CXX=`Rcmd config CXX` TCFLAGS=`Rcmd config CFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +GBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) ifeq ($(no_omp),1) From f9fc1aec2ffb92eca88a1aeea8c0586fa7bece39 Mon Sep 17 00:00:00 2001 From: hetong Date: Sat, 30 Aug 2014 12:11:15 -0700 Subject: [PATCH 12/16] modify licence and desc to standard format --- R-package/DESCRIPTION | 2 +- R-package/LICENSE | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 R-package/LICENSE diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 16a007c0a..7d60143bd 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -6,7 +6,7 @@ Date: 2014-08-23 Author: Tianqi Chen, Tong He Maintainer: Tianqi Chen , Tong He Description: xgboost -License: See LICENSE file in the project root of xgboost. +License: file LICENSE URL: https://github.com/tqchen/xgboost BugReports: https://github.com/tqchen/xgboost/issues Depends: diff --git a/R-package/LICENSE b/R-package/LICENSE new file mode 100644 index 000000000..b9f38c38a --- /dev/null +++ b/R-package/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2014 by Tianqi Chen and Contributors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. From 99c44f2e51e524c7a517d7e810e71880d4b3a0a4 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 30 Aug 2014 12:25:41 -0700 Subject: [PATCH 13/16] fix makefile in win --- R-package/src/Makevars.win | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index b8e5a8089..d343b94c5 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -5,7 +5,7 @@ CXX=`Rcmd config CXX` TCFLAGS=`Rcmd config CFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -GBFLAG= $(TCFLAGS) -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) ifeq ($(no_omp),1) @@ -20,12 +20,13 @@ all: $(SHLIB) $(SHLIB): $(OBJECTS) xgboost_wrapper.o: $(PKGROOT)/wrapper/xgboost_wrapper.cpp -xgboost_io.o: $(PKGROOT)/src/io/io.cpp +xgboost_io.o: $(PKGROOT)/src/io/io.cpp xgboost_gbm.o: $(PKGROOT)/src/gbm/gbm.cpp xgboost_updater.o: $(PKGROOT)/src/tree/updater.cpp -$(CXXOBJ) : - $(CXX) -c $(PKG_CPPFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) +$(CXXOBJ) : + $(CXX) -c $(XGBFLAG) -o $@ $(firstword $(filter %.cpp %.c, $^) ) clean: rm -rf *.so *.o *~ *.dll + \ No newline at end of file From 3f7aeb22c5a1575b9c2442f05784558a90a16494 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 30 Aug 2014 12:40:51 -0700 Subject: [PATCH 14/16] fix some windows type conversion warning --- R-package/src/Makevars.win | 2 +- src/io/simple_dmatrix-inl.hpp | 15 +++++++++++---- src/learner/dmatrix.h | 3 ++- src/learner/learner-inl.hpp | 5 +++-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index d343b94c5..d06076def 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -5,7 +5,7 @@ CXX=`Rcmd config CXX` TCFLAGS=`Rcmd config CFLAGS` # expose these flags to R CMD SHLIB PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_ERROR_ -I$(PKGROOT) $(SHLIB_OPENMP_CFLAGS) -XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) +XGBFLAG= -O3 -DXGBOOST_CUSTOMIZE_ERROR_ -fPIC $(SHLIB_OPENMP_CFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) ifeq ($(no_omp),1) diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 47be8a41a..5c1093540 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -105,7 +105,9 @@ class DMatrixSimple : public DataMatrix { if (!silent) { printf("%lux%lu matrix with %lu entries is loaded from %s\n", - info.num_row(), info.num_col(), row_data_.size(), fname); + static_cast(info.num_row()), + static_cast(info.num_col()), + static_cast(row_data_.size()), fname); } fclose(file); // try to load in additional file @@ -155,7 +157,9 @@ class DMatrixSimple : public DataMatrix { if (!silent) { printf("%lux%lu matrix with %lu entries is loaded", - info.num_row(), info.num_col(), row_data_.size()); + static_cast(info.num_row()), + static_cast(info.num_col()), + static_cast(row_data_.size())); if (fname != NULL) { printf(" from %s\n", fname); } else { @@ -183,9 +187,12 @@ class DMatrixSimple : public DataMatrix { if (!silent) { printf("%lux%lu matrix with %lu entries is saved to %s\n", - info.num_row(), info.num_col(), row_data_.size(), fname); + static_cast(info.num_row()), + static_cast(info.num_col()), + static_cast(row_data_.size()), fname); if (info.group_ptr.size() != 0) { - printf("data contains %lu groups\n", info.group_ptr.size()-1); + printf("data contains %u groups\n", + static_cast(info.group_ptr.size()-1)); } } } diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index cd897f1d5..542b6f6f5 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -98,7 +98,8 @@ struct MetaInfo { group_ptr.push_back(group_ptr.back()+nline); } if (!silent) { - printf("%lu groups are loaded from %s\n", group_ptr.size()-1, fname); + printf("%u groups are loaded from %s\n", + static_cast(group_ptr.size()-1), fname); } fclose(fi); return true; diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index c01e2ec15..8e7bce0a8 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -66,10 +66,11 @@ class BoostLearner { snprintf(str_temp, sizeof(str_temp), "%u", num_feature); this->SetParam("bst:num_feature", str_temp); } - snprintf(str_temp, sizeof(str_temp), "%lu", buffer_size); + snprintf(str_temp, sizeof(str_temp), "%lu", + static_cast(buffer_size)); this->SetParam("num_pbuffer", str_temp); if (!silent) { - printf("buffer_size=%ld\n", buffer_size); + printf("buffer_size=%ld\n", static_cast(buffer_size)); } } /*! From 6e054e8fa4c6b663f9eddf161bd5bfc7a77d2197 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 30 Aug 2014 12:45:46 -0700 Subject: [PATCH 15/16] fix indent --- src/io/simple_dmatrix-inl.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 5c1093540..8e89efdfb 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -158,8 +158,8 @@ class DMatrixSimple : public DataMatrix { if (!silent) { printf("%lux%lu matrix with %lu entries is loaded", static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size())); + static_cast(info.num_col()), + static_cast(row_data_.size())); if (fname != NULL) { printf(" from %s\n", fname); } else { @@ -188,11 +188,11 @@ class DMatrixSimple : public DataMatrix { if (!silent) { printf("%lux%lu matrix with %lu entries is saved to %s\n", static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size()), fname); + static_cast(info.num_col()), + static_cast(row_data_.size()), fname); if (info.group_ptr.size() != 0) { printf("data contains %u groups\n", - static_cast(info.group_ptr.size()-1)); + static_cast(info.group_ptr.size()-1)); } } } From 2c1aabf6b09067459466ca1cf614c9935f23c386 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 30 Aug 2014 12:47:04 -0700 Subject: [PATCH 16/16] fix indent --- src/io/simple_dmatrix-inl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 8e89efdfb..df8bd9fee 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -106,8 +106,8 @@ class DMatrixSimple : public DataMatrix { if (!silent) { printf("%lux%lu matrix with %lu entries is loaded from %s\n", static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size()), fname); + static_cast(info.num_col()), + static_cast(row_data_.size()), fname); } fclose(file); // try to load in additional file