diff --git a/.gitignore b/.gitignore index 276ed2d54..27ff1a764 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,6 @@ *group *rar *vali -*data *sdf Release *exe* @@ -36,7 +35,6 @@ ipch *log Debug *suo -*test* .Rhistory *.dll *i386 @@ -51,12 +49,9 @@ Debug ./xgboost ./xgboost.mpi ./xgboost.mock -rabit #.Rbuildignore R-package.Rproj *.cache* -R-package/inst -R-package/src #java java/xgboost4j/target java/xgboost4j/tmp @@ -65,9 +60,13 @@ java/xgboost4j-demo/data/ java/xgboost4j-demo/tmp/ java/xgboost4j-demo/model/ nb-configuration* -dmlc-core # Eclipse .project .cproject .pydevproject .settings/ +build +config.mk +xgboost +*.data +build_plugin diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..b2321b41f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "dmlc-core"] + path = dmlc-core + url = https://github.com/dmlc/dmlc-core +[submodule "rabit"] + path = rabit + url = https://github.com/dmlc/rabit diff --git a/.travis.yml b/.travis.yml index c7049be94..4f09eb083 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ -sudo: true +# disable sudo for container build. 
+sudo: false # Enabling test on Linux and OS X os: @@ -8,51 +9,60 @@ os: # Use Build Matrix to do lint and build seperately env: matrix: - - TASK=lint LINT_LANG=cpp - - TASK=lint LINT_LANG=python - - TASK=R-package CXX=g++ - - TASK=python-package CXX=g++ - - TASK=python-package3 CXX=g++ - - TASK=java-package CXX=g++ - - TASK=build CXX=g++ - - TASK=build-with-dmlc CXX=g++ + # code lint + - TASK=lint + # r package test + - TASK=r_test + # python package test + - TASK=python_test + # java package test + - TASK=java_test os: - linux - osx +matrix: + exclude: + - os: osx + env: TASK=lint + - os: linux + env: TASK=r_test + - os: osx + env: TASK=java_test + # dependent apt packages addons: apt: packages: - doxygen - - libopenmpi-dev - wget - libcurl4-openssl-dev - unzip - - python-numpy - - python-scipy + - graphviz before_install: - - scripts/travis_osx_install.sh - - git clone https://github.com/dmlc/dmlc-core - - export TRAVIS=dmlc-core/scripts/travis/ + - source dmlc-core/scripts/travis/travis_setup_env.sh - export PYTHONPATH=${PYTHONPATH}:${PWD}/python-package - - source ${TRAVIS}/travis_setup_env.sh install: - - pip install cpplint pylint --user `whoami` + - source tests/travis/setup.sh +script: + - tests/travis/run_test.sh -script: scripts/travis_script.sh +cache: + directories: + - ${HOME}/.cache/usr + - ${HOME}/.cache/pip +before_cache: + - dmlc-core/scripts/travis/travis_before_cache.sh after_failure: - - scripts/travis_after_failure.sh - + - tests/travis/travis_after_failure.sh notifications: email: on_success: change on_failure: always - diff --git a/Makefile b/Makefile index 84636bd71..a9ed1f96f 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,60 @@ -export CC = $(if $(shell which gcc-5 2>/dev/null),gcc-5,gcc) -export CXX = $(if $(shell which g++-5 2>/dev/null),g++-5,g++) - -export MPICXX = mpicxx -export LDFLAGS= -pthread -lm -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops -# java include path -export JAVAINCFLAGS = 
-I${JAVA_HOME}/include -I./java - -ifeq ($(OS), Windows_NT) - export CXX = g++ -m64 - export CC = gcc -m64 +ifndef config +ifneq ("$(wildcard ./config.mk)","") + config = config.mk +else + config = make/config.mk +endif endif -UNAME= $(shell uname) +ifndef DMLC_CORE + DMLC_CORE = dmlc-core +endif + +ifndef RABIT + RABIT = rabit +endif + +ROOTDIR = $(CURDIR) + +ifeq ($(OS), Windows_NT) + UNAME="Windows" +else + UNAME=$(shell uname) +endif + +include $(config) +ifeq ($(USE_OPENMP), 0) + export NO_OPENMP = 1 +endif +include $(DMLC_CORE)/make/dmlc.mk + +# include the plugins +include $(XGB_PLUGINS) + +# use customized config file +ifndef CC +export CC = $(if $(shell which gcc-5),gcc-5,gcc) +endif +ifndef CXX +export CXX = $(if $(shell which g++-5),g++-5,g++) +endif + +export LDFLAGS= -pthread -lm $(ADD_LDFLAGS) $(DMLC_LDFLAGS) $(PLUGIN_LDFLAGS) +export CFLAGS= -std=c++0x -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops -Iinclude $(ADD_CFLAGS) $(PLUGIN_CFLAGS) +CFLAGS += -I$(DMLC_CORE)/include -I$(RABIT)/include +#java include path +export JAVAINCFLAGS = -I${JAVA_HOME}/include -I./java + +ifndef LINT_LANG + LINT_LANG= "all" +endif + +ifneq ($(UNAME), Windows) + CFLAGS += -fPIC + XGBOOST_DYLIB = lib/libxgboost.so +else + XGBOOST_DYLIB = lib/libxgboost.dll +endif ifeq ($(UNAME), Linux) LDFLAGS += -lrt @@ -23,192 +65,115 @@ ifeq ($(UNAME), Darwin) JAVAINCFLAGS += -I${JAVA_HOME}/include/darwin endif -ifeq ($(no_omp),1) +ifeq ($(USE_OPENMP), 1) + CFLAGS += -fopenmp +else CFLAGS += -DDISABLE_OPENMP -else - #CFLAGS += -fopenmp - ifeq ($(omp_mac_static),1) - #CFLAGS += -fopenmp -Bstatic - CFLAGS += -static-libgcc -static-libstdc++ -L. 
-fopenmp - #LDFLAGS += -Wl,--whole-archive -lpthread -Wl --no-whole-archive - else - CFLAGS += -fopenmp - endif endif -# by default use c++11 -ifeq ($(cxx11),1) - CFLAGS += -std=c++11 -endif - -# handling dmlc -ifdef dmlc - ifndef config - ifneq ("$(wildcard $(dmlc)/config.mk)","") - config = $(dmlc)/config.mk - else - config = $(dmlc)/make/config.mk - endif - endif - include $(config) - include $(dmlc)/make/dmlc.mk - LDFLAGS+= $(DMLC_LDFLAGS) - LIBDMLC=$(dmlc)/libdmlc.a -else - LIBDMLC=dmlc_simple.o -endif - -ifndef WITH_FPIC - WITH_FPIC = 1 -endif -ifeq ($(WITH_FPIC), 1) - CFLAGS += -fPIC -endif - - -ifeq ($(OS), Windows_NT) - LIBRABIT = subtree/rabit/lib/librabit_empty.a - SLIB = wrapper/xgboost_wrapper.dll -else - LIBRABIT = subtree/rabit/lib/librabit.a - SLIB = wrapper/libxgboostwrapper.so -endif - -# java lib -JLIB = java/libxgboost4j.so - # specify tensor path -BIN = xgboost -MOCKBIN = xgboost.mock -OBJ = updater.o gbm.o io.o main.o dmlc_simple.o -MPIBIN = -ifeq ($(WITH_FPIC), 1) - TARGET = $(BIN) $(OBJ) $(SLIB) -else - TARGET = $(BIN) -endif +.PHONY: clean all lint clean_all doxygen rcpplint Rpack Rbuild Rcheck java -ifndef LINT_LANG - LINT_LANG= "all" -endif -.PHONY: clean all mpi python Rpack lint +all: lib/libxgboost.a $(XGBOOST_DYLIB) xgboost -all: $(TARGET) -mpi: $(MPIBIN) +$(DMLC_CORE)/libdmlc.a: + + cd $(DMLC_CORE); make libdmlc.a config=$(ROOTDIR)/$(config); cd $(ROOTDIR) -python: wrapper/libxgboostwrapper.so -# now the wrapper takes in two files. 
io and wrapper part -updater.o: src/tree/updater.cpp src/tree/*.hpp src/*.h src/tree/*.h src/utils/*.h -dmlc_simple.o: src/io/dmlc_simple.cpp src/utils/*.h -gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h -io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h -main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h -xgboost: updater.o gbm.o io.o main.o $(LIBRABIT) $(LIBDMLC) -wrapper/xgboost_wrapper.dll wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC) +$(RABIT)/lib/$(LIB_RABIT): + + cd $(RABIT); make lib/$(LIB_RABIT); cd $(ROOTDIR) java: java/libxgboost4j.so -java/libxgboost4j.so: java/xgboost4j_wrapper.cpp wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC) -# dependency on rabit -subtree/rabit/lib/librabit.a: subtree/rabit/src/engine.cc - + cd subtree/rabit;make lib/librabit.a; cd ../.. -subtree/rabit/lib/librabit_empty.a: subtree/rabit/src/engine_empty.cc - + cd subtree/rabit;make lib/librabit_empty.a; cd ../.. -subtree/rabit/lib/librabit_mock.a: subtree/rabit/src/engine_mock.cc - + cd subtree/rabit;make lib/librabit_mock.a; cd ../.. -subtree/rabit/lib/librabit_mpi.a: subtree/rabit/src/engine_mpi.cc - + cd subtree/rabit;make lib/librabit_mpi.a; cd ../.. 
+SRC = $(wildcard src/*.cc src/*/*.cc) +ALL_OBJ = $(patsubst src/%.cc, build/%.o, $(SRC)) $(PLUGIN_OBJS) +AMALGA_OBJ = amalgamation/xgboost-all0.o +LIB_DEP = $(DMLC_CORE)/libdmlc.a $(RABIT)/lib/$(LIB_RABIT) +ALL_DEP = $(filter-out build/cli_main.o, $(ALL_OBJ)) $(LIB_DEP) +CLI_OBJ = build/cli_main.o -$(BIN) : - $(CXX) $(CFLAGS) -fPIC -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) +build/%.o: src/%.cc + @mkdir -p $(@D) + $(CXX) $(CFLAGS) -MM -MT build/$*.o $< >build/$*.d + $(CXX) -c $(CFLAGS) -c $< -o $@ -$(MOCKBIN) : - $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) +build_plugin/%.o: plugin/%.cc + @mkdir -p $(@D) + $(CXX) $(CFLAGS) -MM -MT build_plugin/$*.o $< >build_plugin/$*.d + $(CXX) -c $(CFLAGS) -c $< -o $@ -$(SLIB) : - $(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS) +# The should be equivalent to $(ALL_OBJ) except for build/cli_main.o +amalgamation/xgboost-all0.o: amalgamation/xgboost-all0.cc + $(CXX) -c $(CFLAGS) -c $< -o $@ -$(JLIB) : - $(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS) +# Equivalent to lib/libxgboost_all.so +lib/libxgboost_all.so: $(AMALGA_OBJ) $(LIB_DEP) + @mkdir -p $(@D) + $(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS) -$(OBJ) : - $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) +lib/libxgboost.a: $(ALL_DEP) + @mkdir -p $(@D) + ar crv $@ $(filter %.o, $?) 
-$(MPIOBJ) : - $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) +lib/libxgboost.dll lib/libxgboost.so: $(ALL_DEP) + @mkdir -p $(@D) + $(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS) -$(MPIBIN) : - $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) +java/libxgboost4j.so: java/xgboost4j_wrapper.cpp $(ALL_DEP) + $(CXX) $(CFLAGS) $(JAVAINCFLAGS) -shared -o $@ $(filter %.cpp %.o %.a, $^) $(LDFLAGS) -install: - cp -f -r $(BIN) $(INSTALL_PATH) +xgboost: $(CLI_OBJ) $(ALL_DEP) + $(CXX) $(CFLAGS) -o $@ $(filter %.o %.a, $^) $(LDFLAGS) +rcpplint: + python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src + +lint: rcpplint + python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src plugin + +clean: + $(RM) -rf build build_plugin lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o xgboost +# Like clean, but also runs the clean target of the dmlc-core and rabit submodules. +clean_all: clean + cd $(DMLC_CORE); $(MAKE) clean; cd $(ROOTDIR) + cd $(RABIT); $(MAKE) clean; cd $(ROOTDIR) + +doxygen: + doxygen doc/Doxyfile + +# Script to make a clean installable R package. Rpack: - make clean - cd subtree/rabit;make clean;cd .. 
+ make clean_all rm -rf xgboost xgboost*.tar.gz cp -r R-package xgboost rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll rm -rf xgboost/src/*/*.o - rm -rf subtree/rabit/src/*.o rm -rf xgboost/demo/*.model xgboost/demo/*.buffer xgboost/demo/*.txt rm -rf xgboost/demo/runall.R cp -r src xgboost/src/src - mkdir xgboost/src/subtree - mkdir xgboost/src/subtree/rabit - cp -r subtree/rabit/include xgboost/src/subtree/rabit/include - cp -r subtree/rabit/src xgboost/src/subtree/rabit/src - rm -rf xgboost/src/subtree/rabit/src/*.o - mkdir xgboost/src/wrapper - cp wrapper/xgboost_wrapper.h xgboost/src/wrapper - cp wrapper/xgboost_wrapper.cpp xgboost/src/wrapper + cp -r include xgboost/src/include + cp -r amalgamation xgboost/src/amalgamation + mkdir -p xgboost/src/rabit + cp -r rabit/include xgboost/src/rabit/include + cp -r rabit/src xgboost/src/rabit/src + rm -rf xgboost/src/rabit/src/*.o + mkdir -p xgboost/src/dmlc-core + cp -r dmlc-core/include xgboost/src/dmlc-core/include + cp -r dmlc-core/src xgboost/src/dmlc-core/src cp ./LICENSE xgboost - cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars + cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' | sed '3s/.*/ENABLE_STD_THREAD=0/' > xgboost/src/Makevars cp xgboost/src/Makevars xgboost/src/Makevars.win - # R CMD build --no-build-vignettes xgboost - # R CMD build xgboost - # rm -rf xgboost - # R CMD check --as-cran xgboost*.tar.gz Rbuild: make Rpack - R CMD build xgboost + R CMD build --no-build-vignettes xgboost rm -rf xgboost Rcheck: make Rbuild - R CMD check --as-cran xgboost*.tar.gz + R CMD check xgboost*.tar.gz -pythonpack: - #for pip maintainer only - cd subtree/rabit;make clean;cd .. 
- rm -rf xgboost-deploy xgboost*.tar.gz - cp -r python-package xgboost-deploy - #cp *.md xgboost-deploy/ - cp LICENSE xgboost-deploy/ - cp Makefile xgboost-deploy/xgboost - cp -r wrapper xgboost-deploy/xgboost - cp -r subtree xgboost-deploy/xgboost - cp -r multi-node xgboost-deploy/xgboost - cp -r windows xgboost-deploy/xgboost - cp -r src xgboost-deploy/xgboost - cp python-package/setup_pip.py xgboost-deploy/setup.py - #make python - -pythonbuild: - make pythonpack - python setup.py install - -pythoncheck: - make pythonbuild - python -c 'import xgboost;print xgboost.core.find_lib_path()' - -# lint requires dmlc to be in current folder -lint: - dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package python-package - -clean: - $(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~ - cd subtree/rabit; make clean; cd .. +-include build/*.d +-include build/*/*.d +-include build_plugin/*/*.d diff --git a/CHANGES.md b/NEWS.md similarity index 55% rename from CHANGES.md rename to NEWS.md index 1a10f04e7..e9c89da00 100644 --- a/CHANGES.md +++ b/NEWS.md @@ -1,42 +1,30 @@ -Change Log -========== +XGBoost Change Log +================== -xgboost-0.1 ------------ -* Initial release +This file records the changes in the xgboost library in reverse chronological order. -xgboost-0.2x ------------- -* Python module -* Weighted samples instances -* Initial version of pairwise rank +## brick: next release candidate +* Major refactor of core library. + - Goal: more flexible and modular code as a portable library. + - Switch to use of c++11 standard code. + - Random number generator defaults to ```std::mt19937```. + - Share the data loading pipeline and logging module from dmlc-core. + - Enable registry pattern to allow optional plugins for objectives, metrics, tree constructors and data loaders. + - Future plugin modules can be put into xgboost/plugin and register back to the library. + - Remove most of the raw pointers to smart ptrs, for RAII safety. 
+* Change library name to libxgboost.so +* Backward compatibility + - The binary buffer file is not backward compatible with previous versions. + - The model file is backward compatible on 64 bit platforms. +* The model file is compatible between 64/32 bit platforms (not yet tested). +* External memory version and other advanced features will be exposed to R library as well on linux. + - Previously some of the features were blocked due to C++11 and threading limits. + - The windows version is still blocked because Rtools does not support ```std::thread```. +* rabit and dmlc-core are maintained through git submodule + - Anyone can open PR to update these dependencies now. -xgboost-0.3 ------------ -* Faster tree construction module - - Allows subsample columns during tree construction via ```bst:col_samplebytree=ratio``` -* Support for boosting from initial predictions -* Experimental version of LambdaRank -* Linear booster is now parallelized, using parallel coordinated descent. -* Add [Code Guide](src/README.md) for customizing objective function and evaluation -* Add R module +## v0.47 (2016.01.14) -xgboost-0.4 ------------ -* Distributed version of xgboost that runs on YARN, scales to billions of examples -* Direct save/load data and model from/to S3 and HDFS -* Feature importance visualization in R module, by Michael Benesty -* Predict leaf index -* Poisson regression for counts data -* Early stopping option in training -* Native save load support in R and python - - xgboost models now can be saved using save/load in R - - xgboost python model is now pickable -* sklearn wrapper is supported in python module -* Experimental External memory version - -xgboost-0.47 ------------- * Changes in R library - fixed possible problem of poisson regression. - switched from 0 to NA for missing values. @@ -52,10 +40,44 @@ xgboost-0.47 - improved compatibility in sklearn module. - additional parameters added for sklearn wrapper. - added pip installation functionality. 
- - supports more Pandas DataFrame dtypes. + - supports more Pandas DataFrame dtypes. - added best_ntree_limit attribute, in addition to best_score and best_iteration. * Java api is ready for use * Added more test cases and continuous integration to make each build more robust. -on going at master ------------------- +## v0.4 (2015.05.11) + +* Distributed version of xgboost that runs on YARN, scales to billions of examples +* Direct save/load data and model from/to S3 and HDFS +* Feature importance visualization in R module, by Michael Benesty +* Predict leaf index +* Poisson regression for counts data +* Early stopping option in training +* Native save load support in R and python + - xgboost models now can be saved using save/load in R + - xgboost python model is now picklable +* sklearn wrapper is supported in python module +* Experimental External memory version + + +## v0.3 (2014.09.07) + +* Faster tree construction module + - Allows subsampling columns during tree construction via ```bst:col_samplebytree=ratio``` +* Support for boosting from initial predictions +* Experimental version of LambdaRank +* Linear booster is now parallelized, using parallel coordinate descent. 
+* Add [Code Guide](src/README.md) for customizing objective function and evaluation +* Add R module + + +## v0.2x (2014.05.20) + +* Python module +* Weighted samples instances +* Initial version of pairwise rank + + +## v0.1 (2014.03.26) + +* Initial release \ No newline at end of file diff --git a/R-package/README.md b/R-package/README.md index c92bc9b96..e7d45426f 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -3,6 +3,12 @@ R package for xgboost [![CRAN Status Badge](http://www.r-pkg.org/badges/version/xgboost)](http://cran.r-project.org/web/packages/xgboost) [![CRAN Downloads](http://cranlogs.r-pkg.org/badges/xgboost)](http://cran.rstudio.com/web/packages/xgboost/index.html) +[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](http://xgboost.readthedocs.org/en/latest/R-package/index.html) + +Resources +--------- +* [XGBoost R Package Online Documentation](http://xgboost.readthedocs.org/en/latest/R-package/index.html) + - Check this out for detailed documents, examples and tutorials. Installation ------------ @@ -16,7 +22,7 @@ install.packages('xgboost') For up-to-date version, please install from github. Windows user will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first. ```r -devtools::install_github('dmlc/xgboost',subdir='R-package') +devtools::install_git('git://github.com/dmlc/xgboost',subdir='R-package') ``` Examples @@ -24,21 +30,3 @@ Examples * Please visit [walk through example](demo). * See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd). 
- -Notes ------ - -If you face an issue installing the package using ```devtools::install_github```, something like this (even after updating libxml and RCurl as lot of forums say) - - -``` -devtools::install_github('dmlc/xgboost',subdir='R-package') -Downloading github repo dmlc/xgboost@master -Error in function (type, msg, asError = TRUE) : - Peer certificate cannot be authenticated with given CA certificates -``` -To get around this you can build the package locally as mentioned [here](https://github.com/dmlc/xgboost/issues/347) - -``` -1. Clone the current repository and set your workspace to xgboost/R-package/ -2. Run R CMD INSTALL --build . in terminal to get the tarball. -3. Run install.packages('path_to_the_tarball',repo=NULL) in R to install. -``` diff --git a/R-package/src/Makevars b/R-package/src/Makevars index d0eb23b25..14472acc8 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -1,8 +1,17 @@ # package root PKGROOT=../../ +ENABLE_STD_THREAD=1 # _*_ mode: Makefile; _*_ -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ -I$(PKGROOT) + +CXX_STD = CXX11 + +XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ + -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ + -DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\ + -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ + +PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) -OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o $(PKGROOT)/src/io/dmlc_simple.o - +OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o\ + $(PKGROOT)/amalgamation/xgboost-all0.o 
$(PKGROOT)/amalgamation/dmlc-minimum0.o $(PKGROOT)/rabit/src/engine_empty.o diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 56b550e7f..4134487fa 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -1,5 +1,6 @@ # package root PKGROOT=./ +ENABLE_STD_THREAD=0 # _*_ mode: Makefile; _*_ # This file is only used for windows compilation from github @@ -9,11 +10,22 @@ all: $(SHLIB) $(SHLIB): xgblib xgblib: cp -r ../../src . - cp -r ../../wrapper . - cp -r ../../subtree . + cp -r ../../rabit . + cp -r ../../dmlc-core . + cp -r ../../include . + cp -r ../../amalgamation . -PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ -I$(PKGROOT) -I../.. +CXX_STD = CXX11 + +XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ + -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ + -DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\ + -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ + +PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS) PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) -OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o $(PKGROOT)/src/io/dmlc_simple.o +OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o\ + $(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o $(PKGROOT)/rabit/src/engine_empty.o + $(OBJECTS) : xgblib diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc new file mode 100644 index 000000000..665fb5faa --- /dev/null +++ b/R-package/src/xgboost_R.cc @@ -0,0 +1,354 @@ +// Copyright (c) 2014 by Contributors +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include "./xgboost_R.h" + +/*! + * \brief macro to annotate begin of api + */ +#define R_API_BEGIN() \ + GetRNGstate(); \ + try { +/*! + * \brief macro to annotate end of api; a caught dmlc::Error is reported through R's error() with a fixed "%s" format + */ +#define R_API_END() \ + } catch(dmlc::Error& e) { \ + PutRNGstate(); \ + error("%s", e.what()); \ + } \ + PutRNGstate(); + +/*! + * \brief macro to check the call; on a non-zero return it raises an R error carrying XGBGetLastError() + */ +#define CHECK_CALL(x) \ + if ((x) != 0) { \ + error("%s", XGBGetLastError()); \ + } + + +using namespace dmlc; + +SEXP XGCheckNullPtr_R(SEXP handle) { + return ScalarLogical(R_ExternalPtrAddr(handle) == NULL); +} + +void _DMatrixFinalizer(SEXP ext) { + R_API_BEGIN(); + if (R_ExternalPtrAddr(ext) == NULL) return; + CHECK_CALL(XGDMatrixFree(R_ExternalPtrAddr(ext))); + R_ClearExternalPtr(ext); + R_API_END(); +} + +SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { + SEXP ret; + R_API_BEGIN(); + DMatrixHandle handle; + CHECK_CALL(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle)); + ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + R_API_END(); + return ret; +} + +SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP missing) { + SEXP ret; + R_API_BEGIN(); + SEXP dim = getAttrib(mat, R_DimSymbol); + size_t nrow = static_cast(INTEGER(dim)[0]); + size_t ncol = static_cast(INTEGER(dim)[1]); + double *din = REAL(mat); + std::vector data(nrow * ncol); + #pragma omp parallel for schedule(static) + for (omp_ulong i = 0; i < nrow; ++i) { + for (size_t j = 0; j < ncol; ++j) { + data[i * ncol +j] = din[i + nrow * j]; + } + } + DMatrixHandle handle; + CHECK_CALL(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle)); + ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + R_API_END(); + return ret; +} + +SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, + SEXP indices, + SEXP data) { + SEXP ret; + R_API_BEGIN(); + 
const int *p_indptr = INTEGER(indptr); + const int *p_indices = INTEGER(indices); + const double *p_data = REAL(data); + int nindptr = length(indptr); + int ndata = length(data); + std::vector col_ptr_(nindptr); + std::vector indices_(ndata); + std::vector data_(ndata); + + for (int i = 0; i < nindptr; ++i) { + col_ptr_[i] = static_cast(p_indptr[i]); + } + #pragma omp parallel for schedule(static) + for (int i = 0; i < ndata; ++i) { + indices_[i] = static_cast(p_indices[i]); + data_[i] = static_cast(p_data[i]); + } + DMatrixHandle handle; + CHECK_CALL(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), + BeginPtr(data_), nindptr, ndata, + &handle)); + ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + R_API_END(); + return ret; +} + +SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) { + SEXP ret; + R_API_BEGIN(); + int len = length(idxset); + std::vector idxvec(len); + for (int i = 0; i < len; ++i) { + idxvec[i] = INTEGER(idxset)[i] - 1; + } + DMatrixHandle res; + CHECK_CALL(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), + BeginPtr(idxvec), len, + &res)); + ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); + UNPROTECT(1); + R_API_END(); + return ret; +} + +void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { + R_API_BEGIN(); + CHECK_CALL(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), + CHAR(asChar(fname)), + asInteger(silent))); + R_API_END(); +} + +void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { + R_API_BEGIN(); + int len = length(array); + const char *name = CHAR(asChar(field)); + if (!strcmp("group", name)) { + std::vector vec(len); + #pragma omp parallel for schedule(static) + for (int i = 0; i < len; ++i) { + vec[i] = static_cast(INTEGER(array)[i]); + } + CHECK_CALL(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len)); + } else { + std::vector 
vec(len); + #pragma omp parallel for schedule(static) + for (int i = 0; i < len; ++i) { + vec[i] = REAL(array)[i]; + } + CHECK_CALL(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + BeginPtr(vec), len)); + } + R_API_END(); +} + +SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { + SEXP ret; + R_API_BEGIN(); + bst_ulong olen; + const float *res; + CHECK_CALL(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + &olen, + &res)); + ret = PROTECT(allocVector(REALSXP, olen)); + for (size_t i = 0; i < olen; ++i) { + REAL(ret)[i] = res[i]; + } + UNPROTECT(1); + R_API_END(); + return ret; +} + +SEXP XGDMatrixNumRow_R(SEXP handle) { + bst_ulong nrow; + R_API_BEGIN(); + CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow)); + R_API_END(); + return ScalarInteger(static_cast(nrow)); +} + +// functions related to booster +void _BoosterFinalizer(SEXP ext) { + if (R_ExternalPtrAddr(ext) == NULL) return; + CHECK_CALL(XGBoosterFree(R_ExternalPtrAddr(ext))); + R_ClearExternalPtr(ext); +} + +SEXP XGBoosterCreate_R(SEXP dmats) { + SEXP ret; + R_API_BEGIN(); + int len = length(dmats); + std::vector dvec; + for (int i = 0; i < len; ++i) { + dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + } + BoosterHandle handle; + CHECK_CALL(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle)); + ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); + UNPROTECT(1); + R_API_END(); + return ret; +} + +void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { + R_API_BEGIN(); + CHECK_CALL(XGBoosterSetParam(R_ExternalPtrAddr(handle), + CHAR(asChar(name)), + CHAR(asChar(val)))); + R_API_END(); +} + +void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { + R_API_BEGIN(); + CHECK_CALL(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + R_ExternalPtrAddr(dtrain))); + R_API_END(); +} + +void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP 
grad, SEXP hess) { + R_API_BEGIN(); + CHECK_EQ(length(grad), length(hess)) + << "gradient and hess must have same length"; + int len = length(grad); + std::vector tgrad(len), thess(len); + #pragma omp parallel for schedule(static) + for (int j = 0; j < len; ++j) { + tgrad[j] = REAL(grad)[j]; + thess[j] = REAL(hess)[j]; + } + CHECK_CALL(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dtrain), + BeginPtr(tgrad), BeginPtr(thess), + len)); + R_API_END(); +} + +SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { + const char *ret; + R_API_BEGIN(); + CHECK_EQ(length(dmats), length(evnames)) + << "dmats and evnams must have same length"; + int len = length(dmats); + std::vector vec_dmats; + std::vector vec_names; + std::vector vec_sptr; + for (int i = 0; i < len; ++i) { + vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); + vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i))))); + } + for (int i = 0; i < len; ++i) { + vec_sptr.push_back(vec_names[i].c_str()); + } + CHECK_CALL(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + BeginPtr(vec_dmats), + BeginPtr(vec_sptr), + len, &ret)); + R_API_END(); + return mkString(ret); +} + +SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) { + SEXP ret; + R_API_BEGIN(); + bst_ulong olen; + const float *res; + CHECK_CALL(XGBoosterPredict(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dmat), + asInteger(option_mask), + asInteger(ntree_limit), + &olen, &res)); + ret = PROTECT(allocVector(REALSXP, olen)); + for (size_t i = 0; i < olen; ++i) { + REAL(ret)[i] = res[i]; + } + UNPROTECT(1); + R_API_END(); + return ret; +} + +void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { + R_API_BEGIN(); + CHECK_CALL(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); + R_API_END(); +} + +void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { + R_API_BEGIN(); + 
CHECK_CALL(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); + R_API_END(); +} + +void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { + R_API_BEGIN(); + CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle), + RAW(raw), + length(raw))); + R_API_END(); +} + +SEXP XGBoosterModelToRaw_R(SEXP handle) { + SEXP ret; + R_API_BEGIN(); + bst_ulong olen; + const char *raw; + CHECK_CALL(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw)); + ret = PROTECT(allocVector(RAWSXP, olen)); + if (olen != 0) { + memcpy(RAW(ret), raw, olen); + } + UNPROTECT(1); + R_API_END(); + return ret; +} + +SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) { + SEXP out; + R_API_BEGIN(); + bst_ulong olen; + const char **res; + CHECK_CALL(XGBoosterDumpModel(R_ExternalPtrAddr(handle), + CHAR(asChar(fmap)), + asInteger(with_stats), + &olen, &res)); + out = PROTECT(allocVector(STRSXP, olen)); + for (size_t i = 0; i < olen; ++i) { + std::stringstream stream; + stream << "booster[" << i <<"]\n" << res[i]; + SET_STRING_ELT(out, i, mkChar(stream.str().c_str())); + } + UNPROTECT(1); + R_API_END(); + return out; +} + diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp deleted file mode 100644 index 1d426c496..000000000 --- a/R-package/src/xgboost_R.cpp +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright (c) 2014 by Contributors -#include -#include -#include -#include -#include -#include -#include "wrapper/xgboost_wrapper.h" -#include "src/utils/utils.h" -#include "src/utils/omp.h" -#include "xgboost_R.h" - -using namespace std; -using namespace xgboost; - -extern "C" { - void XGBoostAssert_R(int exp, const char *fmt, ...); - void XGBoostCheck_R(int exp, const char *fmt, ...); - int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...); -} - -// implements error handling -namespace xgboost { -namespace utils { -extern "C" { - void (*Printf)(const char *fmt, ...) 
= Rprintf; - int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R; - void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R; - void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R; - void (*Error)(const char *fmt, ...) = error; -} -bool CheckNAN(double v) { - return ISNAN(v); -} -double LogGamma(double v) { - return lgammafn(v); -} -} // namespace utils - -namespace random { -void Seed(unsigned seed) { - // warning("parameter seed is ignored, please set random seed using set.seed"); -} -double Uniform(void) { - return unif_rand(); -} -double Normal(void) { - return norm_rand(); -} -} // namespace random -} // namespace xgboost - -// call before wrapper starts -inline void _WrapperBegin(void) { - GetRNGstate(); -} -// call after wrapper starts -inline void _WrapperEnd(void) { - PutRNGstate(); -} - -// do nothing, check error -inline void CheckErr(int ret) { -} - -extern "C" { - SEXP XGCheckNullPtr_R(SEXP handle) { - return ScalarLogical(R_ExternalPtrAddr(handle) == NULL); - } - void _DMatrixFinalizer(SEXP ext) { - if (R_ExternalPtrAddr(ext) == NULL) return; - XGDMatrixFree(R_ExternalPtrAddr(ext)); - R_ClearExternalPtr(ext); - } - SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { - _WrapperBegin(); - DMatrixHandle handle; - CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle)); - _WrapperEnd(); - SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); - R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); - UNPROTECT(1); - return ret; - } - SEXP XGDMatrixCreateFromMat_R(SEXP mat, - SEXP missing) { - _WrapperBegin(); - SEXP dim = getAttrib(mat, R_DimSymbol); - size_t nrow = static_cast(INTEGER(dim)[0]); - size_t ncol = static_cast(INTEGER(dim)[1]); - double *din = REAL(mat); - std::vector data(nrow * ncol); - #pragma omp parallel for schedule(static) - for (bst_omp_uint i = 0; i < nrow; ++i) { - for (size_t j = 0; j < ncol; ++j) { - data[i * ncol +j] = din[i + nrow * j]; 
- } - } - DMatrixHandle handle; - CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle)); - _WrapperEnd(); - SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); - R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); - UNPROTECT(1); - return ret; - } - SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, - SEXP indices, - SEXP data) { - _WrapperBegin(); - const int *p_indptr = INTEGER(indptr); - const int *p_indices = INTEGER(indices); - const double *p_data = REAL(data); - int nindptr = length(indptr); - int ndata = length(data); - std::vector col_ptr_(nindptr); - std::vector indices_(ndata); - std::vector data_(ndata); - - for (int i = 0; i < nindptr; ++i) { - col_ptr_[i] = static_cast(p_indptr[i]); - } - #pragma omp parallel for schedule(static) - for (int i = 0; i < ndata; ++i) { - indices_[i] = static_cast(p_indices[i]); - data_[i] = static_cast(p_data[i]); - } - DMatrixHandle handle; - CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), - BeginPtr(data_), nindptr, ndata, - &handle)); - _WrapperEnd(); - SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); - R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); - UNPROTECT(1); - return ret; - } - SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) { - _WrapperBegin(); - int len = length(idxset); - std::vector idxvec(len); - for (int i = 0; i < len; ++i) { - idxvec[i] = INTEGER(idxset)[i] - 1; - } - DMatrixHandle res; - CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), - BeginPtr(idxvec), len, - &res)); - _WrapperEnd(); - SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); - R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); - UNPROTECT(1); - return ret; - } - void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { - _WrapperBegin(); - CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), - CHAR(asChar(fname)), asInteger(silent))); - _WrapperEnd(); - } - void XGDMatrixSetInfo_R(SEXP handle, 
SEXP field, SEXP array) { - _WrapperBegin(); - int len = length(array); - const char *name = CHAR(asChar(field)); - if (!strcmp("group", name)) { - std::vector vec(len); - #pragma omp parallel for schedule(static) - for (int i = 0; i < len; ++i) { - vec[i] = static_cast(INTEGER(array)[i]); - } - CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len)); - } else { - std::vector vec(len); - #pragma omp parallel for schedule(static) - for (int i = 0; i < len; ++i) { - vec[i] = REAL(array)[i]; - } - CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), - CHAR(asChar(field)), - BeginPtr(vec), len)); - } - _WrapperEnd(); - } - SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { - _WrapperBegin(); - bst_ulong olen; - const float *res; - CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), - CHAR(asChar(field)), - &olen, - &res)); - _WrapperEnd(); - SEXP ret = PROTECT(allocVector(REALSXP, olen)); - for (size_t i = 0; i < olen; ++i) { - REAL(ret)[i] = res[i]; - } - UNPROTECT(1); - return ret; - } - SEXP XGDMatrixNumRow_R(SEXP handle) { - bst_ulong nrow; - CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow)); - return ScalarInteger(static_cast(nrow)); - } - // functions related to booster - void _BoosterFinalizer(SEXP ext) { - if (R_ExternalPtrAddr(ext) == NULL) return; - CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext))); - R_ClearExternalPtr(ext); - } - SEXP XGBoosterCreate_R(SEXP dmats) { - _WrapperBegin(); - int len = length(dmats); - std::vector dvec; - for (int i = 0; i < len; ++i) { - dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); - } - BoosterHandle handle; - CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle)); - _WrapperEnd(); - SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); - R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); - UNPROTECT(1); - return ret; - } - void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { - _WrapperBegin(); - 
CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle), - CHAR(asChar(name)), - CHAR(asChar(val)))); - _WrapperEnd(); - } - void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { - _WrapperBegin(); - CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), - asInteger(iter), - R_ExternalPtrAddr(dtrain))); - _WrapperEnd(); - } - void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { - _WrapperBegin(); - utils::Check(length(grad) == length(hess), "gradient and hess must have same length"); - int len = length(grad); - std::vector tgrad(len), thess(len); - #pragma omp parallel for schedule(static) - for (int j = 0; j < len; ++j) { - tgrad[j] = REAL(grad)[j]; - thess[j] = REAL(hess)[j]; - } - CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), - R_ExternalPtrAddr(dtrain), - BeginPtr(tgrad), BeginPtr(thess), - len)); - _WrapperEnd(); - } - SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { - _WrapperBegin(); - utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length"); - int len = length(dmats); - std::vector vec_dmats; - std::vector vec_names; - std::vector vec_sptr; - for (int i = 0; i < len; ++i) { - vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); - vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i))))); - } - for (int i = 0; i < len; ++i) { - vec_sptr.push_back(vec_names[i].c_str()); - } - const char *ret; - CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), - asInteger(iter), - BeginPtr(vec_dmats), - BeginPtr(vec_sptr), - len, &ret)); - _WrapperEnd(); - return mkString(ret); - } - SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) { - _WrapperBegin(); - bst_ulong olen; - const float *res; - CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle), - R_ExternalPtrAddr(dmat), - asInteger(option_mask), - asInteger(ntree_limit), - &olen, &res)); - _WrapperEnd(); - SEXP ret = PROTECT(allocVector(REALSXP, 
olen)); - for (size_t i = 0; i < olen; ++i) { - REAL(ret)[i] = res[i]; - } - UNPROTECT(1); - return ret; - } - void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { - _WrapperBegin(); - CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); - _WrapperEnd(); - } - void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { - _WrapperBegin(); - CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); - _WrapperEnd(); - } - void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { - _WrapperBegin(); - XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle), - RAW(raw), - length(raw)); - _WrapperEnd(); - } - SEXP XGBoosterModelToRaw_R(SEXP handle) { - bst_ulong olen; - _WrapperBegin(); - const char *raw; - CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw)); - _WrapperEnd(); - SEXP ret = PROTECT(allocVector(RAWSXP, olen)); - if (olen != 0) { - memcpy(RAW(ret), raw, olen); - } - UNPROTECT(1); - return ret; - } - SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) { - _WrapperBegin(); - bst_ulong olen; - const char **res; - CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle), - CHAR(asChar(fmap)), - asInteger(with_stats), - &olen, &res)); - _WrapperEnd(); - SEXP out = PROTECT(allocVector(STRSXP, olen)); - for (size_t i = 0; i < olen; ++i) { - stringstream stream; - stream << "booster[" << i <<"]\n" << res[i]; - SET_STRING_ELT(out, i, mkChar(stream.str().c_str())); - } - UNPROTECT(1); - return out; - } -} diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index 768b2ced7..66dd1f1cf 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -4,155 +4,171 @@ * \author Tianqi Chen * \brief R wrapper of xgboost */ -#ifndef XGBOOST_WRAPPER_R_H_ // NOLINT(*) -#define XGBOOST_WRAPPER_R_H_ // NOLINT(*) +#ifndef XGBOOST_R_H_ // NOLINT(*) +#define XGBOOST_R_H_ // NOLINT(*) extern "C" { #include #include #include } +#include -extern "C" { - /*! 
- * \brief check whether a handle is NULL - * \param handle - * \return whether it is null ptr +/*! + * \brief check whether a handle is NULL + * \param handle + * \return whether it is null ptr + */ +XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle); + +/*! + * \brief load a data matrix + * \param fname name of the content + * \param silent whether print messages + * \return a loaded data matrix + */ +XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent); + +/*! + * \brief create matrix content from dense matrix + * This assumes the matrix is stored in column major format + * \param data R Matrix object + * \param missing which value to represent missing value + * \return created dmatrix + */ +XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP missing); +/*! + * \brief create a matrix content from CSC format + * \param indptr pointer to column headers + * \param indices row indices + * \param data content of the data + * \return created dmatrix + */ +XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, + SEXP indices, + SEXP data); + +/*! + * \brief create a new dmatrix from sliced content of existing matrix + * \param handle instance of data matrix to be sliced + * \param idxset index set + * \return a sliced new matrix + */ +XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset); + +/*! + * \brief load a data matrix into binary file + * \param handle a instance of data matrix + * \param fname file name + * \param silent print statistics when saving + */ +XGB_DLL void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent); + +/*! + * \brief set information to dmatrix + * \param handle a instance of data matrix + * \param field field name, can be label, weight + * \param array pointer to float vector + */ +XGB_DLL void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array); + +/*! 
+ * \brief get info vector from matrix + * \param handle a instance of data matrix + * \param field field name + * \return info vector + */ +XGB_DLL SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field); + +/*! + * \brief return number of rows + * \param handle a instance of data matrix + */ +XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle); + +/*! + * \brief create xgboost learner + * \param dmats a list of dmatrix handles that will be cached + */ +XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats); + +/*! + * \brief set parameters + * \param handle handle + * \param name parameter name + * \param val value of parameter + */ +XGB_DLL void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val); + +/*! + * \brief update the model in one round using dtrain + * \param handle handle + * \param iter current iteration rounds + * \param dtrain training data + */ +XGB_DLL void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain); + +/*! + * \brief update the model, by directly specify gradient and second order gradient, + * this can be used to replace UpdateOneIter, to support customized loss function + * \param handle handle + * \param dtrain training data + * \param grad gradient statistics + * \param hess second order gradient statistics + */ +XGB_DLL void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess); + +/*! + * \brief get evaluation statistics for xgboost + * \param handle handle + * \param iter current iteration rounds + * \param dmats list of handles to dmatrices + * \param evname name of evaluation + * \return the string containing evaluation stati + */ +XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames); + +/*! 
+ * \brief make prediction based on dmat + * \param handle handle + * \param dmat data matrix + * \param option_mask output_margin:1 predict_leaf:2 + * \param ntree_limit limit number of trees used in prediction + */ +XGB_DLL SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit); +/*! + * \brief load model from existing file + * \param handle handle + * \param fname file name + */ +XGB_DLL void XGBoosterLoadModel_R(SEXP handle, SEXP fname); + +/*! + * \brief save model into existing file + * \param handle handle + * \param fname file name + */ +XGB_DLL void XGBoosterSaveModel_R(SEXP handle, SEXP fname); + +/*! + * \brief load model from raw array + * \param handle handle + */ +XGB_DLL void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw); + +/*! + * \brief save model into R's raw array + * \param handle handle + * \return raw array */ - SEXP XGCheckNullPtr_R(SEXP handle); - /*! - * \brief load a data matrix - * \param fname name of the content - * \param silent whether print messages - * \return a loaded data matrix - */ - SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent); - /*! - * \brief create matrix content from dense matrix - * This assumes the matrix is stored in column major format - * \param data R Matrix object - * \param missing which value to represent missing value - * \return created dmatrix - */ - SEXP XGDMatrixCreateFromMat_R(SEXP mat, - SEXP missing); - /*! - * \brief create a matrix content from CSC format - * \param indptr pointer to column headers - * \param indices row indices - * \param data content of the data - * \return created dmatrix - */ - SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, - SEXP indices, - SEXP data); - /*! - * \brief create a new dmatrix from sliced content of existing matrix - * \param handle instance of data matrix to be sliced - * \param idxset index set - * \return a sliced new matrix - */ - SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset); - /*! 
- * \brief load a data matrix into binary file - * \param handle a instance of data matrix - * \param fname file name - * \param silent print statistics when saving - */ - void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent); - /*! - * \brief set information to dmatrix - * \param handle a instance of data matrix - * \param field field name, can be label, weight - * \param array pointer to float vector - */ - void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array); - /*! - * \brief get info vector from matrix - * \param handle a instance of data matrix - * \param field field name - * \return info vector - */ - SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field); - /*! - * \brief return number of rows - * \param handle a instance of data matrix - */ - SEXP XGDMatrixNumRow_R(SEXP handle); - /*! - * \brief create xgboost learner - * \param dmats a list of dmatrix handles that will be cached - */ - SEXP XGBoosterCreate_R(SEXP dmats); - /*! - * \brief set parameters - * \param handle handle - * \param name parameter name - * \param val value of parameter - */ - void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val); - /*! - * \brief update the model in one round using dtrain - * \param handle handle - * \param iter current iteration rounds - * \param dtrain training data - */ - void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain); - /*! - * \brief update the model, by directly specify gradient and second order gradient, - * this can be used to replace UpdateOneIter, to support customized loss function - * \param handle handle - * \param dtrain training data - * \param grad gradient statistics - * \param hess second order gradient statistics - */ - void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess); - /*! 
- * \brief get evaluation statistics for xgboost - * \param handle handle - * \param iter current iteration rounds - * \param dmats list of handles to dmatrices - * \param evname name of evaluation - * \return the string containing evaluation stati - */ - SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames); - /*! - * \brief make prediction based on dmat - * \param handle handle - * \param dmat data matrix - * \param option_mask output_margin:1 predict_leaf:2 - * \param ntree_limit limit number of trees used in prediction - */ - SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit); - /*! - * \brief load model from existing file - * \param handle handle - * \param fname file name - */ - void XGBoosterLoadModel_R(SEXP handle, SEXP fname); - /*! - * \brief save model into existing file - * \param handle handle - * \param fname file name - */ - void XGBoosterSaveModel_R(SEXP handle, SEXP fname); - /*! - * \brief load model from raw array - * \param handle handle - */ - void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw); - /*! - * \brief save model into R's raw array - * \param handle handle - * \return raw array - */ - SEXP XGBoosterModelToRaw_R(SEXP handle); - /*! - * \brief dump model into a string - * \param handle handle - * \param fmap name to fmap can be empty string - * \param with_stats whether dump statistics of splits - */ - SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats); -} +XGB_DLL SEXP XGBoosterModelToRaw_R(SEXP handle); + +/*! 
+ * \brief dump model into a string + * \param handle handle + * \param fmap name to fmap can be empty string + * \param with_stats whether dump statistics of splits + */ +XGB_DLL SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats); #endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*) diff --git a/R-package/src/xgboost_assert.c b/R-package/src/xgboost_assert.c index 072074243..4706a039e 100644 --- a/R-package/src/xgboost_assert.c +++ b/R-package/src/xgboost_assert.c @@ -24,11 +24,3 @@ void XGBoostCheck_R(int exp, const char *fmt, ...) { error("%s\n", buf); } } -int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) { - int ret; - va_list args; - va_start(args, fmt); - ret = vsnprintf(buf, size, fmt, args); - va_end(args); - return ret; -} diff --git a/R-package/src/xgboost_custom.cc b/R-package/src/xgboost_custom.cc new file mode 100644 index 000000000..9d0de76c4 --- /dev/null +++ b/R-package/src/xgboost_custom.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2015 by Contributors +// This file contains the customization implementations of R module +// to change behavior of libxgboost + +#include +#include "src/common/random.h" +#include "./xgboost_R.h" + +// redirect the messages to R's console. +namespace dmlc { +void CustomLogMessage::Log(const std::string& msg) { + Rprintf("%s\n", msg.c_str()); +} +} // namespace dmlc + +// implements rabit error handling. +extern "C" { + void XGBoostAssert_R(int exp, const char *fmt, ...); + void XGBoostCheck_R(int exp, const char *fmt, ...); +} + +namespace rabit { +namespace utils { +extern "C" { + void (*Printf)(const char *fmt, ...) = Rprintf; + void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R; + void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R; + void (*Error)(const char *fmt, ...) 
= error; +} +} +} + +namespace xgboost { +ConsoleLogger::~ConsoleLogger() { + dmlc::CustomLogMessage::Log(log_stream_.str()); +} +TrackerLogger::~TrackerLogger() { + dmlc::CustomLogMessage::Log(log_stream_.str()); +} +} // namespace xgboost + +namespace xgboost { +namespace common { + +// redirect the nath functions. +bool CheckNAN(double v) { + return ISNAN(v); +} +double LogGamma(double v) { + return lgammafn(v); +} + +// customize random engine. +void CustomGlobalRandomEngine::seed(CustomGlobalRandomEngine::result_type val) { + // ignore the seed +} + +// use R's PRNG to replacd +CustomGlobalRandomEngine::result_type +CustomGlobalRandomEngine::operator()() { + return static_cast( + std::floor(unif_rand() * CustomGlobalRandomEngine::max())); +} +} // namespace common +} // namespace xgboost diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R index c5389dd0f..5473d930f 100644 --- a/R-package/tests/testthat/test_poisson_regression.R +++ b/R-package/tests/testthat/test_poisson_regression.R @@ -10,5 +10,5 @@ test_that("poisson regression works", { expect_equal(class(bst), "xgb.Booster") pred <- predict(bst,as.matrix(mtcars[, -11])) expect_equal(length(pred), 32) - expect_equal(sqrt(mean( (pred - mtcars[,11]) ^ 2)), 1.16, tolerance = 0.01) + expect_less_than(sqrt(mean( (pred - mtcars[,11]) ^ 2)), 2.5) }) diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index 08d6bfdf5..e981df0ed 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -1,6 +1,6 @@ --- title: "Understand your dataset with Xgboost" -output: +output: rmarkdown::html_vignette: css: vignette.css number_sections: yes @@ -12,8 +12,11 @@ vignette: > \usepackage[utf8]{inputenc} --- +Understand your dataset with XGBoost +==================================== + Introduction -============ +------------ The purpose of this Vignette is to show you how to use 
**Xgboost** to discover and understand your own dataset better. @@ -25,16 +28,16 @@ Pacakge loading: require(xgboost) require(Matrix) require(data.table) -if (!require('vcd')) install.packages('vcd') +if (!require('vcd')) install.packages('vcd') ``` > **VCD** package is used for one of its embedded dataset only. Preparation of the dataset -========================== +-------------------------- + +### Numeric VS categorical variables -Numeric VS categorical variables --------------------------------- **Xgboost** manages only `numeric` vectors. @@ -48,10 +51,9 @@ A *categorical* variable has a fixed number of different values. For instance, i To answer the question above we will convert *categorical* variables to `numeric` one. -Conversion from categorical to numeric variables ------------------------------------------------- +### Conversion from categorical to numeric variables -### Looking at the raw data +#### Looking at the raw data In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features. @@ -85,11 +87,11 @@ str(df) > * can take a limited number of values (like `factor`) ; > * these values are ordered (unlike `factor`). Here these ordered values are: `Marked > Some > None` -### Creation of new features based on old ones +#### Creation of new features based on old ones We will add some new *categorical* features to see if it helps. -#### Grouping per 10 years +##### Grouping per 10 years For the first feature we create groups of age by rounding the real age. @@ -101,7 +103,7 @@ Therefore, 20 is not closer to 30 than 60. To make it short, the distance betwee head(df[,AgeDiscret := as.factor(round(Age/10,0))]) ``` -#### Random split in two groups +##### Random split in two groups Following is an even stronger simplification of the real age with an arbitrary split at 30 years old. 
I choose this value **based on nothing**. We will see later if simplifying the information based on arbitrary values is a good strategy (you may already have an idea of how well it will work...). @@ -109,15 +111,15 @@ Following is an even stronger simplification of the real age with an arbitrary s head(df[,AgeCat:= as.factor(ifelse(Age > 30, "Old", "Young"))]) ``` -#### Risks in adding correlated features +##### Risks in adding correlated features -These new features are highly correlated to the `Age` feature because they are simple transformations of this feature. +These new features are highly correlated to the `Age` feature because they are simple transformations of this feature. For many machine learning algorithms, using correlated features is not a good idea. It may sometimes make prediction less accurate, and most of the time make interpretation of the model almost impossible. GLM, for instance, assumes that the features are uncorrelated. Fortunately, decision tree algorithms (including boosted trees) are very robust to these features. Therefore we have nothing to do to manage this situation. -#### Cleaning data +##### Cleaning data We remove ID as there is nothing to learn from this feature (it would just add some noise). @@ -132,7 +134,7 @@ levels(df[,Treatment]) ``` -### One-hot encoding +#### One-hot encoding Next step, we will transform the categorical data to dummy variables. This is the [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) step. @@ -156,12 +158,12 @@ Create the output `numeric` vector (not as a sparse `Matrix`): output_vector = df[,Improved] == "Marked" ``` -1. set `Y` vector to `0`; -2. set `Y` to `1` for rows where `Improved == Marked` is `TRUE` ; +1. set `Y` vector to `0`; +2. set `Y` to `1` for rows where `Improved == Marked` is `TRUE` ; 3. return `Y` vector. Build the model -=============== +--------------- The code below is very usual. 
For more information, you can look at the documentation of `xgboost` function (or at the vignette [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). @@ -173,17 +175,17 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4, You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better. -A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future). +A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future). -> Here you can see the numbers decrease until line 7 and then increase. +> Here you can see the numbers decrease until line 7 and then increase. > > It probably means we are overfitting. To fix that I should reduce the number of rounds to `nround = 4`. I will let things like that because I don't really care for the purpose of this example :-) Feature importance -================== +------------------ + +## Measure feature importance -Measure feature importance --------------------------- ### Build the feature importance data.table @@ -204,7 +206,7 @@ head(importance) `Frequency` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it). -### Improvement in the interpretability of feature importance data.table +#### Improvement in the interpretability of feature importance data.table We can go deeper in the analysis of the model. In the `data.table` above, we have discovered which features counts to predict if the illness will go or not. But we don't yet know the role of these features. 
For instance, one of the question we may want to answer would be: does receiving a placebo treatment helps to recover from the illness? @@ -233,8 +235,8 @@ Therefore, according to our findings, getting a placebo doesn't seem to help but > You may wonder how to interpret the `< 1.00001` on the first line. Basically, in a sparse `Matrix`, there is no `0`, therefore, looking for one hot-encoded categorical observations validating the rule `< 1.00001` is like just looking for `1` for this feature. -Plotting the feature importance -------------------------------- +### Plotting the feature importance + All these things are nice, but it would be even better to plot the results. @@ -250,11 +252,11 @@ According to the plot above, the most important features in this dataset to pred * the Age ; * having received a placebo or not ; -* the sex is third but already included in the not interesting features group ; +* the sex is third but already included in the not interesting features group ; * then we see our generated features (AgeDiscret). We can see that their contribution is very low. -Do these results make sense? ------------------------------- +### Do these results make sense? + Let's check some **Chi2** between each of these features and the label. @@ -279,18 +281,18 @@ c2 <- chisq.test(df$AgeCat, output_vector) print(c2) ``` -The perfectly random split I did between young and old at 30 years old have a low correlation of **`r round(c2$statistic, 2)`**. It's a result we may expect as may be in my mind > 30 years is being old (I am 32 and starting feeling old, this may explain that), but for the illness we are studying, the age to be vulnerable is not the same. +The perfectly random split I did between young and old at 30 years old have a low correlation of **`r round(c2$statistic, 2)`**. 
It's a result we may expect as may be in my mind > 30 years is being old (I am 32 and starting feeling old, this may explain that), but for the illness we are studying, the age to be vulnerable is not the same. -Morality: don't let your *gut* lower the quality of your model. +Morality: don't let your *gut* lower the quality of your model. In *data science* expression, there is the word *science* :-) Conclusion -========== +---------- -As you can see, in general *destroying information by simplifying it won't improve your model*. **Chi2** just demonstrates that. +As you can see, in general *destroying information by simplifying it won't improve your model*. **Chi2** just demonstrates that. -But in more complex cases, creating a new feature based on existing one which makes link with the outcome more obvious may help the algorithm and improve the model. +But in more complex cases, creating a new feature based on existing one which makes link with the outcome more obvious may help the algorithm and improve the model. The case studied here is not enough complex to show that. Check [Kaggle website](http://www.kaggle.com/) for some challenging datasets. However it's almost always worse when you add some arbitrary rules. @@ -299,7 +301,7 @@ Moreover, you can notice that even if we have added some not useful new features Linear model may not be that smart in this scenario. Special Note: What about Random Forests™? -========================================== +----------------------------------------- As you may know, [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family. 
@@ -313,7 +315,7 @@ However, in Random Forests™ this random choice will be done for each tree, bec In boosting, when a specific link between feature and outcome have been learned by the algorithm, it will try to not refocus on it (in theory it is what happens, reality is not always that simple). Therefore, all the importance will be on feature `A` or on feature `B` (but not both). You will know that one feature have an important role in the link between the observations and the label. It is still up to you to search for the correlated features to the one detected as important if you need to know all of them. -If you want to try Random Forests™ algorithm, you can tweak Xgboost parameters! +If you want to try Random Forests™ algorithm, you can tweak Xgboost parameters! **Warning**: this is still an experimental parameter. diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index 1e6060eb1..aa33073ad 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -13,8 +13,11 @@ vignette: > \usepackage[utf8]{inputenc} --- -Introduction -============ +XGBoost R Tutorial +================== + +## Introduction + **Xgboost** is short for e**X**treme **G**radient **Boost**ing package. @@ -40,16 +43,16 @@ It has several features: * Sparsity: it accepts *sparse* input for both *tree booster* and *linear booster*, and is optimized for *sparse* input ; * Customization: it supports customized objective functions and evaluation functions. -Installation -============ +## Installation + + +### Github version -Github version --------------- For up-to-date version (highly recommended), install from *Github*: ```{r installGithub, eval=FALSE} -devtools::install_github('dmlc/xgboost', subdir='R-package') +devtools::install_git('git://github.com/dmlc/xgboost', subdir='R-package') ``` > *Windows* user will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first. 
@@ -61,8 +64,8 @@ As of 2015-03-13, ‘xgboost’ was removed from the CRAN repository. Formerly available versions can be obtained from the CRAN [archive](http://cran.r-project.org/src/contrib/Archive/xgboost) -Learning -======== +## Learning + For the purpose of this tutorial we will load **XGBoost** package. @@ -70,15 +73,15 @@ For the purpose of this tutorial we will load **XGBoost** package. require(xgboost) ``` -Dataset presentation --------------------- +### Dataset presentation + In this example, we are aiming to predict whether a mushroom can be eaten or not (like in many tutorials, example data are the the same as you will use on in your every day life :-). Mushroom data is cited from UCI Machine Learning Repository. @Bache+Lichman:2013. -Dataset loading ---------------- +### Dataset loading + We will load the `agaricus` datasets embedded with the package and will link them to variables. @@ -124,12 +127,12 @@ class(train$data)[1] class(train$label) ``` -Basic Training using XGBoost ----------------------------- +### Basic Training using XGBoost + This step is the most critical part of the process for the quality of our model. -### Basic training +#### Basic training We are using the `train` data. As explained above, both `data` and `label` are stored in a `list`. @@ -148,9 +151,9 @@ bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta > More complex the relationship between your features and your `label` is, more passes you need. -### Parameter variations +#### Parameter variations -#### Dense matrix +##### Dense matrix Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix. @@ -158,7 +161,7 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. 
a basic **R** bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") ``` -#### xgb.DMatrix +##### xgb.DMatrix **XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later. @@ -167,7 +170,7 @@ dtrain <- xgb.DMatrix(data = train$data, label = train$label) bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") ``` -#### Verbose option +##### Verbose option **XGBoost** has several features to help you to view how the learning progress internally. The purpose is to help you to set the best parameters, which is the key of your model quality. @@ -188,11 +191,11 @@ bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, o bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2) ``` -Basic prediction using XGBoost -============================== +## Basic prediction using XGBoost + + +## Perform the prediction -Perform the prediction ----------------------- The purpose of the model we have built is to classify new data. As explained before, we will use the `test` dataset for this step. @@ -208,8 +211,8 @@ print(head(pred)) These numbers doesn't look like *binary classification* `{0,1}`. We need to perform a simple transformation before being able to use these results. -Transform the regression in a binary classification ---------------------------------------------------- +## Transform the regression in a binary classification + The only thing that **XGBoost** does is a *regression*. **XGBoost** is using `label` vector to build its *regression* model. 
@@ -222,8 +225,8 @@ prediction <- as.numeric(pred > 0.5) print(head(prediction)) ``` -Measuring model performance ---------------------------- +## Measuring model performance + To measure the model performance, we will compute a simple metric, the *average error*. @@ -246,14 +249,14 @@ The most important thing to remember is that **to do a classification, you just This metric is **`r round(err, 2)`** and is pretty low: our yummly mushroom model works well! -Advanced features -================= +## Advanced features + Most of the features below have been implemented to help you to improve your model by offering a better understanding of its content. -Dataset preparation -------------------- +### Dataset preparation + For the following advanced features, we need to put data in `xgb.DMatrix` as explained above. @@ -262,8 +265,8 @@ dtrain <- xgb.DMatrix(data = train$data, label=train$label) dtest <- xgb.DMatrix(data = test$data, label=test$label) ``` -Measure learning progress with xgb.train ----------------------------------------- +### Measure learning progress with xgb.train + Both `xgboost` (simple) and `xgb.train` (advanced) functions train models. @@ -295,8 +298,8 @@ bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchli > `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`. -Linear boosting ---------------- +### Linear boosting + Until now, all the learnings we have performed were based on boosting trees. **XGBoost** implements a second algorithm, based on linear boosting. The only difference with previous command is `booster = "gblinear"` parameter (and removing `eta` parameter). @@ -308,10 +311,10 @@ In this specific case, *linear boosting* gets sligtly better performance metrics In simple cases, it will happen because there is nothing better than a linear algorithm to catch a linear link. However, decision trees are much better to catch a non linear link between predictors and outcome. 
Because there is no silver bullet, we advise you to check both algorithms with your own datasets to have an idea of what to use. -Manipulating xgb.DMatrix ------------------------- +### Manipulating xgb.DMatrix -### Save / Load + +#### Save / Load Like saving models, `xgb.DMatrix` object (which groups both dataset and outcome) can also be saved using `xgb.DMatrix.save` function. @@ -326,7 +329,7 @@ bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchl file.remove("dtrain.buffer") ``` -### Information extraction +#### Information extraction Information can be extracted from `xgb.DMatrix` using `getinfo` function. Hereafter we will extract `label` data. @@ -337,8 +340,8 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label) print(paste("test-error=", err)) ``` -View feature importance/influence from the learnt model -------------------------------------------------------- +### View feature importance/influence from the learnt model + Feature importance is similar to R gbm package's relative influence (rel.inf). @@ -348,8 +351,8 @@ print(importance_matrix) xgb.plot.importance(importance_matrix = importance_matrix) ``` -View the trees from a model ---------------------------- +#### View the trees from a model + You can dump the tree you learned using `xgb.dump` into a text file. @@ -365,8 +368,8 @@ xgb.plot.tree(model = bst) > if you provide a path to `fname` parameter you can save the trees to your hard drive. -Save and load models --------------------- +#### Save and load models + Maybe your dataset is big, and it takes time to train a model on it? May be you are not a big fan of losing time in redoing the same task again and again? In these very rare cases, you will want to save your model and load it when required. @@ -416,5 +419,4 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred2-pred)))) > Again `0`? It seems that `XGBoost` works pretty well! 
-References -========== +## References diff --git a/README.md b/README.md index f33394d40..092e7abe3 100644 --- a/README.md +++ b/README.md @@ -7,47 +7,22 @@ [![PyPI version](https://badge.fury.io/py/xgboost.svg)](https://pypi.python.org/pypi/xgboost/) [![Gitter chat for developers at https://gitter.im/dmlc/xgboost](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dmlc/xgboost?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version. - -It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework, including [Generalized Linear Model](https://en.wikipedia.org/wiki/Generalized_linear_model) (GLM) and [Gradient Boosted Decision Trees](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) (GBDT). XGBoost can also be [distributed](#features) and scale to Terascale data - -XGBoost is part of [Distributed Machine Learning Common](http://dmlc.github.io/) projects +XGBoost is an optimized distributed gradient boosting library designed to be highly *efficient*, *flexible* and *portable*. +It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework. +XGBoost provides a parallel tree boosting (also known as GBDT, GBM) that solves many data science problems in a fast and accurate way. +The same code runs on major distributed environments (Hadoop, SGE, MPI) and can solve problems beyond billions of examples. +XGBoost is part of [DMLC](http://dmlc.github.io/) projects.
Contents -------- -* [What's New](#whats-new) -* [Version](#version) -* [Documentation](doc/index.md) -* [Build Instruction](doc/build.md) -* [Features](#features) -* [Distributed XGBoost](multi-node) -* [Usecases](doc/index.md#highlight-links) -* [Bug Reporting](#bug-reporting) -* [Contributing to XGBoost](#contributing-to-xgboost) -* [Committers and Contributors](CONTRIBUTORS.md) -* [License](#license) -* [XGBoost in Graphlab Create](#xgboost-in-graphlab-create) +* [Documentation and Tutorials](https://xgboost.readthedocs.org) +* [Code Examples](demo) +* [Installation](doc/build.md) +* [Contribute to XGBoost](http://xgboost.readthedocs.org/en/latest/dev-guide/contribute.html) What's New ---------- - -* XGBoost helps Vlad Mironov, Alexander Guschin to win the [CERN LHCb experiment Flavour of Physics competition](https://www.kaggle.com/c/flavours-of-physics). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/11/30/flavour-of-physics-technical-write-up-1st-place-go-polar-bears/). -* XGBoost helps Mario Filho, Josef Feigl, Lucas, Gilberto to win the [Caterpillar Tube Pricing competition](https://www.kaggle.com/c/caterpillar-tube-pricing). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/09/22/caterpillar-winners-interview-1st-place-gilberto-josef-leustagos-mario/). -* XGBoost helps Halla Yang to win the [Recruit Coupon Purchase Prediction Challenge](https://www.kaggle.com/c/coupon-purchase-prediction). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/10/21/recruit-coupon-purchase-winners-interview-2nd-place-halla-yang/). -* XGBoost helps Owen Zhang to win the [Avito Context Ad Click competition](https://www.kaggle.com/c/avito-context-ad-clicks). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/08/26/avito-winners-interview-1st-place-owen-zhang/). 
-* XGBoost helps Chenglong Chen to win [Kaggle CrowdFlower Competition](https://www.kaggle.com/c/crowdflower-search-relevance) - Check out the [winning solution](https://github.com/ChenglongChen/Kaggle_CrowdFlower) -* XGBoost-0.4 release, see [CHANGES.md](CHANGES.md#xgboost-04) -* XGBoost helps three champion teams to win [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing) - Check out the [winning solution](doc/README.md#highlight-links) -* [External Memory Version](doc/external_memory.md) - -Version -------- - -* Current version xgboost-0.4 - - [Change log](CHANGES.md) - - This version is compatible with 0.3x versions +* [XGBoost brick](NEWS.md) Release Features -------- @@ -61,24 +36,17 @@ Features Bug Reporting ------------- - * For reporting bugs please use the [xgboost/issues](https://github.com/dmlc/xgboost/issues) page. * For generic questions or to share your experience using xgboost please use the [XGBoost User Group](https://groups.google.com/forum/#!forum/xgboost-user/) - Contributing to XGBoost ----------------------- - XGBoost has been developed and used by a group of active community members. Everyone is more than welcome to contribute. It is a way to make the project better and more accessible to more users. * Check out [Feature Wish List](https://github.com/dmlc/xgboost/labels/Wish-List) to see what can be improved, or open an issue if you want something. * Contribute to the [documents and examples](https://github.com/dmlc/xgboost/blob/master/doc/) to share your experience with other users. -* Please add your name to [CONTRIBUTORS.md](CONTRIBUTORS.md) after your patch has been merged. +* Please add your name to [CONTRIBUTORS.md](CONTRIBUTORS.md) after your patch has been merged. + - Please also update [NEWS.md](NEWS.md) on changes and improvements in API and docs. License ------- © Contributors, 2015.
Licensed under an [Apache-2](https://github.com/dmlc/xgboost/blob/master/LICENSE) license. - -XGBoost in Graphlab Create --------------------------- -* XGBoost is adopted as part of boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful python toolkit that allows you to do data manipulation, graph processing, hyper-parameter search, and visualization of TeraBytes scale data in one framework. Try the [Graphlab Create](http://graphlab.com/products/create/quick-start-guide.html) -* Nice [blogpost](http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand) by Jay Gu about using GLC boosted tree to solve kaggle bike sharing challenge: diff --git a/amalgamation/dmlc-minimum0.cc b/amalgamation/dmlc-minimum0.cc new file mode 100644 index 000000000..bce61129e --- /dev/null +++ b/amalgamation/dmlc-minimum0.cc @@ -0,0 +1,14 @@ +/*! + * Copyright 2015 by Contributors. + * \brief Minimum DMLC library Amalgamation, used for easy plugin of dmlc lib. + * Normally this is not needed. + */ +#include "../dmlc-core/src/io/line_split.cc" +#include "../dmlc-core/src/io/recordio_split.cc" +#include "../dmlc-core/src/io/input_split_base.cc" +#include "../dmlc-core/src/io/local_filesys.cc" +#include "../dmlc-core/src/data.cc" +#include "../dmlc-core/src/io.cc" +#include "../dmlc-core/src/recordio.cc" + + diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc new file mode 100644 index 000000000..7cc36c16b --- /dev/null +++ b/amalgamation/xgboost-all0.cc @@ -0,0 +1,57 @@ +/*! + * Copyright 2015 by Contributors. + * \brief XGBoost Amalgamation. + * This offers an alternative way to compile the entire library from this single file. + * + * Example usage command. + * - $(CXX) -std=c++0x -fopenmp -shared -o libxgboost.so xgboost-all0.cc -ldmlc -lrabit + * + * \author Tianqi Chen.
+ */ + +// metrics +#include "../src/metric/metric.cc" +#include "../src/metric/elementwise_metric.cc" +#include "../src/metric/multiclass_metric.cc" +#include "../src/metric/rank_metric.cc" + +// objectives +#include "../src/objective/objective.cc" +#include "../src/objective/regression_obj.cc" +#include "../src/objective/multiclass_obj.cc" +#include "../src/objective/rank_obj.cc" + +// gbms +#include "../src/gbm/gbm.cc" +#include "../src/gbm/gbtree.cc" +#include "../src/gbm/gblinear.cc" + +// data +#include "../src/data/data.cc" +#include "../src/data/simple_csr_source.cc" +#include "../src/data/simple_dmatrix.cc" +#include "../src/data/sparse_page_raw_format.cc" + +#if DMLC_ENABLE_STD_THREAD +#include "../src/data/sparse_page_source.cc" +#include "../src/data/sparse_page_dmatrix.cc" +#endif + +// trees +#include "../src/tree/tree_model.cc" +#include "../src/tree/tree_updater.cc" +#include "../src/tree/updater_colmaker.cc" +#include "../src/tree/updater_prune.cc" +#include "../src/tree/updater_refresh.cc" +#include "../src/tree/updater_sync.cc" +#include "../src/tree/updater_histmaker.cc" +#include "../src/tree/updater_skmaker.cc" + +// global +#include "../src/learner.cc" +#include "../src/logging.cc" +#include "../src/common/common.cc" + +// c_api +#include "../src/c_api/c_api.cc" +#include "../src/c_api/c_api_error.cc" diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index c1367d52e..000000000 --- a/appveyor.yml +++ /dev/null @@ -1,36 +0,0 @@ -environment: - global: - CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\python-appveyor-demo\\appveyor\\run_with_env.cmd" - DISABLE_OPENMP: 1 - VisualStudioVersion: 12.0 - - matrix: - - PYTHON: "C:\\Python27-x64" - PYTHON_VERSION: "2.7.x" # currently 2.7.9 - PYTHON_ARCH: "64" - - - PYTHON: "C:\\Python33-x64" - PYTHON_VERSION: "3.3.x" # currently 3.3.5 - PYTHON_ARCH: "64" - -platform: - - x64 - -configuration: - - Release - -install: - - cmd: git clone https://github.com/ogrisel/python-appveyor-demo - - ECHO
"Filesystem root:" - - ps: "ls \"C:/\"" - - - ECHO "Installed SDKs:" - - ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\"" - - - ps: python-appveyor-demo\appveyor\install.ps1 - - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - - "python --version" - - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" - -build: off - #project: windows\xgboost.sln \ No newline at end of file diff --git a/build.sh b/build.sh index 2a285597a..8480cd4f9 100755 --- a/build.sh +++ b/build.sh @@ -6,27 +6,14 @@ # See additional instruction in doc/build.md -#for building static OpenMP lib in MAC for easier installation in MAC -#doesn't work with XCode clang/LLVM since Apple doesn't support, -#needs brew install gcc 4.9+ with OpenMP. By default the static link is OFF -static_omp=0 -if ((${static_omp}==1)); then - rm libgomp.a - ln -s `g++ -print-file-name=libgomp.a` - make clean - make omp_mac_static=1 - echo "Successfully build multi-thread static link xgboost" - exit 0 -fi - if make; then echo "Successfully build multi-thread xgboost" else echo "-----------------------------" echo "Building multi-thread xgboost failed" echo "Start to build single-thread xgboost" - make clean - make no_omp=1 + make clean_all + make config=config/mininum.mk echo "Successfully build single-thread xgboost" echo "If you want multi-threaded version" echo "See additional instructions in doc/build.md" diff --git a/demo/README.md b/demo/README.md index 5a7a25f76..229ffc6ff 100644 --- a/demo/README.md +++ b/demo/README.md @@ -44,8 +44,15 @@ However, the parameter settings can be applied to all versions * [Multiclass classification](multiclass_classification) * [Regression](regression) * [Learning to Rank](rank) +* [Distributed Training](distributed-training) Benchmarks ---------- * [Starter script for Kaggle Higgs Boson](kaggle-higgs) * [Kaggle Tradeshift winning solution by daxiongshu](https://github.com/daxiongshu/kaggle-tradeshift-winning-solution) + +Machine Learning Challenge Winning Solutions 
+-------------------------------------------- +* XGBoost helps Vlad Mironov, Alexander Guschin to win the [CERN LHCb experiment Flavour of Physics competition](https://www.kaggle.com/c/flavours-of-physics). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/11/30/flavour-of-physics-technical-write-up-1st-place-go-polar-bears/). +* XGBoost helps Mario Filho, Josef Feigl, Lucas, Gilberto to win the [Caterpillar Tube Pricing competition](https://www.kaggle.com/c/caterpillar-tube-pricing). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/09/22/caterpillar-winners-interview-1st-place-gilberto-josef-leustagos-mario/). +* XGBoost helps Halla Yang to win the [Recruit Coupon Purchase Prediction Challenge](https://www.kaggle.com/c/coupon-purchase-prediction). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/10/21/recruit-coupon-purchase-winners-interview-2nd-place-halla-yang/). diff --git a/demo/distributed-training/README.md b/demo/distributed-training/README.md new file mode 100644 index 000000000..3926612cc --- /dev/null +++ b/demo/distributed-training/README.md @@ -0,0 +1,52 @@ +Distributed XGBoost Training +============================ +This is a tutorial on Distributed XGBoost Training. +Currently xgboost supports distributed training via CLI program with the configuration file. +There is also a plan to push distributed Python and other language bindings; please open an issue +if you are interested in contributing. + +Build XGBoost with Distributed Filesystem Support +------------------------------------------------- +To use distributed xgboost, you only need to turn the options on to build +with distributed filesystems (HDFS or S3) in ```xgboost/make/config.mk```. + +How to Use +---------- +* Input data format: LIBSVM format. The example here uses generated data in ../data folder.
+* Put the data into some distributed filesystem (S3 or HDFS) +* Use tracker script in dmlc-core/tracker to submit the jobs +* Like all other DMLC tools, xgboost supports taking a path to a folder as input argument + - All the files in the folder will be used as input +* Quick start in Hadoop YARN: run ```bash run_yarn.sh N_WORKERS N_THREADS_PER_WORKER PATH_IN_HDFS``` + +Example +------- +* [run_yarn.sh](run_yarn.sh) shows how to submit job to Hadoop via YARN. + +Single machine vs Distributed Version +------------------------------------- +If you have used xgboost (single machine version) before, this section will show you how to run xgboost on hadoop with a slight modification on conf file. +* IO: instead of reading and writing file locally, we now use HDFS, put ```hdfs://``` prefix to the address of file you like to access +* File cache: ```dmlc_yarn.py``` also provides several ways to cache necessary files, including binary file (xgboost), conf file + - ```dmlc_yarn.py``` will automatically cache files in the command line. For example, ```dmlc_yarn.py -n 3 $localPath/xgboost.dmlc mushroom.hadoop.conf``` will cache "xgboost.dmlc" and "mushroom.hadoop.conf". + - You could also use "-f" to manually cache one or more files, like ```-f file1 -f file2``` + - The local path of cached files in command is "./". +* More details of submission can be referred to the usage of ```dmlc_yarn.py```. +* The model saved by hadoop version is compatible with single machine version. + +Notes +----- +* The code is optimized with multi-threading, so you will want to run xgboost with more vcores for best performance. + - You will want to set the number of threads per worker to be the number of cores you have on each machine. + + +External Memory Version +----------------------- +XGBoost supports external memory; this will make each process cache data into local disk during computation, without taking up all the memory for storing the data. +See [external memory](https://github.com/dmlc/xgboost/tree/master/doc/external_memory.md) for syntax using external memory.
+ +You only need to add cacheprefix to the input file to enable external memory mode. For example set training data as +``` +data=hdfs:///path-to-my-data/#dtrain.cache +``` +This will make xgboost more memory efficient, allows you to run xgboost on larger-scale dataset. diff --git a/demo/distributed-training/run_yarn.sh b/demo/distributed-training/run_yarn.sh new file mode 100755 index 000000000..3d7c6bf05 --- /dev/null +++ b/demo/distributed-training/run_yarn.sh @@ -0,0 +1,33 @@ +#!/bin/bash +if [ "$#" -lt 3 ]; +then + echo "Usage: " + exit -1 +fi + +# put the local training file to HDFS +hadoop fs -mkdir $3/data +hadoop fs -put ../data/agaricus.txt.train $3/data +hadoop fs -put ../data/agaricus.txt.test $3/data + +# running rabit, pass address in hdfs +../../dmlc-core/tracker/dmlc_yarn.py -n $1 --vcores $2 ../../xgboost mushroom.hadoop.conf nthread=$2\ + data=hdfs://$3/data/agaricus.txt.train\ + eval[test]=hdfs://$3/data/agaricus.txt.test\ + model_out=hdfs://$3/mushroom.final.model + +# get the final model file +hadoop fs -get $3/mushroom.final.model final.model + +# use dmlc-core/yarn/run_hdfs_prog.py to setup approperiate env + +# output prediction task=pred +#../../xgboost.dmlc mushroom.hadoop.conf task=pred model_in=final.model test:data=../data/agaricus.txt.test +../../dmlc-core/yarn/run_hdfs_prog.py ../../xgboost mushroom.hadoop.conf task=pred model_in=final.model test:data=../data/agaricus.txt.test +# print the boosters of final.model in dump.raw.txt +#../../xgboost.dmlc mushroom.hadoop.conf task=dump model_in=final.model name_dump=dump.raw.txt +../../dmlc-core/yarn/run_hdfs_prog.py ../../xgboost mushroom.hadoop.conf task=dump model_in=final.model name_dump=dump.raw.txt +# use the feature map in printing for better visualization +#../../xgboost.dmlc mushroom.hadoop.conf task=dump model_in=final.model fmap=../data/featmap.txt name_dump=dump.nice.txt +../../dmlc-core/yarn/run_hdfs_prog.py ../../xgboost mushroom.hadoop.conf task=dump model_in=final.model 
fmap=../data/featmap.txt name_dump=dump.nice.txt +cat dump.nice.txt diff --git a/demo/guide-python/runall.sh b/demo/guide-python/runall.sh index 5c8ddf93c..21fa59de2 100755 --- a/demo/guide-python/runall.sh +++ b/demo/guide-python/runall.sh @@ -1,4 +1,5 @@ #!/bin/bash +export PYTHONPATH=PYTHONPATH:../../python-package python basic_walkthrough.py python custom_objective.py python boost_from_prediction.py @@ -9,4 +10,4 @@ python predict_leaf_indices.py python sklearn_examples.py python sklearn_parallel.py python external_memory.py -rm -rf *~ *.model *.buffer +rm -rf *~ *.model *.buffer diff --git a/dmlc-core b/dmlc-core new file mode 160000 index 000000000..ad2ddde8b --- /dev/null +++ b/dmlc-core @@ -0,0 +1 @@ +Subproject commit ad2ddde8b6624abf3007a71b2923c3925530cc81 diff --git a/doc/.gitignore b/doc/.gitignore index 382c3419f..61e15164c 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -5,3 +5,4 @@ _* doxygen parser.py *.pyc +web-data diff --git a/doc/Doxyfile b/doc/Doxyfile new file mode 100644 index 000000000..7ec79dace --- /dev/null +++ b/doc/Doxyfile @@ -0,0 +1,2353 @@ +# Doxyfile 1.8.8 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. 
The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "xgboost" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify an logo or icon that is included in +# the documentation. The maximum height of the logo should not exceed 55 pixels +# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo +# to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc/doxygen + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. 
Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +#ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. 
+# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. 
+# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description.
Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a +# new page for each member. If set to NO, the documentation of a member will be +# part of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. 
+ +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the latter case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen.
+ +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES. + +#MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word +# or globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +#AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO.
+ +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. 
If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. 
Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +#EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. 
If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO these classes will be included in the various overviews. This option has +# no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. 
If set to YES the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +#SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. 
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the +# todo list. This list is created by putting \todo commands in the +# documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the +# test list. This list is created by putting \test commands in the +# documentation. +# The default value is: YES. 
+ +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES the list +# will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). 
+# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. 
+ +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO doxygen will only warn about wrong or incomplete parameter +# documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = YES + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. 
Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. + +INPUT = include src/common + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. 
+ +FILE_PATTERNS = *.h + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = */test/* \ + logging.h + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). 
+ +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied.
+ +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER ) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +#USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. 
+# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES, then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +#SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. 
in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# compiled with the --with-libclang option. +# The default value is: NO. + +#CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +#CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES.
+ +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). 
It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. 
+# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra stylesheet files is of importance (e.g. the last +# stylesheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +#HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the stylesheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +#HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. 
+ +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler ( hhc.exe). If non-empty +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated ( +# YES) or that it should be included in the master .chm file ( NO). 
+# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated ( +# YES) or a normal table of contents ( NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. 
+ +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. 
To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. 
Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using prerendered bitmaps. Use this if you do not have LaTeX +# installed or if you want the formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +#MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax.
However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://www.mathjax.org/mathjax + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +#MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /