Merge pull request #759 from dmlc/brick

Merge Brick into master
This commit is contained in:
Tianqi Chen 2016-01-19 09:24:58 -08:00
commit ef4dcce737
258 changed files with 13534 additions and 21765 deletions

.gitignore vendored (11 changes)

@ -25,7 +25,6 @@
*group
*rar
*vali
*data
*sdf
Release
*exe*
@ -36,7 +35,6 @@ ipch
*log
Debug
*suo
*test*
.Rhistory
*.dll
*i386
@ -51,12 +49,9 @@ Debug
./xgboost
./xgboost.mpi
./xgboost.mock
rabit
#.Rbuildignore
R-package.Rproj
*.cache*
R-package/inst
R-package/src
#java
java/xgboost4j/target
java/xgboost4j/tmp
@ -65,9 +60,13 @@ java/xgboost4j-demo/data/
java/xgboost4j-demo/tmp/
java/xgboost4j-demo/model/
nb-configuration*
dmlc-core
# Eclipse
.project
.cproject
.pydevproject
.settings/
build
config.mk
xgboost
*.data
build_plugin

.gitmodules vendored (new file, 6 lines)

@ -0,0 +1,6 @@
[submodule "dmlc-core"]
path = dmlc-core
url = https://github.com/dmlc/dmlc-core
[submodule "rabit"]
path = rabit
url = https://github.com/dmlc/rabit

.travis.yml

@ -1,4 +1,5 @@
sudo: true
# disable sudo for container build.
sudo: false
# Enabling test on Linux and OS X
os:
@ -8,51 +9,60 @@ os:
# Use Build Matrix to do lint and build separately
env:
matrix:
- TASK=lint LINT_LANG=cpp
- TASK=lint LINT_LANG=python
- TASK=R-package CXX=g++
- TASK=python-package CXX=g++
- TASK=python-package3 CXX=g++
- TASK=java-package CXX=g++
- TASK=build CXX=g++
- TASK=build-with-dmlc CXX=g++
# code lint
- TASK=lint
# r package test
- TASK=r_test
# python package test
- TASK=python_test
# java package test
- TASK=java_test
os:
- linux
- osx
matrix:
exclude:
- os: osx
env: TASK=lint
- os: linux
env: TASK=r_test
- os: osx
env: TASK=java_test
# dependent apt packages
addons:
apt:
packages:
- doxygen
- libopenmpi-dev
- wget
- libcurl4-openssl-dev
- unzip
- python-numpy
- python-scipy
- graphviz
before_install:
- scripts/travis_osx_install.sh
- git clone https://github.com/dmlc/dmlc-core
- export TRAVIS=dmlc-core/scripts/travis/
- source dmlc-core/scripts/travis/travis_setup_env.sh
- export PYTHONPATH=${PYTHONPATH}:${PWD}/python-package
- source ${TRAVIS}/travis_setup_env.sh
install:
- pip install cpplint pylint --user `whoami`
- source tests/travis/setup.sh
script:
- tests/travis/run_test.sh
script: scripts/travis_script.sh
cache:
directories:
- ${HOME}/.cache/usr
- ${HOME}/.cache/pip
before_cache:
- dmlc-core/scripts/travis/travis_before_cache.sh
after_failure:
- scripts/travis_after_failure.sh
- tests/travis/travis_after_failure.sh
notifications:
email:
on_success: change
on_failure: always

Makefile (291 changes)

@ -1,18 +1,60 @@
export CC = $(if $(shell which gcc-5 2>/dev/null),gcc-5,gcc)
export CXX = $(if $(shell which g++-5 2>/dev/null),g++-5,g++)
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops
# java include path
export JAVAINCFLAGS = -I${JAVA_HOME}/include -I./java
ifeq ($(OS), Windows_NT)
export CXX = g++ -m64
export CC = gcc -m64
ifndef config
ifneq ("$(wildcard ./config.mk)","")
config = config.mk
else
config = make/config.mk
endif
endif
UNAME= $(shell uname)
ifndef DMLC_CORE
DMLC_CORE = dmlc-core
endif
ifndef RABIT
RABIT = rabit
endif
ROOTDIR = $(CURDIR)
ifeq ($(OS), Windows_NT)
UNAME="Windows"
else
UNAME=$(shell uname)
endif
include $(config)
ifeq ($(USE_OPENMP), 0)
export NO_OPENMP = 1
endif
include $(DMLC_CORE)/make/dmlc.mk
# include the plugins
include $(XGB_PLUGINS)
# use customized config file
ifndef CC
export CC = $(if $(shell which gcc-5),gcc-5,gcc)
endif
ifndef CXX
export CXX = $(if $(shell which g++-5),g++-5,g++)
endif
export LDFLAGS= -pthread -lm $(ADD_LDFLAGS) $(DMLC_LDFLAGS) $(PLUGIN_LDFLAGS)
export CFLAGS= -std=c++0x -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops -Iinclude $(ADD_CFLAGS) $(PLUGIN_CFLAGS)
CFLAGS += -I$(DMLC_CORE)/include -I$(RABIT)/include
#java include path
export JAVAINCFLAGS = -I${JAVA_HOME}/include -I./java
ifndef LINT_LANG
LINT_LANG= "all"
endif
ifneq ($(UNAME), Windows)
CFLAGS += -fPIC
XGBOOST_DYLIB = lib/libxgboost.so
else
XGBOOST_DYLIB = lib/libxgboost.dll
endif
ifeq ($(UNAME), Linux)
LDFLAGS += -lrt
@ -23,192 +65,115 @@ ifeq ($(UNAME), Darwin)
JAVAINCFLAGS += -I${JAVA_HOME}/include/darwin
endif
ifeq ($(no_omp),1)
ifeq ($(USE_OPENMP), 1)
CFLAGS += -fopenmp
else
CFLAGS += -DDISABLE_OPENMP
else
#CFLAGS += -fopenmp
ifeq ($(omp_mac_static),1)
#CFLAGS += -fopenmp -Bstatic
CFLAGS += -static-libgcc -static-libstdc++ -L. -fopenmp
#LDFLAGS += -Wl,--whole-archive -lpthread -Wl --no-whole-archive
else
CFLAGS += -fopenmp
endif
endif
# by default use c++11
ifeq ($(cxx11),1)
CFLAGS += -std=c++11
endif
# handling dmlc
ifdef dmlc
ifndef config
ifneq ("$(wildcard $(dmlc)/config.mk)","")
config = $(dmlc)/config.mk
else
config = $(dmlc)/make/config.mk
endif
endif
include $(config)
include $(dmlc)/make/dmlc.mk
LDFLAGS+= $(DMLC_LDFLAGS)
LIBDMLC=$(dmlc)/libdmlc.a
else
LIBDMLC=dmlc_simple.o
endif
ifndef WITH_FPIC
WITH_FPIC = 1
endif
ifeq ($(WITH_FPIC), 1)
CFLAGS += -fPIC
endif
ifeq ($(OS), Windows_NT)
LIBRABIT = subtree/rabit/lib/librabit_empty.a
SLIB = wrapper/xgboost_wrapper.dll
else
LIBRABIT = subtree/rabit/lib/librabit.a
SLIB = wrapper/libxgboostwrapper.so
endif
# java lib
JLIB = java/libxgboost4j.so
# specify tensor path
BIN = xgboost
MOCKBIN = xgboost.mock
OBJ = updater.o gbm.o io.o main.o dmlc_simple.o
MPIBIN =
ifeq ($(WITH_FPIC), 1)
TARGET = $(BIN) $(OBJ) $(SLIB)
else
TARGET = $(BIN)
endif
.PHONY: clean all lint clean_all doxygen rcpplint Rpack Rbuild Rcheck java
ifndef LINT_LANG
LINT_LANG= "all"
endif
.PHONY: clean all mpi python Rpack lint
all: lib/libxgboost.a $(XGBOOST_DYLIB) xgboost
all: $(TARGET)
mpi: $(MPIBIN)
$(DMLC_CORE)/libdmlc.a:
cd $(DMLC_CORE); make libdmlc.a config=$(ROOTDIR)/$(config); cd $(ROOTDIR)
python: wrapper/libxgboostwrapper.so
# now the wrapper takes in two files. io and wrapper part
updater.o: src/tree/updater.cpp src/tree/*.hpp src/*.h src/tree/*.h src/utils/*.h
dmlc_simple.o: src/io/dmlc_simple.cpp src/utils/*.h
gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h
io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h
main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h
xgboost: updater.o gbm.o io.o main.o $(LIBRABIT) $(LIBDMLC)
wrapper/xgboost_wrapper.dll wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
$(RABIT)/lib/$(LIB_RABIT):
cd $(RABIT); make lib/$(LIB_RABIT); cd $(ROOTDIR)
java: java/libxgboost4j.so
java/libxgboost4j.so: java/xgboost4j_wrapper.cpp wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
# dependency on rabit
subtree/rabit/lib/librabit.a: subtree/rabit/src/engine.cc
cd subtree/rabit; make lib/librabit.a; cd ../..
subtree/rabit/lib/librabit_empty.a: subtree/rabit/src/engine_empty.cc
cd subtree/rabit; make lib/librabit_empty.a; cd ../..
subtree/rabit/lib/librabit_mock.a: subtree/rabit/src/engine_mock.cc
cd subtree/rabit; make lib/librabit_mock.a; cd ../..
subtree/rabit/lib/librabit_mpi.a: subtree/rabit/src/engine_mpi.cc
cd subtree/rabit; make lib/librabit_mpi.a; cd ../..
SRC = $(wildcard src/*.cc src/*/*.cc)
ALL_OBJ = $(patsubst src/%.cc, build/%.o, $(SRC)) $(PLUGIN_OBJS)
AMALGA_OBJ = amalgamation/xgboost-all0.o
LIB_DEP = $(DMLC_CORE)/libdmlc.a $(RABIT)/lib/$(LIB_RABIT)
ALL_DEP = $(filter-out build/cli_main.o, $(ALL_OBJ)) $(LIB_DEP)
CLI_OBJ = build/cli_main.o
$(BIN) :
$(CXX) $(CFLAGS) -fPIC -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
build/%.o: src/%.cc
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -MM -MT build/$*.o $< >build/$*.d
$(CXX) -c $(CFLAGS) -c $< -o $@
$(MOCKBIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
build_plugin/%.o: plugin/%.cc
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -MM -MT build_plugin/$*.o $< >build_plugin/$*.d
$(CXX) -c $(CFLAGS) -c $< -o $@
$(SLIB) :
$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS)
# This should be equivalent to $(ALL_OBJ) except for build/cli_main.o
amalgamation/xgboost-all0.o: amalgamation/xgboost-all0.cc
$(CXX) -c $(CFLAGS) -c $< -o $@
$(JLIB) :
$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS)
# Equivalent to lib/libxgboost_all.so
lib/libxgboost_all.so: $(AMALGA_OBJ) $(LIB_DEP)
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
lib/libxgboost.a: $(ALL_DEP)
@mkdir -p $(@D)
ar crv $@ $(filter %.o, $?)
$(MPIOBJ) :
$(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
lib/libxgboost.dll lib/libxgboost.so: $(ALL_DEP)
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
$(MPIBIN) :
$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
java/libxgboost4j.so: java/xgboost4j_wrapper.cpp $(ALL_DEP)
$(CXX) $(CFLAGS) $(JAVAINCFLAGS) -shared -o $@ $(filter %.cpp %.o %.a, $^) $(LDFLAGS)
install:
cp -f -r $(BIN) $(INSTALL_PATH)
xgboost: $(CLI_OBJ) $(ALL_DEP)
$(CXX) $(CFLAGS) -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
rcpplint:
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
lint: rcpplint
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src plugin
clean:
$(RM) -rf build build_plugin lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o xgboost
clean_all: clean
cd $(DMLC_CORE); make clean; cd $(ROOTDIR)
cd $(RABIT); make clean; cd $(ROOTDIR)
doxygen:
doxygen doc/Doxyfile
# Script to make a clean installable R package.
Rpack:
make clean
cd subtree/rabit;make clean;cd ..
make clean_all
rm -rf xgboost xgboost*.tar.gz
cp -r R-package xgboost
rm -rf xgboost/src/*.o xgboost/src/*.so xgboost/src/*.dll
rm -rf xgboost/src/*/*.o
rm -rf subtree/rabit/src/*.o
rm -rf xgboost/demo/*.model xgboost/demo/*.buffer xgboost/demo/*.txt
rm -rf xgboost/demo/runall.R
cp -r src xgboost/src/src
mkdir xgboost/src/subtree
mkdir xgboost/src/subtree/rabit
cp -r subtree/rabit/include xgboost/src/subtree/rabit/include
cp -r subtree/rabit/src xgboost/src/subtree/rabit/src
rm -rf xgboost/src/subtree/rabit/src/*.o
mkdir xgboost/src/wrapper
cp wrapper/xgboost_wrapper.h xgboost/src/wrapper
cp wrapper/xgboost_wrapper.cpp xgboost/src/wrapper
cp -r include xgboost/src/include
cp -r amalgamation xgboost/src/amalgamation
mkdir -p xgboost/src/rabit
cp -r rabit/include xgboost/src/rabit/include
cp -r rabit/src xgboost/src/rabit/src
rm -rf xgboost/src/rabit/src/*.o
mkdir -p xgboost/src/dmlc-core
cp -r dmlc-core/include xgboost/src/dmlc-core/include
cp -r dmlc-core/src xgboost/src/dmlc-core/src
cp ./LICENSE xgboost
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' | sed '3s/.*/ENABLE_STD_THREAD=0/' > xgboost/src/Makevars
cp xgboost/src/Makevars xgboost/src/Makevars.win
# R CMD build --no-build-vignettes xgboost
# R CMD build xgboost
# rm -rf xgboost
# R CMD check --as-cran xgboost*.tar.gz
Rbuild:
make Rpack
R CMD build xgboost
R CMD build --no-build-vignettes xgboost
rm -rf xgboost
Rcheck:
make Rbuild
R CMD check --as-cran xgboost*.tar.gz
R CMD check xgboost*.tar.gz
pythonpack:
#for pip maintainer only
cd subtree/rabit;make clean;cd ..
rm -rf xgboost-deploy xgboost*.tar.gz
cp -r python-package xgboost-deploy
#cp *.md xgboost-deploy/
cp LICENSE xgboost-deploy/
cp Makefile xgboost-deploy/xgboost
cp -r wrapper xgboost-deploy/xgboost
cp -r subtree xgboost-deploy/xgboost
cp -r multi-node xgboost-deploy/xgboost
cp -r windows xgboost-deploy/xgboost
cp -r src xgboost-deploy/xgboost
cp python-package/setup_pip.py xgboost-deploy/setup.py
#make python
pythonbuild:
make pythonpack
python setup.py install
pythoncheck:
make pythonbuild
python -c 'import xgboost;print xgboost.core.find_lib_path()'
# lint requires dmlc to be in current folder
lint:
dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package python-package
clean:
$(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~
cd subtree/rabit; make clean; cd ..
-include build/*.d
-include build/*/*.d
-include build_plugin/*/*.d

CHANGES.md

@ -1,42 +1,30 @@
Change Log
==========
XGBoost Change Log
==================
xgboost-0.1
-----------
* Initial release
This file records the changes in the xgboost library in reverse chronological order.
xgboost-0.2x
------------
* Python module
* Weighted sample instances
* Initial version of pairwise rank
## brick: next release candidate
* Major refactor of core library.
- Goal: more flexible and modular code as a portable library.
- Switch to use of c++11 standard code.
- Random number generator defaults to ```std::mt19937```.
- Share the data loading pipeline and logging module from dmlc-core.
- Enable registry pattern to allow optional plugins for objective, metric, tree constructor, and data loader.
- Future plugin modules can be put into xgboost/plugin and registered back to the library.
- Replace most raw pointers with smart pointers, for RAII safety.
* Change library name to libxgboost.so
* Backward compatibility
- The binary buffer file is not backward compatible with previous version.
- The model file is backward compatible on 64 bit platforms.
* The model file is compatible between 64/32 bit platforms (not yet tested).
* External memory version and other advanced features will be exposed to the R library as well on Linux.
- Previously some of the features were blocked due to C++11 and threading limits.
- The Windows version is still blocked because Rtools does not support ```std::thread```.
* rabit and dmlc-core are maintained through git submodule
- Anyone can open a PR to update these dependencies now.
xgboost-0.3
-----------
* Faster tree construction module
- Allows subsampling columns during tree construction via ```bst:col_samplebytree=ratio```
* Support for boosting from initial predictions
* Experimental version of LambdaRank
* Linear booster is now parallelized, using parallel coordinate descent.
* Add [Code Guide](src/README.md) for customizing objective function and evaluation
* Add R module
## v0.47 (2016.01.14)
xgboost-0.4
-----------
* Distributed version of xgboost that runs on YARN, scales to billions of examples
* Direct save/load data and model from/to S3 and HDFS
* Feature importance visualization in R module, by Michael Benesty
* Predict leaf index
* Poisson regression for counts data
* Early stopping option in training
* Native save load support in R and python
- xgboost models now can be saved using save/load in R
- xgboost python model is now picklable
* sklearn wrapper is supported in python module
* Experimental External memory version
xgboost-0.47
------------
* Changes in R library
- fixed possible problem of poisson regression.
- switched from 0 to NA for missing values.
@ -52,10 +40,44 @@ xgboost-0.47
- improved compatibility in sklearn module.
- additional parameters added for sklearn wrapper.
- added pip installation functionality.
- supports more Pandas DataFrame dtypes.
- added best_ntree_limit attribute, in addition to best_score and best_iteration.
* Java API is ready for use
* Added more test cases and continuous integration to make each build more robust.
Ongoing at master
------------------
## v0.4 (2015.05.11)
* Distributed version of xgboost that runs on YARN, scales to billions of examples
* Direct save/load data and model from/to S3 and HDFS
* Feature importance visualization in R module, by Michael Benesty
* Predict leaf index
* Poisson regression for counts data
* Early stopping option in training
* Native save load support in R and python
- xgboost models now can be saved using save/load in R
- xgboost python model is now picklable
* sklearn wrapper is supported in python module
* Experimental External memory version
## v0.3 (2014.09.07)
* Faster tree construction module
- Allows subsampling columns during tree construction via ```bst:col_samplebytree=ratio```
* Support for boosting from initial predictions
* Experimental version of LambdaRank
* Linear booster is now parallelized, using parallel coordinate descent.
* Add [Code Guide](src/README.md) for customizing objective function and evaluation
* Add R module
## v0.2x (2014.05.20)
* Python module
* Weighted sample instances
* Initial version of pairwise rank
## v0.1 (2014.03.26)
* Initial release

R-package/README.md

@ -3,6 +3,12 @@ R package for xgboost
[![CRAN Status Badge](http://www.r-pkg.org/badges/version/xgboost)](http://cran.r-project.org/web/packages/xgboost)
[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/xgboost)](http://cran.rstudio.com/web/packages/xgboost/index.html)
[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](http://xgboost.readthedocs.org/en/latest/R-package/index.html)
Resources
---------
* [XGBoost R Package Online Documentation](http://xgboost.readthedocs.org/en/latest/R-package/index.html)
- Check this out for detailed documents, examples and tutorials.
Installation
------------
@ -16,7 +22,7 @@ install.packages('xgboost')
For an up-to-date version, please install from GitHub. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
```r
devtools::install_github('dmlc/xgboost',subdir='R-package')
devtools::install_git('git://github.com/dmlc/xgboost',subdir='R-package')
```
Examples
@ -24,21 +30,3 @@ Examples
* Please visit [walk through example](demo).
* See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd).
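Before diving into those, a minimal end-to-end call looks like the sketch below (it assumes the `agaricus` demo data bundled with the package; argument names such as `nrounds` can vary slightly across versions):

```r
library(xgboost)
# Demo data shipped with the package
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
# Train a tiny binary classifier: 2 trees of depth 2
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max.depth = 2, eta = 1, nrounds = 2,
               objective = "binary:logistic")
# Predict on held-out data
pred <- predict(bst, agaricus.test$data)
```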
Notes
-----
If you face an issue installing the package using ```devtools::install_github```, something like this (even after updating libxml and RCurl as a lot of forums say) -
```
devtools::install_github('dmlc/xgboost',subdir='R-package')
Downloading github repo dmlc/xgboost@master
Error in function (type, msg, asError = TRUE) :
Peer certificate cannot be authenticated with given CA certificates
```
To get around this you can build the package locally as mentioned [here](https://github.com/dmlc/xgboost/issues/347) -
```
1. Clone the current repository and set your workspace to xgboost/R-package/
2. Run R CMD INSTALL --build . in terminal to get the tarball.
3. Run install.packages('path_to_the_tarball',repo=NULL) in R to install.
```

R-package/src/Makevars

@ -1,8 +1,17 @@
# package root
PKGROOT=../../
ENABLE_STD_THREAD=1
# _*_ mode: Makefile; _*_
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ -I$(PKGROOT)
CXX_STD = CXX11
XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\
-DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\
-DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\
-DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_
PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS)
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o $(PKGROOT)/src/io/dmlc_simple.o
OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o\
$(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o $(PKGROOT)/rabit/src/engine_empty.o

R-package/src/Makevars.win

@ -1,5 +1,6 @@
# package root
PKGROOT=./
ENABLE_STD_THREAD=0
# _*_ mode: Makefile; _*_
# This file is only used for windows compilation from github
@ -9,11 +10,22 @@ all: $(SHLIB)
$(SHLIB): xgblib
xgblib:
cp -r ../../src .
cp -r ../../wrapper .
cp -r ../../subtree .
cp -r ../../rabit .
cp -r ../../dmlc-core .
cp -r ../../include .
cp -r ../../amalgamation .
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ -I$(PKGROOT) -I../..
CXX_STD = CXX11
XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\
-DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\
-DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\
-DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_
PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS)
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS)
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o $(PKGROOT)/src/io/dmlc_simple.o
OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o\
$(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o $(PKGROOT)/rabit/src/engine_empty.o
$(OBJECTS) : xgblib

R-package/src/xgboost_R.cc (new file, 354 lines)

@ -0,0 +1,354 @@
// Copyright (c) 2014 by Contributors
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <xgboost/c_api.h>
#include <vector>
#include <string>
#include <utility>
#include <cstring>
#include <cstdio>
#include <sstream>
#include "./xgboost_R.h"
/*!
* \brief macro to annotate begin of api
*/
#define R_API_BEGIN() \
GetRNGstate(); \
try {
/*!
* \brief macro to annotate end of api
*/
#define R_API_END() \
} catch(dmlc::Error& e) { \
PutRNGstate(); \
error(e.what()); \
} \
PutRNGstate();
/*!
* \brief macro to check the call.
*/
#define CHECK_CALL(x) \
if ((x) != 0) { \
error(XGBGetLastError()); \
}
using namespace dmlc;
SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
}
void _DMatrixFinalizer(SEXP ext) {
R_API_BEGIN();
if (R_ExternalPtrAddr(ext) == NULL) return;
CHECK_CALL(XGDMatrixFree(R_ExternalPtrAddr(ext)));
R_ClearExternalPtr(ext);
R_API_END();
}
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
SEXP ret;
R_API_BEGIN();
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
R_API_END();
return ret;
}
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing) {
SEXP ret;
R_API_BEGIN();
SEXP dim = getAttrib(mat, R_DimSymbol);
size_t nrow = static_cast<size_t>(INTEGER(dim)[0]);
size_t ncol = static_cast<size_t>(INTEGER(dim)[1]);
double *din = REAL(mat);
std::vector<float> data(nrow * ncol);
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nrow; ++i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol +j] = din[i + nrow * j];
}
}
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
R_API_END();
return ret;
}
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data) {
SEXP ret;
R_API_BEGIN();
const int *p_indptr = INTEGER(indptr);
const int *p_indices = INTEGER(indices);
const double *p_data = REAL(data);
int nindptr = length(indptr);
int ndata = length(data);
std::vector<bst_ulong> col_ptr_(nindptr);
std::vector<unsigned> indices_(ndata);
std::vector<float> data_(ndata);
for (int i = 0; i < nindptr; ++i) {
col_ptr_[i] = static_cast<bst_ulong>(p_indptr[i]);
}
#pragma omp parallel for schedule(static)
for (int i = 0; i < ndata; ++i) {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
}
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
&handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
R_API_END();
return ret;
}
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
SEXP ret;
R_API_BEGIN();
int len = length(idxset);
std::vector<int> idxvec(len);
for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1;
}
DMatrixHandle res;
CHECK_CALL(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res));
ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
R_API_END();
return ret;
}
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
R_API_BEGIN();
CHECK_CALL(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)),
asInteger(silent)));
R_API_END();
}
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
R_API_BEGIN();
int len = length(array);
const char *name = CHAR(asChar(field));
if (!strcmp("group", name)) {
std::vector<unsigned> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
}
CHECK_CALL(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len));
} else {
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = REAL(array)[i];
}
CHECK_CALL(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len));
}
R_API_END();
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
SEXP ret;
R_API_BEGIN();
bst_ulong olen;
const float *res;
CHECK_CALL(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&olen,
&res));
ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
R_API_END();
return ret;
}
SEXP XGDMatrixNumRow_R(SEXP handle) {
bst_ulong nrow;
R_API_BEGIN();
CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
R_API_END();
return ScalarInteger(static_cast<int>(nrow));
}
// functions related to booster
void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
CHECK_CALL(XGBoosterFree(R_ExternalPtrAddr(ext)));
R_ClearExternalPtr(ext);
}
SEXP XGBoosterCreate_R(SEXP dmats) {
SEXP ret;
R_API_BEGIN();
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
BoosterHandle handle;
CHECK_CALL(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1);
R_API_END();
return ret;
}
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
R_API_BEGIN();
CHECK_CALL(XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val))));
R_API_END();
}
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
R_API_BEGIN();
CHECK_CALL(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain)));
R_API_END();
}
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
R_API_BEGIN();
CHECK_EQ(length(grad), length(hess))
<< "gradient and hess must have same length";
int len = length(grad);
std::vector<float> tgrad(len), thess(len);
#pragma omp parallel for schedule(static)
for (int j = 0; j < len; ++j) {
tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j];
}
CHECK_CALL(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess),
len));
R_API_END();
}
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
const char *ret;
R_API_BEGIN();
CHECK_EQ(length(dmats), length(evnames))
<< "dmats and evnams must have same length";
int len = length(dmats);
std::vector<void*> vec_dmats;
std::vector<std::string> vec_names;
std::vector<const char*> vec_sptr;
for (int i = 0; i < len; ++i) {
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
}
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
CHECK_CALL(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
R_API_END();
return mkString(ret);
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
SEXP ret;
R_API_BEGIN();
bst_ulong olen;
const float *res;
CHECK_CALL(XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
&olen, &res));
ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
R_API_END();
return ret;
}
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
R_API_BEGIN();
CHECK_CALL(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
R_API_END();
}
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
R_API_BEGIN();
CHECK_CALL(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
R_API_END();
}
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
R_API_BEGIN();
CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
RAW(raw),
length(raw)));
R_API_END();
}
SEXP XGBoosterModelToRaw_R(SEXP handle) {
SEXP ret;
R_API_BEGIN();
bst_ulong olen;
const char *raw;
CHECK_CALL(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) {
memcpy(RAW(ret), raw, olen);
}
UNPROTECT(1);
R_API_END();
return ret;
}
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
SEXP out;
R_API_BEGIN();
bst_ulong olen;
const char **res;
CHECK_CALL(XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
asInteger(with_stats),
&olen, &res));
out = PROTECT(allocVector(STRSXP, olen));
for (size_t i = 0; i < olen; ++i) {
std::stringstream stream;
stream << "booster[" << i <<"]\n" << res[i];
SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
}
UNPROTECT(1);
R_API_END();
return out;
}
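`XGBoosterBoostOneIter_R` above is what lets R users supply a custom objective. A hedged sketch of the R-level usage (the `obj` callback convention follows the xgboost demos; exact argument names may differ by version):

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)

# Log-loss objective: return first- and second-order gradients; xgb.train
# feeds them through XGBoosterBoostOneIter_R instead of UpdateOneIter.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))  # sigmoid of the raw margin
  grad <- preds - labels          # first-order gradient
  hess <- preds * (1 - preds)     # second-order gradient
  list(grad = grad, hess = hess)
}

bst <- xgb.train(params = list(max.depth = 2, eta = 1), data = dtrain,
                 nrounds = 2, obj = logregobj)
```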

R-package/src/xgboost_R.cpp (deleted file)

@ -1,344 +0,0 @@
// Copyright (c) 2014 by Contributors
#include <vector>
#include <string>
#include <utility>
#include <cstring>
#include <cstdio>
#include <sstream>
#include "wrapper/xgboost_wrapper.h"
#include "src/utils/utils.h"
#include "src/utils/omp.h"
#include "xgboost_R.h"
using namespace std;
using namespace xgboost;
extern "C" {
void XGBoostAssert_R(int exp, const char *fmt, ...);
void XGBoostCheck_R(int exp, const char *fmt, ...);
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...);
}
// implements error handling
namespace xgboost {
namespace utils {
extern "C" {
void (*Printf)(const char *fmt, ...) = Rprintf;
int (*SPrintf)(char *buf, size_t size, const char *fmt, ...) = XGBoostSPrintf_R;
void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R;
void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R;
void (*Error)(const char *fmt, ...) = error;
}
bool CheckNAN(double v) {
return ISNAN(v);
}
double LogGamma(double v) {
return lgammafn(v);
}
} // namespace utils
namespace random {
void Seed(unsigned seed) {
// warning("parameter seed is ignored, please set random seed using set.seed");
}
double Uniform(void) {
return unif_rand();
}
double Normal(void) {
return norm_rand();
}
} // namespace random
} // namespace xgboost
// call before wrapper starts
inline void _WrapperBegin(void) {
GetRNGstate();
}
// call after wrapper starts
inline void _WrapperEnd(void) {
PutRNGstate();
}
// do nothing, check error
inline void CheckErr(int ret) {
}
extern "C" {
SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
}
void _DMatrixFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
XGDMatrixFree(R_ExternalPtrAddr(ext));
R_ClearExternalPtr(ext);
}
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
_WrapperBegin();
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing) {
_WrapperBegin();
SEXP dim = getAttrib(mat, R_DimSymbol);
size_t nrow = static_cast<size_t>(INTEGER(dim)[0]);
size_t ncol = static_cast<size_t>(INTEGER(dim)[1]);
double *din = REAL(mat);
std::vector<float> data(nrow * ncol);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nrow; ++i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol +j] = din[i + nrow * j];
}
}
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data) {
_WrapperBegin();
const int *p_indptr = INTEGER(indptr);
const int *p_indices = INTEGER(indices);
const double *p_data = REAL(data);
int nindptr = length(indptr);
int ndata = length(data);
std::vector<bst_ulong> col_ptr_(nindptr);
std::vector<unsigned> indices_(ndata);
std::vector<float> data_(ndata);
for (int i = 0; i < nindptr; ++i) {
col_ptr_[i] = static_cast<bst_ulong>(p_indptr[i]);
}
#pragma omp parallel for schedule(static)
for (int i = 0; i < ndata; ++i) {
indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]);
}
DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
&handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
_WrapperBegin();
int len = length(idxset);
std::vector<int> idxvec(len);
for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1;
}
DMatrixHandle res;
CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
_WrapperBegin();
CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent)));
_WrapperEnd();
}
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
_WrapperBegin();
int len = length(array);
const char *name = CHAR(asChar(field));
if (!strcmp("group", name)) {
std::vector<unsigned> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
}
CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len));
} else {
std::vector<float> vec(len);
#pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) {
vec[i] = REAL(array)[i];
}
CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
BeginPtr(vec), len));
}
_WrapperEnd();
}
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
_WrapperBegin();
bst_ulong olen;
const float *res;
CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&olen,
&res));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
return ret;
}
SEXP XGDMatrixNumRow_R(SEXP handle) {
bst_ulong nrow;
CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
return ScalarInteger(static_cast<int>(nrow));
}
// functions related to booster
void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext)));
R_ClearExternalPtr(ext);
}
SEXP XGBoosterCreate_R(SEXP dmats) {
_WrapperBegin();
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
BoosterHandle handle;
CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
_WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1);
return ret;
}
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
_WrapperBegin();
CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val))));
_WrapperEnd();
}
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
_WrapperBegin();
CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
R_ExternalPtrAddr(dtrain)));
_WrapperEnd();
}
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
_WrapperBegin();
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
int len = length(grad);
std::vector<float> tgrad(len), thess(len);
#pragma omp parallel for schedule(static)
for (int j = 0; j < len; ++j) {
tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j];
}
CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess),
len));
_WrapperEnd();
}
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
_WrapperBegin();
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
int len = length(dmats);
std::vector<void*> vec_dmats;
std::vector<std::string> vec_names;
std::vector<const char*> vec_sptr;
for (int i = 0; i < len; ++i) {
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
}
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
}
const char *ret;
CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
_WrapperEnd();
return mkString(ret);
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
_WrapperBegin();
bst_ulong olen;
const float *res;
CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
&olen, &res));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
}
UNPROTECT(1);
return ret;
}
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd();
}
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
_WrapperBegin();
CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd();
}
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
_WrapperBegin();
XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
RAW(raw),
length(raw));
_WrapperEnd();
}
SEXP XGBoosterModelToRaw_R(SEXP handle) {
bst_ulong olen;
_WrapperBegin();
const char *raw;
CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
_WrapperEnd();
SEXP ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) {
memcpy(RAW(ret), raw, olen);
}
UNPROTECT(1);
return ret;
}
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
_WrapperBegin();
bst_ulong olen;
const char **res;
CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
asInteger(with_stats),
&olen, &res));
_WrapperEnd();
SEXP out = PROTECT(allocVector(STRSXP, olen));
for (size_t i = 0; i < olen; ++i) {
stringstream stream;
stream << "booster[" << i <<"]\n" << res[i];
SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
}
UNPROTECT(1);
return out;
}
}

R-package/src/xgboost_R.h

@ -4,155 +4,171 @@
* \author Tianqi Chen
* \brief R wrapper of xgboost
*/
#ifndef XGBOOST_WRAPPER_R_H_ // NOLINT(*)
#define XGBOOST_WRAPPER_R_H_ // NOLINT(*)
#ifndef XGBOOST_R_H_ // NOLINT(*)
#define XGBOOST_R_H_ // NOLINT(*)
extern "C" {
#include <Rinternals.h>
#include <R_ext/Random.h>
#include <Rmath.h>
}
#include <xgboost/c_api.h>
extern "C" {
/*!
* \brief check whether a handle is NULL
* \param handle
* \return whether it is null ptr
*/
XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle);
/*!
* \brief load a data matrix
* \param fname name of the content
* \param silent whether print messages
* \return a loaded data matrix
*/
XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
/*!
* \brief create matrix content from dense matrix
* This assumes the matrix is stored in column major format
* \param data R Matrix object
* \param missing which value to represent missing value
* \return created dmatrix
*/
XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing);
/*!
* \brief create a matrix content from CSC format
* \param indptr pointer to column headers
* \param indices row indices
* \param data content of the data
* \return created dmatrix
*/
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
* \return a sliced new matrix
*/
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
*/
XGB_DLL void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
/*!
* \brief set information to dmatrix
* \param handle a instance of data matrix
* \param field field name, can be label, weight
* \param array pointer to float vector
*/
XGB_DLL void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array);
/*!
* \brief get info vector from matrix
* \param handle a instance of data matrix
* \param field field name
* \return info vector
*/
XGB_DLL SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
/*!
* \brief return number of rows
* \param handle a instance of data matrix
*/
XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle);
/*!
* \brief create xgboost learner
* \param dmats a list of dmatrix handles that will be cached
*/
XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
XGB_DLL void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
* \param dtrain training data
*/
XGB_DLL void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
*/
XGB_DLL void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats list of handles to dmatrices
* \param evname name of evaluation
* \return the string containing evaluation statistics
*/
XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param option_mask output_margin:1 predict_leaf:2
* \param ntree_limit limit number of trees used in prediction
*/
XGB_DLL SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
XGB_DLL void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
XGB_DLL void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
/*!
* \brief load model from raw array
* \param handle handle
*/
XGB_DLL void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw);
/*!
* \brief save model into R's raw array
* \param handle handle
* \return raw array
*/
SEXP XGCheckNullPtr_R(SEXP handle);
/*!
* \brief load a data matrix
* \param fname name of the content
* \param silent whether print messages
* \return a loaded data matrix
*/
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
/*!
* \brief create matrix content from dense matrix
* This assumes the matrix is stored in column major format
* \param data R Matrix object
* \param missing which value to represent missing value
* \return created dmatrix
*/
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
SEXP missing);
/*!
* \brief create a matrix content from CSC format
* \param indptr pointer to column headers
* \param indices row indices
* \param data content of the data
* \return created dmatrix
*/
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
SEXP indices,
SEXP data);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
* \return a sliced new matrix
*/
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset);
/*!
* \brief load a data matrix into binary file
* \param handle a instance of data matrix
* \param fname file name
* \param silent print statistics when saving
*/
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
/*!
* \brief set information to dmatrix
* \param handle a instance of data matrix
* \param field field name, can be label, weight
* \param array pointer to float vector
*/
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array);
/*!
* \brief get info vector from matrix
* \param handle a instance of data matrix
* \param field field name
* \return info vector
*/
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
/*!
* \brief return number of rows
* \param handle a instance of data matrix
*/
SEXP XGDMatrixNumRow_R(SEXP handle);
/*!
* \brief create xgboost learner
* \param dmats a list of dmatrix handles that will be cached
*/
SEXP XGBoosterCreate_R(SEXP dmats);
/*!
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
*/
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
/*!
* \brief update the model in one round using dtrain
* \param handle handle
* \param iter current iteration rounds
* \param dtrain training data
*/
void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain);
/*!
* \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function
* \param handle handle
* \param dtrain training data
* \param grad gradient statistics
* \param hess second order gradient statistics
*/
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
/*!
* \brief get evaluation statistics for xgboost
* \param handle handle
* \param iter current iteration rounds
* \param dmats list of handles to dmatrices
* \param evname name of evaluation
* \return the string containing evaluation statistics
*/
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
/*!
* \brief make prediction based on dmat
* \param handle handle
* \param dmat data matrix
* \param option_mask output_margin:1 predict_leaf:2
* \param ntree_limit limit number of trees used in prediction
*/
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit);
/*!
* \brief load model from existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
/*!
* \brief save model into existing file
* \param handle handle
* \param fname file name
*/
void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
/*!
* \brief load model from raw array
* \param handle handle
*/
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw);
/*!
* \brief save model into R's raw array
* \param handle handle
* \return raw array
*/
SEXP XGBoosterModelToRaw_R(SEXP handle);
/*!
* \brief dump model into a string
* \param handle handle
* \param fmap name to fmap can be empty string
* \param with_stats whether dump statistics of splits
*/
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats);
}
XGB_DLL SEXP XGBoosterModelToRaw_R(SEXP handle);
/*!
* \brief dump model into a string
* \param handle handle
* \param fmap name to fmap can be empty string
* \param with_stats whether dump statistics of splits
*/
XGB_DLL SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats);
#endif // XGBOOST_R_H_ // NOLINT(*)
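For orientation, these entry points are reached from R through `.Call`. A sketch of the plumbing (illustrative only: the file name is a placeholder, and the exported functions such as `xgb.DMatrix` wrap these calls for you):

```r
library(xgboost)
# Create a DMatrix handle directly through the C entry point
# ("agaricus.txt.train" is a hypothetical libsvm-format file).
handle <- .Call("XGDMatrixCreateFromFile_R", "agaricus.txt.train",
                1L, PACKAGE = "xgboost")
# Query its number of rows through the matching entry point
n <- .Call("XGDMatrixNumRow_R", handle, PACKAGE = "xgboost")
```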

R-package/src/xgboost_assert.c

@ -24,11 +24,3 @@ void XGBoostCheck_R(int exp, const char *fmt, ...) {
error("%s\n", buf);
}
}
int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {
int ret;
va_list args;
va_start(args, fmt);
ret = vsnprintf(buf, size, fmt, args);
va_end(args);
return ret;
}

R-package/src/xgboost_custom.cc (new file)

@ -0,0 +1,65 @@
// Copyright (c) 2015 by Contributors
// This file contains the customization implementations of R module
// to change behavior of libxgboost
#include <xgboost/logging.h>
#include "src/common/random.h"
#include "./xgboost_R.h"
// redirect the messages to R's console.
namespace dmlc {
void CustomLogMessage::Log(const std::string& msg) {
Rprintf("%s\n", msg.c_str());
}
} // namespace dmlc
// implements rabit error handling.
extern "C" {
void XGBoostAssert_R(int exp, const char *fmt, ...);
void XGBoostCheck_R(int exp, const char *fmt, ...);
}
namespace rabit {
namespace utils {
extern "C" {
void (*Printf)(const char *fmt, ...) = Rprintf;
void (*Assert)(int exp, const char *fmt, ...) = XGBoostAssert_R;
void (*Check)(int exp, const char *fmt, ...) = XGBoostCheck_R;
void (*Error)(const char *fmt, ...) = error;
}
}
}
namespace xgboost {
ConsoleLogger::~ConsoleLogger() {
dmlc::CustomLogMessage::Log(log_stream_.str());
}
TrackerLogger::~TrackerLogger() {
dmlc::CustomLogMessage::Log(log_stream_.str());
}
} // namespace xgboost
namespace xgboost {
namespace common {
// redirect the math functions.
bool CheckNAN(double v) {
return ISNAN(v);
}
double LogGamma(double v) {
return lgammafn(v);
}
// customize random engine.
void CustomGlobalRandomEngine::seed(CustomGlobalRandomEngine::result_type val) {
// ignore the seed
}
// use R's PRNG as the replacement
CustomGlobalRandomEngine::result_type
CustomGlobalRandomEngine::operator()() {
return static_cast<result_type>(
std::floor(unif_rand() * CustomGlobalRandomEngine::max()));
}
} // namespace common
} // namespace xgboost
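Because the custom engine above draws from R's PRNG, reproducibility is controlled with `set.seed` rather than a `seed` parameter. A sketch (any run that uses subsampling will do):

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')
# R's PRNG drives row subsampling inside the library, so the same
# set.seed value yields identical boosters.
set.seed(42)
bst1 <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                nrounds = 2, max.depth = 3, subsample = 0.5,
                objective = "binary:logistic")
set.seed(42)
bst2 <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                nrounds = 2, max.depth = 3, subsample = 0.5,
                objective = "binary:logistic")
```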

R-package/tests/testthat/test_poisson_regression.R

@ -10,5 +10,5 @@ test_that("poisson regression works", {
expect_equal(class(bst), "xgb.Booster")
pred <- predict(bst,as.matrix(mtcars[, -11]))
expect_equal(length(pred), 32)
expect_equal(sqrt(mean( (pred - mtcars[,11]) ^ 2)), 1.16, tolerance = 0.01)
expect_less_than(sqrt(mean( (pred - mtcars[,11]) ^ 2)), 2.5)
})

R-package/vignettes/discoverYourData.Rmd

@ -1,6 +1,6 @@
---
title: "Understand your dataset with Xgboost"
output:
rmarkdown::html_vignette:
css: vignette.css
number_sections: yes
@ -12,8 +12,11 @@ vignette: >
\usepackage[utf8]{inputenc}
---
Understand your dataset with XGBoost
====================================
Introduction
============
------------
The purpose of this Vignette is to show you how to use **Xgboost** to discover and understand your own dataset better.
@ -25,16 +28,16 @@ Package loading:
require(xgboost)
require(Matrix)
require(data.table)
if (!require('vcd')) install.packages('vcd')
```
> The **VCD** package is used for one of its embedded datasets only.
Preparation of the dataset
==========================
--------------------------
### Numeric VS categorical variables
Numeric VS categorical variables
--------------------------------
**Xgboost** manages only `numeric` vectors.
@ -48,10 +51,9 @@ A *categorical* variable has a fixed number of different values. For instance, i
To answer the question above we will convert *categorical* variables to `numeric` ones.
Conversion from categorical to numeric variables
------------------------------------------------
### Conversion from categorical to numeric variables
### Looking at the raw data
#### Looking at the raw data
In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features.
@ -85,11 +87,11 @@ str(df)
> * can take a limited number of values (like `factor`) ;
> * these values are ordered (unlike `factor`). Here these ordered values are: `Marked > Some > None`
### Creation of new features based on old ones
#### Creation of new features based on old ones
We will add some new *categorical* features to see if it helps.
#### Grouping per 10 years
##### Grouping per 10 years
For the first feature we create groups of age by rounding the real age.
@ -101,7 +103,7 @@ Therefore, 20 is not closer to 30 than 60. To make it short, the distance betwee
head(df[,AgeDiscret := as.factor(round(Age/10,0))])
```
#### Random split in two groups
##### Random split in two groups
Following is an even stronger simplification of the real age with an arbitrary split at 30 years old. I chose this value **based on nothing**. We will see later whether simplifying the information based on arbitrary values is a good strategy (you may already have an idea of how well it will work...).
@ -109,15 +111,15 @@ Following is an even stronger simplification of the real age with an arbitrary s
head(df[,AgeCat:= as.factor(ifelse(Age > 30, "Old", "Young"))])
```
#### Risks in adding correlated features
##### Risks in adding correlated features
These new features are highly correlated to the `Age` feature because they are simple transformations of this feature.
For many machine learning algorithms, using correlated features is not a good idea. It may sometimes make prediction less accurate, and most of the time make interpretation of the model almost impossible. GLM, for instance, assumes that the features are uncorrelated.
Fortunately, decision tree algorithms (including boosted trees) are very robust to these features. Therefore we do not have to do anything to manage this situation.
#### Cleaning data
##### Cleaning data
We remove ID as there is nothing to learn from this feature (it would just add some noise).
@ -132,7 +134,7 @@ levels(df[,Treatment])
```
### One-hot encoding
#### One-hot encoding
Next step, we will transform the categorical data to dummy variables.
This is the [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) step.
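The chunk elided from this hunk performs the encoding with `Matrix::sparse.model.matrix`; a sketch, assuming the `df` built in the previous steps:

```r
library(Matrix)
# Expand every categorical column into dummy (one-hot) columns;
# the "-1" drops the intercept column.
sparse_matrix <- sparse.model.matrix(Improved ~ . - 1, data = df)
head(sparse_matrix)
```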
@ -156,12 +158,12 @@ Create the output `numeric` vector (not as a sparse `Matrix`):
output_vector = df[,Improved] == "Marked"
```
1. set `Y` vector to `0`;
2. set `Y` to `1` for rows where `Improved == Marked` is `TRUE` ;
3. return `Y` vector.
Build the model
===============
---------------
The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)).
@ -173,17 +175,17 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better.
A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copies the past too closely and won't be that good at predicting the future).
> Here you can see the numbers decrease until line 7 and then increase.
>
> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nround = 4` (see the sketch below). I will leave things as they are because I don't really care for the purpose of this example :-)
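A sketch of that reduced-rounds fit (same call as above, with the round count the note suggests; `nround` is the spelling this version of the package uses):

```r
# Stop at 4 rounds, where train-error stopped improving
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
               eta = 1, nthread = 2, nround = 4,
               objective = "binary:logistic")
```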
Feature importance
==================
------------------
## Measure feature importance
Measure feature importance
--------------------------
### Build the feature importance data.table
@ -204,7 +206,7 @@ head(importance)
`Frequency` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it).
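The `importance` table referenced in this hunk is built with `xgb.importance`; a sketch, assuming the `bst` and `sparse_matrix` objects from earlier (the `feature_names` argument reflects the API of this era):

```r
# Per-feature Gain/Cover/Frequency from the fitted booster
importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]],
                             model = bst)
head(importance)
```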
### Improvement in the interpretability of feature importance data.table
#### Improvement in the interpretability of feature importance data.table
We can go deeper in the analysis of the model. In the `data.table` above, we have discovered which features count in predicting whether the illness will go away or not. But we don't yet know the role of these features. For instance, one of the questions we may want to answer would be: does receiving a placebo treatment help to recover from the illness?
@ -233,8 +235,8 @@ Therefore, according to our findings, getting a placebo doesn't seem to help but
> You may wonder how to interpret the `< 1.00001` on the first line. Basically, in a sparse `Matrix`, there is no `0`, therefore, looking for one hot-encoded categorical observations validating the rule `< 1.00001` is like just looking for `1` for this feature.
Plotting the feature importance
-------------------------------
### Plotting the feature importance
All these things are nice, but it would be even better to plot the results.
@ -250,11 +252,11 @@ According to the plot above, the most important features in this dataset to pred
* the Age ;
* having received a placebo or not ;
* sex comes third, but it is already in the group of less interesting features ;
* then we see our generated features (AgeDiscret). We can see that their contribution is very low.
Do these results make sense?
------------------------------
### Do these results make sense?
Let's check some **Chi2** between each of these features and the label.
@ -279,18 +281,18 @@ c2 <- chisq.test(df$AgeCat, output_vector)
print(c2)
```
The perfectly random split I did between young and old at 30 years old has a low Chi2 statistic of **`r round(c2$statistic, 2)`**. It's a result we might expect: maybe in my mind being over 30 means being old (I am 32 and starting to feel old, which may explain it), but for the illness we are studying, the vulnerable age is not the same.
Moral of the story: don't let your *gut* lower the quality of your model.
In the expression *data science*, there is the word *science* :-)
Conclusion
==========
----------
As you can see, in general *destroying information by simplifying it won't improve your model*. **Chi2** just demonstrates that.
But in more complex cases, creating a new feature from an existing one which makes the link with the outcome more obvious may help the algorithm and improve the model.
The case studied here is not complex enough to show that. Check the [Kaggle website](http://www.kaggle.com/) for some challenging datasets. However, it's almost always worse when you add some arbitrary rules.
@ -299,7 +301,7 @@ Moreover, you can notice that even if we have added some not useful new features
A linear model may not be that smart in this scenario.
Special Note: What about Random Forests™?
==========================================
-----------------------------------------
As you may know, the [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is a cousin of boosting, and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family.
@ -313,7 +315,7 @@ However, in Random Forests™ this random choice will be done for each tree, bec
In boosting, when a specific link between feature and outcome has been learned by the algorithm, it will try not to refocus on it (in theory that is what happens; reality is not always that simple). Therefore, all the importance will be on feature `A` or on feature `B` (but not both). You will know that one feature has an important role in the link between the observations and the label. It is still up to you to search for the features correlated to the one detected as important if you need to know all of them.
If you want to try the Random Forests™ algorithm, you can tweak Xgboost parameters!
**Warning**: this is still an experimental parameter.
View File
@ -13,8 +13,11 @@ vignette: >
\usepackage[utf8]{inputenc}
---
Introduction
============
XGBoost R Tutorial
==================
## Introduction
**Xgboost** is short for e**X**treme **G**radient **Boost**ing package.
@ -40,16 +43,16 @@ It has several features:
* Sparsity: it accepts *sparse* input for both *tree booster* and *linear booster*, and is optimized for *sparse* input ;
* Customization: it supports customized objective functions and evaluation functions.
Installation
============
## Installation
### Github version
Github version
--------------
For up-to-date version (highly recommended), install from *Github*:
```{r installGithub, eval=FALSE}
devtools::install_github('dmlc/xgboost', subdir='R-package')
devtools::install_git('git://github.com/dmlc/xgboost', subdir='R-package')
```
> *Windows* users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
@ -61,8 +64,8 @@ As of 2015-03-13, xgboost was removed from the CRAN repository.
Formerly available versions can be obtained from the CRAN [archive](http://cran.r-project.org/src/contrib/Archive/xgboost)
Learning
========
## Learning
For the purpose of this tutorial we will load **XGBoost** package.
@ -70,15 +73,15 @@ For the purpose of this tutorial we will load **XGBoost** package.
require(xgboost)
```
Dataset presentation
--------------------
### Dataset presentation
In this example, we are aiming to predict whether a mushroom can be eaten or not (like in many tutorials, the example data are the same as those you will use in your everyday life :-).
Mushroom data is cited from UCI Machine Learning Repository. @Bache+Lichman:2013.
Dataset loading
---------------
### Dataset loading
We will load the `agaricus` datasets embedded with the package and will link them to variables.
@ -124,12 +127,12 @@ class(train$data)[1]
class(train$label)
```
Basic Training using XGBoost
----------------------------
### Basic Training using XGBoost
This step is the most critical part of the process for the quality of our model.
### Basic training
#### Basic training
We are using the `train` data. As explained above, both `data` and `label` are stored in a `list`.
@ -148,9 +151,9 @@ bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta
> The more complex the relationship between your features and your `label` is, the more passes you need.
### Parameter variations
#### Parameter variations
#### Dense matrix
##### Dense matrix
Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix.
@ -158,7 +161,7 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
#### xgb.DMatrix
##### xgb.DMatrix
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later.
@ -167,7 +170,7 @@ dtrain <- xgb.DMatrix(data = train$data, label = train$label)
bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
#### Verbose option
##### Verbose option
**XGBoost** has several features to help you to see how the learning progresses internally. The purpose is to help you to set the best parameters, which is the key to your model quality.
@ -188,11 +191,11 @@ bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, o
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2)
```
Basic prediction using XGBoost
==============================
## Basic prediction using XGBoost
## Perform the prediction
Perform the prediction
----------------------
The purpose of the model we have built is to classify new data. As explained before, we will use the `test` dataset for this step.
@ -208,8 +211,8 @@ print(head(pred))
These numbers don't look like *binary classification* `{0,1}`. We need to perform a simple transformation before being able to use these results.
Transform the regression in a binary classification
---------------------------------------------------
## Transform the regression in a binary classification
The only thing that **XGBoost** does is a *regression*. **XGBoost** is using `label` vector to build its *regression* model.
@ -222,8 +225,8 @@ prediction <- as.numeric(pred > 0.5)
print(head(prediction))
```
Measuring model performance
---------------------------
## Measuring model performance
To measure the model performance, we will compute a simple metric, the *average error*.
@ -246,14 +249,14 @@ The most important thing to remember is that **to do a classification, you just
This metric is **`r round(err, 2)`** and is pretty low: our yummy mushroom model works well!
Advanced features
=================
## Advanced features
Most of the features below have been implemented to help you to improve your model by offering a better understanding of its content.
Dataset preparation
-------------------
### Dataset preparation
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
@ -262,8 +265,8 @@ dtrain <- xgb.DMatrix(data = train$data, label=train$label)
dtest <- xgb.DMatrix(data = test$data, label=test$label)
```
Measure learning progress with xgb.train
----------------------------------------
### Measure learning progress with xgb.train
Both `xgboost` (simple) and `xgb.train` (advanced) functions train models.
@ -295,8 +298,8 @@ bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchli
> `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`.
Linear boosting
---------------
### Linear boosting
Until now, all the learnings we have performed were based on boosting trees. **XGBoost** implements a second algorithm, based on linear boosting. The only difference with previous command is `booster = "gblinear"` parameter (and removing `eta` parameter).
@ -308,10 +311,10 @@ In this specific case, *linear boosting* gets slightly better performance metrics
In simple cases, it will happen because there is nothing better than a linear algorithm to catch a linear link. However, decision trees are much better at catching a non-linear link between predictors and outcome. Because there is no silver bullet, we advise you to check both algorithms with your own datasets to have an idea of what to use.
Manipulating xgb.DMatrix
------------------------
### Manipulating xgb.DMatrix
### Save / Load
#### Save / Load
Like saving models, the `xgb.DMatrix` object (which groups both dataset and outcome) can also be saved using the `xgb.DMatrix.save` function.
@ -326,7 +329,7 @@ bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchl
file.remove("dtrain.buffer")
```
### Information extraction
#### Information extraction
Information can be extracted from an `xgb.DMatrix` using the `getinfo` function. Hereafter we will extract the `label` data.
@ -337,8 +340,8 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))
```
View feature importance/influence from the learnt model
-------------------------------------------------------
### View feature importance/influence from the learnt model
Feature importance is similar to R gbm package's relative influence (rel.inf).
@ -348,8 +351,8 @@ print(importance_matrix)
xgb.plot.importance(importance_matrix = importance_matrix)
```
View the trees from a model
---------------------------
#### View the trees from a model
You can dump the tree you learned using `xgb.dump` into a text file.
@ -365,8 +368,8 @@ xgb.plot.tree(model = bst)
> if you provide a path to `fname` parameter you can save the trees to your hard drive.
Save and load models
--------------------
#### Save and load models
Maybe your dataset is big, and it takes time to train a model on it? Maybe you are not a big fan of losing time redoing the same task again and again? In these very rare cases, you will want to save your model and load it when required.
@ -416,5 +419,4 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred2-pred))))
> Again `0`? It seems that `XGBoost` works pretty well!
References
==========
## References
View File
@ -7,47 +7,22 @@
[![PyPI version](https://badge.fury.io/py/xgboost.svg)](https://pypi.python.org/pypi/xgboost/)
[![Gitter chat for developers at https://gitter.im/dmlc/xgboost](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dmlc/xgboost?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version.
It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework, including [Generalized Linear Model](https://en.wikipedia.org/wiki/Generalized_linear_model) (GLM) and [Gradient Boosted Decision Trees](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) (GBDT). XGBoost can also be [distributed](#features) and scale to Terascale data
XGBoost is part of [Distributed Machine Learning Common](http://dmlc.github.io/) <img src=https://avatars2.githubusercontent.com/u/11508361?v=3&s=20> projects
XGBoost is an optimized distributed gradient boosting library designed to be highly *efficient*, *flexible* and *portable*.
It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework.
XGBoost provides a parallel tree boosting (also known as GBDT, GBM) that solves many data science problems in a fast and accurate way.
The same code runs on major distributed environments (Hadoop, SGE, MPI) and can solve problems beyond billions of examples.
XGBoost is part of [DMLC](http://dmlc.github.io/) projects.
Contents
--------
* [What's New](#whats-new)
* [Version](#version)
* [Documentation](doc/index.md)
* [Build Instruction](doc/build.md)
* [Features](#features)
* [Distributed XGBoost](multi-node)
* [Usecases](doc/index.md#highlight-links)
* [Bug Reporting](#bug-reporting)
* [Contributing to XGBoost](#contributing-to-xgboost)
* [Committers and Contributors](CONTRIBUTORS.md)
* [License](#license)
* [XGBoost in Graphlab Create](#xgboost-in-graphlab-create)
* [Documentation and Tutorials](https://xgboost.readthedocs.org)
* [Code Examples](demo)
* [Installation](doc/build.md)
* [Contribute to XGBoost](http://xgboost.readthedocs.org/en/latest/dev-guide/contribute.html)
What's New
----------
* XGBoost helps Vlad Mironov, Alexander Guschin to win the [CERN LHCb experiment Flavour of Physics competition](https://www.kaggle.com/c/flavours-of-physics). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/11/30/flavour-of-physics-technical-write-up-1st-place-go-polar-bears/).
* XGBoost helps Mario Filho, Josef Feigl, Lucas, Gilberto to win the [Caterpillar Tube Pricing competition](https://www.kaggle.com/c/caterpillar-tube-pricing). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/09/22/caterpillar-winners-interview-1st-place-gilberto-josef-leustagos-mario/).
* XGBoost helps Halla Yang to win the [Recruit Coupon Purchase Prediction Challenge](https://www.kaggle.com/c/coupon-purchase-prediction). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/10/21/recruit-coupon-purchase-winners-interview-2nd-place-halla-yang/).
* XGBoost helps Owen Zhang to win the [Avito Context Ad Click competition](https://www.kaggle.com/c/avito-context-ad-clicks). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/08/26/avito-winners-interview-1st-place-owen-zhang/).
* XGBoost helps Chenglong Chen to win [Kaggle CrowdFlower Competition](https://www.kaggle.com/c/crowdflower-search-relevance)
Check out the [winning solution](https://github.com/ChenglongChen/Kaggle_CrowdFlower)
* XGBoost-0.4 release, see [CHANGES.md](CHANGES.md#xgboost-04)
* XGBoost helps three champion teams to win [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing)
Check out the [winning solution](doc/README.md#highlight-links)
* [External Memory Version](doc/external_memory.md)
Version
-------
* Current version xgboost-0.4
- [Change log](CHANGES.md)
- This version is compatible with 0.3x versions
* [XGBoost brick](NEWS.md) Release
Features
--------
@ -61,24 +36,17 @@ Features
Bug Reporting
-------------
* For reporting bugs please use the [xgboost/issues](https://github.com/dmlc/xgboost/issues) page.
* For generic questions or to share your experience using xgboost please use the [XGBoost User Group](https://groups.google.com/forum/#!forum/xgboost-user/)
Contributing to XGBoost
-----------------------
XGBoost has been developed and used by a group of active community members. Everyone is more than welcome to contribute. It is a way to make the project better and more accessible to more users.
* Check out [Feature Wish List](https://github.com/dmlc/xgboost/labels/Wish-List) to see what can be improved, or open an issue if you want something.
* Contribute to the [documents and examples](https://github.com/dmlc/xgboost/blob/master/doc/) to share your experience with other users.
* Please add your name to [CONTRIBUTORS.md](CONTRIBUTORS.md) after your patch has been merged.
- Please also update [NEWS.md](NEWS.md) on changes and improvements in API and docs.
License
-------
© Contributors, 2015. Licensed under an [Apache-2](https://github.com/dmlc/xgboost/blob/master/LICENSE) license.
XGBoost in Graphlab Create
--------------------------
* XGBoost is adopted as part of the boosted tree toolkit in Graphlab Create (GLC). Graphlab Create is a powerful Python toolkit that allows you to do data manipulation, graph processing, hyper-parameter search, and visualization of terabyte-scale data in one framework. Try [Graphlab Create](http://graphlab.com/products/create/quick-start-guide.html)
* Nice [blog post](http://blog.graphlab.com/using-gradient-boosted-trees-to-predict-bike-sharing-demand) by Jay Gu about using GLC boosted trees to solve the Kaggle bike sharing challenge:
View File
@ -0,0 +1,14 @@
/*!
* Copyright 2015 by Contributors.
 * \brief Minimum DMLC library amalgamation, used for easy plugin of the dmlc lib.
* Normally this is not needed.
*/
#include "../dmlc-core/src/io/line_split.cc"
#include "../dmlc-core/src/io/recordio_split.cc"
#include "../dmlc-core/src/io/input_split_base.cc"
#include "../dmlc-core/src/io/local_filesys.cc"
#include "../dmlc-core/src/data.cc"
#include "../dmlc-core/src/io.cc"
#include "../dmlc-core/src/recordio.cc"
View File
@ -0,0 +1,57 @@
/*!
* Copyright 2015 by Contributors.
* \brief XGBoost Amalgamation.
* This offers an alternative way to compile the entire library from this single file.
*
* Example usage command.
 * - $(CXX) -std=c++0x -fopenmp -shared -o libxgboost.so xgboost-all0.cc -ldmlc -lrabit
*
* \author Tianqi Chen.
*/
// metrics
#include "../src/metric/metric.cc"
#include "../src/metric/elementwise_metric.cc"
#include "../src/metric/multiclass_metric.cc"
#include "../src/metric/rank_metric.cc"
// objectives
#include "../src/objective/objective.cc"
#include "../src/objective/regression_obj.cc"
#include "../src/objective/multiclass_obj.cc"
#include "../src/objective/rank_obj.cc"
// gbms
#include "../src/gbm/gbm.cc"
#include "../src/gbm/gbtree.cc"
#include "../src/gbm/gblinear.cc"
// data
#include "../src/data/data.cc"
#include "../src/data/simple_csr_source.cc"
#include "../src/data/simple_dmatrix.cc"
#include "../src/data/sparse_page_raw_format.cc"
#if DMLC_ENABLE_STD_THREAD
#include "../src/data/sparse_page_source.cc"
#include "../src/data/sparse_page_dmatrix.cc"
#endif
// trees
#include "../src/tree/tree_model.cc"
#include "../src/tree/tree_updater.cc"
#include "../src/tree/updater_colmaker.cc"
#include "../src/tree/updater_prune.cc"
#include "../src/tree/updater_refresh.cc"
#include "../src/tree/updater_sync.cc"
#include "../src/tree/updater_histmaker.cc"
#include "../src/tree/updater_skmaker.cc"
// global
#include "../src/learner.cc"
#include "../src/logging.cc"
#include "../src/common/common.cc"
// c_api
#include "../src/c_api/c_api.cc"
#include "../src/c_api/c_api_error.cc"
View File
@ -1,36 +0,0 @@
environment:
global:
CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\python-appveyor-demo\\appveyor\\run_with_env.cmd"
DISABLE_OPENMP: 1
VisualStudioVersion: 12.0
matrix:
- PYTHON: "C:\\Python27-x64"
PYTHON_VERSION: "2.7.x" # currently 2.7.9
PYTHON_ARCH: "64"
- PYTHON: "C:\\Python33-x64"
PYTHON_VERSION: "3.3.x" # currently 3.3.5
PYTHON_ARCH: "64"
platform:
- x64
configuration:
- Release
install:
- cmd: git clone https://github.com/ogrisel/python-appveyor-demo
- ECHO "Filesystem root:"
- ps: "ls \"C:/\""
- ECHO "Installed SDKs:"
- ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\""
- ps: python-appveyor-demo\appveyor\install.ps1
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
- "python --version"
- "python -c \"import struct; print(struct.calcsize('P') * 8)\""
build: off
#project: windows\xgboost.sln
View File
@ -6,27 +6,14 @@
# See additional instruction in doc/build.md
#for building static OpenMP lib in MAC for easier installation in MAC
#doesn't work with XCode clang/LLVM since Apple doesn't support OpenMP;
#needs brew install gcc 4.9+ with OpenMP. By default the static link is OFF
static_omp=0
if ((${static_omp}==1)); then
rm libgomp.a
ln -s `g++ -print-file-name=libgomp.a`
make clean
make omp_mac_static=1
echo "Successfully build multi-thread static link xgboost"
exit 0
fi
if make; then
echo "Successfully build multi-thread xgboost"
else
echo "-----------------------------"
echo "Building multi-thread xgboost failed"
echo "Start to build single-thread xgboost"
make clean
make no_omp=1
make clean_all
make config=config/mininum.mk
echo "Successfully build single-thread xgboost"
echo "If you want multi-threaded version"
echo "See additional instructions in doc/build.md"
View File
@ -44,8 +44,15 @@ However, the parameter settings can be applied to all versions
* [Multiclass classification](multiclass_classification)
* [Regression](regression)
* [Learning to Rank](rank)
* [Distributed Training](distributed-training)
Benchmarks
----------
* [Starter script for Kaggle Higgs Boson](kaggle-higgs)
* [Kaggle Tradeshift winning solution by daxiongshu](https://github.com/daxiongshu/kaggle-tradeshift-winning-solution)
Machine Learning Challenge Winning Solutions
--------------------------------------------
* XGBoost helps Vlad Mironov, Alexander Guschin to win the [CERN LHCb experiment Flavour of Physics competition](https://www.kaggle.com/c/flavours-of-physics). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/11/30/flavour-of-physics-technical-write-up-1st-place-go-polar-bears/).
* XGBoost helps Mario Filho, Josef Feigl, Lucas, Gilberto to win the [Caterpillar Tube Pricing competition](https://www.kaggle.com/c/caterpillar-tube-pricing). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/09/22/caterpillar-winners-interview-1st-place-gilberto-josef-leustagos-mario/).
* XGBoost helps Halla Yang to win the [Recruit Coupon Purchase Prediction Challenge](https://www.kaggle.com/c/coupon-purchase-prediction). Check out the [interview from Kaggle](http://blog.kaggle.com/2015/10/21/recruit-coupon-purchase-winners-interview-2nd-place-halla-yang/).
View File
@ -0,0 +1,52 @@
Distributed XGBoost Training
============================
This is a tutorial on Distributed XGBoost Training.
Currently xgboost supports distributed training via the CLI program with a configuration file.
There are also plans to push distributed python and other language bindings; please open an issue
if you are interested in contributing.
Build XGBoost with Distributed Filesystem Support
-------------------------------------------------
To use distributed xgboost, you only need to turn on the options to build
with distributed filesystems (HDFS or S3) in ```xgboost/make/config.mk```.
How to Use
----------
* Input data format: LIBSVM format. The example here uses generated data in the ../data folder.
* Put the data into some distributed filesystem (S3 or HDFS)
* Use the tracker script in dmlc-core/tracker to submit the jobs
* Like all other DMLC tools, xgboost supports taking a path to a folder as an input argument
- All the files in the folder will be used as input
* Quick start in Hadoop YARN: run ```bash run_yarn.sh <n_hadoop_workers> <n_thread_per_worker> <path_in_HDFS>```
Example
-------
* [run_yarn.sh](run_yarn.sh) shows how to submit job to Hadoop via YARN.
Single machine vs Distributed Version
-------------------------------------
If you have used xgboost (single machine version) before, this section will show you how to run xgboost on hadoop with a slight modification to the conf file.
* IO: instead of reading and writing files locally, we now use HDFS; put the ```hdfs://``` prefix on the address of any file you would like to access (see the conf sketch after this list)
* File cache: ```dmlc_yarn.py``` also provides several ways to cache necessary files, including the binary file (xgboost) and the conf file
- ```dmlc_yarn.py``` will automatically cache files in the command line. For example, ```dmlc_yarn.py -n 3 $localPath/xgboost.dmlc mushroom.hadoop.conf``` will cache "xgboost.dmlc" and "mushroom.hadoop.conf".
- You could also use "-f" to manually cache one or more files, like ```-f file1 -f file2```
- The local path of cached files in the command is "./".
* For more details on job submission, refer to the usage of ```dmlc_yarn.py```.
* The model saved by the hadoop version is compatible with the single machine version.
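As a hedged sketch of such a conf file (the HDFS paths below are placeholders; the parameter names match the ones passed on the command line in [run_yarn.sh](run_yarn.sh)):
```
booster = gbtree
objective = binary:logistic
# read input and write output through HDFS instead of the local disk
data = "hdfs:///path/to/data/agaricus.txt.train"
eval[test] = "hdfs:///path/to/data/agaricus.txt.test"
model_out = "hdfs:///path/to/mushroom.final.model"
```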
Notes
-----
* The code is optimized with multi-threading, so you will want to run xgboost with more vcores for best performance.
- You will want to set <n_thread_per_worker> to the number of cores you have on each machine.
External Memory Version
-----------------------
XGBoost supports external memory; this will make each process cache data on the local disk during computation, without taking up all the memory for storing the data.
See [external memory](https://github.com/dmlc/xgboost/tree/master/doc/external_memory.md) for syntax using external memory.
You only need to add a cache prefix to the input file path to enable external memory mode. For example, set the training data as
```
data=hdfs:///path-to-my-data/#dtrain.cache
```
This will make xgboost more memory efficient and allow you to run xgboost on larger-scale datasets.
View File
@ -0,0 +1,33 @@
#!/bin/bash
if [ "$#" -lt 3 ];
then
echo "Usage: <nworkers> <nthreads> <path_in_HDFS>"
exit -1
fi
# put the local training file to HDFS
hadoop fs -mkdir $3/data
hadoop fs -put ../data/agaricus.txt.train $3/data
hadoop fs -put ../data/agaricus.txt.test $3/data
# running rabit, pass address in hdfs
../../dmlc-core/tracker/dmlc_yarn.py -n $1 --vcores $2 ../../xgboost mushroom.hadoop.conf nthread=$2\
data=hdfs://$3/data/agaricus.txt.train\
eval[test]=hdfs://$3/data/agaricus.txt.test\
model_out=hdfs://$3/mushroom.final.model
# get the final model file
hadoop fs -get $3/mushroom.final.model final.model
# use dmlc-core/yarn/run_hdfs_prog.py to setup the appropriate env
# output prediction task=pred
#../../xgboost.dmlc mushroom.hadoop.conf task=pred model_in=final.model test:data=../data/agaricus.txt.test
../../dmlc-core/yarn/run_hdfs_prog.py ../../xgboost mushroom.hadoop.conf task=pred model_in=final.model test:data=../data/agaricus.txt.test
# print the boosters of final.model in dump.raw.txt
#../../xgboost.dmlc mushroom.hadoop.conf task=dump model_in=final.model name_dump=dump.raw.txt
../../dmlc-core/yarn/run_hdfs_prog.py ../../xgboost mushroom.hadoop.conf task=dump model_in=final.model name_dump=dump.raw.txt
# use the feature map in printing for better visualization
#../../xgboost.dmlc mushroom.hadoop.conf task=dump model_in=final.model fmap=../data/featmap.txt name_dump=dump.nice.txt
../../dmlc-core/yarn/run_hdfs_prog.py ../../xgboost mushroom.hadoop.conf task=dump model_in=final.model fmap=../data/featmap.txt name_dump=dump.nice.txt
cat dump.nice.txt
View File
@ -1,4 +1,5 @@
#!/bin/bash
export PYTHONPATH=${PYTHONPATH}:../../python-package
python basic_walkthrough.py
python custom_objective.py
python boost_from_prediction.py
@ -9,4 +10,4 @@ python predict_leaf_indices.py
python sklearn_examples.py
python sklearn_parallel.py
python external_memory.py
rm -rf *~ *.model *.buffer
1
dmlc-core Submodule
@ -0,0 +1 @@
Subproject commit ad2ddde8b6624abf3007a71b2923c3925530cc81
1
doc/.gitignore vendored
View File
@ -5,3 +5,4 @@ _*
doxygen
parser.py
*.pyc
web-data
2353
doc/Doxyfile Normal file
File diff suppressed because it is too large
1
doc/R-package/.gitignore vendored Normal file
View File
@ -0,0 +1 @@
*~
14
doc/R-package/Makefile Normal file
View File
@ -0,0 +1,14 @@
# This is the makefile for compiling Rmarkdown files into the md file with results.
PKGROOT=../../R-package
# Add the Markdown files to be built here, with suffix md
discoverYourData.md: $(PKGROOT)/vignettes/discoverYourData.Rmd
xgboostPresentation.md: $(PKGROOT)/vignettes/xgboostPresentation.Rmd
# General rules for building rmarkdown files; needs knitr
%.md:
Rscript -e \
"require(knitr);"\
"knitr::opts_knit\$$set(root.dir=\".\");"\
"knitr::opts_chunk\$$set(fig.path=\"../web-data/xgboost/knitr/$(basename $@)-\");"\
"knitr::knit(\"$+\")"
View File
@ -0,0 +1,484 @@
---
title: "Understand your dataset with Xgboost"
output:
rmarkdown::html_vignette:
css: vignette.css
number_sections: yes
toc: yes
author: Tianqi Chen, Tong He, Michaël Benesty
vignette: >
%\VignetteIndexEntry{Discover your data}
%\VignetteEngine{knitr::rmarkdown}
\usepackage[utf8]{inputenc}
---
Understand your dataset with XGBoost
====================================
Introduction
------------
The purpose of this Vignette is to show you how to use **Xgboost** to discover and understand your own dataset better.
This Vignette is not about predicting anything (see [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). We will explain how to use **Xgboost** to highlight the *link* between the *features* of your data and the *outcome*.
Package loading:
```r
require(xgboost)
require(Matrix)
require(data.table)
if (!require('vcd')) install.packages('vcd')
```
> The **VCD** package is used for one of its embedded datasets only.
Preparation of the dataset
--------------------------
### Numeric VS categorical variables
**Xgboost** manages only `numeric` vectors.
What to do when you have *categorical* data?
A *categorical* variable has a fixed number of different values. For instance, if a variable called *Colour* can have only one of these three values, *red*, *blue* or *green*, then *Colour* is a *categorical* variable.
> In **R**, a *categorical* variable is called `factor`.
>
> Type `?factor` in the console for more information.
To answer the question above we will convert *categorical* variables to `numeric` ones.
### Conversion from categorical to numeric variables
#### Looking at the raw data
In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zeroes in the matrix) of `numeric` features.
The method we are going to see is usually called [one-hot encoding](http://en.wikipedia.org/wiki/One-hot).
The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package.
```r
data(Arthritis)
df <- data.table(Arthritis, keep.rownames = F)
```
> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large datasets is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of the **Xgboost** **R** package use `data.table`.
The first thing we want to do is to have a look at the first lines of the `data.table`:
```r
head(df)
```
```
## ID Treatment Sex Age Improved
## 1: 57 Treated Male 27 Some
## 2: 46 Treated Male 29 None
## 3: 77 Treated Male 30 None
## 4: 17 Treated Male 32 Marked
## 5: 36 Treated Male 46 Marked
## 6: 23 Treated Male 58 Marked
```
Now we will check the format of each column.
```r
str(df)
```
```
## Classes 'data.table' and 'data.frame': 84 obs. of 5 variables:
## $ ID : int 57 46 77 17 36 23 75 39 33 55 ...
## $ Treatment: Factor w/ 2 levels "Placebo","Treated": 2 2 2 2 2 2 2 2 2 2 ...
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age : int 27 29 30 32 46 58 59 59 63 63 ...
## $ Improved : Ord.factor w/ 3 levels "None"<"Some"<..: 2 1 1 3 3 3 1 3 1 1 ...
## - attr(*, ".internal.selfref")=<externalptr>
```
2 columns have `factor` type, one has `ordinal` type.
> `ordinal` variable :
>
> * can take a limited number of values (like `factor`) ;
> * these values are ordered (unlike `factor`). Here these ordered values are: `Marked > Some > None`
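For illustration only (this snippet is not part of the dataset preparation), here is how such an ordered factor can be built by hand in **R**:

```r
# An ordered factor: its levels have a defined order, unlike a plain factor
improved <- factor(c("None", "Marked", "Some"),
                   levels = c("None", "Some", "Marked"),
                   ordered = TRUE)
improved[1] < improved[2]  # TRUE: "None" is below "Marked" in the order
```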
#### Creation of new features based on old ones
We will add some new *categorical* features to see if it helps.
##### Grouping per 10 years
For the first feature we create groups of age by rounding the real age.
Note that we transform it to `factor` so the algorithm treats these age groups as independent values.
Therefore, 20 is not closer to 30 than it is to 60. To make it short, the distance between ages is lost in this transformation.
```r
head(df[,AgeDiscret := as.factor(round(Age/10,0))])
```
```
## ID Treatment Sex Age Improved AgeDiscret
## 1: 57 Treated Male 27 Some 3
## 2: 46 Treated Male 29 None 3
## 3: 77 Treated Male 30 None 3
## 4: 17 Treated Male 32 Marked 3
## 5: 36 Treated Male 46 Marked 5
## 6: 23 Treated Male 58 Marked 6
```
##### Random split in two groups
Following is an even stronger simplification of the real age with an arbitrary split at 30 years old. I chose this value **based on nothing**. We will see later if simplifying the information based on arbitrary values is a good strategy (you may already have an idea of how well it will work...).
```r
head(df[,AgeCat:= as.factor(ifelse(Age > 30, "Old", "Young"))])
```
```
## ID Treatment Sex Age Improved AgeDiscret AgeCat
## 1: 57 Treated Male 27 Some 3 Young
## 2: 46 Treated Male 29 None 3 Young
## 3: 77 Treated Male 30 None 3 Young
## 4: 17 Treated Male 32 Marked 3 Old
## 5: 36 Treated Male 46 Marked 5 Old
## 6: 23 Treated Male 58 Marked 6 Old
```
##### Risks in adding correlated features
These new features are highly correlated to the `Age` feature because they are simple transformations of this feature.
For many machine learning algorithms, using correlated features is not a good idea. It may sometimes make prediction less accurate, and most of the time make interpretation of the model almost impossible. GLM, for instance, assumes that the features are uncorrelated.
Fortunately, decision tree algorithms (including boosted trees) are very robust to these features. Therefore we don't need to do anything to manage this situation.
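As a quick, optional sanity check (not part of the original analysis), you can measure how strongly the derived features track `Age`; `as.numeric()` on the factors recovers their level codes, which here are ordered by age:

```r
# Both derived features are monotone transforms of Age,
# so they correlate strongly with it
cor(df$Age, as.numeric(df$AgeDiscret))
cor(df$Age, as.numeric(df$AgeCat == "Old"))
```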
##### Cleaning data
We remove ID as there is nothing to learn from this feature (it would just add some noise).
```r
df[,ID:=NULL]
```
We will list the different values for the column `Treatment`:
```r
levels(df[,Treatment])
```
```
## [1] "Placebo" "Treated"
```
#### One-hot encoding
Next step, we will transform the categorical data to dummy variables.
This is the [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) step.
The purpose is to transform each value of each *categorical* feature in a *binary* feature `{0, 1}`.
For example, the column `Treatment` will be replaced by two columns, `Placebo`, and `Treated`. Each of them will be *binary*. Therefore, an observation which has the value `Placebo` in column `Treatment` before the transformation will have after the transformation the value `1` in the new column `Placebo` and the value `0` in the new column `Treated`. The column `Treatment` will disappear during the one-hot encoding.
Column `Improved` is excluded because it will be our `label` column, the one we want to predict.
```r
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
head(sparse_matrix)
```
```
## 6 x 10 sparse Matrix of class "dgCMatrix"
##
## 1 . 1 1 27 1 . . . . 1
## 2 . 1 1 29 1 . . . . 1
## 3 . 1 1 30 1 . . . . 1
## 4 . 1 1 32 1 . . . . .
## 5 . 1 1 46 . . 1 . . .
## 6 . 1 1 58 . . . 1 . .
```
> The formula `Improved~.-1` used above means: transform all *categorical* features but column `Improved` to binary values. The `-1` removes the first column, which is full of `1` (this column is generated by the conversion). For more information, you can type `?sparse.model.matrix` in the console.
Create the output `numeric` vector (not as a sparse `Matrix`):
```r
output_vector = df[,Improved] == "Marked"
```
1. set `Y` vector to `0`;
2. set `Y` to `1` for rows where `Improved == Marked` is `TRUE` ;
3. return `Y` vector.
Build the model
---------------
The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)).
```r
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
```
```
## [0] train-error:0.202381
## [1] train-error:0.166667
## [2] train-error:0.166667
## [3] train-error:0.166667
## [4] train-error:0.154762
## [5] train-error:0.154762
## [6] train-error:0.154762
## [7] train-error:0.166667
## [8] train-error:0.166667
## [9] train-error:0.166667
```
You can see some `train-error: 0.XXXXX` lines. The error decreases round after round. Each line shows how well the model explains your data. Lower is better.
A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it sticks too closely to the past data and won't be that good at predicting the future).
> Here you can see the numbers decrease until line 7 and then increase.
>
> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nround = 4`. I will leave things as they are because I don't really care for the purpose of this example :-)
Feature importance
------------------
## Measure feature importance
### Build the feature importance data.table
In the code below, `sparse_matrix@Dimnames[[2]]` represents the column names of the sparse matrix. These names are the original values of the features (remember, each binary column == one value of one *categorical* feature).
```r
importance <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst)
head(importance)
```
```
## Feature Gain Cover Frequency
## 1: Age 0.622031651 0.67251706 0.67241379
## 2: TreatmentPlacebo 0.285750607 0.11916656 0.10344828
## 3: SexMale 0.048744054 0.04522027 0.08620690
## 4: AgeDiscret6 0.016604647 0.04784637 0.05172414
## 5: AgeDiscret3 0.016373791 0.08028939 0.05172414
## 6: AgeDiscret4 0.009270558 0.02858801 0.01724138
```
> The column `Gain` provide the information we are looking for.
>
> As you can see, features are classified by `Gain`.
`Gain` is the improvement in accuracy brought by a feature to the branches it is on. The idea is that before adding a new split on a feature X to the branch, there were some wrongly classified elements; after adding the split on this feature, there are two new branches, and each of these branches is more accurate (one branch saying that if your observation is on this branch then it should be classified as `1`, and the other branch saying the exact opposite).
`Cover` measures the relative quantity of observations concerned by a feature.
`Frequency` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it).
#### Improvement in the interpretability of feature importance data.table
We can go deeper in the analysis of the model. In the `data.table` above, we have discovered which features count in predicting whether the illness will go away. But we don't yet know the role of these features. For instance, one of the questions we may want to answer would be: does receiving a placebo treatment help to recover from the illness?
One simple solution is to count the co-occurrences of a feature and a class of the classification.
For that purpose we will execute the same function as above but using two more parameters, `data` and `label`.
```r
importanceRaw <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector)
# Cleaning for better display
importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)]
head(importanceClean)
```
```
## Feature Split Gain RealCover RealCover %
## 1: TreatmentPlacebo -1.00136e-05 0.28575061 7 0.2500000
## 2: Age 61.5 0.16374034 12 0.4285714
## 3: Age 39 0.08705750 8 0.2857143
## 4: Age 57.5 0.06947553 11 0.3928571
## 5: SexMale -1.00136e-05 0.04874405 4 0.1428571
## 6: Age 53.5 0.04620627 10 0.3571429
```
> In the table above we have removed two unneeded columns and selected only the first lines.
The first thing you notice is the new column `Split`. It is the split applied to the feature on a branch of one of the trees. Each split is present, therefore a feature can appear several times in this table. Here we can see the feature `Age` is used several times with different splits.
How is the split applied to count the co-occurrences? The rule is always `<`. For instance, in the second line, we measure the number of persons under 61.5 years old with the illness gone after the treatment.
The two other new columns are `RealCover` and `RealCover %`. The first column measures the number of observations in the dataset where the split is respected and the label is marked as `1`. The second column is the share of the positive (`Marked`) observations that `RealCover` represents.
Therefore, according to our findings, getting a placebo doesn't seem to help but being younger than 61 years may help (which seems logical).
> You may wonder how to interpret the `< 1.00001` on the first line. Basically, in a sparse `Matrix`, there is no `0`, therefore, looking for one hot-encoded categorical observations validating the rule `< 1.00001` is like just looking for `1` for this feature.
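As an optional check of that reading (reusing `df` and `output_vector` from the chunks above), the counts below should reproduce the `RealCover` entries of the `Age` / `61.5` row:

```r
# Observations satisfying the split rule Age < 61.5 whose label is 1:
# this should match the RealCover value of that row (12)
sum(df$Age < 61.5 & output_vector)
# RealCover %: the same count as a share of all positive labels
sum(df$Age < 61.5 & output_vector) / sum(output_vector)
```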
### Plotting the feature importance
All these things are nice, but it would be even better to plot the results.
```r
xgb.plot.importance(importance_matrix = importanceRaw)
```
```
## Error in xgb.plot.importance(importance_matrix = importanceRaw): Importance matrix is not correct (column names issue)
```
Features have automatically been divided into 2 clusters: the interesting features... and the others.
> Depending on the dataset and the learning parameters, you may have more than two clusters. The default is to limit them to `10`, but you can increase this limit. Look at the function documentation for more information.
According to the plot above, the most important features in this dataset to predict if the treatment will work are :
* the Age ;
* having received a placebo or not ;
* sex comes third, but it is already in the group of less interesting features ;
* then we see our generated features (AgeDiscret). We can see that their contribution is very low.
### Do these results make sense?
Let's check some **Chi2** between each of these features and the label.
Higher **Chi2** means better correlation.
```r
c2 <- chisq.test(df$Age, output_vector)
print(c2)
```
```
##
## Pearson's Chi-squared test
##
## data: df$Age and output_vector
## X-squared = 35.475, df = 35, p-value = 0.4458
```
The Chi2 statistic between Age and illness disappearing is **35.48**.
```r
c2 <- chisq.test(df$AgeDiscret, output_vector)
print(c2)
```
```
##
## Pearson's Chi-squared test
##
## data: df$AgeDiscret and output_vector
## X-squared = 8.2554, df = 5, p-value = 0.1427
```
Our first simplification of Age gives a Chi2 statistic of **8.26**.
```r
c2 <- chisq.test(df$AgeCat, output_vector)
print(c2)
```
```
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: df$AgeCat and output_vector
## X-squared = 2.3571, df = 1, p-value = 0.1247
```
The perfectly random split I did between young and old at 30 years old has a low Chi2 statistic of **2.36**. It's a result we might expect: maybe in my mind being over 30 means being old (I am 32 and starting to feel old, which may explain it), but for the illness we are studying, the vulnerable age is not the same.
Moral of the story: don't let your *gut* lower the quality of your model.
In the expression *data science*, there is the word *science* :-)
Conclusion
----------
As you can see, in general *destroying information by simplifying it won't improve your model*. **Chi2** just demonstrates that.
But in more complex cases, creating a new feature from an existing one which makes the link with the outcome more obvious may help the algorithm and improve the model.
The case studied here is not complex enough to show that. Check the [Kaggle website](http://www.kaggle.com/) for some challenging datasets. However, it's almost always worse when you add some arbitrary rules.
Moreover, you can notice that even though we have added some useless new features highly correlated with other features, the boosted tree algorithm was still able to choose the best one, which in this case is the Age.
A linear model may not be that smart in this scenario.
Special Note: What about Random Forests™?
-----------------------------------------
As you may know, the [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is a cousin of boosting, and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family.
Both train several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent, while in boosting, the tree `N+1` focuses its learning on the loss (<=> what has not been well modeled by the tree `N`).
This difference has an impact on a corner case in feature importance analysis: the *correlated features*.
Imagine two features perfectly correlated, feature `A` and feature `B`. For one specific tree, if the algorithm needs one of them, it will choose randomly (true in both boosting and Random Forests™).
However, in Random Forests™ this random choice will be made for each tree, because each tree is independent from the others. Therefore, approximately (depending on your parameters), 50% of the trees will choose feature `A` and the other 50% will choose feature `B`. So the *importance* of the information contained in `A` and `B` (which is the same, because they are perfectly correlated) is diluted between `A` and `B`. You won't easily know that this information is important to predict what you want to predict! It is even worse when you have 10 correlated features...
In boosting, when a specific link between feature and outcome has been learned by the algorithm, it will try not to refocus on it (in theory that is what happens; reality is not always that simple). Therefore, all the importance will be on feature `A` or on feature `B` (but not both). You will know that one feature has an important role in the link between the observations and the label. It is still up to you to search for the features correlated to the one detected as important if you need to know all of them.
If you want to try the Random Forests™ algorithm, you can tweak Xgboost parameters!
**Warning**: this is still an experimental parameter.
For instance, to compute a model with 1000 trees, with a 0.5 factor on sampling rows and columns:
```r
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
#Random Forest™ - 1000 trees
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nround = 1, objective = "binary:logistic")
```
```
## [0] train-error:0.002150
```
```r
#Boosting - 3 rounds
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, nround = 3, objective = "binary:logistic")
```
```
## [0] train-error:0.006142
## [1] train-error:0.006756
## [2] train-error:0.001228
```
> Note that the parameter `nround` is set to `1`.
> [**Random Forests™**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.
17
doc/R-package/index.md Normal file
View File
@ -0,0 +1,17 @@
XGBoost R Package
=================
[![CRAN Status Badge](http://www.r-pkg.org/badges/version/xgboost)](http://cran.r-project.org/web/packages/xgboost)
[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/xgboost)](http://cran.rstudio.com/web/packages/xgboost/index.html)
You have found the XGBoost R Package!
Get Started
-----------
* Check out the [Installation Guide](../build.md) for instructions to install xgboost, and the [Tutorials](#tutorials) for examples of how to use xgboost for various tasks.
* Please visit the [walk-through examples](demo).
Tutorials
---------
- [Introduction to XGBoost in R](xgboostPresentation.md)
- [Discover your data with XGBoost in R](discoverYourData.md)
View File
@ -0,0 +1,590 @@
---
title: "Xgboost presentation"
output:
rmarkdown::html_vignette:
css: vignette.css
number_sections: yes
toc: yes
bibliography: xgboost.bib
author: Tianqi Chen, Tong He, Michaël Benesty
vignette: >
%\VignetteIndexEntry{Xgboost presentation}
%\VignetteEngine{knitr::rmarkdown}
\usepackage[utf8]{inputenc}
---
XGBoost R Tutorial
==================
## Introduction
**Xgboost** is short for e**X**treme **G**radient **Boost**ing package.
The purpose of this Vignette is to show you how to use **Xgboost** to build a model and make predictions.
It is an efficient and scalable implementation of gradient boosting framework by @friedman2000additive and @friedman2001greedy. Two solvers are included:
- *linear* model ;
- *tree learning* algorithm.
It supports various objective functions, including *regression*, *classification* and *ranking*. The package is made to be extendible, so that users are also allowed to define their own objective functions easily.
It has been [used](https://github.com/dmlc/xgboost) to win several [Kaggle](http://www.kaggle.com) competitions.
It has several features:
* Speed: it can automatically do parallel computation on *Windows* and *Linux*, with *OpenMP*. It is generally over 10 times faster than the classical `gbm`.
* Input Type: it takes several types of input data:
* *Dense* Matrix: *R*'s *dense* matrix, i.e. `matrix` ;
* *Sparse* Matrix: *R*'s *sparse* matrix, i.e. `Matrix::dgCMatrix` ;
* Data File: local data files ;
* `xgb.DMatrix`: its own class (recommended).
* Sparsity: it accepts *sparse* input for both *tree booster* and *linear booster*, and is optimized for *sparse* input ;
* Customization: it supports customized objective functions and evaluation functions.
## Installation
### Github version
For up-to-date version (highly recommended), install from *Github*:
```r
devtools::install_git('git://github.com/dmlc/xgboost', subdir='R-package')
```
> *Windows* users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
Cran version
------------
As of 2015-03-13, xgboost was removed from the CRAN repository.
Formerly available versions can be obtained from the CRAN [archive](http://cran.r-project.org/src/contrib/Archive/xgboost)
## Learning
For the purpose of this tutorial we will load **XGBoost** package.
```r
require(xgboost)
```
### Dataset presentation
In this example, we are aiming to predict whether a mushroom can be eaten or not (like in many tutorials, the example data are the same as those you will use in your everyday life :-).
Mushroom data is cited from UCI Machine Learning Repository. @Bache+Lichman:2013.
### Dataset loading
We will load the `agaricus` datasets embedded with the package and will link them to variables.
The datasets are already split in:
* `train`: will be used to build the model ;
* `test`: will be used to assess the quality of our model.
Why *split* the dataset in two parts?
In the first part we will build our model. In the second part we will want to test it and assess its quality. Without dividing the dataset we would test the model on data which the algorithm has already seen.
```r
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
```
> In the real world, it would be up to you to make this division between `train` and `test` data. The way to do it is beyond the scope of this article; however, the `caret` package may [help](http://topepo.github.io/caret/splitting.html).
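If you want a rough idea anyway, here is a minimal, hypothetical sketch of a random 80/20 split (`full_data` is a placeholder name for your own `data.frame`; `caret` offers more robust helpers):

```r
# Illustrative random split; full_data is a placeholder name
set.seed(42)
train_idx <- sample(nrow(full_data), size = floor(0.8 * nrow(full_data)))
train_set <- full_data[train_idx, ]
test_set  <- full_data[-train_idx, ]
```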
Each variable is a `list` containing two things, `label` and `data`:
```r
str(train)
```
```
## List of 2
## $ data :
```
```
## Error in str.default(obj, ...): could not find function "is"
```
`label` is the outcome of our dataset meaning it is the binary *classification* we will try to predict.
Let's discover the dimensionality of our datasets.
```r
dim(train$data)
```
```
## [1] 6513 126
```
```r
dim(test$data)
```
```
## [1] 1611 126
```
This dataset is kept very small so as not to make the **R** package too heavy; however, **XGBoost** is built to manage huge datasets very efficiently.
As seen below, the `data` are stored in a `dgCMatrix` which is a *sparse* matrix and `label` vector is a `numeric` vector (`{0,1}`):
```r
class(train$data)[1]
```
```
## [1] "dgCMatrix"
```
```r
class(train$label)
```
```
## [1] "numeric"
```
### Basic Training using XGBoost
This step is the most critical part of the process for the quality of our model.
#### Basic training
We are using the `train` data. As explained above, both `data` and `label` are stored in a `list`.
In a *sparse* matrix, cells containing `0` are not stored in memory. Therefore, in a dataset mainly made of `0`, memory size is reduced. It is very common to have such a dataset.
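To see this on the `agaricus` data (an illustrative aside, not part of the original vignette), compare the number of cells in the matrix with the number of values actually stored:

```r
# A dgCMatrix keeps only the non-zero entries (in its @x slot)
prod(dim(train$data))  # total cells: 6513 * 126
length(train$data@x)   # non-zero cells actually stored in memory
```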
We will train a decision tree model using the following parameters:
* `objective = "binary:logistic"`: we will train a binary classification model ;
* `max.depth = 2`: the trees won't be deep, because our case is very simple ;
* `nthread = 2`: the number of cpu threads we are going to use;
* `nround = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
```r
bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
```
## [0] train-error:0.046522
## [1] train-error:0.022263
```
> The more complex the relationship between your features and your `label` is, the more passes you need.
#### Parameter variations
##### Dense matrix
Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix.
```r
bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
```
## Error in as.vector(data): no method for coercing this S4 class to a vector
```
##### xgb.DMatrix
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later.
```r
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
```
```
## [0] train-error:0.046522
## [1] train-error:0.022263
```
##### Verbose option
**XGBoost** has several features to help you to see how the learning progresses internally. The purpose is to help you to set the best parameters, which is the key to your model quality.
One of the simplest ways to see the training progress is to set the `verbose` option (see below for more advanced techniques).
```r
# verbose = 0, no message
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 0)
```
```r
# verbose = 1, print evaluation metric
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 1)
```
```
## [0] train-error:0.046522
## [1] train-error:0.022263
```
```r
# verbose = 2, also print information about tree
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2)
```
```
## [11:43:20] ../..//amalgamation/../src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=2
## [0] train-error:0.046522
## [11:43:20] ../..//amalgamation/../src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 0 pruned nodes, max_depth=2
## [1] train-error:0.022263
```
## Basic prediction using XGBoost
### Perform the prediction
The purpose of the model we have built is to classify new data. As explained before, we will use the `test` dataset for this step.
```r
pred <- predict(bst, test$data)
# size of the prediction vector
print(length(pred))
```
```
## [1] 1611
```
```r
# limit display of predictions to the first 6
print(head(pred))
```
```
## [1] 0.28583017 0.92392391 0.28583017 0.28583017 0.05169873 0.92392391
```
These numbers don't look like *binary classification* `{0,1}` outputs. We need to perform a simple transformation before being able to use these results.
## Transform the regression into a binary classification
The only thing that **XGBoost** does is a *regression*: it uses the `label` vector to build its *regression* model.
How can we use a *regression* model to perform a binary classification?
If we think about the meaning of a regression applied to our data, the numbers we get are probabilities that a datum will be classified as `1`. Therefore, we will set the rule that if this probability for a specific datum is `> 0.5` then the observation is classified as `1` (or `0` otherwise).
```r
prediction <- as.numeric(pred > 0.5)
print(head(prediction))
```
```
## [1] 0 1 0 0 0 1
```
## Measuring model performance
To measure the model performance, we will compute a simple metric, the *average error*.
```r
err <- mean(as.numeric(pred > 0.5) != test$label)
print(paste("test-error=", err))
```
```
## [1] "test-error= 0.0217256362507759"
```
> Note that the algorithm has not seen the `test` data during the model construction.
Steps explanation:
1. `as.numeric(pred > 0.5)` applies our rule that when the probability (<=> regression <=> prediction) is `> 0.5` the observation is classified as `1` (and `0` otherwise);
2. `probabilityVectorPreviouslyComputed != test$label` computes the vector of errors between the true labels and the thresholded predictions;
3. `mean(vectorOfErrors)` computes the *average error* itself.
The most important thing to remember is that **to do a classification, you just do a regression on the** `label` **and then apply a threshold**.
*Multiclass* classification works in a similar way (a short sketch follows below).
This metric is **0.02**, which is pretty low: our yummy mushroom model works well!
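As mentioned above, multiclass classification works similarly. A hedged sketch (the 3-class `label2` vector below is made up for illustration; `objective = "multi:softmax"` requires the `num_class` parameter):
```r
# hypothetical 3-class labels in {0,1,2}, for illustration only
label2 <- sample(0:2, nrow(train$data), replace = TRUE)
bstMulti <- xgboost(data = train$data, label = label2, max.depth = 2, eta = 1,
                    nthread = 2, nround = 2, num_class = 3,
                    objective = "multi:softmax")
```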
## Advanced features
Most of the features below have been implemented to help you improve your model by offering a better understanding of its content.
### Dataset preparation
For the following advanced features, we need to put the data in an `xgb.DMatrix`, as explained above.
```r
dtrain <- xgb.DMatrix(data = train$data, label=train$label)
dtest <- xgb.DMatrix(data = test$data, label=test$label)
```
### Measure learning progress with xgb.train
Both `xgboost` (simple) and `xgb.train` (advanced) functions train models.
One of the special features of `xgb.train` is the capacity to follow the progress of the learning after each round. Because of the way boosting works, there is a point where having too many rounds leads to overfitting. You can see this feature as a cousin of the cross-validation method. The following techniques will help you avoid overfitting and shorten learning time by stopping it as soon as possible (see the early-stopping sketch further below).
One way to measure progress in the learning of a model is to provide **XGBoost** with a second dataset that is already classified. It can then learn on the first dataset and test its model on the second one. Some metrics are measured after each round during the learning.
> In some way this is similar to what we did above with the average error. The main difference is that above we measured errors after building the model, whereas here we measure them during its construction.
For the purpose of this example, we use the `watchlist` parameter. It is a list of `xgb.DMatrix` objects, each of them tagged with a name.
```r
watchlist <- list(train=dtrain, test=dtest)
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
```
```
## [0] train-error:0.046522 test-error:0.042831
## [1] train-error:0.022263 test-error:0.021726
```
**XGBoost** has computed at each round the same average error metric as seen above (we set `nround` to 2, which is why there are two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
Both training and test error related metrics are very similar, and in some way, it makes sense: what we have learned from the training dataset matches the observations from the test dataset.
If you do not get such results with your own dataset, you should think about how you divided it into training and test sets. Maybe there is something to fix. Again, the `caret` package may [help](http://topepo.github.io/caret/splitting.html).
For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
```r
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
```
```
## [0] train-error:0.046522 train-logloss:0.233376 test-error:0.042831 test-logloss:0.226686
## [1] train-error:0.022263 train-logloss:0.136658 test-error:0.021726 test-logloss:0.137874
```
> `eval.metric` allows us to monitor two new metrics for each round, `logloss` and `error`.
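You can also stop the training as soon as the watchlist metric stops improving. A minimal sketch, assuming the `early.stop.round` and `maximize` arguments are available in this version of `xgb.train`:
```r
# early.stop.round is an assumed argument of this package version:
# stop when test error has not improved for 3 rounds
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=20,
                 watchlist=watchlist, eval.metric = "error",
                 early.stop.round = 3, maximize = FALSE,
                 objective = "binary:logistic")
```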
### Linear boosting
Until now, all the learning we have performed was based on boosted trees. **XGBoost** implements a second algorithm, based on linear boosting. The only difference from the previous command is the `booster = "gblinear"` parameter (and the removal of the `eta` parameter).
```r
bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
```
```
## [0] train-error:0.019499 train-logloss:0.176561 test-error:0.018001 test-logloss:0.173835
## [1] train-error:0.004760 train-logloss:0.068214 test-error:0.003104 test-logloss:0.065493
```
In this specific case, *linear boosting* gets slightly better performance metrics than the decision-tree-based algorithm.
In simple cases, this will happen because there is nothing better than a linear algorithm to catch a linear link. However, decision trees are much better at catching a non-linear link between predictors and outcome. Because there is no silver bullet, we advise you to check both algorithms with your own datasets to get an idea of what to use.
### Manipulating xgb.DMatrix
#### Save / Load
Like models, an `xgb.DMatrix` object (which groups both the dataset and the outcome) can also be saved using the `xgb.DMatrix.save` function.
```r
xgb.DMatrix.save(dtrain, "dtrain.buffer")
```
```
## [1] TRUE
```
```r
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
```
```
## [11:43:20] 6513x126 matrix with 143286 entries loaded from dtrain.buffer
```
```r
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
```
```
## [0] train-error:0.046522 test-error:0.042831
## [1] train-error:0.022263 test-error:0.021726
```
#### Information extraction
Information can be extracted from an `xgb.DMatrix` using the `getinfo` function. Below we extract the `label` data.
```r
label = getinfo(dtest, "label")
pred <- predict(bst, dtest)
err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
print(paste("test-error=", err))
```
```
## [1] "test-error= 0.0217256362507759"
```
### View feature importance/influence from the learnt model
Feature importance is similar to R gbm package's relative influence (rel.inf).
```r
importance_matrix <- xgb.importance(model = bst)
print(importance_matrix)
xgb.plot.importance(importance_matrix = importance_matrix)
```
#### View the trees from a model
You can dump the trees you learned into a text file using `xgb.dump`.
```r
xgb.dump(bst, with.stats = T)
```
```
## [1] "booster[0]"
## [2] "0:[f28<-1.00136e-05] yes=1,no=2,missing=1,gain=4000.53,cover=1628.25"
## [3] "1:[f55<-1.00136e-05] yes=3,no=4,missing=3,gain=1158.21,cover=924.5"
## [4] "3:leaf=1.71218,cover=812"
## [5] "4:leaf=-1.70044,cover=112.5"
## [6] "2:[f108<-1.00136e-05] yes=5,no=6,missing=5,gain=198.174,cover=703.75"
## [7] "5:leaf=-1.94071,cover=690.5"
## [8] "6:leaf=1.85965,cover=13.25"
## [9] "booster[1]"
## [10] "0:[f59<-1.00136e-05] yes=1,no=2,missing=1,gain=832.545,cover=788.852"
## [11] "1:[f28<-1.00136e-05] yes=3,no=4,missing=3,gain=569.725,cover=768.39"
## [12] "3:leaf=0.784718,cover=458.937"
## [13] "4:leaf=-0.96853,cover=309.453"
## [14] "2:leaf=-6.23624,cover=20.4624"
```
You can plot the trees from your model using `xgb.plot.tree`:
```r
xgb.plot.tree(model = bst)
```
> If you provide a path to the `fname` parameter, you can save the trees to your hard drive.
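Similarly, a small sketch (assuming the `fname` argument of `xgb.dump`) writing the text dump to disk instead of printing it to the console:
```r
xgb.dump(bst, fname = "xgboost.dump.txt", with.stats = TRUE)
```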
#### Save and load models
Maybe your dataset is big, and it takes time to train a model on it? Maybe you are not a big fan of losing time redoing the same task again and again? In these very rare cases, you will want to save your model and load it when required.
Luckily for you, **XGBoost** implements such functions.
```r
# save model to binary local file
xgb.save(bst, "xgboost.model")
```
```
## [1] TRUE
```
> The `xgb.save` function returns `TRUE` if everything goes well, and raises an error otherwise.
An interesting test to see how identical our saved model is to the original one would be to compare the two predictions.
```r
# load binary model to R
bst2 <- xgb.load("xgboost.model")
pred2 <- predict(bst2, test$data)
# And now the test
print(paste("sum(abs(pred2-pred))=", sum(abs(pred2-pred))))
```
```
## [1] "sum(abs(pred2-pred))= 0"
```
> The result is `0`? We are good!
In some very specific cases, like when you want to pilot **XGBoost** from the `caret` package, you will want to save the model as an *R* raw vector. See below how to do it.
```r
# save model to R's raw vector
rawVec <- xgb.save.raw(bst)
# print class
print(class(rawVec))
```
```
## [1] "raw"
```
```r
# load binary model to R
bst3 <- xgb.load(rawVec)
pred3 <- predict(bst3, test$data)
# pred3 should be identical to pred
print(paste("sum(abs(pred3-pred))=", sum(abs(pred3-pred))))
```
```
## [1] "sum(abs(pred3-pred))= 0"
```
> Again `0`? It seems that `XGBoost` works pretty well!
doc/build.md
@@ -1,28 +1,144 @@
Build XGBoost
=============
* Run ```bash build.sh``` (you can also type make)
* If you have a C++11 compiler, it is recommended to type ```make cxx11=1```
- C++11 is not used by default
* If your compiler does not come with OpenMP support, it will fire a warning telling you that the code will compile into single thread mode, and you will get a single thread xgboost
* You may get an error: -lgomp is not found
- You can type ```make no_omp=1```, this will get you single thread xgboost
- Alternatively, you can upgrade your compiler to compile multi-thread version
* Windows(VS 2010): see [../windows](../windows) folder
- In principle, you add all the cpp files listed in the Makefile to the project, and build
* OS X with multi-threading support: see [next section](#openmp-for-os-x)
Installation Guide
==================
Build XGBoost in OS X with OpenMP
---------------------------------
Here is the complete solution to use OpenMP-enabled compilers to install XGBoost.
This page gives instructions on how to build and install the xgboost package from
scratch on various systems. It consists of two steps:
1. Obtain gcc-5.x.x with OpenMP support by `brew install gcc --without-multilib`. (`brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.)
1. First build the shared library from the C++ code (`libxgboost.so` for Linux/OSX and `libxgboost.dll` for Windows).
- Exception: for R-package installation please directly refer to the R package section.
2. Then install the language packages (e.g. Python Package).
2. `cd xgboost` then `bash build.sh` to compile XGBoost.
Please refer to the [Installation FAQ](#frequently-asked-questions) first if you have any problems
during installation. If the instructions do not work for you, please feel free
to ask questions at [xgboost/issues](https://github.com/dmlc/xgboost/issues), or
even better, send a pull request if you can fix the problem.
3. Install xgboost package for Python and R
## Contents
- [Build the Shared Library](#build-the-shared-library)
- [Prerequisites](#prerequisites)
- [Building on Ubuntu/Debian](#building-on-ubuntu-debian)
- [Building on OSX](#building-on-osx)
- [Building on Windows](#building-on-windows)
- [Customized Building](#customized-building)
- [Python Package Installation](#python-package-installation)
- [R Package Installation](#r-package-installation)
- [Frequently asked questions](#frequently-asked-questions)
- For Python: go to the `python-package` sub-folder to install the python version with `python setup.py install` (or `sudo python setup.py install`).
- For R: set the `Makevars` file with the highest priority for R.
## Build the Shared Library
Our goal is to build the shared library:
- On Linux/OSX the target library is ```libxgboost.so```
- On Windows the target library is ```libxgboost.dll```
The minimal building requirement is
- A recent C++ compiler supporting C++11 (g++-4.6 or higher)
We can edit `make/config.mk` to change the compile options, and then build with
`make`. If everything goes well, we can go to the specific language installation section.
### Building on Ubuntu/Debian
On Ubuntu, you can build xgboost by
```bash
git clone --recursive https://github.com/dmlc/xgboost
cd xgboost; make -j4
```
### Building on OSX
On OSX, you can build xgboost by
```bash
git clone --recursive https://github.com/dmlc/xgboost
cd xgboost; cp make/minimum.mk ./config.mk; make -j4
```
This builds xgboost without multi-threading, because by default clang on OSX does not come with OpenMP.
See the following paragraph for an OpenMP-enabled xgboost build.
Here is the complete solution to use OpenMP-enabled compilers to install XGBoost.
Obtain gcc-5.x.x with OpenMP support by `brew install gcc --without-multilib`. (`brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.)
```bash
git clone --recursive https://github.com/dmlc/xgboost
cd xgboost; cp make/config.mk ./config.mk; make -j4
```
### Building on Windows
XGBoost supports building with both MSVC and MinGW. Here is how you can build the xgboost library using MinGW.
Build with MinGW:
```bash
cp make/mingw64.mk config.mk; make -j4
```
The MSVC build for the new version is not yet updated.
### Customized Building
The configuration of xgboost can be modified by ```config.mk```
- modify the configuration for various distributed filesystems such as HDFS/Amazon S3/...
- First copy [make/config.mk](../make/config.mk) to the project root, where
any local modification will be ignored by git, then modify the corresponding flags.
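In practice the flow looks like the sketch below (which flags to change depends on your platform; see the comments inside `config.mk`):
```bash
cp make/config.mk ./config.mk
# edit config.mk here, e.g. to switch compilers or enable a filesystem plugin
make -j4
```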
## Python Package Installation
The python package is located at [python-package](../python-package).
There are several ways to install the package:
1. Install system-wide, which requires root permission
```bash
cd python; sudo python setup.py install
```
You will, however, need the Python `distutils` module for this to
work. It is often part of the core python package or it can be installed using your
package manager, e.g. in Debian use
```bash
sudo apt-get install python-setuptools
```
*NOTE: If you recompile xgboost, you need to reinstall it again for
the new library to take effect.*
2. Only set the environment variable `PYTHONPATH` to tell python where to find
the library. For example, assume we cloned `xgboost` in the home directory
`~`; then we can add the following line to `~/.bashrc`.
This is ***recommended for developers*** who may change the code: the changes are immediately reflected once you pull the code and rebuild the project (no need to call ```setup``` again)
```bash
export PYTHONPATH=~/xgboost/python-package
```
3. Install only for the current user.
```bash
cd python; python setup.py develop --user
```
## R Package Installation
You can install the R package using devtools:
```r
devtools::install_git('git://github.com/dmlc/xgboost',subdir='R-package')
```
For OSX users, the single-threaded version will be installed by default. To install the multi-threaded version,
first follow [Building on OSX](#building-on-osx) to get the OpenMP-enabled compiler, then:
- Set the `Makevars` file with the highest priority for R.
The point is, there are three `Makevars`: `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by running `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has the highest priority (surprise!).
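As a sketch, assuming the gcc-5 toolchain installed by the `brew` step above, `~/.R/Makevars` could contain:
```
# assumes gcc-5 from `brew install gcc --without-multilib`
CC = gcc-5
CXX = g++-5
SHLIB_OPENMP_CFLAGS = -fopenmp
SHLIB_OPENMP_CXXFLAGS = -fopenmp
```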
@@ -38,13 +154,19 @@ Here is the complete solution to use OpenMP-enabled compilers to install XGBoost
devtools::install_local('xgboost/', subdir = 'R-package') # you may use devtools
```
## Frequently Asked Questions
Build with HDFS and S3 Support
------------------------------
* To build xgboost with HDFS/S3 support and distributed learning, it is recommended to build with dmlc, with the following steps
- ```git clone https://github.com/dmlc/dmlc-core```
- Follow the instructions in dmlc-core/make/config.mk to compile libdmlc.a
- In the root folder of xgboost, type ```make dmlc=dmlc-core```
* This will allow xgboost to directly load data and save models from/to HDFS and S3
- Simply replace the filename with the prefix s3:// or hdfs://
* This build of xgboost can be used for distributed learning
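As an illustrative sketch only (the conf file and the `data`/`model_out` parameters follow the CLI demos and are assumptions here), data paths can then simply point at HDFS or S3:
```bash
# illustrative only: conf file and parameter names are assumptions
./xgboost mushroom.conf data=hdfs://namenode:9000/path/agaricus.txt.train \
    model_out=s3://my-bucket/mushroom.model
```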
1. **Compile failed after `git pull`**
Please first update the submodules, clean all and recompile:
```bash
git submodule update && make clean_all && make -j4
```
2. **Compile failed after `config.mk` is modified**
Need to clean all first:
```bash
make clean_all && make -j4
```
doc/conf.py
@@ -26,7 +26,7 @@ from sphinx_util import MarkdownParser, AutoStructify
# -- mock out modules
import mock
MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse', 'sklearn', 'matplotlib']
MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse', 'sklearn', 'matplotlib', 'pandas', 'graphviz']
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock()
@@ -120,6 +120,7 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# html_theme = 'alabaster'
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
doc/dev-guide/contribute.md
@@ -1,13 +1,145 @@
Developer Guide
===============
This page contains the guide for developers of xgboost. XGBoost has been developed and used by a group of active community members.
Everyone is more than welcome to contribute; it is a great way to make the project better.
The project is maintained by a committee of [committers](../../CONTRIBUTORS.md#comitters) who will review and merge pull requests from contributors.
Contribute to XGBoost
=====================
XGBoost has been developed and used by a group of active community members.
Everyone is more than welcome to contribute. It is a way to make the project better and more accessible to more users.
Contributing Code
-----------------
* The C++ code follows Google C++ style
* We follow numpy style to document our python module
* Tools to precheck codestyle
- clone https://github.com/dmlc/dmlc-core into root directory
- type ```make lint``` and fix possible errors.
- Please add your name to [CONTRIBUTORS.md](../CONTRIBUTORS.md) after your patch has been merged.
- Please also update [NEWS.md](../NEWS.md) to add note on your changes to the API or added a new document.
Guidelines
----------
* [Submit Pull Request](#submit-pull-request)
* [Git Workflow Howtos](#git-workflow-howtos)
- [How to resolve conflict with master](#how-to-resolve-conflict-with-master)
- [How to combine multiple commits into one](#how-to-combine-multiple-commits-into-one)
- [What is the consequence of force push](#what-is-the-consequence-of-force-push)
* [Document](#document)
* [Testcases](#testcases)
* [Examples](#examples)
* [Core Library](#core-library)
* [Python Package](#python-package)
* [R Package](#r-package)
Submit Pull Request
-------------------
* Before submitting, please rebase your code on the most recent version of master; you can do it by
```bash
git remote add upstream https://github.com/dmlc/xgboost
git fetch upstream
git rebase upstream/master
```
* If you have multiple small commits,
it might be good to merge them together (use git rebase then squash) into more meaningful groups.
* Send the pull request!
- Fix the problems reported by automatic checks
- If you are contributing a new module, consider adding a testcase in [tests](../tests)
Git Workflow Howtos
-------------------
### How to resolve conflict with master
- First rebase to most recent master
```bash
# The first two steps can be skipped after you do it once.
git remote add upstream https://github.com/dmlc/xgboost
git fetch upstream
git rebase upstream/master
```
- Git may show some conflicts it cannot merge, say ```conflicted.py```.
- Manually modify the file to resolve the conflict.
- After you have resolved the conflict, mark it as resolved by
```bash
git add conflicted.py
```
- Then you can continue rebase by
```bash
git rebase --continue
```
- Finally, push to your fork; you may need to force push here.
```bash
git push --force
```
### How to combine multiple commits into one
Sometimes we want to combine multiple commits, especially when later commits are only fixes to previous ones,
to create a PR with a set of meaningful commits. You can do it by the following steps.
- Before doing so, configure the default editor of git if you haven't done so before.
```bash
git config core.editor the-editor-you-like
```
- Assume we want to merge the last 3 commits, type the following commands
```bash
git rebase -i HEAD~3
```
- It will pop up a text editor. Set the first commit as ```pick```, and change later ones to ```squash```.
- After you save the file, another text editor will pop up asking you to modify the combined commit message.
- Push the changes to your fork, you need to force push.
```bash
git push --force
```
### What is the consequence of force push
The previous two tips require a force push because we altered the history of the commits.
It is fine to force push to your own fork, as long as the commits changed are only yours.
Documents
---------
* The document is created using sphinx and [recommonmark](http://recommonmark.readthedocs.org/en/latest/)
* You can build the document locally to see the effect.
Testcases
---------
* All the testcases are in [tests](../tests)
* We use python nose for python test cases.
Examples
--------
* Usecases and examples will be in [demo](../demo)
* We are super excited to hear about your story; if you have blogposts,
tutorials or code solutions using xgboost, please tell us and we will add
a link in the example pages.
Core Library
------------
- Follow Google C style for C++.
- We use doxygen to document all the interface code.
- You can reproduce the linter checks by typing ```make lint```
Python Package
--------------
- Always add docstring to the new functions in numpydoc format.
- You can reproduce the linter checks by typing ```make lint```
R Package
---------
### Code Style
- We follow Google's C++ Style guide on C++ code.
- This is mainly to be consistent with the rest of the project.
- Another reason is we will be able to check style automatically with a linter.
- You can check the style of the code by typing the following command at the root folder.
```bash
make rcpplint
```
- When needed, you can disable the linter warning on a certain line with ```// NOLINT(*)``` comments.
### Rmarkdown Vignettes
Rmarkdown vignettes are placed in [R-package/vignettes](../R-package/vignettes).
These Rmarkdown files are not compiled here. We host the compiled version on [doc/R-package](R-package).
The following steps are used to add a new Rmarkdown vignette:
- Add the original rmarkdown to ```R-package/vignettes```
- Modify ```doc/R-package/Makefile``` to add the markdown files to be built
- Clone the [dmlc/web-data](https://github.com/dmlc/web-data) repo to folder ```doc```
- Now type the following command on ```doc/R-package```
```bash
make the-markdown-to-make.md
```
- This will generate the markdown, as well as the figures, into ```doc/web-data/xgboost/knitr```
- Modify the ```doc/R-package/index.md``` to point to the generated markdown.
- Add the generated figure to the ```dmlc/web-data``` repo.
- If you already cloned the repo to doc, this means a ```git add```
- Create PR for both the markdown and ```dmlc/web-data```
- You can also build the document locally by typing the following command at ```doc```
```bash
make html
```
The reason we do this is to avoid exploding the repo size due to generated image sizes.
doc/index.md
@@ -5,23 +5,26 @@ XGBoost is short for eXtreme gradient boosting. This is a library that is design
The goal of this library is to push the extreme of the computation limits of machines to provide a ***scalable***, ***portable*** and ***accurate*** library
for large scale tree boosting.
This document is hosted at http://xgboost.readthedocs.org/. You can also browse most of the documents in github directly.
How to Get Started
------------------
The best way to get started learning xgboost is through the examples. There are three types of examples you can find in xgboost.
* [Tutorials](#tutorials) are self-contained tutorials on complete data science tasks.
* [XGBoost Code Examples](../demo/) are collections of code and benchmarks of xgboost.
- There is a walkthrough section in it to guide you through specific API features.
* [Highlight Solutions](#highlight-solutions) are presentations using xgboost to solve real world problems.
- These examples are usually more advanced. You can usually find state-of-the-art solutions to many problems and challenges here.
After you get familiar with the interface, check out the following additional resources
User Guide
----------
* [Installation Guide](build.md)
* [Introduction to Boosted Trees](model.md)
* [Python Package Document](python/index.md)
* [R Package Document](R-package/index.md)
* [XGBoost.jl Julia Package](https://github.com/dmlc/XGBoost.jl)
* [Distributed Training](../demo/distributed-training)
* [Frequently Asked Questions](faq.md)
* [Learning What is Behind: Introduction to Boosted Trees](model.md)
* [User Guide](#user-guide) contains a comprehensive list of xgboost documents.
* [Developer Guide](dev-guide/contribute.md)
* [External Memory Version](external_memory.md)
* [Learning to use XGBoost by Example](../demo)
* [Parameters](parameter.md)
* [Text input format](input_format.md)
* [Notes on Parameter Tuning](param_tuning.md)
Developer Guide
---------------
* [Contributor Guide](dev-guide/contribute.md)
Tutorials
---------
@@ -31,14 +34,13 @@ are great resources to learn xgboost by real examples. If you think you have som
- This tutorial introduces the basic usage of CLI version of xgboost
* [Introduction of XGBoost in Python](python/python_intro.md) (python)
- This tutorial introduces the python package of xgboost
* [Introduction to XGBoost in R](../R-package/vignettes/xgboostPresentation.Rmd) (R package)
* [Introduction to XGBoost in R](R-package/xgboostPresentation.md) (R package)
- This is a general presentation about xgboost in R.
* [Discover your data with XGBoost in R](../R-package/vignettes/discoverYourData.Rmd) (R package)
* [Discover your data with XGBoost in R](R-package/discoverYourData.md) (R package)
- This tutorial explains feature analysis in xgboost.
* [Understanding XGBoost Model on Otto Dataset](../demo/kaggle-otto/understandingXGBoostModel.Rmd) (R package)
- This tutorial teaches you how to use xgboost to compete in the Kaggle Otto challenge.
Highlight Solutions
-------------------
This section is about blogposts, presentations and videos discussing how to use xgboost to solve your problem. If you think something belongs here, send a pull request.
@@ -49,23 +51,11 @@ This section is about blogposts, presentation and videos discussing how to use x
* Video tutorial: [Better Optimization with Repeated Cross Validation and the XGBoost model](https://www.youtube.com/watch?v=Og7CGAfSr_Y)
* [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/)
User Guide
----------
* [Frequently Asked Questions](faq.md)
* [Introduction to Boosted Trees](model.md)
* [Using XGBoost in Python](python/python_intro.md)
* [Using XGBoost in R](../R-package/vignettes/xgboostPresentation.Rmd)
* [Learning to use XGBoost by Example](../demo)
* [External Memory Version](external_memory.md)
* [Text input format](input_format.md)
* [Build Instruction](build.md)
* [Parameters](parameter.md)
* [Notes on Parameter Tuning](param_tuning.md)
Indices and tables
------------------
Developer Guide
---------------
* [Developer Guide](dev-guide/contribute.md)
API Reference
-------------
* [Python API Reference](python/python_api.rst)
```eval_rst
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
```
doc/python/index.md Normal file
@@ -0,0 +1,10 @@
XGBoost Python Package
======================
This page contains links to all the Python-related documents on the python package.
To install the package, check out the [Build and Installation Instructions](../build.md).
Contents
--------
* [Python Overview Tutorial](python_intro.md)
* [Learning to use XGBoost by Example](../../demo)
* [Python API Reference](python_api.rst)
doc/sphinx_util.py
@@ -5,11 +5,24 @@ import os
import docutils
import subprocess
if os.environ.get('READTHEDOCS', None) == 'True':
READTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None)
if not os.path.exists('../recommonmark'):
subprocess.call('cd ..; rm -rf recommonmark;' +
'git clone https://github.com/tqchen/recommonmark', shell=True)
'git clone https://github.com/tqchen/recommonmark', shell=True)
else:
subprocess.call('cd ../recommonmark/; git pull', shell=True)
if not os.path.exists('web-data'):
subprocess.call('rm -rf web-data;' +
'git clone https://github.com/dmlc/web-data', shell=True)
else:
subprocess.call('cd web-data; git pull', shell=True)
sys.path.insert(0, os.path.abspath('../recommonmark/'))
sys.stderr.write('READTHEDOCS=%s\n' % (READTHEDOCS_BUILD))
from recommonmark import parser, transform
include/xgboost/base.h Normal file
@@ -0,0 +1,85 @@
/*!
* Copyright (c) 2015 by Contributors
* \file base.h
* \brief defines configuration macros of xgboost.
*/
#ifndef XGBOOST_BASE_H_
#define XGBOOST_BASE_H_
#include <dmlc/base.h>
#include <dmlc/omp.h>
/*!
* \brief string flag for R library, to leave hooks when needed.
*/
#ifndef XGBOOST_STRICT_R_MODE
#define XGBOOST_STRICT_R_MODE 0
#endif
/*!
* \brief Whether to always log console messages with time.
* If enabled, a timestamp is appended to the head of the message, like:
* "[21:47:50] 6513x126 matrix with 143286 entries loaded from ../data/agaricus.txt.train"
*/
#ifndef XGBOOST_LOG_WITH_TIME
#define XGBOOST_LOG_WITH_TIME 1
#endif
/*!
* \brief Whether customize the logger outputs.
*/
#ifndef XGBOOST_CUSTOMIZE_LOGGER
#define XGBOOST_CUSTOMIZE_LOGGER XGBOOST_STRICT_R_MODE
#endif
/*!
* \brief Whether to customize global PRNG.
*/
#ifndef XGBOOST_CUSTOMIZE_GLOBAL_PRNG
#define XGBOOST_CUSTOMIZE_GLOBAL_PRNG XGBOOST_STRICT_R_MODE
#endif
/*! \brief namespace of xgboost */
namespace xgboost {
/*!
* \brief unsigned integer type used in xgboost,
* used for feature index and row index.
*/
typedef uint32_t bst_uint;
/*! \brief long integers */
typedef unsigned long bst_ulong; // NOLINT(*)
/*! \brief float type, used for storing statistics */
typedef float bst_float;
/*! \brief gradient statistics pair usually needed in gradient boosting */
struct bst_gpair {
/*! \brief gradient statistics */
bst_float grad;
/*! \brief second order gradient statistics */
bst_float hess;
bst_gpair() {}
bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {}
};
/*! \brief small eps gap for minimum split decision. */
const float rt_eps = 1e-5f;
/*! \brief min gap between feature values to allow a split happen */
const float rt_2eps = rt_eps * 2.0f;
/*! \brief define unsigned long for openmp loop */
typedef dmlc::omp_ulong omp_ulong;
/*! \brief define unsigned int for openmp loop */
typedef dmlc::omp_uint bst_omp_uint;
/*!
* \brief define compatible keywords in g++
* Used to support g++-4.6 and g++-4.7
*/
#if DMLC_USE_CXX11 && defined(__GNUC__) && !defined(__clang_version__)
#if __GNUC__ == 4 && __GNUC_MINOR__ < 8
#define override
#define final
#endif
#endif
} // namespace xgboost
#endif // XGBOOST_BASE_H_
include/xgboost/c_api.h
@@ -1,12 +1,11 @@
/*!
* Copyright (c) 2014 by Contributors
* \file xgboost_wrapper.h
* Copyright (c) 2015 by Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief a C style wrapper of xgboost
* can be used to create wrapper of other languages
* \brief C API of XGBoost, used to interfacing with other languages.
*/
#ifndef XGBOOST_WRAPPER_H_
#define XGBOOST_WRAPPER_H_
#ifndef XGBOOST_C_API_H_
#define XGBOOST_C_API_H_
#ifdef __cplusplus
#define XGB_EXTERN_C extern "C"
@@ -170,7 +169,8 @@ XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
* \brief get uint32 info vector from matrix
* \param handle an instance of data matrix
* \param field field name
* \param out_ptr pointer to the result
* \param out_len The length of the field.
* \param out_dptr pointer to the result
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
@@ -178,8 +178,9 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
bst_ulong* out_len,
const unsigned **out_dptr);
/*!
* \brief get number of rows
* \brief get number of rows.
* \param handle the handle to the DMatrix
* \param out The address to hold number of rows.
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
@@ -187,6 +188,7 @@ XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
/*!
* \brief get number of columns
* \param handle the handle to the DMatrix
* \param out The output of number of columns
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle,
@@ -213,7 +215,7 @@ XGB_DLL int XGBoosterFree(BoosterHandle handle);
* \brief set parameters
* \param handle handle
* \param name parameter name
* \param val value of parameter
* \param value value of parameter
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
@@ -336,11 +338,11 @@ XGB_DLL int XGBoosterDumpModel(BoosterHandle handle,
* \brief dump model, return array of strings representing model dump
* \param handle handle
* \param fnum number of features
* \param fnum names of features
* \param fnum types of features
* \param fname names of features
* \param ftype types of features
* \param with_stats whether to dump with statistics
* \param out_len length of output array
* \param out_dump_array pointer to hold representing dump of each model
* \param out_models pointer to hold representing dump of each model
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle,
@@ -348,7 +350,7 @@ XGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle,
const char **fname,
const char **ftype,
int with_stats,
bst_ulong *len,
bst_ulong *out_len,
const char ***out_models);
#endif // XGBOOST_WRAPPER_H_
#endif // XGBOOST_C_API_H_
include/xgboost/data.h Normal file
@@ -0,0 +1,298 @@
/*!
* Copyright (c) 2015 by Contributors
* \file data.h
* \brief The input data structure of xgboost.
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_H_
#define XGBOOST_DATA_H_
#include <dmlc/base.h>
#include <dmlc/data.h>
#include <string>
#include <memory>
#include <vector>
#include "./base.h"
namespace xgboost {
// forward declare learner.
class LearnerImpl;
/*! \brief data type accepted by xgboost interface */
enum DataType {
kFloat32 = 1,
kDouble = 2,
kUInt32 = 3,
kUInt64 = 4
};
/*!
* \brief Meta information about dataset, always sit in memory.
*/
struct MetaInfo {
/*! \brief number of rows in the data */
uint64_t num_row;
/*! \brief number of columns in the data */
uint64_t num_col;
/*! \brief number of nonzero entries in the data */
uint64_t num_nonzero;
/*! \brief label of each instance */
std::vector<bst_float> labels;
/*!
* \brief specified root index of each instance,
* can be used for multi task setting
*/
std::vector<bst_uint> root_index;
/*!
* \brief the index of begin and end of a group
* needed when the learning task is ranking.
*/
std::vector<bst_uint> group_ptr;
/*! \brief weights of each instance, optional */
std::vector<bst_float> weights;
/*!
* \brief initialized margins,
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from.
*/
std::vector<bst_float> base_margin;
/*! \brief version flag, used to check version of this info */
static const int kVersion = 1;
/*! \brief default constructor */
MetaInfo() : num_row(0), num_col(0), num_nonzero(0) {}
/*!
* \brief Get weight of each instances.
* \param i Instance index.
* \return The weight.
*/
inline float GetWeight(size_t i) const {
return weights.size() != 0 ? weights[i] : 1.0f;
}
/*!
* \brief Get the root index of i-th instance.
* \param i Instance index.
* \return The pre-defined root index of i-th instance.
*/
inline unsigned GetRoot(size_t i) const {
return root_index.size() != 0 ? root_index[i] : 0U;
}
/*! \brief clear all the information */
void Clear();
/*!
* \brief Load the Meta info from binary stream.
* \param fi The input stream
*/
void LoadBinary(dmlc::Stream* fi);
/*!
* \brief Save the Meta info to binary stream
* \param fo The output stream.
*/
void SaveBinary(dmlc::Stream* fo) const;
/*!
* \brief Set information in the meta info.
* \param key The key of the information.
* \param dptr The data pointer of the source array.
* \param dtype The type of the source data.
* \param num Number of elements in the source array.
*/
void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num);
};
/*! \brief read-only sparse instance batch in CSR format */
struct SparseBatch {
/*! \brief an entry of sparse vector */
struct Entry {
/*! \brief feature index */
bst_uint index;
/*! \brief feature value */
bst_float fvalue;
/*! \brief default constructor */
Entry() {}
/*!
* \brief constructor with index and value
* \param index The feature or row index.
* \param fvalue The feature value.
*/
Entry(bst_uint index, bst_float fvalue) : index(index), fvalue(fvalue) {}
/*! \brief reversely compare feature values */
inline static bool CmpValue(const Entry& a, const Entry& b) {
return a.fvalue < b.fvalue;
}
};
/*! \brief an instance of sparse vector in the batch */
struct Inst {
/*! \brief pointer to the elements*/
const Entry *data;
/*! \brief length of the instance */
bst_uint length;
/*! \brief constructor */
Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
/*! \brief get i-th pair in the sparse vector*/
inline const Entry& operator[](size_t i) const {
return data[i];
}
};
/*! \brief batch size */
size_t size;
};
/*! \brief read-only row batch, used to access row continuously */
struct RowBatch : public SparseBatch {
/*! \brief the offset of rowid of this batch */
size_t base_rowid;
/*! \brief array[size+1], row pointer of each of the elements */
const size_t *ind_ptr;
/*! \brief array[ind_ptr.back()], content of the sparse element */
const Entry *data_ptr;
/*! \brief get i-th row from the batch */
inline Inst operator[](size_t i) const {
return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i]));
}
};
/*!
* \brief read-only column batch, used to access columns,
* the columns are not required to be continuous
*/
struct ColBatch : public SparseBatch {
/*! \brief column index of each columns in the data */
const bst_uint *col_index;
/*! \brief pointer to the column data */
const Inst *col_data;
/*! \brief get i-th column from the batch */
inline Inst operator[](size_t i) const {
return col_data[i];
}
};
/*!
* \brief This is data structure that user can pass to DMatrix::Create
* to create a DMatrix for training, user can create this data structure
* for customized Data Loading on single machine.
*
* In a distributed setting, usually a customized dmlc::Parser is needed instead.
*/
class DataSource : public dmlc::DataIter<RowBatch> {
public:
/*!
* \brief Meta information about the dataset
* The subclass need to be able to load this correctly from data.
*/
MetaInfo info;
};
/*!
* \brief Internal data structured used by XGBoost during training.
* There are two ways to create a customized DMatrix that reads in user defined-format.
*
* - Provide a dmlc::Parser and pass it into DMatrix::Create
* - Alternatively, if the data can be represented by a URL, define a new dmlc::Parser and register it by DMLC_REGISTER_DATA_PARSER;
* - This works best for user defined data input sources, such as databases or filesystems.
* - Provide a DataSource that can be passed to DMatrix::Create
* This can be used to reuse an in-memory data structure as a DMatrix.
*/
class DMatrix {
public:
/*! \brief default constructor */
DMatrix() : cache_learner_ptr_(nullptr) {}
/*! \brief meta information of the dataset */
virtual MetaInfo& info() = 0;
/*! \brief meta information of the dataset */
virtual const MetaInfo& info() const = 0;
/*!
* \brief get the row iterator, reset to beginning position
* \note Only either RowIterator or column Iterator can be active.
*/
virtual dmlc::DataIter<RowBatch>* RowIterator() = 0;
/*!\brief get column iterator, reset to the beginning position */
virtual dmlc::DataIter<ColBatch>* ColIterator() = 0;
/*!
* \brief get the column iterator associated with subset of column features.
* \param fset is the list of column index set that must be contained in the returning Column iterator
* \return the column iterator, initialized so that it reads the elements in fset
*/
virtual dmlc::DataIter<ColBatch>* ColIterator(const std::vector<bst_uint>& fset) = 0;
/*!
* \brief check if column access is supported, if not, initialize column access.
* \param enabled whether certain feature should be included in column access.
* \param subsample subsample ratio when generating column access.
* \param max_row_perbatch auxiliary information, maximum number of rows used in each column batch.
* this is a hint that can be ignored by the implementation.
*/
virtual void InitColAccess(const std::vector<bool>& enabled,
float subsample,
size_t max_row_perbatch) = 0;
// the following are column meta data, should be able to answer them fast.
/*! \return whether column access is enabled */
virtual bool HaveColAccess() const = 0;
/*! \return Whether the data columns single column block. */
virtual bool SingleColBlock() const = 0;
/*! \brief get number of non-missing entries in column */
virtual size_t GetColSize(size_t cidx) const = 0;
/*! \brief get column density */
virtual float GetColDensity(size_t cidx) const = 0;
/*! \return reference of buffered rowset, in column access */
virtual const std::vector<bst_uint>& buffered_rowset() const = 0;
/*! \brief virtual destructor */
virtual ~DMatrix() {}
/*!
* \brief Save DMatrix to local file.
* The saved file only works for non-sharded datasets (single machine training).
* This API is deprecated and its use is discouraged.
* \param fname The file name to save to.
*/
virtual void SaveToLocalFile(const std::string& fname);
/*!
* \brief Load DMatrix from URI.
* \param uri The URI of input.
* \param silent Whether print information during loading.
* \param load_row_split Flag to read in part of rows, divided among the workers in distributed mode.
* \param file_format The format type of the file, used for dmlc::Parser::Create.
* By default "auto" will be able to load in both local binary file.
* \return The created DMatrix.
*/
static DMatrix* Load(const std::string& uri,
bool silent,
bool load_row_split,
const std::string& file_format = "auto");
/*!
* \brief create a new DMatrix, by wrapping a row_iterator, and meta info.
* \param source The source iterator of the data, the create function takes ownership of the source.
* \param cache_prefix The path to prefix of temporary cache file of the DMatrix when used in external memory mode.
* This can be left empty for common cases, and in-memory mode will be used.
* \return a Created DMatrix.
*/
static DMatrix* Create(std::unique_ptr<DataSource>&& source,
const std::string& cache_prefix = "");
/*!
* \brief Create a DMatrix by loading data from a parser.
* The parser can be deleted after the DMatrix is created.
* \param parser The input data parser
* \param cache_prefix The path to prefix of temporary cache file of the DMatrix when used in external memory mode.
* This can be left empty for common cases, and in-memory mode will be used.
* \sa dmlc::Parser
* \note dmlc-core provides efficient distributed data parser for libsvm format.
* User can create and register customized parser to load their own format using DMLC_REGISTER_DATA_PARSER.
* See "dmlc-core/include/dmlc/data.h" for detail.
* \return A created DMatrix.
*/
static DMatrix* Create(dmlc::Parser<uint32_t>* parser,
const std::string& cache_prefix = "");
private:
// allow learner class to access this field.
friend class LearnerImpl;
/*! \brief public field to back ref cached matrix. */
LearnerImpl* cache_learner_ptr_;
};
} // namespace xgboost
namespace dmlc {
DMLC_DECLARE_TRAITS(is_pod, xgboost::SparseBatch::Entry, true);
}
#endif // XGBOOST_DATA_H_
include/xgboost/feature_map.h Normal file
@@ -0,0 +1,92 @@
/*!
* Copyright 2014 by Contributors
* \file feature_map.h
* \brief Feature map data structure to help visualization and model dump.
* \author Tianqi Chen
*/
#ifndef XGBOOST_FEATURE_MAP_H_
#define XGBOOST_FEATURE_MAP_H_
#include <vector>
#include <string>
#include <cstring>
#include <iostream>
namespace xgboost {
/*!
* \brief Feature map data structure to help text model dump.
* TODO(tqchen) consider making it even more lightweight.
*/
class FeatureMap {
public:
/*! \brief type of feature maps */
enum Type {
kIndicator = 0,
kQuantitive = 1,
kInteger = 2,
kFloat = 3
};
/*!
* \brief load feature map from input stream
* \param is Input text stream
*/
inline void LoadText(std::istream& is) { // NOLINT(*)
int fid;
std::string fname, ftype;
while (is >> fid >> fname >> ftype) {
this->PushBack(fid, fname.c_str(), ftype.c_str());
}
}
/*!
* \brief push back feature map.
* \param fid The feature index.
* \param fname The feature name.
* \param ftype The feature type.
*/
inline void PushBack(int fid, const char *fname, const char *ftype) {
CHECK_EQ(fid, static_cast<int>(names_.size()));
names_.push_back(std::string(fname));
types_.push_back(GetType(ftype));
}
/*! \brief clear the feature map */
inline void Clear() {
names_.clear();
types_.clear();
}
/*! \return number of known features */
inline size_t size() const {
return names_.size();
}
/*! \return name of specific feature */
inline const char* name(size_t idx) const {
CHECK_LT(idx, names_.size()) << "FeatureMap feature index exceed bound";
return names_[idx].c_str();
}
/*! \return type of specific feature */
const Type type(size_t idx) const {
CHECK_LT(idx, names_.size()) << "FeatureMap feature index exceed bound";
return types_[idx];
}
private:
/*!
* \return feature type enum given name.
* \param tname The type name.
* \return The translated type.
*/
inline static Type GetType(const char* tname) {
using namespace std;
if (!strcmp("i", tname)) return kIndicator;
if (!strcmp("q", tname)) return kQuantitive;
if (!strcmp("int", tname)) return kInteger;
if (!strcmp("float", tname)) return kFloat;
LOG(FATAL) << "unknown feature type, use i for indicator and q for quantity";
return kIndicator;
}
/*! \brief name of the feature */
std::vector<std::string> names_;
/*! \brief type of the feature */
std::vector<Type> types_;
};
} // namespace xgboost
#endif // XGBOOST_FEATURE_MAP_H_
include/xgboost/gbm.h Normal file
@@ -0,0 +1,168 @@
/*!
* Copyright by Contributors
* \file gbm.h
* \brief Interface of gradient booster,
* that learns through gradient statistics.
* \author Tianqi Chen
*/
#ifndef XGBOOST_GBM_H_
#define XGBOOST_GBM_H_
#include <dmlc/registry.h>
#include <vector>
#include <utility>
#include <string>
#include <functional>
#include "./base.h"
#include "./data.h"
#include "./feature_map.h"
namespace xgboost {
/*!
* \brief interface of gradient boosting model.
*/
class GradientBooster {
public:
/*! \brief virtual destructor */
virtual ~GradientBooster() {}
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
* \param end The end iterator.
* \tparam PairIter iterator<std::pair<std::string, std::string> >
*/
template<typename PairIter>
inline void Configure(PairIter begin, PairIter end);
/*!
* \brief Set the configuration of gradient boosting.
* User must call configure once before InitModel and Training.
*
* \param cfg configurations on both training and model parameters.
*/
virtual void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) = 0;
/*!
* \brief load model from stream
* \param fi input stream.
*/
virtual void Load(dmlc::Stream* fi) = 0;
/*!
* \brief save model to stream.
* \param fo output stream
*/
virtual void Save(dmlc::Stream* fo) const = 0;
/*!
* \brief reset the predict buffer size.
* This will invalidate all the previous cached results
* and recalculate from scratch
* \param num_pbuffer The size of predict buffer.
*/
virtual void ResetPredBuffer(size_t num_pbuffer) {}
/*!
* \brief whether the model allow lazy checkpoint
* return true if model is only updated in DoBoost
* after all Allreduce calls
*/
virtual bool AllowLazyCheckPoint() const {
return false;
}
/*!
* \brief perform update to the model(boosting)
* \param p_fmat feature matrix that provide access to features
* \param buffer_offset buffer index offset of these instances; if it equals -1,
* this means we do not have a buffer index allocated to the gbm
* \param in_gpair address of the gradient pair statistics of the data
* the booster may change content of gpair
*/
virtual void DoBoost(DMatrix* p_fmat,
int64_t buffer_offset,
std::vector<bst_gpair>* in_gpair) = 0;
/*!
* \brief generate predictions for given feature matrix
* \param dmat feature matrix
* \param buffer_offset buffer index offset of these instances, if equals -1
* this means we do not have buffer index allocated to the gbm
* a buffer index is assigned to each instance that requires repetitive prediction
* the size of buffer is set by convention using GradientBooster.ResetPredBuffer(size);
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
*/
virtual void Predict(DMatrix* dmat,
int64_t buffer_offset,
std::vector<float>* out_preds,
unsigned ntree_limit = 0) = 0;
/*!
* \brief online prediction function, predict score for one instance at a time
* NOTE: use the batch prediction interface if possible, batch prediction is usually
* more efficient than online prediction
* This function is NOT threadsafe, make sure you only call from one thread
*
* \param inst the instance you want to predict
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction
* \param root_index the root index
* \sa Predict
*/
virtual void Predict(const SparseBatch::Inst& inst,
std::vector<float>* out_preds,
unsigned ntree_limit = 0,
unsigned root_index = 0) = 0;
/*!
* \brief predict the leaf index of each tree, the output will be nsample * ntree vector
* this is only valid in gbtree predictor
* \param dmat feature matrix
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
*/
virtual void PredictLeaf(DMatrix* dmat,
std::vector<float>* out_preds,
unsigned ntree_limit = 0) = 0;
/*!
* \brief dump the model to text format
* \param fmap feature map that may help give interpretations of feature
* \param option extra option of the dump model
* \return a vector of dump for boosters.
*/
virtual std::vector<std::string> Dump2Text(const FeatureMap& fmap, int option) const = 0;
/*!
* \brief create a gradient booster from given name
* \param name name of gradient booster
* \return The created booster.
*/
static GradientBooster* Create(const std::string& name);
};
// implementing configure.
template<typename PairIter>
inline void GradientBooster::Configure(PairIter begin, PairIter end) {
std::vector<std::pair<std::string, std::string> > vec(begin, end);
this->Configure(vec);
}
/*!
* \brief Registry entry for gradient booster.
*/
struct GradientBoosterReg
: public dmlc::FunctionRegEntryBase<GradientBoosterReg,
std::function<GradientBooster* ()> > {
};
/*!
* \brief Macro to register gradient booster.
*
* \code
* // example of registering the gbtree gradient booster
* XGBOOST_REGISTER_GBM(GBTree, "gbtree")
* .describe("Boosting tree ensembles.")
* .set_body([]() {
* return new GBTree();
* });
* \endcode
*/
#define XGBOOST_REGISTER_GBM(UniqueId, Name) \
static ::xgboost::GradientBoosterReg & __make_ ## GradientBoosterReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_GBM_H_
include/xgboost/learner.h Normal file
@@ -0,0 +1,178 @@
/*!
* Copyright 2015 by Contributors
* \file learner.h
* \brief Learner interface that integrates objective, gbm and evaluation together.
* This is the user facing XGBoost training module.
* \author Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_H_
#define XGBOOST_LEARNER_H_
#include <rabit.h>
#include <utility>
#include <string>
#include <vector>
#include "./base.h"
#include "./gbm.h"
#include "./metric.h"
#include "./objective.h"
namespace xgboost {
/*!
* \brief Learner class that does training and prediction.
* This is the user facing module of xgboost training.
* The Load/Save function corresponds to the model used in python/R.
* \code
*
* std::unique_ptr<Learner> learner(Learner::Create(cache_mats));
* learner->Configure(configs);
*
* for (int iter = 0; iter < max_iter; ++iter) {
* learner->UpdateOneIter(iter, train_mat);
* LOG(INFO) << learner->EvalOneIter(iter, data_sets, data_names);
* }
*
* \endcode
*/
class Learner : public rabit::Serializable {
public:
/*! \brief virtual destructor */
virtual ~Learner() {}
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
* \param end The end iterator.
* \tparam PairIter iterator<std::pair<std::string, std::string> >
*/
template<typename PairIter>
inline void Configure(PairIter begin, PairIter end);
/*!
* \brief Set the configuration of gradient boosting.
* User must call configure once before InitModel and Training.
*
* \param cfg configurations on both training and model parameters.
*/
virtual void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) = 0;
/*!
* \brief Initialize the model using the specified configurations via Configure.
* A model has to be either loaded or initialized before Update/Predict/Save can be called.
*/
virtual void InitModel() = 0;
/*!
* \brief load model from stream
* \param fi input stream.
*/
virtual void Load(dmlc::Stream* fi) = 0;
/*!
* \brief save model to stream.
* \param fo output stream
*/
virtual void Save(dmlc::Stream* fo) const = 0;
/*!
* \brief update the model for one iteration
* With the specified objective function.
* \param iter current iteration number
* \param train reference to the data matrix.
*/
virtual void UpdateOneIter(int iter, DMatrix* train) = 0;
/*!
* \brief Do customized gradient boosting with in_gpair.
* in_gpair can be mutated after this call.
* \param iter current iteration number
* \param train reference to the data matrix.
* \param in_gpair The input gradient statistics.
*/
virtual void BoostOneIter(int iter,
DMatrix* train,
std::vector<bst_gpair>* in_gpair) = 0;
/*!
* \brief evaluate the model for specific iteration using the configured metrics.
* \param iter iteration number
* \param data_sets datasets to be evaluated.
* \param data_names name of each dataset
* \return a string corresponding to the evaluation result
*/
virtual std::string EvalOneIter(int iter,
const std::vector<DMatrix*>& data_sets,
const std::vector<std::string>& data_names) = 0;
/*!
* \brief get prediction given the model.
* \param data input data
* \param output_margin whether to only predict margin value instead of transformed prediction
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
* \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
*/
virtual void Predict(DMatrix* data,
bool output_margin,
std::vector<float> *out_preds,
unsigned ntree_limit = 0,
bool pred_leaf = false) const = 0;
/*!
 * \return whether the model allows lazy checkpoint in rabit.
*/
bool AllowLazyCheckPoint() const;
/*!
* \brief dump the model in text format
* \param fmap feature map that may help give interpretations of feature
* \param option extra option of the dump model
* \return a vector of dump for boosters.
*/
std::vector<std::string> Dump2Text(const FeatureMap& fmap, int option) const;
/*!
* \brief online prediction function, predict score for one instance at a time
 *  NOTE: use the batch prediction interface if possible; batch prediction is usually
 *  more efficient than online prediction.
 *  This function is NOT thread-safe; make sure you only call it from one thread.
*
* \param inst the instance you want to predict
* \param output_margin whether to only predict margin value instead of transformed prediction
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction
*/
inline void Predict(const SparseBatch::Inst &inst,
bool output_margin,
std::vector<float> *out_preds,
unsigned ntree_limit = 0) const;
/*!
* \brief Create a new instance of learner.
* \param cache_data The matrix to cache the prediction.
* \return Created learner.
*/
static Learner* Create(const std::vector<DMatrix*>& cache_data);
protected:
/*! \brief internal base score of the model */
bst_float base_score_;
/*! \brief objective function */
std::unique_ptr<ObjFunction> obj_;
/*! \brief The gradient boosted used by the model*/
std::unique_ptr<GradientBooster> gbm_;
/*! \brief The evaluation metrics used to evaluate the model. */
std::vector<std::unique_ptr<Metric> > metrics_;
};
// implementation of inline functions.
inline void Learner::Predict(const SparseBatch::Inst& inst,
bool output_margin,
std::vector<float>* out_preds,
unsigned ntree_limit) const {
gbm_->Predict(inst, out_preds, ntree_limit);
if (out_preds->size() == 1) {
(*out_preds)[0] += base_score_;
}
if (!output_margin) {
obj_->PredTransform(out_preds);
}
}
// implementing configure.
template<typename PairIter>
inline void Learner::Configure(PairIter begin, PairIter end) {
std::vector<std::pair<std::string, std::string> > vec(begin, end);
this->Configure(vec);
}
} // namespace xgboost
#endif // XGBOOST_LEARNER_H_
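The training loop in the class comment above, written out in compilable form; a minimal sketch that assumes `train` was loaded elsewhere, and whose parameter values are purely illustrative.

```c++
#include <xgboost/learner.h>
#include <xgboost/logging.h>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Train for a few rounds on a DMatrix obtained elsewhere.
void TrainSketch(xgboost::DMatrix* train) {
  std::vector<xgboost::DMatrix*> cache = {train};
  std::unique_ptr<xgboost::Learner> learner(xgboost::Learner::Create(cache));
  // Illustrative parameters; any key understood by the configured booster works.
  std::vector<std::pair<std::string, std::string> > cfg = {
      {"objective", "binary:logistic"}, {"max_depth", "3"}};
  learner->Configure(cfg);
  learner->InitModel();
  for (int iter = 0; iter < 10; ++iter) {
    learner->UpdateOneIter(iter, train);
    LOG(CONSOLE) << learner->EvalOneIter(iter, {train}, {"train"});
  }
}
```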

50
include/xgboost/logging.h Normal file
View File

@ -0,0 +1,50 @@
/*!
* Copyright (c) 2015 by Contributors
* \file logging.h
* \brief defines console logging options for xgboost.
 *  Used to enforce unified print behavior.
* For debug loggers, use LOG(INFO) and LOG(ERROR).
*/
#ifndef XGBOOST_LOGGING_H_
#define XGBOOST_LOGGING_H_
#include <dmlc/logging.h>
#include <sstream>
#include "./base.h"
namespace xgboost {
class BaseLogger {
public:
BaseLogger() {
#if XGBOOST_LOG_WITH_TIME
log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
#endif
}
std::ostream& stream() { return log_stream_; }
protected:
std::ostringstream log_stream_;
};
class ConsoleLogger : public BaseLogger {
public:
~ConsoleLogger();
};
class TrackerLogger : public BaseLogger {
public:
~TrackerLogger();
};
// redefine the logging macro if it does not already exist
#ifndef LOG
#define LOG(severity) LOG_##severity.stream()
#endif
// Enable LOG(CONSOLE) to print messages to the console.
#define LOG_CONSOLE ::xgboost::ConsoleLogger()
// Enable LOG(TRACKER) to print messages to the tracker.
#define LOG_TRACKER ::xgboost::TrackerLogger()
} // namespace xgboost.
#endif // XGBOOST_LOGGING_H_
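A short sketch of how the three severities are meant to be used; the message text is illustrative.

```c++
#include <xgboost/logging.h>

void LoggingSketch(int num_row) {
  // Debug logging provided by dmlc-core.
  LOG(INFO) << "loaded " << num_row << " rows";
  // User-facing progress messages, always printed to the console.
  LOG(CONSOLE) << "[0]  train-error:0.042";
  // Messages forwarded to the job tracker when running distributed.
  LOG(TRACKER) << "worker finished";
}
```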

76
include/xgboost/metric.h Normal file
View File

@ -0,0 +1,76 @@
/*!
* Copyright 2014 by Contributors
* \file metric.h
* \brief interface of evaluation metric function supported in xgboost.
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_METRIC_H_
#define XGBOOST_METRIC_H_
#include <dmlc/registry.h>
#include <vector>
#include <string>
#include <functional>
#include "./data.h"
#include "./base.h"
namespace xgboost {
/*!
* \brief interface of evaluation metric used to evaluate model performance.
 *  This has nothing to do with training; it merely serves evaluation purposes.
*/
class Metric {
public:
/*!
* \brief evaluate a specific metric
* \param preds prediction
* \param info information, including label etc.
* \param distributed whether a call to Allreduce is needed to gather
 *        the average statistics across all the nodes;
 *        this is only supported by some metrics
*/
virtual float Eval(const std::vector<float>& preds,
const MetaInfo& info,
bool distributed) const = 0;
/*! \return name of metric */
virtual const char* Name() const = 0;
/*! \brief virtual destructor */
virtual ~Metric() {}
/*!
* \brief create a metric according to name.
* \param name name of the metric.
 *  name can be in the form metric[@]param
* and the name will be matched in the registry.
* \return the created metric.
*/
static Metric* Create(const std::string& name);
};
/*!
* \brief Registry entry for Metric factory functions.
 *  The additional parameter const char* param gives the value after @, and can be null.
 *  For example, for the metric map@3, param == "3".
*/
struct MetricReg
: public dmlc::FunctionRegEntryBase<MetricReg,
std::function<Metric* (const char*)> > {
};
/*!
* \brief Macro to register metric.
*
* \code
 * // example of registering the metric ndcg@k
 * XGBOOST_REGISTER_METRIC(NDCG, "ndcg")
 * .describe("NDCG metric evaluated at k.")
* .set_body([](const char* param) {
* int at_k = atoi(param);
* return new NDCG(at_k);
* });
* \endcode
*/
#define XGBOOST_REGISTER_METRIC(UniqueId, Name) \
::xgboost::MetricReg& __make_ ## MetricReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::MetricReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_METRIC_H_
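Putting the interface and the macro together, a hypothetical mean-absolute-error metric (not part of this commit) could look as follows; a real implementation would also honor the `distributed` flag by aggregating statistics across nodes.

```c++
#include <xgboost/metric.h>
#include <cmath>
#include <vector>

namespace xgboost {
// Hypothetical metric for illustration: mean absolute error.
struct MAEMetric : public Metric {
  float Eval(const std::vector<float>& preds,
             const MetaInfo& info,
             bool distributed) const override {
    if (preds.size() == 0) return 0.0f;
    double sum = 0.0;
    for (size_t i = 0; i < preds.size(); ++i) {
      sum += std::fabs(preds[i] - info.labels[i]);
    }
    // A distributed version would Allreduce the sum and count here.
    return static_cast<float>(sum / preds.size());
  }
  const char* Name() const override { return "mae"; }
};

XGBOOST_REGISTER_METRIC(MAE, "mae")
.describe("Mean absolute error.")
.set_body([](const char* param) { return new MAEMetric(); });
}  // namespace xgboost
```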

111
include/xgboost/objective.h Normal file
View File

@ -0,0 +1,111 @@
/*!
* Copyright 2014 by Contributors
* \file objective.h
* \brief interface of objective function used by xgboost.
* \author Tianqi Chen, Kailong Chen
*/
#ifndef XGBOOST_OBJECTIVE_H_
#define XGBOOST_OBJECTIVE_H_
#include <dmlc/registry.h>
#include <vector>
#include <utility>
#include <string>
#include <functional>
#include "./data.h"
#include "./base.h"
namespace xgboost {
/*! \brief interface of objective function */
class ObjFunction {
public:
/*! \brief virtual destructor */
virtual ~ObjFunction() {}
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
* \param end The end iterator.
* \tparam PairIter iterator<std::pair<std::string, std::string> >
*/
template<typename PairIter>
inline void Configure(PairIter begin, PairIter end);
/*!
* \brief Configure the objective with the specified parameters.
* \param args arguments to the objective function.
*/
virtual void Configure(const std::vector<std::pair<std::string, std::string> >& args) = 0;
/*!
* \brief Get gradient over each of predictions, given existing information.
* \param preds prediction of current round
* \param info information about labels, weights, groups in rank
* \param iteration current iteration number.
 * \param out_gpair output of GetGradient, storing the gradient and second order gradient of each prediction
*/
virtual void GetGradient(const std::vector<float>& preds,
const MetaInfo& info,
int iteration,
std::vector<bst_gpair>* out_gpair) = 0;
/*! \return the default evaluation metric for the objective */
virtual const char* DefaultEvalMetric() const = 0;
// the following functions are optional, most of time default implementation is good enough
/*!
 * \brief transform prediction values; this is only called when prediction is performed
* \param io_preds prediction values, saves to this vector as well
*/
virtual void PredTransform(std::vector<float> *io_preds) {}
/*!
* \brief transform prediction values, this is only called when Eval is called,
 *  usually it redirects to PredTransform
* \param io_preds prediction values, saves to this vector as well
*/
virtual void EvalTransform(std::vector<float> *io_preds) {
this->PredTransform(io_preds);
}
/*!
* \brief transform probability value back to margin
* this is used to transform user-set base_score back to margin
* used by gradient boosting
* \return transformed value
*/
virtual float ProbToMargin(float base_score) const {
return base_score;
}
/*!
* \brief Create an objective function according to name.
* \param name Name of the objective.
*/
static ObjFunction* Create(const std::string& name);
};
// implementing configure.
template<typename PairIter>
inline void ObjFunction::Configure(PairIter begin, PairIter end) {
std::vector<std::pair<std::string, std::string> > vec(begin, end);
this->Configure(vec);
}
/*!
* \brief Registry entry for objective factory functions.
*/
struct ObjFunctionReg
: public dmlc::FunctionRegEntryBase<ObjFunctionReg,
std::function<ObjFunction* ()> > {
};
/*!
* \brief Macro to register objective function.
*
* \code
 * // example of registering an objective
* XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
* .describe("Linear regression objective")
* .set_body([]() {
* return new RegLossObj(LossType::kLinearSquare);
* });
* \endcode
*/
#define XGBOOST_REGISTER_OBJECTIVE(UniqueId, Name) \
static ::xgboost::ObjFunctionReg & __make_ ## ObjFunctionReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_OBJECTIVE_H_
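A minimal sketch of the call flow a trainer performs against this interface each round; the OneRound wrapper is hypothetical and assumes the built-in reg:linear objective is registered.

```c++
#include <xgboost/objective.h>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Compute gradient statistics for one boosting round.
std::vector<xgboost::bst_gpair> OneRound(const xgboost::MetaInfo& info,
                                         const std::vector<float>& preds,
                                         int iter) {
  std::unique_ptr<xgboost::ObjFunction> obj(
      xgboost::ObjFunction::Create("reg:linear"));
  std::vector<std::pair<std::string, std::string> > cfg;  // use defaults
  obj->Configure(cfg);
  std::vector<xgboost::bst_gpair> gpair;
  obj->GetGradient(preds, info, iter, &gpair);
  return gpair;
}
```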

View File

@ -1,25 +1,66 @@
/*!
* Copyright 2014 by Contributors
* \file model.h
* \file tree_model.h
* \brief model structure for tree
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_MODEL_H_
#define XGBOOST_TREE_MODEL_H_
#include <dmlc/io.h>
#include <dmlc/parameter.h>
#include <limits>
#include <vector>
#include <string>
#include <cstring>
#include <sstream>
#include <limits>
#include <algorithm>
#include <vector>
#include <cmath>
#include "../utils/io.h"
#include "../utils/fmap.h"
#include "../utils/utils.h"
#include "./base.h"
#include "./data.h"
#include "./logging.h"
#include "./feature_map.h"
namespace xgboost {
namespace tree {
/*! \brief meta parameters of the tree */
struct TreeParam : public dmlc::Parameter<TreeParam> {
/*! \brief number of start root */
int num_roots;
/*! \brief total number of nodes */
int num_nodes;
/*!\brief number of deleted nodes */
int num_deleted;
/*! \brief maximum depth, this is a statistics of the tree */
int max_depth;
/*! \brief number of features used for tree construction */
int num_feature;
/*!
* \brief leaf vector size, used for vector tree
* used to store more than one dimensional information in tree
*/
int size_leaf_vector;
/*! \brief reserved part, make sure alignment works for 64bit */
int reserved[31];
/*! \brief constructor */
TreeParam() {
// assert compact alignment
static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int),
"TreeParam: 64 bit align");
std::memset(this, 0, sizeof(TreeParam));
num_nodes = num_roots = 1;
}
// declare the parameters
DMLC_DECLARE_PARAMETER(TreeParam) {
// only declare the parameters that can be set by the user.
// other arguments are set by the algorithm.
DMLC_DECLARE_FIELD(num_roots).set_lower_bound(1).set_default(1)
.describe("Number of start root of trees.");
DMLC_DECLARE_FIELD(num_feature)
.describe("Number of features used in tree construction.");
DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0)
.describe("Size of leaf vector, reserved for vector tree");
}
};
/*!
* \brief template class of TreeModel
* \tparam TSplitCond data type to indicate split condition
@ -32,98 +73,65 @@ class TreeModel {
typedef TNodeStat NodeStat;
  /*! \brief data type to indicate split condition */
typedef TSplitCond SplitCond;
/*! \brief parameters of the tree */
struct Param{
/*! \brief number of start root */
int num_roots;
/*! \brief total number of nodes */
int num_nodes;
/*!\brief number of deleted nodes */
int num_deleted;
/*! \brief maximum depth, this is a statistics of the tree */
int max_depth;
/*! \brief number of features used for tree construction */
int num_feature;
/*!
* \brief leaf vector size, used for vector tree
* used to store more than one dimensional information in tree
*/
int size_leaf_vector;
/*! \brief reserved part */
int reserved[31];
/*! \brief constructor */
Param(void) {
max_depth = 0;
size_leaf_vector = 0;
std::memset(reserved, 0, sizeof(reserved));
}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
inline void SetParam(const char *name, const char *val) {
using namespace std;
if (!strcmp("num_roots", name)) num_roots = atoi(val);
if (!strcmp("num_feature", name)) num_feature = atoi(val);
if (!strcmp("size_leaf_vector", name)) size_leaf_vector = atoi(val);
}
};
/*! \brief tree node */
class Node {
public:
Node(void) : sindex_(0) {}
Node() : sindex_(0) {
// assert compact alignment
static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info),
"Node: 64 bit align");
}
/*! \brief index of left child */
inline int cleft(void) const {
inline int cleft() const {
return this->cleft_;
}
/*! \brief index of right child */
inline int cright(void) const {
inline int cright() const {
return this->cright_;
}
/*! \brief index of default child when feature is missing */
inline int cdefault(void) const {
inline int cdefault() const {
return this->default_left() ? this->cleft() : this->cright();
}
/*! \brief feature index of split condition */
inline unsigned split_index(void) const {
inline unsigned split_index() const {
return sindex_ & ((1U << 31) - 1U);
}
/*! \brief when feature is unknown, whether goes to left child */
inline bool default_left(void) const {
inline bool default_left() const {
return (sindex_ >> 31) != 0;
}
/*! \brief whether current node is leaf node */
inline bool is_leaf(void) const {
inline bool is_leaf() const {
return cleft_ == -1;
}
/*! \brief get leaf value of leaf node */
inline float leaf_value(void) const {
/*! \return get leaf value of leaf node */
inline float leaf_value() const {
return (this->info_).leaf_value;
}
/*! \brief get split condition of the node */
inline TSplitCond split_cond(void) const {
/*! \return get split condition of the node */
inline TSplitCond split_cond() const {
return (this->info_).split_cond;
}
/*! \brief get parent of the node */
inline int parent(void) const {
inline int parent() const {
return parent_ & ((1U << 31) - 1);
}
/*! \brief whether current node is left child */
inline bool is_left_child(void) const {
inline bool is_left_child() const {
return (parent_ & (1U << 31)) != 0;
}
/*! \brief whether this node is deleted */
inline bool is_deleted(void) const {
inline bool is_deleted() const {
return sindex_ == std::numeric_limits<unsigned>::max();
}
/*! \brief whether current node is root */
inline bool is_root(void) const {
inline bool is_root() const {
return parent_ == -1;
}
/*!
* \brief set the right child
* \param nide node id to right child
* \param nid node id to right child
*/
inline void set_right_child(int nid) {
this->cright_ = nid;
@ -152,7 +160,7 @@ class TreeModel {
this->cright_ = right;
}
/*! \brief mark that this node is deleted */
inline void mark_delete(void) {
inline void mark_delete() {
this->sindex_ = std::numeric_limits<unsigned>::max();
}
@ -193,7 +201,7 @@ class TreeModel {
std::vector<bst_float> leaf_vector;
// allocate a new node,
// !!!!!! NOTE: may cause BUG here, nodes.resize
inline int AllocNode(void) {
inline int AllocNode() {
if (param.num_deleted != 0) {
int nd = deleted_nodes.back();
deleted_nodes.pop_back();
@ -201,8 +209,8 @@ class TreeModel {
return nd;
}
int nd = param.num_nodes++;
utils::Check(param.num_nodes < std::numeric_limits<int>::max(),
"number of nodes in the tree exceed 2^31");
CHECK_LT(param.num_nodes, std::numeric_limits<int>::max())
<< "number of nodes in the tree exceed 2^31";
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
@ -210,7 +218,7 @@ class TreeModel {
}
// delete a tree node, keep the parent field to allow trace back
inline void DeleteNode(int nid) {
utils::Assert(nid >= param.num_roots, "can not delete root");
CHECK_GE(nid, param.num_roots);
deleted_nodes.push_back(nid);
nodes[nid].mark_delete();
++param.num_deleted;
@ -220,13 +228,11 @@ class TreeModel {
/*!
* \brief change a non leaf node to a leaf node, delete its children
* \param rid node id of the node
* \param new leaf value
* \param value new leaf value
*/
inline void ChangeToLeaf(int rid, float value) {
utils::Assert(nodes[nodes[rid].cleft() ].is_leaf(),
"can not delete a non termial child");
utils::Assert(nodes[nodes[rid].cright()].is_leaf(),
"can not delete a non termial child");
CHECK(nodes[nodes[rid].cleft() ].is_leaf());
CHECK(nodes[nodes[rid].cright()].is_leaf());
this->DeleteNode(nodes[rid].cleft());
this->DeleteNode(nodes[rid].cright());
nodes[rid].set_leaf(value);
@ -234,7 +240,7 @@ class TreeModel {
/*!
* \brief collapse a non leaf node to a leaf node, delete its children
* \param rid node id of the node
* \param new leaf value
* \param value new leaf value
*/
inline void CollapseToLeaf(int rid, float value) {
if (nodes[rid].is_leaf()) return;
@ -249,38 +255,42 @@ class TreeModel {
public:
/*! \brief model parameter */
Param param;
TreeParam param;
/*! \brief constructor */
TreeModel(void) {
TreeModel() {
param.num_nodes = 1;
param.num_roots = 1;
param.num_deleted = 0;
nodes.resize(1);
}
/*! \brief get node given nid */
inline Node &operator[](int nid) {
inline Node& operator[](int nid) {
return nodes[nid];
}
/*! \brief get node given nid */
inline const Node &operator[](int nid) const {
inline const Node& operator[](int nid) const {
return nodes[nid];
}
/*! \brief get node statistics given nid */
inline NodeStat &stat(int nid) {
inline NodeStat& stat(int nid) {
return stats[nid];
}
/*! \brief get node statistics given nid */
inline const NodeStat& stat(int nid) const {
return stats[nid];
}
/*! \brief get leaf vector given nid */
inline bst_float* leafvec(int nid) {
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
if (leaf_vector.size() == 0) return nullptr;
    return &leaf_vector[nid * param.size_leaf_vector];
}
/*! \brief get leaf vector given nid */
inline const bst_float* leafvec(int nid) const {
if (leaf_vector.size() == 0) return NULL;
return &leaf_vector[nid * param.size_leaf_vector];
if (leaf_vector.size() == 0) return nullptr;
    return &leaf_vector[nid * param.size_leaf_vector];
}
/*! \brief initialize the model */
inline void InitModel(void) {
inline void InitModel() {
param.num_nodes = param.num_roots;
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
@ -294,41 +304,37 @@ class TreeModel {
* \brief load model from stream
* \param fi input stream
*/
inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
utils::Check(fi.Read(&param, sizeof(Param)) > 0,
"TreeModel: wrong format");
nodes.resize(param.num_nodes); stats.resize(param.num_nodes);
utils::Assert(param.num_nodes != 0, "invalid model");
utils::Check(fi.Read(BeginPtr(nodes), sizeof(Node) * nodes.size()) > 0,
"TreeModel: wrong format");
utils::Check(fi.Read(BeginPtr(stats), sizeof(NodeStat) * stats.size()) > 0,
"TreeModel: wrong format");
inline void Load(dmlc::Stream* fi) {
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
CHECK_NE(param.num_nodes, 0);
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()),
sizeof(Node) * nodes.size());
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size()),
sizeof(NodeStat) * stats.size());
if (param.size_leaf_vector != 0) {
utils::Check(fi.Read(&leaf_vector), "TreeModel: wrong format");
CHECK(fi->Read(&leaf_vector));
}
// chg deleted nodes
deleted_nodes.resize(0);
for (int i = param.num_roots; i < param.num_nodes; ++i) {
if (nodes[i].is_deleted()) deleted_nodes.push_back(i);
}
utils::Assert(static_cast<int>(deleted_nodes.size()) == param.num_deleted,
"number of deleted nodes do not match, num_deleted=%d, dnsize=%lu, num_nodes=%d",
param.num_deleted, deleted_nodes.size(), param.num_nodes);
CHECK_EQ(static_cast<int>(deleted_nodes.size()), param.num_deleted);
}
/*!
* \brief save model to stream
* \param fo output stream
*/
inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
"TreeModel::SaveModel");
utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
"TreeModel::SaveModel");
fo.Write(&param, sizeof(Param));
utils::Assert(param.num_nodes != 0, "invalid model");
fo.Write(BeginPtr(nodes), sizeof(Node) * nodes.size());
fo.Write(BeginPtr(stats), sizeof(NodeStat) * nodes.size());
if (param.size_leaf_vector != 0) fo.Write(leaf_vector);
inline void Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_nodes, static_cast<int>(nodes.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats.size()));
fo->Write(&param, sizeof(TreeParam));
CHECK_NE(param.num_nodes, 0);
fo->Write(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size());
fo->Write(dmlc::BeginPtr(stats), sizeof(NodeStat) * nodes.size());
if (param.size_leaf_vector != 0) fo->Write(leaf_vector);
}
/*!
* \brief add child nodes to node
@ -344,7 +350,7 @@ class TreeModel {
}
/*!
* \brief only add a right child to a leaf node
* \param node id to add right child
* \param nid node id to add right child
*/
inline void AddRightChild(int nid) {
int pright = this->AllocNode();
@ -376,7 +382,7 @@ class TreeModel {
/*!
* \brief get maximum depth
*/
inline int MaxDepth(void) {
inline int MaxDepth() {
int maxd = 0;
for (int i = 0; i < param.num_roots; ++i) {
maxd = std::max(maxd, MaxDepth(i));
@ -384,80 +390,9 @@ class TreeModel {
return maxd;
}
/*! \brief number of extra nodes besides the root */
inline int num_extra_nodes(void) const {
inline int num_extra_nodes() const {
return param.num_nodes - param.num_roots - param.num_deleted;
}
/*!
* \brief dump model to text string
* \param fmap feature map of feature types
* \param with_stats whether dump out statistics as well
* \return the string of dumped model
*/
inline std::string DumpModel(const utils::FeatMap& fmap, bool with_stats) {
std::stringstream fo("");
for (int i = 0; i < param.num_roots; ++i) {
this->Dump(i, fo, fmap, 0, with_stats);
}
return fo.str();
}
private:
void Dump(int nid, std::stringstream &fo, // NOLINT(*)
const utils::FeatMap& fmap, int depth, bool with_stats) {
for (int i = 0; i < depth; ++i) {
fo << '\t';
}
if (nodes[nid].is_leaf()) {
fo << nid << ":leaf=" << nodes[nid].leaf_value();
if (with_stats) {
stat(nid).Print(fo, true);
}
fo << '\n';
} else {
// right then left,
TSplitCond cond = nodes[nid].split_cond();
const unsigned split_index = nodes[nid].split_index();
if (split_index < fmap.size()) {
switch (fmap.type(split_index)) {
case utils::FeatMap::kIndicator: {
int nyes = nodes[nid].default_left() ?
nodes[nid].cright() : nodes[nid].cleft();
fo << nid << ":[" << fmap.name(split_index) << "] yes=" << nyes
<< ",no=" << nodes[nid].cdefault();
break;
}
case utils::FeatMap::kInteger: {
fo << nid << ":[" << fmap.name(split_index) << "<"
<< int(float(cond)+1.0f)
<< "] yes=" << nodes[nid].cleft()
<< ",no=" << nodes[nid].cright()
<< ",missing=" << nodes[nid].cdefault();
break;
}
case utils::FeatMap::kFloat:
case utils::FeatMap::kQuantitive: {
fo << nid << ":[" << fmap.name(split_index) << "<"<< float(cond)
<< "] yes=" << nodes[nid].cleft()
<< ",no=" << nodes[nid].cright()
<< ",missing=" << nodes[nid].cdefault();
break;
}
default: utils::Error("unknown fmap type");
}
} else {
fo << nid << ":[f" << split_index << "<"<< float(cond)
<< "] yes=" << nodes[nid].cleft()
<< ",no=" << nodes[nid].cright()
<< ",missing=" << nodes[nid].cdefault();
}
if (with_stats) {
stat(nid).Print(fo, false);
}
fo << '\n';
this->Dump(nodes[nid].cleft(), fo, fmap, depth+1, with_stats);
this->Dump(nodes[nid].cright(), fo, fmap, depth+1, with_stats);
}
}
};
/*! \brief node statistics used in regression tree */
@ -469,63 +404,59 @@ struct RTreeNodeStat {
/*! \brief weight of current node */
float base_weight;
/*! \brief number of child that is leaf node known up to now */
int leaf_child_cnt;
/*! \brief print information of current stats to fo */
inline void Print(std::stringstream &fo, bool is_leaf) const { // NOLINT(*)
if (!is_leaf) {
fo << ",gain=" << loss_chg << ",cover=" << sum_hess;
} else {
fo << ",cover=" << sum_hess;
}
}
int leaf_child_cnt;
};
/*! \brief define regression tree to be the most common tree model */
class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
/*!
* \brief define regression tree to be the most common tree model.
* This is the data structure used in xgboost's major tree models.
*/
class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
public:
/*!
* \brief dense feature vector that can be taken by RegTree
   * to do traversal efficiently
   * and can be construct from sparse feature vector
   * and can be constructed from a sparse feature vector.
*/
struct FVec {
public:
/*!
     * \brief initialize the vector to the given size
* \param size The size of the feature vector.
*/
inline void Init(size_t size);
/*!
* \brief fill the vector with sparse vector
     * \param inst The sparse instance to fill.
*/
inline void Fill(const RowBatch::Inst& inst);
/*!
* \brief drop the trace after fill, must be called after fill.
     * \param inst The sparse instance to drop.
*/
inline void Drop(const RowBatch::Inst& inst);
/*!
* \brief get ith value
* \param i feature index.
* \return the i-th feature value
*/
inline float fvalue(size_t i) const;
/*!
* \brief check whether i-th entry is missing
* \param i feature index.
* \return whether i-th value is missing.
*/
inline bool is_missing(size_t i) const;
private:
/*!
* \brief a union value of value and flag
     *  when flag == -1, this indicates the value is missing
*/
union Entry{
union Entry {
float fvalue;
int flag;
};
std::vector<Entry> data;
/*! \brief initialize the vector with size vector */
inline void Init(size_t size) {
Entry e; e.flag = -1;
data.resize(size);
std::fill(data.begin(), data.end(), e);
}
/*! \brief fill the vector with sparse vector */
inline void Fill(const RowBatch::Inst &inst) {
for (bst_uint i = 0; i < inst.length; ++i) {
if (inst[i].index >= data.size()) continue;
data[inst[i].index].fvalue = inst[i].fvalue;
}
}
/*! \brief drop the trace after fill, must be called after fill */
inline void Drop(const RowBatch::Inst &inst) {
for (bst_uint i = 0; i < inst.length; ++i) {
if (inst[i].index >= data.size()) continue;
data[inst[i].index].flag = -1;
}
}
/*! \brief get ith value */
inline float fvalue(size_t i) const {
return data[i].fvalue;
}
/*! \brief check whether i-th entry is missing */
inline bool is_missing(size_t i) const {
return data[i].flag == -1;
}
};
/*!
* \brief get the leaf index
@ -533,41 +464,86 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
* \param root_id starting root index of the instance
* \return the leaf index of the given feature
*/
inline int GetLeafIndex(const FVec &feat, unsigned root_id = 0) const {
// start from groups that belongs to current data
int pid = static_cast<int>(root_id);
// traverse tree
while (!(*this)[ pid ].is_leaf()) {
unsigned split_index = (*this)[pid].split_index();
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
}
return pid;
}
inline int GetLeafIndex(const FVec& feat, unsigned root_id = 0) const;
/*!
* \brief get the prediction of regression tree, only accepts dense feature vector
* \param feats dense feature vector, if the feature is missing the field is set to NaN
* \param feat dense feature vector, if the feature is missing the field is set to NaN
* \param root_id starting root index of the instance
* \return the leaf index of the given feature
*/
inline float Predict(const FVec &feat, unsigned root_id = 0) const {
int pid = this->GetLeafIndex(feat, root_id);
return (*this)[pid].leaf_value();
}
/*! \brief get next position of the tree given current pid */
inline int GetNext(int pid, float fvalue, bool is_unknown) const {
float split_value = (*this)[pid].split_cond();
if (is_unknown) {
return (*this)[pid].cdefault();
} else {
if (fvalue < split_value) {
return (*this)[pid].cleft();
} else {
return (*this)[pid].cright();
}
}
}
inline float Predict(const FVec& feat, unsigned root_id = 0) const;
/*!
* \brief get next position of the tree given current pid
* \param pid Current node id.
* \param fvalue feature value if not missing.
* \param is_unknown Whether current required feature is missing.
*/
inline int GetNext(int pid, float fvalue, bool is_unknown) const;
/*!
* \brief dump model to text string
* \param fmap feature map of feature types
* \param with_stats whether dump out statistics as well
* \return the string of dumped model
*/
std::string Dump2Text(const FeatureMap& fmap, bool with_stats) const;
};
} // namespace tree
// implementations of inline functions
// do not need to read if only use the model
inline void RegTree::FVec::Init(size_t size) {
Entry e; e.flag = -1;
data.resize(size);
std::fill(data.begin(), data.end(), e);
}
inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) {
for (bst_uint i = 0; i < inst.length; ++i) {
if (inst[i].index >= data.size()) continue;
data[inst[i].index].fvalue = inst[i].fvalue;
}
}
inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
for (bst_uint i = 0; i < inst.length; ++i) {
if (inst[i].index >= data.size()) continue;
data[inst[i].index].flag = -1;
}
}
inline float RegTree::FVec::fvalue(size_t i) const {
return data[i].fvalue;
}
inline bool RegTree::FVec::is_missing(size_t i) const {
return data[i].flag == -1;
}
inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) const {
int pid = static_cast<int>(root_id);
while (!(*this)[pid].is_leaf()) {
unsigned split_index = (*this)[pid].split_index();
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
}
return pid;
}
inline float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
int pid = this->GetLeafIndex(feat, root_id);
return (*this)[pid].leaf_value();
}
/*! \brief get next position of the tree given current pid */
inline int RegTree::GetNext(int pid, float fvalue, bool is_unknown) const {
float split_value = (*this)[pid].split_cond();
if (is_unknown) {
return (*this)[pid].cdefault();
} else {
if (fvalue < split_value) {
return (*this)[pid].cleft();
} else {
return (*this)[pid].cright();
}
}
}
} // namespace xgboost
#endif // XGBOOST_TREE_MODEL_H_
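A minimal sketch of single-instance inference built from FVec and Predict above; the PredictOne helper is hypothetical and assumes one sparse row taken from a RowBatch.

```c++
#include <xgboost/tree_model.h>

// Score one sparse row with a single regression tree.
float PredictOne(const xgboost::RegTree& tree,
                 const xgboost::RowBatch::Inst& inst,
                 size_t num_feature) {
  xgboost::RegTree::FVec feat;
  feat.Init(num_feature);          // mark every feature as missing
  feat.Fill(inst);                 // densify the sparse row
  float out = tree.Predict(feat);  // root-to-leaf traversal via GetNext
  feat.Drop(inst);                 // reset entries so feat can be reused
  return out;
}
```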

View File

@ -0,0 +1,85 @@
/*!
* Copyright 2014 by Contributors
* \file tree_updater.h
* \brief General primitive for tree learning,
* Updating a collection of trees given the information.
* \author Tianqi Chen
*/
#ifndef XGBOOST_TREE_UPDATER_H_
#define XGBOOST_TREE_UPDATER_H_
#include <dmlc/registry.h>
#include <vector>
#include <utility>
#include <string>
#include "./base.h"
#include "./data.h"
#include "./tree_model.h"
namespace xgboost {
/*!
* \brief interface of tree update module, that performs update of a tree.
*/
class TreeUpdater {
public:
/*! \brief virtual destructor */
virtual ~TreeUpdater() {}
/*!
* \brief Initialize the updater with given arguments.
* \param args arguments to the objective function.
*/
virtual void Init(const std::vector<std::pair<std::string, std::string> >& args) = 0;
/*!
* \brief perform update to the tree models
* \param gpair the gradient pair statistics of the data
* \param data The data matrix passed to the updater.
   * \param trees references of the trees to be updated; the updater will change the content of the trees
   *        note: all the trees in the vector are updated with the same statistics,
   *        but possibly different random seeds; usually one tree is passed in at a time,
   *        but there can be multiple trees when we train a random-forest-style model
*/
virtual void Update(const std::vector<bst_gpair>& gpair,
DMatrix* data,
const std::vector<RegTree*>& trees) = 0;
/*!
   * \brief this is simply a function for optimizing performance.
   *  It asks the updater to return the leaf position of each instance in the previously
   *  performed update, if it is cached in the updater; if not available, it returns nullptr.
* \return array of leaf position of each instance in the last updated tree
*/
virtual const int* GetLeafPosition() const {
return nullptr;
}
/*!
* \brief Create a tree updater given name
* \param name Name of the tree updater.
*/
static TreeUpdater* Create(const std::string& name);
};
/*!
* \brief Registry entry for tree updater.
*/
struct TreeUpdaterReg
: public dmlc::FunctionRegEntryBase<TreeUpdaterReg,
std::function<TreeUpdater* ()> > {
};
/*!
* \brief Macro to register tree updater.
*
* \code
 * // example of registering a tree updater
* XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "colmaker")
* .describe("Column based tree maker.")
* .set_body([]() {
* return new ColMaker<TStats>();
* });
* \endcode
*/
#define XGBOOST_REGISTER_TREE_UPDATER(UniqueId, Name) \
static ::xgboost::TreeUpdaterReg& __make_ ## TreeUpdaterReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_H_
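A no-op updater sketches the minimal surface an implementation must provide; NoopUpdater and the "noop" name are illustrative only, not part of this commit.

```c++
#include <xgboost/tree_updater.h>
#include <string>
#include <utility>
#include <vector>

namespace xgboost {
// Illustrative updater that accepts all arguments and changes nothing.
class NoopUpdater : public TreeUpdater {
 public:
  void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}
  void Update(const std::vector<bst_gpair>& gpair,
              DMatrix* data,
              const std::vector<RegTree*>& trees) override {
    // A real updater grows or prunes every tree in `trees` using `gpair`.
  }
};

XGBOOST_REGISTER_TREE_UPDATER(NoopUpdater, "noop")
.describe("Updater that leaves the trees unchanged (illustration).")
.set_body([]() { return new NoopUpdater(); });
}  // namespace xgboost
```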

View File

@ -12,7 +12,7 @@
limitations under the License.
*/
#include "../wrapper/xgboost_wrapper.h"
#include "xgboost/c_api.h"
#include "xgboost4j_wrapper.h"
#include <cstring>

54
make/config.mk Normal file
View File

@ -0,0 +1,54 @@
#-----------------------------------------------------
# xgboost: the configuration compile script
#
# If you want to change the configuration, please use the following
# steps. Assume you are on the root directory of xgboost.
#  First copy this file so that any local changes will be ignored by git
#
# $ cp make/config.mk .
#
#  Next modify the relevant entries, and then compile by
#
# $ make
#
# or build in parallel with 8 threads
#
# $ make -j8
#----------------------------------------------------
# choice of compiler, by default use system preference.
# export CC = gcc
# export CXX = g++
# export MPICXX = mpicxx
# the additional link flags you want to add
ADD_LDFLAGS =
# the additional compile flags you want to add
ADD_CFLAGS =
# Whether enable openmp support, needed for multi-threading.
USE_OPENMP = 1
# whether use HDFS support during compile
USE_HDFS = 0
# whether use AWS S3 support during compile
USE_S3 = 0
# whether use Azure blob support during compile
USE_AZURE = 0
# Rabit library version,
# - librabit.a Normal distributed version.
# - librabit_empty.a Non-distributed mock version.
LIB_RABIT = librabit.a
# path to libjvm.so
LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server
# List of additional plugins, checkout plugin folder.
# uncomment the following lines to include these plugins
# you can also add your own plugin like this
#
# XGB_PLUGINS += plugin/example/plugin.mk

30
make/mingw64.mk Normal file
View File

@ -0,0 +1,30 @@
#-----------------------------------------------------------
# xgboost: Configuration for MinGW(Windows 64bit)
# This allows compiling xgboost on Windows using MinGW.
# You will need to install a MinGW toolchain.
# g++-4.6 or later is required.
#
# see config.mk for template.
#-----------------------------------------------------------
export CXX=g++ -m64
export CC=gcc -m64
# Whether enable openmp support, needed for multi-threading.
USE_OPENMP = 1
# whether use HDFS support during compile
USE_HDFS = 0
# whether use AWS S3 support during compile
USE_S3 = 0
# whether use Azure blob support during compile
USE_AZURE = 0
# Rabit library version,
# - librabit.a Normal distributed version.
# - librabit_empty.a Non-distributed mock version.
LIB_RABIT = librabit_empty.a
DMLC_CFLAGS = -DDMLC_ENABLE_STD_THREAD=0
ADD_CFLAGS = -DDMLC_ENABLE_STD_THREAD=0

22
make/minimum.mk Normal file
View File

@ -0,0 +1,22 @@
#-----------------------------------------------------
# xgboost: minimum dependency configuration,
# see config.mk for template.
#----------------------------------------------------
# Whether enable openmp support, needed for multi-threading.
USE_OPENMP = 0
# whether use HDFS support during compile
USE_HDFS = 0
# whether use AWS S3 support during compile
USE_S3 = 0
# whether use Azure blob support during compile
USE_AZURE = 0
# Rabit library version,
# - librabit.a Normal distributed version.
# - librabit_empty.a Non-distributed mock version.
LIB_RABIT = librabit_empty.a

23
make/minimum_parallel.mk Normal file
View File

@ -0,0 +1,23 @@
#------------------------------------------------------------------------
# xgboost: minimum dependency configuration with parallelization.
# This configuration is standard but cannot run distributed computing.
#
# see config.mk for template.
#------------------------------------------------------------------------
# Whether enable openmp support, needed for multi-threading.
USE_OPENMP = 1
# whether use HDFS support during compile
USE_HDFS = 0
# whether use AWS S3 support during compile
USE_S3 = 0
# whether use Azure blob support during compile
USE_AZURE = 0
# Rabit library version,
# - librabit.a Normal distributed version.
# - librabit_empty.a Non-distributed mock version.
LIB_RABIT = librabit_empty.a

33
make/travis.mk Normal file
View File

@ -0,0 +1,33 @@
# the additional link flags you want to add
ADD_LDFLAGS =
# the additional compile flags you want to add
ADD_CFLAGS =
# Whether enable openmp support, needed for multi-threading.
USE_OPENMP = 1
# whether use HDFS support during compile
USE_HDFS = 0
# whether use AWS S3 support during compile
USE_S3 = 0
# whether use Azure blob support during compile
USE_AZURE = 0
# Rabit library version,
# - librabit.a Normal distributed version.
# - librabit_empty.a Non-distributed mock version.
LIB_RABIT = librabit.a
# path to libjvm.so
LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server
# List of additional plugins, checkout plugin folder.
# uncomment the following lines to include these plugins
# you can also add your own plugin like this
#
XGB_PLUGINS += plugin/example/plugin.mk
XGB_PLUGINS += plugin/lz4/plugin.mk

View File

@ -1,28 +0,0 @@
Distributed XGBoost
======
Distributed XGBoost is now part of [Wormhole](https://github.com/dmlc/wormhole).
Check out this [link](https://github.com/dmlc/wormhole/tree/master/learn/xgboost) for usage examples, builds, and job submissions.
* The distributed version is built on Rabit:[Reliable Allreduce and Broadcast Library](https://github.com/dmlc/rabit)
- Rabit is a portable library that provides fault-tolerance for Allreduce calls for distributed machine learning
- This makes xgboost portable and fault-tolerant against node failures
Notes
====
* Rabit handles all the fault tolerance and communication efficiently; we only use platform-specific commands to start programs
  - The Hadoop version does not rely on MapReduce to do iterations
  - You can expect xgboost not to suffer the drawbacks of iterative MapReduce programs
* The design choice was made because Allreduce is very natural and efficient for distributed tree building
  - In the current version of xgboost, the distributed version only adds several lines of Allreduce synchronization code
* The multi-threading nature of xgboost is inherited in distributed mode
  - This means xgboost efficiently uses all the threads in one machine, and communicates only between machines
  - Remember to run one xgboost process per machine; this will give you maximum speedup
* For more information about rabit and how it works, see [Rabit's Tutorial](https://github.com/dmlc/rabit/tree/master/guide)
Solvers
=====
* Column-based solver splits data by column; each node works on a subset of columns,
  using exactly the same algorithm as the single node version.
* Row-based solver splits data by row; each node works on a subset of rows,
  using an approximate histogram count algorithm that only examines a subset of
  potential split points as opposed to all split points.
  - This is the mode used by the current hadoop version, since data is usually stored by rows in many industry systems

View File

@ -1,19 +0,0 @@
Distributed XGBoost: Column Split Version
====
* run ```bash mushroom-col-rabit.sh <n-process>```
  - mushroom-col-rabit.sh starts an xgboost job using rabit's allreduce
* run ```bash mushroom-col-rabit-mock.sh <n-process>```
  - mushroom-col-rabit-mock.sh starts an xgboost job using rabit's allreduce, inserts a suicide signal at a certain point and tests recovery
How to Use
====
* First split the data by column.
* In the config, specify the data file as containing a wildcard %d, where %d is the rank of the node; each node will load its part of the data
* Enable column split mode by ```dsplit=col```
Notes
====
* The code is multi-threaded, so you want to run one process per node
* The code will work correctly as long as the union of the column subsets covers all the columns we are interested in.
  - The column subsets can overlap with each other.
* It uses exactly the same algorithm as the single node version, examining all potential split points.

View File

@ -1,25 +0,0 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
#
# This script is the same as mushroom-col except that we will be using xgboost instead of xgboost-mpi
# xgboost uses the built-in tcp-based allreduce module, and can run in more environments, so long as we know how to start the job by modifying ../submit_job_tcp.py
#
rm -rf train.col* *.model
k=$1
# split the lib svm file into k subfiles
python splitsvm.py ../../demo/data/agaricus.txt.train train $k
# run xgboost mpi
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost.mock mushroom-col.conf dsplit=col mock=0,2,0,0 mock=1,2,0,0 mock=2,2,8,0 mock=2,3,0,0
# the model can be directly loaded by the single machine xgboost solver, as usual
#../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
#cat dump.nice.$k.txt

View File

@ -1,28 +0,0 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
#
# This script is the same as mushroom-col except that we will be using xgboost instead of xgboost-mpi
# xgboost uses the built-in tcp-based allreduce module, and can run in more environments, so long as we know how to start the job by modifying ../submit_job_tcp.py
#
rm -rf train.col* *.model
k=$1
# split the lib svm file into k subfiles
python splitsvm.py ../../demo/data/agaricus.txt.train train $k
# run xgboost mpi
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost mushroom-col.conf dsplit=col
# the model can be directly loaded by single machine xgboost solver, as usuall
../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
# run for one round, and continue training
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost mushroom-col.conf dsplit=col num_round=1
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost mushroom-col.conf dsplit=col model_in=0001.model
cat dump.nice.$k.txt

View File

@ -1,35 +0,0 @@
# General Parameters, see comment for each definition
# choose the booster, can be gbtree or gblinear
booster = gbtree
# choose logistic regression loss function for binary classification
objective = binary:logistic
# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3
# Task Parameters
# the number of round to do boosting
num_round = 2
# 0 means do not save any model except the final round model
save_period = 0
use_buffer = 0
# The path of training data; %d is the wildcard for the rank of the process
# The idea is that each process takes a feature matrix with a subset of columns
#
data = "train.col%d"
# The path of validation data, used to monitor training progress; here [test] sets the name of the validation set
eval[test] = "../../demo/data/agaricus.txt.test"
# evaluate on training data as well each round
eval_train = 1
# The path of test data; it needs the full test data, so try not to use it, or keep a subsampled version
test:data = "../../demo/data/agaricus.txt.test"

View File

@ -1,32 +0,0 @@
#!/usr/bin/python
import sys
import random
# split libsvm file into different subcolumns
if len(sys.argv) < 4:
print ('Usage:<fin> <fo> k')
exit(0)
random.seed(10)
fmap = {}
k = int(sys.argv[3])
fi = open( sys.argv[1], 'r' )
fos = []
for i in range(k):
fos.append(open( sys.argv[2]+'.col%d' % i, 'w' ))
for l in open(sys.argv[1]):
arr = l.split()
for f in fos:
f.write(arr[0])
for it in arr[1:]:
fid = int(it.split(':')[0])
if fid not in fmap:
fmap[fid] = random.randint(0, k-1)
fos[fmap[fid]].write(' '+it)
for f in fos:
f.write('\n')
for f in fos:
f.close()

32
plugin/README.md Normal file
View File

@ -0,0 +1,32 @@
XGBoost Plugins Modules
=======================
This folder contains plugin modules to xgboost that can be optionally installed.
The plugin system helps us extend xgboost with additional features,
and add experimental features that may not yet be ready to be included in the main project.
To include a certain plugin, say ```plugin_a```, you only need to add the following line to config.mk.
```makefile
# Add plugin by include the plugin in config
XGB_PLUGINS += plugin/plugin_a/plugin.mk
```
Then rebuild libxgboost by typing make; you will get a new library with the plugin enabled.
Link Static XGBoost Library with Plugins
----------------------------------------
This issue only arises when you link ```libxgboost.a```.
If you only use ```libxgboost.so``` (this includes python and other bindings),
you can ignore this section.
When you want to link ```libxgboost.a``` with additional plugins included,
you will need to enable whole-archive linking via the following option.
```bash
--whole-archive libxgboost.a --no-whole-archive
```
Write Your Own Plugin
---------------------
You can plug your own modules into xgboost by adding code to this folder,
without modifying the main code repo.
The [example](example) folder provides an example to write a plugin.

21
plugin/example/README.md Normal file
View File

@ -0,0 +1,21 @@
XGBoost Plugin Example
======================
This folder provides an example of xgboost plugin.
There are three steps you need to do to add a plugin to xgboost
- Create your source .cc file, implement a new extension
- In this example [custom_obj.cc](custom_obj.cc)
- Register this extension to xgboost via the registration macro
- In this example ```XGBOOST_REGISTER_OBJECTIVE``` in [this line](custom_obj.cc#L75)
- Create a [plugin.mk](plugin.mk) on this folder
To add this plugin, add the following line to ```config.mk``` (template in make/config.mk).
```makefile
# Add plugin by include the plugin in config
XGB_PLUGINS += plugin/example/plugin.mk
```
Then you can test this plugin by using the ```objective=mylogistic``` parameter.

View File

@ -0,0 +1,80 @@
/*!
* Copyright 2015 by Contributors
 * \file custom_obj.cc
 * \brief This is an example of defining a plugin for xgboost.
 *  This plugin defines an additional objective function.
*/
#include <xgboost/base.h>
#include <dmlc/parameter.h>
#include <xgboost/objective.h>
namespace xgboost {
namespace obj {
// This is a helpful data structure to define parameters
// You do not have to use it.
// see http://dmlc-core.readthedocs.org/en/latest/parameter.html
// for introduction of this module.
struct MyLogisticParam : public dmlc::Parameter<MyLogisticParam> {
float scale_neg_weight;
// declare parameters
DMLC_DECLARE_PARAMETER(MyLogisticParam) {
DMLC_DECLARE_FIELD(scale_neg_weight).set_default(1.0f).set_lower_bound(0.0f)
.describe("Scale the weight of negative examples by this factor");
}
};
DMLC_REGISTER_PARAMETER(MyLogisticParam);
// Define a customized logistic regression objective in C++.
// Implement the interface.
class MyLogistic : public ObjFunction {
public:
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(const std::vector<float> &preds,
const MetaInfo &info,
int iter,
std::vector<bst_gpair> *out_gpair) override {
out_gpair->resize(preds.size());
for (size_t i = 0; i < preds.size(); ++i) {
float w = info.GetWeight(i);
// scale the negative examples!
if (info.labels[i] == 0.0f) w *= param_.scale_neg_weight;
      // logistic transformation
float p = 1.0f / (1.0f + expf(-preds[i]));
// this is the gradient
float grad = (p - info.labels[i]) * w;
// this is the second order gradient
float hess = p * (1.0f - p) * w;
out_gpair->at(i) = bst_gpair(grad, hess);
}
}
const char* DefaultEvalMetric() const override {
return "error";
}
void PredTransform(std::vector<float> *io_preds) override {
// transform margin value to probability.
std::vector<float> &preds = *io_preds;
for (size_t i = 0; i < preds.size(); ++i) {
preds[i] = 1.0f / (1.0f + expf(-preds[i]));
}
}
float ProbToMargin(float base_score) const override {
// transform probability to margin value
return -std::log(1.0f / base_score - 1.0f);
}
private:
MyLogisticParam param_;
};
// Finally register the objective function.
// After it succeeds you can try use xgboost with objective=mylogistic
XGBOOST_REGISTER_OBJECTIVE(MyLogistic, "mylogistic")
.describe("User defined logistic regression plugin")
.set_body([]() { return new MyLogistic(); });
} // namespace obj
} // namespace xgboost

4
plugin/example/plugin.mk Normal file
View File

@ -0,0 +1,4 @@
# Add the object files you like to include in this plugin.
PLUGIN_OBJS += build_plugin/example/custom_obj.o
# Add additional dependent libraries this plugin might have
PLUGIN_LDFLAGS +=

2
plugin/lz4/plugin.mk Normal file
View File

@ -0,0 +1,2 @@
PLUGIN_OBJS += build_plugin/lz4/sparse_page_lz4_format.o
PLUGIN_LDFLAGS += -llz4

View File

@ -0,0 +1,327 @@
/*!
* Copyright (c) 2015 by Contributors
* \file sparse_page_lz4_format.cc
* XGBoost Plugin to enable LZ4 compressed format on the external memory pages.
*/
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <dmlc/registry.h>
#include <dmlc/parameter.h>
#include <lz4.h>
#include <lz4hc.h>
#include "../../src/data/sparse_batch_page.h"
namespace xgboost {
namespace data {
DMLC_REGISTRY_FILE_TAG(sparse_page_lz4_format);
// array to help compression and decompression.
template<typename DType>
class CompressArray {
public:
// the data content.
std::vector<DType> data;
// Decompression helper
// number of chunks
inline int num_chunk() const {
CHECK_GT(raw_chunks_.size(), 1);
return static_cast<int>(raw_chunks_.size() - 1);
}
// raw bytes
inline size_t RawBytes() const {
return raw_chunks_.back() * sizeof(DType);
}
// encoded bytes
inline size_t EncodedBytes() const {
return encoded_chunks_.back() +
(encoded_chunks_.size() + raw_chunks_.size()) * sizeof(bst_uint);
}
// load the array from file.
inline void Read(dmlc::SeekStream* fi);
// run decode on chunk_id
inline void Decompress(int chunk_id);
// Compression helper
// initialize the compression chunks
inline void InitCompressChunks(const std::vector<bst_uint>& chunk_ptr);
// initialize the compression chunks
inline void InitCompressChunks(size_t chunk_size, size_t max_nchunk);
  // run compression on chunk_id; use_hc selects the high-compression codec.
inline void Compress(int chunk_id, bool use_hc);
// save the output buffer into file.
inline void Write(dmlc::Stream* fo);
private:
// the chunk split of the data, by number of elements
std::vector<bst_uint> raw_chunks_;
// the encoded chunk, by number of bytes
std::vector<bst_uint> encoded_chunks_;
// output buffer of compression.
std::vector<std::string> out_buffer_;
// input buffer of data.
std::string in_buffer_;
};
template<typename DType>
inline void CompressArray<DType>::Read(dmlc::SeekStream* fi) {
CHECK(fi->Read(&raw_chunks_));
CHECK(fi->Read(&encoded_chunks_));
size_t buffer_size = encoded_chunks_.back();
in_buffer_.resize(buffer_size);
CHECK_EQ(fi->Read(dmlc::BeginPtr(in_buffer_), buffer_size), buffer_size);
data.resize(raw_chunks_.back());
}
template<typename DType>
inline void CompressArray<DType>::Decompress(int chunk_id) {
int chunk_size = static_cast<int>(
raw_chunks_[chunk_id + 1] - raw_chunks_[chunk_id]) * sizeof(DType);
int encoded_size = static_cast<int>(
encoded_chunks_[chunk_id + 1] - encoded_chunks_[chunk_id]);
// decompress data
int src_size = LZ4_decompress_fast(
dmlc::BeginPtr(in_buffer_) + encoded_chunks_[chunk_id],
reinterpret_cast<char*>(dmlc::BeginPtr(data) + raw_chunks_[chunk_id]),
chunk_size);
CHECK_EQ(encoded_size, src_size);
}
template<typename DType>
inline void CompressArray<DType>::InitCompressChunks(
const std::vector<bst_uint>& chunk_ptr) {
raw_chunks_ = chunk_ptr;
CHECK_GE(raw_chunks_.size(), 2);
out_buffer_.resize(raw_chunks_.size() - 1);
for (size_t i = 0; i < out_buffer_.size(); ++i) {
out_buffer_[i].resize(raw_chunks_[i + 1] - raw_chunks_[i]);
}
}
template<typename DType>
inline void CompressArray<DType>::InitCompressChunks(size_t chunk_size, size_t max_nchunk) {
raw_chunks_.clear();
raw_chunks_.push_back(0);
size_t min_chunk_size = data.size() / max_nchunk;
chunk_size = std::max(min_chunk_size, chunk_size);
size_t nstep = data.size() / chunk_size;
for (size_t i = 0; i < nstep; ++i) {
raw_chunks_.push_back(raw_chunks_.back() + chunk_size);
CHECK_LE(raw_chunks_.back(), data.size());
}
if (nstep == 0) raw_chunks_.push_back(0);
raw_chunks_.back() = data.size();
CHECK_GE(raw_chunks_.size(), 2);
out_buffer_.resize(raw_chunks_.size() - 1);
for (size_t i = 0; i < out_buffer_.size(); ++i) {
out_buffer_[i].resize(raw_chunks_[i + 1] - raw_chunks_[i]);
}
}
template<typename DType>
inline void CompressArray<DType>::Compress(int chunk_id, bool use_hc) {
CHECK_LT(static_cast<size_t>(chunk_id + 1), raw_chunks_.size());
std::string& buf = out_buffer_[chunk_id];
size_t raw_chunk_size = (raw_chunks_[chunk_id + 1] - raw_chunks_[chunk_id]) * sizeof(DType);
int bound = LZ4_compressBound(raw_chunk_size);
CHECK_NE(bound, 0);
buf.resize(bound);
int encoded_size;
if (use_hc) {
encoded_size = LZ4_compress_HC(
reinterpret_cast<char*>(dmlc::BeginPtr(data) + raw_chunks_[chunk_id]),
dmlc::BeginPtr(buf), raw_chunk_size, buf.length(), 9);
} else {
encoded_size = LZ4_compress_default(
reinterpret_cast<char*>(dmlc::BeginPtr(data) + raw_chunks_[chunk_id]),
dmlc::BeginPtr(buf), raw_chunk_size, buf.length());
}
CHECK_NE(encoded_size, 0);
CHECK_LE(static_cast<size_t>(encoded_size), buf.length());
buf.resize(encoded_size);
}
template<typename DType>
inline void CompressArray<DType>::Write(dmlc::Stream* fo) {
encoded_chunks_.clear();
encoded_chunks_.push_back(0);
for (size_t i = 0; i < out_buffer_.size(); ++i) {
encoded_chunks_.push_back(encoded_chunks_.back() + out_buffer_[i].length());
}
fo->Write(raw_chunks_);
fo->Write(encoded_chunks_);
for (const std::string& buf : out_buffer_) {
fo->Write(dmlc::BeginPtr(buf), buf.length());
}
}
template<typename StorageIndex>
class SparsePageLZ4Format : public SparsePage::Format {
public:
explicit SparsePageLZ4Format(bool use_lz4_hc)
: use_lz4_hc_(use_lz4_hc) {
raw_bytes_ = raw_bytes_value_ = raw_bytes_index_ = 0;
encoded_bytes_value_ = encoded_bytes_index_ = 0;
nthread_ = dmlc::GetEnv("XGBOOST_LZ4_DECODE_NTHREAD", 4);
nthread_write_ = dmlc::GetEnv("XGBOOST_LZ4_COMPRESS_NTHREAD", 12);
}
virtual ~SparsePageLZ4Format() {
size_t encoded_bytes = raw_bytes_ + encoded_bytes_value_ + encoded_bytes_index_;
raw_bytes_ += raw_bytes_value_ + raw_bytes_index_;
if (raw_bytes_ != 0) {
LOG(CONSOLE) << "raw_bytes=" << raw_bytes_
<< ", encoded_bytes=" << encoded_bytes
<< ", ratio=" << double(encoded_bytes) / raw_bytes_
<< ", ratio-index=" << double(encoded_bytes_index_) /raw_bytes_index_
<< ", ratio-value=" << double(encoded_bytes_value_) /raw_bytes_value_;
}
}
bool Read(SparsePage* page, dmlc::SeekStream* fi) override {
if (!fi->Read(&(page->offset))) return false;
CHECK_NE(page->offset.size(), 0) << "Invalid SparsePage file";
this->LoadIndexValue(fi);
page->data.resize(page->offset.back());
CHECK_EQ(index_.data.size(), value_.data.size());
CHECK_EQ(index_.data.size(), page->data.size());
for (size_t i = 0; i < page->data.size(); ++i) {
page->data[i] = SparseBatch::Entry(index_.data[i] + min_index_, value_.data[i]);
}
return true;
}
bool Read(SparsePage* page,
dmlc::SeekStream* fi,
const std::vector<bst_uint>& sorted_index_set) override {
if (!fi->Read(&disk_offset_)) return false;
this->LoadIndexValue(fi);
page->offset.clear();
page->offset.push_back(0);
for (bst_uint cid : sorted_index_set) {
page->offset.push_back(
page->offset.back() + disk_offset_[cid + 1] - disk_offset_[cid]);
}
page->data.resize(page->offset.back());
CHECK_EQ(index_.data.size(), value_.data.size());
CHECK_EQ(index_.data.size(), disk_offset_.back());
for (size_t i = 0; i < sorted_index_set.size(); ++i) {
bst_uint cid = sorted_index_set[i];
size_t dst_begin = page->offset[i];
size_t src_begin = disk_offset_[cid];
size_t num = disk_offset_[cid + 1] - disk_offset_[cid];
for (size_t j = 0; j < num; ++j) {
page->data[dst_begin + j] = SparseBatch::Entry(
index_.data[src_begin + j] + min_index_, value_.data[src_begin + j]);
}
}
return true;
}
void Write(const SparsePage& page, dmlc::Stream* fo) override {
CHECK(page.offset.size() != 0 && page.offset[0] == 0);
CHECK_EQ(page.offset.back(), page.data.size());
fo->Write(page.offset);
min_index_ = page.min_index;
fo->Write(&min_index_, sizeof(min_index_));
index_.data.resize(page.data.size());
value_.data.resize(page.data.size());
for (size_t i = 0; i < page.data.size(); ++i) {
bst_uint idx = page.data[i].index - min_index_;
CHECK_LE(idx, static_cast<bst_uint>(std::numeric_limits<StorageIndex>::max()))
<< "The storage index must be less than or equal to "
<< std::numeric_limits<StorageIndex>::max()
<< ", min_index=" << min_index_;
index_.data[i] = static_cast<StorageIndex>(idx);
value_.data[i] = page.data[i].fvalue;
}
index_.InitCompressChunks(kChunkSize, kMaxChunk);
value_.InitCompressChunks(kChunkSize, kMaxChunk);
int nindex = index_.num_chunk();
int nvalue = value_.num_chunk();
int ntotal = nindex + nvalue;
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread_write_)
for (int i = 0; i < ntotal; ++i) {
if (i < nindex) {
index_.Compress(i, use_lz4_hc_);
} else {
value_.Compress(i - nindex, use_lz4_hc_);
}
}
index_.Write(fo);
value_.Write(fo);
// statistics
raw_bytes_index_ += index_.RawBytes() * sizeof(bst_uint) / sizeof(StorageIndex);
raw_bytes_value_ += value_.RawBytes();
encoded_bytes_index_ += index_.EncodedBytes();
encoded_bytes_value_ += value_.EncodedBytes();
raw_bytes_ += page.offset.size() * sizeof(size_t);
}
inline void LoadIndexValue(dmlc::SeekStream* fi) {
fi->Read(&min_index_, sizeof(min_index_));
index_.Read(fi);
value_.Read(fi);
int nindex = index_.num_chunk();
int nvalue = value_.num_chunk();
int ntotal = nindex + nvalue;
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread_)
for (int i = 0; i < ntotal; ++i) {
if (i < nindex) {
index_.Decompress(i);
} else {
value_.Decompress(i - nindex);
}
}
}
private:
// default chunk size.
static const size_t kChunkSize = 64 << 10UL;
// maximum number of chunks.
static const size_t kMaxChunk = 128;
// whether to use the LZ4 high-compression mode
bool use_lz4_hc_;
// number of threads
int nthread_;
// number of writing threads
int nthread_write_;
// raw bytes
size_t raw_bytes_, raw_bytes_index_, raw_bytes_value_;
// encoded bytes
size_t encoded_bytes_index_, encoded_bytes_value_;
/*! \brief minimum index value */
uint32_t min_index_;
/*! \brief external memory column offset */
std::vector<size_t> disk_offset_;
// internal index
CompressArray<StorageIndex> index_;
// value set.
CompressArray<bst_float> value_;
};
XGBOOST_REGISTER_SPARSE_PAGE_FORMAT(lz4)
.describe("Apply LZ4 binary data compression for ext memory.")
.set_body([]() {
return new SparsePageLZ4Format<bst_uint>(false);
});
XGBOOST_REGISTER_SPARSE_PAGE_FORMAT(lz4hc)
.describe("Apply LZ4 binary data compression(high compression ratio) for ext memory.")
.set_body([]() {
return new SparsePageLZ4Format<bst_uint>(true);
});
XGBOOST_REGISTER_SPARSE_PAGE_FORMAT(lz4i16hc)
.describe("Apply LZ4 binary data compression(16 bit index mode) for ext memory.")
.set_body([]() {
return new SparsePageLZ4Format<uint16_t>(true);
});
} // namespace data
} // namespace xgboost
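For intuition about the chunk layout that InitCompressChunks builds, here is a self-contained sketch (standalone C++, no xgboost headers; the element count and parameters are made up for illustration) that reproduces the boundary computation:

#include <algorithm>
#include <cstdio>
#include <vector>

// mirrors CompressArray<T>::InitCompressChunks above
std::vector<size_t> MakeChunks(size_t n, size_t chunk_size, size_t max_nchunk) {
  std::vector<size_t> chunks;
  chunks.push_back(0);
  // grow the chunk size if needed so at most max_nchunk chunks are produced
  chunk_size = std::max(n / max_nchunk, chunk_size);
  size_t nstep = n / chunk_size;
  for (size_t i = 0; i < nstep; ++i) {
    chunks.push_back(chunks.back() + chunk_size);
  }
  if (nstep == 0) chunks.push_back(0);  // guarantee at least one chunk
  chunks.back() = n;                    // last chunk absorbs the remainder
  return chunks;
}

int main() {
  // 150000 elements, 64K chunk, at most 128 chunks -> boundaries: 0 65536 150000
  for (size_t b : MakeChunks(150000, 64 << 10, 128)) std::printf("%zu ", b);
  std::printf("\n");
  return 0;
}

Bounding the number of chunks keeps the per-chunk bookkeeping small, while the chunking itself is what lets Compress/Decompress run over chunks in parallel, as the OpenMP loops in SparsePageLZ4Format do.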

View File

@ -1,14 +1,11 @@
include *.sh *.md *.rst
include *.md *.rst
recursive-include xgboost *
recursive-include xgboost/wrapper *
recursive-include xgboost/windows *
recursive-include xgboost/subtree *
recursive-include xgboost/include *
recursive-include xgboost/src *
recursive-include xgboost/multi-node *
#exclude pre-compiled .o files to avoid confusion
#including the pre-compiled .so is needed as a placeholder
#since it will be copied after compiling on the fly
global-exclude xgboost/wrapper/*.so.gz
global-exclude xgboost/build/*
global-exclude xgboost/*.o
global-exclude *.pyo
global-exclude *.pyc

View File

@ -17,7 +17,7 @@ libpath = {'__file__': libpath_py}
exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpath)
LIB_PATH = libpath['find_lib_path']()
print("Install libxgboost from: %s" % LIB_PATH)
#Please use setup_pip.py for generating and deploying pip installation
#detailed instructions in setup_pip.py
setup(name='xgboost',

View File

@ -14,7 +14,7 @@ try:
from .sklearn import XGBModel, XGBClassifier, XGBRegressor
from .plotting import plot_importance, plot_tree, to_graphviz
except ImportError:
print('Error when loading sklearn/plotting. Please install scikit-learn')
pass
VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION')
__version__ = open(VERSION_FILE).read().strip()

View File

@ -20,8 +20,8 @@ def find_lib_path():
"""
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
# make pythonpack hack: copy this directory one level upper for setup.py
dll_path = [curr_path, os.path.join(curr_path, '../../wrapper/'),
os.path.join(curr_path, './wrapper/')]
dll_path = [curr_path, os.path.join(curr_path, '../../lib/'),
os.path.join(curr_path, './lib/')]
if os.name == 'nt':
if platform.architecture()[0] == '64bit':
dll_path.append(os.path.join(curr_path, '../../windows/x64/Release/'))
@ -32,9 +32,9 @@ def find_lib_path():
# hack for pip installation when copy all parent source directory here
dll_path.append(os.path.join(curr_path, './windows/Release/'))
if os.name == 'nt':
dll_path = [os.path.join(p, 'xgboost_wrapper.dll') for p in dll_path]
dll_path = [os.path.join(p, 'libxgboost.dll') for p in dll_path]
else:
dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path]
dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
#From github issues, most installation errors come from machines w/o compilers
if len(lib_path) == 0 and not os.environ.get('XGBOOST_BUILD_DOC', False):

1
rabit Submodule

@ -0,0 +1 @@
Subproject commit 112d866dc92354304c0891500374fe40cdf13a50

View File

@ -1,14 +0,0 @@
#!/bin/bash
# Test R package of xgboost
set -e
export _R_CHECK_TIMINGS_=0
export R_BUILD_ARGS="--no-build-vignettes --no-manual"
export R_CHECK_ARGS="--no-vignettes --no-manual"
curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
chmod 755 ./travis-tool.sh
./travis-tool.sh bootstrap
make Rpack
cd ./xgboost
../travis-tool.sh install_deps
../travis-tool.sh run_tests

View File

@ -1,5 +0,0 @@
#!/bin/bash
if [ ${TASK} == "R-package" ]; then
cat xgboost/xgboost.Rcheck/*.log
fi

View File

@ -1,7 +0,0 @@
# Test java package of xgboost
set -e
cd java
./create_wrap.sh
cd xgboost4j
mvn clean install -DskipTests=true
mvn test

View File

@ -1,7 +0,0 @@
#!/bin/bash
if [ ${TRAVIS_OS_NAME} != "osx" ]; then
exit 0
fi
brew update

View File

@ -1,82 +0,0 @@
#!/bin/bash
# main script of travis
if [ ${TASK} == "lint" ]; then
if [ ${TRAVIS_OS_NAME} != "osx" ]; then
make lint || exit -1
fi
fi
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
export no_omp=1
export NO_OPENMP=1
fi
if [ ${TASK} == "build" ]; then
make all CXX=${CXX} || exit -1
fi
if [ ${TASK} == "build-with-dmlc" ]; then
cd dmlc-core
cp make/config.mk .
if [ ${TRAVIS_OS_NAME} != "osx" ]; then
echo "USE_S3=1" >> config.mk
else
echo "USE_S3=0" >> config.mk
fi
make all CXX=${CXX}|| exit -1
cd ..
make dmlc=dmlc-core CXX=${CXX} || exit -1
fi
if [ ${TASK} == "R-package" ]; then
scripts/travis_R_script.sh || exit -1
fi
if [ ${TASK} == "python-package" -o ${TASK} == "python-package3" ]; then
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
brew install graphviz
if [ ${TASK} == "python-package3" ]; then
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
else
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh
fi
else
sudo apt-get install graphviz
if [ ${TASK} == "python-package3" ]; then
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
else
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh
fi
fi
bash conda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
hash -r
conda config --set always_yes yes --set changeps1 no
conda update -q conda
# Useful for debugging any issues with conda
conda info -a
if [ ${TASK} == "python-package3" ]; then
conda create -n myenv python=3.4
else
conda create -n myenv python=2.7
fi
source activate myenv
conda install numpy scipy pandas matplotlib nose scikit-learn
python -m pip install graphviz
make all CXX=${CXX} || exit -1
python -m nose tests/python || exit -1
python --version
fi
# only test java under linux for now
if [ ${TASK} == "java-package" ]; then
if [ ${TRAVIS_OS_NAME} != "osx" ]; then
make java CXX=${CXX} || exit -1
scripts/travis_java_script.sh || exit -1
fi
fi

View File

@ -1,26 +0,0 @@
Coding Guide
======
This file contains notes about the code structure in xgboost
Project Logical Layout
=======
* Dependency order: io->learner->gbm->tree
- All modules depend on data.h
* tree contains the implementations of the tree construction algorithms.
* gbm is the gradient boosting interface, which takes trees and other base learners to do boosting.
- gbm only takes the gradient as sufficient statistics; it does not compute the gradient.
* learner is the learning module that computes the gradient for a specific objective and passes it to the gbm
File Naming Convention
=======
* .h files are data structures and interfaces, which are needed to use functions in that layer.
* -inl.hpp files are implementations of the interface, like the cpp files in most projects.
- You only need to understand the interface file to understand the usage of that layer
* In each folder, there can be a .cpp file that compiles the module of that layer
How to Hack the Code
======
* Add an objective function: add it to learner/objective-inl.hpp and register it in learner/objective.h ```CreateObjFunction```
- You can also do it directly in python
* Add a new evaluation metric: add it to learner/evaluation-inl.hpp and register it in learner/evaluation.h ```CreateEvaluator```
* Add a wrapper for a new language: most likely you can do it by taking the functions in python/xgboost_wrapper.h, which are purely C based, and calling these C functions to use xgboost

528
src/c_api/c_api.cc Normal file
View File

@ -0,0 +1,528 @@
// Copyright (c) 2014 by Contributors
#include <xgboost/data.h>
#include <xgboost/learner.h>
#include <xgboost/c_api.h>
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include <memory>
#include "./c_api_error.h"
#include "../data/simple_csr_source.h"
#include "../common/thread_local.h"
#include "../common/math.h"
#include "../common/io.h"
#include "../common/group_data.h"
namespace xgboost {
// booster wrapper for backward compatibility.
class Booster {
public:
explicit Booster(const std::vector<DMatrix*>& cache_mats)
: configured_(false),
initialized_(false),
learner_(Learner::Create(cache_mats)) {}
inline Learner* learner() {
return learner_.get();
}
inline void SetParam(const std::string& name, const std::string& val) {
cfg_.push_back(std::make_pair(name, val));
if (configured_) {
learner_->Configure(cfg_);
}
}
inline void LazyInit() {
if (!configured_) {
learner_->Configure(cfg_);
configured_ = true;
}
if (!initialized_) {
learner_->InitModel();
initialized_ = true;
}
}
inline void LoadModel(dmlc::Stream* fi) {
learner_->Load(fi);
initialized_ = true;
}
public:
bool configured_;
bool initialized_;
std::unique_ptr<Learner> learner_;
std::vector<std::pair<std::string, std::string> > cfg_;
};
} // namespace xgboost
using namespace xgboost; // NOLINT(*);
/*! \brief entry to easily hold returning information */
struct XGBAPIThreadLocalEntry {
/*! \brief result holder for returning string */
std::string ret_str;
/*! \brief result holder for returning strings */
std::vector<std::string> ret_vec_str;
/*! \brief result holder for returning string pointers */
std::vector<const char *> ret_vec_charp;
/*! \brief returning float vector. */
std::vector<float> ret_vec_float;
/*! \brief temp variable of gradient pairs. */
std::vector<bst_gpair> tmp_gpair;
};
// define the threadlocal store.
typedef xgboost::common::ThreadLocalStore<XGBAPIThreadLocalEntry> XGBAPIThreadLocalStore;
int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out) {
API_BEGIN();
*out = DMatrix::Load(
fname, silent != 0, false);
API_END();
}
int XGDMatrixCreateFromCSR(const bst_ulong* indptr,
const unsigned *indices,
const float* data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
data::SimpleCSRSource& mat = *source;
mat.row_ptr_.resize(nindptr);
for (bst_ulong i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.row_data_.resize(nelem);
for (bst_ulong i = 0; i < nelem; ++i) {
mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]);
mat.info.num_col = std::max(mat.info.num_col,
static_cast<uint64_t>(indices[i] + 1));
}
mat.info.num_row = nindptr - 1;
mat.info.num_nonzero = static_cast<uint64_t>(nelem);
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixCreateFromCSC(const bst_ulong* col_ptr,
const unsigned* indices,
const float* data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
data::SimpleCSRSource& mat = *source;
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread);
long ncol = static_cast<long>(nindptr - 1); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.AddBudget(indices[j], tid);
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.Push(indices[j],
RowBatch::Entry(static_cast<bst_uint>(i), data[j]),
tid);
}
}
mat.info.num_row = mat.row_ptr_.size() - 1;
mat.info.num_col = static_cast<uint64_t>(ncol);
mat.info.num_nonzero = nelem;
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixCreateFromMat(const float* data,
bst_ulong nrow,
bst_ulong ncol,
float missing,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
data::SimpleCSRSource& mat = *source;
bool nan_missing = common::CheckNAN(missing);
mat.info.num_row = nrow;
mat.info.num_col = ncol;
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
bst_ulong nelem = 0;
for (bst_ulong j = 0; j < ncol; ++j) {
if (common::CheckNAN(data[j])) {
CHECK(nan_missing)
<< "There are NaN values in the matrix, but the missing parameter was not set to NaN";
} else {
if (nan_missing || data[j] != missing) {
mat.row_data_.push_back(RowBatch::Entry(j, data[j]));
++nelem;
}
}
}
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
}
mat.info.num_nonzero = mat.row_data_.size();
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int* idxset,
bst_ulong len,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
data::SimpleCSRSource src;
src.CopyFrom(static_cast<DMatrix*>(handle));
data::SimpleCSRSource& ret = *source;
CHECK_EQ(src.info.group_ptr.size(), 0)
<< "slice does not support group structure";
ret.Clear();
ret.info.num_row = len;
ret.info.num_col = src.info.num_col;
dmlc::DataIter<RowBatch>* iter = &src;
iter->BeforeFirst();
CHECK(iter->Next());
const RowBatch& batch = iter->Value();
for (bst_ulong i = 0; i < len; ++i) {
const int ridx = idxset[i];
RowBatch::Inst inst = batch[ridx];
CHECK_LT(static_cast<bst_ulong>(ridx), batch.size);
ret.row_data_.resize(ret.row_data_.size() + inst.length);
std::memcpy(dmlc::BeginPtr(ret.row_data_) + ret.row_ptr_.back(), inst.data,
sizeof(RowBatch::Entry) * inst.length);
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
ret.info.num_nonzero += inst.length;
if (src.info.labels.size() != 0) {
ret.info.labels.push_back(src.info.labels[ridx]);
}
if (src.info.weights.size() != 0) {
ret.info.weights.push_back(src.info.weights[ridx]);
}
if (src.info.root_index.size() != 0) {
ret.info.root_index.push_back(src.info.root_index[ridx]);
}
}
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixFree(DMatrixHandle handle) {
API_BEGIN();
delete static_cast<DMatrix*>(handle);
API_END();
}
int XGDMatrixSaveBinary(DMatrixHandle handle,
const char* fname,
int silent) {
API_BEGIN();
static_cast<DMatrix*>(handle)->SaveToLocalFile(fname);
API_END();
}
int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char* field,
const float* info,
bst_ulong len) {
API_BEGIN();
static_cast<DMatrix*>(handle)->info().SetInfo(field, info, kFloat32, len);
API_END();
}
int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char* field,
const unsigned* info,
bst_ulong len) {
API_BEGIN();
static_cast<DMatrix*>(handle)->info().SetInfo(field, info, kUInt32, len);
API_END();
}
int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned* group,
bst_ulong len) {
API_BEGIN();
DMatrix *pmat = static_cast<DMatrix*>(handle);
MetaInfo& info = pmat->info();
info.group_ptr.resize(len + 1);
info.group_ptr[0] = 0;
for (uint64_t i = 0; i < len; ++i) {
info.group_ptr[i + 1] = info.group_ptr[i] + group[i];
}
API_END();
}
int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char* field,
bst_ulong* out_len,
const float** out_dptr) {
API_BEGIN();
const MetaInfo& info = static_cast<const DMatrix*>(handle)->info();
const std::vector<float>* vec = nullptr;
if (!std::strcmp(field, "label")) {
vec = &info.labels;
} else if (!std::strcmp(field, "weight")) {
vec = &info.weights;
} else if (!std::strcmp(field, "base_margin")) {
vec = &info.base_margin;
} else {
LOG(FATAL) << "Unknown float field name " << field;
}
*out_len = static_cast<bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
API_END();
}
int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const unsigned **out_dptr) {
API_BEGIN();
const MetaInfo& info = static_cast<const DMatrix*>(handle)->info();
const std::vector<unsigned>* vec = nullptr;
if (!std::strcmp(field, "root_index")) {
vec = &info.root_index;
} else {
LOG(FATAL) << "Unknown uint field name " << field;
}
*out_len = static_cast<bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
API_END();
}
int XGDMatrixNumRow(const DMatrixHandle handle,
bst_ulong *out) {
API_BEGIN();
*out = static_cast<bst_ulong>(static_cast<const DMatrix*>(handle)->info().num_row);
API_END();
}
int XGDMatrixNumCol(const DMatrixHandle handle,
bst_ulong *out) {
API_BEGIN();
*out = static_cast<size_t>(static_cast<const DMatrix*>(handle)->info().num_col);
API_END();
}
// xgboost implementation
int XGBoosterCreate(DMatrixHandle dmats[],
bst_ulong len,
BoosterHandle *out) {
API_BEGIN();
std::vector<DMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
mats.push_back(static_cast<DMatrix*>(dmats[i]));
}
*out = new Booster(mats);
API_END();
}
int XGBoosterFree(BoosterHandle handle) {
API_BEGIN();
delete static_cast<Booster*>(handle);
API_END();
}
int XGBoosterSetParam(BoosterHandle handle,
const char *name,
const char *value) {
API_BEGIN();
static_cast<Booster*>(handle)->SetParam(name, value);
API_END();
}
int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain) {
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->LazyInit();
bst->learner()->UpdateOneIter(iter, dtr);
API_END();
}
int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len) {
std::vector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
DMatrix* dtr = static_cast<DMatrix*>(dtrain);
tmp_gpair.resize(len);
for (bst_ulong i = 0; i < len; ++i) {
tmp_gpair[i] = bst_gpair(grad[i], hess[i]);
}
bst->LazyInit();
bst->learner()->BoostOneIter(0, dtr, &tmp_gpair);
API_END();
}
int XGBoosterEvalOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dmats[],
const char* evnames[],
bst_ulong len,
const char** out_str) {
std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
std::vector<DMatrix*> data_sets;
std::vector<std::string> data_names;
for (bst_ulong i = 0; i < len; ++i) {
data_sets.push_back(static_cast<DMatrix*>(dmats[i]));
data_names.push_back(std::string(evnames[i]));
}
bst->LazyInit();
eval_str = bst->learner()->EvalOneIter(iter, data_sets, data_names);
*out_str = eval_str.c_str();
API_END();
}
int XGBoosterPredict(BoosterHandle handle,
DMatrixHandle dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *len,
const float **out_result) {
std::vector<float>& preds = XGBAPIThreadLocalStore::Get()->ret_vec_float;
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Predict(
static_cast<DMatrix*>(dmat),
(option_mask & 1) != 0,
&preds, ntree_limit,
(option_mask & 2) != 0);
*out_result = dmlc::BeginPtr(preds);
*len = static_cast<bst_ulong>(preds.size());
API_END();
}
int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
API_BEGIN();
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r"));
static_cast<Booster*>(handle)->LoadModel(fi.get());
API_END();
}
int XGBoosterSaveModel(BoosterHandle handle, const char* fname) {
API_BEGIN();
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w"));
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Save(fo.get());
API_END();
}
int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
const void* buf,
bst_ulong len) {
API_BEGIN();
common::MemoryFixSizeBuffer fs((void*)buf, len); // NOLINT(*)
static_cast<Booster*>(handle)->LoadModel(&fs);
API_END();
}
int XGBoosterGetModelRaw(BoosterHandle handle,
bst_ulong* out_len,
const char** out_dptr) {
std::string& raw_str = XGBAPIThreadLocalStore::Get()->ret_str;
raw_str.resize(0);
API_BEGIN();
common::MemoryBufferStream fo(&raw_str);
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Save(&fo);
*out_dptr = dmlc::BeginPtr(raw_str);
*out_len = static_cast<bst_ulong>(raw_str.length());
API_END();
}
inline void XGBoostDumpModelImpl(
BoosterHandle handle,
const FeatureMap& fmap,
int with_stats,
bst_ulong* len,
const char*** out_models) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
str_vecs = bst->learner()->Dump2Text(fmap, with_stats != 0);
charp_vecs.resize(str_vecs.size());
for (size_t i = 0; i < str_vecs.size(); ++i) {
charp_vecs[i] = str_vecs[i].c_str();
}
*out_models = dmlc::BeginPtr(charp_vecs);
*len = static_cast<bst_ulong>(charp_vecs.size());
}
int XGBoosterDumpModel(BoosterHandle handle,
const char* fmap,
int with_stats,
bst_ulong* len,
const char*** out_models) {
API_BEGIN();
FeatureMap featmap;
if (strlen(fmap) != 0) {
std::unique_ptr<dmlc::Stream> fs(
dmlc::Stream::Create(fmap, "r"));
dmlc::istream is(fs.get());
featmap.LoadText(is);
}
XGBoostDumpModelImpl(handle, featmap, with_stats, len, out_models);
API_END();
}
int XGBoosterDumpModelWithFeatures(BoosterHandle handle,
int fnum,
const char** fname,
const char** ftype,
int with_stats,
bst_ulong* len,
const char*** out_models) {
API_BEGIN();
FeatureMap featmap;
for (int i = 0; i < fnum; ++i) {
featmap.PushBack(i, fname[i], ftype[i]);
}
XGBoostDumpModelImpl(handle, featmap, with_stats, len, out_models);
API_END();
}
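Taken together, a minimal sketch of driving this C API end to end might look as follows. The tiny dataset and parameter values are made up, and error handling is elided for brevity (every call returns 0 on success and -1 on failure, with the message available from XGBGetLastError):

#include <stdio.h>
#include <xgboost/c_api.h>

int main() {
  float data[8] = {0, 1, 2, 3, 4, 5, 6, 7};  /* 4 rows x 2 columns */
  float labels[4] = {0, 1, 0, 1};
  DMatrixHandle dtrain;
  XGDMatrixCreateFromMat(data, 4, 2, -1.0f, &dtrain);
  XGDMatrixSetFloatInfo(dtrain, "label", labels, 4);

  BoosterHandle booster;
  XGBoosterCreate(&dtrain, 1, &booster);
  XGBoosterSetParam(booster, "objective", "binary:logistic");
  for (int iter = 0; iter < 10; ++iter) {
    XGBoosterUpdateOneIter(booster, iter, dtrain);  /* train one round */
  }

  bst_ulong len;
  const float* preds;
  XGBoosterPredict(booster, dtrain, 0, 0, &len, &preds);  /* in-sample predictions */
  for (bst_ulong i = 0; i < len; ++i) printf("%f\n", preds[i]);

  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}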

21
src/c_api/c_api_error.cc Normal file
View File

@ -0,0 +1,21 @@
/*!
* Copyright (c) 2015 by Contributors
* \file c_api_error.cc
* \brief C error handling
*/
#include "./c_api_error.h"
#include "../common/thread_local.h"
struct XGBAPIErrorEntry {
std::string last_error;
};
typedef xgboost::common::ThreadLocalStore<XGBAPIErrorEntry> XGBAPIErrorStore;
const char *XGBGetLastError() {
return XGBAPIErrorStore::Get()->last_error.c_str();
}
void XGBAPISetLastError(const char* msg) {
XGBAPIErrorStore::Get()->last_error = msg;
}

39
src/c_api/c_api_error.h Normal file
View File

@ -0,0 +1,39 @@
/*!
* Copyright (c) 2015 by Contributors
* \file c_api_error.h
* \brief Error handling for C API.
*/
#ifndef XGBOOST_C_API_C_API_ERROR_H_
#define XGBOOST_C_API_C_API_ERROR_H_
#include <dmlc/base.h>
#include <dmlc/logging.h>
#include <xgboost/c_api.h>
/*! \brief macro to guard beginning and end section of all functions */
#define API_BEGIN() try {
/*! \brief every function starts with API_BEGIN();
and finishes with API_END() or API_END_HANDLE_ERROR */
#define API_END() } catch(dmlc::Error &_except_) { return XGBAPIHandleException(_except_); } return 0; // NOLINT(*)
/*!
* \brief every function starts with API_BEGIN();
* and finishes with API_END() or API_END_HANDLE_ERROR
* The finally clause contains procedure to cleanup states when an error happens.
*/
#define API_END_HANDLE_ERROR(Finalize) } catch(dmlc::Error &_except_) { Finalize; return XGBAPIHandleException(_except_); } return 0; // NOLINT(*)
/*!
* \brief Set the last error message needed by C API
* \param msg The error message to set.
*/
void XGBAPISetLastError(const char* msg);
/*!
* \brief handle an exception thrown out of an API call
* \param e the exception
* \return the return value of API after exception is handled
*/
inline int XGBAPIHandleException(const dmlc::Error &e) {
XGBAPISetLastError(e.what());
return -1;
}
#endif // XGBOOST_C_API_C_API_ERROR_H_
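As a usage sketch, a new C API entry point wraps its body in these guards so that a thrown dmlc::Error becomes a -1 return code whose message is retrievable via XGBGetLastError; XGDummyOp and its check are made up for illustration:

#include "./c_api_error.h"

int XGDummyOp(int value) {
  API_BEGIN();
  CHECK_GE(value, 0) << "value must be non-negative";  // throws dmlc::Error on failure
  // real work would go here
  API_END();  // catches the error, records it, and returns -1; otherwise returns 0
}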

352
src/cli_main.cc Normal file
View File

@ -0,0 +1,352 @@
/*!
* Copyright 2014 by Contributors
* \file cli_main.cc
* \brief The command line interface program of xgboost.
* This file is not included in dynamic library.
*/
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX
#include <xgboost/learner.h>
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <dmlc/timer.h>
#include <iomanip>
#include <ctime>
#include <string>
#include <cstdio>
#include <cstring>
#include <vector>
#include "./common/sync.h"
#include "./common/config.h"
namespace xgboost {
enum CLITask {
kTrain = 0,
kDump2Text = 1,
kPredict = 2
};
struct CLIParam : public dmlc::Parameter<CLIParam> {
/*! \brief the task name */
int task;
/*! \brief whether silent */
int silent;
/*! \brief whether evaluate training statistics */
bool eval_train;
/*! \brief number of boosting iterations */
int num_round;
/*! \brief the period to save the model, 0 means only save the final round model */
int save_period;
/*! \brief the path of training set */
std::string train_path;
/*! \brief path of test dataset */
std::string test_path;
/*! \brief the path of test model file, or file to restart training */
std::string model_in;
/*! \brief the path of final model file, to be saved */
std::string model_out;
/*! \brief the path of directory containing the saved models */
std::string model_dir;
/*! \brief name of predict file */
std::string name_pred;
/*! \brief data split mode */
int dsplit;
/*! \brief limit the number of trees used in prediction */
int ntree_limit;
/*! \brief whether to directly output margin value */
bool pred_margin;
/*! \brief whether dump statistics along with model */
int dump_stats;
/*! \brief name of feature map */
std::string name_fmap;
/*! \brief name of dump file */
std::string name_dump;
/*! \brief the paths of validation data sets */
std::vector<std::string> eval_data_paths;
/*! \brief the names of the evaluation data used in output log */
std::vector<std::string> eval_data_names;
/*! \brief all the configurations */
std::vector<std::pair<std::string, std::string> > cfg;
// declare parameters
DMLC_DECLARE_PARAMETER(CLIParam) {
// NOTE: declare everything except eval_data_paths.
DMLC_DECLARE_FIELD(task).set_default(kTrain)
.add_enum("train", kTrain)
.add_enum("dump", kDump2Text)
.add_enum("pred", kPredict)
.describe("Task to be performed by the CLI program.");
DMLC_DECLARE_FIELD(silent).set_default(0).set_range(0, 2)
.describe("Silent level during the task.");
DMLC_DECLARE_FIELD(eval_train).set_default(false)
.describe("Whether evaluate on training data during training.");
DMLC_DECLARE_FIELD(num_round).set_default(10).set_lower_bound(1)
.describe("Number of boosting iterations");
DMLC_DECLARE_FIELD(save_period).set_default(0).set_lower_bound(0)
.describe("The period to save the model, 0 means only save final model.");
DMLC_DECLARE_FIELD(train_path).set_default("NULL")
.describe("Training data path.");
DMLC_DECLARE_FIELD(test_path).set_default("NULL")
.describe("Test data path.");
DMLC_DECLARE_FIELD(model_in).set_default("NULL")
.describe("Input model path, if any.");
DMLC_DECLARE_FIELD(model_out).set_default("NULL")
.describe("Output model path, if any.");
DMLC_DECLARE_FIELD(model_dir).set_default("./")
.describe("Output directory of period checkpoint.");
DMLC_DECLARE_FIELD(name_pred).set_default("pred.txt")
.describe("Name of the prediction file.");
DMLC_DECLARE_FIELD(dsplit).set_default(0)
.add_enum("auto", 0)
.add_enum("col", 1)
.add_enum("row", 2)
.describe("Data split mode.");
DMLC_DECLARE_FIELD(ntree_limit).set_default(0).set_lower_bound(0)
.describe("Number of trees used for prediction, 0 means use all trees.");
DMLC_DECLARE_FIELD(pred_margin).set_default(false)
.describe("Whether to predict margin value instead of probability.");
DMLC_DECLARE_FIELD(dump_stats).set_default(false)
.describe("Whether dump the model statistics.");
DMLC_DECLARE_FIELD(name_fmap).set_default("NULL")
.describe("Name of the feature map file.");
DMLC_DECLARE_FIELD(name_dump).set_default("dump.txt")
.describe("Name of the output dump text file.");
// alias
DMLC_DECLARE_ALIAS(train_path, data);
DMLC_DECLARE_ALIAS(test_path, test:data);
DMLC_DECLARE_ALIAS(name_fmap, fmap);
}
// customized configure function of CLIParam
inline void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) {
this->cfg = cfg;
this->InitAllowUnknown(cfg);
for (const auto& kv : cfg) {
if (!strncmp("eval[", kv.first.c_str(), 5)) {
char evname[256];
CHECK_EQ(sscanf(kv.first.c_str(), "eval[%[^]]", evname), 1)
<< "must specify evaluation name for display";
eval_data_names.push_back(std::string(evname));
eval_data_paths.push_back(kv.second);
}
}
// constraint.
if (name_pred == "stdout") {
save_period = 0;
silent = 1;
}
if (dsplit == 0 && rabit::IsDistributed()) {
dsplit = 2;
}
if (rabit::GetRank() != 0) {
silent = 2;
}
}
};
DMLC_REGISTER_PARAMETER(CLIParam);
void CLITrain(const CLIParam& param) {
if (rabit::IsDistributed()) {
std::string pname = rabit::GetProcessorName();
LOG(CONSOLE) << "start " << pname << ":" << rabit::GetRank();
}
// load in data.
std::unique_ptr<DMatrix> dtrain(
DMatrix::Load(param.train_path, param.silent != 0, param.dsplit == 2));
std::vector<std::unique_ptr<DMatrix> > deval;
std::vector<DMatrix*> cache_mats, eval_datasets;
cache_mats.push_back(dtrain.get());
for (size_t i = 0; i < param.eval_data_names.size(); ++i) {
deval.emplace_back(
DMatrix::Load(param.eval_data_paths[i], param.silent != 0, param.dsplit == 2));
eval_datasets.push_back(deval.back().get());
cache_mats.push_back(deval.back().get());
}
std::vector<std::string> eval_data_names = param.eval_data_names;
if (param.eval_train) {
eval_datasets.push_back(dtrain.get());
eval_data_names.push_back(std::string("train"));
}
// initialize the learner.
std::unique_ptr<Learner> learner(Learner::Create(cache_mats));
learner->Configure(param.cfg);
int version = rabit::LoadCheckPoint(learner.get());
if (version == 0) {
// initialize the model if needed.
if (param.model_in != "NULL") {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get());
} else {
learner->InitModel();
}
}
// start training.
const double start = dmlc::GetTime();
for (int i = version / 2; i < param.num_round; ++i) {
double elapsed = dmlc::GetTime() - start;
if (version % 2 == 0) {
if (param.silent == 0) {
LOG(CONSOLE) << "boosting round " << i << ", " << elapsed << " sec elapsed";
}
learner->UpdateOneIter(i, dtrain.get());
if (learner->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(learner.get());
} else {
rabit::CheckPoint(learner.get());
}
version += 1;
}
CHECK_EQ(version, rabit::VersionNumber());
std::string res = learner->EvalOneIter(i, eval_datasets, eval_data_names);
if (rabit::IsDistributed()) {
if (rabit::GetRank() == 0) {
LOG(TRACKER) << res;
}
} else {
if (param.silent < 2) {
LOG(CONSOLE) << res;
}
}
if (param.save_period != 0 && (i + 1) % param.save_period == 0) {
std::ostringstream os;
os << param.model_dir << '/'
<< std::setfill('0') << std::setw(4)
<< i + 1 << ".model";
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(os.str().c_str(), "w"));
learner->Save(fo.get());
}
if (learner->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(learner.get());
} else {
rabit::CheckPoint(learner.get());
}
version += 1;
CHECK_EQ(version, rabit::VersionNumber());
}
// always save final round
if ((param.save_period == 0 || param.num_round % param.save_period != 0) &&
param.model_out != "NONE") {
std::ostringstream os;
if (param.model_out == "NULL") {
os << param.model_dir << '/'
<< std::setfill('0') << std::setw(4)
<< param.num_round << ".model";
} else {
os << param.model_out;
}
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(os.str().c_str(), "w"));
learner->Save(fo.get());
}
if (param.silent == 0) {
double elapsed = dmlc::GetTime() - start;
LOG(CONSOLE) << "update end, " << elapsed << " sec in all";
}
}
void CLIDump2Text(const CLIParam& param) {
FeatureMap fmap;
if (param.name_fmap != "NULL") {
std::unique_ptr<dmlc::Stream> fs(
dmlc::Stream::Create(param.name_fmap.c_str(), "r"));
dmlc::istream is(fs.get());
fmap.LoadText(is);
}
// load model
CHECK_NE(param.model_in, "NULL")
<< "Must specifiy model_in for dump";
std::unique_ptr<Learner> learner(Learner::Create({}));
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get());
// dump data
std::vector<std::string> dump = learner->Dump2Text(fmap, param.dump_stats);
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(param.name_dump.c_str(), "w"));
dmlc::ostream os(fo.get());
for (size_t i = 0; i < dump.size(); ++i) {
os << "booster[" << i << "]:\n";
os << dump[i];
}
// force flush before fo destruct.
os.set_stream(nullptr);
}
void CLIPredict(const CLIParam& param) {
CHECK_NE(param.test_path, "NULL")
<< "Test dataset parameter test:data must be specified.";
// load data
std::unique_ptr<DMatrix> dtest(
DMatrix::Load(param.test_path, param.silent != 0, param.dsplit == 2));
// load model
CHECK_NE(param.model_in, "NULL")
<< "Must specifiy model_in for dump";
std::unique_ptr<Learner> learner(Learner::Create({}));
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get());
if (param.silent == 0) {
LOG(CONSOLE) << "start prediction...";
}
std::vector<float> preds;
learner->Predict(dtest.get(), param.pred_margin, &preds, param.ntree_limit);
if (param.silent == 0) {
LOG(CONSOLE) << "writing prediction to " << param.name_pred;
}
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(param.name_pred.c_str(), "w"));
dmlc::ostream os(fo.get());
for (float p : preds) {
os << p << '\n';
}
// force flush before fo destruct.
os.set_stream(nullptr);
}
int CLIRunTask(int argc, char *argv[]) {
if (argc < 2) {
printf("Usage: <config>\n");
return 0;
}
std::vector<std::pair<std::string, std::string> > cfg;
cfg.push_back(std::make_pair("seed", "0"));
common::ConfigIterator itr(argv[1]);
while (itr.Next()) {
cfg.push_back(std::make_pair(std::string(itr.name()), std::string(itr.val())));
}
for (int i = 2; i < argc; ++i) {
char name[256], val[256];
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
cfg.push_back(std::make_pair(std::string(name), std::string(val)));
}
}
CLIParam param;
param.Configure(cfg);
rabit::Init(argc, argv);
switch (param.task) {
case kTrain: CLITrain(param); break;
case kDump2Text: CLIDump2Text(param); break;
case kPredict: CLIPredict(param); break;
}
rabit::Finalize();
return 0;
}
} // namespace xgboost
int main(int argc, char *argv[]) {
return xgboost::CLIRunTask(argc, argv);
}
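For reference, CLIRunTask reads a config file of name=value pairs (parsed by common::ConfigIterator) followed by optional name=value overrides on the command line. A hypothetical training config, with file names made up for illustration, might look like:

# mushroom.conf -- hypothetical example
booster = gbtree
objective = binary:logistic
num_round = 10
save_period = 0
data = "agaricus.txt.train"
eval[test] = "agaricus.txt.test"
model_out = "final.model"

invoked as ./xgboost mushroom.conf, with overrides such as ./xgboost mushroom.conf task=pred model_in=final.model. Keys like booster and objective are not CLIParam fields; InitAllowUnknown tolerates them and they reach the learner through param.cfg via learner->Configure.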

View File

@ -5,16 +5,17 @@
* base64 is easier to store and pass as text format in mapreduce
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
#ifndef XGBOOST_COMMON_BASE64_H_
#define XGBOOST_COMMON_BASE64_H_
#include <xgboost/logging.h>
#include <cctype>
#include <cstdio>
#include <string>
#include "./io.h"
namespace xgboost {
namespace utils {
namespace common {
/*! \brief buffer reader of the stream that allows you to get characters one at a time */
class StreamBufferReader {
public:
@ -26,7 +27,7 @@ class StreamBufferReader {
/*!
* \brief set input stream
*/
inline void set_stream(IStream *stream) {
inline void set_stream(dmlc::Stream *stream) {
stream_ = stream;
read_len_ = read_ptr_ = 1;
}
@ -51,7 +52,7 @@ class StreamBufferReader {
private:
/*! \brief the underlying stream */
IStream *stream_;
dmlc::Stream *stream_;
/*! \brief buffer to hold data */
std::string buffer_;
/*! \brief length of valid data in buffer */
@ -80,9 +81,9 @@ static const char EncodeTable[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
} // namespace base64
/*! \brief the stream that reads from base64, note we take from file pointers */
class Base64InStream: public IStream {
class Base64InStream: public dmlc::Stream {
public:
explicit Base64InStream(IStream *fs) : reader_(256) {
explicit Base64InStream(dmlc::Stream *fs) : reader_(256) {
reader_.set_stream(fs);
num_prev = 0; tmp_ch = 0;
}
@ -134,20 +135,22 @@ class Base64InStream: public IStream {
nvalue = DecodeTable[tmp_ch] << 18;
{
// second byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
tmp_ch = reader_.GetChar();
CHECK(tmp_ch != EOF && !isspace(tmp_ch)) << "invalid base64 format";
nvalue |= DecodeTable[tmp_ch] << 12;
*cptr++ = (nvalue >> 16) & 0xFF; --tlen;
}
}
{
// third byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
tmp_ch = reader_.GetChar();
CHECK(tmp_ch != EOF && !isspace(tmp_ch)) << "invalid base64 format";
// handle termination
if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
tmp_ch = reader_.GetChar();
CHECK(tmp_ch == '=') << "invalid base64 format";
tmp_ch = reader_.GetChar();
CHECK(tmp_ch == EOF || isspace(tmp_ch))
<< "invalid base64 format";
break;
}
nvalue |= DecodeTable[tmp_ch] << 6;
@ -159,11 +162,13 @@ class Base64InStream: public IStream {
}
{
// fourth byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
tmp_ch = reader_.GetChar();
CHECK(tmp_ch != EOF && !isspace(tmp_ch))
<< "invalid base64 format";
if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
tmp_ch = reader_.GetChar();
CHECK(tmp_ch == EOF || isspace(tmp_ch))
<< "invalid base64 format";
break;
}
nvalue |= DecodeTable[tmp_ch];
@ -177,12 +182,12 @@ class Base64InStream: public IStream {
tmp_ch = reader_.GetChar();
}
if (kStrictCheck) {
utils::Check(tlen == 0, "Base64InStream: read incomplete");
CHECK_EQ(tlen, 0) << "Base64InStream: read incomplete";
}
return size - tlen;
}
virtual void Write(const void *ptr, size_t size) {
utils::Error("Base64InStream do not support write");
LOG(FATAL) << "Base64InStream do not support write";
}
private:
@ -194,9 +199,9 @@ class Base64InStream: public IStream {
static const bool kStrictCheck = false;
};
/*! \brief the stream that write to base64, note we take from file pointers */
class Base64OutStream: public IStream {
class Base64OutStream: public dmlc::Stream {
public:
explicit Base64OutStream(IStream *fp) : fp(fp) {
explicit Base64OutStream(dmlc::Stream *fp) : fp(fp) {
buf_top = 0;
}
virtual void Write(const void *ptr, size_t size) {
@ -218,7 +223,7 @@ class Base64OutStream: public IStream {
}
}
virtual size_t Read(void *ptr, size_t size) {
utils::Error("Base64OutStream do not support read");
LOG(FATAL) << "Base64OutStream do not support read";
return 0;
}
/*!
@ -245,7 +250,7 @@ class Base64OutStream: public IStream {
}
private:
IStream *fp;
dmlc::Stream *fp;
int buf_top;
unsigned char buf[4];
std::string out_buf;
@ -262,6 +267,6 @@ class Base64OutStream: public IStream {
}
}
};
} // namespace utils
} // namespace common
} // namespace xgboost
#endif // XGBOOST_UTILS_BASE64_INL_H_
#endif // XGBOOST_COMMON_BASE64_H_
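A minimal round-trip sketch of the two streams above, assuming the InitPosition()/Finish() helpers these classes provide (not shown in the hunks here), the MemoryBufferStream typedef from ./io.h, and that the sketch sits next to these headers in src/common:

#include <string>
#include "./base64.h"
#include "./io.h"

void Base64RoundTrip() {
  std::string encoded;
  {
    xgboost::common::MemoryBufferStream raw(&encoded);
    xgboost::common::Base64OutStream b64(&raw);
    int payload = 42;
    b64.Write(&payload, sizeof(payload));
    b64.Finish();  // flush the last partial 3-byte group with '=' padding
  }
  xgboost::common::MemoryBufferStream raw(&encoded);
  xgboost::common::Base64InStream b64(&raw);
  b64.InitPosition();  // position the reader on the first base64 character
  int payload = 0;
  b64.Read(&payload, sizeof(payload));  // payload == 42 again
}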

View File

@ -5,15 +5,14 @@
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_BITMAP_H_
#define XGBOOST_UTILS_BITMAP_H_
#ifndef XGBOOST_COMMON_BITMAP_H_
#define XGBOOST_COMMON_BITMAP_H_
#include <dmlc/omp.h>
#include <vector>
#include "./utils.h"
#include "./omp.h"
namespace xgboost {
namespace utils {
namespace common {
/*! \brief bit map that contains set of bit indicators */
struct BitMap {
/*! \brief internal data structure */
@ -40,7 +39,7 @@ struct BitMap {
data[i >> 5] |= (1 << (i & 31U));
}
/*! \brief initialize the bit map from a vector of 0/1 integer flags */
inline void InitFromBool(const std::vector<int> &vec) {
inline void InitFromBool(const std::vector<int>& vec) {
this->Resize(vec.size());
// parallel over the full cases
bst_omp_uint nsize = static_cast<bst_omp_uint>(vec.size() / 32);
@ -59,10 +58,10 @@ struct BitMap {
}
}
/*! \brief clear the bitmap, set all places to false */
inline void Clear(void) {
inline void Clear() {
std::fill(data.begin(), data.end(), 0U);
}
};
} // namespace utils
} // namespace common
} // namespace xgboost
#endif // XGBOOST_UTILS_BITMAP_H_
#endif // XGBOOST_COMMON_BITMAP_H_
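A minimal usage sketch of BitMap, using only the members shown above (the sizes are made up for illustration):

#include <vector>
#include "./bitmap.h"

void UseBitMap() {
  std::vector<int> flags(100, 0);
  flags[13] = 1;
  xgboost::common::BitMap bmap;
  bmap.InitFromBool(flags);  // pack the 0/1 vector into 32-bit words
  bmap.SetTrue(42);          // set one more bit; threadsafe per word only
  bmap.Clear();              // reset every bit to false
}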

15
src/common/common.cc Normal file
View File

@ -0,0 +1,15 @@
/*!
* Copyright 2015 by Contributors
* \file common.cc
* \brief Enable all kinds of global variables in common.
*/
#include "./random.h"
namespace xgboost {
namespace common {
GlobalRandomEngine& GlobalRandom() {
static GlobalRandomEngine inst;
return inst;
}
}  // namespace common
} // namespace xgboost

View File

@ -4,18 +4,17 @@
* \brief helper class to load in configures from file
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_CONFIG_H_
#define XGBOOST_UTILS_CONFIG_H_
#ifndef XGBOOST_COMMON_CONFIG_H_
#define XGBOOST_COMMON_CONFIG_H_
#include <cstdio>
#include <cstring>
#include <string>
#include <istream>
#include <fstream>
#include "./utils.h"
namespace xgboost {
namespace utils {
namespace common {
/*!
* \brief base implementation of config reader
*/
@ -79,11 +78,11 @@ class ConfigReaderBase {
case '\\': *tok += this->GetChar(); break;
case '\"': return;
case '\r':
case '\n': Error("ConfigReader: unterminated string");
case '\n': LOG(FATAL) << "ConfigReader: unterminated string";
default: *tok += ch_buf;
}
}
Error("ConfigReader: unterminated string");
LOG(FATAL) << "ConfigReader: unterminated string";
}
inline void ParseStrML(std::string *tok) {
while ((ch_buf = this->GetChar()) != EOF) {
@ -93,7 +92,7 @@ class ConfigReaderBase {
default: *tok += ch_buf;
}
}
Error("unterminated string");
LOG(FATAL) << "unterminated string";
}
// return newline
inline bool GetNextToken(std::string *tok) {
@ -106,13 +105,13 @@ class ConfigReaderBase {
if (tok->length() == 0) {
ParseStr(tok); ch_buf = this->GetChar(); return new_line;
} else {
Error("ConfigReader: token followed directly by string");
LOG(FATAL) << "ConfigReader: token followed directly by string";
}
case '\'':
if (tok->length() == 0) {
ParseStrML(tok); ch_buf = this->GetChar(); return new_line;
} else {
Error("ConfigReader: token followed directly by string");
LOG(FATAL) << "ConfigReader: token followed directly by string";
}
case '=':
if (tok->length() == 0) {
@ -148,7 +147,7 @@ class ConfigStreamReader: public ConfigReaderBase {
public:
/*!
* \brief constructor
* \param istream input stream
* \param fin the input stream
*/
explicit ConfigStreamReader(std::istream &fin) : fin(fin) {}
@ -177,7 +176,7 @@ class ConfigIterator: public ConfigStreamReader {
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) {
fi.open(fname);
if (fi.fail()) {
utils::Error("cannot open file %s", fname);
LOG(FATAL) << "cannot open file " << fname;
}
ConfigReaderBase::Init();
}
@ -189,6 +188,6 @@ class ConfigIterator: public ConfigStreamReader {
private:
std::ifstream fi;
};
} // namespace utils
} // namespace common
} // namespace xgboost
#endif // XGBOOST_UTILS_CONFIG_H_
#endif // XGBOOST_COMMON_CONFIG_H_
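A minimal usage sketch of ConfigIterator, mirroring how cli_main.cc consumes it ("train.conf" is a made-up path):

#include <cstdio>
#include "./config.h"

void PrintConfig() {
  xgboost::common::ConfigIterator itr("train.conf");
  while (itr.Next()) {  // one name=value pair per call
    std::printf("%s = %s\n", itr.name(), itr.val());
  }
}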

View File

@ -11,13 +11,13 @@
* The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_GROUP_DATA_H_
#define XGBOOST_UTILS_GROUP_DATA_H_
#ifndef XGBOOST_COMMON_GROUP_DATA_H_
#define XGBOOST_COMMON_GROUP_DATA_H_
#include <vector>
namespace xgboost {
namespace utils {
namespace common {
/*!
* \brief multi-thread version of group builder
* \tparam ValueType type of entries in the sparse matrix
@ -91,7 +91,8 @@ struct ParallelGroupBuilder {
* \brief step 4: add data to the allocated space,
* the calls to this function should be exactly match previous call to AddBudget
*
* \param key the key of
* \param key the key of group.
* \param value The value to be pushed to the group.
* \param threadid the id of thread that calls this function
*/
inline void Push(size_t key, ValueType value, int threadid) {
@ -105,10 +106,10 @@ struct ParallelGroupBuilder {
/*! \brief index of nonzero entries in each row */
std::vector<ValueType> &data;
/*! \brief thread local data structure */
std::vector< std::vector<SizeType> > &thread_rptr;
std::vector<std::vector<SizeType> > &thread_rptr;
/*! \brief local temp thread ptr, use this if not specified by the constructor */
std::vector< std::vector<SizeType> > tmp_thread_rptr;
std::vector<std::vector<SizeType> > tmp_thread_rptr;
};
} // namespace utils
} // namespace common
} // namespace xgboost
#endif // XGBOOST_UTILS_GROUP_DATA_H_
#endif // XGBOOST_COMMON_GROUP_DATA_H_
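A single-threaded sketch of the two-pass protocol, mirroring its use in XGDMatrixCreateFromCSC (the keys and values are made up; SizeType is assumed to default to size_t as at that call site):

#include <vector>
#include "./group_data.h"

void BuildGroups() {
  std::vector<size_t> rptr;
  std::vector<int> data;
  xgboost::common::ParallelGroupBuilder<int> builder(&rptr, &data);
  builder.InitBudget(0, 1);  // pass 1: one thread, count entries per key
  builder.AddBudget(2, 0);   // one entry for key 2 (thread 0)
  builder.AddBudget(0, 0);   // one entry for key 0
  builder.InitStorage();     // turn counts into offsets, allocate data
  builder.Push(2, 42, 0);    // pass 2: fill entries, matching the budgets
  builder.Push(0, 7, 0);
  // rptr now delimits per-key segments: data[rptr[k] .. rptr[k+1])
}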

75
src/common/io.h Normal file
View File

@ -0,0 +1,75 @@
/*!
* Copyright 2014 by Contributors
* \file io.h
* \brief general stream interface for serialization, I/O
* \author Tianqi Chen
*/
#ifndef XGBOOST_COMMON_IO_H_
#define XGBOOST_COMMON_IO_H_
#include <dmlc/io.h>
#include <string>
#include <cstring>
#include "./sync.h"
namespace xgboost {
namespace common {
typedef rabit::utils::MemoryFixSizeBuffer MemoryFixSizeBuffer;
typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
/*!
* \brief Input stream that support additional PeekRead
* operation, besides read.
*/
class PeekableInStream : public dmlc::Stream {
public:
explicit PeekableInStream(dmlc::Stream* strm)
: strm_(strm), buffer_ptr_(0) {}
size_t Read(void* dptr, size_t size) override {
size_t nbuffer = buffer_.length() - buffer_ptr_;
if (nbuffer == 0) return strm_->Read(dptr, size);
if (nbuffer < size) {
std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, nbuffer);
buffer_ptr_ += nbuffer;
return nbuffer + strm_->Read(reinterpret_cast<char*>(dptr) + nbuffer,
size - nbuffer);
} else {
std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, size);
buffer_ptr_ += size;
return size;
}
}
size_t PeekRead(void* dptr, size_t size) {
size_t nbuffer = buffer_.length() - buffer_ptr_;
if (nbuffer < size) {
buffer_ = buffer_.substr(buffer_ptr_, buffer_.length());
buffer_ptr_ = 0;
buffer_.resize(size);
size_t nadd = strm_->Read(dmlc::BeginPtr(buffer_) + nbuffer, size - nbuffer);
buffer_.resize(nbuffer + nadd);
std::memcpy(dptr, dmlc::BeginPtr(buffer_), buffer_.length());
return buffer_.length();
} else {
std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, size);
return size;
}
}
void Write(const void* dptr, size_t size) override {
LOG(FATAL) << "Not implemented";
}
private:
/*! \brief input stream */
dmlc::Stream *strm_;
/*! \brief current buffer pointer */
size_t buffer_ptr_;
/*! \brief internal buffer */
std::string buffer_;
};
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_IO_H_
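A minimal sketch of what PeekRead is for: inspect a leading magic number without consuming it, so the real reader still sees the same bytes.

#include <dmlc/io.h>
#include "./io.h"

bool StartsWithMagic(dmlc::Stream* fs, int expected_magic) {
  xgboost::common::PeekableInStream ps(fs);
  int magic = 0;
  if (ps.PeekRead(&magic, sizeof(magic)) != sizeof(magic)) return false;
  // a subsequent ps.Read() still returns these same bytes first
  return magic == expected_magic;
}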

137
src/common/math.h Normal file
View File

@ -0,0 +1,137 @@
/*!
* Copyright 2015 by Contributors
* \file math.h
* \brief additional math utils
* \author Tianqi Chen
*/
#ifndef XGBOOST_COMMON_MATH_H_
#define XGBOOST_COMMON_MATH_H_
#include <xgboost/logging.h>
#include <utility>
#include <vector>
#include <cmath>
#include <algorithm>
namespace xgboost {
namespace common {
/*!
* \brief calculate the sigmoid of the input.
* \param x input parameter
* \return the transformed value.
*/
inline float Sigmoid(float x) {
return 1.0f / (1.0f + std::exp(-x));
}
/*!
* \brief do inplace softmax transformation on p_rec
* \param p_rec the input/output vector of the values.
*/
inline void Softmax(std::vector<float>* p_rec) {
std::vector<float> &rec = *p_rec;
float wmax = rec[0];
for (size_t i = 1; i < rec.size(); ++i) {
wmax = std::max(rec[i], wmax);
}
double wsum = 0.0;
for (size_t i = 0; i < rec.size(); ++i) {
rec[i] = std::exp(rec[i] - wmax);
wsum += rec[i];
}
for (size_t i = 0; i < rec.size(); ++i) {
rec[i] /= static_cast<float>(wsum);
}
}
/*!
* \brief Find the maximum iterator within the iterators
* \param begin The beginning iterator.
* \param end The end iterator.
* \return the iterator pointing to the maximum value.
* \tparam Iterator The type of the iterator.
*/
template<typename Iterator>
inline Iterator FindMaxIndex(Iterator begin, Iterator end) {
Iterator maxit = begin;
for (Iterator it = begin; it != end; ++it) {
if (*it > *maxit) maxit = it;
}
return maxit;
}
/*!
* \brief perform numerically safe logsum
* \param x left input operand
* \param y right input operand
* \return log(exp(x) + exp(y))
*/
inline float LogSum(float x, float y) {
if (x < y) {
return y + std::log(std::exp(x - y) + 1.0f);
} else {
return x + std::log(std::exp(y - x) + 1.0f);
}
}
/*!
* \brief perform numerically safe logsum
* \param begin The beginning iterator.
* \param end The end iterator.
* \return log(exp(x_1) + ... + exp(x_n)) over the range, computed stably.
* \tparam Iterator The type of the iterator.
*/
template<typename Iterator>
inline float LogSum(Iterator begin, Iterator end) {
float mx = *begin;
for (Iterator it = begin; it != end; ++it) {
mx = std::max(mx, *it);
}
float sum = 0.0f;
for (Iterator it = begin; it != end; ++it) {
sum += std::exp(*it - mx);
}
return mx + std::log(sum);
}
// comparator functions for sorting pairs in descending order
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
const std::pair<float, unsigned> &b) {
return a.first > b.first;
}
inline static bool CmpSecond(const std::pair<float, unsigned> &a,
const std::pair<float, unsigned> &b) {
return a.second > b.second;
}
#if XGBOOST_STRICT_R_MODE
// check nan
bool CheckNAN(double v);
double LogGamma(double v);
#else
template<typename T>
inline bool CheckNAN(T v) {
#ifdef _MSC_VER
return (_isnan(v) != 0);
#else
return std::isnan(v);
#endif
}
template<typename T>
inline T LogGamma(T v) {
#ifdef _MSC_VER
#if _MSC_VER >= 1800
return lgamma(v);
#else
#pragma message("Warning: lgamma function was not available until VS2013"\
", poisson regression will be disabled")
utils::Error("lgamma function was not available until VS2013");
return static_cast<T>(1.0);
#endif
#else
return lgamma(v);
#endif
}
#endif  // XGBOOST_STRICT_R_MODE
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_MATH_H_
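A note on the numerics: Softmax and both LogSum overloads above rely on the standard max-shift (log-sum-exp) identity to avoid overflow,

\log\sum_i e^{x_i} = m + \log\sum_i e^{x_i - m}, \qquad m = \max_i x_i,

which for two operands specializes to \log(e^x + e^y) = \max(x, y) + \log\bigl(1 + e^{-|x - y|}\bigr), exactly the two branches of LogSum(float, float).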

View File

@ -4,19 +4,19 @@
* \brief util to compute quantiles
* \author Tianqi Chen
*/
#ifndef XGBOOST_UTILS_QUANTILE_H_
#define XGBOOST_UTILS_QUANTILE_H_
#ifndef XGBOOST_COMMON_QUANTILE_H_
#define XGBOOST_COMMON_QUANTILE_H_
#include <dmlc/base.h>
#include <xgboost/logging.h>
#include <cmath>
#include <vector>
#include <cstring>
#include <algorithm>
#include <iostream>
#include "./io.h"
#include "./utils.h"
namespace xgboost {
namespace utils {
namespace common {
/*!
* \brief experimental wsummary
* \tparam DType type of data content
@ -35,7 +35,7 @@ struct WQSummary {
/*! \brief the value of data */
DType value;
// constructor
Entry(void) {}
Entry() {}
// constructor
Entry(RType rmin, RType rmax, RType wmin, DType value)
: rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
@ -44,15 +44,15 @@ struct WQSummary {
* \param eps the tolerate level for violating the relation
*/
inline void CheckValid(RType eps = 0) const {
utils::Assert(rmin >= 0 && rmax >= 0 && wmin >= 0, "nonneg constraint");
utils::Assert(rmax- rmin - wmin > -eps, "relation constraint: min/max");
CHECK(rmin >= 0 && rmax >= 0 && wmin >= 0) << "nonneg constraint";
CHECK(rmax - rmin - wmin > -eps) << "relation constraint: min/max";
}
/*! \return rmin estimation for v strictly bigger than value */
inline RType rmin_next(void) const {
inline RType rmin_next() const {
return rmin + wmin;
}
/*! \return rmax estimation for v strictly smaller than value */
inline RType rmax_prev(void) const {
inline RType rmax_prev() const {
return rmax - wmin;
}
};
@ -65,7 +65,7 @@ struct WQSummary {
// weight of instance
RType weight;
// default constructor
QEntry(void) {}
QEntry() {}
// constructor
QEntry(DType value, RType weight)
: value(value), weight(weight) {}
@ -113,7 +113,7 @@ struct WQSummary {
/*!
* \return the maximum error of the Summary
*/
inline RType MaxError(void) const {
inline RType MaxError() const {
RType res = data[0].rmax - data[0].rmin - data[0].wmin;
for (size_t i = 1; i < size; ++i) {
res = std::max(data[i].rmax_prev() - data[i - 1].rmin_next(), res);
@ -147,7 +147,7 @@ struct WQSummary {
}
}
/*! \return maximum rank in the summary */
inline RType MaxRank(void) const {
inline RType MaxRank() const {
return data[size - 1].rmax;
}
/*!
@ -168,8 +168,8 @@ struct WQSummary {
for (size_t i = 0; i < size; ++i) {
data[i].CheckValid(eps);
if (i != 0) {
utils::Assert(data[i].rmin >= data[i - 1].rmin + data[i - 1].wmin, "rmin range constraint");
utils::Assert(data[i].rmax >= data[i - 1].rmax + data[i].wmin, "rmax range constraint");
CHECK(data[i].rmin >= data[i - 1].rmin + data[i - 1].wmin) << "rmin range constraint";
CHECK(data[i].rmax >= data[i - 1].rmax + data[i].wmin) << "rmax range constraint";
}
}
}
@ -196,7 +196,7 @@ struct WQSummary {
// find first i such that d < (rmax[i+1] + rmin[i+1]) / 2
while (i < src.size - 1
&& dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
utils::Assert(i != src.size - 1, "this cannot happen");
CHECK(i != src.size - 1);
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
if (i != lastidx) {
data[size++] = src.data[i]; lastidx = i;
@ -224,7 +224,7 @@ struct WQSummary {
if (sb.size == 0) {
this->CopyFrom(sa); return;
}
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
CHECK(sa.size > 0 && sb.size > 0);
const Entry *a = sa.data, *a_end = sa.data + sa.size;
const Entry *b = sb.data, *b_end = sb.data + sb.size;
// extended rmin value
@ -272,18 +272,19 @@ struct WQSummary {
RType err_mingap, err_maxgap, err_wgap;
this->FixError(&err_mingap, &err_maxgap, &err_wgap);
if (err_mingap > tol || err_maxgap > tol || err_wgap > tol) {
utils::Printf("INFO: mingap=%g, maxgap=%g, wgap=%g\n",
err_mingap, err_maxgap, err_wgap);
LOG(INFO) << "mingap=" << err_mingap
<< ", maxgap=" << err_maxgap
<< ", wgap=" << err_wgap;
}
utils::Assert(size <= sa.size + sb.size, "bug in combine");
CHECK(size <= sa.size + sb.size) << "bug in combine";
}
// helper function to print the current content of sketch
inline void Print() const {
for (size_t i = 0; i < this->size; ++i) {
utils::Printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g\n",
i, data[i].rmin, data[i].rmax,
data[i].wmin, data[i].value);
LOG(INFO) << "[" << i << "] rmin=" << data[i].rmin
<< ", rmax=" << data[i].rmax
<< ", wmin=" << data[i].wmin
<< ", v=" << data[i].value;
}
}
// try to fix rounding error
@ -320,7 +321,7 @@ struct WQSummary {
for (size_t i = 0; i < this->size; ++i) {
if (data[i].rmin + data[i].wmin > data[i].rmax + tol ||
data[i].rmin < -1e-6f || data[i].rmax < -1e-6f) {
utils::Printf("----%s: Check not Pass------\n", msg);
LOG(INFO) << "----------check not pass----------";
this->Print();
return false;
}
@ -380,12 +381,11 @@ struct WXQSummary : public WQSummary<DType, RType> {
}
if (nbig >= n - 1) {
// see what was the case
utils::Printf("LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n);
utils::Printf("LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n",
src.size, maxsize, static_cast<double>(range),
static_cast<double>(chunk));
LOG(INFO) << " check quantile stats, nbig=" << nbig << ", n=" << n;
LOG(INFO) << " srcsize=" << src.size << ", maxsize=" << maxsize
<< ", range=" << range << ", chunk=" << chunk;
src.Print();
utils::Assert(nbig < n - 1, "quantile: too many large chunk");
CHECK(nbig < n - 1) << "quantile: too many large chunks";
}
this->data[0] = src.data[0];
this->size = 1;
@ -440,7 +440,7 @@ struct GKSummary {
/*! \brief the value of data */
DType value;
// constructor
Entry(void) {}
Entry() {}
// constructor
Entry(RType rmin, RType rmax, DType value)
: rmin(rmin), rmax(rmax), value(value) {}
@ -470,7 +470,7 @@ struct GKSummary {
GKSummary(Entry *data, size_t size)
: data(data), size(size) {}
/*! \brief the maximum error of the summary */
inline RType MaxError(void) const {
inline RType MaxError() const {
RType res = 0;
for (size_t i = 1; i < size; ++i) {
res = std::max(data[i].rmax - data[i-1].rmin, res);
@ -478,7 +478,7 @@ struct GKSummary {
return res;
}
/*! \return maximum rank in the summary */
inline RType MaxRank(void) const {
inline RType MaxRank() const {
return data[size - 1].rmax;
}
/*!
@ -493,7 +493,7 @@ struct GKSummary {
// assume always valid
}
/*! \brief used for debug purpose, print the summary */
inline void Print(void) const {
inline void Print() const {
for (size_t i = 0; i < size; ++i) {
std::cout << "x=" << data[i].value << "\t"
<< "[" << data[i].rmin << "," << data[i].rmax << "]"
@ -536,7 +536,7 @@ struct GKSummary {
if (sb.size == 0) {
this->CopyFrom(sa); return;
}
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
CHECK(sa.size > 0 && sb.size > 0) << "invalid input for merge";
const Entry *a = sa.data, *a_end = sa.data + sa.size;
const Entry *b = sb.data, *b_end = sb.data + sb.size;
this->size = sa.size + sb.size;
@ -569,7 +569,7 @@ struct GKSummary {
++dst; ++b;
} while (b != b_end);
}
utils::Assert(dst == data + size, "bug in combine");
CHECK(dst == data + size) << "bug in combine";
}
};
@ -592,15 +592,15 @@ class QuantileSketchTemplate {
std::vector<Entry> space;
SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
this->space = src.space;
this->data = BeginPtr(this->space);
this->data = dmlc::BeginPtr(this->space);
}
SummaryContainer(void) : Summary(NULL, 0) {
SummaryContainer() : Summary(NULL, 0) {
}
/*! \brief reserve space for summary */
inline void Reserve(size_t size) {
if (size > space.size()) {
space.resize(size);
this->data = BeginPtr(space);
this->data = dmlc::BeginPtr(space);
}
}
/*!
@ -610,7 +610,7 @@ class QuantileSketchTemplate {
*/
inline void SetMerge(const Summary *begin,
const Summary *end) {
utils::Assert(begin < end, "can not set combine to empty instance");
CHECK(begin < end) << "can not set combine to empty instance";
size_t len = end - begin;
if (len == 1) {
this->Reserve(begin[0].size);
@ -631,7 +631,7 @@ class QuantileSketchTemplate {
* \brief do elementwise combination of summary array
* this[i] = combine(this[i], src[i]) for each i
* \param src the source summary
* \param max_nbyte, maximum number of byte allowed in here
* \param max_nbyte maximum number of bytes allowed here
*/
inline void Reduce(const Summary &src, size_t max_nbyte) {
this->Reserve((max_nbyte - sizeof(this->size)) / sizeof(Entry));
@ -655,11 +655,11 @@ class QuantileSketchTemplate {
/*! \brief load data structure from input stream */
template<typename TStream>
inline void Load(TStream &fi) { // NOLINT(*)
utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
CHECK_EQ(fi.Read(&this->size, sizeof(this->size)), sizeof(this->size));
this->Reserve(this->size);
if (this->size != 0) {
utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0,
"invalid SummaryArray 2");
CHECK_EQ(fi.Read(this->data, this->size * sizeof(Entry)),
this->size * sizeof(Entry));
}
}
};
@ -678,8 +678,8 @@ class QuantileSketchTemplate {
}
// check invariant
size_t n = (1UL << nlevel);
utils::Assert(n * limit_size >= maxn, "invalid init parameter");
utils::Assert(nlevel <= limit_size * eps, "invalid init parameter");
CHECK(n * limit_size >= maxn) << "invalid init parameter";
CHECK(nlevel <= limit_size * eps) << "invalid init parameter";
// lazily reserve the space; if there is only one value, no need to allocate
inqueue.queue.resize(1);
inqueue.qtail = 0;
@ -688,7 +688,8 @@ class QuantileSketchTemplate {
}
/*!
* \brief add an element to a sketch
* \param x the element added to the sketch
* \param x The element added to the sketch
* \param w The weight of the element.
*/
inline void Push(DType x, RType w = 1) {
if (w == static_cast<RType>(0)) return;
@ -707,7 +708,7 @@ class QuantileSketchTemplate {
inqueue.Push(x, w);
}
/*! \brief push up temp */
inline void PushTemp(void) {
inline void PushTemp() {
temp.Reserve(limit_size * 2);
for (size_t l = 1; true; ++l) {
this->InitLevel(l + 1);
@ -769,7 +770,7 @@ class QuantileSketchTemplate {
data.resize(limit_size * nlevel);
level.resize(nlevel, Summary(NULL, 0));
for (size_t l = 0; l < level.size(); ++l) {
level[l].data = BeginPtr(data) + l * limit_size;
level[l].data = dmlc::BeginPtr(data) + l * limit_size;
}
}
// input data queue
@ -793,7 +794,7 @@ class QuantileSketchTemplate {
*/
template<typename DType, typename RType = unsigned>
class WQuantileSketch :
public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{
public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> > {
};
/*!
@ -803,7 +804,7 @@ class WQuantileSketch :
*/
template<typename DType, typename RType = unsigned>
class WXQuantileSketch :
public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{
public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> > {
};
/*!
* \brief Quantile sketch use WQSummary
@ -812,9 +813,8 @@ class WXQuantileSketch :
*/
template<typename DType, typename RType = unsigned>
class GKQuantileSketch :
public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{
public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> > {
};
} // namespace utils
} // namespace common
} // namespace xgboost
#endif // XGBOOST_UTILS_QUANTILE_H_
#endif // XGBOOST_COMMON_QUANTILE_H_
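The classes above expose a small surface: feed weighted values in, get an eps-approximate summary out. A hedged usage sketch follows; Init(maxn, eps) and GetSummary(&out) are assumed entry points inferred from the invariant checks and SummaryContainer above, and only Push(x, w) appears verbatim in this diff:

// Hedged usage sketch (not part of this diff): exercising the weighted
// quantile sketch. Init(maxn, eps) and GetSummary(out) are assumed entry
// points; only Push(x, w) is shown verbatim above.
#include <cstdio>
#include "src/common/quantile.h"

int main() {
  xgboost::common::WQuantileSketch<float, float> sketch;
  sketch.Init(1000, 0.01);  // budget: ~1000 weighted entries, 1% rank error
  for (int i = 0; i < 1000; ++i) {
    sketch.Push(static_cast<float>(i % 97), 1.0f);  // value with unit weight
  }
  xgboost::common::WQuantileSketch<float, float>::SummaryContainer out;
  sketch.GetSummary(&out);  // collapse the level hierarchy into one summary
  std::printf("summary size=%lu, max rank error=%g\n",
              static_cast<unsigned long>(out.size),
              static_cast<double>(out.MaxError()));
  return 0;
}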

70
src/common/random.h Normal file
View File

@ -0,0 +1,70 @@
/*!
* Copyright 2015 by Contributors
* \file random.h
* \brief Utility related to random.
* \author Tianqi Chen
*/
#ifndef XGBOOST_COMMON_RANDOM_H_
#define XGBOOST_COMMON_RANDOM_H_
#include <random>
#include <limits>
namespace xgboost {
namespace common {
/*!
* \brief Define mt19937 as default type Random Engine.
*/
typedef std::mt19937 RandomEngine;
#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
/*!
 * \brief A customized random engine, used to plug in a PRNG from other systems.
 * The implementation of this engine is not provided by the xgboost core library.
 * Instead, another library can implement this class, which will then be used as GlobalRandomEngine
 * when XGBOOST_CUSTOMIZE_GLOBAL_PRNG = 1; by default this is switched off.
*/
class CustomGlobalRandomEngine {
public:
/*! \brief The result type */
typedef size_t result_type;
/*! \brief The minimum of random numbers generated */
inline static constexpr result_type min() {
return 0;
}
/*! \brief The maximum random numbers generated */
inline static constexpr result_type max() {
return std::numeric_limits<size_t>::max();
}
/*!
* \brief seed function, to be implemented
* \param val The value of the seed.
*/
void seed(result_type val);
/*!
* \return next random number.
*/
result_type operator()();
};
/*!
* \brief global random engine
*/
typedef CustomGlobalRandomEngine GlobalRandomEngine;
#else
/*!
* \brief global random engine
*/
typedef RandomEngine GlobalRandomEngine;
#endif
/*!
* \brief global singleton of a random engine.
 * This engine is not thread-safe; only use it when necessary.
*/
GlobalRandomEngine& GlobalRandom(); // NOLINT(*)
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_RANDOM_H_
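The engine is consumed through standard <random> distributions. A minimal sketch, mirroring the bernoulli coin-flip subsampling that simple_dmatrix.cc below performs with this same engine:

// Minimal sketch: drawing from the global engine via <random> distributions,
// mirroring the coin-flip row subsampling used in simple_dmatrix.cc below.
#include <random>
#include "src/common/random.h"

bool KeepRow(float pkeep) {
  auto& rnd = xgboost::common::GlobalRandom();  // not thread-safe
  std::bernoulli_distribution coin_flip(pkeep);
  return pkeep == 1.0f || coin_flip(rnd);
}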

13
src/common/sync.h Normal file
View File

@ -0,0 +1,13 @@
/*!
* Copyright 2014 by Contributors
* \file sync.h
* \brief the synchronization module of rabit
* redirects to rabit header
* \author Tianqi Chen
*/
#ifndef XGBOOST_COMMON_SYNC_H_
#define XGBOOST_COMMON_SYNC_H_
#include <rabit.h>
#endif // XGBOOST_COMMON_SYNC_H_
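Because sync.h simply re-exports rabit, callers use the rabit API directly. A minimal sketch of the two calls that data.cc below relies on for row-split loading:

// Sketch: the two rabit calls that data.cc (below) uses for row-split loading.
#include "src/common/sync.h"

void PrintShard() {
  int part = rabit::GetRank();        // id of this worker
  int npart = rabit::GetWorldSize();  // total number of workers
  // each worker then loads partition `part` out of `npart`
  (void)part; (void)npart;
}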

87
src/common/thread_local.h Normal file
View File

@ -0,0 +1,87 @@
/*!
* Copyright (c) 2015 by Contributors
* \file thread_local.h
* \brief Common utility for thread local storage.
*/
#ifndef XGBOOST_COMMON_THREAD_LOCAL_H_
#define XGBOOST_COMMON_THREAD_LOCAL_H_
#if DMLC_ENABLE_STD_THREAD
#include <mutex>
#endif
#include <memory>
#include <vector>
namespace xgboost {
namespace common {
// macro handling for thread-local variables
#ifdef __GNUC__
#define MX_TREAD_LOCAL __thread
#elif __STDC_VERSION__ >= 201112L
#define MX_TREAD_LOCAL _Thread_local
#elif defined(_MSC_VER)
#define MX_TREAD_LOCAL __declspec(thread)
#endif
#ifndef MX_TREAD_LOCAL
#pragma message("Warning: Threadlocal is not enabled")
#endif
/*!
 * \brief A thread-local store used to hold thread-local variables.
 * Will return a thread-local singleton of type T.
 * \tparam T the type we would like to store
*/
template<typename T>
class ThreadLocalStore {
public:
/*! \return get a thread local singleton */
static T* Get() {
static MX_TREAD_LOCAL T* ptr = nullptr;
if (ptr == nullptr) {
ptr = new T();
Singleton()->RegisterDelete(ptr);
}
return ptr;
}
private:
/*! \brief constructor */
ThreadLocalStore() {}
/*! \brief destructor */
~ThreadLocalStore() {
for (size_t i = 0; i < data_.size(); ++i) {
delete data_[i];
}
}
/*! \return singleton of the store */
static ThreadLocalStore<T> *Singleton() {
static ThreadLocalStore<T> inst;
return &inst;
}
/*!
 * \brief register str for deletion when the store is destructed
 * \param str pointer to the object to be deleted
*/
void RegisterDelete(T *str) {
#if DMLC_ENABLE_STD_THREAD
std::unique_lock<std::mutex> lock(mutex_);
data_.push_back(str);
lock.unlock();
#else
data_.push_back(str);
#endif
}
#if DMLC_ENABLE_STD_THREAD
/*! \brief internal mutex */
std::mutex mutex_;
#endif
/*! \brief internal data */
std::vector<T*> data_;
};
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_THREAD_LOCAL_H_
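Usage is a single static call per thread. A hedged sketch; ScratchBuffer is a hypothetical payload type, chosen only to show the per-thread scratch pattern:

// Hedged sketch: per-thread scratch space via ThreadLocalStore.
// `ScratchBuffer` is a hypothetical payload type, not part of this diff.
#include <vector>
#include "src/common/thread_local.h"

struct ScratchBuffer {
  std::vector<float> values;
};

void UseScratch() {
  // first call on each thread allocates; later calls return the same object
  ScratchBuffer* buf = xgboost::common::ThreadLocalStore<ScratchBuffer>::Get();
  buf->values.resize(256);
}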

View File

@ -1,166 +0,0 @@
/*!
* Copyright (c) 2014 by Contributors
* \file data.h
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_H_
#define XGBOOST_DATA_H_
#include <cstdio>
#include <vector>
#include "utils/utils.h"
#include "utils/iterator.h"
namespace xgboost {
/*!
* \brief unsigned integer type used in boost,
* used for feature index and row index
*/
typedef unsigned bst_uint;
/*! \brief float type, used for storing statistics */
typedef float bst_float;
const float rt_eps = 1e-5f;
// min gap between feature values to allow a split happen
const float rt_2eps = rt_eps * 2.0f;
/*! \brief gradient statistics pair usually needed in gradient boosting */
struct bst_gpair {
/*! \brief gradient statistics */
bst_float grad;
/*! \brief second order gradient statistics */
bst_float hess;
bst_gpair(void) {}
bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {}
};
/*!
* \brief extra information that might be needed by gbm and tree module
* this information is not necessarily present, and can be empty
*/
struct BoosterInfo {
/*! \brief number of rows in the data */
size_t num_row;
/*! \brief number of columns in the data */
size_t num_col;
/*!
* \brief specified root index of each instance,
* can be used for multi task setting
*/
std::vector<unsigned> root_index;
/*! \brief set fold indicator */
std::vector<unsigned> fold_index;
/*! \brief number of rows, number of columns */
BoosterInfo(void) : num_row(0), num_col(0) {
}
/*! \brief get root of i-th instance */
inline unsigned GetRoot(size_t i) const {
return root_index.size() == 0 ? 0 : root_index[i];
}
};
/*! \brief read-only sparse instance batch in CSR format */
struct SparseBatch {
/*! \brief an entry of sparse vector */
struct Entry {
/*! \brief feature index */
bst_uint index;
/*! \brief feature value */
bst_float fvalue;
// default constructor
Entry(void) {}
Entry(bst_uint index, bst_float fvalue) : index(index), fvalue(fvalue) {}
/*! \brief reversely compare feature values */
inline static bool CmpValue(const Entry &a, const Entry &b) {
return a.fvalue < b.fvalue;
}
};
/*! \brief an instance of sparse vector in the batch */
struct Inst {
/*! \brief pointer to the elements*/
const Entry *data;
/*! \brief length of the instance */
bst_uint length;
/*! \brief constructor */
Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
/*! \brief get i-th pair in the sparse vector*/
inline const Entry& operator[](size_t i) const {
return data[i];
}
};
/*! \brief batch size */
size_t size;
};
/*! \brief read-only row batch, used to access row continuously */
struct RowBatch : public SparseBatch {
/*! \brief the offset of rowid of this batch */
size_t base_rowid;
/*! \brief array[size+1], row pointer of each of the elements */
const size_t *ind_ptr;
/*! \brief array[ind_ptr.back()], content of the sparse element */
const Entry *data_ptr;
/*! \brief get i-th row from the batch */
inline Inst operator[](size_t i) const {
return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i+1] - ind_ptr[i]));
}
};
/*!
* \brief read-only column batch, used to access columns,
* the columns are not required to be continuous
*/
struct ColBatch : public SparseBatch {
/*! \brief column index of each columns in the data */
const bst_uint *col_index;
/*! \brief pointer to the column data */
const Inst *col_data;
/*! \brief get i-th column from the batch */
inline Inst operator[](size_t i) const {
return col_data[i];
}
};
/**
* \brief interface of feature matrix, needed for tree construction
* this interface defines two ways to access features:
* row access is defined by iterator of RowBatch
* col access is optional, checked by HaveColAccess, and defined by iterator of ColBatch
*/
class IFMatrix {
public:
// the interface only need to guarantee row iter
// column iter is active, when ColIterator is called, row_iter can be disabled
/*! \brief get the row iterator associated with FMatrix */
virtual utils::IIterator<RowBatch> *RowIterator(void) = 0;
/*!\brief get column iterator */
virtual utils::IIterator<ColBatch> *ColIterator(void) = 0;
/*!
* \brief get the column iterator associated with FMatrix with subset of column features
* \param fset is the list of column index set that must be contained in the returning Column iterator
* \return the column iterator, initialized so that it reads the elements in fset
*/
virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) = 0;
/*!
* \brief check if column access is supported, if not, initialize column access
* \param enabled whether certain feature should be included in column access
* \param subsample subsample ratio when generating column access
* \param max_row_perbatch auxiliary information, maximum row used in each column batch
* this is a hint information that can be ignored by the implementation
*/
virtual void InitColAccess(const std::vector<bool> &enabled,
float subsample,
size_t max_row_perbatch) = 0;
// the following are column meta data, should be able to answer them fast
/*! \return whether column access is enabled */
virtual bool HaveColAccess(void) const = 0;
/*! \return number of columns in the FMatrix */
virtual size_t NumCol(void) const = 0;
/*! \brief get number of non-missing entries in column */
virtual size_t GetColSize(size_t cidx) const = 0;
/*! \brief get column density */
virtual float GetColDensity(size_t cidx) const = 0;
/*! \brief reference of buffered rowset */
virtual const std::vector<bst_uint> &buffered_rowset(void) const = 0;
// virtual destructor
virtual ~IFMatrix(void){}
};
} // namespace xgboost
#endif // XGBOOST_DATA_H_

278
src/data/data.cc Normal file
View File

@ -0,0 +1,278 @@
/*!
* Copyright 2015 by Contributors
* \file data.cc
*/
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <dmlc/registry.h>
#include <cstring>
#include "./sparse_batch_page.h"
#include "./simple_dmatrix.h"
#include "./simple_csr_source.h"
#include "../common/io.h"
#if DMLC_ENABLE_STD_THREAD
#include "./sparse_page_source.h"
#include "./sparse_page_dmatrix.h"
#endif
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg);
} // namespace dmlc
namespace xgboost {
// implementation of inline functions
void MetaInfo::Clear() {
num_row = num_col = num_nonzero = 0;
labels.clear();
root_index.clear();
group_ptr.clear();
weights.clear();
base_margin.clear();
}
void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
int version = kVersion;
fo->Write(&version, sizeof(version));
fo->Write(&num_row, sizeof(num_row));
fo->Write(&num_col, sizeof(num_col));
fo->Write(&num_nonzero, sizeof(num_nonzero));
fo->Write(labels);
fo->Write(group_ptr);
fo->Write(weights);
fo->Write(root_index);
fo->Write(base_margin);
}
void MetaInfo::LoadBinary(dmlc::Stream *fi) {
int version;
CHECK(fi->Read(&version, sizeof(version)) == sizeof(version)) << "MetaInfo: invalid version";
CHECK_EQ(version, kVersion) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_row, sizeof(num_row)) == sizeof(num_row)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_col, sizeof(num_col)) == sizeof(num_col)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_nonzero, sizeof(num_nonzero)) == sizeof(num_nonzero))
<< "MetaInfo: invalid format";
CHECK(fi->Read(&labels)) << "MetaInfo: invalid format";
CHECK(fi->Read(&group_ptr)) << "MetaInfo: invalid format";
CHECK(fi->Read(&weights)) << "MetaInfo: invalid format";
CHECK(fi->Read(&root_index)) << "MetaInfo: invalid format";
CHECK(fi->Read(&base_margin)) << "MetaInfo: invalid format";
}
// try to load group information from file, if exists
inline bool MetaTryLoadGroup(const std::string& fname,
std::vector<unsigned>* group) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false;
dmlc::istream is(fi.get());
group->clear();
group->push_back(0);
unsigned nline;
while (is >> nline) {
group->push_back(group->back() + nline);
}
return true;
}
// try to load weight information from file, if exists
inline bool MetaTryLoadFloatInfo(const std::string& fname,
std::vector<float>* data) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false;
dmlc::istream is(fi.get());
data->clear();
float value;
while (is >> value) {
data->push_back(value);
}
return true;
}
// macro to dispatch according to specified pointer types
#define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \
switch (dtype) { \
case kFloat32: { \
const float* cast_ptr = reinterpret_cast<const float*>(old_ptr); proc; break; \
} \
case kDouble: { \
const double* cast_ptr = reinterpret_cast<const double*>(old_ptr); proc; break; \
} \
case kUInt32: { \
const uint32_t* cast_ptr = reinterpret_cast<const uint32_t*>(old_ptr); proc; break; \
} \
case kUInt64: { \
const uint64_t* cast_ptr = reinterpret_cast<const uint64_t*>(old_ptr); proc; break; \
} \
    default: LOG(FATAL) << "Unknown data type " << dtype; \
  }
void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
if (!std::strcmp(key, "root_index")) {
root_index.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, root_index.begin()));
} else if (!std::strcmp(key, "label")) {
labels.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, labels.begin()));
} else if (!std::strcmp(key, "weight")) {
weights.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, weights.begin()));
} else if (!std::strcmp(key, "base_margin")) {
base_margin.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, base_margin.begin()));
}
}
DMatrix* DMatrix::Load(const std::string& uri,
bool silent,
bool load_row_split,
const std::string& file_format) {
std::string fname, cache_file;
size_t dlm_pos = uri.find('#');
if (dlm_pos != std::string::npos) {
cache_file = uri.substr(dlm_pos + 1, uri.length());
fname = uri.substr(0, dlm_pos);
CHECK_EQ(cache_file.find('#'), std::string::npos)
<< "Only one `#` is allowed in file path for cache file specification.";
if (load_row_split) {
std::ostringstream os;
os << cache_file << ".r" << rabit::GetRank();
cache_file = os.str();
}
} else {
fname = uri;
}
int partid = 0, npart = 1;
if (load_row_split) {
partid = rabit::GetRank();
npart = rabit::GetWorldSize();
} else {
// test option to load in part
npart = dmlc::GetEnv("XGBOOST_TEST_NPART", 1);
if (npart != 1) {
LOG(CONSOLE) << "Partial load option on npart=" << npart;
}
}
// legacy handling of binary data loading
if (file_format == "auto" && !load_row_split) {
int magic;
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() != nullptr) {
common::PeekableInStream is(fi.get());
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) &&
magic == data::SimpleCSRSource::kMagic) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
source->LoadBinary(&is);
DMatrix* dmat = DMatrix::Create(std::move(source), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
}
return dmat;
}
}
}
std::string ftype = file_format;
if (file_format == "auto") ftype = "libsvm";
std::unique_ptr<dmlc::Parser<uint32_t> > parser(
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, ftype.c_str()));
DMatrix* dmat = DMatrix::Create(parser.get(), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
}
// backward compatibility code.
if (!load_row_split) {
MetaInfo& info = dmat->info();
if (MetaTryLoadGroup(fname + ".group", &info.group_ptr) && !silent) {
LOG(CONSOLE) << info.group_ptr.size() - 1
<< " groups are loaded from " << fname << ".group";
}
if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin) && !silent) {
LOG(CONSOLE) << info.base_margin.size()
<< " base_margin are loaded from " << fname << ".base_margin";
}
}
return dmat;
}
DMatrix* DMatrix::Create(dmlc::Parser<uint32_t>* parser,
const std::string& cache_prefix) {
if (cache_prefix.length() == 0) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
source->CopyFrom(parser);
return DMatrix::Create(std::move(source), cache_prefix);
} else {
#if DMLC_ENABLE_STD_THREAD
if (!data::SparsePageSource::CacheExist(cache_prefix)) {
data::SparsePageSource::Create(parser, cache_prefix);
}
std::unique_ptr<data::SparsePageSource> source(new data::SparsePageSource(cache_prefix));
return DMatrix::Create(std::move(source), cache_prefix);
#else
LOG(FATAL) << "External memory is not enabled in mingw";
return nullptr;
#endif
}
}
void DMatrix::SaveToLocalFile(const std::string& fname) {
data::SimpleCSRSource source;
source.CopyFrom(this);
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
source.SaveBinary(fo.get());
}
DMatrix* DMatrix::Create(std::unique_ptr<DataSource>&& source,
const std::string& cache_prefix) {
if (cache_prefix.length() == 0) {
return new data::SimpleDMatrix(std::move(source));
} else {
#if DMLC_ENABLE_STD_THREAD
return new data::SparsePageDMatrix(std::move(source), cache_prefix);
#else
LOG(FATAL) << "External memory is not enabled in mingw";
return nullptr;
#endif
}
}
} // namespace xgboost
namespace xgboost {
namespace data {
SparsePage::Format* SparsePage::Format::Create(const std::string& name) {
auto *e = ::dmlc::Registry< ::xgboost::data::SparsePageFormatReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown format type " << name;
}
return (e->body)();
}
std::pair<std::string, std::string>
SparsePage::Format::DecideFormat(const std::string& cache_prefix) {
size_t pos = cache_prefix.rfind(".fmt-");
if (pos != std::string::npos) {
std::string fmt = cache_prefix.substr(pos + 5, cache_prefix.length());
size_t cpos = fmt.rfind('-');
if (cpos != std::string::npos) {
return std::make_pair(fmt.substr(0, cpos), fmt.substr(cpos + 1, fmt.length()));
} else {
return std::make_pair(fmt, fmt);
}
} else {
std::string raw = "raw";
return std::make_pair(raw, raw);
}
}
// List of files that will be force-linked in static builds.
DMLC_REGISTRY_LINK_TAG(sparse_page_raw_format);
} // namespace data
} // namespace xgboost
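The URI and auxiliary-file conventions implemented above are easiest to see from the caller's side. A hedged sketch with illustrative file names:

// Hedged sketch of the loading conventions implemented above. File names are
// illustrative. "train.libsvm#train.cache" splits at '#': the left part is
// parsed as libsvm, the right part becomes the external-memory cache prefix.
// If "train.libsvm.group" / "train.libsvm.base_margin" exist next to the
// data file, they are picked up automatically.
#include <memory>
#include <xgboost/data.h>

int main() {
  std::unique_ptr<xgboost::DMatrix> dmat(
      xgboost::DMatrix::Load("train.libsvm#train.cache",
                             /*silent=*/false,
                             /*load_row_split=*/false,
                             /*file_format=*/"auto"));
  return dmat != nullptr ? 0 : 1;
}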

101
src/data/simple_csr_source.cc Normal file
View File

@ -0,0 +1,101 @@
/*!
* Copyright 2015 by Contributors
* \file simple_csr_source.cc
*/
#include <dmlc/base.h>
#include <xgboost/logging.h>
#include "./simple_csr_source.h"
namespace xgboost {
namespace data {
void SimpleCSRSource::Clear() {
row_data_.clear();
row_ptr_.resize(1);
row_ptr_[0] = 0;
this->info.Clear();
}
void SimpleCSRSource::CopyFrom(DMatrix* src) {
this->Clear();
this->info = src->info();
dmlc::DataIter<RowBatch>* iter = src->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
RowBatch::Inst inst = batch[i];
row_data_.insert(row_data_.end(), inst.data, inst.data + inst.length);
row_ptr_.push_back(row_ptr_.back() + inst.length);
}
}
}
void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
this->Clear();
while (parser->Next()) {
const dmlc::RowBlock<uint32_t>& batch = parser->Value();
if (batch.label != nullptr) {
info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size);
}
if (batch.weight != nullptr) {
info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size);
}
row_data_.reserve(row_data_.size() + batch.offset[batch.size] - batch.offset[0]);
CHECK(batch.index != nullptr);
// update information
this->info.num_row += batch.size;
// copy the data over
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i];
bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
row_data_.push_back(SparseBatch::Entry(index, fvalue));
this->info.num_col = std::max(this->info.num_col,
static_cast<uint64_t>(index + 1));
}
size_t top = row_ptr_.size();
row_ptr_.resize(top + batch.size);
for (size_t i = 0; i < batch.size; ++i) {
row_ptr_[top + i] = row_ptr_[top - 1] + batch.offset[i + 1] - batch.offset[0];
}
}
this->info.num_nonzero = static_cast<uint64_t>(row_data_.size());
}
void SimpleCSRSource::LoadBinary(dmlc::Stream* fi) {
int tmagic;
CHECK(fi->Read(&tmagic, sizeof(tmagic)) == sizeof(tmagic)) << "invalid input file format";
CHECK_EQ(tmagic, kMagic) << "invalid format, magic number mismatch";
info.LoadBinary(fi);
fi->Read(&row_ptr_);
fi->Read(&row_data_);
}
void SimpleCSRSource::SaveBinary(dmlc::Stream* fo) const {
int tmagic = kMagic;
fo->Write(&tmagic, sizeof(tmagic));
info.SaveBinary(fo);
fo->Write(row_ptr_);
fo->Write(row_data_);
}
void SimpleCSRSource::BeforeFirst() {
at_first_ = true;
}
bool SimpleCSRSource::Next() {
if (!at_first_) return false;
at_first_ = false;
batch_.size = row_ptr_.size() - 1;
batch_.base_rowid = 0;
batch_.ind_ptr = dmlc::BeginPtr(row_ptr_);
batch_.data_ptr = dmlc::BeginPtr(row_data_);
return true;
}
const RowBatch& SimpleCSRSource::Value() const {
return batch_;
}
} // namespace data
} // namespace xgboost
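The BeforeFirst/Next/Value contract implemented above is what every DMatrix consumer sees; for this source, Next() succeeds exactly once per rewind, yielding the whole matrix as a single batch. A minimal row-scan sketch:

// Minimal sketch: scanning rows through the DataSource iterator contract
// implemented above (one batch: Next() is true exactly once per rewind).
#include <xgboost/data.h>

size_t CountEntries(xgboost::DMatrix* dmat) {
  size_t nnz = 0;
  dmlc::DataIter<xgboost::RowBatch>* iter = dmat->RowIterator();
  iter->BeforeFirst();
  while (iter->Next()) {
    const xgboost::RowBatch& batch = iter->Value();
    for (size_t i = 0; i < batch.size; ++i) {
      nnz += batch[i].length;  // RowBatch::Inst holds (index, fvalue) pairs
    }
  }
  return nnz;
}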

81
src/data/simple_csr_source.h Normal file
View File

@ -0,0 +1,81 @@
/*!
* Copyright 2015 by Contributors
* \file simple_csr_source.h
* \brief The simplest form of data source, can be used to create DMatrix.
* This is an in-memory data structure that holds the data in row oriented format.
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_
#define XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <vector>
#include <algorithm>
namespace xgboost {
namespace data {
/*!
* \brief The simplest form of data holder, can be used to create DMatrix.
* This is an in-memory data structure that holds the data in row oriented format.
* \code
* std::unique_ptr<DataSource> source(new SimpleCSRSource());
* // add data to source
* DMatrix* dmat = DMatrix::Create(std::move(source));
 * \endcode
*/
class SimpleCSRSource : public DataSource {
public:
// public data members
// MetaInfo info; // inherited from DataSource
/*! \brief row pointer of CSR sparse storage */
std::vector<size_t> row_ptr_;
/*! \brief data in the CSR sparse storage */
std::vector<RowBatch::Entry> row_data_;
// functions
/*! \brief default constructor */
SimpleCSRSource() : row_ptr_(1, 0), at_first_(true) {}
/*! \brief destructor */
virtual ~SimpleCSRSource() {}
/*! \brief clear the data structure */
void Clear();
/*!
* \brief copy content of data from src
* \param src source data iter.
*/
void CopyFrom(DMatrix* src);
/*!
* \brief copy content of data from parser, also set the additional information.
   * \param src source parser; the additional information carried by the parser is also copied.
*/
void CopyFrom(dmlc::Parser<uint32_t>* src);
/*!
* \brief Load data from binary stream.
* \param fi the pointer to load data from.
*/
void LoadBinary(dmlc::Stream* fi);
/*!
* \brief Save data into binary stream
* \param fo The output stream.
*/
void SaveBinary(dmlc::Stream* fo) const;
// implement Next
bool Next() override;
// implement BeforeFirst
void BeforeFirst() override;
// implement Value
const RowBatch &Value() const override;
/*! \brief magic number used to identify SimpleCSRSource */
static const int kMagic = 0xffffab01;
private:
/*! \brief internal variable, used to support iterator interface */
bool at_first_;
/*! \brief temporary data holder for the returned row batch */
RowBatch batch_;
};
} // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_
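Since row_ptr_, row_data_ and info are public, a caller can also assemble a matrix by hand, the step the \code snippet above elides. A hedged sketch building a 2x3 matrix:

// Hedged sketch: filling a SimpleCSRSource by hand and wrapping it in a
// DMatrix. A 2-row, 3-column matrix: row 0 = {0:1.0, 2:3.0}, row 1 = {1:2.0}.
#include <memory>
#include "src/data/simple_csr_source.h"

xgboost::DMatrix* MakeTinyMatrix() {
  std::unique_ptr<xgboost::data::SimpleCSRSource> source(
      new xgboost::data::SimpleCSRSource());
  source->row_data_.push_back(xgboost::RowBatch::Entry(0, 1.0f));
  source->row_data_.push_back(xgboost::RowBatch::Entry(2, 3.0f));
  source->row_ptr_.push_back(2);   // row_ptr_ starts out as {0}
  source->row_data_.push_back(xgboost::RowBatch::Entry(1, 2.0f));
  source->row_ptr_.push_back(3);
  source->info.num_row = 2;
  source->info.num_col = 3;
  source->info.num_nonzero = 3;
  // empty cache prefix selects the in-memory SimpleDMatrix
  return xgboost::DMatrix::Create(std::move(source), "");
}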

265
src/data/simple_dmatrix.cc Normal file
View File

@ -0,0 +1,265 @@
/*!
* Copyright 2014 by Contributors
* \file simple_dmatrix.cc
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#include <xgboost/data.h>
#include <limits>
#include <algorithm>
#include <vector>
#include "./simple_dmatrix.h"
#include "../common/random.h"
#include "../common/group_data.h"
namespace xgboost {
namespace data {
bool SimpleDMatrix::ColBatchIter::Next() {
if (data_ptr_ >= cpages_.size()) return false;
data_ptr_ += 1;
SparsePage* pcol = cpages_[data_ptr_ - 1].get();
batch_.size = col_index_.size();
col_data_.resize(col_index_.size(), SparseBatch::Inst(NULL, 0));
for (size_t i = 0; i < col_data_.size(); ++i) {
const bst_uint ridx = col_index_[i];
col_data_[i] = SparseBatch::Inst
(dmlc::BeginPtr(pcol->data) + pcol->offset[ridx],
static_cast<bst_uint>(pcol->offset[ridx + 1] - pcol->offset[ridx]));
}
batch_.col_index = dmlc::BeginPtr(col_index_);
batch_.col_data = dmlc::BeginPtr(col_data_);
return true;
}
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
size_t ncol = this->info().num_col;
col_iter_.col_index_.resize(ncol);
for (size_t i = 0; i < ncol; ++i) {
col_iter_.col_index_[i] = static_cast<bst_uint>(i);
}
col_iter_.BeforeFirst();
return &col_iter_;
}
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator(const std::vector<bst_uint>&fset) {
size_t ncol = this->info().num_col;
col_iter_.col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]);
}
col_iter_.BeforeFirst();
return &col_iter_;
}
void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
float pkeep,
size_t max_row_perbatch) {
if (this->HaveColAccess()) return;
col_iter_.cpages_.clear();
if (info().num_row < max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeOneBatch(enabled, pkeep, page.get());
col_iter_.cpages_.push_back(std::move(page));
} else {
this->MakeManyBatch(enabled, pkeep, max_row_perbatch);
}
// setup col-size
col_size_.resize(info().num_col);
std::fill(col_size_.begin(), col_size_.end(), 0);
for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) {
SparsePage *pcol = col_iter_.cpages_[i].get();
for (size_t j = 0; j < pcol->Size(); ++j) {
col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
}
}
}
// internal function to make one batch from row iter.
void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled,
float pkeep,
SparsePage *pcol) {
// clear rowset
buffered_rowset_.clear();
// bit map
int nthread;
std::vector<bool> bmap;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
// start working
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch& batch = iter->Value();
bmap.resize(bmap.size() + batch.size, true);
std::bernoulli_distribution coin_flip(pkeep);
auto& rnd = common::GlobalRandom();
long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
} else {
bmap[ridx] = false;  // index by global row id, matching the reads below
}
}
#pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.AddBudget(inst[j].index, tid);
}
}
}
}
}
builder.InitStorage();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch& batch = iter->Value();
#pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.Push(inst[j].index,
SparseBatch::Entry((bst_uint)(batch.base_rowid+i),
inst[j].fvalue), tid);
}
}
}
}
}
CHECK_EQ(pcol->Size(), info().num_col);
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
std::sort(dmlc::BeginPtr(pcol->data) + pcol->offset[i],
dmlc::BeginPtr(pcol->data) + pcol->offset[i + 1],
SparseBatch::Entry::CmpValue);
}
}
}
void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
float pkeep,
size_t max_row_perbatch) {
size_t btop = 0;
std::bernoulli_distribution coin_flip(pkeep);
auto& rnd = common::GlobalRandom();
buffered_rowset_.clear();
// internal temp cache
SparsePage tmp; tmp.Clear();
// start working
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
tmp.Push(batch[i]);
}
if (tmp.Size() >= max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeColPage(tmp.GetRowBatch(0),
dmlc::BeginPtr(buffered_rowset_) + btop,
enabled, page.get());
col_iter_.cpages_.push_back(std::move(page));
btop = buffered_rowset_.size();
tmp.Clear();
}
}
}
if (tmp.Size() != 0) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeColPage(tmp.GetRowBatch(0),
dmlc::BeginPtr(buffered_rowset_) + btop,
enabled, page.get());
col_iter_.cpages_.push_back(std::move(page));
}
}
// make a column page from a subset of row batches
void SimpleDMatrix::MakeColPage(const RowBatch& batch,
const bst_uint* ridx,
const std::vector<bool>& enabled,
SparsePage* pcol) {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
if (nthread > max_nthread) {
nthread = max_nthread;
}
}
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
bst_omp_uint ndata = static_cast<bst_uint>(batch.size);
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
int tid = omp_get_thread_num();
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
const SparseBatch::Entry &e = inst[j];
if (enabled[e.index]) {
builder.AddBudget(e.index, tid);
}
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
int tid = omp_get_thread_num();
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
const SparseBatch::Entry &e = inst[j];
builder.Push(e.index,
SparseBatch::Entry(ridx[i], e.fvalue),
tid);
}
}
CHECK_EQ(pcol->Size(), info().num_col);
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
std::sort(dmlc::BeginPtr(pcol->data) + pcol->offset[i],
dmlc::BeginPtr(pcol->data) + pcol->offset[i + 1],
SparseBatch::Entry::CmpValue);
}
}
}
bool SimpleDMatrix::SingleColBlock() const {
return col_iter_.cpages_.size() <= 1;
}
} // namespace data
} // namespace xgboost
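From a consumer's perspective (for example, a tree updater) the column machinery above is driven in two steps: InitColAccess once, then ColIterator per use. A hedged sketch with illustrative parameters:

// Hedged sketch: driving the column-access path built above. Parameters are
// illustrative; max_row_perbatch bounds how many rows land in one column page.
#include <vector>
#include <xgboost/data.h>

void ScanColumns(xgboost::DMatrix* dmat) {
  std::vector<bool> enabled(dmat->info().num_col, true);
  dmat->InitColAccess(enabled, /*subsample=*/1.0f,
                      /*max_row_perbatch=*/32UL << 10);
  dmlc::DataIter<xgboost::ColBatch>* it = dmat->ColIterator();
  it->BeforeFirst();
  while (it->Next()) {
    const xgboost::ColBatch& batch = it->Value();
    for (size_t i = 0; i < batch.size; ++i) {
      xgboost::ColBatch::Inst col = batch[i];  // entries sorted by fvalue
      (void)col;
    }
  }
}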

119
src/data/simple_dmatrix.h Normal file
View File

@ -0,0 +1,119 @@
/*!
* Copyright 2015 by Contributors
* \file simple_dmatrix.h
* \brief In-memory version of DMatrix.
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_H_
#define XGBOOST_DATA_SIMPLE_DMATRIX_H_
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <vector>
#include <algorithm>
#include <cstring>
#include "./sparse_batch_page.h"
namespace xgboost {
namespace data {
class SimpleDMatrix : public DMatrix {
public:
explicit SimpleDMatrix(std::unique_ptr<DataSource>&& source)
: source_(std::move(source)) {}
MetaInfo& info() override {
return source_->info;
}
const MetaInfo& info() const override {
return source_->info;
}
dmlc::DataIter<RowBatch>* RowIterator() override {
dmlc::DataIter<RowBatch>* iter = source_.get();
iter->BeforeFirst();
return iter;
}
bool HaveColAccess() const override {
return col_size_.size() != 0;
}
const std::vector<bst_uint>& buffered_rowset() const override {
return buffered_rowset_;
}
size_t GetColSize(size_t cidx) const {
return col_size_[cidx];
}
float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
}
dmlc::DataIter<ColBatch>* ColIterator() override;
dmlc::DataIter<ColBatch>* ColIterator(const std::vector<bst_uint>& fset) override;
void InitColAccess(const std::vector<bool>& enabled,
float subsample,
size_t max_row_perbatch) override;
bool SingleColBlock() const override;
private:
// in-memory column batch iterator.
struct ColBatchIter: dmlc::DataIter<ColBatch> {
public:
ColBatchIter() : data_ptr_(0) {}
void BeforeFirst() override {
data_ptr_ = 0;
}
const ColBatch &Value() const override {
return batch_;
}
bool Next() override;
private:
// allow SimpleDMatrix to access it.
friend class SimpleDMatrix;
// data content
std::vector<bst_uint> col_index_;
// column content
std::vector<ColBatch::Inst> col_data_;
// column sparse pages
std::vector<std::unique_ptr<SparsePage> > cpages_;
// data pointer
size_t data_ptr_;
// temporal space for batch
ColBatch batch_;
};
// source data pointer.
std::unique_ptr<DataSource> source_;
// column iterator
ColBatchIter col_iter_;
// list of row index that are buffered.
std::vector<bst_uint> buffered_rowset_;
/*! \brief sizeof column data */
std::vector<size_t> col_size_;
// internal function to make one batch from row iter.
void MakeOneBatch(const std::vector<bool>& enabled,
float pkeep,
SparsePage *pcol);
void MakeManyBatch(const std::vector<bool>& enabled,
float pkeep,
size_t max_row_perbatch);
void MakeColPage(const RowBatch& batch,
const bst_uint* ridx,
const std::vector<bool>& enabled,
SparsePage* pcol);
};
} // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_SIMPLE_DMATRIX_H_

Some files were not shown because too many files have changed in this diff