diff --git a/.gitignore b/.gitignore
index 44a215435..73ae6748e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,3 +66,4 @@ java/xgboost4j-demo/data/
 java/xgboost4j-demo/tmp/
 java/xgboost4j-demo/model/
 nb-configuration*
+dmlc-core
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..102c87353
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,46 @@
+sudo: true
+
+# Use the build matrix to run lint and the builds as separate jobs
+env:
+  matrix:
+    - TASK=lint LINT_LANG=cpp
+    - TASK=lint LINT_LANG=python
+    - TASK=R-package CXX=g++
+    - TASK=python-package CXX=g++
+    - TASK=build CXX=g++
+    - TASK=build-with-dmlc CXX=g++
+
+# dependent apt packages
+addons:
+  apt:
+    packages:
+      - doxygen
+      - libopenmpi-dev
+      - wget
+      - libcurl4-openssl-dev
+      - unzip
+      - python-numpy
+      - python-scipy
+      - python-nose
+
+before_install:
+  - git clone https://github.com/dmlc/dmlc-core
+  - export TRAVIS=dmlc-core/scripts/travis/
+  - export PYTHONPATH=${PYTHONPATH}:${PWD}/wrapper
+  - source ${TRAVIS}/travis_setup_env.sh
+
+install:
+  # --user is a bare flag (install into the per-user site-packages); it takes no value
+  - pip install --user cpplint pylint
+
+script: scripts/travis_script.sh
+
+after_failure:
+  - scripts/travis_after_failure.sh
+
+
+notifications:
+  email:
+    on_success: change
+    on_failure: always
+
diff --git a/Makefile b/Makefile
index 360d55e84..a24bea327 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 export CC  = gcc
 export CXX = g++
 export MPICXX = mpicxx
-export LDFLAGS= -pthread -lm 
+export LDFLAGS= -pthread -lm
 export CFLAGS = -Wall -O3 -msse2  -Wno-unknown-pragmas -funroll-loops
 # java include path
 export JAVAINCFLAGS = -I${JAVA_HOME}/include -I${JAVA_HOME}/include/linux -I./java
@@ -12,8 +12,8 @@ ifeq ($(OS), Windows_NT)
 endif
 
 ifeq ($(no_omp),1)
-	CFLAGS += -DDISABLE_OPENMP 
-else 
+	CFLAGS += -DDISABLE_OPENMP
+else
 	CFLAGS += -fopenmp
 endif
 
@@ -29,7 +29,7 @@ ifdef dmlc
 			config = $(dmlc)/config.mk
 		else
 			config = $(dmlc)/make/config.mk
-		endif	
+		endif
 	endif
 	include $(config)
 	include $(dmlc)/make/dmlc.mk
@@ -43,7 +43,7 @@ ifndef WITH_FPIC
 	WITH_FPIC = 1
 endif
 ifeq ($(WITH_FPIC), 1)
-	CFLAGS += -fPIC	
+	CFLAGS += -fPIC
 endif
 
 
@@ -69,7 +69,11 @@ else
 	TARGET = $(BIN)
 endif
 
-.PHONY: clean all mpi python Rpack
+ifndef LINT_LANG
+	LINT_LANG= "all"
+endif
+
+.PHONY: clean all mpi python Rpack Rbuild Rcheck lint
 
 all: $(TARGET)
 mpi: $(MPIBIN)
@@ -78,9 +82,9 @@ python: wrapper/libxgboostwrapper.so
 # now the wrapper takes in two files. io and wrapper part
 updater.o: src/tree/updater.cpp  src/tree/*.hpp src/*.h src/tree/*.h src/utils/*.h
 dmlc_simple.o: src/io/dmlc_simple.cpp src/utils/*.h
-gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h 
+gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h
 io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h
-main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h 
+main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h
 xgboost:  updater.o gbm.o io.o main.o $(LIBRABIT) $(LIBDMLC)
 wrapper/xgboost_wrapper.dll wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h  updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
 
@@ -97,11 +101,11 @@ subtree/rabit/lib/librabit_mock.a: subtree/rabit/src/engine_mock.cc
 subtree/rabit/lib/librabit_mpi.a: subtree/rabit/src/engine_mpi.cc
 	+	cd subtree/rabit;make lib/librabit_mpi.a; cd ../..
 
-$(BIN) : 
-	$(CXX) $(CFLAGS) -fPIC -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 
+$(BIN) :
+	$(CXX) $(CFLAGS) -fPIC -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
 
-$(MOCKBIN) : 
-	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 
+$(MOCKBIN) :
+	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
 
 $(SLIB) :
 	$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS)
@@ -109,13 +113,13 @@ $(SLIB) :
 $(JLIB) :
 	$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)  $(JAVAINCFLAGS)
 
-$(OBJ) : 
+$(OBJ) :
 	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
 
-$(MPIOBJ) : 
-	$(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) 
+$(MPIOBJ) :
+	$(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
 
-$(MPIBIN) : 
+$(MPIBIN) :
 	$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
 
 install:
@@ -144,10 +148,25 @@ Rpack:
 	cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars
 	cp xgboost/src/Makevars xgboost/src/Makevars.win
 	# R CMD build --no-build-vignettes xgboost
+	# R CMD build xgboost
+	# rm -rf xgboost
+	# R CMD check --as-cran xgboost*.tar.gz
+
+# Rbuild: stage the package via Rpack, build the tarball, then remove the staging dir
+Rbuild:
+	$(MAKE) Rpack
 	R CMD build xgboost
 	rm -rf xgboost
+
+# Rcheck: build the tarball via Rbuild, then run the CRAN checks on it
+Rcheck:
+	$(MAKE) Rbuild
 	R CMD check --as-cran xgboost*.tar.gz
 
+# lint requires dmlc-core to be checked out in the current folder
+lint:
+	dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package
+
 clean:
 	$(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o  */*.o */*/*.o *~ */*~ */*/*~
 	cd subtree/rabit; make clean; cd ..
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 4a5d99c7d..f7f6b9192 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -220,7 +220,8 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
     stop("nfold must be bigger than 1")
   }
   if(is.null(folds)) {
-    if (exists('objective', where=param) && strtrim(param[['objective']], 5) == 'rank:') {
+    if (exists('objective', where=param) && is.character(param$objective) &&
+        strtrim(param[['objective']], 5) == 'rank:') {
       stop("\tAutomatic creation of CV-folds is not implemented for ranking!\n",
            "\tConsider providing pre-computed CV-folds through the folds parameter.")
     }
@@ -234,7 +235,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
       # For classification, need to convert y labels to factor before making the folds,
       # and then do stratification by factor levels.
       # For regression, leave y numeric and do stratification by quantiles.
-      if (exists('objective', where=param)) {
+      if (exists('objective', where=param) && is.character(param$objective)) {
         # If 'objective' provided in params, assume that y is a classification label
         # unless objective is reg:linear
         if (param[['objective']] != 'reg:linear') y <- factor(y)
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index df7fd5648..793d904cd 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -95,152 +95,160 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
                    prediction = FALSE, showsd = TRUE, metrics=list(), 
                    obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L,
                    early.stop.round = NULL, maximize = NULL, ...) {
-  if (typeof(params) != "list") {
-    stop("xgb.cv: first argument params must be list")
-  }
-  if(!is.null(folds)) {
-    if(class(folds)!="list" | length(folds) < 2) {
-      stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
+    if (typeof(params) != "list") {
+        stop("xgb.cv: first argument params must be list")
     }
-    nfold <- length(folds)
-  }
-  if (nfold <= 1) {
-    stop("nfold must be bigger than 1")
-  }
-  if (is.null(missing)) {
-    dtrain <- xgb.get.DMatrix(data, label)
-  } else {
-    dtrain <- xgb.get.DMatrix(data, label, missing)
-  }
-  params <- append(params, list(...))
-  params <- append(params, list(silent=1))
-  for (mc in metrics) {
-    params <- append(params, list("eval_metric"=mc))
-  }
-  
-  # customized objective and evaluation metric interface
-  if (!is.null(params$objective) && !is.null(obj))
-    stop("xgb.cv: cannot assign two different objectives")
-  if (!is.null(params$objective))
-    if (class(params$objective)=='function') {
-      obj = params$objective
-      params$objective = NULL
+    if(!is.null(folds)) {
+        if(class(folds)!="list" | length(folds) < 2) {
+            stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold")
+        }
+        nfold <- length(folds)
     }
-  if (!is.null(params$eval_metric) && !is.null(feval))
-    stop("xgb.cv: cannot assign two different evaluation metrics")
-  if (!is.null(params$eval_metric))
-    if (class(params$eval_metric)=='function') {
-      feval = params$eval_metric
-      params$eval_metric = NULL
+    if (nfold <= 1) {
+        stop("nfold must be bigger than 1")
     }
-  
-  # Early Stopping
-  if (!is.null(early.stop.round)){
-    if (!is.null(feval) && is.null(maximize))
-      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
-    if (is.null(maximize) && is.null(params$eval_metric))
-      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
-    if (is.null(maximize))
-    {
-      if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
-        maximize = FALSE
-      } else {
-        maximize = TRUE
-      }
-    }
-    
-    if (maximize) {
-      bestScore = 0
+    if (is.null(missing)) {
+        dtrain <- xgb.get.DMatrix(data, label)
     } else {
-      bestScore = Inf
+        dtrain <- xgb.get.DMatrix(data, label, missing)
+    }
+    dot.params = list(...)
+    nms.params = names(params)
+    nms.dot.params = names(dot.params)
+    if (length(intersect(nms.params,nms.dot.params))>0)
+        stop("Duplicated defined term in parameters. Please check your list of params.")
+    params <- append(params, dot.params)
+    params <- append(params, list(silent=1))
+    for (mc in metrics) {
+        params <- append(params, list("eval_metric"=mc))
     }
-    bestInd = 0
-    earlyStopflag = FALSE
     
-    if (length(metrics)>1)
-      warning('Only the first metric is used for early stopping process.')
-  }
-  
-  xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
-  obj_type = params[['objective']]
-  mat_pred = FALSE
-  if (!is.null(obj_type) && obj_type=='multi:softprob')
-  {
-    num_class = params[['num_class']]
-    if (is.null(num_class))
-      stop('must set num_class to use softmax')
-    predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
-    mat_pred = TRUE
-  }
-  else
-    predictValues <- rep(0,xgb.numrow(dtrain))
-  history <- c()
-  print.every.n = max(as.integer(print.every.n), 1L)
-  for (i in 1:nrounds) {
-    msg <- list()
-    for (k in 1:nfold) {
-      fd <- xgb_folds[[k]]
-      succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
-      if (i<nrounds) {
-          msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
-      } else {
-        if (!prediction) {
-          msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
-        } else {
-          res <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction)
-          if (mat_pred) {
-            pred_mat = matrix(res[[2]],num_class,length(fd$index))
-            predictValues[fd$index,] <- t(pred_mat)
-          } else {
-            predictValues[fd$index] <- res[[2]]
-          }
-          msg[[k]] <- res[[1]] %>% str_split("\t") %>% .[[1]]
+    # customized objective and evaluation metric interface
+    if (!is.null(params$objective) && !is.null(obj))
+        stop("xgb.cv: cannot assign two different objectives")
+    if (!is.null(params$objective))
+        if (class(params$objective)=='function') {
+            obj = params$objective
+            params[['objective']] = NULL
+        }
+    # if (!is.null(params$eval_metric) && !is.null(feval))
+    #  stop("xgb.cv: cannot assign two different evaluation metrics")
+    if (!is.null(params$eval_metric))
+        if (class(params$eval_metric)=='function') {
+            feval = params$eval_metric
+            params[['eval_metric']] = NULL
         }
-      }
-    }
-    ret <- xgb.cv.aggcv(msg, showsd)
-    history <- c(history, ret)
-    if(verbose)
-      if (0==(i-1L)%%print.every.n)
-        cat(ret, "\n", sep="")
     
-    # early_Stopping
+    # Early Stopping
     if (!is.null(early.stop.round)){
-      score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
-      score = strsplit(score,'\\+|:')[[1]][[2]]
-      score = as.numeric(score)
-      if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
-        bestScore = score
-        bestInd = i
-      } else {
-        if (i-bestInd>=early.stop.round) {
-          earlyStopflag = TRUE
-          cat('Stopping. Best iteration:',bestInd)
-          break
+        if (!is.null(feval) && is.null(maximize))
+            stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+        if (is.null(maximize) && is.null(params$eval_metric))
+            stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+        if (is.null(maximize))
+        {
+            if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
+                maximize = FALSE
+            } else {
+                maximize = TRUE
+            }
         }
-      }
+        
+        if (maximize) {
+            bestScore = 0
+        } else {
+            bestScore = Inf
+        }
+        bestInd = 0
+        earlyStopflag = FALSE
+        
+        if (length(metrics)>1)
+            warning('Only the first metric is used for early stopping process.')
     }
     
-  }
-  
-  colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
-  colnamesMean <- paste(colnames, "mean")
-  if(showsd) colnamesStd <- paste(colnames, "std")
-  
-  colnames <- c()
-  if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
-  else colnames <- colnamesMean
-  
-  type <- rep(x = "numeric", times = length(colnames))
-  dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
-  split <- str_split(string = history, pattern = "\t")
-  
-  for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
-  
-  if (prediction) {
-    return(list(dt = dt,pred = predictValues))
-  }
-  return(dt)
+    xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds)
+    obj_type = params[['objective']]
+    mat_pred = FALSE
+    if (!is.null(obj_type) && obj_type=='multi:softprob')
+    {
+        num_class = params[['num_class']]
+        if (is.null(num_class))
+            stop('must set num_class to use softmax')
+        predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
+        mat_pred = TRUE
+    }
+    else
+        predictValues <- rep(0,xgb.numrow(dtrain))
+    history <- c()
+    print.every.n = max(as.integer(print.every.n), 1L)
+    for (i in 1:nrounds) {
+        msg <- list()
+        for (k in 1:nfold) {
+            fd <- xgb_folds[[k]]
+            succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
+            msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
+        }
+        ret <- xgb.cv.aggcv(msg, showsd)
+        history <- c(history, ret)
+        if(verbose)
+            if (0==(i-1L)%%print.every.n)
+                cat(ret, "\n", sep="")
+        
+        # early_Stopping
+        if (!is.null(early.stop.round)){
+            score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
+            score = strsplit(score,'\\+|:')[[1]][[2]]
+            score = as.numeric(score)
+            if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
+                bestScore = score
+                bestInd = i
+            } else {
+                if (i-bestInd>=early.stop.round) {
+                    earlyStopflag = TRUE
+                    cat('Stopping. Best iteration:',bestInd)
+                    break
+                }
+            }
+        }
+        
+    }
+    
+    if (prediction) {
+        for (k in 1:nfold) {
+            fd = xgb_folds[[k]]
+            if (!is.null(early.stop.round) && earlyStopflag) {
+              res = xgb.iter.eval(fd$booster, fd$watchlist, bestInd - 1, feval, prediction)
+            } else {
+              res = xgb.iter.eval(fd$booster, fd$watchlist, nrounds - 1, feval, prediction)
+            }
+            if (mat_pred) {
+                pred_mat = matrix(res[[2]],num_class,length(fd$index))
+                predictValues[fd$index,] = t(pred_mat)
+            } else {
+                predictValues[fd$index] = res[[2]]
+            }
+        }
+    }
+    
+    
+    colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
+    colnamesMean <- paste(colnames, "mean")
+    if(showsd) colnamesStd <- paste(colnames, "std")
+    
+    colnames <- c()
+    if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
+    else colnames <- colnamesMean
+    
+    type <- rep(x = "numeric", times = length(colnames))
+    dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
+    split <- str_split(string = history, pattern = "\t")
+    
+    for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
+    
+    if (prediction) {
+        return(list(dt = dt,pred = predictValues))
+    }
+    return(dt)
 }
 
 # Avoid error messages during CRAN check.
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 0700577f7..fb403143a 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -136,7 +136,13 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
   if (length(watchlist) != 0 && verbose == 0) {
     warning('watchlist is provided but verbose=0, no evaluation information will be printed')
   }
-  params = append(params, list(...))
+  
+  dot.params = list(...)
+  nms.params = names(params)
+  nms.dot.params = names(dot.params)
+  if (length(intersect(nms.params,nms.dot.params))>0)
+    stop("Duplicated term in parameters. Please check your list of params.")
+  params = append(params, dot.params)
   
   # customized objective and evaluation metric interface
   if (!is.null(params$objective) && !is.null(obj))
diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp
index de6ed339f..a8084b206 100644
--- a/R-package/src/xgboost_R.cpp
+++ b/R-package/src/xgboost_R.cpp
@@ -1,9 +1,10 @@
+// Copyright (c) 2014 by Contributors
 #include <vector>
 #include <string>
 #include <utility>
 #include <cstring>
 #include <cstdio>
-#include <sstream> 
+#include <sstream>
 #include "wrapper/xgboost_wrapper.h"
 #include "src/utils/utils.h"
 #include "src/utils/omp.h"
@@ -34,7 +35,7 @@ bool CheckNAN(double v) {
 bool LogGamma(double v) {
   return lgammafn(v);
 }
-} // namespace utils
+}  // namespace utils
 
 namespace random {
 void Seed(unsigned seed) {
@@ -58,25 +59,30 @@ inline void _WrapperEnd(void) {
   PutRNGstate();
 }
 
+// Placeholder hook for the int status returned by the wrapped XG* calls; currently ignores it.
+inline void CheckErr(int ret) {
+}
+
 extern "C" {
   SEXP XGCheckNullPtr_R(SEXP handle) {
     return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
   }
-  void _DMatrixFinalizer(SEXP ext) {    
+  void _DMatrixFinalizer(SEXP ext) {
     if (R_ExternalPtrAddr(ext) == NULL) return;
     XGDMatrixFree(R_ExternalPtrAddr(ext));
     R_ClearExternalPtr(ext);
   }
   SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
     _WrapperBegin();
-    void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
+    DMatrixHandle handle;
+    CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
     _WrapperEnd();
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
     UNPROTECT(1);
     return ret;
   }
-  SEXP XGDMatrixCreateFromMat_R(SEXP mat, 
+  SEXP XGDMatrixCreateFromMat_R(SEXP mat,
                                 SEXP missing) {
     _WrapperBegin();
     SEXP dim = getAttrib(mat, R_DimSymbol);
@@ -90,12 +96,13 @@ extern "C" {
         data[i * ncol +j] = din[i + nrow * j];
       }
     }
-    void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing));
+    DMatrixHandle handle;
+    CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle));
     _WrapperEnd();
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
     UNPROTECT(1);
-    return ret;    
+    return ret;
   }
   SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
                                 SEXP indices,
@@ -118,8 +125,10 @@ extern "C" {
       indices_[i] = static_cast<unsigned>(p_indices[i]);
       data_[i] = static_cast<float>(p_data[i]);
     }
-    void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
-                                          BeginPtr(data_), nindptr, ndata);
+    DMatrixHandle handle;
+    CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
+                                    BeginPtr(data_), nindptr, ndata,
+                                    &handle));
     _WrapperEnd();
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@@ -133,17 +142,20 @@ extern "C" {
     for (int i = 0; i < len; ++i) {
       idxvec[i] = INTEGER(idxset)[i] - 1;
     }
-    void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len);
+    DMatrixHandle res;
+    CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),
+                                   BeginPtr(idxvec), len,
+                                   &res));
     _WrapperEnd();
     SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
     UNPROTECT(1);
-    return ret;        
+    return ret;
   }
   void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
     _WrapperBegin();
-    XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
-                        CHAR(asChar(fname)), asInteger(silent));
+    CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
+                                 CHAR(asChar(fname)), asInteger(silent)));
     _WrapperEnd();
   }
   void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
@@ -152,28 +164,31 @@ extern "C" {
     const char *name = CHAR(asChar(field));
     if (!strcmp("group", name)) {
       std::vector<unsigned> vec(len);
-      #pragma omp parallel for schedule(static)      
+      #pragma omp parallel for schedule(static)
       for (int i = 0; i < len; ++i) {
         vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
       }
-      XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len);
+      CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len));
     } else {
       std::vector<float> vec(len);
       #pragma omp parallel for schedule(static)
       for (int i = 0; i < len; ++i) {
         vec[i] = REAL(array)[i];
       }
-      XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), 
-                            CHAR(asChar(field)),
-                            BeginPtr(vec), len);
+      CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
+                                     CHAR(asChar(field)),
+                                     BeginPtr(vec), len));
     }
     _WrapperEnd();
   }
   SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
     _WrapperBegin();
     bst_ulong olen;
-    const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
-                                             CHAR(asChar(field)), &olen);
+    const float *res;
+    CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
+                                   CHAR(asChar(field)),
+                                   &olen,
+                                   &res));
     _WrapperEnd();
     SEXP ret = PROTECT(allocVector(REALSXP, olen));
     for (size_t i = 0; i < olen; ++i) {
@@ -183,23 +198,25 @@ extern "C" {
     return ret;
   }
   SEXP XGDMatrixNumRow_R(SEXP handle) {
-    bst_ulong nrow = XGDMatrixNumRow(R_ExternalPtrAddr(handle));
+    bst_ulong nrow;
+    CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
     return ScalarInteger(static_cast<int>(nrow));
   }
   // functions related to booster
-  void _BoosterFinalizer(SEXP ext) {    
+  void _BoosterFinalizer(SEXP ext) {
     if (R_ExternalPtrAddr(ext) == NULL) return;
-    XGBoosterFree(R_ExternalPtrAddr(ext));
+    CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext)));
     R_ClearExternalPtr(ext);
   }
   SEXP XGBoosterCreate_R(SEXP dmats) {
     _WrapperBegin();
     int len = length(dmats);
     std::vector<void*> dvec;
-    for (int i = 0; i < len; ++i){
+    for (int i = 0; i < len; ++i) {
       dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
     }
-    void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size());
+    BoosterHandle handle;
+    CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
     _WrapperEnd();
     SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
     R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
@@ -208,16 +225,16 @@ extern "C" {
   }
   void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
     _WrapperBegin();
-    XGBoosterSetParam(R_ExternalPtrAddr(handle),
-                      CHAR(asChar(name)),
-                      CHAR(asChar(val)));
+    CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle),
+                               CHAR(asChar(name)),
+                               CHAR(asChar(val))));
     _WrapperEnd();
   }
   void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
     _WrapperBegin();
-    XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
-                           asInteger(iter),
-                           R_ExternalPtrAddr(dtrain));
+    CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
+                                    asInteger(iter),
+                                    R_ExternalPtrAddr(dtrain)));
     _WrapperEnd();
   }
   void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
@@ -230,9 +247,10 @@ extern "C" {
       tgrad[j] = REAL(grad)[j];
       thess[j] = REAL(hess)[j];
     }
-    XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
-                          R_ExternalPtrAddr(dtrain),
-                          BeginPtr(tgrad), BeginPtr(thess), len);
+    CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
+                                   R_ExternalPtrAddr(dtrain),
+                                   BeginPtr(tgrad), BeginPtr(thess),
+                                   len));
     _WrapperEnd();
   }
   SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
@@ -249,21 +267,24 @@ extern "C" {
     for (int i = 0; i < len; ++i) {
       vec_sptr.push_back(vec_names[i].c_str());
     }
-    const char *ret =
-        XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
-                             asInteger(iter),
-                             BeginPtr(vec_dmats), BeginPtr(vec_sptr), len);  
+    const char *ret;
+    CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
+                                  asInteger(iter),
+                                  BeginPtr(vec_dmats),
+                                  BeginPtr(vec_sptr),
+                                  len, &ret));
     _WrapperEnd();
     return mkString(ret);
   }
   SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
     _WrapperBegin();
     bst_ulong olen;
-    const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
-                                        R_ExternalPtrAddr(dmat),
-                                        asInteger(option_mask),
-                                        asInteger(ntree_limit),
-                                        &olen);
+    const float *res;
+    CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle),
+                              R_ExternalPtrAddr(dmat),
+                              asInteger(option_mask),
+                              asInteger(ntree_limit),
+                              &olen, &res));
     _WrapperEnd();
     SEXP ret = PROTECT(allocVector(REALSXP, olen));
     for (size_t i = 0; i < olen; ++i) {
@@ -274,15 +295,15 @@ extern "C" {
   }
   void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
     _WrapperBegin();
-    XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
+    CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
     _WrapperEnd();
   }
   void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
     _WrapperBegin();
-    XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
+    CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
     _WrapperEnd();
   }
-  void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {    
+  void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
     _WrapperBegin();
     XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
                                  RAW(raw),
@@ -292,28 +313,29 @@ extern "C" {
   SEXP XGBoosterModelToRaw_R(SEXP handle) {
     bst_ulong olen;
     _WrapperBegin();
-    const char *raw = XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen);
+    const char *raw;
+    CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
     _WrapperEnd();
     SEXP ret = PROTECT(allocVector(RAWSXP, olen));
     if (olen != 0) {
       memcpy(RAW(ret), raw, olen);
     }
-    UNPROTECT(1);    
+    UNPROTECT(1);
     return ret;
   }
   SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
     _WrapperBegin();
     bst_ulong olen;
-    const char **res =
-        XGBoosterDumpModel(R_ExternalPtrAddr(handle),
-                           CHAR(asChar(fmap)),
-                           asInteger(with_stats),
-                           &olen);
+    const char **res;
+    CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle),
+                                CHAR(asChar(fmap)),
+                                asInteger(with_stats),
+                                &olen, &res));
     _WrapperEnd();
-    SEXP out = PROTECT(allocVector(STRSXP, olen));    
-    for (size_t i = 0; i < olen; ++i) {     
+    SEXP out = PROTECT(allocVector(STRSXP, olen));
+    for (size_t i = 0; i < olen; ++i) {
       stringstream stream;
-      stream <<  "booster["<<i<<"]\n" << res[i];
+      stream <<  "booster[" << i <<"]\n" << res[i];
       SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
     }
     UNPROTECT(1);
diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h
index 58a179192..768b2ced7 100644
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_WRAPPER_R_H_
-#define XGBOOST_WRAPPER_R_H_
 /*!
+ * Copyright 2014 (c) by Contributors
  * \file xgboost_wrapper_R.h
  * \author Tianqi Chen
  * \brief R wrapper of xgboost
  */
+#ifndef XGBOOST_WRAPPER_R_H_ // NOLINT(*)
+#define XGBOOST_WRAPPER_R_H_ // NOLINT(*)
+
 extern "C" {
 #include <Rinternals.h>
 #include <R_ext/Random.h>
@@ -19,7 +21,7 @@ extern "C" {
    */
   SEXP XGCheckNullPtr_R(SEXP handle);
   /*!
-   * \brief load a data matrix 
+   * \brief load a data matrix
    * \param fname name of the content
    * \param silent whether print messages
    * \return a loaded data matrix
@@ -32,9 +34,9 @@ extern "C" {
    * \param missing which value to represent missing value
    * \return created dmatrix
    */
-  SEXP XGDMatrixCreateFromMat_R(SEXP mat, 
+  SEXP XGDMatrixCreateFromMat_R(SEXP mat,
                                 SEXP missing);
-  /*! 
+  /*!
    * \brief create a matrix content from CSC format
    * \param indptr pointer to column headers
    * \param indices row indices
@@ -70,26 +72,26 @@ extern "C" {
    * \param handle a instance of data matrix
    * \param field field name
    * \return info vector
-   */  
+   */
   SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
   /*!
    * \brief return number of rows
    * \param handle a instance of data matrix
    */
   SEXP XGDMatrixNumRow_R(SEXP handle);
-  /*! 
-   * \brief create xgboost learner 
+  /*!
+   * \brief create xgboost learner
    * \param dmats a list of dmatrix handles that will be cached
-   */  
+   */
   SEXP XGBoosterCreate_R(SEXP dmats);
-  /*! 
-   * \brief set parameters 
+  /*!
+   * \brief set parameters
    * \param handle handle
    * \param name  parameter name
    * \param val value of parameter
    */
   void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
-  /*! 
+  /*!
    * \brief update the model in one round using dtrain
    * \param handle handle
    * \param iter current iteration rounds
@@ -132,12 +134,12 @@ extern "C" {
    * \brief save model into existing file
    * \param handle handle
    * \param fname file name
-   */    
+   */
   void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
   /*!
    * \brief load model from raw array
    * \param handle handle
-   */    
+   */
   void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw);
   /*!
    * \brief save model into R's raw array
@@ -153,4 +155,4 @@ extern "C" {
    */
   SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats);
 }
-#endif  // XGBOOST_WRAPPER_R_H_
+#endif  // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
diff --git a/R-package/src/xgboost_assert.c b/R-package/src/xgboost_assert.c
index 20b789492..072074243 100644
--- a/R-package/src/xgboost_assert.c
+++ b/R-package/src/xgboost_assert.c
@@ -1,3 +1,4 @@
+// Copyright (c) 2014 by Contributors
 #include <stdio.h>
 #include <stdarg.h>
 #include <Rinternals.h>
@@ -6,17 +7,17 @@
 void XGBoostAssert_R(int exp, const char *fmt, ...) {
   char buf[1024];
   if (exp == 0) {
-    va_list args;  
+    va_list args;
     va_start(args, fmt);
     vsprintf(buf, fmt, args);
     va_end(args);
     error("AssertError:%s\n", buf);
-  }  
+  }
 }
 void XGBoostCheck_R(int exp, const char *fmt, ...) {
   char buf[1024];
   if (exp == 0) {
-    va_list args;  
+    va_list args;
     va_start(args, fmt);
     vsprintf(buf, fmt, args);
     va_end(args);
@@ -25,7 +26,7 @@ void XGBoostCheck_R(int exp, const char *fmt, ...) {
 }
 int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) {
   int ret;
-  va_list args;  
+  va_list args;
   va_start(args, fmt);
   ret = vsnprintf(buf, size, fmt, args);
   va_end(args);
diff --git a/README.md b/README.md
index 415bf771b..cdd4c02f7 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
-XGBoost: eXtreme Gradient Boosting 
+XGBoost: eXtreme Gradient Boosting
 ==================================
 
+[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost)
+
 An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version.
 It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). XGBoost can also be distributed and scale to Terascale data
 
diff --git a/demo/binary_classification/README.md b/demo/binary_classification/README.md
index 02c06e550..8d1e5e2a5 100644
--- a/demo/binary_classification/README.md
+++ b/demo/binary_classification/README.md
@@ -147,7 +147,7 @@ Run the command again, we can find the log file becomes
 ```
 The rule is eval[name-printed-in-log] = filename, then the file will be added to monitoring process, and evaluated each round.
 
-xgboost also support monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
+xgboost also supports monitoring multiple metrics. Suppose we also want to monitor the average log-likelihood of each prediction during training: simply add ```eval_metric=logloss``` to the configuration. Run again, and we can find the log file becomes
 ```
 [0]     test-error:0.016139     test-negllik:0.029795   trainname-error:0.014433        trainname-negllik:0.027023
 [1]     test-error:0.000000     test-negllik:0.000000   trainname-error:0.001228        trainname-negllik:0.002457
@@ -166,7 +166,7 @@ When you are working with a large dataset, you may want to take advantage of par
 
 #### Additional Notes
 * What are ```agaricus.txt.test.buffer``` and ```agaricus.txt.train.buffer``` generated during runexp.sh? 
-  - By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. When next time you run xgboost, it detects i
-Demonstrating how to use XGBoost accomplish binary classification tasks  on UCI mushroom dataset  http://archive.ics.uci.edu/ml/datasets/Mushroom
+  - By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. The next time you run xgboost, it will detect these binary files.
+
 
 
diff --git a/doc/README.md b/doc/README.md
index d9884c682..371e18f21 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -29,3 +29,7 @@ This section is about blogposts, presentation and videos discussing how to use x
 Contribution
 ====
 Contribution of documents and use-cases are welcomed!
+* This package uses the Google C++ style
+* Code style check tool
+  - clone https://github.com/dmlc/dmlc-core into root directory
+  - type ```make lint``` and fix possible errors.
diff --git a/doc/build.md b/doc/build.md
index a5261b604..f9a626603 100644
--- a/doc/build.md
+++ b/doc/build.md
@@ -17,13 +17,15 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
 
 1. Obtain gcc with openmp support by `brew install gcc --without-multilib` **or** clang with openmp by `brew install clang-omp`. The clang one is recommended because the first method requires us compiling gcc inside the machine (more than an hour in mine)! (BTW, `brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.)
 
-2. **if plaing to use clang-omp** in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to 
+2. **if you are planning to use clang-omp** - in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to 
 
   ```C++
   #include <libiomp/omp.h> /* instead of #include <omp.h> */` 
   ```
 
-  to make it work, otherwise the following steps would show `src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`
+  to make it work, otherwise you might get this error 
+  
+  `src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`
 
 
 
@@ -47,7 +49,7 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost
 
 4. Set the `Makevars` file in highest piority for R. 
 
-  The point is, there are three `Makevars` inside the machine: `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by runing `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has highest piority (surprise!).
+  The point is, there are three `Makevars` files: `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by running `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has the highest priority (surprise!).
 
   So, **add** or **change** `~/.R/Makevars` to the following lines:
 
diff --git a/scripts/travis_R_script.sh b/scripts/travis_R_script.sh
new file mode 100755
index 000000000..5a9ea7528
--- /dev/null
+++ b/scripts/travis_R_script.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Test R package of xgboost
+set -e
+export _R_CHECK_TIMINGS_=0
+export R_BUILD_ARGS="--no-build-vignettes --no-manual"
+export R_CHECK_ARGS="--no-vignettes --no-manual"
+# Fetch the r-travis helper over HTTPS; -f makes curl fail on an HTTP error instead of saving the error page.
+curl -fOL https://raw.githubusercontent.com/craigcitro/r-travis/master/scripts/travis-tool.sh
+chmod 755 ./travis-tool.sh
+./travis-tool.sh bootstrap
+make Rpack
+cd ./xgboost
+../travis-tool.sh install_deps
+../travis-tool.sh run_tests
\ No newline at end of file
diff --git a/scripts/travis_after_failure.sh b/scripts/travis_after_failure.sh
new file mode 100755
index 000000000..230f3348c
--- /dev/null
+++ b/scripts/travis_after_failure.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# After a failed R-package job, dump 00install.out from the R check directory to the CI log.
+if [ "${TASK}" == "R-package" ]; then
+    cat R-package/xgboost.Rcheck/00install.out
+fi
\ No newline at end of file
diff --git a/scripts/travis_script.sh b/scripts/travis_script.sh
new file mode 100755
index 000000000..b90f8d3ee
--- /dev/null
+++ b/scripts/travis_script.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Travis CI entry point: runs the job selected by the TASK environment variable.
+# Each checked step exits with status 1 on failure so the CI job is marked as failed.
+if [ "${TASK}" == "lint" ]; then
+    make lint || exit 1
+fi
+# Plain build of all targets with the selected compiler.
+if [ "${TASK}" == "build" ]; then
+    make all CXX="${CXX}" || exit 1
+fi
+# Build dmlc-core with USE_S3=1 appended to its config, then build xgboost with dmlc=dmlc-core.
+if [ "${TASK}" == "build-with-dmlc" ]; then
+    cd dmlc-core || exit 1
+    cp make/config.mk . || exit 1
+    echo "USE_S3=1" >> config.mk
+    make all CXX="${CXX}" || exit 1
+    cd ..
+    make dmlc=dmlc-core CXX="${CXX}" || exit 1
+fi
+# Delegate to the dedicated R package script.
+if [ "${TASK}" == "R-package" ]; then
+    scripts/travis_R_script.sh || exit 1
+fi
+# Build the library, then run the Python tests with nose.
+if [ "${TASK}" == "python-package" ]; then
+    make all CXX="${CXX}" || exit 1
+    nosetests tests/python || exit 1
+fi
diff --git a/src/data.h b/src/data.h
index 63dd2d78f..3c4a14987 100644
--- a/src/data.h
+++ b/src/data.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_DATA_H
-#define XGBOOST_DATA_H
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file data.h
  * \brief the input data structure for gradient boosting
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_DATA_H_
+#define XGBOOST_DATA_H_
+
 #include <cstdio>
 #include <vector>
 #include "utils/utils.h"
@@ -32,7 +34,7 @@ struct bst_gpair {
   bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {}
 };
 
-/*! 
+/*!
  * \brief extra information that might needed by gbm and tree module
  * these information are not necessarily presented, and can be empty
  */
@@ -102,7 +104,7 @@ struct RowBatch : public SparseBatch {
     return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i+1] - ind_ptr[i]));
   }
 };
-/*! 
+/*!
  * \brief read-only column batch, used to access columns,
  * the columns are not required to be continuous
  */
@@ -131,7 +133,7 @@ class IFMatrix {
   /*!\brief get column iterator */
   virtual utils::IIterator<ColBatch> *ColIterator(void) = 0;
   /*!
-   * \brief get the column iterator associated with FMatrix with subset of column features 
+   * \brief get the column iterator associated with FMatrix with subset of column features
    * \param fset is the list of column index set that must be contained in the returning Column iterator
    * \return the column iterator, initialized so that it reads the elements in fset
    */
@@ -154,11 +156,11 @@ class IFMatrix {
   /*! \brief get number of non-missing entries in column */
   virtual size_t GetColSize(size_t cidx) const = 0;
   /*! \brief get column density */
-  virtual float GetColDensity(size_t cidx) const = 0;  
+  virtual float GetColDensity(size_t cidx) const = 0;
   /*! \brief reference of buffered rowset */
   virtual const std::vector<bst_uint> &buffered_rowset(void) const = 0;
   // virtual destructor
   virtual ~IFMatrix(void){}
 };
 }  // namespace xgboost
-#endif  // XGBOOST_DATA_H
+#endif  // XGBOOST_DATA_H_
diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp
index 3d2f36f5f..17d90e556 100644
--- a/src/gbm/gblinear-inl.hpp
+++ b/src/gbm/gblinear-inl.hpp
@@ -1,11 +1,13 @@
-#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
-#define XGBOOST_GBM_GBLINEAR_INL_HPP_
 /*!
+ * Copyright by Contributors
  * \file gblinear-inl.hpp
  * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
  *        the update rule is parallel coordinate descent (shotgun)
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_
+#define XGBOOST_GBM_GBLINEAR_INL_HPP_
+
 #include <vector>
 #include <string>
 #include <sstream>
@@ -33,10 +35,10 @@ class GBLinear : public IGradBooster {
       model.param.SetParam(name, val);
     }
   }
-  virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) {
+  virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
     model.LoadModel(fi);
   }
-  virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
+  virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
     model.SaveModel(fo);
   }
   virtual void InitModel(void) {
@@ -92,7 +94,8 @@ class GBLinear : public IGradBooster {
             sum_hess += p.hess * v * v;
           }
           float &w = model[fid][gid];
-          bst_float dw = static_cast<bst_float>(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w));
+          bst_float dw = static_cast<bst_float>(param.learning_rate *
+                                                param.CalcDelta(sum_grad, sum_hess, w));
           w += dw;
           // update grad value
           for (bst_uint j = 0; j < col.length; ++j) {
@@ -258,12 +261,12 @@ class GBLinear : public IGradBooster {
       std::fill(weight.begin(), weight.end(), 0.0f);
     }
     // save the model to file
-    inline void SaveModel(utils::IStream &fo) const {
+    inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
       fo.Write(&param, sizeof(Param));
       fo.Write(weight);
     }
     // load model from file
-    inline void LoadModel(utils::IStream &fi) {
+    inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
       utils::Assert(fi.Read(&param, sizeof(Param)) != 0, "Load LinearBooster");
       fi.Read(&weight);
     }
diff --git a/src/gbm/gbm.cpp b/src/gbm/gbm.cpp
index fe8d778e4..13ad44c57 100644
--- a/src/gbm/gbm.cpp
+++ b/src/gbm/gbm.cpp
@@ -1,3 +1,4 @@
+// Copyright by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h
index f07d277ac..60b7474e1 100644
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -1,11 +1,14 @@
-#ifndef XGBOOST_GBM_GBM_H_
-#define XGBOOST_GBM_GBM_H_
 /*!
+ * Copyright by Contributors
  * \file gbm.h
  * \brief interface of gradient booster, that learns through gradient statistics
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_GBM_GBM_H_
+#define XGBOOST_GBM_GBM_H_
+
 #include <vector>
+#include <string>
 #include "../data.h"
 #include "../utils/io.h"
 #include "../utils/fmap.h"
@@ -13,7 +16,7 @@
 namespace xgboost {
 /*! \brief namespace for gradient booster */
 namespace gbm {
-/*! 
+/*!
  * \brief interface of gradient boosting model
  */
 class IGradBooster {
@@ -29,26 +32,26 @@ class IGradBooster {
    * \param fi input stream
    * \param with_pbuffer whether the incoming data contains pbuffer
    */
-  virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0;
+  virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; // NOLINT(*)
   /*!
    * \brief save model to stream
    * \param fo output stream
    * \param with_pbuffer whether save out pbuffer
    */
-  virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0;
+  virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; // NOLINT(*)
   /*!
    * \brief initialize the model
    */
   virtual void InitModel(void) = 0;
-  /*! 
+  /*!
    * \brief reset the predict buffer
    * this will invalidate all the previous cached results
    * and recalculate from scratch
    */
   virtual void ResetPredBuffer(size_t num_pbuffer) {}
-  /*! 
+  /*!
    * \brief whether the model allow lazy checkpoint
-   * return true if model is only updated in DoBoost 
+   * return true if model is only updated in DoBoost
    * after all Allreduce calls
    */
   virtual bool AllowLazyCheckPoint(void) const {
@@ -76,20 +79,20 @@ class IGradBooster {
    *  the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
    * \param info extra side information that may be needed for prediction
    * \param out_preds output vector to hold the predictions
-   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means 
+   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
    *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
    */
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
                        std::vector<float> *out_preds,
-                       unsigned ntree_limit = 0) = 0;  
+                       unsigned ntree_limit = 0) = 0;
   /*!
    * \brief online prediction funciton, predict score for one instance at a time
    *  NOTE: use the batch prediction interface if possible, batch prediction is usually
    *        more efficient than online prediction
    *        This function is NOT threadsafe, make sure you only call from one thread
-   *    
+   *
    * \param inst the instance you want to predict
    * \param out_preds output vector to hold the predictions
    * \param ntree_limit limit the number of trees used in prediction
@@ -106,7 +109,7 @@ class IGradBooster {
    * \param p_fmat feature matrix
    * \param info extra side information that may be needed for prediction
    * \param out_preds output vector to hold the predictions
-   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means 
+   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
    *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
    */
   virtual void PredictLeaf(IFMatrix *p_fmat,
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index c868c302a..9335ef8e7 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -1,13 +1,16 @@
-#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
-#define XGBOOST_GBM_GBTREE_INL_HPP_
 /*!
+ * Copyright by Contributors
  * \file gbtree-inl.hpp
  * \brief gradient boosted tree implementation
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
+#define XGBOOST_GBM_GBTREE_INL_HPP_
+
 #include <vector>
 #include <utility>
 #include <string>
+#include <limits>
 #include "./gbm.h"
 #include "../utils/omp.h"
 #include "../tree/updater.h"
@@ -39,7 +42,7 @@ class GBTree : public IGradBooster {
     tparam.SetParam(name, val);
     if (trees.size() == 0) mparam.SetParam(name, val);
   }
-  virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) {
+  virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*)
     this->Clear();
     utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
                  "GBTree: invalid model file");
@@ -62,10 +65,10 @@ class GBTree : public IGradBooster {
                    "GBTree: invalid model file");
     }
   }
-  virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
+  virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
     utils::Assert(mparam.num_trees == static_cast<int>(trees.size()), "GBTree");
     if (with_pbuffer) {
-      fo.Write(&mparam, sizeof(ModelParam));      
+      fo.Write(&mparam, sizeof(ModelParam));
     } else {
       ModelParam p = mparam;
       p.num_pbuffer = 0;
@@ -129,7 +132,7 @@ class GBTree : public IGradBooster {
                        int64_t buffer_offset,
                        const BoosterInfo &info,
                        std::vector<float> *out_preds,
-                       unsigned ntree_limit = 0) {    
+                       unsigned ntree_limit = 0) {
     int nthread;
     #pragma omp parallel
     {
@@ -160,12 +163,12 @@ class GBTree : public IGradBooster {
           this->Pred(batch[i],
                      buffer_offset < 0 ? -1 : buffer_offset + ridx,
                      gid, info.GetRoot(ridx), &feats,
-                     &preds[ridx * mparam.num_output_group + gid], stride, 
+                     &preds[ridx * mparam.num_output_group + gid], stride,
                      ntree_limit);
         }
       }
     }
-  }  
+  }
   virtual void Predict(const SparseBatch::Inst &inst,
                        std::vector<float> *out_preds,
                        unsigned ntree_limit,
@@ -178,10 +181,10 @@ class GBTree : public IGradBooster {
     // loop over output groups
     for (int gid = 0; gid < mparam.num_output_group; ++gid) {
       this->Pred(inst, -1, gid, root_index, &thread_temp[0],
-                 &(*out_preds)[gid], mparam.num_output_group, 
+                 &(*out_preds)[gid], mparam.num_output_group,
                  ntree_limit);
     }
-  }  
+  }
   virtual void PredictLeaf(IFMatrix *p_fmat,
                            const BoosterInfo &info,
                            std::vector<float> *out_preds,
@@ -196,7 +199,6 @@ class GBTree : public IGradBooster {
       thread_temp[i].Init(mparam.num_feature);
     }
     this->PredPath(p_fmat, info, out_preds, ntree_limit);
-    
   }
   virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
     std::vector<std::string> dump;
@@ -260,7 +262,7 @@ class GBTree : public IGradBooster {
     // update the trees
     for (size_t i = 0; i < updaters.size(); ++i) {
       updaters[i]->Update(gpair, p_fmat, info, new_trees);
-    }    
+    }
     // optimization, update buffer, if possible
     // this is only under distributed column mode
     // for safety check of lazy checkpoint
@@ -287,7 +289,7 @@ class GBTree : public IGradBooster {
   }
   // update buffer by pre-cached position
   inline void UpdateBufferByPosition(IFMatrix *p_fmat,
-                                     int64_t buffer_offset, 
+                                     int64_t buffer_offset,
                                      int bst_group,
                                      const tree::RegTree &new_tree,
                                      const int* leaf_position) {
@@ -313,11 +315,11 @@ class GBTree : public IGradBooster {
                    int bst_group,
                    unsigned root_index,
                    tree::RegTree::FVec *p_feats,
-                   float *out_pred, size_t stride, 
+                   float *out_pred, size_t stride,
                    unsigned ntree_limit) {
     size_t itop = 0;
     float  psum = 0.0f;
-    // sum of leaf vector 
+    // sum of leaf vector
     std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
     const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
     // number of valid trees
@@ -339,7 +341,7 @@ class GBTree : public IGradBooster {
           for (int j = 0; j < mparam.size_leaf_vector; ++j) {
             vec_psum[j] += trees[i]->leafvec(tid)[j];
           }
-          if(--treeleft == 0) break;
+          if (--treeleft == 0) break;
         }
       }
       p_feats->Drop(inst);
@@ -365,7 +367,7 @@ class GBTree : public IGradBooster {
     // number of valid trees
     if (ntree_limit == 0 || ntree_limit > trees.size()) {
       ntree_limit = static_cast<unsigned>(trees.size());
-    } 
+    }
     std::vector<float> &preds = *out_preds;
     preds.resize(info.num_row * ntree_limit);
     // start collecting the prediction
@@ -389,7 +391,7 @@ class GBTree : public IGradBooster {
       }
     }
   }
-                       
+
   // --- data structure ---
   /*! \brief training parameters */
   struct TrainParam {
@@ -442,10 +444,10 @@ class GBTree : public IGradBooster {
     int num_feature;
     /*! \brief size of predicton buffer allocated used for buffering */
     int64_t num_pbuffer;
-    /*! 
+    /*!
      * \brief how many output group a single instance can produce
      *  this affects the behavior of number of output we have:
-     *    suppose we have n instance and k group, output will be k*n 
+     *    suppose we have n instance and k group, output will be k*n
      */
     int num_output_group;
     /*! \brief size of leaf vector needed in tree */
@@ -478,8 +480,8 @@ class GBTree : public IGradBooster {
     inline size_t PredBufferSize(void) const {
       return num_output_group * num_pbuffer * (size_leaf_vector + 1);
     }
-    /*! 
-     * \brief get the buffer offset given a buffer index and group id  
+    /*!
+     * \brief get the buffer offset given a buffer index and group id
      * \return calculated buffer offset
      */
     inline int64_t BufferOffset(int64_t buffer_index, int bst_group) const {
diff --git a/src/io/dmlc_simple.cpp b/src/io/dmlc_simple.cpp
index 065877a19..3fbf34734 100644
--- a/src/io/dmlc_simple.cpp
+++ b/src/io/dmlc_simple.cpp
@@ -1,6 +1,8 @@
+// Copyright by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
+#include <string>
 #include "../utils/io.h"
 
 // implements a single no split version of DMLC
@@ -9,7 +11,7 @@
 namespace xgboost {
 namespace utils {
 /*!
- * \brief line split implementation from single FILE 
+ * \brief line split implementation from single FILE
  * simply returns lines of files, used for stdin
  */
 class SingleFileSplit : public dmlc::InputSplit {
@@ -32,7 +34,7 @@ class SingleFileSplit : public dmlc::InputSplit {
   }
   virtual size_t Read(void *ptr, size_t size) {
     return std::fread(ptr, 1, size, fp_);
-  }  
+  }
   virtual void Write(const void *ptr, size_t size) {
     utils::Error("cannot do write in inputsplit");
   }
@@ -47,13 +49,13 @@ class SingleFileSplit : public dmlc::InputSplit {
                                 chunk_end_);
     out_rec->dptr = chunk_begin_;
     out_rec->size = next - chunk_begin_;
-    chunk_begin_ = next;    
+    chunk_begin_ = next;
     return true;
   }
   virtual bool NextChunk(Blob *out_chunk) {
     if (chunk_begin_ == chunk_end_) {
       if (!LoadChunk()) return false;
-    }    
+    }
     out_chunk->dptr = chunk_begin_;
     out_chunk->size = chunk_end_ - chunk_begin_;
     chunk_begin_ = chunk_end_;
@@ -64,8 +66,8 @@ class SingleFileSplit : public dmlc::InputSplit {
     if (max_size <= overflow_.length()) {
       *size = 0; return true;
     }
-    if (overflow_.length() != 0) { 
-      std::memcpy(buf, BeginPtr(overflow_), overflow_.length());  
+    if (overflow_.length() != 0) {
+      std::memcpy(buf, BeginPtr(overflow_), overflow_.length());
     }
     size_t olen = overflow_.length();
     overflow_.resize(0);
@@ -88,13 +90,13 @@ class SingleFileSplit : public dmlc::InputSplit {
       return true;
     }
   }
-  
+
  protected:
   inline const char* FindLastRecordBegin(const char *begin,
                                          const char *end) {
     if (begin == end) return begin;
     for (const char *p = end - 1; p != begin; --p) {
-      if (*p == '\n' || *p == '\r') return p + 1; 
+      if (*p == '\n' || *p == '\r') return p + 1;
     }
     return begin;
   }
@@ -143,7 +145,7 @@ class StdFile : public dmlc::Stream {
  public:
   explicit StdFile(std::FILE *fp, bool use_stdio)
       : fp(fp), use_stdio(use_stdio) {
-  }  
+  }
   virtual ~StdFile(void) {
     this->Close();
   }
@@ -154,7 +156,7 @@ class StdFile : public dmlc::Stream {
     std::fwrite(ptr, size, 1, fp);
   }
   virtual void Seek(size_t pos) {
-    std::fseek(fp, static_cast<long>(pos), SEEK_SET);
+    std::fseek(fp, static_cast<long>(pos), SEEK_SET);  // NOLINT(*)
   }
   virtual size_t Tell(void) {
     return std::ftell(fp);
@@ -197,7 +199,7 @@ Stream *Stream::Create(const char *fname, const char * const mode, bool allow_nu
       "to use hdfs, s3 or distributed version, compile with make dmlc=1";
   utils::Check(strncmp(fname, "s3://", 5) != 0, msg);
   utils::Check(strncmp(fname, "hdfs://", 7) != 0, msg);
-  
+
   std::FILE *fp = NULL;
   bool use_stdio = false;
   using namespace std;
diff --git a/src/io/io.cpp b/src/io/io.cpp
index dd4336170..b3713f0c5 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -1,3 +1,4 @@
+// Copyright 2014 by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
@@ -17,7 +18,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
                            const char *cache_file) {
   using namespace std;
   std::string fname_ = fname;
-  
+
   const char *dlm = strchr(fname, '#');
   if (dlm != NULL) {
     utils::Check(strchr(dlm + 1, '#') == NULL,
@@ -29,7 +30,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
     cache_file = dlm +1;
   }
 
-  if (cache_file == NULL) { 
+  if (cache_file == NULL) {
     if (!std::strcmp(fname, "stdin") ||
         !std::strncmp(fname, "s3://", 5) ||
         !std::strncmp(fname, "hdfs://", 7) ||
@@ -42,7 +43,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
     utils::FileStream fs(utils::FopenCheck(fname, "rb"));
     utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
     fs.Seek(0);
-    if (magic == DMatrixSimple::kMagic) { 
+    if (magic == DMatrixSimple::kMagic) {
       DMatrixSimple *dmat = new DMatrixSimple();
       dmat->LoadBinary(fs, silent, fname);
       fs.Close();
@@ -81,7 +82,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
   }
 }
 
-void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {  
+void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
   if (dmat.magic == DMatrixSimple::kMagic) {
     const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
     p_dmat->SaveBinary(fname, silent);
diff --git a/src/io/io.h b/src/io/io.h
index ed075977c..267bb0bff 100644
--- a/src/io/io.h
+++ b/src/io/io.h
@@ -1,11 +1,13 @@
-#ifndef XGBOOST_IO_IO_H_
-#define XGBOOST_IO_IO_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file io.h
  * \brief handles input data format of xgboost
  *    I/O module handles a specific DMatrix format
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_IO_H_
+#define XGBOOST_IO_IO_H_
+
 #include "../data.h"
 #include "../learner/dmatrix.h"
 
@@ -32,7 +34,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
                            bool loadsplit,
                            const char *cache_file = NULL);
 /*!
- * \brief save DataMatrix into stream, 
+ * \brief save DataMatrix into stream,
  *  note: the saved dmatrix format may not be in exactly same as input
  *  SaveDMatrix will choose the best way to materialize the dmatrix.
  * \param dmat the dmatrix to be saved
@@ -40,7 +42,6 @@ DataMatrix* LoadDataMatrix(const char *fname,
  * \param silent whether print message during saving
  */
 void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false);
-
 }  // namespace io
 }  // namespace xgboost
 #endif  // XGBOOST_IO_IO_H_
diff --git a/src/io/libsvm_parser.h b/src/io/libsvm_parser.h
index 0e69d0467..92eeaf35d 100644
--- a/src/io/libsvm_parser.h
+++ b/src/io/libsvm_parser.h
@@ -22,7 +22,7 @@ namespace io {
 /*! \brief page returned by libsvm parser */
 struct LibSVMPage : public SparsePage {
   std::vector<float> label;
-  // overload clear  
+  // overload clear
   inline void Clear() {
     SparsePage::Clear();
     label.clear();
@@ -35,7 +35,7 @@ struct LibSVMPage : public SparsePage {
  */
 class LibSVMPageFactory  {
  public:
-  explicit LibSVMPageFactory() 
+  LibSVMPageFactory()
       : bytes_read_(0), at_head_(true) {
   }
   inline bool Init(void) {
@@ -85,7 +85,7 @@ class LibSVMPageFactory  {
     data->resize(nthread);
     bytes_read_ += chunk.size;
     utils::Assert(chunk.size != 0, "LibSVMParser.FileData");
-    char *head = reinterpret_cast<char*>(chunk.dptr);        
+    char *head = reinterpret_cast<char*>(chunk.dptr);
     #pragma omp parallel num_threads(nthread_)
     {
       // threadid
@@ -150,7 +150,7 @@ class LibSVMPageFactory  {
     }
     return begin;
   }
-  
+
  private:
   // nthread
   int nthread_;
@@ -199,12 +199,13 @@ class LibSVMParser : public utils::IIterator<LibSVMPage> {
   inline size_t bytes_read(void) const {
     return itr.get_factory().bytes_read();
   }
+
  private:
   bool at_end_;
   size_t data_ptr_;
   std::vector<LibSVMPage> *data_;
   utils::ThreadBuffer<std::vector<LibSVMPage>*, LibSVMPageFactory> itr;
-}; 
+};
 
 }  // namespace io
 }  // namespace xgboost
diff --git a/src/io/page_dmatrix-inl.hpp b/src/io/page_dmatrix-inl.hpp
index 79455d130..3012af564 100644
--- a/src/io/page_dmatrix-inl.hpp
+++ b/src/io/page_dmatrix-inl.hpp
@@ -1,11 +1,15 @@
-#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
-#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
 /*!
+ *  Copyright (c) 2014 by Contributors
  * \file page_dmatrix-inl.hpp
  *   row iterator based on sparse page
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
+#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
+
 #include <vector>
+#include <string>
+#include <algorithm>
 #include "../data.h"
 #include "../utils/iterator.h"
 #include "../utils/thread_buffer.h"
@@ -94,12 +98,12 @@ class DMatrixPageBase : public DataMatrix {
     fbin.Close();
     if (!silent) {
       utils::Printf("DMatrixPage: %lux%lu is saved to %s\n",
-                    static_cast<unsigned long>(mat.info.num_row()),
-                    static_cast<unsigned long>(mat.info.num_col()), fname_);
+                    static_cast<unsigned long>(mat.info.num_row()), // NOLINT(*)
+                    static_cast<unsigned long>(mat.info.num_col()), fname_); // NOLINT(*)
     }
   }
   /*! \brief load and initialize the iterator with fi */
-  inline void LoadBinary(utils::FileStream &fi,
+  inline void LoadBinary(utils::FileStream &fi,  // NOLINT(*)
                          bool silent,
                          const char *fname_) {
     this->set_cache_file(fname_);
@@ -114,8 +118,8 @@ class DMatrixPageBase : public DataMatrix {
     iter_->Load(fs);
     if (!silent) {
       utils::Printf("DMatrixPage: %lux%lu matrix is loaded",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()));
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()));  // NOLINT(*)
       if (fname_ != NULL) {
         utils::Printf(" from %s\n", fname_);
       } else {
@@ -141,7 +145,7 @@ class DMatrixPageBase : public DataMatrix {
     }
     this->set_cache_file(cache_file);
     std::string fname_row = std::string(cache_file) + ".row.blob";
-    utils::FileStream fo(utils::FopenCheck(fname_row.c_str(), "wb"));    
+    utils::FileStream fo(utils::FopenCheck(fname_row.c_str(), "wb"));
     SparsePage page;
     size_t bytes_write = 0;
     double tstart = rabit::utils::GetTime();
@@ -178,8 +182,8 @@ class DMatrixPageBase : public DataMatrix {
     if (page.data.size() != 0) {
       page.Save(&fo);
     }
-    fo.Close();    
-    iter_->Load(utils::FileStream(utils::FopenCheck(fname_row.c_str(), "rb")));    
+    fo.Close();
+    iter_->Load(utils::FileStream(utils::FopenCheck(fname_row.c_str(), "rb")));
     // save data matrix
     utils::FileStream fs(utils::FopenCheck(cache_file, "wb"));
     int tmagic = kMagic;
@@ -188,8 +192,8 @@ class DMatrixPageBase : public DataMatrix {
     fs.Close();
     if (!silent) {
       utils::Printf("DMatrixPage: %lux%lu is parsed from %s\n",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
                     uri);
     }
   }
@@ -241,12 +245,12 @@ class DMatrixHalfRAM : public DMatrixPageBase<0xffffab03> {
   virtual IFMatrix *fmat(void) const {
     return fmat_;
   }
-  virtual void set_cache_file(const std::string &cache_file) {    
+  virtual void set_cache_file(const std::string &cache_file) {
   }
   virtual void CheckMagic(int tmagic) {
     utils::Check(tmagic == DMatrixPageBase<0xffffab02>::kMagic ||
                  tmagic == DMatrixPageBase<0xffffab03>::kMagic,
-                 "invalid format,magic number mismatch");   
+                 "invalid format,magic number mismatch");
   }
   /*! \brief the real fmatrix */
   IFMatrix *fmat_;
diff --git a/src/io/page_fmatrix-inl.hpp b/src/io/page_fmatrix-inl.hpp
index 18f4c6dee..2aaec5b19 100644
--- a/src/io/page_fmatrix-inl.hpp
+++ b/src/io/page_fmatrix-inl.hpp
@@ -1,10 +1,16 @@
-#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
-#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file page_fmatrix-inl.hpp
  *   col iterator based on sparse page
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
+#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
+
+#include <vector>
+#include <string>
+#include <algorithm>
+
 namespace xgboost {
 namespace io {
 /*! \brief thread buffer iterator */
@@ -42,9 +48,9 @@ class ThreadColPageIterator: public utils::IIterator<ColBatch> {
   }
   // set index set
   inline void SetIndexSet(const std::vector<bst_uint> &fset, bool load_all) {
-    itr.get_factory().SetIndexSet(fset, load_all);    
+    itr.get_factory().SetIndexSet(fset, load_all);
   }
-  
+
  private:
   // output data
   ColBatch out_;
@@ -96,7 +102,7 @@ struct ColConvertFactory {
         return true;
       }
     }
-    if (tmp_.Size() != 0){
+    if (tmp_.Size() != 0) {
         this->MakeColPage(tmp_, BeginPtr(*buffered_rowset_) + btop,
                           *enabled_, val);
         return true;
@@ -104,7 +110,7 @@ struct ColConvertFactory {
       return false;
     }
   }
-  inline void Destroy(void) {}  
+  inline void Destroy(void) {}
   inline void BeforeFirst(void) {}
   inline void MakeColPage(const SparsePage &prow,
                           const bst_uint *ridx,
@@ -115,7 +121,7 @@ struct ColConvertFactory {
     #pragma omp parallel
     {
       nthread = omp_get_num_threads();
-      int max_nthread = std::max(omp_get_num_procs() / 2 - 4, 1); 
+      int max_nthread = std::max(omp_get_num_procs() / 2 - 4, 1);
       if (nthread > max_nthread) {
         nthread = max_nthread;
       }
@@ -130,10 +136,10 @@ struct ColConvertFactory {
       int tid = omp_get_thread_num();
       for (size_t j = prow.offset[i]; j < prow.offset[i+1]; ++j) {
         const SparseBatch::Entry &e = prow.data[j];
-        if (enabled[e.index]) { 
+        if (enabled[e.index]) {
           builder.AddBudget(e.index, tid);
         }
-      }    
+      }
     }
     builder.InitStorage();
     #pragma omp parallel for schedule(static) num_threads(nthread)
@@ -169,7 +175,7 @@ struct ColConvertFactory {
   // buffered rowset
   std::vector<bst_uint> *buffered_rowset_;
   // enabled marks
-  const std::vector<bool> *enabled_;  
+  const std::vector<bool> *enabled_;
   // internal temp cache
   SparsePage tmp_;
   /*! \brief page size 256 M */
@@ -191,7 +197,7 @@ class FMatrixPage : public IFMatrix {
     if (iter_ != NULL) delete iter_;
   }
   /*! \return whether column access is enabled */
-  virtual bool HaveColAccess(void) const {   
+  virtual bool HaveColAccess(void) const {
     return col_size_.size() != 0;
   }
   /*! \brief get number of colmuns */
@@ -212,7 +218,7 @@ class FMatrixPage : public IFMatrix {
     size_t nmiss = num_buffered_row_ - (col_size_[cidx]);
     return 1.0f - (static_cast<float>(nmiss)) / num_buffered_row_;
   }
-  virtual void InitColAccess(const std::vector<bool> &enabled, 
+  virtual void InitColAccess(const std::vector<bool> &enabled,
                              float pkeep, size_t max_row_perbatch) {
     if (this->HaveColAccess()) return;
     if (TryLoadColData()) return;
@@ -242,11 +248,11 @@ class FMatrixPage : public IFMatrix {
   /*!
    * \brief colmun based iterator
    */
-  virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {    
+  virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {
     size_t ncol = this->NumCol();
     col_index_.resize(0);
     for (size_t i = 0; i < fset.size(); ++i) {
-      if (fset[i] < ncol) col_index_.push_back(fset[i]); 
+      if (fset[i] < ncol) col_index_.push_back(fset[i]);
     }
     col_iter_.SetIndexSet(col_index_, false);
     col_iter_.BeforeFirst();
@@ -255,13 +261,13 @@ class FMatrixPage : public IFMatrix {
   // set the cache file name
   inline void set_cache_file(const std::string &cache_file) {
     col_data_name_ = std::string(cache_file) + ".col.blob";
-    col_meta_name_ = std::string(cache_file) + ".col.meta";    
+    col_meta_name_ = std::string(cache_file) + ".col.meta";
   }
 
  protected:
   inline bool TryLoadColData(void) {
     std::FILE *fi = fopen64(col_meta_name_.c_str(), "rb");
-    if (fi == NULL) return false;    
+    if (fi == NULL) return false;
     utils::FileStream fs(fi);
     LoadMeta(&fs);
     fs.Close();
@@ -306,12 +312,12 @@ class FMatrixPage : public IFMatrix {
     SparsePage *pcol;
     while (citer.Next(pcol)) {
       for (size_t i = 0; i < pcol->Size(); ++i) {
-        col_size_[i] += pcol->offset[i + 1] - pcol->offset[i];        
+        col_size_[i] += pcol->offset[i + 1] - pcol->offset[i];
       }
       pcol->Save(&fo);
       size_t spage = pcol->MemCostBytes();
       bytes_write += spage;
-      double tnow = rabit::utils::GetTime();      
+      double tnow = rabit::utils::GetTime();
       double tdiff = tnow - tstart;
       utils::Printf("Writting to %s in %g MB/s, %lu MB written current speed:%g MB/s\n",
                     col_data_name_.c_str(),
diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp
index 3876c21ad..190cbdcdf 100644
--- a/src/io/simple_dmatrix-inl.hpp
+++ b/src/io/simple_dmatrix-inl.hpp
@@ -1,13 +1,15 @@
-#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
-#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file simple_dmatrix-inl.hpp
- * \brief simple implementation of DMatrixS that can be used 
+ * \brief simple implementation of DMatrixS that can be used
  *  the data format of xgboost is templatized, which means it can accept
  *  any data structure that implements the function defined by FMatrix
  *  this file is a specific implementation of input data structure that can be used by BoostLearner
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
+#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
+
 #include <string>
 #include <cstring>
 #include <vector>
@@ -119,13 +121,13 @@ class DMatrixSimple : public DataMatrix {
       for (size_t i = 0; i < batch.data.size(); ++i) {
         info.info.num_col = std::max(info.info.num_col,
                                      static_cast<size_t>(batch.data[i].index+1));
-      }      
+      }
     }
     if (!silent) {
       utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
-                    static_cast<unsigned long>(row_data_.size()), uri);
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
+                    static_cast<unsigned long>(row_data_.size()), uri);  // NOLINT(*)
     }
     // try to load in additional file
     if (!loadsplit) {
@@ -141,7 +143,7 @@ class DMatrixSimple : public DataMatrix {
                      "DMatrix: weight data does not match the number of rows in features");
       }
       std::string mname = name + ".base_margin";
-      if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) {      
+      if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) {
       }
     }
   }
@@ -165,10 +167,11 @@ class DMatrixSimple : public DataMatrix {
    * \param silent whether print information during loading
    * \param fname file name, used to print message
    */
-  inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
+  inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {  // NOLINT(*)
     int tmagic;
     utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
-    utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch", fname == NULL ? "" : fname);
+    utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch",
+                 fname == NULL ? "" : fname);
 
     info.LoadBinary(fs);
     LoadBinary(fs, &row_ptr_, &row_data_);
@@ -176,9 +179,9 @@ class DMatrixSimple : public DataMatrix {
 
     if (!silent) {
       utils::Printf("%lux%lu matrix with %lu entries is loaded",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
-                    static_cast<unsigned long>(row_data_.size()));
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
+                    static_cast<unsigned long>(row_data_.size()));  // NOLINT(*)
       if (fname != NULL) {
         utils::Printf(" from %s\n", fname);
       } else {
@@ -205,9 +208,9 @@ class DMatrixSimple : public DataMatrix {
 
     if (!silent) {
       utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
-                    static_cast<unsigned long>(row_data_.size()), fname);
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
+                    static_cast<unsigned long>(row_data_.size()), fname);  // NOLINT(*)
       if (info.group_ptr.size() != 0) {
         utils::Printf("data contains %u groups\n",
                       static_cast<unsigned>(info.group_ptr.size()-1));
@@ -256,7 +259,7 @@ class DMatrixSimple : public DataMatrix {
    * \param ptr pointer data
    * \param data data content
    */
-  inline static void SaveBinary(utils::IStream &fo,
+  inline static void SaveBinary(utils::IStream &fo,  // NOLINT(*)
                                 const std::vector<size_t> &ptr,
                                 const std::vector<RowBatch::Entry> &data) {
     size_t nrow = ptr.size() - 1;
@@ -272,7 +275,7 @@ class DMatrixSimple : public DataMatrix {
    * \param out_ptr pointer data
    * \param out_data data content
    */
-  inline static void LoadBinary(utils::IStream &fi,
+  inline static void LoadBinary(utils::IStream &fi,  // NOLINT(*)
                                 std::vector<size_t> *out_ptr,
                                 std::vector<RowBatch::Entry> *out_data) {
     size_t nrow;
@@ -314,7 +317,7 @@ class DMatrixSimple : public DataMatrix {
     DMatrixSimple *parent_;
     // temporal space for batch
     RowBatch batch_;
-  }; 
+  };
 };
 }  // namespace io
 }  // namespace xgboost
diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp
index fc6aab8f9..0e0da4461 100644
--- a/src/io/simple_fmatrix-inl.hpp
+++ b/src/io/simple_fmatrix-inl.hpp
@@ -1,11 +1,15 @@
-#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
-#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file simple_fmatrix-inl.hpp
  * \brief the input data structure for gradient boosting
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
+#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
+
 #include <limits>
+#include <algorithm>
+#include <vector>
 #include "../data.h"
 #include "../utils/utils.h"
 #include "../utils/random.h"
@@ -30,7 +34,7 @@ class FMatrixS : public IFMatrix {
   }
   // destructor
   virtual ~FMatrixS(void) {
-    if (iter_ != NULL) delete iter_;    
+    if (iter_ != NULL) delete iter_;
   }
   /*! \return whether column access is enabled */
   virtual bool HaveColAccess(void) const {
@@ -39,7 +43,7 @@ class FMatrixS : public IFMatrix {
   /*! \brief get number of colmuns */
   virtual size_t NumCol(void) const {
     utils::Check(this->HaveColAccess(), "NumCol:need column access");
-    return col_size_.size() - 1;
+    return col_size_.size();
   }
   /*! \brief get number of buffered rows */
   virtual const std::vector<bst_uint> &buffered_rowset(void) const {
@@ -54,7 +58,7 @@ class FMatrixS : public IFMatrix {
     size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
     return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
   }
-  virtual void InitColAccess(const std::vector<bool> &enabled, 
+  virtual void InitColAccess(const std::vector<bool> &enabled,
                              float pkeep, size_t max_row_perbatch) {
     if (this->HaveColAccess()) return;
     this->InitColData(enabled, pkeep, max_row_perbatch);
@@ -85,7 +89,7 @@ class FMatrixS : public IFMatrix {
     size_t ncol = this->NumCol();
     col_iter_.col_index_.resize(0);
     for (size_t i = 0; i < fset.size(); ++i) {
-      if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]); 
+      if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]);
     }
     col_iter_.BeforeFirst();
     return &col_iter_;
@@ -94,7 +98,7 @@ class FMatrixS : public IFMatrix {
    * \brief save column access data into stream
    * \param fo output stream to save to
    */
-  inline void SaveColAccess(utils::IStream &fo) const {
+  inline void SaveColAccess(utils::IStream &fo) const { // NOLINT(*)
     size_t n = 0;
     fo.Write(&n, sizeof(n));
   }
@@ -102,10 +106,10 @@ class FMatrixS : public IFMatrix {
    * \brief load column access data from stream
    * \param fo output stream to load from
    */
-  inline void LoadColAccess(utils::IStream &fi) {
+  inline void LoadColAccess(utils::IStream &fi) { // NOLINT(*)
     // do nothing in load col access
   }
-  
+
  protected:
   /*!
    * \brief intialize column data
@@ -129,7 +133,7 @@ class FMatrixS : public IFMatrix {
     for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) {
       SparsePage *pcol = col_iter_.cpages_[i];
       for (size_t j = 0; j < pcol->Size(); ++j) {
-        col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];        
+        col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
       }
     }
   }
@@ -139,7 +143,7 @@ class FMatrixS : public IFMatrix {
    * \param pcol the target column
    */
   inline void MakeOneBatch(const std::vector<bool> &enabled,
-                           float pkeep,                          
+                           float pkeep,
                            SparsePage *pcol) {
     // clear rowset
     buffered_rowset_.clear();
@@ -153,14 +157,14 @@ class FMatrixS : public IFMatrix {
     pcol->Clear();
     utils::ParallelGroupBuilder<SparseBatch::Entry>
         builder(&pcol->offset, &pcol->data);
-    builder.InitBudget(0, nthread);
+    builder.InitBudget(info_.num_col(), nthread);
     // start working
     iter_->BeforeFirst();
     while (iter_->Next()) {
       const RowBatch &batch = iter_->Value();
       bmap.resize(bmap.size() + batch.size, true);
-	  long batch_size = static_cast<long>(batch.size);
-      for (long i = 0; i < batch_size; ++i) {
+      long batch_size = static_cast<long>(batch.size); // NOLINT(*)
+      for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
         bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
         if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
           buffered_rowset_.push_back(ridx);
@@ -169,13 +173,13 @@ class FMatrixS : public IFMatrix {
         }
       }
       #pragma omp parallel for schedule(static)
-      for (long i = 0; i < batch_size; ++i) {
+      for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
         int tid = omp_get_thread_num();
         bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
         if (bmap[ridx]) {
           RowBatch::Inst inst = batch[i];
           for (bst_uint j = 0; j < inst.length; ++j) {
-            if (enabled[inst[j].index]){ 
+            if (enabled[inst[j].index]) {
               builder.AddBudget(inst[j].index, tid);
             }
           }
@@ -183,18 +187,18 @@ class FMatrixS : public IFMatrix {
       }
     }
     builder.InitStorage();
-    
+
     iter_->BeforeFirst();
     while (iter_->Next()) {
       const RowBatch &batch = iter_->Value();
       #pragma omp parallel for schedule(static)
-      for (long i = 0; i < static_cast<long>(batch.size); ++i) {
+      for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
         int tid = omp_get_thread_num();
         bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
         if (bmap[ridx]) {
           RowBatch::Inst inst = batch[i];
           for (bst_uint j = 0; j < inst.length; ++j) {
-            if (enabled[inst[j].index]) { 
+            if (enabled[inst[j].index]) {
               builder.Push(inst[j].index,
                            Entry((bst_uint)(batch.base_rowid+i),
                                  inst[j].fvalue), tid);
@@ -204,7 +208,8 @@ class FMatrixS : public IFMatrix {
       }
     }
 
-    utils::Assert(pcol->Size() == info_.num_col(), "inconsistent col data");
+    utils::Assert(pcol->Size() == info_.num_col(),
+                  "inconsistent col data");
     // sort columns
     bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
     #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
@@ -260,7 +265,7 @@ class FMatrixS : public IFMatrix {
     #pragma omp parallel
     {
       nthread = omp_get_num_threads();
-      int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1); 
+      int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
       if (nthread > max_nthread) {
         nthread = max_nthread;
       }
@@ -276,7 +281,7 @@ class FMatrixS : public IFMatrix {
       RowBatch::Inst inst = batch[i];
       for (bst_uint j = 0; j < inst.length; ++j) {
         const SparseBatch::Entry &e = inst[j];
-        if (enabled[e.index]) { 
+        if (enabled[e.index]) {
           builder.AddBudget(e.index, tid);
         }
       }
@@ -329,10 +334,10 @@ class FMatrixS : public IFMatrix {
              static_cast<bst_uint>(pcol->offset[ridx + 1] - pcol->offset[ridx]));
       }
       batch_.col_index = BeginPtr(col_index_);
-      batch_.col_data = BeginPtr(col_data_);      
+      batch_.col_data = BeginPtr(col_data_);
       return true;
     }
-    virtual const ColBatch &Value(void) const {      
+    virtual const ColBatch &Value(void) const {
       return batch_;
     }
     inline void Clear(void) {
@@ -346,7 +351,7 @@ class FMatrixS : public IFMatrix {
     // column content
     std::vector<ColBatch::Inst> col_data_;
     // column sparse pages
-    std::vector<SparsePage*> cpages_;    
+    std::vector<SparsePage*> cpages_;
     // data pointer
     size_t data_ptr_;
     // temporal space for batch
@@ -356,7 +361,7 @@ class FMatrixS : public IFMatrix {
   // column iterator
   ColBatchIter col_iter_;
   // shared meta info with DMatrix
-  const learner::MetaInfo &info_;  
+  const learner::MetaInfo &info_;
   // row iterator
   utils::IIterator<RowBatch> *iter_;
   /*! \brief list of row index that are buffered */
@@ -366,4 +371,4 @@ class FMatrixS : public IFMatrix {
 };
 }  // namespace io
 }  // namespace xgboost
-#endif // XGBOOST_IO_SLICE_FMATRIX_INL_HPP
+#endif  // XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
diff --git a/src/io/sparse_batch_page.h b/src/io/sparse_batch_page.h
index d94141a6e..24546f785 100644
--- a/src/io/sparse_batch_page.h
+++ b/src/io/sparse_batch_page.h
@@ -1,18 +1,22 @@
-#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
-#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file sparse_batch_page.h
  *   content holder of sparse batch that can be saved to disk
  *   the representation can be effectively
  *   use in external memory computation
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
+#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
+
+#include <vector>
+#include <algorithm>
 #include "../data.h"
 
 namespace xgboost {
 namespace io {
 /*!
- * \brief storage unit of sparse batch  
+ * \brief storage unit of sparse batch
  */
 class SparsePage {
  public:
@@ -96,7 +100,7 @@ class SparsePage {
   }
   /*!
    * \brief save the data to fo, when a page was written
-   *    to disk it must contain all the elements in the 
+   *    to disk it must contain all the elements in the
    * \param fo output stream
    */
   inline void Save(utils::IStream *fo) const {
@@ -124,7 +128,7 @@ class SparsePage {
    */
   inline bool PushLoad(utils::IStream *fi) {
     if (!fi->Read(&disk_offset_)) return false;
-    data.resize(offset.back() + disk_offset_.back());    
+    data.resize(offset.back() + disk_offset_.back());
     if (disk_offset_.back() != 0) {
       utils::Check(fi->Read(BeginPtr(data) + offset.back(),
                             disk_offset_.back() * sizeof(SparseBatch::Entry)) != 0,
@@ -138,7 +142,7 @@ class SparsePage {
     }
     return true;
   }
-  /*! 
+  /*!
    * \brief Push row batch into the page
    * \param batch the row batch
    */
@@ -154,7 +158,7 @@ class SparsePage {
       offset[i + begin] = top + batch.ind_ptr[i + 1] - batch.ind_ptr[0];
     }
   }
-  /*! 
+  /*!
    * \brief Push a sparse page
    * \param batch the row page
    */
@@ -170,7 +174,7 @@ class SparsePage {
       offset[i + begin] = top + batch.offset[i + 1];
     }
   }
-  /*! 
+  /*!
    * \brief Push one instance into page
    *  \param row an instance row
    */
@@ -202,7 +206,7 @@ class SparsePage {
 };
 /*!
  * \brief factory class for SparsePage,
- *        used in threadbuffer template  
+ *        used in threadbuffer template
  */
 class SparsePageFactory {
  public:
@@ -217,7 +221,7 @@ class SparsePageFactory {
     return action_index_set_;
   }
   // set index set, will be used after next before first
-  inline void SetIndexSet(const std::vector<bst_uint> &index_set, 
+  inline void SetIndexSet(const std::vector<bst_uint> &index_set,
                           bool load_all) {
     set_load_all_ = load_all;
     if (!set_load_all_) {
@@ -229,7 +233,7 @@ class SparsePageFactory {
     return true;
   }
   inline void SetParam(const char *name, const char *val) {}
-  inline bool LoadNext(SparsePage *val) {    
+  inline bool LoadNext(SparsePage *val) {
     if (!action_load_all_) {
       if (action_index_set_.size() == 0) {
         return false;
diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h
index b58f7b2bb..3fbc579de 100644
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -1,11 +1,13 @@
-#ifndef XGBOOST_LEARNER_DMATRIX_H_
-#define XGBOOST_LEARNER_DMATRIX_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file dmatrix.h
- * \brief meta data and template data structure 
+ * \brief meta data and template data structure
  *        used for regression/classification/ranking
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_LEARNER_DMATRIX_H_
+#define XGBOOST_LEARNER_DMATRIX_H_
+
 #include <vector>
 #include <cstring>
 #include "../data.h"
@@ -16,8 +18,8 @@ namespace learner {
  * \brief meta information needed in training, including label, weight
  */
 struct MetaInfo {
-  /*! 
-   * \brief information needed by booster 
+  /*!
+   * \brief information needed by booster
    * BoosterInfo does not implement save and load,
    * all serialization is done in MetaInfo
    */
@@ -31,7 +33,7 @@ struct MetaInfo {
   std::vector<bst_uint> group_ptr;
   /*! \brief weights of each instance, optional */
   std::vector<float> weights;
-  /*! 
+  /*!
    * \brief initialized margins,
    * if specified, xgboost will start from this init margin
    * can be used to specify initial prediction to boost from
@@ -66,7 +68,7 @@ struct MetaInfo {
       return 1.0f;
     }
   }
-  inline void SaveBinary(utils::IStream &fo) const {
+  inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*)
     int version = kVersion;
     fo.Write(&version, sizeof(version));
     fo.Write(&info.num_row, sizeof(info.num_row));
@@ -77,7 +79,7 @@ struct MetaInfo {
     fo.Write(info.root_index);
     fo.Write(base_margin);
   }
-  inline void LoadBinary(utils::IStream &fi) {
+  inline void LoadBinary(utils::IStream &fi) { // NOLINT(*)
     int version;
     utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
     utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
@@ -114,7 +116,7 @@ struct MetaInfo {
     return labels;
   }
   inline const std::vector<float>& GetFloatInfo(const char *field) const {
-    return ((MetaInfo*)this)->GetFloatInfo(field);
+    return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*)
   }
   inline std::vector<unsigned> &GetUIntInfo(const char *field) {
     using namespace std;
@@ -124,7 +126,7 @@ struct MetaInfo {
     return info.root_index;
   }
   inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
-    return ((MetaInfo*)this)->GetUIntInfo(field);
+    return ((MetaInfo*)this)->GetUIntInfo(field);  // NOLINT(*)
   }
   // try to load weight information from file, if exists
   inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
@@ -149,14 +151,14 @@ struct MetaInfo {
  * \tparam FMatrix type of feature data source
  */
 struct DMatrix {
-  /*! 
-   * \brief magic number associated with this object 
+  /*!
+   * \brief magic number associated with this object
    *    used to check if it is specific instance
    */
   const int magic;
   /*! \brief meta information about the dataset */
   MetaInfo info;
-  /*! 
+  /*!
    * \brief cache pointer to verify if the data structure is cached in some learner
    *  used to verify if DMatrix is cached
    */
diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp
index 433b5a00b..2b69a43a8 100644
--- a/src/learner/evaluation-inl.hpp
+++ b/src/learner/evaluation-inl.hpp
@@ -1,10 +1,12 @@
+/*!
+ * Copyright 2014 by Contributors
+ * \file evaluation-inl.hpp
+ * \brief evaluation metrics for regression and classification and rank
+ * \author Kailong Chen, Tianqi Chen
+ */
 #ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
 #define XGBOOST_LEARNER_EVALUATION_INL_HPP_
-/*!
-* \file xgboost_evaluation-inl.hpp
-* \brief evaluation metrics for regression and classification and rank
-* \author Kailong Chen, Tianqi Chen
-*/
+
 #include <vector>
 #include <utility>
 #include <string>
@@ -18,8 +20,8 @@
 
 namespace xgboost {
 namespace learner {
-/*! 
- * \brief base class of elementwise evaluation 
+/*!
+ * \brief base class of elementwise evaluation
  * \tparam Derived the name of subclass
  */
 template<typename Derived>
@@ -47,15 +49,15 @@ struct EvalEWiseBase : public IEvaluator {
     }
     return Derived::GetFinal(dat[0], dat[1]);
   }
-  /*! 
-   * \brief to be implemented by subclass, 
-   *   get evaluation result from one row 
+  /*!
+   * \brief to be implemented by subclass,
+   *   get evaluation result from one row
    * \param label label of current instance
    * \param pred prediction value of current instance
    */
   inline static float EvalRow(float label, float pred);
-  /*! 
-   * \brief to be overide by subclas, final trasnformation 
+  /*!
+   * \brief to be overridden by subclass, final transformation
    * \param esum the sum statistics returned by EvalRow
    * \param wsum sum of weight
    */
@@ -87,9 +89,9 @@ struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
     const float eps = 1e-16f;
     const float pneg = 1.0f - py;
     if (py < eps) {
-      return -y * std::log(eps) - (1.0f - y)  * std::log(1.0f - eps); 
+      return -y * std::log(eps) - (1.0f - y)  * std::log(1.0f - eps);
     } else if (pneg < eps) {
-      return -y * std::log(1.0f - eps) - (1.0f - y)  * std::log(eps); 
+      return -y * std::log(1.0f - eps) - (1.0f - y)  * std::log(eps);
     } else {
       return -y * std::log(py) - (1.0f - y) * std::log(pneg);
     }
@@ -119,7 +121,7 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
   }
 };
 
-/*! 
+/*!
  * \brief base class of multi-class evaluation
  * \tparam Derived the name of subclass
  */
@@ -139,7 +141,7 @@ struct EvalMClassBase : public IEvaluator {
     float sum = 0.0, wsum = 0.0;
     int label_error = 0;
     #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
-    for (bst_omp_uint i = 0; i < ndata; ++i) {      
+    for (bst_omp_uint i = 0; i < ndata; ++i) {
       const float wt = info.GetWeight(i);
       int label =  static_cast<int>(info.labels[i]);
       if (label >= 0 && label < static_cast<int>(nclass)) {
@@ -161,18 +163,18 @@ struct EvalMClassBase : public IEvaluator {
     }
     return Derived::GetFinal(dat[0], dat[1]);
   }
-  /*! 
-   * \brief to be implemented by subclass, 
-   *   get evaluation result from one row 
+  /*!
+   * \brief to be implemented by subclass,
+   *   get evaluation result from one row
    * \param label label of current instance
-   * \param pred prediction value of current instance 
+   * \param pred prediction value of current instance
    * \param nclass number of class in the prediction
    */
   inline static float EvalRow(int label,
                               const float *pred,
                               size_t nclass);
-  /*! 
-   * \brief to be overide by subclas, final trasnformation 
+  /*!
+   * \brief to be overridden by subclass, final transformation
    * \param esum the sum statistics returned by EvalRow
    * \param wsum sum of weight
    */
@@ -208,7 +210,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
     } else {
       return -std::log(eps);
     }
-  } 
+  }
 };
 
 /*! \brief ctest */
@@ -240,7 +242,7 @@ struct EvalCTest: public IEvaluator {
           tpred.push_back(preds[i + (k + 1) * ndata]);
           tinfo.labels.push_back(info.labels[i]);
           tinfo.weights.push_back(info.GetWeight(i));
-        }        
+        }
       }
       wsum += base_->Eval(tpred, tinfo);
     }
@@ -328,7 +330,7 @@ struct EvalPrecisionRatio : public IEvaluator{
                      const MetaInfo &info,
                      bool distributed) const {
     utils::Check(!distributed, "metric %s do not support distributed evaluation", Name());
-    utils::Check(info.labels.size() != 0, "label set cannot be empty");    
+    utils::Check(info.labels.size() != 0, "label set cannot be empty");
     utils::Assert(preds.size() % info.labels.size() == 0,
                   "label size predict size not match");
     std::vector< std::pair<float, unsigned> > rec;
@@ -344,7 +346,8 @@ struct EvalPrecisionRatio : public IEvaluator{
   }
 
  protected:
-  inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec, const MetaInfo &info) const {
+  inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec,
+                           const MetaInfo &info) const {
     size_t cutoff = static_cast<size_t>(ratio_ * rec.size());
     double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0;
     for (size_t j = 0; j < cutoff; ++j) {
@@ -372,7 +375,7 @@ struct EvalAuc : public IEvaluator {
     utils::Check(info.labels.size() != 0, "label set cannot be empty");
     utils::Check(preds.size() % info.labels.size() == 0,
                  "label size predict size not match");
-    std::vector<unsigned> tgptr(2, 0); 
+    std::vector<unsigned> tgptr(2, 0);
     tgptr[1] = static_cast<unsigned>(info.labels.size());
 
     const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
@@ -417,8 +420,8 @@ struct EvalAuc : public IEvaluator {
     }
     if (distributed) {
       float dat[2];
-	  dat[0] = static_cast<float>(sum_auc);
-	  dat[1] = static_cast<float>(ngroup);      
+      dat[0] = static_cast<float>(sum_auc);
+      dat[1] = static_cast<float>(ngroup);
       // approximately estimate auc using mean
       rabit::Allreduce<rabit::op::Sum>(dat, 2);
       return dat[0] / dat[1];
@@ -463,8 +466,8 @@ struct EvalRankList : public IEvaluator {
     }
     if (distributed) {
       float dat[2];
-	  dat[0] = static_cast<float>(sum_metric);
-	  dat[1] = static_cast<float>(ngroup);      
+      dat[0] = static_cast<float>(sum_metric);
+      dat[1] = static_cast<float>(ngroup);
       // approximately estimate auc using mean
       rabit::Allreduce<rabit::op::Sum>(dat, 2);
       return dat[0] / dat[1];
@@ -489,7 +492,7 @@ struct EvalRankList : public IEvaluator {
     }
   }
   /*! \return evaluation metric, given the pair_sort record, (pred,label) */
-  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0;
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0; // NOLINT(*)
 
  protected:
   unsigned topn_;
@@ -524,13 +527,13 @@ struct EvalNDCG : public EvalRankList{
     double sumdcg = 0.0;
     for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) {
       const unsigned rel = rec[i].second;
-      if (rel != 0) { 
+      if (rel != 0) {
         sumdcg += ((1 << rel) - 1) / std::log(i + 2.0);
       }
     }
     return static_cast<float>(sumdcg);
   }
-  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const { // NOLINT(*)
     std::stable_sort(rec.begin(), rec.end(), CmpFirst);
     float dcg = this->CalcDCG(rec);
     std::stable_sort(rec.begin(), rec.end(), CmpSecond);
diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h
index 85358e72e..a98c47495 100644
--- a/src/learner/evaluation.h
+++ b/src/learner/evaluation.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_EVALUATION_H_
-#define XGBOOST_LEARNER_EVALUATION_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file evaluation.h
  * \brief interface of evaluation function supported in xgboost
  * \author Tianqi Chen, Kailong Chen
  */
+#ifndef XGBOOST_LEARNER_EVALUATION_H_
+#define XGBOOST_LEARNER_EVALUATION_H_
+
 #include <string>
 #include <vector>
 #include <cstdio>
@@ -19,7 +21,7 @@ struct IEvaluator{
    * \brief evaluate a specific metric
    * \param preds prediction
    * \param info information, including label etc.
-   * \param distributed whether a call to Allreduce is needed to gather 
+   * \param distributed whether a call to Allreduce is needed to gather
    *        the average statistics across all the node,
    *        this is only supported by some metrics
    */
diff --git a/src/learner/helper_utils.h b/src/learner/helper_utils.h
index d318cf8bd..7ca7ba59c 100644
--- a/src/learner/helper_utils.h
+++ b/src/learner/helper_utils.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
-#define XGBOOST_LEARNER_HELPER_UTILS_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file helper_utils.h
  * \brief useful helper functions
  * \author Tianqi Chen, Kailong Chen
  */
+#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
+#define XGBOOST_LEARNER_HELPER_UTILS_H_
+
 #include <utility>
 #include <vector>
 #include <cmath>
@@ -61,7 +63,7 @@ inline float LogSum(const float *rec, size_t size) {
   for (size_t i = 0; i < size; ++i) {
     sum += std::exp(rec[i] - mx);
   }
-  return mx + std::log(sum);  
+  return mx + std::log(sum);
 }
 
 inline static bool CmpFirst(const std::pair<float, unsigned> &a,
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index 45e312aa7..f051992d3 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
-#define XGBOOST_LEARNER_LEARNER_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file learner-inl.hpp
- * \brief learning algorithm 
+ * \brief learning algorithm
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
+#define XGBOOST_LEARNER_LEARNER_INL_HPP_
+
 #include <algorithm>
 #include <vector>
 #include <utility>
@@ -19,7 +21,7 @@
 namespace xgboost {
 /*! \brief namespace for learning algorithm */
 namespace learner {
-/*! 
+/*!
  * \brief learner that takes do gradient boosting on specific objective functions
  *  and do training and prediction
  */
@@ -30,7 +32,7 @@ class BoostLearner : public rabit::Serializable {
     gbm_ = NULL;
     name_obj_ = "reg:linear";
     name_gbm_ = "gbtree";
-    silent= 0;
+    silent = 0;
     prob_buffer_row = 1.0f;
     distributed_mode = 0;
     updater_mode = 0;
@@ -47,10 +49,10 @@ class BoostLearner : public rabit::Serializable {
    * \brief add internal cache space for mat, this can speedup prediction for matrix,
    *        please cache prediction for training and eval data
    *    warning: if the model is loaded from file from some previous training history
-   *             set cache data must be called with exactly SAME 
+   *             set cache data must be called with exactly SAME
    *             data matrices to continue training otherwise it will cause error
    * \param mats array of pointers to matrix whose prediction result need to be cached
-   */          
+   */
   inline void SetCacheData(const std::vector<DMatrix*>& mats) {
     utils::Assert(cache_.size() == 0, "can only call cache data once");
     // assign buffer index
@@ -67,10 +69,10 @@ class BoostLearner : public rabit::Serializable {
       buffer_size += mats[i]->info.num_row();
     }
     char str_temp[25];
-    utils::SPrintf(str_temp, sizeof(str_temp), "%lu", 
-                   static_cast<unsigned long>(buffer_size));
+    utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
+                   static_cast<unsigned long>(buffer_size)); // NOLINT(*)
     this->SetParam("num_pbuffer", str_temp);
-    this->pred_buffer_size = buffer_size;    
+    this->pred_buffer_size = buffer_size;
   }
   /*!
    * \brief set parameters from outside
@@ -79,7 +81,7 @@ class BoostLearner : public rabit::Serializable {
    */
   inline void SetParam(const char *name, const char *val) {
     using namespace std;
-    // in this version, bst: prefix is no longer required 
+    // in this version, bst: prefix is no longer required
     if (strncmp(name, "bst:", 4) != 0) {
       std::string n = "bst:"; n += name;
       this->SetParam(n.c_str(), val);
@@ -119,7 +121,7 @@ class BoostLearner : public rabit::Serializable {
       if (!strcmp(name, "objective")) name_obj_ = val;
       if (!strcmp(name, "booster")) name_gbm_ = val;
       mparam.SetParam(name, val);
-    }    
+    }
     if (gbm_ != NULL) gbm_->SetParam(name, val);
     if (obj_ != NULL) obj_->SetParam(name, val);
     if (gbm_ == NULL || obj_ == NULL) {
@@ -133,16 +135,16 @@ class BoostLearner : public rabit::Serializable {
       // estimate feature bound
       unsigned num_feature = 0;
       for (size_t i = 0; i < cache_.size(); ++i) {
-        num_feature = std::max(num_feature, 
+        num_feature = std::max(num_feature,
                                static_cast<unsigned>(cache_[i].mat_->info.num_col()));
       }
       // run allreduce on num_feature to find the maximum value
       rabit::Allreduce<rabit::op::Max>(&num_feature, 1);
       if (num_feature > mparam.num_feature) mparam.num_feature = num_feature;
-    } 
+    }
     char str_temp[25];
     utils::SPrintf(str_temp, sizeof(str_temp), "%d", mparam.num_feature);
-    this->SetParam("bst:num_feature", str_temp);   
+    this->SetParam("bst:num_feature", str_temp);
   }
   /*!
    * \brief initialize the model
@@ -161,13 +163,13 @@ class BoostLearner : public rabit::Serializable {
    * \param fi input stream
    * \param calc_num_feature whether call InitTrainer with calc_num_feature
    */
-  inline void LoadModel(utils::IStream &fi,
+  inline void LoadModel(utils::IStream &fi,  // NOLINT(*)
                         bool calc_num_feature = true) {
     utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
                  "BoostLearner: wrong model format");
     {
       // backward compatibility code for compatible with old model type
-      // for new model, Read(&name_obj_) is suffice      
+      // for new model, Read(&name_obj_) is sufficient
       uint64_t len;
       utils::Check(fi.Read(&len, sizeof(len)) != 0, "BoostLearner: wrong model format");
       if (len >= std::numeric_limits<unsigned>::max()) {
@@ -226,9 +228,9 @@ class BoostLearner : public rabit::Serializable {
       fi = utils::IStream::Create(fname, "r");
       this->LoadModel(*fi, true);
     }
-    delete fi;   
+    delete fi;
   }
-  inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
+  inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
     ModelParam p = mparam;
     p.saved_with_pbuffer = static_cast<int>(with_pbuffer);
     fo.Write(&p, sizeof(ModelParam));
@@ -247,7 +249,7 @@ class BoostLearner : public rabit::Serializable {
       fo->Write("bs64\t", 5);
       utils::Base64OutStream bout(fo);
       this->SaveModel(bout, with_pbuffer);
-      bout.Finish('\n');    
+      bout.Finish('\n');
     } else {
       fo->Write("binf", 4);
       this->SaveModel(*fo, with_pbuffer);
@@ -260,7 +262,7 @@ class BoostLearner : public rabit::Serializable {
    * \param p_train pointer to the matrix used by training
    */
   inline void CheckInit(DMatrix *p_train) {
-    int ncol = static_cast<int>(p_train->info.info.num_col);    
+    int ncol = static_cast<int>(p_train->info.info.num_col);
     std::vector<bool> enabled(ncol, true);
     // set max row per batch to limited value
     // in distributed mode, use safe choice otherwise
@@ -345,10 +347,9 @@ class BoostLearner : public rabit::Serializable {
                       bool output_margin,
                       std::vector<float> *out_preds,
                       unsigned ntree_limit = 0,
-                      bool pred_leaf = false
-                      ) const {
+                      bool pred_leaf = false) const {
     if (pred_leaf) {
-      gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);      
+      gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
     } else {
       this->PredictRaw(data, out_preds, ntree_limit);
       if (!output_margin) {
@@ -361,7 +362,7 @@ class BoostLearner : public rabit::Serializable {
    *  NOTE: use the batch prediction interface if possible, batch prediction is usually
    *        more efficient than online prediction
    *        This function is NOT threadsafe, make sure you only call from one thread
-   *    
+   *
    * \param inst the instance you want to predict
    * \param output_margin whether to only predict margin value instead of transformed prediction
    * \param out_preds output vector to hold the predictions
@@ -387,8 +388,8 @@ class BoostLearner : public rabit::Serializable {
   }
 
  protected:
-  /*! 
-   * \brief initialize the objective function and GBM, 
+  /*!
+   * \brief initialize the objective function and GBM,
    * if not yet done
    */
   inline void InitObjGBM(void) {
@@ -401,12 +402,12 @@ class BoostLearner : public rabit::Serializable {
     for (size_t i = 0; i < cfg_.size(); ++i) {
       obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
       gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
-    }   
+    }
     if (evaluator_.Size() == 0) {
       evaluator_.AddEval(obj_->DefaultEvalMetric());
     }
   }
-  /*! 
+  /*!
    * \brief additional default value for specific objs
    */
   inline void InitAdditionDefaultParam(void) {
@@ -415,12 +416,12 @@ class BoostLearner : public rabit::Serializable {
       gbm_->SetParam("max_delta_step", "0.7");
     }
   }
-  /*! 
+  /*!
    * \brief get un-transformed prediction
    * \param data training data matrix
    * \param out_preds output vector that stores the prediction
    * \param ntree_limit limit number of trees used for boosted tree
-   *   predictor, when it equals 0, this means we are using all the trees   
+   *   predictor, when it equals 0, this means we are using all the trees
    */
   inline void PredictRaw(const DMatrix &data,
                          std::vector<float> *out_preds,
@@ -517,7 +518,7 @@ class BoostLearner : public rabit::Serializable {
 
  protected:
   // magic number to transform random seed
-  const static int kRandSeedMagic = 127;
+  static const int kRandSeedMagic = 127;
   // cache entry object that helps handle feature caching
   struct CacheEntry {
     const DMatrix *mat_;
diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp
index d0ecf7a27..b6d388e3c 100644
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
-#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file objective-inl.hpp
  * \brief objective function implementations
  * \author Tianqi Chen, Kailong Chen
  */
+#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
+#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
+
 #include <vector>
 #include <algorithm>
 #include <utility>
@@ -176,14 +178,14 @@ class RegLossObj : public IObjFunction {
 // poisson regression for count
 class PoissonRegression : public IObjFunction {
  public:
-  explicit PoissonRegression(void) {
+  PoissonRegression(void) {
     max_delta_step = 0.0f;
   }
   virtual ~PoissonRegression(void) {}
-  
+
   virtual void SetParam(const char *name, const char *val) {
     using namespace std;
-    if (!strcmp( "max_delta_step", name )) {
+    if (!strcmp("max_delta_step", name)) {
       max_delta_step = static_cast<float>(atof(val));
     }
   }
@@ -201,9 +203,9 @@ class PoissonRegression : public IObjFunction {
     // check if label in range
     bool label_correct = true;
     // start calculating gradient
-    const long ndata = static_cast<bst_omp_uint>(preds.size());
+    const long ndata = static_cast<bst_omp_uint>(preds.size()); // NOLINT(*)
     #pragma omp parallel for schedule(static)
-    for (long i = 0; i < ndata; ++i) {
+    for (long i = 0; i < ndata; ++i) { // NOLINT(*)
       float p = preds[i];
       float w = info.GetWeight(i);
       float y = info.labels[i];
@@ -219,9 +221,9 @@ class PoissonRegression : public IObjFunction {
   }
   virtual void PredTransform(std::vector<float> *io_preds) {
     std::vector<float> &preds = *io_preds;
-    const long ndata = static_cast<long>(preds.size());
+    const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
     #pragma omp parallel for schedule(static)
-    for (long j = 0; j < ndata; ++j) {
+    for (long j = 0; j < ndata; ++j) {  // NOLINT(*)
       preds[j] = std::exp(preds[j]);
     }
   }
@@ -234,7 +236,7 @@ class PoissonRegression : public IObjFunction {
   virtual const char* DefaultEvalMetric(void) const {
     return "poisson-nloglik";
   }
-  
+
  private:
   float max_delta_step;
 };
@@ -467,7 +469,7 @@ class LambdaRankObj : public IObjFunction {
         : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
   };
   /*!
-   * \brief get lambda weight for existing pairs 
+   * \brief get lambda weight for existing pairs
    * \param list a list that is sorted by pred score
    * \param io_pairs record of pairs, containing the pairs to fill in weights
    */
@@ -555,10 +557,10 @@ class LambdaRankObjMAP : public LambdaRankObj {
     float ap_acc;
     /*!
      * \brief the accumulated precision,
-     *   assuming a positive instance is missing 
+     *   assuming a positive instance is missing
      */
     float ap_acc_miss;
-    /*! 
+    /*!
      * \brief the accumulated precision,
      * assuming that one more positive instance is inserted ahead
      */
diff --git a/src/learner/objective.h b/src/learner/objective.h
index c0a525a43..08b57f528 100644
--- a/src/learner/objective.h
+++ b/src/learner/objective.h
@@ -1,11 +1,14 @@
-#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
-#define XGBOOST_LEARNER_OBJECTIVE_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file objective.h
  * \brief interface of objective function used for gradient boosting
  * \author Tianqi Chen, Kailong Chen
  */
-#include "dmatrix.h"
+#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
+#define XGBOOST_LEARNER_OBJECTIVE_H_
+
+#include <vector>
+#include "./dmatrix.h"
 
 namespace xgboost {
 namespace learner {
@@ -13,13 +16,13 @@ namespace learner {
 class IObjFunction{
  public:
   /*! \brief virtual destructor */
-  virtual ~IObjFunction(void){}
+  virtual ~IObjFunction(void) {}
   /*!
    * \brief set parameters from outside
    * \param name name of the parameter
    * \param val value of the parameter
    */
-  virtual void SetParam(const char *name, const char *val) = 0;  
+  virtual void SetParam(const char *name, const char *val) = 0;
   /*!
    * \brief get gradient over each of predictions, given existing information
    * \param preds prediction of current round
@@ -38,9 +41,9 @@ class IObjFunction{
    * \brief transform prediction values, this is only called when Prediction is called
    * \param io_preds prediction values, saves to this vector as well
    */
-  virtual void PredTransform(std::vector<float> *io_preds){}
+  virtual void PredTransform(std::vector<float> *io_preds) {}
   /*!
-   * \brief transform prediction values, this is only called when Eval is called, 
+   * \brief transform prediction values, this is only called when Eval is called,
    *  usually it redirect to PredTransform
    * \param io_preds prediction values, saves to this vector as well
    */
@@ -49,7 +52,7 @@ class IObjFunction{
   }
   /*!
    * \brief transform probability value back to margin
-   * this is used to transform user-set base_score back to margin 
+   * this is used to transform user-set base_score back to margin
    * used by gradient boosting
    * \return transformed value
    */
@@ -77,7 +80,7 @@ inline IObjFunction* CreateObjFunction(const char *name) {
   if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1);
   if (!strcmp("rank:pairwise", name )) return new PairwiseRankObj();
   if (!strcmp("rank:ndcg", name)) return new LambdaRankObjNDCG();
-  if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();  
+  if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();
   utils::Error("unknown objective function type: %s", name);
   return NULL;
 }
diff --git a/src/sync/sync.h b/src/sync/sync.h
index 3a371b03c..b9bdf89fe 100644
--- a/src/sync/sync.h
+++ b/src/sync/sync.h
@@ -1,13 +1,13 @@
-#ifndef XGBOOST_SYNC_H_
-#define XGBOOST_SYNC_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file sync.h
  * \brief the synchronization module of rabit
  *        redirects to subtree rabit header
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_SYNC_SYNC_H_
+#define XGBOOST_SYNC_SYNC_H_
+
 #include "../../subtree/rabit/include/rabit.h"
 #include "../../subtree/rabit/include/rabit/timer.h"
-#endif  // XGBOOST_SYNC_H_
-
-
+#endif  // XGBOOST_SYNC_SYNC_H_
diff --git a/src/tree/model.h b/src/tree/model.h
index 4eea34911..6a22aa5f1 100644
--- a/src/tree/model.h
+++ b/src/tree/model.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_TREE_MODEL_H_
-#define XGBOOST_TREE_MODEL_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file model.h
  * \brief model structure for tree
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_MODEL_H_
+#define XGBOOST_TREE_MODEL_H_
+
 #include <string>
 #include <cstring>
 #include <sstream>
@@ -19,7 +21,7 @@
 namespace xgboost {
 namespace tree {
 /*!
- * \brief template class of TreeModel 
+ * \brief template class of TreeModel
  * \tparam TSplitCond data type to indicate split condition
  * \tparam TNodeStat auxiliary statistics of node to help tree building
  */
@@ -42,7 +44,7 @@ class TreeModel {
     int max_depth;
     /*! \brief  number of features used for tree construction */
     int num_feature;
-    /*! 
+    /*!
      * \brief leaf vector size, used for vector tree
      * used to store more than one dimensional information in tree
      */
@@ -55,8 +57,8 @@ class TreeModel {
       size_leaf_vector = 0;
       std::memset(reserved, 0, sizeof(reserved));
     }
-    /*! 
-     * \brief set parameters from outside 
+    /*!
+     * \brief set parameters from outside
      * \param name name of the parameter
      * \param val  value of the parameter
      */
@@ -70,7 +72,7 @@ class TreeModel {
   /*! \brief tree node */
   class Node {
    public:
-	Node(void) : sindex_(0) {}
+    Node(void) : sindex_(0) {}
     /*! \brief index of left child */
     inline int cleft(void) const {
       return this->cleft_;
@@ -119,15 +121,15 @@ class TreeModel {
     inline bool is_root(void) const {
       return parent_ == -1;
     }
-    /*! 
-     * \brief set the right child 
+    /*!
+     * \brief set the right child
      * \param nide node id to right child
      */
     inline void set_right_child(int nid) {
       this->cright_ = nid;
     }
-    /*! 
-     * \brief set split condition of current node 
+    /*!
+     * \brief set split condition of current node
      * \param split_index feature index to split
      * \param split_cond  split condition
      * \param default_left the default direction when feature is unknown
@@ -138,10 +140,10 @@ class TreeModel {
       this->sindex_ = split_index;
       (this->info_).split_cond = split_cond;
     }
-    /*! 
+    /*!
      * \brief set the leaf value of the node
      * \param value leaf value
-     * \param right right index, could be used to store 
+     * \param right right index, could be used to store
      *        additional information
      */
     inline void set_leaf(float value, int right = -1) {
@@ -153,12 +155,12 @@ class TreeModel {
     inline void mark_delete(void) {
       this->sindex_ = std::numeric_limits<unsigned>::max();
     }
-    
+
    private:
     friend class TreeModel<TSplitCond, TNodeStat>;
-    /*! 
-     * \brief in leaf node, we have weights, in non-leaf nodes, 
-     *        we have split condition 
+    /*!
+     * \brief in leaf node, we have weights, in non-leaf nodes,
+     *        we have split condition
      */
     union Info{
       float leaf_value;
@@ -203,7 +205,7 @@ class TreeModel {
                  "number of nodes in the tree exceed 2^31");
     nodes.resize(param.num_nodes);
     stats.resize(param.num_nodes);
-    leaf_vector.resize(param.num_nodes * param.size_leaf_vector); 
+    leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
     return nd;
   }
   // delete a tree node, keep the parent field to allow trace back
@@ -215,7 +217,7 @@ class TreeModel {
   }
 
  public:
-  /*! 
+  /*!
    * \brief change a non leaf node to a leaf node, delete its children
    * \param rid node id of the node
    * \param new leaf value
@@ -229,7 +231,7 @@ class TreeModel {
     this->DeleteNode(nodes[rid].cright());
     nodes[rid].set_leaf(value);
   }
-  /*! 
+  /*!
    * \brief collapse a non leaf node to a leaf node, delete its children
    * \param rid node id of the node
    * \param new leaf value
@@ -273,7 +275,7 @@ class TreeModel {
     return &leaf_vector[nid * param.size_leaf_vector];
   }
   /*! \brief get leaf vector given nid */
-  inline const bst_float* leafvec(int nid) const{
+  inline const bst_float* leafvec(int nid) const {
     if (leaf_vector.size() == 0) return NULL;
     return &leaf_vector[nid * param.size_leaf_vector];
   }
@@ -288,15 +290,15 @@ class TreeModel {
       nodes[i].set_parent(-1);
     }
   }
-  /*! 
+  /*!
    * \brief load model from stream
    * \param fi input stream
    */
-  inline void LoadModel(utils::IStream &fi) {
+  inline void LoadModel(utils::IStream &fi) { // NOLINT(*)
     utils::Check(fi.Read(&param, sizeof(Param)) > 0,
                  "TreeModel: wrong format");
     nodes.resize(param.num_nodes); stats.resize(param.num_nodes);
-    utils::Assert(param.num_nodes != 0, "invalid model"); 
+    utils::Assert(param.num_nodes != 0, "invalid model");
     utils::Check(fi.Read(BeginPtr(nodes), sizeof(Node) * nodes.size()) > 0,
                  "TreeModel: wrong format");
     utils::Check(fi.Read(BeginPtr(stats), sizeof(NodeStat) * stats.size()) > 0,
@@ -313,22 +315,22 @@ class TreeModel {
                   "number of deleted nodes do not match, num_deleted=%d, dnsize=%lu, num_nodes=%d",
                   param.num_deleted, deleted_nodes.size(), param.num_nodes);
   }
-  /*! 
+  /*!
    * \brief save model to stream
    * \param fo output stream
    */
-  inline void SaveModel(utils::IStream &fo) const {
+  inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
     utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
                   "Tree::SaveModel");
     utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
                   "Tree::SaveModel");
     fo.Write(&param, sizeof(Param));
-    utils::Assert(param.num_nodes != 0, "invalid model"); 
+    utils::Assert(param.num_nodes != 0, "invalid model");
     fo.Write(BeginPtr(nodes), sizeof(Node) * nodes.size());
     fo.Write(BeginPtr(stats), sizeof(NodeStat) * nodes.size());
     if (param.size_leaf_vector != 0) fo.Write(leaf_vector);
   }
-  /*! 
+  /*!
    * \brief add child nodes to node
    * \param nid node id to add childs
    */
@@ -340,8 +342,8 @@ class TreeModel {
     nodes[nodes[nid].cleft() ].set_parent(nid, true);
     nodes[nodes[nid].cright()].set_parent(nid, false);
   }
-  /*! 
-   * \brief only add a right child to a leaf node 
+  /*!
+   * \brief only add a right child to a leaf node
    * \param node id to add right child
    */
   inline void AddRightChild(int nid) {
@@ -385,7 +387,7 @@ class TreeModel {
   inline int num_extra_nodes(void) const {
     return param.num_nodes - param.num_roots - param.num_deleted;
   }
-  /*! 
+  /*!
    * \brief dump model to text string
    * \param fmap feature map of feature types
    * \param with_stats whether dump out statistics as well
@@ -400,7 +402,7 @@ class TreeModel {
   }
 
  private:
-  void Dump(int nid, std::stringstream &fo,
+  void Dump(int nid, std::stringstream &fo, // NOLINT(*)
             const utils::FeatMap& fmap, int depth, bool with_stats) {
     for (int i = 0;  i < depth; ++i) {
       fo << '\t';
@@ -469,7 +471,7 @@ struct RTreeNodeStat {
   /*! \brief number of child that is leaf node known up to now */
   int   leaf_child_cnt;
   /*! \brief print information of current stats to fo */
-  inline void Print(std::stringstream &fo, bool is_leaf) const {
+  inline void Print(std::stringstream &fo, bool is_leaf) const { // NOLINT(*)
     if (!is_leaf) {
       fo << ",gain=" << loss_chg << ",cover=" << sum_hess;
     } else {
@@ -481,13 +483,13 @@ struct RTreeNodeStat {
 /*! \brief define regression tree to be the most common tree model */
 class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
  public:
-  /*! 
+  /*!
    * \brief dense feature vector that can be taken by RegTree
    * to do tranverse efficiently
    * and can be construct from sparse feature vector
    */
   struct FVec {
-    /*! 
+    /*!
      * \brief a union value of value and flag
      * when flag == -1, this indicate the value is missing
      */
@@ -510,7 +512,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
       }
     }
     /*! \brief drop the trace after fill, must be called after fill */
-    inline void Drop(const RowBatch::Inst &inst) {      
+    inline void Drop(const RowBatch::Inst &inst) {
       for (bst_uint i = 0; i < inst.length; ++i) {
         if (inst[i].index >= data.size()) continue;
         data[inst[i].index].flag = -1;
@@ -526,10 +528,10 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
     }
   };
   /*!
-   * \brief get the leaf index 
+   * \brief get the leaf index
    * \param feats dense feature vector, if the feature is missing the field is set to NaN
    * \param root_gid starting root index of the instance
-   * \return the leaf index of the given feature 
+   * \return the leaf index of the given feature
    */
   inline int GetLeafIndex(const FVec&feat, unsigned root_id = 0) const {
     // start from groups that belongs to current data
@@ -545,7 +547,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
    * \brief get the prediction of regression tree, only accepts dense feature vector
    * \param feats dense feature vector, if the feature is missing the field is set to NaN
    * \param root_gid starting root index of the instance
-   * \return the leaf index of the given feature 
+   * \return the prediction value for the given feature vector
    */
   inline float Predict(const FVec &feat, unsigned root_id = 0) const {
     int pid = this->GetLeafIndex(feat, root_id);
diff --git a/src/tree/param.h b/src/tree/param.h
index 20ba1e6c0..f06365a17 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -1,10 +1,13 @@
-#ifndef XGBOOST_TREE_PARAM_H_
-#define XGBOOST_TREE_PARAM_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file param.h
  * \brief training parameters, statistics used to support tree construction
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_PARAM_H_
+#define XGBOOST_TREE_PARAM_H_
+
+#include <vector>
 #include <cstring>
 #include "../data.h"
 
@@ -27,7 +30,7 @@ struct TrainParam{
   // L1 regularization factor
   float reg_alpha;
   // default direction choice
-  int default_direction;  
+  int default_direction;
   // maximum delta update we can add in weight estimation
   // this parameter can be used to stablize update
   // default=0 means no constraint on weight delta
@@ -45,7 +48,7 @@ struct TrainParam{
   // accuracy of sketch
   float sketch_ratio;
   // leaf vector size
-  int size_leaf_vector;  
+  int size_leaf_vector;
   // option for parallelization
   int parallel_option;
   // option to open cacheline optimizaton
@@ -74,11 +77,11 @@ struct TrainParam{
     sketch_ratio = 2.0f;
     cache_opt = 1;
   }
-  /*! 
-   * \brief set parameters from outside 
+  /*!
+   * \brief set parameters from outside
    * \param name name of the parameter
    * \param val  value of the parameter
-   */            
+   */
   inline void SetParam(const char *name, const char *val) {
     using namespace std;
     // sync-names
@@ -116,7 +119,7 @@ struct TrainParam{
       if (reg_alpha == 0.0f) {
         return Sqr(sum_grad) / (sum_hess + reg_lambda);
       } else {
-        return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda); 
+        return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda);
       }
     } else {
       double w = CalcWeight(sum_grad, sum_hess);
@@ -213,7 +216,7 @@ struct GradStats {
   inline static void CheckInfo(const BoosterInfo &info) {
   }
   /*!
-   * \brief accumulate statistics 
+   * \brief accumulate statistics
    * \param p the gradient pair
    */
   inline void Add(bst_gpair p) {
@@ -222,7 +225,7 @@ struct GradStats {
   /*!
    * \brief accumulate statistics, more complicated version
    * \param gpair the vector storing the gradient statistics
-   * \param info the additional information 
+   * \param info the additional information
    * \param ridx instance index of this instance
    */
   inline void Add(const std::vector<bst_gpair> &gpair,
@@ -244,7 +247,7 @@ struct GradStats {
     this->Add(b.sum_grad, b.sum_hess);
   }
   /*! \brief same as add, reduce is used in All Reduce */
-  inline static void Reduce(GradStats &a, const GradStats &b) {
+  inline static void Reduce(GradStats &a, const GradStats &b) { // NOLINT(*)
     a.Add(b);
   }
   /*! \brief set current value to a - b */
@@ -257,8 +260,8 @@ struct GradStats {
     return sum_hess == 0.0;
   }
   /*! \brief set leaf vector value based on statistics */
-  inline void SetLeafVec(const TrainParam &param, bst_float *vec) const{
-  }  
+  inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
+  }
   // constructor to allow inheritance
   GradStats(void) {}
   /*! \brief add statistics to the data */
@@ -311,7 +314,7 @@ struct CVGradStats : public GradStats {
       ret += param.CalcGain(train[i].sum_grad,
                             train[i].sum_hess,
                             vsize * valid[i].sum_grad,
-                            vsize * valid[i].sum_hess);      
+                            vsize * valid[i].sum_hess);
     }
     return ret / vsize;
   }
@@ -324,7 +327,7 @@ struct CVGradStats : public GradStats {
     }
   }
   /*! \brief same as add, reduce is used in All Reduce */
-  inline static void Reduce(CVGradStats &a, const CVGradStats &b) {
+  inline static void Reduce(CVGradStats &a, const CVGradStats &b) { // NOLINT(*)
     a.Add(b);
   }
   /*! \brief set current value to a - b */
@@ -344,8 +347,8 @@ struct CVGradStats : public GradStats {
   }
 };
 
-/*! 
- * \brief statistics that is helpful to store 
+/*!
+ * \brief statistics that is helpful to store
  *   and represent a split solution for the tree
  */
 struct SplitEntry{
@@ -357,12 +360,12 @@ struct SplitEntry{
   float split_value;
   /*! \brief constructor */
   SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
-  /*! 
-   * \brief decides whether a we can replace current entry with the statistics given 
+  /*!
+   * \brief decides whether we can replace current entry with the statistics given
    *   This function gives better priority to lower index when loss_chg equals
    *    not the best way, but helps to give consistent result during multi-thread execution
    * \param loss_chg the loss reduction get through the split
-   * \param split_index the feature index where the split is on 
+   * \param split_index the feature index where the split is on
    */
   inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
     if (this->split_index() <= split_index) {
@@ -371,7 +374,7 @@ struct SplitEntry{
       return !(this->loss_chg > new_loss_chg);
     }
   }
-  /*! 
+  /*!
    * \brief update the split entry, replace it if e is better
    * \param e candidate split solution
    * \return whether the proposed split is better and can replace current split
@@ -386,7 +389,7 @@ struct SplitEntry{
       return false;
     }
   }
-  /*! 
+  /*!
    * \brief update the split entry, replace it if e is better
    * \param loss_chg loss reduction of new candidate
    * \param split_index feature index to split on
@@ -407,7 +410,7 @@ struct SplitEntry{
     }
   }
   /*! \brief same as update, used by AllReduce*/
-  inline static void Reduce(SplitEntry &dst, const SplitEntry &src) {
+  inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { // NOLINT(*)
     dst.Update(src);
   }
   /*!\return feature index to split on */
diff --git a/src/tree/updater.cpp b/src/tree/updater.cpp
index 5d2e99820..eb2e06925 100644
--- a/src/tree/updater.cpp
+++ b/src/tree/updater.cpp
@@ -1,3 +1,4 @@
+// Copyright 2014 by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
diff --git a/src/tree/updater.h b/src/tree/updater.h
index 4ced21e5e..1cf74a699 100644
--- a/src/tree/updater.h
+++ b/src/tree/updater.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_TREE_UPDATER_H_
-#define XGBOOST_TREE_UPDATER_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater.h
  * \brief interface to update the tree
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_H_
+#define XGBOOST_TREE_UPDATER_H_
+
 #include <vector>
 
 #include "../data.h"
@@ -12,7 +14,7 @@
 
 namespace xgboost {
 namespace tree {
-/*! 
+/*!
  * \brief interface of tree update module, that performs update of a tree
  */
 class IUpdater {
@@ -21,7 +23,7 @@ class IUpdater {
    * \brief set parameters from outside
    * \param name name of the parameter
    * \param val  value of the parameter
-   */  
+   */
   virtual void SetParam(const char *name, const char *val) = 0;
   /*!
    * \brief peform update to the tree models
@@ -29,8 +31,8 @@ class IUpdater {
    * \param p_fmat feature matrix that provide access to features
    * \param info extra side information that may be need, such as root index
    * \param trees pointer to the trees to be updated, upater will change the content of the tree
-   *   note: all the trees in the vector are updated, with the same statistics, 
-   *         but maybe different random seeds, usually one tree is passed in at a time, 
+   *   note: all the trees in the vector are updated, with the same statistics,
+   *         but maybe different random seeds, usually one tree is passed in at a time,
    *         there can be multiple trees when we train random forest style model
    */
   virtual void Update(const std::vector<bst_gpair> &gpair,
@@ -38,7 +40,7 @@ class IUpdater {
                       const BoosterInfo &info,
                       const std::vector<RegTree*> &trees) = 0;
 
-  /*! 
+  /*!
    * \brief this is simply a function for optimizing performance
    * this function asks the updater to return the leaf position of each instance in the p_fmat,
    * if it is cached in the updater, if it is not available, return NULL
@@ -50,8 +52,8 @@ class IUpdater {
   // destructor
   virtual ~IUpdater(void) {}
 };
-/*! 
- * \brief create a updater based on name 
+/*!
+ * \brief create an updater based on name
  * \param name name of updater
  * \return return the updater instance
  */
diff --git a/src/tree/updater_basemaker-inl.hpp b/src/tree/updater_basemaker-inl.hpp
index f144ae199..6204c47b7 100644
--- a/src/tree/updater_basemaker-inl.hpp
+++ b/src/tree/updater_basemaker-inl.hpp
@@ -1,12 +1,14 @@
-#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
-#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_basemaker-inl.hpp
  * \brief implement a common tree constructor
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
+#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
 #include <vector>
 #include <algorithm>
+#include <string>
 #include <limits>
 #include "../sync/sync.h"
 #include "../utils/random.h"
@@ -14,7 +16,7 @@
 
 namespace xgboost {
 namespace tree {
-/*! 
+/*!
  * \brief base tree maker class that defines common operation
  *  needed in tree making
  */
@@ -26,7 +28,7 @@ class BaseMaker: public IUpdater {
   virtual void SetParam(const char *name, const char *val) {
     param.SetParam(name, val);
   }
-   
+
  protected:
   // helper to collect and query feature meta information
   struct FMetaHelper {
@@ -60,8 +62,11 @@ class BaseMaker: public IUpdater {
       bst_float a = fminmax[fid * 2];
       bst_float b = fminmax[fid * 2 + 1];
       if (a == -std::numeric_limits<bst_float>::max()) return 0;
-      if (-a == b) return 1;
-      else return 2;
+      if (-a == b) {
+        return 1;
+      } else {
+        return 2;
+      }
     }
     inline bst_float MaxValue(bst_uint fid) const {
       return fminmax[fid *2 + 1];
@@ -70,7 +75,7 @@ class BaseMaker: public IUpdater {
       std::vector<bst_uint> &findex = *p_findex;
       findex.clear();
       for (size_t i = 0; i < fminmax.size(); i += 2) {
-		const bst_uint fid = static_cast<bst_uint>(i / 2);
+        const bst_uint fid = static_cast<bst_uint>(i / 2);
         if (this->Type(fid) != 0) findex.push_back(fid);
       }
       unsigned n = static_cast<unsigned>(p * findex.size());
@@ -86,7 +91,7 @@ class BaseMaker: public IUpdater {
       rabit::Broadcast(&s_cache, 0);
       fs.Read(&findex);
     }
-    
+
    private:
     std::vector<bst_float> fminmax;
   };
@@ -116,7 +121,7 @@ class BaseMaker: public IUpdater {
     }
     return nthread;
   }
-  // ------class member helpers---------
+  //  ------class member helpers---------
   /*! \brief initialize temp data structure */
   inline void InitData(const std::vector<bst_gpair> &gpair,
                        const IFMatrix &fmat,
@@ -124,7 +129,8 @@ class BaseMaker: public IUpdater {
                        const RegTree &tree) {
     utils::Assert(tree.param.num_nodes == tree.param.num_roots,
                   "TreeMaker: can only grow new tree");
-    {// setup position
+    {
+      // setup position
       position.resize(gpair.size());
       if (root_index.size() == 0) {
         std::fill(position.begin(), position.end(), 0);
@@ -147,7 +153,8 @@ class BaseMaker: public IUpdater {
         }
       }
     }
-    {// expand query
+    {
+      // expand query
       qexpand.reserve(256); qexpand.clear();
       for (int i = 0; i < tree.param.num_roots; ++i) {
         qexpand.push_back(i);
@@ -170,7 +177,7 @@ class BaseMaker: public IUpdater {
     this->UpdateNode2WorkIndex(tree);
   }
   // return decoded position
-  inline int DecodePosition(bst_uint ridx) const{
+  inline int DecodePosition(bst_uint ridx) const {
     const int pid = position[ridx];
     return pid < 0 ? ~pid : pid;
   }
@@ -182,23 +189,24 @@ class BaseMaker: public IUpdater {
       position[ridx] = nid;
     }
   }
-  /*! 
+  /*!
    * \brief this is helper function uses column based data structure,
    *        reset the positions to the lastest one
    * \param nodes the set of nodes that contains the split to be used
    * \param p_fmat feature matrix needed for tree construction
    * \param tree the regression tree structure
    */
-  inline void ResetPositionCol(const std::vector<int> &nodes, IFMatrix *p_fmat, const RegTree &tree) {
+  inline void ResetPositionCol(const std::vector<int> &nodes,
+                               IFMatrix *p_fmat, const RegTree &tree) {
     // set the positions in the nondefault
     this->SetNonDefaultPositionCol(nodes, p_fmat, tree);
     // set rest of instances to default position
     const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
     // set default direct nodes to default
-    // for leaf nodes that are not fresh, mark then to ~nid, 
+    // for leaf nodes that are not fresh, mark them as ~nid,
     // so that they are ignored in future statistics collection
     const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
-    
+
     #pragma omp parallel for schedule(static)
     for (bst_omp_uint i = 0; i < ndata; ++i) {
       const bst_uint ridx = rowset[i];
@@ -237,7 +245,7 @@ class BaseMaker: public IUpdater {
     }
     std::sort(fsplits.begin(), fsplits.end());
     fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
-    
+
     utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
     while (iter->Next()) {
       const ColBatch &batch = iter->Value();
@@ -252,7 +260,7 @@ class BaseMaker: public IUpdater {
           const int nid = this->DecodePosition(ridx);
           // go back to parent, correct those who are not default
           if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
-            if(fvalue < tree[nid].split_cond()) {
+            if (fvalue < tree[nid].split_cond()) {
               this->SetEncodePosition(ridx, tree[nid].cleft());
             } else {
               this->SetEncodePosition(ridx, tree[nid].cright());
@@ -324,7 +332,7 @@ class BaseMaker: public IUpdater {
       sketch->temp.size = 0;
     }
     /*!
-     * \brief push a new element to sketch 
+     * \brief push a new element to sketch
      * \param fvalue feature value, comes in sorted ascending order
      * \param w weight
      * \param max_size
@@ -337,31 +345,32 @@ class BaseMaker: public IUpdater {
         return;
       }
       if (last_fvalue != fvalue) {
-	double rmax = rmin + wmin;
+        double rmax = rmin + wmin;
         if (rmax >= next_goal && sketch->temp.size != max_size) {
-          if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
+          if (sketch->temp.size == 0 ||
+              last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
             // push to sketch
             sketch->temp.data[sketch->temp.size] =
                 utils::WXQuantileSketch<bst_float, bst_float>::
                 Entry(static_cast<bst_float>(rmin),
-			          static_cast<bst_float>(rmax),
-					  static_cast<bst_float>(wmin), last_fvalue);
+                      static_cast<bst_float>(rmax),
+                      static_cast<bst_float>(wmin), last_fvalue);
             utils::Assert(sketch->temp.size < max_size,
                           "invalid maximum size max_size=%u, stemp.size=%lu\n",
                           max_size, sketch->temp.size);
             ++sketch->temp.size;
           }
           if (sketch->temp.size == max_size) {
-            next_goal = sum_total * 2.0f + 1e-5f;	    
-          } else{
+            next_goal = sum_total * 2.0f + 1e-5f;
+          } else {
             next_goal = static_cast<bst_float>(sketch->temp.size * sum_total / max_size);
           }
         } else {
-	  if (rmax >= next_goal) {
-	    rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n",
-				 rmax, sum_total, next_goal, sketch->temp.size);
-	  }
-	}
+          if (rmax >= next_goal) {
+            rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n",
+                                 rmax, sum_total, next_goal, sketch->temp.size);
+          }
+        }
         rmin = rmax;
         wmin = w;
         last_fvalue = fvalue;
@@ -375,13 +384,13 @@ class BaseMaker: public IUpdater {
       if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
         utils::Assert(sketch->temp.size <= max_size,
                       "Finalize: invalid maximum size, max_size=%u, stemp.size=%lu",
-                      sketch->temp.size, max_size );
+                      max_size, sketch->temp.size);
         // push to sketch
         sketch->temp.data[sketch->temp.size] =
             utils::WXQuantileSketch<bst_float, bst_float>::
             Entry(static_cast<bst_float>(rmin),
-			      static_cast<bst_float>(rmax),
-				  static_cast<bst_float>(wmin), last_fvalue);
+                  static_cast<bst_float>(rmax),
+                  static_cast<bst_float>(wmin), last_fvalue);
         ++sketch->temp.size;
       }
       sketch->PushTemp();
@@ -415,4 +424,4 @@ class BaseMaker: public IUpdater {
 };
 }  // namespace tree
 }  // namespace xgboost
-#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
+#endif  // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_
diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp
index db3581aac..e3070d495 100644
--- a/src/tree/updater_colmaker-inl.hpp
+++ b/src/tree/updater_colmaker-inl.hpp
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
-#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_colmaker-inl.hpp
  * \brief use columnwise update to construct a tree
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
+#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
+
 #include <vector>
 #include <cmath>
 #include <algorithm>
@@ -114,10 +116,13 @@ class ColMaker: public IUpdater {
     // initialize temp data structure
     inline void InitData(const std::vector<bst_gpair> &gpair,
                          const IFMatrix &fmat,
-                         const std::vector<unsigned> &root_index, const RegTree &tree) {
-      utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree");
+                         const std::vector<unsigned> &root_index,
+                         const RegTree &tree) {
+      utils::Assert(tree.param.num_nodes == tree.param.num_roots,
+                    "ColMaker: can only grow new tree");
       const std::vector<bst_uint> &rowset = fmat.buffered_rowset();
-      {// setup position
+      {
+        // setup position
         position.resize(gpair.size());
         if (root_index.size() == 0) {
           for (size_t i = 0; i < rowset.size(); ++i) {
@@ -127,7 +132,8 @@ class ColMaker: public IUpdater {
           for (size_t i = 0; i < rowset.size(); ++i) {
             const bst_uint ridx = rowset[i];
             position[ridx] = root_index[ridx];
-            utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, "root index exceed setting");
+            utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots,
+                          "root index exceed setting");
           }
         }
         // mark delete for the deleted datas
@@ -154,11 +160,12 @@ class ColMaker: public IUpdater {
         }
         unsigned n = static_cast<unsigned>(param.colsample_bytree * feat_index.size());
         random::Shuffle(feat_index);
-        //utils::Check(n > 0, "colsample_bytree is too small that no feature can be included");
-        utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included", param.colsample_bytree);
+        utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included",
+                     param.colsample_bytree);
         feat_index.resize(n);
       }
-      {// setup temp space for each thread
+      {
+        // setup temp space for each thread
         #pragma omp parallel
         {
           this->nthread = omp_get_num_threads();
@@ -171,20 +178,25 @@ class ColMaker: public IUpdater {
         }
         snode.reserve(256);
       }
-      {// expand query
+      {
+        // expand query
         qexpand_.reserve(256); qexpand_.clear();
         for (int i = 0; i < tree.param.num_roots; ++i) {
           qexpand_.push_back(i);
         }
       }
     }
-    /*! \brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */
+    /*!
+     * \brief initialize the base_weight, root_gain,
+     *  and NodeEntry for all the new nodes in qexpand
+     */
     inline void InitNewNode(const std::vector<int> &qexpand,
                             const std::vector<bst_gpair> &gpair,
                             const IFMatrix &fmat,
                             const BoosterInfo &info,
                             const RegTree &tree) {
-      {// setup statistics space for each tree node
+      {
+        // setup statistics space for each tree node
         for (size_t i = 0; i < stemp.size(); ++i) {
           stemp[i].resize(tree.param.num_nodes, ThreadEntry(param));
         }
@@ -226,7 +238,7 @@ class ColMaker: public IUpdater {
       }
       // use new nodes for qexpand
       qexpand = newnodes;
-    }    
+    }
     // parallel find the best split of current fid
     // this function does not support nested functions
     inline void ParallelFindSplit(const ColBatch::Inst &col,
@@ -280,26 +292,30 @@ class ColMaker: public IUpdater {
           ThreadEntry &e = stemp[tid][nid];
           float fsplit;
           if (tid != 0) {
-            if(std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
+            if (std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
               fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f;
             } else {
               continue;
             }
           } else {
             fsplit = e.first_fvalue - rt_eps;
-          }                        
+          }
           if (need_forward && tid != 0) {
             c.SetSubstract(snode[nid].stats, e.stats);
-            if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
-              bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+            if (c.sum_hess >= param.min_child_weight &&
+                e.stats.sum_hess >= param.min_child_weight) {
+              bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
+                                                          c.CalcGain(param) - snode[nid].root_gain);
               e.best.Update(loss_chg, fid, fsplit, false);
             }
           }
           if (need_backward) {
             tmp.SetSubstract(sum, e.stats);
             c.SetSubstract(snode[nid].stats, tmp);
-            if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
-              bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+            if (c.sum_hess >= param.min_child_weight &&
+                tmp.sum_hess >= param.min_child_weight) {
+              bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
+                                                          c.CalcGain(param) - snode[nid].root_gain);
               e.best.Update(loss_chg, fid, fsplit, true);
             }
           }
@@ -308,8 +324,10 @@ class ColMaker: public IUpdater {
           tmp = sum;
           ThreadEntry &e = stemp[nthread-1][nid];
           c.SetSubstract(snode[nid].stats, tmp);
-          if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
-            bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+          if (c.sum_hess >= param.min_child_weight &&
+              tmp.sum_hess >= param.min_child_weight) {
+            bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) +
+                                                        c.CalcGain(param) - snode[nid].root_gain);
             e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true);
           }
         }
@@ -335,25 +353,31 @@ class ColMaker: public IUpdater {
             e.first_fvalue = fvalue;
           } else {
             // forward default right
-            if (std::abs(fvalue - e.first_fvalue) > rt_2eps){
-              if (need_forward) { 
+            if (std::abs(fvalue - e.first_fvalue) > rt_2eps) {
+              if (need_forward) {
                 c.SetSubstract(snode[nid].stats, e.stats);
-                if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
-                  bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+                if (c.sum_hess >= param.min_child_weight &&
+                    e.stats.sum_hess >= param.min_child_weight) {
+                  bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
+                                                              c.CalcGain(param) -
+                                                              snode[nid].root_gain);
                   e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false);
                 }
               }
               if (need_backward) {
                 cright.SetSubstract(e.stats_extra, e.stats);
                 c.SetSubstract(snode[nid].stats, cright);
-                if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) {
-                  bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+                if (c.sum_hess >= param.min_child_weight &&
+                    cright.sum_hess >= param.min_child_weight) {
+                  bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) +
+                                                              c.CalcGain(param) -
+                                                              snode[nid].root_gain);
                   e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true);
                 }
               }
-            }          
+            }
             e.stats.Add(gpair, info, ridx);
-            e.first_fvalue = fvalue;            
+            e.first_fvalue = fvalue;
           }
         }
       }
@@ -361,7 +385,7 @@ class ColMaker: public IUpdater {
     // update enumeration solution
     inline void UpdateEnumeration(int nid, bst_gpair gstats,
                                   float fvalue, int d_step, bst_uint fid,
-                                  TStats &c, std::vector<ThreadEntry> &temp) {
+                                  TStats &c, std::vector<ThreadEntry> &temp) { // NOLINT(*)
       // get the statistics of nid
       ThreadEntry &e = temp[nid];
       // test if first hit, this is fine, because we set 0 during init
@@ -370,10 +394,12 @@ class ColMaker: public IUpdater {
         e.last_fvalue = fvalue;
       } else {
         // try to find a split
-        if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
+        if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
+            e.stats.sum_hess >= param.min_child_weight) {
           c.SetSubstract(snode[nid].stats, e.stats);
           if (c.sum_hess >= param.min_child_weight) {
-            bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+            bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
+                                                        c.CalcGain(param) - snode[nid].root_gain);
             e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
           }
         }
@@ -388,7 +414,7 @@ class ColMaker: public IUpdater {
                                        int d_step,
                                        bst_uint fid,
                                        const std::vector<bst_gpair> &gpair,
-                                       std::vector<ThreadEntry> &temp) {
+                                       std::vector<ThreadEntry> &temp) { // NOLINT(*)
       const std::vector<int> &qexpand = qexpand_;
       // clear all the temp statistics
       for (size_t j = 0; j < qexpand.size(); ++j) {
@@ -423,7 +449,7 @@ class ColMaker: public IUpdater {
           this->UpdateEnumeration(nid, buf_gpair[i],
                                   p->fvalue, d_step,
                                   fid, c, temp);
-        }        
+        }
       }
       // finish up the ending piece
       for (it = align_end, i = 0; it != end; ++i, it += d_step) {
@@ -436,14 +462,15 @@ class ColMaker: public IUpdater {
         this->UpdateEnumeration(nid, buf_gpair[i],
                                 it->fvalue, d_step,
                                 fid, c, temp);
-      }            
+      }
       // finish updating all statistics, check if it is possible to include all sum statistics
       for (size_t i = 0; i < qexpand.size(); ++i) {
         const int nid = qexpand[i];
         ThreadEntry &e = temp[nid];
         c.SetSubstract(snode[nid].stats, e.stats);
         if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
-          bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+          bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
+                                                      c.CalcGain(param) - snode[nid].root_gain);
           const float gap = std::abs(e.last_fvalue) + rt_eps;
           const float delta = d_step == +1 ? gap: -gap;
           e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@@ -458,7 +485,7 @@ class ColMaker: public IUpdater {
                                bst_uint fid,
                                const std::vector<bst_gpair> &gpair,
                                const BoosterInfo &info,
-                               std::vector<ThreadEntry> &temp) {
+                               std::vector<ThreadEntry> &temp) { // NOLINT(*)
       // use cacheline aware optimization
       if (TStats::kSimpleStats != 0 && param.cache_opt != 0) {
         EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp);
@@ -471,7 +498,7 @@ class ColMaker: public IUpdater {
       }
       // left statistics
       TStats c(param);
-      for(const ColBatch::Entry *it = begin; it != end; it += d_step) {
+      for (const ColBatch::Entry *it = begin; it != end; it += d_step) {
         const bst_uint ridx = it->index;
         const int nid = position[ridx];
         if (nid < 0) continue;
@@ -485,10 +512,12 @@ class ColMaker: public IUpdater {
           e.last_fvalue = fvalue;
         } else {
           // try to find a split
-          if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
+          if (std::abs(fvalue - e.last_fvalue) > rt_2eps &&
+              e.stats.sum_hess >= param.min_child_weight) {
             c.SetSubstract(snode[nid].stats, e.stats);
             if (c.sum_hess >= param.min_child_weight) {
-              bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+              bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
+                                                          c.CalcGain(param) - snode[nid].root_gain);
               e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1);
             }
           }
@@ -503,7 +532,8 @@ class ColMaker: public IUpdater {
         ThreadEntry &e = temp[nid];
         c.SetSubstract(snode[nid].stats, e.stats);
         if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
-          bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
+          bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) +
+                                                      c.CalcGain(param) - snode[nid].root_gain);
           const float gap = std::abs(e.last_fvalue) + rt_eps;
           const float delta = d_step == +1 ? gap: -gap;
           e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
@@ -511,14 +541,14 @@ class ColMaker: public IUpdater {
       }
     }
 
-    // update the solution candidate 
+    // update the solution candidate
     virtual void UpdateSolution(const ColBatch &batch,
                                 const std::vector<bst_gpair> &gpair,
                                 const IFMatrix &fmat,
                                 const BoosterInfo &info) {
       // start enumeration
       const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
-      #if defined(_OPENMP)                                                                
+      #if defined(_OPENMP)
       const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
       #endif
       int poption = param.parallel_option;
@@ -533,11 +563,11 @@ class ColMaker: public IUpdater {
           const ColBatch::Inst c = batch[i];
           const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue;
           if (param.need_forward_search(fmat.GetColDensity(fid), ind)) {
-            this->EnumerateSplit(c.data, c.data + c.length, +1, 
+            this->EnumerateSplit(c.data, c.data + c.length, +1,
                                  fid, gpair, info, stemp[tid]);
           }
           if (param.need_backward_search(fmat.GetColDensity(fid), ind)) {
-            this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, 
+            this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1,
                                  fid, gpair, info, stemp[tid]);
           }
         }
@@ -546,7 +576,7 @@ class ColMaker: public IUpdater {
           this->ParallelFindSplit(batch[i], batch.col_index[i],
                                   fmat, gpair, info);
         }
-      }      
+      }
     }
     // find splits at current level, do split per level
     inline void FindSplit(int depth,
@@ -571,7 +601,7 @@ class ColMaker: public IUpdater {
       // get the best result, we can synchronize the solution
       for (size_t i = 0; i < qexpand.size(); ++i) {
         const int nid = qexpand[i];
-        NodeEntry &e = snode[nid];        
+        NodeEntry &e = snode[nid];
         // now we know the solution in snode[nid], set split
         if (e.best.loss_chg > rt_eps) {
           p_tree->AddChilds(nid);
@@ -582,19 +612,20 @@ class ColMaker: public IUpdater {
         } else {
           (*p_tree)[nid].set_leaf(e.weight * param.learning_rate);
         }
-      } 
+      }
     }
     // reset position of each data points after split is created in the tree
-    inline void ResetPosition(const std::vector<int> &qexpand, IFMatrix *p_fmat, const RegTree &tree) {
+    inline void ResetPosition(const std::vector<int> &qexpand,
+                              IFMatrix *p_fmat, const RegTree &tree) {
       // set the positions in the nondefault
-      this->SetNonDefaultPosition(qexpand, p_fmat, tree);      
+      this->SetNonDefaultPosition(qexpand, p_fmat, tree);
       // set rest of instances to default position
       const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
       // set default direct nodes to default
-      // for leaf nodes that are not fresh, mark then to ~nid, 
+      // for leaf nodes that are not fresh, mark them to ~nid,
       // so that they are ignored in future statistics collection
       const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
-      
+
       #pragma omp parallel for schedule(static)
       for (bst_omp_uint i = 0; i < ndata; ++i) {
         const bst_uint ridx = rowset[i];
@@ -655,7 +686,7 @@ class ColMaker: public IUpdater {
             const float fvalue = col[j].fvalue;
             // go back to parent, correct those who are not default
             if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
-              if(fvalue < tree[nid].split_cond()) {
+              if (fvalue < tree[nid].split_cond()) {
                 this->SetEncodePosition(ridx, tree[nid].cleft());
               } else {
                 this->SetEncodePosition(ridx, tree[nid].cright());
@@ -667,7 +698,7 @@ class ColMaker: public IUpdater {
     }
     // utils to get/set position, with encoded format
     // return decoded position
-    inline int DecodePosition(bst_uint ridx) const{
+    inline int DecodePosition(bst_uint ridx) const {
       const int pid = position[ridx];
       return pid < 0 ? ~pid : pid;
     }
@@ -679,7 +710,7 @@ class ColMaker: public IUpdater {
         position[ridx] = nid;
       }
     }
-    //--data fields--
+    //  --data fields--
     const TrainParam &param;
     // number of omp thread used during training
     int nthread;
diff --git a/src/tree/updater_distcol-inl.hpp b/src/tree/updater_distcol-inl.hpp
index c989f4e47..e3d3f8b59 100644
--- a/src/tree/updater_distcol-inl.hpp
+++ b/src/tree/updater_distcol-inl.hpp
@@ -1,11 +1,15 @@
-#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
-#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_distcol-inl.hpp
- * \brief beta distributed version that takes a sub-column 
+ * \brief beta distributed version that takes a sub-column
  *        and construct a tree
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
+#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
+
+#include <vector>
+#include <algorithm>
 #include "../sync/sync.h"
 #include "../utils/bitmap.h"
 #include "../utils/io.h"
@@ -27,7 +31,7 @@ class DistColMaker : public ColMaker<TStats> {
   virtual void Update(const std::vector<bst_gpair> &gpair,
                       IFMatrix *p_fmat,
                       const BoosterInfo &info,
-                      const std::vector<RegTree*> &trees) {    
+                      const std::vector<RegTree*> &trees) {
     TStats::CheckInfo(info);
     utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time");
     // build the tree
@@ -39,11 +43,12 @@ class DistColMaker : public ColMaker<TStats> {
   }
   virtual const int* GetLeafPosition(void) const {
     return builder.GetLeafPosition();
-  }  
+  }
+
  private:
   struct Builder : public ColMaker<TStats>::Builder {
    public:
-    Builder(const TrainParam &param) 
+    explicit Builder(const TrainParam &param)
         : ColMaker<TStats>::Builder(param) {
     }
     inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) {
@@ -63,7 +68,8 @@ class DistColMaker : public ColMaker<TStats> {
     virtual const int* GetLeafPosition(void) const {
       return BeginPtr(this->position);
     }
-   protected:    
+
+   protected:
     virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
                                        IFMatrix *p_fmat, const RegTree &tree) {
       // step 2, classify the non-default data into right places
@@ -87,7 +93,7 @@ class DistColMaker : public ColMaker<TStats> {
         #pragma omp parallel for schedule(static)
         for (bst_omp_uint j = 0; j < ndata; ++j) {
             boolmap[j] = 0;
-        }        
+        }
       }
       utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
       while (iter->Next()) {
@@ -111,7 +117,7 @@ class DistColMaker : public ColMaker<TStats> {
           }
         }
       }
-      
+
       bitmap.InitFromBool(boolmap);
       // communicate bitmap
       rabit::Allreduce<rabit::op::BitOR>(BeginPtr(bitmap.data), bitmap.data.size());
@@ -142,7 +148,7 @@ class DistColMaker : public ColMaker<TStats> {
         }
         vec.push_back(this->snode[nid].best);
       }
-      // TODO, lazy version
+      // TODO(tqchen) lazy version
       // communicate best solution
       reducer.Allreduce(BeginPtr(vec), vec.size());
       // assign solution back
@@ -151,7 +157,7 @@ class DistColMaker : public ColMaker<TStats> {
         this->snode[nid].best = vec[i];
       }
     }
-    
+
    private:
     utils::BitMap bitmap;
     std::vector<int> boolmap;
@@ -162,8 +168,8 @@ class DistColMaker : public ColMaker<TStats> {
   // training parameter
   TrainParam param;
   // pointer to the builder
-  Builder builder; 
+  Builder builder;
 };
 }  // namespace tree
 }  // namespace xgboost
-#endif
+#endif  // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
diff --git a/src/tree/updater_histmaker-inl.hpp b/src/tree/updater_histmaker-inl.hpp
index f739f23f3..d86204e4b 100644
--- a/src/tree/updater_histmaker-inl.hpp
+++ b/src/tree/updater_histmaker-inl.hpp
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
-#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_histmaker-inl.hpp
  * \brief use histogram counting to construct a tree
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
+#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_
+
 #include <vector>
 #include <algorithm>
 #include "../sync/sync.h"
@@ -38,7 +40,7 @@ class HistMaker: public BaseMaker {
   struct HistUnit {
     /*! \brief cutting point of histogram, contains maximum point */
     const bst_float *cut;
-    /*! \brief content of statistics data */    
+    /*! \brief content of statistics data */
     TStats *data;
     /*! \brief size of histogram */
     unsigned size;
@@ -48,13 +50,13 @@ class HistMaker: public BaseMaker {
     HistUnit(const bst_float *cut, TStats *data, unsigned size)
         : cut(cut), data(data), size(size) {}
     /*! \brief add a histogram to data */
-    inline void Add(bst_float fv, 
+    inline void Add(bst_float fv,
                     const std::vector<bst_gpair> &gpair,
                     const BoosterInfo &info,
                     const bst_uint ridx) {
       unsigned i = std::upper_bound(cut, cut + size, fv) - cut;
       utils::Assert(size != 0, "try insert into size=0");
-      utils::Assert(i < size, 
+      utils::Assert(i < size,
                     "maximum value must be in cut, fv = %g, cutmax=%g", fv, cut[size-1]);
       data[i].Add(gpair, info, ridx);
     }
@@ -74,7 +76,7 @@ class HistMaker: public BaseMaker {
                       rptr[fid+1] - rptr[fid]);
     }
   };
-  // thread workspace 
+  // thread workspace
   struct ThreadWSpace {
     /*! \brief actual unit pointer */
     std::vector<unsigned> rptr;
@@ -92,7 +94,7 @@ class HistMaker: public BaseMaker {
         }
         hset[tid].rptr = BeginPtr(rptr);
         hset[tid].cut = BeginPtr(cut);
-        hset[tid].data.resize(cut.size(), TStats(param));        
+        hset[tid].data.resize(cut.size(), TStats(param));
       }
     }
     // aggregate all statistics to hset[0]
@@ -147,7 +149,7 @@ class HistMaker: public BaseMaker {
   }
   // this function does two jobs
   // (1) reset the position in array position, to be the latest leaf id
-  // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly 
+  // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly
   virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
                                   IFMatrix *p_fmat,
                                   const BoosterInfo &info,
@@ -171,8 +173,9 @@ class HistMaker: public BaseMaker {
                           const BoosterInfo &info,
                           const std::vector <bst_uint> &fset,
                           const RegTree &tree)  = 0;
+
  private:
-  inline void EnumerateSplit(const HistUnit &hist, 
+  inline void EnumerateSplit(const HistUnit &hist,
                              const TStats &node_sum,
                              bst_uint fid,
                              SplitEntry *best,
@@ -187,7 +190,7 @@ class HistMaker: public BaseMaker {
         c.SetSubstract(node_sum, s);
         if (c.sum_hess >= param.min_child_weight) {
           double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
-          if (best->Update((float)loss_chg, fid, hist.cut[i], false)) {
+          if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i], false)) {
             *left_sum = s;
           }
         }
@@ -200,7 +203,7 @@ class HistMaker: public BaseMaker {
         c.SetSubstract(node_sum, s);
         if (c.sum_hess >= param.min_child_weight) {
           double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
-          if (best->Update((float)loss_chg, fid, hist.cut[i-1], true)) {
+          if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i-1], true)) {
             *left_sum = c;
           }
         }
@@ -216,22 +219,22 @@ class HistMaker: public BaseMaker {
     const size_t num_feature = fset.size();
     // get the best split condition for each node
     std::vector<SplitEntry> sol(qexpand.size());
-    std::vector<TStats> left_sum(qexpand.size());    
+    std::vector<TStats> left_sum(qexpand.size());
     bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
     #pragma omp parallel for schedule(dynamic, 1)
-    for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
+    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
       const int nid = qexpand[wid];
       utils::Assert(node2workindex[nid] == static_cast<int>(wid),
                     "node2workindex inconsistent");
       SplitEntry &best = sol[wid];
       TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
-      for (size_t i = 0; i < fset.size(); ++ i) {
+      for (size_t i = 0; i < fset.size(); ++i) {
         EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)],
                        node_sum, fset[i], &best, &left_sum[wid]);
       }
     }
     // get the best result, we can synchronize the solution
-    for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
+    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
       const int nid = qexpand[wid];
       const SplitEntry &best = sol[wid];
       const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
@@ -244,7 +247,7 @@ class HistMaker: public BaseMaker {
         (*p_tree)[nid].set_split(best.split_index(),
                                  best.split_value, best.default_left());
         // mark right child as 0, to indicate fresh leaf
-        (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);        
+        (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
         (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
         // right side sum
         TStats right_sum;
@@ -256,11 +259,11 @@ class HistMaker: public BaseMaker {
       }
     }
   }
-  
+
   inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
     p_tree->stat(nid).base_weight = static_cast<float>(node_sum.CalcWeight(param));
     p_tree->stat(nid).sum_hess = static_cast<float>(node_sum.sum_hess);
-    node_sum.SetLeafVec(param, p_tree->leafvec(nid));    
+    node_sum.SetLeafVec(param, p_tree->leafvec(nid));
   }
 };
 
@@ -270,7 +273,7 @@ class CQHistMaker: public HistMaker<TStats> {
   struct HistEntry {
     typename HistMaker<TStats>::HistUnit hist;
     unsigned istart;
-    /*! 
+    /*!
      * \brief add a histogram to data,
      * do linear scan, start from istart
      */
@@ -282,7 +285,7 @@ class CQHistMaker: public HistMaker<TStats> {
       utils::Assert(istart != hist.size, "the bound variable must be max");
       hist.data[istart].Add(gpair, info, ridx);
     }
-    /*! 
+    /*!
      * \brief add a histogram to data,
      * do linear scan, start from istart
      */
@@ -302,7 +305,7 @@ class CQHistMaker: public HistMaker<TStats> {
     feat_helper.InitByCol(p_fmat, tree);
     feat_helper.SampleCol(this->param.colsample_bytree, p_fset);
   }
-  // code to create histogram  
+  // code to create histogram
   virtual void CreateHist(const std::vector<bst_gpair> &gpair,
                           IFMatrix *p_fmat,
                           const BoosterInfo &info,
@@ -313,7 +316,7 @@ class CQHistMaker: public HistMaker<TStats> {
     std::fill(feat2workindex.begin(), feat2workindex.end(), -1);
     for (size_t i = 0; i < fset.size(); ++i) {
       feat2workindex[fset[i]] = static_cast<int>(i);
-    } 
+    }
     // start to work
     this->wspace.Init(this->param, 1);
     // if it is C++11, use lazy evaluation for Allreduce,
@@ -350,11 +353,11 @@ class CQHistMaker: public HistMaker<TStats> {
     // sync the histogram
     // if it is C++11, use lazy evaluation for Allreduce
 #if __cplusplus >= 201103L
-    this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), 
+    this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data),
                             this->wspace.hset[0].data.size(), lazy_get_hist);
 #else
-    this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size());   
-#endif    
+    this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size());
+#endif
   }
   virtual void ResetPositionAfterSplit(IFMatrix *p_fmat,
                                        const RegTree &tree) {
@@ -374,11 +377,11 @@ class CQHistMaker: public HistMaker<TStats> {
         feat2workindex[fset[i]] = static_cast<int>(freal_set.size());
         freal_set.push_back(fset[i]);
       } else {
-        feat2workindex[fset[i]] = -2;  
+        feat2workindex[fset[i]] = -2;
       }
     }
     this->GetNodeStats(gpair, *p_fmat, tree, info,
-                       &thread_stats, &node_stats);       
+                       &thread_stats, &node_stats);
     sketchs.resize(this->qexpand.size() * freal_set.size());
     for (size_t i = 0; i < sketchs.size(); ++i) {
       sketchs[i].Init(info.num_row, this->param.sketch_eps);
@@ -394,7 +397,8 @@ class CQHistMaker: public HistMaker<TStats> {
 #if __cplusplus >= 201103L
     auto lazy_get_summary = [&]()
 #endif
-    {// get smmary
+        {
+      // get summary
       thread_sketch.resize(this->get_nthread());
       // number of rows in
       const size_t nrows = p_fmat->buffered_rowset().size();
@@ -457,9 +461,9 @@ class CQHistMaker: public HistMaker<TStats> {
           this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
         } else {
           utils::Assert(offset == -2, "BUG in mark");
-          bst_float cpt = feat_helper.MaxValue(fset[i]);        
+          bst_float cpt = feat_helper.MaxValue(fset[i]);
           this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps);
-          this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));        
+          this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
         }
       }
       // reserve last value for global statistics
@@ -470,7 +474,7 @@ class CQHistMaker: public HistMaker<TStats> {
                   (fset.size() + 1) * this->qexpand.size() + 1,
                   "cut space inconsistent");
   }
-  
+
  private:
   inline void UpdateHistCol(const std::vector<bst_gpair> &gpair,
                             const ColBatch::Inst &c,
@@ -554,9 +558,9 @@ class CQHistMaker: public HistMaker<TStats> {
       }
     } else {
       for (size_t i = 0; i < this->qexpand.size(); ++i) {
-        const unsigned nid = this->qexpand[i];        
+        const unsigned nid = this->qexpand[i];
         sbuilder[nid].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);
-      } 
+      }
     }
     // if only one value, no need to do second pass
     if (c[0].fvalue  == c[c.length-1].fvalue) {
@@ -589,7 +593,7 @@ class CQHistMaker: public HistMaker<TStats> {
           if (nid >= 0) {
             sbuilder[nid].Push(c[j + i].fvalue, buf_hess[i], max_size);
           }
-        }        
+        }
       }
       for (bst_uint j = align_length; j < c.length; ++j) {
         const bst_uint ridx = c[j].index;
@@ -617,7 +621,7 @@ class CQHistMaker: public HistMaker<TStats> {
   // temp space to map feature id to working index
   std::vector<int> feat2workindex;
   // set of index from fset that are real
-  std::vector<bst_uint> freal_set; 
+  std::vector<bst_uint> freal_set;
   // thread temp data
   std::vector< std::vector<BaseMaker::SketchEntry> > thread_sketch;
   // used to hold statistics
@@ -631,18 +635,18 @@ class CQHistMaker: public HistMaker<TStats> {
   // reducer for summary
   rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
   // per node, per feature sketch
-  std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;  
+  std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
 };
 
 template<typename TStats>
-class QuantileHistMaker: public HistMaker<TStats> {  
+class QuantileHistMaker: public HistMaker<TStats> {
  protected:
   typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
   virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
                                   IFMatrix *p_fmat,
                                   const BoosterInfo &info,
                                   const std::vector <bst_uint> &fset,
-                                  const RegTree &tree) {    
+                                  const RegTree &tree) {
     // initialize the data structure
     int nthread = BaseMaker::get_nthread();
     sketchs.resize(this->qexpand.size() * tree.param.num_feature);
@@ -658,7 +662,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
       utils::ParallelGroupBuilder<SparseBatch::Entry> builder(&col_ptr, &col_data, &thread_col_ptr);
       builder.InitBudget(tree.param.num_feature, nthread);
 
-      const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);      
+      const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
       #pragma omp parallel for schedule(static)
       for (bst_omp_uint i = 0; i < nbatch; ++i) {
         RowBatch::Inst inst = batch[i];
@@ -667,11 +671,11 @@ class QuantileHistMaker: public HistMaker<TStats> {
         if (nid >= 0) {
           if (!tree[nid].is_leaf()) {
             this->position[ridx] = nid = HistMaker<TStats>::NextLevel(inst, tree, nid);
-          } 
+          }
           if (this->node2workindex[nid] < 0) {
             this->position[ridx] = ~nid;
-          } else{
-            for (bst_uint j = 0; j < inst.length; ++j) { 
+          } else {
+            for (bst_uint j = 0; j < inst.length; ++j) {
               builder.AddBudget(inst[j].index, omp_get_thread_num());
             }
           }
@@ -712,8 +716,8 @@ class QuantileHistMaker: public HistMaker<TStats> {
       summary_array[i].Reserve(max_size);
       summary_array[i].SetPrune(out, max_size);
     }
-    
-    size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);    
+
+    size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
     sreducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());
     // now we get the final result of sketch, setup the cut
     this->wspace.cut.clear();
diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp
index e7e5f9f0b..dc99e94e4 100644
--- a/src/tree/updater_prune-inl.hpp
+++ b/src/tree/updater_prune-inl.hpp
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
-#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_prune-inl.hpp
- * \brief prune a tree given the statistics 
+ * \brief prune a tree given the statistics
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
+#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
+
 #include <vector>
 #include "./param.h"
 #include "./updater.h"
@@ -37,9 +39,10 @@ class TreePruner: public IUpdater {
     param.learning_rate = lr;
     syncher.Update(gpair, p_fmat, info, trees);
   }
+
  private:
   // try to prune off current leaf
-  inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) {
+  inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
     if (tree[nid].is_root()) return npruned;
     int pid = tree[nid].parent();
     RegTree::NodeStat &s = tree.stat(pid);
@@ -51,10 +54,10 @@ class TreePruner: public IUpdater {
       return this->TryPruneLeaf(tree, pid, depth - 1, npruned+2);
     } else {
       return npruned;
-    }    
+    }
   }
   /*! \brief do prunning of a tree */
-  inline void DoPrune(RegTree &tree) {
+  inline void DoPrune(RegTree &tree) { // NOLINT(*)
     int npruned = 0;
     // initialize auxiliary statistics
     for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp
index 8613c8ea6..b6c5ee89e 100644
--- a/src/tree/updater_refresh-inl.hpp
+++ b/src/tree/updater_refresh-inl.hpp
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
-#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_refresh-inl.hpp
  * \brief refresh the statistics and leaf value on the tree on the dataset
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
+#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_
+
 #include <vector>
 #include <limits>
 #include "../sync/sync.h"
@@ -27,7 +29,7 @@ class TreeRefresher: public IUpdater {
   virtual void Update(const std::vector<bst_gpair> &gpair,
                       IFMatrix *p_fmat,
                       const BoosterInfo &info,
-                      const std::vector<RegTree*> &trees) {        
+                      const std::vector<RegTree*> &trees) {
     if (trees.size() == 0) return;
     // number of threads
     // thread temporal space
@@ -100,7 +102,7 @@ class TreeRefresher: public IUpdater {
     float lr = param.learning_rate;
     param.learning_rate = lr / trees.size();
     int offset = 0;
-    for (size_t i = 0; i < trees.size(); ++i) {      
+    for (size_t i = 0; i < trees.size(); ++i) {
       for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) {
         this->Refresh(BeginPtr(stemp[0]) + offset, rid, trees[i]);
       }
@@ -147,7 +149,7 @@ class TreeRefresher: public IUpdater {
   // training parameter
   TrainParam param;
   // reducer
-  rabit::Reducer<TStats, TStats::Reduce> reducer;  
+  rabit::Reducer<TStats, TStats::Reduce> reducer;
 };
 
 }  // namespace tree
diff --git a/src/tree/updater_skmaker-inl.hpp b/src/tree/updater_skmaker-inl.hpp
index 6bc2fc39a..ade22011b 100644
--- a/src/tree/updater_skmaker-inl.hpp
+++ b/src/tree/updater_skmaker-inl.hpp
@@ -1,11 +1,13 @@
-#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
-#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_skmaker-inl.hpp
  * \brief use approximation sketch to construct a tree,
           a refresh is needed to make the statistics exactly correct
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
+#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
+
 #include <vector>
 #include <algorithm>
 #include "../sync/sync.h"
@@ -30,7 +32,7 @@ class SketchMaker: public BaseMaker {
     }
     param.learning_rate = lr;
   }
- 
+
  protected:
   inline void Update(const std::vector<bst_gpair> &gpair,
                       IFMatrix *p_fmat,
@@ -79,9 +81,9 @@ class SketchMaker: public BaseMaker {
     double pos_grad;
     /*! \brief sum of all negative gradient */
     double neg_grad;
-    /*! \brief sum of hessian statistics */    
+    /*! \brief sum of hessian statistics */
     double sum_hess;
-    explicit SKStats(void) {}
+    SKStats(void) {}
     // constructor
     explicit SKStats(const TrainParam &param) {
       this->Clear();
@@ -123,7 +125,7 @@ class SketchMaker: public BaseMaker {
       sum_hess += b.sum_hess;
     }
     /*! \brief same as add, reduce is used in All Reduce */
-    inline static void Reduce(SKStats &a, const SKStats &b) {
+    inline static void Reduce(SKStats &a, const SKStats &b) { // NOLINT(*)
       a.Add(b);
     }
     /*! \brief set leaf vector value based on statistics */
@@ -139,7 +141,7 @@ class SketchMaker: public BaseMaker {
       sketchs[i].Init(info.num_row, this->param.sketch_eps);
     }
     thread_sketch.resize(this->get_nthread());
-    // number of rows in 
+    // number of rows in
     const size_t nrows = p_fmat->buffered_rowset().size();
     // start accumulating statistics
     utils::IIterator<ColBatch> *iter = p_fmat->ColIterator();
@@ -156,7 +158,7 @@ class SketchMaker: public BaseMaker {
                               batch[i].length == nrows,
                               &thread_sketch[omp_get_thread_num()]);
       }
-    }       
+    }
     // setup maximum size
     unsigned max_size = param.max_sketch_size();
     // synchronize sketch
@@ -167,8 +169,8 @@ class SketchMaker: public BaseMaker {
       summary_array[i].Reserve(max_size);
       summary_array[i].SetPrune(out, max_size);
     }
-    size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);    
-    sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());    
+    size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
+    sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());
   }
   // update sketch information in column fid
   inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair,
@@ -209,7 +211,7 @@ class SketchMaker: public BaseMaker {
         const unsigned nid = this->qexpand[i];
         sbuilder[3 * nid + 0].sum_total = static_cast<bst_float>(nstats[nid].pos_grad);
         sbuilder[3 * nid + 1].sum_total = static_cast<bst_float>(nstats[nid].neg_grad);
-        sbuilder[3 * nid + 2].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);        
+        sbuilder[3 * nid + 2].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);
       }
     }
     // if only one value, no need to do second pass
@@ -217,7 +219,9 @@ class SketchMaker: public BaseMaker {
       for (size_t i = 0; i < this->qexpand.size(); ++i) {
         const int nid = this->qexpand[i];
         for (int k = 0; k < 3; ++k) {
-          sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[3 * nid + k].sum_total));
+          sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
+                                             static_cast<bst_float>(
+                                                 sbuilder[3 * nid + k].sum_total));
         }
       }
       return;
@@ -250,7 +254,7 @@ class SketchMaker: public BaseMaker {
         sbuilder[3 * nid + k].Finalize(max_size);
       }
     }
-  }  
+  }
   inline void SyncNodeStats(void) {
     utils::Assert(qexpand.size() != 0, "qexpand must not be empty");
     std::vector<SKStats> tmp(qexpand.size());
@@ -272,12 +276,12 @@ class SketchMaker: public BaseMaker {
     std::vector<SplitEntry> sol(qexpand.size());
     bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
     #pragma omp parallel for schedule(dynamic, 1)
-    for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
+    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
       const int nid = qexpand[wid];
       utils::Assert(node2workindex[nid] == static_cast<int>(wid),
                     "node2workindex inconsistent");
       SplitEntry &best = sol[wid];
-      for (bst_uint fid = 0; fid < num_feature; ++ fid) {
+      for (bst_uint fid = 0; fid < num_feature; ++fid) {
         unsigned base = (wid * p_tree->param.num_feature + fid) * 3;
         EnumerateSplit(summary_array[base + 0],
                        summary_array[base + 1],
@@ -286,7 +290,7 @@ class SketchMaker: public BaseMaker {
       }
     }
     // get the best result, we can synchronize the solution
-    for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) {
+    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
       const int nid = qexpand[wid];
       const SplitEntry &best = sol[wid];
       // set up the values
@@ -337,7 +341,7 @@ class SketchMaker: public BaseMaker {
     feat_sum.neg_grad = neg_grad.data[neg_grad.size - 1].rmax;
     feat_sum.sum_hess = sum_hess.data[sum_hess.size - 1].rmax;
     size_t ipos = 0, ineg = 0, ihess = 0;
-    for (size_t i = 1; i < fsplits.size(); ++i) {      
+    for (size_t i = 1; i < fsplits.size(); ++i) {
       WXQSketch::Entry pos = pos_grad.Query(fsplits[i], ipos);
       WXQSketch::Entry neg = neg_grad.Query(fsplits[i], ineg);
       WXQSketch::Entry hess = sum_hess.Query(fsplits[i], ihess);
@@ -345,11 +349,11 @@ class SketchMaker: public BaseMaker {
       s.pos_grad = 0.5f * (pos.rmin + pos.rmax - pos.wmin);
       s.neg_grad = 0.5f * (neg.rmin + neg.rmax - neg.wmin);
       s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin);
-      c.SetSubstract(node_sum, s);      
+      c.SetSubstract(node_sum, s);
       // forward
       if (s.sum_hess >= param.min_child_weight &&
           c.sum_hess >= param.min_child_weight) {
-        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;        
+        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
         best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], false);
       }
       // backward
@@ -357,22 +361,23 @@ class SketchMaker: public BaseMaker {
       s.SetSubstract(node_sum, c);
       if (s.sum_hess >= param.min_child_weight &&
           c.sum_hess >= param.min_child_weight) {
-        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;        
+        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
         best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true);
-      }      
+      }
     }
-    {// all including
+    {
+      // all including
       SKStats s = feat_sum, c;
       c.SetSubstract(node_sum, s);
       if (s.sum_hess >= param.min_child_weight &&
           c.sum_hess >= param.min_child_weight) {
         bst_float cpt = fsplits.back();
-        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;        
+        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
         best->Update(static_cast<bst_float>(loss_chg), fid, cpt + fabsf(cpt) + 1.0f, false);
       }
     }
   }
-   
+
   // thread temp data
   // used to hold temporal sketch
   std::vector< std::vector<SketchEntry> > thread_sketch;
@@ -389,6 +394,6 @@ class SketchMaker: public BaseMaker {
   // per node, per feature sketch
   std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
 };
-}  // tree
-}  // xgboost
-#endif
+}  // namespace tree
+}  // namespace xgboost
+#endif  // XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_
diff --git a/src/tree/updater_sync-inl.hpp b/src/tree/updater_sync-inl.hpp
index 2aa534aa8..e76d1f76d 100644
--- a/src/tree/updater_sync-inl.hpp
+++ b/src/tree/updater_sync-inl.hpp
@@ -1,18 +1,21 @@
-#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
-#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file updater_sync-inl.hpp
  * \brief synchronize the tree in all distributed nodes
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
+#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_
+
 #include <vector>
+#include <string>
 #include <limits>
 #include "../sync/sync.h"
 #include "./updater.h"
 
 namespace xgboost {
 namespace tree {
-/*! 
+/*!
  * \brief syncher that synchronize the tree in all distributed nodes
  * can implement various strategies, so far it is always set to node 0's tree
  */
@@ -28,7 +31,7 @@ class TreeSyncher: public IUpdater {
                       const std::vector<RegTree*> &trees) {
     this->SyncTrees(trees);
   }
-  
+
  private:
   // synchronize the trees in different nodes, take tree from rank 0
   inline void SyncTrees(const std::vector<RegTree *> &trees) {
@@ -43,7 +46,7 @@ class TreeSyncher: public IUpdater {
     }
     fs.Seek(0);
     rabit::Broadcast(&s_model, 0);
-    for (size_t i = 0; i < trees.size(); ++i) {      
+    for (size_t i = 0; i < trees.size(); ++i) {
       trees[i]->LoadModel(fs);
     }
   }
diff --git a/src/utils/base64-inl.h b/src/utils/base64-inl.h
index 9fd5fc49f..49cd65254 100644
--- a/src/utils/base64-inl.h
+++ b/src/utils/base64-inl.h
@@ -1,13 +1,16 @@
-#ifndef XGBOOST_UTILS_BASE64_INL_H_
-#define XGBOOST_UTILS_BASE64_INL_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file base64.h
  * \brief data stream support to input and output from/to base64 stream
  * base64 is easier to store and pass as text format in mapreduce
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_BASE64_INL_H_
+#define XGBOOST_UTILS_BASE64_INL_H_
+
 #include <cctype>
 #include <cstdio>
+#include <string>
 #include "./io.h"
 
 namespace xgboost {
@@ -15,7 +18,7 @@ namespace utils {
 /*! \brief buffer reader of the stream that allows you to get */
 class StreamBufferReader {
  public:
-  StreamBufferReader(size_t buffer_size)
+  explicit StreamBufferReader(size_t buffer_size)
       :stream_(NULL),
        read_len_(1), read_ptr_(1) {
     buffer_.resize(buffer_size);
@@ -45,7 +48,7 @@ class StreamBufferReader {
   inline bool AtEnd(void) const {
     return read_len_ == 0;
   }
-  
+
  private:
   /*! \brief the underlying stream */
   IStream *stream_;
@@ -75,7 +78,7 @@ const char DecodeTable[] = {
 };
 static const char EncodeTable[] =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-} // namespace base64
+}  // namespace base64
 /*! \brief the stream that reads from base64, note we take from file pointers */
 class Base64InStream: public IStream {
  public:
@@ -83,8 +86,8 @@ class Base64InStream: public IStream {
     reader_.set_stream(fs);
     num_prev = 0; tmp_ch = 0;
   }
-  /*! 
-   * \brief initialize the stream position to beginning of next base64 stream 
+  /*!
+   * \brief initialize the stream position to beginning of next base64 stream
    * call this function before actually start read
    */
   inline void InitPosition(void) {
@@ -132,19 +135,19 @@ class Base64InStream: public IStream {
       {
         // second byte
         utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
-              "invalid base64 format");
+                     "invalid base64 format");
         nvalue |= DecodeTable[tmp_ch] << 12;
         *cptr++ = (nvalue >> 16) & 0xFF; --tlen;
       }
       {
         // third byte
         utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
-              "invalid base64 format");
+                     "invalid base64 format");
         // handle termination
         if (tmp_ch == '=') {
           utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
           utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
-                "invalid base64 format");
+                       "invalid base64 format");
           break;
         }
         nvalue |= DecodeTable[tmp_ch] << 6;
@@ -157,10 +160,10 @@ class Base64InStream: public IStream {
       {
         // fourth byte
         utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
-              "invalid base64 format");
+                     "invalid base64 format");
         if (tmp_ch == '=') {
           utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
-                "invalid base64 format");
+                       "invalid base64 format");
           break;
         }
         nvalue |= DecodeTable[tmp_ch];
@@ -240,13 +243,13 @@ class Base64OutStream: public IStream {
     if (endch != EOF) PutChar(endch);
     this->Flush();
   }
-    
- private:  
+
+ private:
   IStream *fp;
   int buf_top;
   unsigned char buf[4];
   std::string out_buf;
-  const static size_t kBufferSize = 256;
+  static const size_t kBufferSize = 256;
 
   inline void PutChar(char ch) {
     out_buf += ch;
@@ -260,5 +263,5 @@ class Base64OutStream: public IStream {
   }
 };
 }  // namespace utils
-}  // namespace rabit
-#endif  // RABIT_LEARN_UTILS_BASE64_INL_H_
+}  // namespace xgboost
+#endif  // XGBOOST_UTILS_BASE64_INL_H_
diff --git a/src/utils/bitmap.h b/src/utils/bitmap.h
index ba12caf41..eecccbda5 100644
--- a/src/utils/bitmap.h
+++ b/src/utils/bitmap.h
@@ -1,11 +1,13 @@
-#ifndef XGBOOST_UTILS_BITMAP_H_
-#define XGBOOST_UTILS_BITMAP_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file bitmap.h
  * \brief a simple implement of bitmap
  *  NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_BITMAP_H_
+#define XGBOOST_UTILS_BITMAP_H_
+
 #include <vector>
 #include "./utils.h"
 #include "./omp.h"
@@ -16,22 +18,22 @@ namespace utils {
 struct BitMap {
   /*! \brief internal data structure */
   std::vector<uint32_t> data;
-  /*! 
-   * \brief resize the bitmap to be certain size 
+  /*!
+   * \brief resize the bitmap to be certain size
    * \param size the size of bitmap
    */
   inline void Resize(size_t size) {
     data.resize((size + 31U) >> 5, 0);
   }
-  /*! 
-   * \brief query the i-th position of bitmap 
-   * \param i the position in 
+  /*!
+   * \brief query the i-th position of bitmap
+   * \param i the position in
    */
   inline bool Get(size_t i) const {
     return (data[i >> 5] >> (i & 31U)) & 1U;
   }
-  /*! 
-   * \brief set i-th position to true 
+  /*!
+   * \brief set i-th position to true
    * \param i position index
    */
   inline void SetTrue(size_t i) {
@@ -63,4 +65,4 @@ struct BitMap {
 };
 }  // namespace utils
 }  // namespace xgboost
-#endif
+#endif  // XGBOOST_UTILS_BITMAP_H_
diff --git a/src/utils/config.h b/src/utils/config.h
index 19f4980cf..43d7bc8bd 100644
--- a/src/utils/config.h
+++ b/src/utils/config.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_CONFIG_H_
-#define XGBOOST_UTILS_CONFIG_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file config.h
  * \brief helper class to load in configures from file
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_CONFIG_H_
+#define XGBOOST_UTILS_CONFIG_H_
+
 #include <cstdio>
 #include <cstring>
 #include <string>
@@ -14,26 +16,26 @@
 
 namespace xgboost {
 namespace utils {
-/*! 
+/*!
  * \brief base implementation of config reader
  */
 class ConfigReaderBase {
  public:
-  /*! 
+  /*!
    * \brief get current name, called after Next returns true
-   * \return current parameter name 
+   * \return current parameter name
    */
   inline const char *name(void) const {
     return s_name.c_str();
   }
-  /*! 
+  /*!
    * \brief get current value, called after Next returns true
-   * \return current parameter value 
+   * \return current parameter value
    */
   inline const char *val(void) const {
     return s_val.c_str();
   }
-  /*! 
+  /*!
    * \brief move iterator to next position
    * \return true if there is value in next position
    */
@@ -55,7 +57,7 @@ class ConfigReaderBase {
  protected:
   /*!
    * \brief to be implemented by subclass,
-   * get next token, return EOF if end of file 
+   * get next token, return EOF if end of file
    */
   virtual char GetChar(void) = 0;
   /*! \brief to be implemented by child, check if end of stream */
@@ -144,9 +146,9 @@ class ConfigReaderBase {
  */
 class ConfigStreamReader: public ConfigReaderBase {
  public:
-  /*! 
-   * \brief constructor 
-   * \param istream input stream 
+  /*!
+   * \brief constructor
+   * \param istream input stream
    */
   explicit ConfigStreamReader(std::istream &fin) : fin(fin) {}
 
@@ -163,13 +165,13 @@ class ConfigStreamReader: public ConfigReaderBase {
   std::istream &fin;
 };
 
-/*! 
+/*!
  * \brief an iterator that iterates over a configure file and gets the configures
  */
 class ConfigIterator: public ConfigStreamReader {
  public:
-  /*! 
-   * \brief constructor 
+  /*!
+   * \brief constructor
    * \param fname name of configure file
    */
   explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) {
diff --git a/src/utils/fmap.h b/src/utils/fmap.h
index 607f37013..218a61aa4 100644
--- a/src/utils/fmap.h
+++ b/src/utils/fmap.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_FMAP_H_
-#define XGBOOST_UTILS_FMAP_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file fmap.h
  * \brief helper class that holds the feature names and interpretations
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_FMAP_H_
+#define XGBOOST_UTILS_FMAP_H_
+
 #include <vector>
 #include <string>
 #include <cstring>
@@ -78,4 +80,4 @@ class FeatMap {
 
 }  // namespace utils
 }  // namespace xgboost
-#endif  // XGBOOST_FMAP_H_
+#endif  // XGBOOST_UTILS_FMAP_H_
diff --git a/src/utils/group_data.h b/src/utils/group_data.h
index 6e12a39ff..31f9c3a50 100644
--- a/src/utils/group_data.h
+++ b/src/utils/group_data.h
@@ -1,6 +1,5 @@
-#ifndef XGBOOST_UTILS_GROUP_DATA_H_
-#define XGBOOST_UTILS_GROUP_DATA_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file group_data.h
  * \brief this file defines utils to group data by integer keys
  *     Input: given input sequence (key,value), (k1,v1), (k2,v2)
@@ -12,6 +11,11 @@
  * The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_GROUP_DATA_H_
+#define XGBOOST_UTILS_GROUP_DATA_H_
+
+#include <vector>
+
 namespace xgboost {
 namespace utils {
 /*!
@@ -32,10 +36,10 @@ struct ParallelGroupBuilder {
                        std::vector< std::vector<SizeType> > *p_thread_rptr)
       : rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) {
   }
-  
+
  public:
   /*!
-   * \brief step 1: initialize the helper, with hint of number keys 
+   * \brief step 1: initialize the helper, with hint of number keys
    *                and thread used in the construction
    * \param nkeys number of keys in the matrix, can be smaller than expected
    * \param nthread number of thread that will be used in construction
@@ -56,7 +60,7 @@ struct ParallelGroupBuilder {
   inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) {
     std::vector<SizeType> &trptr = thread_rptr[threadid];
     if (trptr.size() < key + 1) {
-      trptr.resize(key + 1, 0);      
+      trptr.resize(key + 1, 0);
     }
     trptr[key] += nelem;
   }
@@ -84,13 +88,13 @@ struct ParallelGroupBuilder {
     data.resize(start);
   }
   /*!
-   * \brief step 4: add data to the allocated space, 
+   * \brief step 4: add data to the allocated space,
    *   the calls to this function should be exactly match previous call to AddBudget
    *
-   * \param key the key of 
+   * \param key the key of
    * \param threadid the id of thread that calls this function
    */
-  inline void Push(size_t key, ValueType value, int threadid) {    
+  inline void Push(size_t key, ValueType value, int threadid) {
     SizeType &rp = thread_rptr[threadid][key];
     data[rp++] = value;
   }
@@ -107,5 +111,4 @@ struct ParallelGroupBuilder {
 };
 }  // namespace utils
 }  // namespace xgboost
-#endif
-
+#endif  // XGBOOST_UTILS_GROUP_DATA_H_
diff --git a/src/utils/io.h b/src/utils/io.h
index d96d16e2a..5b366e51c 100644
--- a/src/utils/io.h
+++ b/src/utils/io.h
@@ -1,16 +1,19 @@
-#ifndef XGBOOST_UTILS_IO_H
-#define XGBOOST_UTILS_IO_H
+/*!
+ * Copyright 2014 by Contributors
+ * \file io.h
+ * \brief general stream interface for serialization, I/O
+ * \author Tianqi Chen
+ */
+
+#ifndef XGBOOST_UTILS_IO_H_
+#define XGBOOST_UTILS_IO_H_
 #include <cstdio>
 #include <vector>
 #include <string>
 #include <cstring>
 #include "./utils.h"
 #include "../sync/sync.h"
-/*!
- * \file io.h
- * \brief general stream interface for serialization, I/O
- * \author Tianqi Chen
- */
+
 namespace xgboost {
 namespace utils {
 // reuse the definitions of streams
@@ -23,7 +26,7 @@ typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
 class FileStream : public ISeekStream {
  public:
   explicit FileStream(std::FILE *fp) : fp(fp) {}
-  explicit FileStream(void) {
+  FileStream(void) {
     this->fp = NULL;
   }
   virtual size_t Read(void *ptr, size_t size) {
@@ -33,7 +36,7 @@ class FileStream : public ISeekStream {
     std::fwrite(ptr, size, 1, fp);
   }
   virtual void Seek(size_t pos) {
-    std::fseek(fp, static_cast<long>(pos), SEEK_SET);
+    std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
   }
   virtual size_t Tell(void) {
     return std::ftell(fp);
@@ -42,7 +45,7 @@ class FileStream : public ISeekStream {
     return std::feof(fp) != 0;
   }
   inline void Close(void) {
-    if (fp != NULL){
+    if (fp != NULL) {
       std::fclose(fp); fp = NULL;
     }
   }
@@ -52,6 +55,5 @@ class FileStream : public ISeekStream {
 };
 }  // namespace utils
 }  // namespace xgboost
-
 #include "./base64-inl.h"
-#endif
+#endif  // XGBOOST_UTILS_IO_H_
diff --git a/src/utils/iterator.h b/src/utils/iterator.h
index 3f5b23310..5d986b2e4 100644
--- a/src/utils/iterator.h
+++ b/src/utils/iterator.h
@@ -1,11 +1,13 @@
-#ifndef XGBOOST_UTILS_ITERATOR_H
-#define XGBOOST_UTILS_ITERATOR_H
-#include <cstdio>
 /*!
+ * Copyright 2014 by Contributors
  * \file iterator.h
  * \brief itertator interface
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_ITERATOR_H_
+#define XGBOOST_UTILS_ITERATOR_H_
+#include <cstdio>
+
 namespace xgboost {
 namespace utils {
 /*!
@@ -16,7 +18,7 @@ template<typename DType>
 class IIterator {
  public:
   /*!
-   * \brief set the parameter 
+   * \brief set the parameter
    * \param name name of parameter
    * \param val value of parameter
    */
@@ -36,5 +38,5 @@ class IIterator {
 
 }  // namespace utils
 }  // namespace xgboost
-#endif
+#endif  // XGBOOST_UTILS_ITERATOR_H_
 
diff --git a/src/utils/math.h b/src/utils/math.h
index e0bf8c466..7609df076 100644
--- a/src/utils/math.h
+++ b/src/utils/math.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_MATH_H_
-#define XGBOOST_UTILS_MATH_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file math.h
  * \brief support additional math
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_MATH_H_
+#define XGBOOST_UTILS_MATH_H_
+
 #include <cmath>
 
 namespace xgboost {
@@ -28,7 +30,8 @@ inline T LogGamma(T v) {
 #if _MSC_VER >= 1800
   return lgamma(v);
 #else
-#pragma message ("Warning: lgamma function was not available until VS2013, poisson regression will be disabled")
+#pragma message("Warning: lgamma function was not available until VS2013"\
+                ", poisson regression will be disabled")
   utils::Error("lgamma function was not available until VS2013");
   return static_cast<T>(1.0);
 #endif
diff --git a/src/utils/omp.h b/src/utils/omp.h
index 87cad380e..ddd3467d9 100644
--- a/src/utils/omp.h
+++ b/src/utils/omp.h
@@ -1,16 +1,20 @@
-#ifndef XGBOOST_UTILS_OMP_H_
-#define XGBOOST_UTILS_OMP_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file omp.h
  * \brief header to handle OpenMP compatibility issues
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_OMP_H_
+#define XGBOOST_UTILS_OMP_H_
+
 #if defined(_OPENMP)
 #include <omp.h>
 #else
 #ifndef DISABLE_OPENMP
 // use pragma message instead of warning
-#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
+#pragma message("Warning: OpenMP is not available, "\
+                "xgboost will be compiled into single-thread code. "\
+                "Use OpenMP-enabled compiler to get benefit of multi-threading")
 #endif
 inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
@@ -25,6 +29,6 @@ typedef int bst_omp_uint;
 #else
 typedef unsigned bst_omp_uint;
 #endif
-} // namespace xgboost
+}  // namespace xgboost
 
 #endif  // XGBOOST_UTILS_OMP_H_
diff --git a/src/utils/quantile.h b/src/utils/quantile.h
index 4e885e254..ffd9142da 100644
--- a/src/utils/quantile.h
+++ b/src/utils/quantile.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_QUANTILE_H_
-#define XGBOOST_UTILS_QUANTILE_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file quantile.h
- * \brief util to compute quantiles 
+ * \brief util to compute quantiles
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_QUANTILE_H_
+#define XGBOOST_UTILS_QUANTILE_H_
+
 #include <cmath>
 #include <vector>
 #include <cstring>
@@ -37,8 +39,8 @@ struct WQSummary {
     // constructor
     Entry(RType rmin, RType rmax, RType wmin, DType value)
         : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
-    /*! 
-     * \brief debug function,  check Valid 
+    /*!
+     * \brief debug function,  check Valid
      * \param eps the tolerate level for violating the relation
      */
     inline void CheckValid(RType eps = 0) const {
@@ -65,7 +67,7 @@ struct WQSummary {
       // default constructor
       QEntry(void) {}
       // constructor
-      QEntry(DType value, RType weight) 
+      QEntry(DType value, RType weight)
           : value(value), weight(weight) {}
       // comparator on value
       inline bool operator<(const QEntry &b) const {
@@ -83,11 +85,11 @@ struct WQSummary {
       } else {
         queue[qtail - 1].weight += w;
       }
-    }   
+    }
     inline void MakeSummary(WQSummary *out) {
       std::sort(queue.begin(), queue.begin() + qtail);
       out->size = 0;
-      // start update sketch      
+      // start update sketch
       RType wsum = 0;
       // construct data with unique weights
       for (size_t i = 0; i < qtail;) {
@@ -106,7 +108,7 @@ struct WQSummary {
   /*! \brief number of elements in the summary */
   size_t size;
   // constructor
-  WQSummary(Entry *data, size_t size) 
+  WQSummary(Entry *data, size_t size)
       : data(data), size(size) {}
   /*!
    * \return the maximum error of the Summary
@@ -119,12 +121,12 @@ struct WQSummary {
     }
     return res;
   }
-  /*! 
+  /*!
    * \brief query qvalue, start from istart
    * \param qvalue the value we query for
    * \param istart starting position
    */
-  inline Entry Query(DType qvalue, size_t &istart) const {
+  inline Entry Query(DType qvalue, size_t &istart) const { // NOLINT(*)
     while (istart < size && qvalue > data[istart].value) {
       ++istart;
     }
@@ -136,7 +138,7 @@ struct WQSummary {
       return data[istart];
     } else {
       if (istart == 0) {
-        return Entry(0.0f, 0.0f, 0.0f, qvalue);    
+        return Entry(0.0f, 0.0f, 0.0f, qvalue);
       } else {
         return Entry(data[istart - 1].rmin_next(),
                      data[istart].rmax_prev(),
@@ -154,12 +156,12 @@ struct WQSummary {
    */
   inline void CopyFrom(const WQSummary &src) {
     size = src.size;
-    std::memcpy(data, src.data, sizeof(Entry) * size);    
-  }  
-  /*! 
-   * \brief debug function, validate whether the summary 
+    std::memcpy(data, src.data, sizeof(Entry) * size);
+  }
+  /*!
+   * \brief debug function, validate whether the summary
    *  run consistency check to check if it is a valid summary
-   * \param eps the tolerate error level, used when RType is floating point and 
+   * \param eps the tolerate error level, used when RType is floating point and
    *        some inconsistency could occur due to rounding error
    */
   inline void CheckValid(RType eps) const {
@@ -199,8 +201,8 @@ struct WQSummary {
     size_t i = 1, lastidx = 0;
     for (size_t k = 1; k < n; ++k) {
       RType dx2 =  2 * ((k * range) / n + begin);
-      // find first i such that  d < (rmax[i+1] + rmin[i+1]) / 2 
-      while (i < src.size - 1 
+      // find first i such that  d < (rmax[i+1] + rmin[i+1]) / 2
+      while (i < src.size - 1
              && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
       utils::Assert(i != src.size - 1, "this cannot happen");
       if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
@@ -217,7 +219,7 @@ struct WQSummary {
       data[size++] = src.data[src.size - 1];
     }
   }
-  /*! 
+  /*!
    * \brief set current summary to be merged summary of sa and sb
    * \param sa first input summary to be merged
    * \param sb second input summar to be merged
@@ -230,7 +232,7 @@ struct WQSummary {
     if (sb.size == 0) {
       this->CopyFrom(sa); return;
     }
-    utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); 
+    utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
     const Entry *a = sa.data, *a_end = sa.data + sa.size;
     const Entry *b = sb.data, *b_end = sb.data + sb.size;
     // extended rmin value
@@ -297,7 +299,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
     RType begin = src.data[0].rmax;
     size_t n = maxsize - 1, nbig = 0;
     RType range = src.data[src.size - 1].rmin - begin;
-    // prune off zero weights 
+    // prune off zero weights
     if (range == 0.0f) {
       // special case, contain only two effective data pts
       this->data[0] = src.data[0];
@@ -331,7 +333,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
       utils::Printf("LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n);
       utils::Printf("LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n",
                     src.size, maxsize, static_cast<double>(range),
-                    static_cast<double>(chunk));      
+                    static_cast<double>(chunk));
       for (size_t i = 0; i < src.size; ++i) {
         utils::Printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i,
                       src.data[i].rmin, src.data[i].rmax,  src.data[i].wmin,
@@ -352,7 +354,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
           RType maxdx2 = src.data[end].rmax_prev() * 2;
           for (; k < n; ++k) {
             RType dx2 =  2 * ((k * mrange) / n + begin);
-            if (dx2 >= maxdx2) break; 
+            if (dx2 >= maxdx2) break;
             while (i < end &&
                    dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
             if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
@@ -371,13 +373,13 @@ struct WXQSummary : public WQSummary<DType, RType> {
           lastidx = end;
         }
         bid = end;
-        // shift base by the gap 
+        // shift base by the gap
         begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev();
       }
     }
   }
 };
-/*! 
+/*!
  * \brief traditional GK summary
  */
 template<typename DType, typename RType>
@@ -405,7 +407,7 @@ struct GKSummary {
     // push data to the queue
     inline void Push(DType x, RType w) {
       queue[qtail++] = x;
-    }   
+    }
     inline void MakeSummary(GKSummary *out) {
       std::sort(queue.begin(), queue.begin() + qtail);
       out->size = qtail;
@@ -419,7 +421,7 @@ struct GKSummary {
   /*! \brief number of elements in the summary */
   size_t size;
   GKSummary(Entry *data, size_t size)
-      : data(data), size(size) {} 
+      : data(data), size(size) {}
   /*! \brief the maximum error of the summary */
   inline RType MaxError(void) const {
     RType res = 0;
@@ -432,7 +434,7 @@ struct GKSummary {
   inline RType MaxRank(void) const {
     return data[size - 1].rmax;
   }
-  /*! 
+  /*!
    * \brief copy content from src
    * \param src source sketch
    */
@@ -450,8 +452,8 @@ struct GKSummary {
                 << "[" << data[i].rmin << "," << data[i].rmax << "]"
                 << std::endl;
     }
-  }  
-  /*! 
+  }
+  /*!
    * \brief set current summary to be pruned summary of src
    *        assume data field is already allocated to be at least maxsize
    * \param src source summary
@@ -486,8 +488,8 @@ struct GKSummary {
     }
     if (sb.size == 0) {
       this->CopyFrom(sa); return;
-    }    
-    utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); 
+    }
+    utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
     const Entry *a = sa.data, *a_end = sa.data + sa.size;
     const Entry *b = sb.data, *b_end = sb.data + sb.size;
     this->size = sa.size + sb.size;
@@ -500,7 +502,7 @@ struct GKSummary {
         aprev_rmin = a->rmin;
         ++dst; ++a;
       } else {
-        *dst = Entry(aprev_rmin + b->rmin, 
+        *dst = Entry(aprev_rmin + b->rmin,
                      b->rmax + a->rmax - 1, b->value);
         bprev_rmin = b->rmin;
         ++dst; ++b;
@@ -537,15 +539,15 @@ class QuantileSketchTemplate {
   /*! \brief type of summary type */
   typedef TSummary Summary;
   /*! \brief the entry type */
-  typedef typename Summary::Entry Entry;   
+  typedef typename Summary::Entry Entry;
   /*! \brief same as summary, but use STL to backup the space */
   struct SummaryContainer : public Summary {
     std::vector<Entry> space;
-    SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) { 
+    SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
       this->space = src.space;
       this->data = BeginPtr(this->space);
     }
-    SummaryContainer(void) : Summary(NULL, 0) { 
+    SummaryContainer(void) : Summary(NULL, 0) {
     }
     /*! \brief reserve space for summary */
     inline void Reserve(size_t size) {
@@ -554,7 +556,7 @@ class QuantileSketchTemplate {
         this->data = BeginPtr(space);
       }
     }
-    /*! 
+    /*!
      * \brief set the space to be merge of all Summary arrays
      * \param begin begining position in th summary array
      * \param end ending position in the Summary array
@@ -597,7 +599,7 @@ class QuantileSketchTemplate {
     }
     /*! \brief save the data structure into stream */
     template<typename TStream>
-    inline void Save(TStream &fo) const {
+    inline void Save(TStream &fo) const {  // NOLINT(*)
       fo.Write(&(this->size), sizeof(this->size));
       if (this->size != 0) {
         fo.Write(this->data, this->size * sizeof(Entry));
@@ -605,15 +607,16 @@ class QuantileSketchTemplate {
     }
     /*! \brief load data structure from input stream */
     template<typename TStream>
-    inline void Load(TStream &fi) {
+    inline void Load(TStream &fi) {  // NOLINT(*)
       utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
       this->Reserve(this->size);
       if (this->size != 0) {
-        utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2");
+        utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0,
+                     "invalid SummaryArray 2");
       }
     }
   };
-  /*! 
+  /*!
    * \brief intialize the quantile sketch, given the performance specification
    * \param maxn maximum number of data points can be feed into sketch
    * \param eps accuracy level of summary
@@ -741,8 +744,8 @@ class QuantileSketchTemplate {
  * \tparam DType type of data content
  * \tparam RType type of rank
  */
-template<typename DType, typename RType=unsigned>
-class WQuantileSketch : 
+template<typename DType, typename RType = unsigned>
+class WQuantileSketch :
       public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{
 };
 
@@ -751,8 +754,8 @@ class WQuantileSketch :
  * \tparam DType type of data content
  * \tparam RType type of rank
  */
-template<typename DType, typename RType=unsigned>
-class WXQuantileSketch : 
+template<typename DType, typename RType = unsigned>
+class WXQuantileSketch :
       public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{
 };
 /*!
@@ -760,11 +763,11 @@ class WXQuantileSketch :
  * \tparam DType type of data content
  * \tparam RType type of rank
  */
-template<typename DType, typename RType=unsigned>
-class GKQuantileSketch : 
+template<typename DType, typename RType = unsigned>
+class GKQuantileSketch :
       public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{
 };
 
-}  // utils
-}  // xgboost
-#endif
+}  // namespace utils
+}  // namespace xgboost
+#endif  // XGBOOST_UTILS_QUANTILE_H_
diff --git a/src/utils/random.h b/src/utils/random.h
index 1e3e617f9..7d52c2ae7 100644
--- a/src/utils/random.h
+++ b/src/utils/random.h
@@ -1,12 +1,14 @@
-#ifndef XGBOOST_UTILS_RANDOM_H_
-#define XGBOOST_UTILS_RANDOM_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file xgboost_random.h
  * \brief PRNG to support random number generation
  * \author Tianqi Chen: tianqi.tchen@gmail.com
  *
  * Use standard PRNG from stdlib
  */
+#ifndef XGBOOST_UTILS_RANDOM_H_
+#define XGBOOST_UTILS_RANDOM_H_
+
 #include <cmath>
 #include <cstdlib>
 #include <vector>
@@ -23,11 +25,11 @@ inline void Seed(unsigned seed) {
 }
 /*! \brief basic function, uniform */
 inline double Uniform(void) {
-  return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
+  return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); // NOLINT(*)
 }
 /*! \brief return a real numer uniform in (0,1) */
 inline double NextDouble2(void) {
-  return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
+  return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); // NOLINT(*)
 }
 /*! \brief return  x~N(0,1) */
 inline double Normal(void) {
@@ -73,7 +75,7 @@ inline void Shuffle(T *data, size_t sz) {
 }
 // random shuffle the data inside, require PRNG
 template<typename T>
-inline void Shuffle(std::vector<T> &data) {
+inline void Shuffle(std::vector<T> &data) { // NOLINT(*)
   Shuffle(&data[0], data.size());
 }
 
@@ -81,17 +83,18 @@ inline void Shuffle(std::vector<T> &data) {
 struct Random{
   /*! \brief set random number seed */
   inline void Seed(unsigned sd) {
-	 this->rseed = sd;
-#if defined(_MSC_VER)||defined(_WIN32)
-     ::xgboost::random::Seed(sd);
+    this->rseed = sd;
+#if defined(_MSC_VER) || defined(_WIN32)
+    ::xgboost::random::Seed(sd);
 #endif
   }
   /*! \brief return a real number uniform in [0,1) */
   inline double RandDouble(void) {
-	// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
-	// For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
-	// todo, replace with another PRNG
-#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_)
+    // use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
+    // For cygwin and mingw, this can slow down parallelism,
+    // but rand_r is only used in objective-inl.hpp, won't affect speed in general
+    // todo, replace with another PRNG
+#if defined(_MSC_VER) || defined(_WIN32) || defined(XGBOOST_STRICT_CXX98_)
     return Uniform();
 #else
     return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
diff --git a/src/utils/thread.h b/src/utils/thread.h
index ef6335a74..78b488cff 100644
--- a/src/utils/thread.h
+++ b/src/utils/thread.h
@@ -1,16 +1,17 @@
-#ifndef XGBOOST_UTILS_THREAD_H
-#define XGBOOST_UTILS_THREAD_H
 /*!
+ * Copyright by Contributors
  * \file thread.h
- * \brief this header include the minimum necessary resource for multi-threading
+ * \brief this header includes the minimum necessary resources
+ * for multi-threading that can be compiled in windows, linux, mac
  * \author Tianqi Chen
- * Acknowledgement: this file is adapted from SVDFeature project, by same author. 
- *  The MAC support part of this code is provided by Artemy Kolchinsky
  */
+#ifndef XGBOOST_UTILS_THREAD_H_ // NOLINT(*)
+#define XGBOOST_UTILS_THREAD_H_ // NOLINT(*)
+
 #ifdef _MSC_VER
-#include "utils.h"
 #include <windows.h>
 #include <process.h>
+#include "../xgboost/utils.h"
 namespace xgboost {
 namespace utils {
 /*! \brief simple semaphore used for synchronization */
@@ -18,29 +19,80 @@ class Semaphore {
  public :
   inline void Init(int init_val) {
     sem = CreateSemaphore(NULL, init_val, 10, NULL);
-    utils::Assert(sem != NULL, "create Semaphore error");
+    utils::Check(sem != NULL, "create Semaphore error");
   }
   inline void Destroy(void) {
     CloseHandle(sem);
   }
   inline void Wait(void) {
-    utils::Assert(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error");
+    utils::Check(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error");
   }
   inline void Post(void) {
-    utils::Assert(ReleaseSemaphore(sem, 1, NULL)  != 0, "ReleaseSemaphore error");
+    utils::Check(ReleaseSemaphore(sem, 1, NULL) != 0, "ReleaseSemaphore error");
   }
+
  private:
   HANDLE sem;
 };
+
+/*! \brief mutex under windows */
+class Mutex {
+ public:
+  inline void Init(void) {
+    utils::Check(InitializeCriticalSectionAndSpinCount(&mutex, 0x00000400) != 0,
+                   "Mutex::Init fail");
+  }
+  inline void Lock(void) {
+    EnterCriticalSection(&mutex);
+  }
+  inline void Unlock(void) {
+    LeaveCriticalSection(&mutex);
+  }
+  inline void Destroy(void) {
+    DeleteCriticalSection(&mutex);
+  }
+
+ private:
+  friend class ConditionVariable;
+  CRITICAL_SECTION mutex;
+};
+
+// condition variable that uses the Windows CONDITION_VARIABLE API
+class ConditionVariable {
+ public:
+  // initialize conditional variable
+  inline void Init(void) {
+    InitializeConditionVariable(&cond);
+  }
+  // destroy the condition variable (no cleanup call is made here)
+  inline void Destroy(void) {
+    // DeleteConditionVariable(&cond);
+  }
+  // wait on the conditional variable
+  inline void Wait(Mutex *mutex) {
+    utils::Check(SleepConditionVariableCS(&cond, &(mutex->mutex), INFINITE) != 0,
+                 "ConditionVariable:Wait fail");
+  }
+  inline void Broadcast(void) {
+    WakeAllConditionVariable(&cond);
+  }
+  inline void Signal(void) {
+    WakeConditionVariable(&cond);
+  }
+
+ private:
+  CONDITION_VARIABLE cond;
+};
+
 /*! \brief simple thread that wraps windows thread */
 class Thread {
  private:
   HANDLE    thread_handle;
-  unsigned  thread_id;            
+  unsigned  thread_id;
  public:
-  inline void Start(unsigned int __stdcall entry(void*), void *param) {
+  inline void Start(unsigned int __stdcall entry(void*p), void *param) {
     thread_handle = (HANDLE)_beginthreadex(NULL, 0, entry, param, 0, &thread_id);
-  }            
+  }
   inline int Join(void) {
     WaitForSingleObject(thread_handle, INFINITE);
     return 0;
@@ -54,39 +106,41 @@ inline void ThreadExit(void *status) {
 }  // namespace utils
 }  // namespace xgboost
 #else
-// thread interface using g++     
-extern "C" {
+// thread interface using g++
 #include <semaphore.h>
 #include <pthread.h>
-}
+#include <errno.h>
 namespace xgboost {
 namespace utils {
 /*!\brief semaphore class */
 class Semaphore {
   #ifdef __APPLE__
+
  private:
   sem_t* semPtr;
-  char sema_name[20];            
+  char sema_name[20];
+
  private:
   inline void GenRandomString(char *s, const int len) {
-    static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" ;
+    static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
     for (int i = 0; i < len; ++i) {
       s[i] = alphanum[rand() % (sizeof(alphanum) - 1)];
     }
     s[len] = 0;
   }
+
  public:
   inline void Init(int init_val) {
-    sema_name[0]='/'; 
-    sema_name[1]='s'; 
-    sema_name[2]='e'; 
-    sema_name[3]='/'; 
+    sema_name[0] = '/';
+    sema_name[1] = 's';
+    sema_name[2] = 'e';
+    sema_name[3] = '/';
     GenRandomString(&sema_name[4], 16);
-    if((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) {
+    if ((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) {
       perror("sem_open");
       exit(1);
     }
-    utils::Assert(semPtr != NULL, "create Semaphore error");
+    utils::Check(semPtr != NULL, "create Semaphore error");
   }
   inline void Destroy(void) {
     if (sem_close(semPtr) == -1) {
@@ -103,53 +157,93 @@ class Semaphore {
   }
   inline void Post(void) {
     sem_post(semPtr);
-  }               
+  }
   #else
+
  private:
   sem_t sem;
+
  public:
   inline void Init(int init_val) {
-    sem_init(&sem, 0, init_val);
+    if (sem_init(&sem, 0, init_val) != 0) {
+      utils::Error("Semaphore.Init:%s", strerror(errno));
+    }
   }
   inline void Destroy(void) {
-    sem_destroy(&sem);
+    if (sem_destroy(&sem) != 0) {
+      utils::Error("Semaphore.Destroy:%s", strerror(errno));
+    }
   }
   inline void Wait(void) {
-    sem_wait(&sem);
+    if (sem_wait(&sem) != 0) {
+      utils::Error("Semaphore.Wait:%s", strerror(errno));
+    }
   }
   inline void Post(void) {
-    sem_post(&sem);
+    if (sem_post(&sem) != 0) {
+      utils::Error("Semaphore.Post:%s", strerror(errno));
+    }
   }
-  #endif  
+  #endif
 };
 
-// helper for c thread
-// used to strictly call c++ function from pthread
-struct ThreadContext {
-  void *(*entry)(void*);
-  void *param;
-};
-extern "C" {
-  inline void *RunThreadContext(void *ctx_) {
-    ThreadContext *ctx = reinterpret_cast<ThreadContext*>(ctx_);
-    void *ret = (*ctx->entry)(ctx->param);
-    delete ctx;
-    return ret;
+// mutex that works with pthread
+class Mutex {
+ public:
+  inline void Init(void) {
+    pthread_mutex_init(&mutex, NULL);
   }
-}
+  inline void Lock(void) {
+    pthread_mutex_lock(&mutex);
+  }
+  inline void Unlock(void) {
+    pthread_mutex_unlock(&mutex);
+  }
+  inline void Destroy(void) {
+    pthread_mutex_destroy(&mutex);
+  }
+
+ private:
+  friend class ConditionVariable;
+  pthread_mutex_t mutex;
+};
+
+// conditional variable that uses pthread
+class ConditionVariable {
+ public:
+  // initialize conditional variable
+  inline void Init(void) {
+    pthread_cond_init(&cond, NULL);
+  }
+  // destroy the condition variable
+  inline void Destroy(void) {
+    pthread_cond_destroy(&cond);
+  }
+  // wait on the conditional variable
+  inline void Wait(Mutex *mutex) {
+    pthread_cond_wait(&cond, &(mutex->mutex));
+  }
+  inline void Broadcast(void) {
+    pthread_cond_broadcast(&cond);
+  }
+  inline void Signal(void) {
+    pthread_cond_signal(&cond);
+  }
+
+ private:
+  pthread_cond_t cond;
+};
+
 /*!\brief simple thread class */
 class Thread {
  private:
-  pthread_t thread;                
-
+  pthread_t thread;
  public :
-  inline void Start(void *entry(void*), void *param) {
+  inline void Start(void * entry(void*), void *param) { // NOLINT(*)
     pthread_attr_t attr;
     pthread_attr_init(&attr);
     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
-    ThreadContext *ctx = new ThreadContext();
-    ctx->entry = entry; ctx->param = param;
-    pthread_create(&thread, &attr, RunThreadContext, ctx);
+    pthread_create(&thread, &attr, entry, param);
   }
   inline int Join(void) {
     void *status;
@@ -159,9 +253,8 @@ class Thread {
 inline void ThreadExit(void *status) {
   pthread_exit(status);
 }
-
 }  // namespace utils
 }  // namespace xgboost
 #define XGBOOST_THREAD_PREFIX void *
-#endif
-#endif
+#endif  // _MSC_VER
+#endif  // XGBOOST_UTILS_THREAD_H_  NOLINT(*)
diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h
index 45da6ec84..2119f53ab 100644
--- a/src/utils/thread_buffer.h
+++ b/src/utils/thread_buffer.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
-#define XGBOOST_UTILS_THREAD_BUFFER_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file thread_buffer.h
  * \brief  multi-thread buffer, iterator, can be used to create parallel pipeline
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
+#define XGBOOST_UTILS_THREAD_BUFFER_H_
+
 #include <vector>
 #include <cstring>
 #include <cstdlib>
@@ -27,7 +29,7 @@ class ThreadBuffer {
     this->buf_size = 30;
   }
   ~ThreadBuffer(void) {
-    if(init_end) this->Destroy();
+    if (init_end) this->Destroy();
   }
   /*!\brief set parameter, will also pass the parameter to factory */
   inline void SetParam(const char *name, const char *val) {
@@ -38,7 +40,7 @@ class ThreadBuffer {
   /*!
    * \brief initalize the buffered iterator
    * \param param a initialize parameter that will pass to factory, ignore it if not necessary
-   * \return false if the initlization can't be done, e.g. buffer file hasn't been created 
+   * \return false if the initialization can't be done, e.g. buffer file hasn't been created
    */
   inline bool Init(void) {
     if (!factory.Init()) return false;
@@ -49,7 +51,7 @@ class ThreadBuffer {
     this->init_end = true;
     this->StartLoader();
     return true;
-  }  
+  }
   /*!\brief place the iterator before first value */
   inline void BeforeFirst(void) {
     // wait till last loader end
@@ -70,7 +72,7 @@ class ThreadBuffer {
     loading_need.Post();
     // set buffer value
     buf_index = 0;
-  }  
+  }
   /*! \brief destroy the buffer iterator, will deallocate the buffer */
   inline void Destroy(void) {
     // wait until the signal is consumed
@@ -78,7 +80,7 @@ class ThreadBuffer {
     loading_need.Post();
     loader_thread.Join();
     loading_need.Destroy();
-    loading_end.Destroy();    
+    loading_end.Destroy();
     for (size_t i = 0; i < bufA.size(); ++i) {
       factory.FreeSpace(bufA[i]);
     }
@@ -88,37 +90,38 @@ class ThreadBuffer {
     bufA.clear(); bufB.clear();
     factory.Destroy();
     this->init_end = false;
-  }  
+  }
   /*!
    * \brief get the next element needed in buffer
    * \param elem element to store into
    * \return whether reaches end of data
    */
-  inline bool Next(Elem &elem) {
+  inline bool Next(Elem &elem) { // NOLINT(*)
     // end of buffer try to switch
     if (buf_index == buf_size) {
       this->SwitchBuffer();
       buf_index = 0;
     }
-    if (buf_index >= (current_buf ? endA : endB)) { 
+    if (buf_index >= (current_buf ? endA : endB)) {
       return false;
     }
     std::vector<Elem> &buf = current_buf ? bufA : bufB;
     elem = buf[buf_index];
     ++buf_index;
     return true;
-  }      
+  }
   /*!
    * \brief get the factory object
    */
   inline ElemFactory &get_factory(void) {
     return factory;
   }
-  inline const ElemFactory &get_factory(void) const{
+  inline const ElemFactory &get_factory(void) const {
     return factory;
   }
   // size of buffer
   int  buf_size;
+
  private:
   // factory object used to load configures
   ElemFactory factory;
@@ -147,15 +150,15 @@ class ThreadBuffer {
    * this implementation is like producer-consumer style
    */
   inline void RunLoader(void) {
-    while(!destroy_signal) {
+    while (!destroy_signal) {
       // sleep until loading is needed
-      loading_need.Wait();      
+      loading_need.Wait();
       std::vector<Elem> &buf = current_buf ? bufB : bufA;
       int i;
       for (i = 0; i < buf_size ; ++i) {
         if (!factory.LoadNext(buf[i])) {
           int &end = current_buf ? endB : endA;
-          end = i; // marks the termination
+          end = i;  // marks the termination
           break;
         }
       }
@@ -166,14 +169,14 @@ class ThreadBuffer {
   }
   /*!\brief entry point of loader thread */
   inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) {
-    static_cast< ThreadBuffer<Elem,ElemFactory>* >(pthread)->RunLoader();
+    static_cast< ThreadBuffer<Elem, ElemFactory>* >(pthread)->RunLoader();
     return NULL;
   }
   /*!\brief start loader thread */
   inline void StartLoader(void) {
     destroy_signal = false;
     // set param
-    current_buf = 1;    
+    current_buf = 1;
     loading_need.Init(1);
     loading_end .Init(0);
     // reset terminate limit
@@ -185,8 +188,8 @@ class ThreadBuffer {
     current_buf = 0;
     // wake loader for next part
     data_loaded = false;
-    loading_need.Post();    
-    buf_index = 0; 
+    loading_need.Post();
+    buf_index = 0;
   }
   /*!\brief switch double buffer */
   inline void SwitchBuffer(void) {
@@ -198,7 +201,6 @@ class ThreadBuffer {
     loading_need.Post();
   }
 };
-
 }  // namespace utils
 }  // namespace xgboost
-#endif
+#endif  // XGBOOST_UTILS_THREAD_BUFFER_H_
diff --git a/src/utils/utils.h b/src/utils/utils.h
index e6026c3a6..7a8f18390 100644
--- a/src/utils/utils.h
+++ b/src/utils/utils.h
@@ -1,15 +1,18 @@
-#ifndef XGBOOST_UTILS_UTILS_H_
-#define XGBOOST_UTILS_UTILS_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file utils.h
  * \brief simple utils to support the code
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_UTILS_UTILS_H_
+#define XGBOOST_UTILS_UTILS_H_
+
 #define _CRT_SECURE_NO_WARNINGS
 #include <cstdio>
 #include <string>
 #include <cstdlib>
 #include <vector>
+#include <stdexcept>
 
 #ifndef XGBOOST_STRICT_CXX98_
 #include <cstdarg>
@@ -19,18 +22,18 @@
 #define fopen64 std::fopen
 #endif
 #ifdef _MSC_VER
-// NOTE: sprintf_s is not equivalent to snprintf, 
+// NOTE: sprintf_s is not equivalent to snprintf,
 // they are equivalent when success, which is sufficient for our case
 #define snprintf sprintf_s
 #define vsnprintf vsprintf_s
 #else
 #ifdef _FILE_OFFSET_BITS
 #if _FILE_OFFSET_BITS == 32
-#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit")
+#pragma message("Warning: FILE OFFSET BITS defined to be 32 bit")
 #endif
 #endif
 
-#ifdef __APPLE__ 
+#ifdef __APPLE__
 #define off64_t off_t
 #define fopen64 std::fopen
 #endif
@@ -58,21 +61,20 @@ namespace utils {
 const int kPrintBuffer = 1 << 12;
 
 #ifndef XGBOOST_CUSTOMIZE_MSG_
-/*! 
+/*!
  * \brief handling of Assert error, caused by in-apropriate input
- * \param msg error message 
+ * \param msg error message
  */
 inline void HandleAssertError(const char *msg) {
   fprintf(stderr, "AssertError:%s\n", msg);
   exit(-1);
 }
-/*! 
+/*!
  * \brief handling of Check error, caused by in-apropriate input
- * \param msg error message 
+ * \param msg error message
  */
 inline void HandleCheckError(const char *msg) {
-  fprintf(stderr, "%s\n", msg);
-  exit(-1);
+  throw std::runtime_error(msg);
 }
 inline void HandlePrint(const char *msg) {
   printf("%s", msg);
@@ -158,7 +160,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) {
 // easy utils that can be directly acessed in xgboost
 /*! \brief get the beginning address of a vector */
 template<typename T>
-inline T *BeginPtr(std::vector<T> &vec) {
+inline T *BeginPtr(std::vector<T> &vec) { // NOLINT(*)
   if (vec.size() == 0) {
     return NULL;
   } else {
@@ -174,7 +176,7 @@ inline const T *BeginPtr(const std::vector<T> &vec) {
     return &vec[0];
   }
 }
-inline char* BeginPtr(std::string &str) {
+inline char* BeginPtr(std::string &str) { // NOLINT(*)
   if (str.length() == 0) return NULL;
   return &str[0];
 }
diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp
index 769e3be3b..773001503 100644
--- a/src/xgboost_main.cpp
+++ b/src/xgboost_main.cpp
@@ -1,18 +1,20 @@
+// Copyright 2014 by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
 #include <ctime>
 #include <string>
 #include <cstring>
+#include <vector>
 #include "./sync/sync.h"
-#include "io/io.h"
-#include "utils/utils.h"
-#include "utils/config.h"
-#include "learner/learner-inl.hpp"
+#include "./io/io.h"
+#include "./utils/utils.h"
+#include "./utils/config.h"
+#include "./learner/learner-inl.hpp"
 
 namespace xgboost {
 /*!
- * \brief wrapping the training process 
+ * \brief wrapping the training process
  */
 class BoostLearnTask {
  public:
@@ -20,7 +22,7 @@ class BoostLearnTask {
     if (argc < 2) {
       printf("Usage: <config>\n");
       return 0;
-    }    
+    }
     utils::ConfigIterator itr(argv[1]);
     while (itr.Next()) {
       this->SetParam(itr.name(), itr.val());
@@ -44,10 +46,10 @@ class BoostLearnTask {
     }
     if (rabit::IsDistributed() && data_split == "NONE") {
       this->SetParam("dsplit", "row");
-    }    
+    }
     if (rabit::GetRank() != 0) {
       this->SetParam("silent", "2");
-    }    
+    }
     this->InitData();
 
     if (task == "train") {
@@ -90,12 +92,14 @@ class BoostLearnTask {
     if (!strcmp("save_pbuffer", name)) save_with_pbuffer = atoi(val);
     if (!strncmp("eval[", name, 5)) {
       char evname[256];
-      utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display");
+      utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1,
+                    "must specify evaluation name for display");
       eval_data_names.push_back(std::string(evname));
       eval_data_paths.push_back(std::string(val));
     }
     learner.SetParam(name, val);
   }
+
  public:
   BoostLearnTask(void) {
     // default parameters
@@ -119,12 +123,13 @@ class BoostLearnTask {
     save_with_pbuffer = 0;
     data = NULL;
   }
-  ~BoostLearnTask(void){
-    for (size_t i = 0; i < deval.size(); i++){
+  ~BoostLearnTask(void) {
+    for (size_t i = 0; i < deval.size(); i++) {
       delete deval[i];
     }
     if (data != NULL) delete data;
   }
+
  private:
   inline void InitData(void) {
     if (strchr(train_path.c_str(), '%') != NULL) {
@@ -151,14 +156,14 @@ class BoostLearnTask {
                                            loadsplit));
         devalall.push_back(deval.back());
       }
-            
+
       std::vector<io::DataMatrix *> dcache(1, data);
-      for (size_t i = 0; i < deval.size(); ++ i) {
+      for (size_t i = 0; i < deval.size(); ++i) {
         dcache.push_back(deval[i]);
       }
       // set cache data to be all training and evaluation data
       learner.SetCacheData(dcache);
-      
+
       // add training set to evaluation set if needed
       if (eval_train != 0) {
         devalall.push_back(data);
@@ -178,13 +183,13 @@ class BoostLearnTask {
     int version = rabit::LoadCheckPoint(&learner);
     if (version == 0) this->InitLearner();
     const time_t start = time(NULL);
-    unsigned long elapsed = 0;
+    unsigned long elapsed = 0;  // NOLINT(*)
     learner.CheckInit(data);
 
     bool allow_lazy = learner.AllowLazyCheckPoint();
     for (int i = version / 2; i < num_round; ++i) {
-      elapsed = (unsigned long)(time(NULL) - start);
-      if (version % 2 == 0) { 
+      elapsed = (unsigned long)(time(NULL) - start);  // NOLINT(*)
+      if (version % 2 == 0) {
         if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
         learner.UpdateOneIter(i, *data);
         if (allow_lazy) {
@@ -196,7 +201,7 @@ class BoostLearnTask {
       }
       utils::Assert(version == rabit::VersionNumber(), "consistent check");
       std::string res = learner.EvalOneIter(i, devalall, eval_data_names);
-      if (rabit::IsDistributed()){
+      if (rabit::IsDistributed()) {
         if (rabit::GetRank() == 0) {
           rabit::TrackerPrintf("%s\n", res.c_str());
         }
@@ -215,29 +220,29 @@ class BoostLearnTask {
       }
       version += 1;
       utils::Assert(version == rabit::VersionNumber(), "consistent check");
-      elapsed = (unsigned long)(time(NULL) - start);
+      elapsed = (unsigned long)(time(NULL) - start);  // NOLINT(*)
     }
     // always save final round
     if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE") {
-      if (model_out == "NULL"){
+      if (model_out == "NULL") {
         this->SaveModel(num_round - 1);
       } else {
         this->SaveModel(model_out.c_str());
       }
     }
-    if (!silent){
+    if (!silent) {
       printf("\nupdating end, %lu sec in all\n", elapsed);
     }
   }
   inline void TaskEval(void) {
     learner.EvalOneIter(0, devalall, eval_data_names);
   }
-  inline void TaskDump(void){
+  inline void TaskDump(void) {
     FILE *fo = utils::FopenCheck(name_dump.c_str(), "w");
     std::vector<std::string> dump = learner.DumpModel(fmap, dump_model_stats != 0);
-    for (size_t i = 0; i < dump.size(); ++ i) {
-      fprintf(fo,"booster[%lu]:\n", i);
-      fprintf(fo,"%s", dump[i].c_str()); 
+    for (size_t i = 0; i < dump.size(); ++i) {
+      fprintf(fo, "booster[%lu]:\n", i);
+      fprintf(fo, "%s", dump[i].c_str());
     }
     fclose(fo);
   }
@@ -247,14 +252,15 @@ class BoostLearnTask {
   }
   inline void SaveModel(int i) const {
     char fname[256];
-    sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1);
+    utils::SPrintf(fname, sizeof(fname),
+                   "%s/%04d.model", model_dir_path.c_str(), i + 1);
     this->SaveModel(fname);
   }
   inline void TaskPred(void) {
     std::vector<float> preds;
     if (!silent) printf("start prediction...\n");
     learner.Predict(*data, pred_margin != 0, &preds, ntree_limit);
-    if (!silent) printf("writing prediction to %s\n", name_pred.c_str());    
+    if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
     FILE *fo;
     if (name_pred != "stdout") {
       fo = utils::FopenCheck(name_pred.c_str(), "w");
@@ -266,6 +272,7 @@ class BoostLearnTask {
     }
     if (fo != stdout) fclose(fo);
   }
+
  private:
   /*! \brief whether silent */
   int silent;
@@ -273,7 +280,7 @@ class BoostLearnTask {
   int load_part;
   /*! \brief whether use auto binary buffer */
   int use_buffer;
-  /*! \brief whether evaluate training statistics */            
+  /*! \brief whether evaluate training statistics */
   int eval_train;
   /*! \brief number of boosting iterations */
   int num_round;
@@ -309,6 +316,7 @@ class BoostLearnTask {
   std::vector<std::string> eval_data_paths;
   /*! \brief the names of the evaluation data used in output log */
   std::vector<std::string> eval_data_names;
+
  private:
   io::DataMatrix* data;
   std::vector<io::DataMatrix*> deval;
@@ -316,9 +324,9 @@ class BoostLearnTask {
   utils::FeatMap fmap;
   learner::BoostLearner learner;
 };
-}
+}  // namespace xgboost
 
-int main(int argc, char *argv[]){
+int main(int argc, char *argv[]) {
   xgboost::BoostLearnTask tsk;
   tsk.SetParam("seed", "0");
   int ret = tsk.Run(argc, argv);
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 000000000..19e34d5df
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1 @@
+This folder contains test cases for xgboost.
\ No newline at end of file
diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py
new file mode 100644
index 000000000..77d19595b
--- /dev/null
+++ b/tests/python/test_basic.py
@@ -0,0 +1,31 @@
+import numpy as np
+import xgboost as xgb
+
+dpath = 'demo/data/'
+
+def test_basic():
+    dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+    dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+    param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+    # specify validations set to watch performance
+    watchlist  = [(dtest,'eval'), (dtrain,'train')]
+    num_round = 2
+    bst = xgb.train(param, dtrain, num_round, watchlist)
+    # this is prediction
+    preds = bst.predict(dtest)
+    labels = dtest.get_label()
+    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+    # error must be smaller than 10%
+    assert err < 0.1
+
+    # save dmatrix into binary buffer
+    dtest.save_binary('dtest.buffer')
+    # save model
+    bst.save_model('xgb.model')
+    # load model and data in
+    bst2 = xgb.Booster(model_file='xgb.model')
+    dtest2 = xgb.DMatrix('dtest.buffer')
+    preds2 = bst2.predict(dtest2)
+    # assert they are the same
+    assert np.sum(np.abs(preds2-preds)) == 0
+
diff --git a/wrapper/setup.py b/wrapper/setup.py
index 52bf1cf82..5365d61b0 100644
--- a/wrapper/setup.py
+++ b/wrapper/setup.py
@@ -1,9 +1,12 @@
+# pylint: disable=invalid-name
+"""Setup xgboost package."""
 import os
 import platform
 from setuptools import setup
 
 
 class XGBoostLibraryNotFound(Exception):
+    """Exception to raise when xgboost library cannot be found."""
     pass
 
 
@@ -15,7 +18,7 @@ if os.name == 'nt':
         dll_path.append(os.path.join(curr_dir, '../windows/x64/Release/'))
     else:
         dll_path.append(os.path.join(curr_dir, '../windows/Release/'))
-        
+
 
 if os.name == 'nt':
     dll_path = [os.path.join(p, 'xgboost_wrapper.dll') for p in dll_path]
diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index 0280d87b3..96f6c2573 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -6,7 +6,7 @@ Version: 0.40
 Authors: Tianqi Chen, Bing Xu
 Early stopping by Zygmunt Zając
 """
-
+# pylint: disable=too-many-arguments, too-many-locals, too-many-lines, invalid-name
 from __future__ import absolute_import
 
 import os
@@ -28,20 +28,24 @@ except ImportError:
     SKLEARN_INSTALLED = False
 
 class XGBoostLibraryNotFound(Exception):
+    """Error thrown when the xgboost library is not found"""
     pass
 
 class XGBoostError(Exception):
+    """Error thrown by xgboost trainer."""
     pass
 
 __all__ = ['DMatrix', 'CVPack', 'Booster', 'aggcv', 'cv', 'mknfold', 'train']
 
 if sys.version_info[0] == 3:
-    string_types = str,
+    # pylint: disable=invalid-name
+    STRING_TYPES = str,
 else:
-    string_types = basestring,
-
+    # pylint: disable=invalid-name
+    STRING_TYPES = basestring,
 
 def load_xglib():
+    """Load the xgboost library."""
     curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
     dll_path = [curr_path]
     if os.name == 'nt':
@@ -55,36 +59,33 @@ def load_xglib():
         dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path]
     lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
     if len(dll_path) == 0:
-        raise XGBoostLibraryNotFound('cannot find find the files in the candicate path ' + str(dll_path))
+        raise XGBoostLibraryNotFound(
+            'cannot find find the files in the candicate path ' + str(dll_path))
     lib = ctypes.cdll.LoadLibrary(lib_path[0])
-
-    # DMatrix functions
-    lib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
-    lib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
-    lib.XGDMatrixCreateFromCSC.restype = ctypes.c_void_p
-    lib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
-    lib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
-    lib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
-    lib.XGDMatrixGetUIntInfo.restype = ctypes.POINTER(ctypes.c_uint)
-    lib.XGDMatrixNumRow.restype = ctypes.c_ulong
-
-    # Booster functions
-    lib.XGBoosterCreate.restype = ctypes.c_void_p
-    lib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
-    lib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
-    lib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
-    lib.XGBoosterGetModelRaw.restype = ctypes.POINTER(ctypes.c_char)
-    lib.XGBoosterLoadModelFromBuffer.restype = ctypes.c_void_p
+    lib.XGBGetLastError.restype = ctypes.c_char_p
 
     return lib
 
 # load the XGBoost library globally
-xglib = load_xglib()
+_LIB = load_xglib()
+
+def _check_call(ret):
+    """Check the return value of C API call
+
+    This function will raise exception when error occurs.
+    Wrap every API call with this function
+
+    Parameters
+    ----------
+    ret : int
+        return value from API calls
+    """
+    if ret != 0:
+        raise XGBoostError(_LIB.XGBGetLastError())
 
 
 def ctypes2numpy(cptr, length, dtype):
-    """
-    Convert a ctypes pointer array to a numpy array.
+    """Convert a ctypes pointer array to a numpy array.
     """
     if not isinstance(cptr, ctypes.POINTER(ctypes.c_float)):
         raise RuntimeError('expected float pointer')
@@ -95,6 +96,7 @@ def ctypes2numpy(cptr, length, dtype):
 
 
 def ctypes2buffer(cptr, length):
+    """Convert ctypes pointer to buffer type."""
     if not isinstance(cptr, ctypes.POINTER(ctypes.c_char)):
         raise RuntimeError('expected char pointer')
     res = bytearray(length)
@@ -105,14 +107,17 @@ def ctypes2buffer(cptr, length):
 
 
 def c_str(string):
+    """Convert a python string to cstring."""
     return ctypes.c_char_p(string.encode('utf-8'))
 
 
 def c_array(ctype, values):
+    """Convert a python sequence to a c array."""
     return (ctype * len(values))(*values)
 
 
 class DMatrix(object):
+    """Data Matrix used in XGBoost."""
     def __init__(self, data, label=None, missing=0.0, weight=None, silent=False):
         """
         Data matrix used in XGBoost.
@@ -135,8 +140,11 @@ class DMatrix(object):
         if data is None:
             self.handle = None
             return
-        if isinstance(data, string_types):
-            self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), int(silent)))
+        if isinstance(data, STRING_TYPES):
+            self.handle = ctypes.c_void_p()
+            _check_call(_LIB.XGDMatrixCreateFromFile(c_str(data),
+                                                     int(silent),
+                                                     ctypes.byref(self.handle)))
         elif isinstance(data, scipy.sparse.csr_matrix):
             self._init_from_csr(data)
         elif isinstance(data, scipy.sparse.csc_matrix):
@@ -160,11 +168,12 @@ class DMatrix(object):
         """
         if len(csr.indices) != len(csr.data):
             raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data)))
-        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR(
-            c_array(ctypes.c_ulong, csr.indptr),
-            c_array(ctypes.c_uint, csr.indices),
-            c_array(ctypes.c_float, csr.data),
-            len(csr.indptr), len(csr.data)))
+        self.handle = ctypes.c_void_p()
+        _check_call(_LIB.XGDMatrixCreateFromCSR(c_array(ctypes.c_ulong, csr.indptr),
+                                                c_array(ctypes.c_uint, csr.indices),
+                                                c_array(ctypes.c_float, csr.data),
+                                                len(csr.indptr), len(csr.data),
+                                                ctypes.byref(self.handle)))
 
     def _init_from_csc(self, csc):
         """
@@ -172,45 +181,103 @@ class DMatrix(object):
         """
         if len(csc.indices) != len(csc.data):
             raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
-        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSC(
-            c_array(ctypes.c_ulong, csc.indptr),
-            c_array(ctypes.c_uint, csc.indices),
-            c_array(ctypes.c_float, csc.data),
-            len(csc.indptr), len(csc.data)))
+        self.handle = ctypes.c_void_p()
+        _check_call(_LIB.XGDMatrixCreateFromCSC(c_array(ctypes.c_ulong, csc.indptr),
+                                                c_array(ctypes.c_uint, csc.indices),
+                                                c_array(ctypes.c_float, csc.data),
+                                                len(csc.indptr), len(csc.data),
+                                                ctypes.byref(self.handle)))
 
     def _init_from_npy2d(self, mat, missing):
         """
         Initialize data from a 2-D numpy matrix.
         """
         data = np.array(mat.reshape(mat.size), dtype=np.float32)
-        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat(
-            data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
-            mat.shape[0], mat.shape[1], ctypes.c_float(missing)))
+        self.handle = ctypes.c_void_p()
+        _check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
+                                                mat.shape[0], mat.shape[1],
+                                                ctypes.c_float(missing),
+                                                ctypes.byref(self.handle)))
 
     def __del__(self):
-        xglib.XGDMatrixFree(self.handle)
+        _check_call(_LIB.XGDMatrixFree(self.handle))
 
     def get_float_info(self, field):
+        """Get float property from the DMatrix.
+
+        Parameters
+        ----------
+        field: str
+            The field name of the information
+
+        Returns
+        -------
+        info : array
+            a numpy array of float information of the data
+        """
         length = ctypes.c_ulong()
-        ret = xglib.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length))
+        ret = ctypes.POINTER(ctypes.c_float)()
+        _check_call(_LIB.XGDMatrixGetFloatInfo(self.handle,
+                                               c_str(field),
+                                               ctypes.byref(length),
+                                               ctypes.byref(ret)))
         return ctypes2numpy(ret, length.value, np.float32)
 
     def get_uint_info(self, field):
+        """Get unsigned integer property from the DMatrix.
+
+        Parameters
+        ----------
+        field: str
+            The field name of the information
+
+        Returns
+        -------
+        info : array
+            a numpy array of unsigned integer information of the data
+        """
         length = ctypes.c_ulong()
-        ret = xglib.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length))
+        ret = ctypes.POINTER(ctypes.c_uint)()
+        _check_call(_LIB.XGDMatrixGetUIntInfo(self.handle,
+                                              c_str(field),
+                                              ctypes.byref(length),
+                                              ctypes.byref(ret)))
         return ctypes2numpy(ret, length.value, np.uint32)
 
     def set_float_info(self, field, data):
-        xglib.XGDMatrixSetFloatInfo(self.handle, c_str(field),
-                                    c_array(ctypes.c_float, data), len(data))
+        """Set float type property into the DMatrix.
+
+        Parameters
+        ----------
+        field: str
+            The field name of the information
+
+        data: numpy array
+            The array of data to be set
+        """
+        _check_call(_LIB.XGDMatrixSetFloatInfo(self.handle,
+                                               c_str(field),
+                                               c_array(ctypes.c_float, data),
+                                               len(data)))
 
     def set_uint_info(self, field, data):
-        xglib.XGDMatrixSetUIntInfo(self.handle, c_str(field),
-                                   c_array(ctypes.c_uint, data), len(data))
+        """Set uint type property into the DMatrix.
+
+        Parameters
+        ----------
+        field: str
+            The field name of the information
+
+        data: numpy array
+            The array of data to be set
+        """
+        _check_call(_LIB.XGDMatrixSetUIntInfo(self.handle,
+                                              c_str(field),
+                                              c_array(ctypes.c_uint, data),
+                                              len(data)))
 
     def save_binary(self, fname, silent=True):
-        """
-        Save DMatrix to an XGBoost buffer.
+        """Save DMatrix to an XGBoost buffer.
 
         Parameters
         ----------
@@ -219,74 +286,78 @@ class DMatrix(object):
         silent : bool (optional; default: True)
             If set, the output is suppressed.
         """
-        xglib.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent))
+        _check_call(_LIB.XGDMatrixSaveBinary(self.handle,
+                                             c_str(fname),
+                                             int(silent)))
 
     def set_label(self, label):
-        """set label of dmatrix
-            Args:
-                label: list
-                       label for DMatrix
-            Returns:
-                None
+        """Set label of dmatrix
+
+        Parameters
+        ----------
+        label: array like
+            The label information to be set into DMatrix
         """
         self.set_float_info('label', label)
 
     def set_weight(self, weight):
-        """
-        Set weight of each instance.
+        """ Set weight of each instance.
 
         Parameters
         ----------
-        weight : float
-            Weight for positive instance.
+        weight : array like
+            Weight for each data point
         """
         self.set_float_info('weight', weight)
 
     def set_base_margin(self, margin):
-        """
-        set base margin of booster to start from
-        this can be used to specify a prediction value of
+        """ Set base margin of booster to start from.
+
+        This can be used to specify a prediction value of
         existing model to be base_margin
         However, remember margin is needed, instead of transformed prediction
         e.g. for logistic regression: need to put in value before logistic transformation
         see also example/demo.py
+
+        Parameters
+        ----------
+        margin: array like
+            Prediction margin of each datapoint
         """
         self.set_float_info('base_margin', margin)
 
     def set_group(self, group):
-        """
-        Set group size of DMatrix (used for ranking).
+        """Set group size of DMatrix (used for ranking).
 
         Parameters
         ----------
-        group : int
-            Group size.
+        group : array like
+            Group size of each group
         """
-        xglib.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group))
+        _check_call(_LIB.XGDMatrixSetGroup(self.handle,
+                                           c_array(ctypes.c_uint, group),
+                                           len(group)))
 
     def get_label(self):
-        """
-        Get the label of the DMatrix.
+        """Get the label of the DMatrix.
 
         Returns
         -------
-        label : list
+        label : array
         """
         return self.get_float_info('label')
 
     def get_weight(self):
-        """
-        Get the weight of the DMatrix.
+        """Get the weight of the DMatrix.
 
         Returns
         -------
-        weight : float
+        weight : array
         """
         return self.get_float_info('weight')
 
     def get_base_margin(self):
-        """
-        Get the base margin of the DMatrix.
+        """Get the base margin of the DMatrix.
 
         Returns
         -------
@@ -295,18 +366,19 @@ class DMatrix(object):
         return self.get_float_info('base_margin')
 
     def num_row(self):
-        """
-        Get the number of rows in the DMatrix.
+        """Get the number of rows in the DMatrix.
 
         Returns
         -------
         number of rows : int
         """
-        return xglib.XGDMatrixNumRow(self.handle)
+        ret = ctypes.c_ulong()
+        _check_call(_LIB.XGDMatrixNumRow(self.handle,
+                                         ctypes.byref(ret)))
+        return ret.value
 
     def slice(self, rindex):
-        """
-        Slice the DMatrix and return a new DMatrix that only contains `rindex`.
+        """Slice the DMatrix and return a new DMatrix that only contains `rindex`.
 
         Parameters
         ----------
@@ -319,13 +391,18 @@ class DMatrix(object):
             A new DMatrix containing only selected indices.
         """
         res = DMatrix(None)
-        res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix(
-            self.handle, c_array(ctypes.c_int, rindex), len(rindex)))
+        res.handle = ctypes.c_void_p()
+        _check_call(_LIB.XGDMatrixSliceDMatrix(self.handle,
+                                               c_array(ctypes.c_int, rindex),
+                                               len(rindex),
+                                               ctypes.byref(res.handle)))
         return res
 
 
 class Booster(object):
+    """A Booster of XGBoost."""
     def __init__(self, params=None, cache=(), model_file=None):
+        # pylint: disable=invalid-name
         """
         Learner class.
 
@@ -342,14 +419,15 @@ class Booster(object):
             if not isinstance(d, DMatrix):
                 raise TypeError('invalid cache item: {}'.format(type(d).__name__))
         dmats = c_array(ctypes.c_void_p, [d.handle for d in cache])
-        self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache)))
+        self.handle = ctypes.c_void_p()
+        _check_call(_LIB.XGBoosterCreate(dmats, len(cache), ctypes.byref(self.handle)))
         self.set_param({'seed': 0})
         self.set_param(params or {})
         if model_file is not None:
             self.load_model(model_file)
 
     def __del__(self):
-        xglib.XGBoosterFree(self.handle)
+        _LIB.XGBoosterFree(self.handle)
 
     def __getstate__(self):
         # can't pickle ctypes pointers
@@ -367,10 +445,11 @@ class Booster(object):
         if handle is not None:
             buf = handle
             dmats = c_array(ctypes.c_void_p, [])
-            handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, 0))
+            handle = ctypes.c_void_p()
+            _check_call(_LIB.XGBoosterCreate(dmats, 0, ctypes.byref(handle)))
             length = ctypes.c_ulong(len(buf))
             ptr = (ctypes.c_char * len(buf)).from_buffer(buf)
-            xglib.XGBoosterLoadModelFromBuffer(handle, ptr, length)
+            _check_call(_LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length))
             state['handle'] = handle
         self.__dict__.update(state)
         self.set_param({'seed': 0})
@@ -379,11 +458,10 @@ class Booster(object):
         return self.__deepcopy__()
 
     def __deepcopy__(self):
-        return Booster(model_file = self.save_raw())
+        return Booster(model_file=self.save_raw())
 
     def copy(self):
-        """
-        Copy the booster object
+        """Copy the booster object.
 
         Returns
         --------
@@ -391,15 +469,16 @@ class Booster(object):
         """
         return self.__copy__()
 
-    def set_param(self, params, pv=None):
+    def set_param(self, params, value=None):
+        """Set parameters into the Booster."""
         if isinstance(params, collections.Mapping):
             params = params.items()
-        elif isinstance(params, string_types) and pv is not None:
-            params = [(params, pv)]
-        for k, v in params:
-            xglib.XGBoosterSetParam(self.handle, c_str(k), c_str(str(v)))
+        elif isinstance(params, STRING_TYPES) and value is not None:
+            params = [(params, value)]
+        for key, val in params:
+            _check_call(_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))))
 
-    def update(self, dtrain, it, fobj=None):
+    def update(self, dtrain, iteration, fobj=None):
         """
         Update (one iteration).
 
@@ -407,7 +486,7 @@ class Booster(object):
         ----------
         dtrain : DMatrix
             Training data.
-        it : int
+        iteration : int
             Current iteration number.
         fobj : function
             Customized objective function.
@@ -415,7 +494,7 @@ class Booster(object):
         if not isinstance(dtrain, DMatrix):
             raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__))
         if fobj is None:
-            xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle)
+            _check_call(_LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle))
         else:
             pred = self.predict(dtrain)
             grad, hess = fobj(pred, dtrain)
@@ -438,20 +517,20 @@ class Booster(object):
             raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess)))
         if not isinstance(dtrain, DMatrix):
             raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__))
-        xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
-                                    c_array(ctypes.c_float, grad),
-                                    c_array(ctypes.c_float, hess),
-                                    len(grad))
+        _check_call(_LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle,
+                                               c_array(ctypes.c_float, grad),
+                                               c_array(ctypes.c_float, hess),
+                                               len(grad)))
 
-    def eval_set(self, evals, it=0, feval=None):
-        """
-        Evaluate by a metric.
+    def eval_set(self, evals, iteration=0, feval=None):
+        # pylint: disable=invalid-name
+        """Evaluate a set of data.
 
         Parameters
         ----------
         evals : list of tuples (DMatrix, string)
             List of items to be evaluated.
-        it : int
+        iteration : int
             Current iteration.
         feval : function
             Custom evaluation function.
@@ -464,20 +543,39 @@ class Booster(object):
             for d in evals:
                 if not isinstance(d[0], DMatrix):
                     raise TypeError('expected DMatrix, got {}'.format(type(d[0]).__name__))
-                if not isinstance(d[1], string_types):
+                if not isinstance(d[1], STRING_TYPES):
                     raise TypeError('expected string, got {}'.format(type(d[1]).__name__))
             dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals])
             evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals])
-            return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
+            msg = ctypes.c_char_p()
+            _check_call(_LIB.XGBoosterEvalOneIter(self.handle, iteration,
+                                                  dmats, evnames, len(evals),
+                                                  ctypes.byref(msg)))
+            return msg.value
         else:
-            res = '[%d]' % it
-            for dm, evname in evals:
-                name, val = feval(self.predict(dm), dm)
+            res = '[%d]' % iteration
+            for dmat, evname in evals:
+                name, val = feval(self.predict(dmat), dmat)
                 res += '\t%s-%s:%f' % (evname, name, val)
             return res
 
-    def eval(self, mat, name='eval', it=0):
-        return self.eval_set([(mat, name)], it)
+    def eval(self, data, name='eval', iteration=0):
+        """Evaluate the model on data.
+
+
+        Parameters
+        ----------
+        data : DMatrix
+            The dmatrix storing the input.
+
+        name : str (default = 'eval')
+            The name of the dataset
+
+
+        iteration : int (default = 0)
+            The current iteration number
+        """
+        return self.eval_set([(data, name)], iteration)
 
     def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False):
         """
@@ -492,10 +590,13 @@ class Booster(object):
         ----------
         data : DMatrix
             The dmatrix storing the input.
+
         output_margin : bool
             Whether to output the raw untransformed margin value.
+
         ntree_limit : int
             Limit number of trees in the prediction; defaults to 0 (use all trees).
+
         pred_leaf : bool
             When this option is on, the output will be a matrix of (nsample, ntrees)
             with each record indicating the predicted leaf index of each sample in each tree.
@@ -512,8 +613,11 @@ class Booster(object):
         if pred_leaf:
             option_mask |= 0x02
         length = ctypes.c_ulong()
-        preds = xglib.XGBoosterPredict(self.handle, data.handle,
-                                       option_mask, ntree_limit, ctypes.byref(length))
+        preds = ctypes.POINTER(ctypes.c_float)()
+        _check_call(_LIB.XGBoosterPredict(self.handle, data.handle,
+                                          option_mask, ntree_limit,
+                                          ctypes.byref(length),
+                                          ctypes.byref(preds)))
         preds = ctypes2numpy(preds, length.value, np.float32)
         if pred_leaf:
             preds = preds.astype(np.int32)
@@ -531,8 +635,8 @@ class Booster(object):
         fname : string
             Output file name
         """
-        if isinstance(fname, string_types):  # assume file name
-            xglib.XGBoosterSaveModel(self.handle, c_str(fname))
+        if isinstance(fname, STRING_TYPES):  # assume file name
+            _check_call(_LIB.XGBoosterSaveModel(self.handle, c_str(fname)))
         else:
             raise TypeError("fname must be a string")
 
@@ -545,8 +649,10 @@ class Booster(object):
         a in memory buffer represetation of the model
         """
         length = ctypes.c_ulong()
-        cptr = xglib.XGBoosterGetModelRaw(self.handle,
-                                          ctypes.byref(length))
+        cptr = ctypes.POINTER(ctypes.c_char)()
+        _check_call(_LIB.XGBoosterGetModelRaw(self.handle,
+                                              ctypes.byref(length),
+                                              ctypes.byref(cptr)))
         return ctypes2buffer(cptr, length.value)
 
     def load_model(self, fname):
@@ -559,59 +665,67 @@ class Booster(object):
             Input file name or memory buffer(see also save_raw)
         """
         if isinstance(fname, str):  # assume file name
-            xglib.XGBoosterLoadModel(self.handle, c_str(fname))
+            _check_call(_LIB.XGBoosterLoadModel(self.handle, c_str(fname)))
         else:
             buf = fname
             length = ctypes.c_ulong(len(buf))
             ptr = (ctypes.c_char * len(buf)).from_buffer(buf)
-            xglib.XGBoosterLoadModelFromBuffer(self.handle, ptr, length)
+            _check_call(_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length))
 
-    def dump_model(self, fo, fmap='', with_stats=False):
+    def dump_model(self, fout, fmap='', with_stats=False):
         """
         Dump model into a text file.
 
         Parameters
         ----------
-        fo : string
+        fout : string
             Output file name.
         fmap : string, optional
             Name of the file containing feature map names.
         with_stats : bool (optional)
             Controls whether the split statistics are output.
         """
-        if isinstance(fo, string_types):
-            fo = open(fo, 'w')
+        if isinstance(fout, STRING_TYPES):
+            fout = open(fout, 'w')
             need_close = True
         else:
             need_close = False
         ret = self.get_dump(fmap, with_stats)
         for i in range(len(ret)):
-            fo.write('booster[{}]:\n'.format(i))
-            fo.write(ret[i])
+            fout.write('booster[{}]:\n'.format(i))
+            fout.write(ret[i])
         if need_close:
-            fo.close()
+            fout.close()
 
     def get_dump(self, fmap='', with_stats=False):
         """
         Returns the dump the model as a list of strings.
         """
         length = ctypes.c_ulong()
-        sarr = xglib.XGBoosterDumpModel(self.handle, c_str(fmap),
-                                        int(with_stats), ctypes.byref(length))
+        sarr = ctypes.POINTER(ctypes.c_char_p)()
+        _check_call(_LIB.XGBoosterDumpModel(self.handle,
+                                            c_str(fmap),
+                                            int(with_stats),
+                                            ctypes.byref(length),
+                                            ctypes.byref(sarr)))
         res = []
         for i in range(length.value):
             res.append(str(sarr[i].decode('ascii')))
         return res
 
     def get_fscore(self, fmap=''):
-        """
-        Get feature importance of each feature.
+        """Get feature importance of each feature.
+
+        Parameters
+        ----------
+        fmap: str (optional)
+           The name of feature map file
         """
         trees = self.get_dump(fmap)
         fmap = {}
         for tree in trees:
-            for l in tree.split('\n'):
-                arr = l.split('[')
+            for line in tree.split('\n'):
+                arr = line.split('[')
                 if len(arr) == 1:
                     continue
                 fid = arr[1].split(']')[0]
@@ -624,9 +738,9 @@ class Booster(object):
 
 
 def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
-    early_stopping_rounds=None,evals_result=None):
-    """
-    Train a booster with given parameters.
+          early_stopping_rounds=None, evals_result=None):
+    # pylint: disable=too-many-statements,too-many-branches, attribute-defined-outside-init
+    """Train a booster with given parameters.
 
     Parameters
     ----------
@@ -663,7 +777,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
     bst = Booster(params, [dtrain] + [d[0] for d in evals])
 
     if evals_result is not None:
-        if type(evals_result) is not dict:
+        if not isinstance(evals_result, dict):
             raise TypeError('evals_result has to be a dictionary')
         else:
             evals_name = [d[1] for d in evals]
@@ -675,37 +789,38 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
             bst.update(dtrain, i, obj)
             if len(evals) != 0:
                 bst_eval_set = bst.eval_set(evals, i, feval)
-                if isinstance(bst_eval_set, string_types):
+                if isinstance(bst_eval_set, STRING_TYPES):
                     msg = bst_eval_set
                 else:
                     msg = bst_eval_set.decode()
 
                 sys.stderr.write(msg + '\n')
                 if evals_result is not None:
-                    res = re.findall(":([0-9.]+).",msg)
-                    for key,val in zip(evals_name,res):
+                    res = re.findall(":([0-9.]+).", msg)
+                    for key, val in zip(evals_name, res):
                         evals_result[key].append(val)
         return bst
 
     else:
         # early stopping
-
         if len(evals) < 1:
             raise ValueError('For early stopping you need at least one set in evals.')
 
-        sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds))
+        sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(\
+                evals[-1][1], early_stopping_rounds))
 
         # is params a list of tuples? are we using multiple eval metrics?
-        if type(params) == list:
+        if isinstance(params, list):
             if len(params) != len(dict(params).items()):
-                raise ValueError('Check your params. Early stopping works with single eval metric only.')
+                raise ValueError('Check your params. '\
+                                     'Early stopping works with single eval metric only.')
             params = dict(params)
 
         # either minimize loss or maximize AUC/MAP/NDCG
         maximize_score = False
         if 'eval_metric' in params:
             maximize_metrics = ('auc', 'map', 'ndcg')
-            if list(filter(lambda x: params['eval_metric'].startswith(x), maximize_metrics)):
+            if any(params['eval_metric'].startswith(x) for x in maximize_metrics):
                 maximize_score = True
 
         if maximize_score:
@@ -720,7 +835,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
             bst.update(dtrain, i, obj)
             bst_eval_set = bst.eval_set(evals, i, feval)
 
-            if isinstance(bst_eval_set, string_types):
+            if isinstance(bst_eval_set, STRING_TYPES):
                 msg = bst_eval_set
             else:
                 msg = bst_eval_set.decode()
@@ -728,8 +843,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
             sys.stderr.write(msg + '\n')
 
             if evals_result is not None:
-                res = re.findall(":([0-9.]+).",msg)
-                for key,val in zip(evals_name,res):
+                res = re.findall(":([0-9.]+).", msg)
+                for key, val in zip(evals_name, res):
                     evals_result[key].append(val)
 
             score = float(msg.rsplit(':', 1)[1])
@@ -748,17 +863,21 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         return bst
 
 class CVPack(object):
+    """Auxiliary datastruct to hold one fold of CV."""
     def __init__(self, dtrain, dtest, param):
+        """Initialize the CVPack"""
         self.dtrain = dtrain
         self.dtest = dtest
         self.watchlist = [(dtrain, 'train'), (dtest, 'test')]
         self.bst = Booster(param, [dtrain, dtest])
 
-    def update(self, r, fobj):
-        self.bst.update(self.dtrain, r, fobj)
+    def update(self, iteration, fobj):
+        """Update the boosters for one iteration"""
+        self.bst.update(self.dtrain, iteration, fobj)
 
-    def eval(self, r, feval):
-        return self.bst.eval_set(self.watchlist, r, feval)
+    def eval(self, iteration, feval):
+        """Evaluate the CVPack for one iteration."""
+        return self.bst.eval_set(self.watchlist, iteration, feval)
 
 
 def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None):
@@ -785,6 +904,7 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None):
 
 
 def aggcv(rlist, show_stdv=True):
+    # pylint: disable=invalid-name
     """
     Aggregate cross-validation results.
     """
@@ -794,7 +914,7 @@ def aggcv(rlist, show_stdv=True):
         arr = line.split()
         assert ret == arr[0]
         for it in arr[1:]:
-            if not isinstance(it, string_types):
+            if not isinstance(it, STRING_TYPES):
                 it = it.decode()
             k, v = it.split(':')
             if k not in cvmap:
@@ -802,7 +922,7 @@ def aggcv(rlist, show_stdv=True):
             cvmap[k].append(float(v))
     for k, v in sorted(cvmap.items(), key=lambda x: x[0]):
         v = np.array(v)
-        if not isinstance(ret, string_types):
+        if not isinstance(ret, STRING_TYPES):
             ret = ret.decode()
         if show_stdv:
             ret += '\tcv-%s:%f+%f' % (k, np.mean(v), np.std(v))
@@ -813,8 +933,8 @@ def aggcv(rlist, show_stdv=True):
 
 def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
        obj=None, feval=None, fpreproc=None, show_stdv=True, seed=0):
-    """
-    Cross-validation with given paramaters.
+    # pylint: disable = invalid-name
+    """Cross-validation with given paramaters.
 
     Parameters
     ----------
@@ -847,8 +967,8 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
     results = []
     cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc)
     for i in range(num_boost_round):
-        for f in cvfolds:
-            f.update(i, obj)
+        for fold in cvfolds:
+            fold.update(i, obj)
         res = aggcv([f.eval(i, feval) for f in cvfolds], show_stdv)
         sys.stderr.write(res + '\n')
         results.append(res)
@@ -857,16 +977,16 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
 
 # used for compatiblity without sklearn
 XGBModelBase = object
-XGBClassifier = object
-XGBRegressor = object
+XGBClassifierBase = object
+XGBRegressorBase = object
 if SKLEARN_INSTALLED:
     XGBModelBase = BaseEstimator
-    XGBRegressor = RegressorMixin
-    XGBClassifier = ClassifierMixin
+    XGBRegressorBase = RegressorMixin
+    XGBClassifierBase = ClassifierMixin
 
 class XGBModel(XGBModelBase):
-    """
-    Implementation of the Scikit-Learn API for XGBoost.
+    # pylint: disable=too-many-arguments, too-many-instance-attributes, invalid-name
+    """Implementation of the Scikit-Learn API for XGBoost.
 
     Parameters
     ----------
@@ -902,8 +1022,10 @@ class XGBModel(XGBModelBase):
         Value in the data which needs to be present as a missing value. If
         None, defaults to np.nan.
     """
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear",
-                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100,
+                 silent=True, objective="reg:linear",
+                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0,
+                 subsample=1, colsample_bytree=1,
                  base_score=0.5, seed=0, missing=None):
         if not SKLEARN_INSTALLED:
             raise XGBoostError('sklearn needs to be installed in order to use this module')
@@ -923,7 +1045,6 @@ class XGBModel(XGBModelBase):
         self.base_score = base_score
         self.seed = seed
         self.missing = missing if missing is not None else np.nan
-
         self._Booster = None
 
     def __setstate__(self, state):
@@ -936,9 +1057,9 @@ class XGBModel(XGBModelBase):
         self.__dict__.update(state)
 
     def booster(self):
-        """
-        get the underlying xgboost Booster of this model
-        will raise an exception when fit was not called
+        """Get the underlying xgboost Booster of this model.
+
+        This will raise an exception when fit was not called
 
         Returns
         -------
@@ -949,12 +1070,14 @@ class XGBModel(XGBModelBase):
         return self._Booster
 
     def get_params(self, deep=False):
+        """Get parameters."""
         params = super(XGBModel, self).get_params(deep=deep)
         if params['missing'] is np.nan:
             params['missing'] = None  # sklearn doesn't handle nan. see #4725
         return params
 
     def get_xgb_params(self):
+        """Get xgboost type parameters."""
         xgb_params = self.get_params()
 
         xgb_params['silent'] = 1 if self.silent else 0
@@ -963,30 +1086,39 @@ class XGBModel(XGBModelBase):
             xgb_params.pop('nthread', None)
         return xgb_params
 
-    def fit(self, X, y):
-        trainDmatrix = DMatrix(X, label=y, missing=self.missing)
-        self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_estimators)
+    def fit(self, data, y):
+        # pylint: disable=missing-docstring,invalid-name
+        train_dmatrix = DMatrix(data, label=y, missing=self.missing)
+        self._Booster = train(self.get_xgb_params(), train_dmatrix, self.n_estimators)
         return self
 
-    def predict(self, X):
-        testDmatrix = DMatrix(X, missing=self.missing)
-        return self.booster().predict(testDmatrix)
+    def predict(self, data):
+        # pylint: disable=missing-docstring,invalid-name
+        test_dmatrix = DMatrix(data, missing=self.missing)
+        return self.booster().predict(test_dmatrix)
 
 
-class XGBClassifier(XGBModel, XGBClassifier):
+class XGBClassifier(XGBModel, XGBClassifierBase):
+    # pylint: disable=missing-docstring,too-many-arguments,invalid-name
     __doc__ = """
     Implementation of the scikit-learn API for XGBoost classification
     """ + "\n".join(XGBModel.__doc__.split('\n')[2:])
 
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic",
-                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+    def __init__(self, max_depth=3, learning_rate=0.1,
+                 n_estimators=100, silent=True,
+                 objective="binary:logistic",
+                 nthread=-1, gamma=0, min_child_weight=1,
+                 max_delta_step=0, subsample=1, colsample_bytree=1,
                  base_score=0.5, seed=0, missing=None):
-        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective,
-                                            nthread, gamma, min_child_weight, max_delta_step, subsample,
+        super(XGBClassifier, self).__init__(max_depth, learning_rate,
+                                            n_estimators, silent, objective,
+                                            nthread, gamma, min_child_weight,
+                                            max_delta_step, subsample,
                                             colsample_bytree,
                                             base_score, seed, missing)
 
     def fit(self, X, y, sample_weight=None):
+        # pylint: disable = attribute-defined-outside-init,arguments-differ
         self.classes_ = list(np.unique(y))
         self.n_classes_ = len(self.classes_)
         if self.n_classes_ > 2:
@@ -1001,29 +1133,29 @@ class XGBClassifier(XGBModel, XGBClassifier):
         training_labels = self._le.transform(y)
 
         if sample_weight is not None:
-            trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
-                                   missing=self.missing)
+            train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
+                                    missing=self.missing)
         else:
-            trainDmatrix = DMatrix(X, label=training_labels,
-                                   missing=self.missing)
+            train_dmatrix = DMatrix(X, label=training_labels,
+                                    missing=self.missing)
 
-        self._Booster = train(xgb_options, trainDmatrix, self.n_estimators)
+        self._Booster = train(xgb_options, train_dmatrix, self.n_estimators)
 
         return self
 
-    def predict(self, X):
-        testDmatrix = DMatrix(X, missing=self.missing)
-        class_probs = self.booster().predict(testDmatrix)
+    def predict(self, data):
+        test_dmatrix = DMatrix(data, missing=self.missing)
+        class_probs = self.booster().predict(test_dmatrix)
         if len(class_probs.shape) > 1:
             column_indexes = np.argmax(class_probs, axis=1)
         else:
-            column_indexes = np.repeat(0, X.shape[0])
+            column_indexes = np.repeat(0, data.shape[0])
             column_indexes[class_probs > 0.5] = 1
         return self._le.inverse_transform(column_indexes)
 
-    def predict_proba(self, X):
-        testDmatrix = DMatrix(X, missing=self.missing)
-        class_probs = self.booster().predict(testDmatrix)
+    def predict_proba(self, data):
+        test_dmatrix = DMatrix(data, missing=self.missing)
+        class_probs = self.booster().predict(test_dmatrix)
         if self.objective == "multi:softprob":
             return class_probs
         else:
@@ -1031,9 +1163,8 @@ class XGBClassifier(XGBModel, XGBClassifier):
             classzero_probs = 1.0 - classone_probs
             return np.vstack((classzero_probs, classone_probs)).transpose()
 
-class XGBRegressor(XGBModel, XGBRegressor):
+class XGBRegressor(XGBModel, XGBRegressorBase):
+    # pylint: disable=missing-docstring
     __doc__ = """
     Implementation of the scikit-learn API for XGBoost regression
     """ + "\n".join(XGBModel.__doc__.split('\n')[2:])
-
-    pass
diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp
index 4d7828faf..18c1eae49 100644
--- a/wrapper/xgboost_wrapper.cpp
+++ b/wrapper/xgboost_wrapper.cpp
@@ -1,3 +1,4 @@
+// Copyright (c) 2014 by Contributors
 // implementations in ctypes
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
@@ -7,6 +8,7 @@
 #include <cstring>
 #include <cmath>
 #include <algorithm>
+#include <exception>
 // include all std functions
 using namespace std;
 #include "./xgboost_wrapper.h"
@@ -31,9 +33,11 @@ class Booster: public learner::BoostLearner {
     this->init_model = false;
     this->SetCacheData(mats);
   }
-  inline const float *Pred(const DataMatrix &dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) {
+  inline const float *Pred(const DataMatrix &dmat, int option_mask,
+                           unsigned ntree_limit, bst_ulong *len) {
     this->CheckInitModel();
-    this->Predict(dmat, (option_mask&1) != 0, &this->preds_, ntree_limit, (option_mask&2) != 0);
+    this->Predict(dmat, (option_mask&1) != 0, &this->preds_,
+                  ntree_limit, (option_mask&2) != 0);
     *len = static_cast<bst_ulong>(this->preds_.size());
     return BeginPtr(this->preds_);
   }
@@ -57,9 +61,9 @@ class Booster: public learner::BoostLearner {
     this->init_model = true;
   }
   inline void LoadModelFromBuffer(const void *buf, size_t size) {
-    utils::MemoryFixSizeBuffer fs((void*)buf, size);
+    utils::MemoryFixSizeBuffer fs((void*)buf, size);  // NOLINT(*)
     learner::BoostLearner::LoadModel(fs, true);
-    this->init_model = true;    
+    this->init_model = true;
   }
   inline const char *GetModelRaw(bst_ulong *out_len) {
     this->CheckInitModel();
@@ -94,251 +98,459 @@ class Booster: public learner::BoostLearner {
  private:
   bool init_model;
 };
+
+// helper to support threadlocal
+struct ThreadLocalStore {
+  std::vector<std::string*> data;
+  // allocate a string
+  inline std::string *Alloc() {
+    mutex.Lock();
+    data.push_back(new std::string());
+    std::string *ret = data.back();
+    mutex.Unlock();
+    return ret;
+  }
+  ThreadLocalStore() {
+    mutex.Init();
+  }
+  ~ThreadLocalStore() {
+    for (size_t i = 0; i < data.size(); ++i) {
+      delete data[i];
+    }
+    mutex.Destroy();
+  }
+  utils::Mutex mutex;
+};
+
+static ThreadLocalStore thread_local_store;
 }  // namespace wrapper
 }  // namespace xgboost
 
 using namespace xgboost::wrapper;
 
-extern "C"{
-  void* XGDMatrixCreateFromFile(const char *fname, int silent) {
-    return LoadDataMatrix(fname, silent != 0, false, false);
+/*! \brief  macro to guard beginning and end section of all functions */
+#define API_BEGIN() try {
+/*!
+ * \brief every function starts with API_BEGIN(); and finishes with API_END();
+ * \param Finalize optionally put in a finalizer
+ */
+#define API_END(Finalize) } catch(std::exception &e) {  \
+    Finalize; return XGBHandleException(e);             \
+  } return 0;
+
+// do not use threadlocal on OSX since it is not always available
+#ifndef DISABLE_THREAD_LOCAL
+#ifdef __GNUC__
+  #define XGB_TREAD_LOCAL __thread
+#elif __STDC_VERSION__ >= 201112L
+  #define XGB_TREAD_LOCAL _Thread_local
+#elif defined(_MSC_VER)
+  #define XGB_TREAD_LOCAL __declspec(thread)
+#endif
+#endif
+
+#ifndef XGB_TREAD_LOCAL
+#pragma message("Warning: Threadlocal not enabled, used single thread error handling")
+#define XGB_TREAD_LOCAL
+#endif
+
+/*!
+ * \brief a helper function for error handling
+ *  will set the last error to be str_set when it is not NULL
+ * \param str_set the error to set
+ * \return a pointer to the last error message
+ */
+const char *XGBSetGetLastError_(const char *str_set) {
+  // use last_error to record last error
+  static XGB_TREAD_LOCAL std::string *last_error = NULL;
+  if (last_error == NULL) {
+    last_error = thread_local_store.Alloc();
   }
-  void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
-                               const unsigned *indices,
-                               const float *data,
-                               bst_ulong nindptr,
-                               bst_ulong nelem) {
-    DMatrixSimple *p_mat = new DMatrixSimple();
-    DMatrixSimple &mat = *p_mat;
-    mat.row_ptr_.resize(nindptr);
-    for (bst_ulong i = 0; i < nindptr; ++i) {
-      mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
-    }
-    mat.row_data_.resize(nelem);
-    for (bst_ulong i = 0; i < nelem; ++i) {
-      mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]);
-      mat.info.info.num_col = std::max(mat.info.info.num_col,
-                                       static_cast<size_t>(indices[i]+1));
-    }
-    mat.info.info.num_row = nindptr - 1;
-    return p_mat;
+  if (str_set != NULL) {
+    *last_error = str_set;
   }
-  XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
-                                       const unsigned *indices,
-                                       const float *data,
-                                       bst_ulong nindptr,
-                                       bst_ulong nelem) {
-    int nthread;
-    #pragma omp parallel
-    {
-      nthread = omp_get_num_threads();
-    }
-    
-    DMatrixSimple *p_mat = new DMatrixSimple();
-    DMatrixSimple &mat = *p_mat;
-    utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
-    builder.InitBudget(0, nthread);
-    long ncol = static_cast<long>(nindptr - 1);
-    #pragma omp parallel for schedule(static)
-    for (long i = 0; i < ncol; ++i) {
-      int tid = omp_get_thread_num();
-      for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
-        builder.AddBudget(indices[j], tid);
-      }
-    }
-    builder.InitStorage();
-    #pragma omp parallel for schedule(static)
-    for (long i = 0; i < ncol; ++i) {
-      int tid = omp_get_thread_num();
-      for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
-        builder.Push(indices[j],
-                     RowBatch::Entry(static_cast<bst_uint>(i), data[j]),
-                     tid);
-      }
-    }
-    mat.info.info.num_row = mat.row_ptr_.size() - 1;
-    mat.info.info.num_col = static_cast<size_t>(ncol);
-    return p_mat;
+  return last_error->c_str();
+}
+
+/*! \brief return str message of the last error */
+const char *XGBGetLastError() {
+  return XGBSetGetLastError_(NULL);
+}
+
+/*!
+ * \brief handle a thrown exception
+ * \param e the exception
+ * \return the return value of API after exception is handled
+ */
+int XGBHandleException(const std::exception &e) {
+  XGBSetGetLastError_(e.what());
+  return -1;
+}
+
+int XGDMatrixCreateFromFile(const char *fname,
+                            int silent,
+                            DMatrixHandle *out) {
+  API_BEGIN();
+  *out = LoadDataMatrix(fname, silent != 0, false, false);
+  API_END();
+}
+
+int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
+                           const unsigned *indices,
+                           const float *data,
+                           bst_ulong nindptr,
+                           bst_ulong nelem,
+                           DMatrixHandle *out) {
+  DMatrixSimple *p_mat = NULL;
+  API_BEGIN();
+  p_mat = new DMatrixSimple();
+  DMatrixSimple &mat = *p_mat;
+  mat.row_ptr_.resize(nindptr);
+  for (bst_ulong i = 0; i < nindptr; ++i) {
+    mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
   }
-  void* XGDMatrixCreateFromMat(const float *data,
-                               bst_ulong nrow,
-                               bst_ulong ncol,
-                               float  missing) {    
-    bool nan_missing = utils::CheckNAN(missing);
-    DMatrixSimple *p_mat = new DMatrixSimple();
-    DMatrixSimple &mat = *p_mat;
-    mat.info.info.num_row = nrow;
-    mat.info.info.num_col = ncol;
-    for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
-      bst_ulong nelem = 0;
-      for (bst_ulong j = 0; j < ncol; ++j) {
-        if (utils::CheckNAN(data[j])) {
-          utils::Check(nan_missing,
-                       "There are NAN in the matrix, however, you did not set missing=NAN"); 
-        } else {
-          if (nan_missing || data[j] != missing) {
-            mat.row_data_.push_back(RowBatch::Entry(j, data[j]));
-            ++nelem;
-          }
+  mat.row_data_.resize(nelem);
+  for (bst_ulong i = 0; i < nelem; ++i) {
+    mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]);
+    mat.info.info.num_col = std::max(mat.info.info.num_col,
+                                     static_cast<size_t>(indices[i]+1));
+  }
+  mat.info.info.num_row = nindptr - 1;
+  *out = p_mat;
+  API_END(delete p_mat);
+}
+
+int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
+                           const unsigned *indices,
+                           const float *data,
+                           bst_ulong nindptr,
+                           bst_ulong nelem,
+                           DMatrixHandle *out) {
+  DMatrixSimple *p_mat = NULL;
+  API_BEGIN();
+  int nthread;
+  #pragma omp parallel
+  {
+    nthread = omp_get_num_threads();
+  }
+  p_mat = new DMatrixSimple();
+  DMatrixSimple &mat = *p_mat;
+  utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
+  builder.InitBudget(0, nthread);
+  long ncol = static_cast<long>(nindptr - 1);  // NOLINT(*)
+  // j is bst_ulong like col_ptr: an unsigned counter would truncate big offsets
+  #pragma omp parallel for schedule(static)
+  for (long i = 0; i < ncol; ++i) {  // NOLINT(*)
+    int tid = omp_get_thread_num();
+    for (bst_ulong j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
+      builder.AddBudget(indices[j], tid);
+    }
+  }
+  builder.InitStorage();
+  #pragma omp parallel for schedule(static)
+  for (long i = 0; i < ncol; ++i) {  // NOLINT(*)
+    int tid = omp_get_thread_num();
+    for (bst_ulong j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
+      builder.Push(indices[j],
+                   RowBatch::Entry(static_cast<bst_uint>(i), data[j]), tid);
+    }
+  }
+  mat.info.info.num_row = mat.row_ptr_.size() - 1;
+  mat.info.info.num_col = static_cast<size_t>(ncol);
+  *out = p_mat;
+  API_END(delete p_mat);
+}
+
+int XGDMatrixCreateFromMat(const float *data,
+                           bst_ulong nrow,
+                           bst_ulong ncol,
+                           float  missing,
+                           DMatrixHandle *out) {
+  DMatrixSimple *p_mat = NULL;
+  API_BEGIN();
+  p_mat = new DMatrixSimple();
+  bool nan_missing = utils::CheckNAN(missing);
+  DMatrixSimple &mat = *p_mat;
+  mat.info.info.num_row = nrow;
+  mat.info.info.num_col = ncol;
+  for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
+    bst_ulong nelem = 0;
+    for (bst_ulong j = 0; j < ncol; ++j) {
+      if (utils::CheckNAN(data[j])) {
+        utils::Check(nan_missing,
+                     "There are NAN in the matrix, however, you did not set missing=NAN");
+      } else {
+        if (nan_missing || data[j] != missing) {
+          mat.row_data_.push_back(RowBatch::Entry(j, data[j]));
+          ++nelem;
         }
       }
-      mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
     }
-    return p_mat;
-  }
-  void* XGDMatrixSliceDMatrix(void *handle,
-                              const int *idxset,
-                              bst_ulong len) {
-    DMatrixSimple tmp;
-    DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
-    if (dsrc.magic != DMatrixSimple::kMagic) {
-      tmp.CopyFrom(dsrc);
-    }
-    DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
-                       *static_cast<DMatrixSimple*>(handle): tmp);
-    DMatrixSimple *p_ret = new DMatrixSimple();
-    DMatrixSimple &ret = *p_ret;
-
-    utils::Check(src.info.group_ptr.size() == 0,
-                 "slice does not support group structure");
-    ret.Clear();
-    ret.info.info.num_row = len;
-    ret.info.info.num_col = src.info.num_col();
-
-    utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
-    iter->BeforeFirst();
-    utils::Assert(iter->Next(), "slice");
-    const RowBatch &batch = iter->Value();
-    for (bst_ulong i = 0; i < len; ++i) {
-      const int ridx = idxset[i];
-      RowBatch::Inst inst = batch[ridx];
-      utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceed number of rows");
-      ret.row_data_.resize(ret.row_data_.size() + inst.length);
-      memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
-             sizeof(RowBatch::Entry) * inst.length);
-      ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
-      if (src.info.labels.size() != 0) {
-        ret.info.labels.push_back(src.info.labels[ridx]);
-      }
-      if (src.info.weights.size() != 0) {
-        ret.info.weights.push_back(src.info.weights[ridx]);
-      }
-      if (src.info.info.root_index.size() != 0) {
-        ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
-      }
-      if (src.info.info.fold_index.size() != 0) {
-        ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]);
-      }
-    }
-    return p_ret;
-  }
-  void XGDMatrixFree(void *handle) {
-    delete static_cast<DataMatrix*>(handle);
-  }
-  void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
-    SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent != 0);
-  }
-  void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) {
-    std::vector<float> &vec = 
-        static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
-    vec.resize(len);
-    memcpy(BeginPtr(vec), info, sizeof(float) * len);
-  }
-  void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
-    std::vector<unsigned> &vec =
-        static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
-    vec.resize(len);
-    memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
-  }
-  void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
-    DataMatrix *pmat = static_cast<DataMatrix*>(handle);
-    pmat->info.group_ptr.resize(len + 1);
-    pmat->info.group_ptr[0] = 0;
-    for (uint64_t i = 0; i < len; ++i) {
-      pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i];
-    }
-  }
-  const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) {
-    const std::vector<float> &vec =
-        static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
-    *len = static_cast<bst_ulong>(vec.size());
-    return BeginPtr(vec);
-  }
-  const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
-    const std::vector<unsigned> &vec =
-        static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
-    *len = static_cast<bst_ulong>(vec.size());
-    return BeginPtr(vec);
-  }
-  bst_ulong XGDMatrixNumRow(const void *handle) {
-    return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
-  }
-
-  // xgboost implementation
-  void *XGBoosterCreate(void *dmats[], bst_ulong len) {
-    std::vector<DataMatrix*> mats;
-    for (bst_ulong i = 0; i < len; ++i) {
-      DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
-      mats.push_back(dtr);
-    }
-    return new Booster(mats);
-  }
-  void XGBoosterFree(void *handle) {
-    delete static_cast<Booster*>(handle);
-  }
-  void XGBoosterSetParam(void *handle, const char *name, const char *value) {
-    static_cast<Booster*>(handle)->SetParam(name, value);
-  }
-  void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
-    Booster *bst = static_cast<Booster*>(handle);
-    DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
-    bst->CheckInitModel();
-    bst->CheckInit(dtr);
-    bst->UpdateOneIter(iter, *dtr);
-  }
-  void XGBoosterBoostOneIter(void *handle, void *dtrain,
-                             float *grad, float *hess, bst_ulong len) {
-    Booster *bst = static_cast<Booster*>(handle);
-    DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
-    bst->CheckInitModel();
-    bst->CheckInit(dtr);
-    bst->BoostOneIter(*dtr, grad, hess, len);
-  }
-  const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
-                                   const char *evnames[], bst_ulong len) {
-    Booster *bst = static_cast<Booster*>(handle);
-    std::vector<std::string> names;
-    std::vector<const DataMatrix*> mats;
-    for (bst_ulong i = 0; i < len; ++i) {
-      mats.push_back(static_cast<DataMatrix*>(dmats[i]));
-      names.push_back(std::string(evnames[i]));
-    }
-    bst->CheckInitModel();
-    bst->eval_str = bst->EvalOneIter(iter, mats, names);
-    return bst->eval_str.c_str();
-  }
-  const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) {
-    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), option_mask, ntree_limit, len);
-  }
-  void XGBoosterLoadModel(void *handle, const char *fname) {
-    static_cast<Booster*>(handle)->LoadModel(fname);
-  }
-  void XGBoosterSaveModel(void *handle, const char *fname) {
-    Booster *bst = static_cast<Booster*>(handle);
-    bst->CheckInitModel();
-    bst->SaveModel(fname, false);
-  }
-  void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len) {
-    static_cast<Booster*>(handle)->LoadModelFromBuffer(buf, len);
-  }
-  const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len) {
-    return static_cast<Booster*>(handle)->GetModelRaw(out_len);
-  }
-  const char** XGBoosterDumpModel(void *handle, const char *fmap, int with_stats, bst_ulong *len){
-    utils::FeatMap featmap;
-    if (strlen(fmap) != 0) {
-      featmap.LoadText(fmap);
-    }
-    return static_cast<Booster*>(handle)->GetModelDump(featmap, with_stats != 0, len);
+    mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
   }
+  *out = p_mat;
+  API_END(delete p_mat);
+}
+
+int XGDMatrixSliceDMatrix(DMatrixHandle handle,
+                          const int *idxset,
+                          bst_ulong len,
+                          DMatrixHandle *out) {
+  DMatrixSimple *p_ret = NULL;
+  API_BEGIN();
+  DMatrixSimple tmp;
+  DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
+  if (dsrc.magic != DMatrixSimple::kMagic) {
+    tmp.CopyFrom(dsrc);
+  }
+  DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
+                     *static_cast<DMatrixSimple*>(handle): tmp);
+  p_ret = new DMatrixSimple();
+  DMatrixSimple &ret = *p_ret;
+  // reject grouped input: the row-wise copy below does not rebuild group_ptr
+  utils::Check(src.info.group_ptr.size() == 0,
+               "slice does not support group structure");
+  ret.Clear();
+  ret.info.info.num_row = len;
+  ret.info.info.num_col = src.info.num_col();
+  // rows come from the first batch only; check each index before batch[ridx]
+  utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
+  iter->BeforeFirst();
+  utils::Assert(iter->Next(), "slice");
+  const RowBatch &batch = iter->Value();
+  for (bst_ulong i = 0; i < len; ++i) {
+    const int ridx = idxset[i];
+    utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceed number of rows");
+    RowBatch::Inst inst = batch[ridx];
+    ret.row_data_.resize(ret.row_data_.size() + inst.length);
+    memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
+           sizeof(RowBatch::Entry) * inst.length);
+    ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
+    if (src.info.labels.size() != 0) {
+      ret.info.labels.push_back(src.info.labels[ridx]);
+    }
+    if (src.info.weights.size() != 0) {
+      ret.info.weights.push_back(src.info.weights[ridx]);
+    }
+    if (src.info.info.root_index.size() != 0) {
+      ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
+    }
+    if (src.info.info.fold_index.size() != 0) {
+      ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]);
+    }
+  }
+  *out = p_ret;
+  API_END(delete p_ret);
+}
+
+int XGDMatrixFree(DMatrixHandle handle) {
+  API_BEGIN();
+  delete static_cast<DataMatrix*>(handle);
+  API_END();
+}
+
+int XGDMatrixSaveBinary(DMatrixHandle handle,
+                        const char *fname,
+                        int silent) {
+  API_BEGIN();
+  SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent != 0);
+  API_END();
+}
+
+int XGDMatrixSetFloatInfo(DMatrixHandle handle,
+                          const char *field,
+                          const float *info,
+                          bst_ulong len) {
+  API_BEGIN();
+  std::vector<float> &vec =
+      static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
+  vec.resize(len);
+  memcpy(BeginPtr(vec), info, sizeof(float) * len);
+  API_END();
+}
+
+int XGDMatrixSetUIntInfo(DMatrixHandle handle,
+                         const char *field,
+                         const unsigned *info,
+                         bst_ulong len) {
+  API_BEGIN();
+  std::vector<unsigned> &vec =
+      static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
+  vec.resize(len);
+  memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
+  API_END();
+}
+
+int XGDMatrixSetGroup(DMatrixHandle handle,
+                      const unsigned *group,
+                      bst_ulong len) {
+  API_BEGIN();
+  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
+  pmat->info.group_ptr.resize(len + 1);
+  pmat->info.group_ptr[0] = 0;
+  for (uint64_t i = 0; i < len; ++i) {
+    pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i];
+  }
+  API_END();
+}
+
+int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
+                          const char *field,
+                          bst_ulong *out_len,
+                          const float **out_dptr) {
+  API_BEGIN();
+  const std::vector<float> &vec =
+      static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
+  *out_len = static_cast<bst_ulong>(vec.size());
+  *out_dptr = BeginPtr(vec);
+  API_END();
+}
+
+int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
+                         const char *field,
+                         bst_ulong *out_len,
+                         const unsigned **out_dptr) {
+  API_BEGIN();
+  const std::vector<unsigned> &vec =
+      static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
+  *out_len = static_cast<bst_ulong>(vec.size());
+  *out_dptr = BeginPtr(vec);
+  API_END();
+}
+int XGDMatrixNumRow(const DMatrixHandle handle,
+                    bst_ulong *out) {
+  API_BEGIN();
+  *out = static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
+  API_END();
+}
+
+// xgboost implementation
+int XGBoosterCreate(DMatrixHandle dmats[],
+                    bst_ulong len,
+                    BoosterHandle *out) {
+  API_BEGIN();
+  std::vector<DataMatrix*> mats;
+  for (bst_ulong i = 0; i < len; ++i) {
+    DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
+    mats.push_back(dtr);
+  }
+  *out = new Booster(mats);
+  API_END();
+}
+
+int XGBoosterFree(BoosterHandle handle) {
+  API_BEGIN();
+  delete static_cast<Booster*>(handle);
+  API_END();
+}
+
+int XGBoosterSetParam(BoosterHandle handle,
+                      const char *name, const char *value) {
+  API_BEGIN();
+  static_cast<Booster*>(handle)->SetParam(name, value);
+  API_END();
+}
+
+int XGBoosterUpdateOneIter(BoosterHandle handle,
+                           int iter,
+                           DMatrixHandle dtrain) {
+  API_BEGIN();
+  Booster *bst = static_cast<Booster*>(handle);
+  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
+  bst->CheckInitModel();
+  bst->CheckInit(dtr);
+  bst->UpdateOneIter(iter, *dtr);
+  API_END();
+}
+
+int XGBoosterBoostOneIter(BoosterHandle handle,
+                          DMatrixHandle dtrain,
+                          float *grad,
+                          float *hess,
+                          bst_ulong len) {
+  API_BEGIN();
+  Booster *bst = static_cast<Booster*>(handle);
+  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
+  bst->CheckInitModel();
+  bst->CheckInit(dtr);
+  bst->BoostOneIter(*dtr, grad, hess, len);
+  API_END();
+}
+
+int XGBoosterEvalOneIter(BoosterHandle handle,
+                         int iter,
+                         DMatrixHandle dmats[],
+                         const char *evnames[],
+                         bst_ulong len,
+                         const char **out_str) {
+  API_BEGIN();
+  Booster *bst = static_cast<Booster*>(handle);
+  std::vector<std::string> names;
+  std::vector<const DataMatrix*> mats;
+  for (bst_ulong i = 0; i < len; ++i) {
+    mats.push_back(static_cast<DataMatrix*>(dmats[i]));
+    names.push_back(std::string(evnames[i]));
+  }
+  bst->CheckInitModel();
+  bst->eval_str = bst->EvalOneIter(iter, mats, names);
+  *out_str = bst->eval_str.c_str();
+  API_END();
+}
+
+int XGBoosterPredict(BoosterHandle handle,
+                     DMatrixHandle dmat,
+                     int option_mask,
+                     unsigned ntree_limit,
+                     bst_ulong *len,
+                     const float **out_result) {
+  API_BEGIN();
+  *out_result = static_cast<Booster*>(handle)->
+      Pred(*static_cast<DataMatrix*>(dmat),
+           option_mask, ntree_limit, len);
+  API_END();
+}
+
+int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {
+  API_BEGIN();
+  static_cast<Booster*>(handle)->LoadModel(fname);
+  API_END();
+}
+
+int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
+  API_BEGIN();
+  Booster *bst = static_cast<Booster*>(handle);
+  bst->CheckInitModel();
+  bst->SaveModel(fname, false);
+  API_END();
+}
+
+int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
+                                 const void *buf,
+                                 bst_ulong len) {
+  API_BEGIN();
+  static_cast<Booster*>(handle)->LoadModelFromBuffer(buf, len);
+  API_END();
+}
+
+int XGBoosterGetModelRaw(BoosterHandle handle,
+                         bst_ulong *out_len,
+                         const char **out_dptr) {
+  API_BEGIN();
+  *out_dptr = static_cast<Booster*>(handle)->GetModelRaw(out_len);
+  API_END();
+}
+
+int XGBoosterDumpModel(BoosterHandle handle,
+                       const char *fmap,
+                       int with_stats,
+                       bst_ulong *len,
+                       const char ***out_models) {
+  API_BEGIN();
+  utils::FeatMap featmap;
+  if (strlen(fmap) != 0) {
+    featmap.LoadText(fmap);
+  }
+  *out_models = static_cast<Booster*>(handle)->GetModelDump(
+      featmap, with_stats != 0, len);
+  API_END();
 }
diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h
index 88a327d0d..6d3a619fb 100644
--- a/wrapper/xgboost_wrapper.h
+++ b/wrapper/xgboost_wrapper.h
@@ -1,235 +1,330 @@
-#ifndef XGBOOST_WRAPPER_H_
-#define XGBOOST_WRAPPER_H_
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file xgboost_wrapper.h
  * \author Tianqi Chen
  * \brief a C style wrapper of xgboost
  *  can be used to create wrapper of other languages
  */
-#if defined(_MSC_VER) || defined(_WIN32)
-#define XGB_DLL __declspec(dllexport)
-#else
-#define XGB_DLL
-#endif
-// manually define unsign long
-typedef unsigned long bst_ulong;
+#ifndef XGBOOST_WRAPPER_H_
+#define XGBOOST_WRAPPER_H_
 
 #ifdef __cplusplus
-extern "C" {
+#define XGB_EXTERN_C extern "C"
+#else
+// plain C has no linkage specifier, so the marker must expand to nothing
+#define XGB_EXTERN_C
 #endif
-  /*!
-   * \brief load a data matrix 
-   * \param fname the name of the file
-   * \param silent whether print messages during loading
-   * \return a loaded data matrix
-   */
-  XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent);
-  /*!
-   * \brief create a matrix content from csr format
-   * \param indptr pointer to row headers
-   * \param indices findex
-   * \param data fvalue
-   * \param nindptr number of rows in the matix + 1 
-   * \param nelem number of nonzero elements in the matrix
-   * \return created dmatrix
-   */
-  XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
-                                       const unsigned *indices,
-                                       const float *data,
-                                       bst_ulong nindptr,
-                                       bst_ulong nelem);
-  /*!
-   * \brief create a matrix content from CSC format
-   * \param col_ptr pointer to col headers
-   * \param indices findex
-   * \param data fvalue
-   * \param nindptr number of rows in the matix + 1
-   * \param nelem number of nonzero elements in the matrix
-   * \return created dmatrix
-   */
-  XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
-                                       const unsigned *indices,
-                                       const float *data,
-                                       bst_ulong nindptr,
-                                       bst_ulong nelem);  
-  /*!
-   * \brief create matrix content from dense matrix
-   * \param data pointer to the data space
-   * \param nrow number of rows
-   * \param ncol number columns
-   * \param missing which value to represent missing value
-   * \return created dmatrix
-   */
-  XGB_DLL void* XGDMatrixCreateFromMat(const float *data,
-                                       bst_ulong nrow,
-                                       bst_ulong ncol,
-                                       float  missing);
-  /*!
-   * \brief create a new dmatrix from sliced content of existing matrix
-   * \param handle instance of data matrix to be sliced
-   * \param idxset index set
-   * \param len length of index set
-   * \return a sliced new matrix
-   */
-  XGB_DLL void* XGDMatrixSliceDMatrix(void *handle,
-                                      const int *idxset,
-                                      bst_ulong len);
-  /*!
-   * \brief free space in data matrix
-   */
-  XGB_DLL void XGDMatrixFree(void *handle);
-  /*!
-   * \brief load a data matrix into binary file
-   * \param handle a instance of data matrix
-   * \param fname file name
-   * \param silent print statistics when saving
-   */
-  XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
-  /*!
-   * \brief set float vector to a content in info
-   * \param handle a instance of data matrix
-   * \param field field name, can be label, weight
-   * \param array pointer to float vector
-   * \param len length of array
-   */
-  XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, bst_ulong len);
-  /*!
-   * \brief set uint32 vector to a content in info
-   * \param handle a instance of data matrix
-   * \param field field name
-   * \param array pointer to float vector
-   * \param len length of array
-   */
-  XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, bst_ulong len);
-  /*!
-   * \brief set label of the training matrix
-   * \param handle a instance of data matrix
-   * \param group pointer to group size
-   * \param len length of array
-   */
-  XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len);
-  /*!
-   * \brief get float info vector from matrix
-   * \param handle a instance of data matrix
-   * \param field field name
-   * \param out_len used to set result length
-   * \return pointer to the result
-   */
-  XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* out_len);
-  /*!
-   * \brief get uint32 info vector from matrix
-   * \param handle a instance of data matrix
-   * \param field field name
-   * \param out_len used to set result length
-   * \return pointer to the result
-   */
-  XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* out_len);
-  /*!
-   * \brief return number of rows
-   */
-  XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle);
-  // --- start XGBoost class
-  /*! 
-   * \brief create xgboost learner 
-   * \param dmats matrices that are set to be cached
-   * \param len length of dmats
-   */
-  XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len);
-  /*! 
-   * \brief free obj in handle 
-   * \param handle handle to be freed
-   */
-  XGB_DLL void XGBoosterFree(void* handle);
-  /*! 
-   * \brief set parameters 
-   * \param handle handle
-   * \param name  parameter name
-   * \param val value of parameter
-   */    
-  XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value);
-  /*! 
-   * \brief update the model in one round using dtrain
-   * \param handle handle
-   * \param iter current iteration rounds
-   * \param dtrain training data
-   */
-  XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
-  /*!
-   * \brief update the model, by directly specify gradient and second order gradient,
-   *        this can be used to replace UpdateOneIter, to support customized loss function
-   * \param handle handle
-   * \param dtrain training data
-   * \param grad gradient statistics
-   * \param hess second order gradient statistics
-   * \param len length of grad/hess array
-   */
-  XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain,
-                                     float *grad, float *hess, bst_ulong len);
-  /*!
-   * \brief get evaluation statistics for xgboost
-   * \param handle handle
-   * \param iter current iteration rounds
-   * \param dmats pointers to data to be evaluated
-   * \param evnames pointers to names of each data
-   * \param len length of dmats
-   * \return the string containing evaluation stati
-   */
-  XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
-                                           const char *evnames[], bst_ulong len);
-  /*!
-   * \brief make prediction based on dmat
-   * \param handle handle
-   * \param dmat data matrix
-   * \param option_mask bit-mask of options taken in prediction, possible values
-   *          0:normal prediction
-   *          1:output margin instead of transformed value
-   *          2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
-   * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
-   *    when the parameter is set to 0, we will use all the trees
-   * \param len used to store length of returning result
-   */
-  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, 
-                                        int option_mask, 
-                                        unsigned ntree_limit,
-                                        bst_ulong *len);
-  /*!
-   * \brief load model from existing file
-   * \param handle handle
-   * \param fname file name
-   */
-  XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname);
-  /*!
-   * \brief save model into existing file
-   * \param handle handle
-   * \param fname file name
-   */
-  XGB_DLL void XGBoosterSaveModel(void *handle, const char *fname);
-  /*!
-   * \brief load model from in memory buffer
-   * \param handle handle
-   * \param buf pointer to the buffer
-   * \param len the length of the buffer
-   */
-  XGB_DLL void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len);
-  /*!
-   * \brief save model into binary raw bytes, return header of the array
-   * user must copy the result out, before next xgboost call
-   * \param handle handle
-   * \param out_len the argument to hold the output length
-   * \return the pointer to the beginning of binary buffer
-   */
-  XGB_DLL const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len);
-  /*!
-   * \brief dump model, return array of strings representing model dump
-   * \param handle handle
-   * \param fmap  name to fmap can be empty string
-   * \param with_stats whether to dump with statistics
-   * \param out_len length of output array
-   * \return char *data[], representing dump of each model
-   */
-  XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
-                                          int with_stats, bst_ulong *out_len);
-#ifdef __cplusplus
-}
+
+#if defined(_MSC_VER) || defined(_WIN32)
+#define XGB_DLL XGB_EXTERN_C __declspec(dllexport)
+#else
+#define XGB_DLL XGB_EXTERN_C
 #endif
+// manually define unsigned long
+typedef unsigned long bst_ulong;  // NOLINT(*)
+
+/*! \brief handle to DMatrix */
+typedef void *DMatrixHandle;
+/*! \brief handle to Booster */
+typedef void *BoosterHandle;
+
+/*!
+ * \brief get string message of the last error
+ *
+ *  all functions in this file return 0 on success
+ *  and -1 when an error occurred,
+ *  XGBGetLastError can be called to retrieve the error
+ *
+ *  this function is threadsafe and can be called by different threads
+ * \return const char* error information
+ */
+XGB_DLL const char *XGBGetLastError();
+
+/*!
+ * \brief load a data matrix
+ * \param fname the name of the file
+ * \param silent whether print messages during loading
+ * \param out a loaded data matrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
+                                    int silent,
+                                    DMatrixHandle *out);
+
+/*!
+ * \brief create a matrix content from csr format
+ * \param indptr pointer to row headers
+ * \param indices findex
+ * \param data fvalue
+ * \param nindptr number of rows in the matrix + 1
+ * \param nelem number of nonzero elements in the matrix
+ * \param out created dmatrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
+                                   const unsigned *indices,
+                                   const float *data,
+                                   bst_ulong nindptr,
+                                   bst_ulong nelem,
+                                   DMatrixHandle *out);
+/*!
+ * \brief create a matrix content from CSC format
+ * \param col_ptr pointer to col headers
+ * \param indices findex
+ * \param data fvalue
+ * \param nindptr number of columns in the matrix + 1
+ * \param nelem number of nonzero elements in the matrix
+ * \param out created dmatrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
+                                   const unsigned *indices,
+                                   const float *data,
+                                   bst_ulong nindptr,
+                                   bst_ulong nelem,
+                                   DMatrixHandle *out);
+/*!
+ * \brief create matrix content from dense matrix
+ * \param data pointer to the data space
+ * \param nrow number of rows
+ * \param ncol number of columns
+ * \param missing which value to represent missing value
+ * \param out created dmatrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromMat(const float *data,
+                                   bst_ulong nrow,
+                                   bst_ulong ncol,
+                                   float  missing,
+                                   DMatrixHandle *out);
+/*!
+ * \brief create a new dmatrix from sliced content of existing matrix
+ * \param handle instance of data matrix to be sliced
+ * \param idxset index set
+ * \param len length of index set
+ * \param out a sliced new matrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
+                                  const int *idxset,
+                                  bst_ulong len,
+                                  DMatrixHandle *out);
+/*!
+ * \brief free space in data matrix; handle is the matrix to be freed
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixFree(DMatrixHandle handle);
+/*!
+ * \brief save a data matrix into binary file
+ * \param handle an instance of data matrix
+ * \param fname file name
+ * \param silent print statistics when saving
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle,
+                                const char *fname, int silent);
+/*!
+ * \brief set float vector to a content in info
+ * \param handle an instance of data matrix
+ * \param field field name, can be label, weight
+ * \param array pointer to float vector
+ * \param len length of array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
+                                  const char *field,
+                                  const float *array,
+                                  bst_ulong len);
+/*!
+ * \brief set uint32 vector to a content in info
+ * \param handle an instance of data matrix
+ * \param field field name
+ * \param array pointer to unsigned int vector
+ * \param len length of array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
+                                 const char *field,
+                                 const unsigned *array,
+                                 bst_ulong len);
+/*!
+ * \brief set group sizes of the training matrix
+ * \param handle an instance of data matrix
+ * \param group pointer to group size
+ * \param len length of array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
+                              const unsigned *group,
+                              bst_ulong len);
+/*!
+ * \brief get float info vector from matrix
+ * \param handle an instance of data matrix
+ * \param field field name
+ * \param out_len used to set result length
+ * \param out_dptr pointer to the result
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
+                                  const char *field,
+                                  bst_ulong* out_len,
+                                  const float **out_dptr);
+/*!
+ * \brief get uint32 info vector from matrix
+ * \param handle an instance of data matrix
+ * \param field field name
+ * \param out_dptr pointer to the result
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
+                                 const char *field,
+                                 bst_ulong* out_len,
+                                 const unsigned **out_dptr);
+/*!
+ * \brief get number of rows, stored into out
+ * \param handle the handle to the DMatrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
+                            bst_ulong *out);
+// --- start XGBoost class
+/*!
+ * \brief create xgboost learner
+ * \param dmats array of DMatrix handles that are set to be cached
+ * \param len length of dmats
+ * \param out handle to the result booster
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterCreate(DMatrixHandle dmats[],
+                            bst_ulong len,
+                            BoosterHandle *out);
+/*!
+ * \brief free obj in handle
+ * \param handle handle to be freed
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterFree(BoosterHandle handle);
+
+/*!
+ * \brief set parameters
+ * \param handle handle
+ * \param name  parameter name
+ * \param value value of parameter
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
+                              const char *name,
+                              const char *value);
+/*!
+ * \brief update the model in one round using dtrain
+ * \param handle handle
+ * \param iter current iteration rounds
+ * \param dtrain training data
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
+                                   int iter,
+                                   DMatrixHandle dtrain);
+/*!
+ * \brief update the model, by directly specify gradient and second order gradient,
+ *        this can be used to replace UpdateOneIter, to support customized loss function
+ * \param handle handle
+ * \param dtrain training data
+ * \param grad gradient statistics
+ * \param hess second order gradient statistics
+ * \param len length of grad/hess array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
+                                  DMatrixHandle dtrain,
+                                  float *grad,
+                                  float *hess,
+                                  bst_ulong len);
+/*!
+ * \brief get evaluation statistics for xgboost
+ * \param handle handle
+ * \param iter current iteration rounds
+ * \param dmats pointers to data to be evaluated
+ * \param evnames pointers to names of each data
+ * \param len length of dmats
+ * \param out_result the string containing evaluation statistics
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
+                                 int iter,
+                                 DMatrixHandle dmats[],
+                                 const char *evnames[],
+                                 bst_ulong len,
+                                 const char **out_result);
+/*!
+ * \brief make prediction based on dmat
+ * \param handle handle
+ * \param dmat data matrix
+ * \param option_mask bit-mask of options taken in prediction, possible values
+ *          0:normal prediction
+ *          1:output margin instead of transformed value
+ *          2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
+ * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
+ *    when the parameter is set to 0, we will use all the trees
+ * \param out_len used to store length of returning result
+ * \param out_result used to set a pointer to array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterPredict(BoosterHandle handle,
+                             DMatrixHandle dmat,
+                             int option_mask,
+                             unsigned ntree_limit,
+                             bst_ulong *out_len,
+                             const float **out_result);
+/*!
+ * \brief load model from existing file
+ * \param handle handle
+ * \param fname file name
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
+                               const char *fname);
+/*!
+ * \brief save model into existing file
+ * \param handle handle
+ * \param fname file name
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
+                               const char *fname);
+/*!
+ * \brief load model from in memory buffer
+ * \param handle handle
+ * \param buf pointer to the buffer
+ * \param len the length of the buffer
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
+                                         const void *buf,
+                                         bst_ulong len);
+/*!
+ * \brief save model into binary raw bytes, return header of the array
+ * user must copy the result out, before next xgboost call
+ * \param handle handle
+ * \param out_len the argument to hold the output length
+ * \param out_dptr the argument to hold the output data pointer
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
+                                 bst_ulong *out_len,
+                                 const char **out_dptr);
+/*!
+ * \brief dump model, return array of strings representing model dump
+ * \param handle handle
+ * \param fmap name of the fmap (feature map) to load, can be an empty string
+ * \param with_stats whether to dump with statistics
+ * \param out_len length of output array
+ * \param out_dump_array pointer to hold representing dump of each model
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterDumpModel(BoosterHandle handle,
+                               const char *fmap,
+                               int with_stats,
+                               bst_ulong *out_len,
+                               const char ***out_dump_array);
 #endif  // XGBOOST_WRAPPER_H_