Merge remote-tracking branch 'dmlc/master'

This commit is contained in:
El Potaeto 2015-04-15 18:48:26 +02:00
commit de3f74f755
118 changed files with 1305 additions and 11320 deletions

View File

@ -16,18 +16,28 @@ ifeq ($(cxx11),1)
else
endif
ifeq ($(hdfs),1)
CFLAGS+= -DRABIT_USE_HDFS=1 -I$(HADOOP_HDFS_HOME)/include -I$(JAVA_HOME)/include
LDFLAGS+= -L$(HADOOP_HDFS_HOME)/lib/native -L$(JAVA_HOME)/jre/lib/amd64/server -lhdfs -ljvm
# handling dmlc
ifdef dmlc
ifndef config
ifneq ("$(wildcard $(dmlc)/config.mk)","")
config = $(dmlc)/config.mk
else
CFLAGS+= -DRABIT_USE_HDFS=0
config = $(dmlc)/make/config.mk
endif
endif
include $(config)
include $(dmlc)/make/dmlc.mk
LDFLAGS+= $(DMLC_LDFLAGS)
LIBDMLC=$(dmlc)/libdmlc.a
else
LIBDMLC=dmlc_simple.o
endif
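# Example invocations (paths are hypothetical): `make dmlc=../dmlc-core`
# picks up ../dmlc-core/config.mk if present (else ../dmlc-core/make/config.mk)
# and links against ../dmlc-core/libdmlc.a, while a plain `make` falls back
# to the bundled dmlc_simple.o stub.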
# specify tensor path
BIN = xgboost
MOCKBIN = xgboost.mock
OBJ = updater.o gbm.o io.o main.o
MPIBIN = xgboost.mpi
OBJ = updater.o gbm.o io.o main.o dmlc_simple.o
MPIBIN =
SLIB = wrapper/libxgboostwrapper.so
.PHONY: clean all mpi python Rpack
@ -38,23 +48,22 @@ mpi: $(MPIBIN)
python: wrapper/libxgboostwrapper.so
# the wrapper now takes in two parts: the io part and the wrapper part
updater.o: src/tree/updater.cpp src/tree/*.hpp src/*.h src/tree/*.h src/utils/*.h
dmlc_simple.o: src/io/dmlc_simple.cpp src/utils/*.h
gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h
io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h
main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h
xgboost.mpi: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mpi.a
xgboost.mock: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mock.a
xgboost: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit.a
wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o subtree/rabit/lib/librabit.a
xgboost: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit.a $(LIBDMLC)
wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o subtree/rabit/lib/librabit.a $(LIBDMLC)
# dependency on rabit
subtree/rabit/lib/librabit.a: subtree/rabit/src/engine.cc
cd subtree/rabit;make lib/librabit.a; cd ../..
subtree/rabit/lib/librabit_empty.a: subtree/rabit/src/engine_empty.cc
cd subtree/rabit;make lib/librabit_empty.a; cd ../..
subtree/rabit/lib/librabit_mock.a: subtree/rabit/src/engine_mock.cc
cd subtree/rabit;make lib/librabit_mock.a; cd ../..
subtree/rabit/lib/librabit_mpi.a: subtree/rabit/src/engine_mpi.cc
cd subtree/rabit;make lib/librabit_mpi.a; cd ../..
$(BIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)

View File

@ -18,7 +18,12 @@ License: Apache License (== 2.0) | file LICENSE
URL: https://github.com/dmlc/xgboost
BugReports: https://github.com/dmlc/xgboost/issues
VignetteBuilder: knitr
Suggests: knitr
Suggests:
knitr,
ggplot2 (>= 1.0.0),
DiagrammeR (>= 0.4),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3)
Depends:
R (>= 2.10)
Imports:
@ -26,8 +31,4 @@ Imports:
methods,
data.table (>= 1.9.4),
magrittr (>= 1.5),
stringr (>= 0.6.2),
DiagrammeR (>= 0.4),
ggplot2 (>= 1.0.0),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3)
stringr (>= 0.6.2)

View File

@ -21,8 +21,6 @@ exportMethods(predict)
import(methods)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgeMatrix)
importFrom(Ckmeans.1d.dp,Ckmeans.1d.dp)
importFrom(DiagrammeR,mermaid)
importFrom(Matrix,cBind)
importFrom(Matrix,colSums)
importFrom(Matrix,sparseVector)
@ -34,16 +32,6 @@ importFrom(data.table,fread)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(data.table,setnames)
importFrom(ggplot2,aes)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_text)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,theme)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(magrittr,"%>%")
importFrom(magrittr,add)
importFrom(magrittr,not)

View File

@ -36,8 +36,8 @@ xgb.setinfo <- function(dmat, name, info) {
return(TRUE)
}
if (name == "group") {
if (length(info)!=xgb.numrow(dmat))
stop("The length of groups must equal to the number of rows in the input data")
if (sum(info)!=xgb.numrow(dmat))
stop("The sum of groups must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
PACKAGE = "xgboost")
return(TRUE)
@ -77,9 +77,9 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
}
# convert xgb.Booster.handle to xgb.Booster
xgb.handleToBooster <- function(handle)
xgb.handleToBooster <- function(handle, raw = NULL)
{
bst <- list(handle = handle, raw = NULL)
bst <- list(handle = handle, raw = raw)
class(bst) <- "xgb.Booster"
return(bst)
}
@ -87,8 +87,12 @@ xgb.handleToBooster <- function(handle)
# Check whether an xgb.Booster object is complete
xgb.Booster.check <- function(bst, saveraw = TRUE)
{
if (is.null(bst$handle)) {
bst$handle <- xgb.load(bst$raw)
isnull <- is.null(bst$handle)
if (!isnull) {
isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
}
if (isnull) {
bst$handle <- xgb.Booster(modelfile = bst$raw)
} else {
if (is.null(bst$raw) && saveraw)
bst$raw <- xgb.save.raw(bst$handle)
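# Note: handles are external pointers, so a Booster restored via save/load
# or readRDS can carry a dangling handle; XGCheckNullPtr_R detects this and
# the model is then rebuilt from the serialized bytes kept in bst$raw.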

View File

@ -95,6 +95,17 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
}
folds <- xgb.cv.mknfold(dtrain, nfold, params)
obj_type = params[['objective']]
mat_pred = FALSE
if (!is.null(obj_type) && obj_type=='multi:softprob')
{
num_class = params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
mat_pred = TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
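# With multi:softprob the booster emits one probability per class, so the
# out-of-fold predictions are collected in an nrow x num_class matrix;
# all other objectives yield a single prediction per row.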
history <- c()
for (i in 1:nrounds) {
@ -102,14 +113,23 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
for (k in 1:nfold) {
fd <- folds[[k]]
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
if (i<nrounds) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
if (!prediction) {
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
} else {
res <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction)
if (mat_pred) {
pred_mat = matrix(res[[2]],num_class,length(fd$index))
predictValues[fd$index,] <- t(pred_mat)
} else {
predictValues[fd$index] <- res[[2]]
}
msg[[k]] <- res[[1]] %>% str_split("\t") %>% .[[1]]
}
}
}
ret <- xgb.cv.aggcv(msg, showsd)
history <- c(history, ret)
if(verbose) paste(ret, "\n", sep="") %>% cat

View File

@ -21,7 +21,12 @@ xgb.load <- function(modelfile) {
stop("xgb.load: modelfile cannot be NULL")
handle <- xgb.Booster(modelfile = modelfile)
bst <- xgb.handleToBooster(handle)
# re-use modelfile if it is raw, so we do not need to serialize
if (typeof(modelfile) == "raw") {
bst <- xgb.handleToBooster(handle, modelfile)
} else {
bst <- xgb.handleToBooster(handle, NULL)
}
bst <- xgb.Booster.check(bst)
return(bst)
}

View File

@ -96,6 +96,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
allTrees <- data.table()
anynumber_regex<-"[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
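# anynumber_regex matches an optionally signed decimal with an optional
# exponent, so values such as gain=1.5e-05 are parsed correctly (the
# previous patterns only handled plain decimals).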
for(i in 1:n_round){
tree <- text[(position[i]+1):(position[i+1]-1)]
@ -115,7 +116,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
featureBranch <- feature_names[featureBranch + 1]
}
featureLeaf <- rep("Leaf", length(leaf))
splitBranch <- str_extract(branch, "<\\d*\\.*\\d*\\]") %>% str_replace("<", "") %>% str_replace("\\]", "")
splitBranch <- str_extract(branch, paste0("<",anynumber_regex,"\\]")) %>% str_replace("<", "") %>% str_replace("\\]", "")
splitLeaf <- rep(NA, length(leaf))
yesBranch <- extract(branch, "yes=\\d*") %>% addTreeId(treeID)
yesLeaf <- rep(NA, length(leaf))
@ -123,8 +124,8 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
noLeaf <- rep(NA, length(leaf))
missingBranch <- extract(branch, "missing=\\d+") %>% addTreeId(treeID)
missingLeaf <- rep(NA, length(leaf))
qualityBranch <- extract(branch, "gain=\\d*\\.*\\d*")
qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*")
qualityBranch <- extract(branch, paste0("gain=",anynumber_regex))
qualityLeaf <- extract(leaf, paste0("leaf=",anynumber_regex))
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID]

View File

@ -2,17 +2,6 @@
#'
#' Read a data.table containing feature importance details and plot it.
#'
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 ylab
#' @importFrom ggplot2 ggtitle
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_blank
#' @importFrom Ckmeans.1d.dp Ckmeans.1d.dp
#' @importFrom magrittr %>%
#' @param importance_matrix a \code{data.table} returned by the \code{xgb.importance} function.
#' @param numberOfClusters a \code{numeric} vector containing the min and the max range of the possible number of clusters of bars.
@ -44,11 +33,17 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
if (!"data.table" %in% class(importance_matrix)) {
stop("importance_matrix: Should be a data.table.")
}
if (!require(ggplot2, quietly = TRUE)) {
stop("ggplot2 package is required for plotting the importance", call. = FALSE)
}
if (!requireNamespace("Ckmeans.1d.dp", quietly = TRUE)) {
stop("Ckmeans.1d.dp package is required for plotting the importance", call. = FALSE)
}
# To avoid issues in clustering when co-occurrences are used
importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature]
clusters <- suppressWarnings(Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
importance_matrix[,"Cluster":=clusters$cluster %>% as.character]
plot <- ggplot(importance_matrix, aes(x=reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+ geom_bar(aes(fill=Cluster), stat="identity", position="identity") + coord_flip() + xlab("Features") + ylab("Gain") + ggtitle("Feature importance") + theme(plot.title = element_text(lineheight=.9, face="bold"), panel.grid.major.y = element_blank() )

View File

@ -15,7 +15,6 @@
#' @importFrom stringr str_split
#' @importFrom stringr str_extract
#' @importFrom stringr str_trim
#' @importFrom DiagrammeR mermaid
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
@ -65,6 +64,10 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
}
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
stop("DiagrammeR package is required for xgb.plot.tree", call. = FALSE)
}
if(is.null(model)){
allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
} else {
@ -85,7 +88,7 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
mermaid(path, width, height)
DiagrammeR::mermaid(path, width, height)
}
# Avoid error messages during CRAN check.

View File

@ -4,4 +4,5 @@ PKGROOT=../../
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ -I$(PKGROOT)
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o $(PKGROOT)/src/io/dmlc_simple.o

View File

@ -15,5 +15,5 @@ xgblib:
PKG_CPPFLAGS= -DXGBOOST_CUSTOMIZE_MSG_ -DXGBOOST_CUSTOMIZE_PRNG_ -DXGBOOST_STRICT_CXX98_ -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ -I$(PKGROOT) -I../..
PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS)
PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o
OBJECTS= xgboost_R.o xgboost_assert.o $(PKGROOT)/wrapper/xgboost_wrapper.o $(PKGROOT)/src/io/io.o $(PKGROOT)/src/gbm/gbm.o $(PKGROOT)/src/tree/updater.o $(PKGROOT)/subtree/rabit/src/engine_empty.o $(PKGROOT)/src/io/dmlc_simple.o
$(OBJECTS) : xgblib

View File

@ -59,6 +59,9 @@ inline void _WrapperEnd(void) {
}
extern "C" {
SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
}
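/* used by the R side (see xgb.Booster.check) to detect a handle whose
   external pointer became NULL after deserialization */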
void _DMatrixFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;
XGDMatrixFree(R_ExternalPtrAddr(ext));

View File

@ -11,6 +11,12 @@ extern "C" {
}
extern "C" {
/*!
* \brief check whether a handle is NULL
* \param handle
* \return whether it is null ptr
*/
SEXP XGCheckNullPtr_R(SEXP handle);
/*!
* \brief load a data matrix
* \param fname name of the content

View File

@ -1,5 +1,4 @@
#!/usr/bin/python
import sys
def loadfmap( fname ):
fmap = {}

View File

@ -1,10 +1,6 @@
#!/usr/bin/python
import sys
import numpy as np
import scipy.sparse
# append the path to xgboost, you may need to change the following line
# alternatively, you can add the path to PYTHONPATH environment variable
sys.path.append('../../wrapper')
import xgboost as xgb
### simple example

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
dtrain = xgb.DMatrix('../data/agaricus.txt.train')

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
###
# advanced: customized loss function

View File

@ -1,6 +1,4 @@
#!/usr/bin/python
import sys
sys.path.append('../../wrapper')
import xgboost as xgb
##
# this script demonstrate how to fit generalized linear model in xgboost

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training

View File

@ -0,0 +1,62 @@
'''
Created on 1 Apr 2015
@author: Jamie Hall
'''
import xgboost as xgb
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.datasets import load_iris, load_digits, load_boston
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
digits = load_digits(2)
y = digits['target']
X = digits['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
predictions = xgb_model.predict(X[test_index])
actuals = y[test_index]
print(confusion_matrix(actuals, predictions))
print("Iris: multiclass classification")
iris = load_iris()
y = iris['target']
X = iris['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
predictions = xgb_model.predict(X[test_index])
actuals = y[test_index]
print(confusion_matrix(actuals, predictions))
print("Boston Housing: regression")
boston = load_boston()
y = boston['target']
X = boston['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
predictions = xgb_model.predict(X[test_index])
actuals = y[test_index]
print(mean_squared_error(actuals, predictions))
print("Parameter optimization")
y = boston['target']
X = boston['data']
xgb_model = xgb.XGBRegressor()
clf = GridSearchCV(xgb_model,
{'max_depth': [2,4,6],
'n_estimators': [50,100,200]}, verbose=1)
clf.fit(X,y)
print(clf.best_score_)
print(clf.best_params_)

View File

@ -1,7 +1,5 @@
#!/usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper')
import xgboost as xgb
### load data in do training

View File

@ -1,14 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import inspect
import os
import sys
import numpy as np
# add path of xgboost python module
code_path = os.path.join(
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../wrapper")
sys.path.append(code_path)
import xgboost as xgb

View File

@ -1,9 +1,6 @@
#!/usr/bin/python
# make prediction
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb
# path to where the data lies

View File

@ -1,9 +1,6 @@
#!/usr/bin/python
# this is the example script to use xgboost to train
import sys
import numpy as np
# add path of xgboost python module
sys.path.append('../../wrapper/')
import xgboost as xgb
from sklearn.ensemble import GradientBoostingClassifier
import time

View File

@ -1,43 +0,0 @@
require(xgboost)
require(methods)
train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = F)
test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = F)
train = train[,-1]
test = test[,-1]
y = train[,ncol(train)]
y = gsub('Class_','',y)
y = as.integer(y)-1 # xgboost takes labels in [0, numOfClass)
x = rbind(train[,-ncol(train)],test)
x = as.matrix(x)
x = matrix(as.numeric(x),nrow(x),ncol(x))
trind = 1:length(y)
teind = (nrow(train)+1):nrow(x)
# Set necessary parameter
param <- list("objective" = "multi:softprob",
"eval_metric" = "mlogloss",
"num_class" = 9,
"nthread" = 8)
# Run cross-validation
cv.nround = 50
bst.cv = xgb.cv(param=param, data = x[trind,], label = y,
nfold = 3, nrounds=cv.nround)
# Train the model
nround = 50
bst = xgboost(param=param, data = x[trind,], label = y, nrounds=nround)
# Make prediction
pred = predict(bst,x[teind,])
pred = matrix(pred,9,length(pred)/9)
pred = t(pred)
# Output submission
pred = format(pred, digits=2,scientific=F) # shrink the size of submission
pred = data.frame(1:nrow(pred),pred)
names(pred) = c('id', paste0('Class_',1:9))
write.csv(pred,file='submission.csv', quote=FALSE,row.names=FALSE)

View File

@ -1,7 +1,5 @@
#! /usr/bin/python
import sys
import numpy as np
sys.path.append('../../wrapper/')
import xgboost as xgb
# labels need to be in 0 to num_class - 1

View File

@ -1,5 +1,4 @@
#!/usr/bin/python
import sys
fo = open( 'machine.txt', 'w' )
cnt = 6

View File

@ -1,17 +1,10 @@
Distributed XGBoost
======
This folder contains information about Distributed XGBoost (Distributed GBDT).
Distributed XGBoost is now part of [Wormhole](https://github.com/dmlc/wormhole).
Checkout this [Link](https://github.com/dmlc/wormhole/tree/master/learn/xgboost) for usage examples, build and job submissions.
* The distributed version is built on Rabit: [Reliable Allreduce and Broadcast Library](https://github.com/dmlc/rabit)
- Rabit is a portable library that provides fault tolerance for Allreduce calls in distributed machine learning
- This makes xgboost portable and fault-tolerant against node failures
* You can run Distributed XGBoost on platforms including Hadoop (see the [hadoop folder](hadoop)) and MPI
- Rabit only relies on the platform to start the programs, so it should be easy to port xgboost to most platforms (a minimal sketch of the Allreduce pattern follows below)
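Below is a minimal sketch of that Allreduce pattern (an illustration only, assuming just the rabit calls used elsewhere in this codebase: ```rabit::Init```, ```rabit::Allreduce```, ```rabit::GetRank```, ```rabit::Finalize```):

```c++
#include <rabit.h>

int main(int argc, char *argv[]) {
  rabit::Init(argc, argv);
  // each worker accumulates a partial statistic over its local rows,
  // e.g. a gradient sum used to decide a tree split
  float stats[2] = {0.0f, 0.0f};
  // ... fill stats from local data ...
  rabit::Allreduce<rabit::op::Sum>(stats, 2);  // now identical on every node
  if (rabit::GetRank() == 0) {
    // rank 0 may report or checkpoint the aggregated statistics
  }
  rabit::Finalize();
  return 0;
}
```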
Build
=====
* In the root folder, type ```make```
- If you have a C++11 compiler, it is recommended to use ```make cxx11=1```
Notes
====
@ -27,11 +20,9 @@ Notes
Solvers
=====
There are two solvers in distributed xgboost. You can check out local demos of the two solvers; see [row-split](row-split) and [col-split](col-split)
* The column-based solver splits data by column; each node works on a subset of columns,
using exactly the same algorithm as the single node version.
* The row-based solver splits data by row; each node works on a subset of rows,
using an approximate histogram count algorithm that only examines a subset of
potential split points, as opposed to all split points.
- This is the mode used by the current hadoop version, since data is usually stored by rows in many industry systems

View File

@ -1,40 +0,0 @@
Distributed XGBoost: Hadoop Yarn Version
====
* The scripts in this folder show an example of how to run distributed xgboost on a hadoop platform with YARN
* It relies on the [Rabit Library](https://github.com/dmlc/rabit) (Reliable Allreduce and Broadcast Interface) and YARN. Rabit provides an interface to aggregate gradient values and split statistics, which allows xgboost to run reliably on hadoop. You do not need to worry about how to update the model in each iteration; just use the script ```rabit_yarn.py```. For those who want to know how it exactly works, please refer to the main page of [Rabit](https://github.com/dmlc/rabit).
* Quick start: run ```bash run_mushroom.sh <n_hadoop_workers> <n_thread_per_worker> <path_in_HDFS>```
- This is the hadoop version of the binary classification example in the demo folder.
- More information on the usage of xgboost can be found on the [wiki page](https://github.com/dmlc/xgboost/wiki)
Before you run the script
====
* Make sure you have set up the hadoop environment.
- Check variable $HADOOP_PREFIX exists (e.g. run ```echo $HADOOP_PREFIX```)
- Compile xgboost with hdfs support by typing ```make hdfs=1```
How to Use
====
* Input data format: LIBSVM format. The example here uses generated data in the demo/data folder.
* Put the training data in HDFS (the hadoop distributed file system).
* Use the rabit script ```rabit_yarn.py``` to submit the training task to YARN
* Get the final model file from HDFS, then do prediction and visualization of the model locally.
Single machine vs Hadoop version
====
If you have used xgboost (the single machine version) before, this section will show you how to run xgboost on hadoop with only slight modifications to the conf file.
* IO: instead of reading and writing files locally, we now use HDFS; put the ```hdfs://``` prefix on the address of any file you would like to access
* File cache: ```rabit_yarn.py``` also provides several ways to cache necessary files, including the binary file (xgboost) and the conf file
- ```rabit_yarn.py``` will automatically cache files in the command line. For example, ```rabit_yarn.py -n 3 $localPath/xgboost mushroom.hadoop.conf``` will cache "xgboost" and "mushroom.hadoop.conf".
- You can also use "-f" to manually cache one or more files, like ```-f file1 -f file2``` or ```-f file1#file2``` (use "#" to split file names).
- The local path of cached files in the command is "./".
- Since the cached files will be packaged and delivered to hadoop slave nodes, the cached files should not be large.
* The hadoop version also supports evaluating each training round. You just need to set the parameter "eval_train".
* More details of job submission can be found in the usage of ```rabit_yarn.py```.
* The model saved by the hadoop version is compatible with the single machine version.
Notes
====
* The code has been tested on YARN.
* The code is optimized with multi-threading, so you will want to run one xgboost per node/worker for best performance.
- You will want to set <n_thread_per_worker> to the number of cores you have on each machine.
* It is also possible to submit jobs with hadoop streaming; however, YARN is highly recommended for efficiency reasons

View File

@ -1,36 +0,0 @@
# General Parameters, see comment for each definition
# choose the booster, can be gbtree or gblinear
booster = gbtree
# choose logistic regression loss function for binary classification
objective = binary:logistic
# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3
# Task Parameters
# the number of round to do boosting
num_round = 2
# 0 means do not save any model except the final round model
save_period = 0
# evaluate on training data as well each round
# eval_train = 1
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
# eval[test] = "agaricus.txt.test"
# Please do not modify the following parameters
# The path of training data, with prefix hdfs
#data = hdfs:/data/
# The path of model file
#model_out =
# split pattern of xgboost
dsplit = row
# evaluate on training data as well each round
eval_train = 1

View File

@ -1,28 +0,0 @@
#!/bin/bash
if [ "$#" -lt 3 ];
then
echo "Usage: <nworkers> <nthreads> <path_in_HDFS>"
exit -1
fi
# put the local training file to HDFS
hadoop fs -mkdir $3/data
hadoop fs -put ../../demo/data/agaricus.txt.train $3/data
hadoop fs -put ../../demo/data/agaricus.txt.test $3/data
# running rabit, pass address in hdfs
../../subtree/rabit/tracker/rabit_yarn.py -n $1 --vcores $2 ../../xgboost mushroom.hadoop.conf nthread=$2\
data=hdfs://$3/data/agaricus.txt.train\
eval[test]=hdfs://$3/data/agaricus.txt.test\
model_out=hdfs://$3/mushroom.final.model
# get the final model file
hadoop fs -get $3/mushroom.final.model final.model
# output prediction task=pred
../../xgboost mushroom.hadoop.conf task=pred model_in=final.model test:data=../../demo/data/agaricus.txt.test
# print the boosters of final.model in dump.raw.txt
../../xgboost mushroom.hadoop.conf task=dump model_in=final.model name_dump=dump.raw.txt
# use the feature map in printing for better visualization
../../xgboost mushroom.hadoop.conf task=dump model_in=final.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.txt
cat dump.nice.txt

View File

@ -1,18 +0,0 @@
Distributed XGBoost: Row Split Version
====
* You might be interested in checking out the [Hadoop example](../hadoop)
* Machine Rabit: run ```bash machine-row-rabit.sh <n-mpi-process>```
- machine-row-rabit.sh starts an xgboost job using rabit
How to Use
====
* First split the data by rows
* In the config, specify the data file as containing a wildcard %d, where %d is the rank of the node; each node will load its part of the data
* Enable row split mode by ```dsplit=row```
Notes
====
* The code is multi-threaded, so you want to run one xgboost-mpi per node
* The row-based solver splits data by row; each node works on a subset of rows, using an approximate histogram count algorithm
that only examines a subset of potential split points, as opposed to all split points.

View File

@ -1,20 +0,0 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf train-machine.row* *.model
k=$1
# make machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -
# split the lib svm file into k subfiles
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k
# run xgboost mpi
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost.mock machine-row.conf dsplit=row num_round=3 mock=1,1,1,0 mock=0,0,3,0 mock=2,2,3,0

View File

@ -1,24 +0,0 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf train-machine.row* *.model
k=$1
# make machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -
# split the lib svm file into k subfiles
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k
# run xgboost mpi
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost machine-row.conf dsplit=row num_round=3 eval_train=1
# run xgboost-mpi save model 0001, continue to run from existing model
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost machine-row.conf dsplit=row num_round=1
../../subtree/rabit/tracker/rabit_demo.py -n $k ../../xgboost machine-row.conf dsplit=row num_round=2 model_in=0001.model

View File

@ -1,30 +0,0 @@
# General Parameters, see comment for each definition
# choose the tree booster, can also change to gblinear
booster = gbtree
# this is the only difference from classification: use reg:linear to do linear regression
# when labels are in [0,1] we can also use reg:logistic
objective = reg:linear
# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3
# Task parameters
# the number of round to do boosting
num_round = 2
# 0 means do not save any model except the final round model
save_period = 0
use_buffer = 0
# The path of training data
data = "train-machine.row%d"
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "../../demo/regression/machine.txt.test"
# The path of test data
test:data = "../../demo/regression/machine.txt.test"

View File

@ -1,24 +0,0 @@
#!/usr/bin/python
import sys
import random
# split libsvm file into different rows
if len(sys.argv) < 4:
print ('Usage:<fin> <fo> k')
exit(0)
random.seed(10)
k = int(sys.argv[3])
fi = open( sys.argv[1], 'r' )
fos = []
for i in range(k):
fos.append(open( sys.argv[2]+'.row%d' % i, 'w' ))
for l in open(sys.argv[1]):
i = random.randint(0, k-1)
fos[i].write(l)
for f in fos:
f.close()

View File

@ -206,6 +206,10 @@ class GBTree : public IGradBooster {
for (size_t i = 0; i < trees.size(); ++i) {
delete trees[i];
}
for (size_t i = 0; i < updaters.size(); ++i) {
delete updaters[i];
}
updaters.clear();
trees.clear();
pred_buffer.clear();
pred_counter.clear();
@ -444,12 +448,12 @@ class GBTree : public IGradBooster {
int reserved[31];
/*! \brief constructor */
ModelParam(void) {
std::memset(this, 0, sizeof(ModelParam));
num_trees = 0;
num_roots = num_feature = 0;
num_pbuffer = 0;
num_output_group = 1;
size_leaf_vector = 0;
std::memset(reserved, 0, sizeof(reserved));
}
/*!
* \brief set parameters from outside

src/io/dmlc_simple.cpp (new file, 127 lines)
View File

@ -0,0 +1,127 @@
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX
#include "../utils/io.h"
// implements a single-file, no-split version of the dmlc I/O interfaces,
// in case we want to avoid a dependency on dmlc-core
namespace xgboost {
namespace utils {
class SingleFileSplit : public dmlc::InputSplit {
public:
explicit SingleFileSplit(const char *fname)
: use_stdin_(false) {
if (!std::strcmp(fname, "stdin")) {
#ifndef XGBOOST_STRICT_CXX98_
use_stdin_ = true; fp_ = stdin;
#endif
}
if (!use_stdin_) {
fp_ = utils::FopenCheck(fname, "r");
}
end_of_file_ = false;
}
virtual ~SingleFileSplit(void) {
if (!use_stdin_) std::fclose(fp_);
}
virtual bool ReadRecord(std::string *out_data) {
if (end_of_file_) return false;
out_data->clear();
while (true) {
char c = std::fgetc(fp_);
if (c == EOF) {
end_of_file_ = true;
}
if (c != '\r' && c != '\n' && c != EOF) {
*out_data += c;
} else {
if (out_data->length() != 0) return true;
if (end_of_file_) return false;
}
}
return false;
}
private:
std::FILE *fp_;
bool use_stdin_;
bool end_of_file_;
};
class StdFile : public dmlc::Stream {
public:
explicit StdFile(const char *fname, const char *mode)
: use_stdio(false) {
using namespace std;
#ifndef XGBOOST_STRICT_CXX98_
if (!strcmp(fname, "stdin")) {
use_stdio = true; fp = stdin;
}
if (!strcmp(fname, "stdout")) {
use_stdio = true; fp = stdout;
}
#endif
if (!strncmp(fname, "file://", 7)) fname += 7;
if (!use_stdio) {
std::string flag = mode;
if (flag == "w") flag = "wb";
if (flag == "r") flag = "rb";
fp = utils::FopenCheck(fname, flag.c_str());
}
}
virtual ~StdFile(void) {
this->Close();
}
virtual size_t Read(void *ptr, size_t size) {
return std::fread(ptr, 1, size, fp);
}
virtual void Write(const void *ptr, size_t size) {
std::fwrite(ptr, size, 1, fp);
}
virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET);
}
virtual size_t Tell(void) {
return std::ftell(fp);
}
virtual bool AtEnd(void) const {
return std::feof(fp) != 0;
}
inline void Close(void) {
if (fp != NULL && !use_stdio) {
std::fclose(fp); fp = NULL;
}
}
private:
std::FILE *fp;
bool use_stdio;
};
} // namespace utils
} // namespace xgboost
namespace dmlc {
InputSplit* InputSplit::Create(const char *uri,
unsigned part,
unsigned nsplit) {
using namespace xgboost;
const char *msg = "xgboost is compiled in local mode\n"\
"to use hdfs, s3 or distributed version, compile with make dmlc=1";
utils::Check(strncmp(uri, "s3://", 5) != 0, msg);
utils::Check(strncmp(uri, "hdfs://", 7) != 0, msg);
utils::Check(nsplit == 1, msg);
return new utils::SingleFileSplit(uri);
}
Stream *Stream::Create(const char *uri, const char * const flag) {
using namespace xgboost;
const char *msg = "xgboost is compiled in local mode\n"\
"to use hdfs, s3 or distributed version, compile with make dmlc=1";
utils::Check(strncmp(uri, "s3://", 5) != 0, msg);
utils::Check(strncmp(uri, "hdfs://", 7) != 0, msg);
return new utils::StdFile(uri, flag);
}
} // namespace dmlc
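As a rough usage sketch (not part of this commit; the file names below are made up), the factory functions above route plain paths to the local implementations and reject distributed URIs:

```c++
#include <string>
#include "dmlc/io.h"  // assumed include path for the dmlc interface header

int main() {
  // resolves to utils::SingleFileSplit, since this build has no dmlc-core
  dmlc::InputSplit *split = dmlc::InputSplit::Create("train.libsvm", 0, 1);
  std::string line;
  while (split->ReadRecord(&line)) {
    // each record is one line of the input, with the newline stripped
  }
  delete split;
  // an "hdfs://..." or "s3://..." uri here would fail the utils::Check
  // with the "compile with make dmlc=1" message
  dmlc::Stream *fo = dmlc::Stream::Create("model.bin", "w");
  int version = 1;
  fo->Write(&version, sizeof(version));
  delete fo;  // StdFile closes the underlying FILE* in its destructor
  return 0;
}
```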

View File

@ -16,7 +16,10 @@ namespace xgboost {
namespace io {
DataMatrix* LoadDataMatrix(const char *fname, bool silent,
bool savebuffer, bool loadsplit) {
if (!std::strcmp(fname, "stdin") || loadsplit) {
if (!std::strcmp(fname, "stdin") ||
!std::strncmp(fname, "s3://", 5) ||
!std::strncmp(fname, "hdfs://", 7) ||
loadsplit) {
DMatrixSimple *dmat = new DMatrixSimple();
dmat->LoadText(fname, silent, loadsplit);
return dmat;

View File

@ -90,11 +90,11 @@ class DMatrixSimple : public DataMatrix {
rank = rabit::GetRank();
npart = rabit::GetWorldSize();
}
rabit::io::InputSplit *in =
rabit::io::CreateInputSplit(uri, rank, npart);
dmlc::InputSplit *in =
dmlc::InputSplit::Create(uri, rank, npart);
this->Clear();
std::string line;
while (in->NextLine(&line)) {
while (in->ReadRecord(&line)) {
float label;
std::istringstream ss(line);
std::vector<RowBatch::Entry> feats;

View File

@ -192,8 +192,10 @@ class FMatrixS : public IFMatrix{
bst_omp_uint ncol = static_cast<bst_omp_uint>(this->NumCol());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ncol; ++i) {
std::sort(&col_data_[0] + col_ptr_[i],
&col_data_[0] + col_ptr_[i + 1], Entry::CmpValue);
if (col_ptr_[i] < col_ptr_[i + 1]) {
std::sort(BeginPtr(col_data_) + col_ptr_[i],
BeginPtr(col_data_) + col_ptr_[i + 1], Entry::CmpValue);
}
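// the emptiness check (together with BeginPtr) avoids forming &col_data_[0]
// for an empty vector, which is undefined behavior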
}
}

View File

@ -83,7 +83,15 @@ struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
return "logloss";
}
inline static float EvalRow(float y, float py) {
return - y * std::log(py) - (1.0f - y) * std::log(1 - py);
const float eps = 1e-16f;
const float pneg = 1.0f - py;
if (py < eps) {
return -y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps);
} else if (pneg < eps) {
return -y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps);
} else {
return -y * std::log(py) - (1.0f - y) * std::log(pneg);
}
}
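// equivalently: the prediction is clipped into [eps, 1 - eps] before the
// logs are taken, so a degenerate prediction of exactly 0 or 1 can no
// longer produce log(0)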
};
@ -111,17 +119,29 @@ struct EvalMClassBase : public IEvaluator {
utils::Check(preds.size() % info.labels.size() == 0,
"label and prediction size not match");
const size_t nclass = preds.size() / info.labels.size();
utils::Check(nclass > 1,
"mlogloss and merror are only used for multi-class classification,"\
" use logloss for binary classification");
const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
float sum = 0.0, wsum = 0.0;
int label_error = 0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const float wt = info.GetWeight(i);
int label = static_cast<int>(info.labels[i]);
if (label >= 0 && label < static_cast<int>(nclass)) {
sum += Derived::EvalRow(info.labels[i],
BeginPtr(preds) + i * nclass,
nclass) * wt;
wsum += wt;
} else {
label_error = label;
}
}
utils::Check(label_error >= 0 && label_error < static_cast<int>(nclass),
"MultiClassEvaluation: label must be in [0, num_class)," \
" num_class=%d but found %d in label",
static_cast<int>(nclass), label_error);
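// label validation is deferred to this single Check: failing inside the
// omp parallel loop above would be unsafe, so offending labels are only
// recorded there and reported here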
float dat[2]; dat[0] = sum, dat[1] = wsum;
if (distributed) {
rabit::Allreduce<rabit::op::Sum>(dat, 2);
@ -135,7 +155,7 @@ struct EvalMClassBase : public IEvaluator {
* \param pred prediction value of current instance
* \param nclass number of class in the prediction
*/
inline static float EvalRow(float label,
inline static float EvalRow(int label,
const float *pred,
size_t nclass);
/*!
@ -146,13 +166,15 @@ struct EvalMClassBase : public IEvaluator {
inline static float GetFinal(float esum, float wsum) {
return esum / wsum;
}
// used to store error message
const char *error_msg_;
};
/*! \brief match error */
struct EvalMatchError : public EvalMClassBase<EvalMatchError> {
virtual const char *Name(void) const {
return "merror";
}
inline static float EvalRow(float label,
inline static float EvalRow(int label,
const float *pred,
size_t nclass) {
return FindMaxIndex(pred, nclass) != static_cast<int>(label);
@ -163,12 +185,11 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
virtual const char *Name(void) const {
return "mlogloss";
}
inline static float EvalRow(float label,
inline static float EvalRow(int label,
const float *pred,
size_t nclass) {
const float eps = 1e-16f;
size_t k = static_cast<size_t>(label);
utils::Check(k < nclass, "mlogloss: label must be in [0, num_class)");
if (pred[k] > eps) {
return -std::log(pred[k]);
} else {

View File

@ -43,6 +43,26 @@ inline static int FindMaxIndex(const std::vector<float>& rec) {
return FindMaxIndex(BeginPtr(rec), rec.size());
}
// perform numerical safe logsum
inline float LogSum(float x, float y) {
if (x < y) {
return y + std::log(std::exp(x - y) + 1.0f);
} else {
return x + std::log(std::exp(y - x) + 1.0f);
}
}
// numerical safe logsum
inline float LogSum(const float *rec, size_t size) {
float mx = rec[0];
for (size_t i = 1; i < size; ++i) {
mx = std::max(mx, rec[i]);
}
float sum = 0.0f;
for (size_t i = 0; i < size; ++i) {
sum += std::exp(rec[i] - mx);
}
return mx + std::log(sum);
}
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
const std::pair<float, unsigned> &b) {

View File

@ -23,7 +23,7 @@ namespace learner {
* \brief learner that takes do gradient boosting on specific objective functions
* and do training and prediction
*/
class BoostLearner : public rabit::ISerializable {
class BoostLearner : public rabit::Serializable {
public:
BoostLearner(void) {
obj_ = NULL;
@ -163,34 +163,51 @@ class BoostLearner : public rabit::ISerializable {
bool calc_num_feature = true) {
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
"BoostLearner: wrong model format");
utils::Check(fi.Read(&name_obj_), "BoostLearner: wrong model format");
{
// backward-compatibility code to stay compatible with the old model format
// for new models, Read(&name_obj_) suffices
size_t len;
utils::Check(fi.Read(&len, sizeof(len)) != 0, "BoostLearner: wrong model format");
if (len >= std::numeric_limits<unsigned>::max()) {
int gap;
utils::Check(fi.Read(&gap, sizeof(gap)) != 0, "BoostLearner: wrong model format");
len = len >> 32UL;
}
if (len != 0) {
name_obj_.resize(len);
utils::Check(fi.Read(&name_obj_[0], len) != 0, "BoostLearner: wrong model format");
}
}
utils::Check(fi.Read(&name_gbm_), "BoostLearner: wrong model format");
// delete existing gbm if any
if (obj_ != NULL) delete obj_;
if (gbm_ != NULL) delete gbm_;
this->InitTrainer(calc_num_feature);
this->InitObjGBM();
char tmp[32];
utils::SPrintf(tmp, sizeof(tmp), "%u", mparam.num_class);
obj_->SetParam("num_class", tmp);
gbm_->LoadModel(fi, with_pbuffer);
if (!with_pbuffer || distributed_mode == 2) {
gbm_->ResetPredBuffer(pred_buffer_size);
}
}
// rabit load model from rabit checkpoint
virtual void Load(rabit::IStream &fi) {
virtual void Load(rabit::Stream *fi) {
// for row split, we should not keep pbuffer
this->LoadModel(fi, distributed_mode != 2, false);
this->LoadModel(*fi, distributed_mode != 2, false);
}
// rabit save model to rabit checkpoint
virtual void Save(rabit::IStream &fo) const {
virtual void Save(rabit::Stream *fo) const {
// for row split, we should not keep pbuffer
this->SaveModel(fo, distributed_mode != 2);
this->SaveModel(*fo, distributed_mode != 2);
}
/*!
* \brief load model from file
* \param fname file name
*/
inline void LoadModel(const char *fname) {
utils::IStream *fi = rabit::io::CreateStream(fname, "r");
utils::IStream *fi = utils::IStream::Create(fname, "r");
std::string header; header.resize(4);
// check header for different binary encode
// can be base64 or binary
@ -204,7 +221,7 @@ class BoostLearner : public rabit::ISerializable {
this->LoadModel(*fi);
} else {
delete fi;
fi = rabit::io::CreateStream(fname, "r");
fi = utils::IStream::Create(fname, "r");
this->LoadModel(*fi);
}
delete fi;
@ -221,7 +238,7 @@ class BoostLearner : public rabit::ISerializable {
* \param save_base64 whether save in base64 format
*/
inline void SaveModel(const char *fname, bool save_base64 = false) const {
utils::IStream *fo = rabit::io::CreateStream(fname, "w");
utils::IStream *fo = utils::IStream::Create(fname, "w");
if (save_base64 != 0 || !strcmp(fname, "stdout")) {
fo->Write("bs64\t", 5);
utils::Base64OutStream bout(fo);

View File

@ -82,11 +82,13 @@ struct LossType {
* \return second order gradient
*/
inline float SecondOrderGradient(float predt, float label) const {
// cap the second order gradient to a positive value
const float eps = 1e-16f;
switch (loss_type) {
case kLinearSquare: return 1.0f;
case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt));
case kLogisticClassify:
case kLogisticNeglik: return predt * (1 - predt);
case kLogisticNeglik: return std::max(predt * (1.0f - predt), eps);
default: utils::Error("unknown loss_type"); return 0.0f;
}
}
@ -195,6 +197,7 @@ class SoftmaxMultiClassObj : public IObjFunction {
gpair.resize(preds.size());
const unsigned nstep = static_cast<unsigned>(info.labels.size() * nclass);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size() / nclass);
int label_error = 0;
#pragma omp parallel
{
std::vector<float> rec(nclass);
@ -206,8 +209,9 @@ class SoftmaxMultiClassObj : public IObjFunction {
Softmax(&rec);
const unsigned j = i % nstep;
int label = static_cast<int>(info.labels[j]);
utils::Check(label >= 0 && label < nclass,
"SoftmaxMultiClassObj: label must be in [0, num_class)");
if (label < 0 || label >= nclass) {
label_error = label; label = 0;
}
const float wt = info.GetWeight(j);
for (int k = 0; k < nclass; ++k) {
float p = rec[k];
@ -220,6 +224,9 @@ class SoftmaxMultiClassObj : public IObjFunction {
}
}
}
utils::Check(label_error >= 0 && label_error < nclass,
"SoftmaxMultiClassObj: label must be in [0, num_class),"\
" num_class=%d but found %d in label", nclass, label_error);
}
virtual void PredTransform(std::vector<float> *io_preds) {
this->Transform(io_preds, output_prob);

View File

@ -7,7 +7,6 @@
* \author Tianqi Chen
*/
#include "../../subtree/rabit/include/rabit.h"
#include "../../subtree/rabit/rabit-learn/io/io.h"
#endif // XGBOOST_SYNC_H_

View File

@ -28,6 +28,10 @@ struct TrainParam{
float reg_alpha;
// default direction choice
int default_direction;
// maximum delta update we can add in weight estimation
// this parameter can be used to stablize update
// default=0 means no constraint on weight delta
float max_delta_step;
// whether we want to do subsample
float subsample;
// whether to subsample columns each split, in each level
@ -52,6 +56,7 @@ struct TrainParam{
learning_rate = 0.3f;
min_split_loss = 0.0f;
min_child_weight = 1.0f;
max_delta_step = 0.0f;
max_depth = 6;
reg_lambda = 1.0f;
reg_alpha = 0.0f;
@ -81,6 +86,7 @@ struct TrainParam{
if (!strcmp(name, "learning_rate")) learning_rate = static_cast<float>(atof(val));
if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
if (!strcmp(name, "max_delta_step")) max_delta_step = static_cast<float>(atof(val));
if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
if (!strcmp(name, "reg_alpha")) reg_alpha = static_cast<float>(atof(val));
if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
@ -102,11 +108,21 @@ struct TrainParam{
// calculate the cost of loss function
inline double CalcGain(double sum_grad, double sum_hess) const {
if (sum_hess < min_child_weight) return 0.0;
if (max_delta_step == 0.0f) {
if (reg_alpha == 0.0f) {
return Sqr(sum_grad) / (sum_hess + reg_lambda);
} else {
return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda);
}
} else {
double w = CalcWeight(sum_grad, sum_hess);
double ret = sum_grad * w + 0.5 * (sum_hess + reg_lambda) * Sqr(w);
if (reg_alpha == 0.0f) {
return - 2.0 * ret;
} else {
return - 2.0 * (ret + reg_alpha * std::abs(w));
}
}
}
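// closed forms of the branches above: without the cap, w* = -G / (H + lambda)
// and gain = G^2 / (H + lambda), with G soft-thresholded by reg_alpha when it
// is nonzero; when max_delta_step clips w, the gain is read off the objective
// as -2 * (G*w + 0.5*(H + lambda)*w^2 + reg_alpha*|w|), which reduces to the
// same value whenever the clip is inactive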
// calculate the cost of the loss function with four statistics
inline double CalcGain(double sum_grad, double sum_hess,
@ -122,11 +138,17 @@ struct TrainParam{
// calculate weight given the statistics
inline double CalcWeight(double sum_grad, double sum_hess) const {
if (sum_hess < min_child_weight) return 0.0;
double dw;
if (reg_alpha == 0.0f) {
return -sum_grad / (sum_hess + reg_lambda);
dw = -sum_grad / (sum_hess + reg_lambda);
} else {
return -ThresholdL1(sum_grad, reg_alpha) / (sum_hess + reg_lambda);
dw = -ThresholdL1(sum_grad, reg_alpha) / (sum_hess + reg_lambda);
}
if (max_delta_step != 0.0f) {
if (dw > max_delta_step) dw = max_delta_step;
if (dw < -max_delta_step) dw = -max_delta_step;
}
return dw;
}
/*! \brief whether need forward small to big search: default right */
inline bool need_forward_search(float col_density = 0.0f) const {

View File

@ -406,7 +406,8 @@ class ColMaker: public IUpdater {
c.SetSubstract(snode[nid].stats, e.stats);
if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
const float delta = d_step == +1 ? rt_eps : -rt_eps;
const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap;
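// the gap scales with |last_fvalue| rather than a fixed rt_eps: for large
// feature values, last_fvalue + rt_eps can round back to last_fvalue in
// float arithmetic, yielding a split threshold that separates nothing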
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
}
}
@ -497,6 +498,9 @@ class ColMaker: public IUpdater {
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
if (ridx >= position.size()) {
utils::Printf("ridx exceed bound\n");
}
const int nid = this->DecodePosition(ridx);
if (tree[nid].is_leaf()) {
// mark finish when it is not a fresh leaf

View File

@ -1,5 +1,5 @@
#ifndef RABIT_LEARN_IO_BASE64_INL_H_
#define RABIT_LEARN_IO_BASE64_INL_H_
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
/*!
* \file base64.h
* \brief data stream support to input and output from/to base64 stream
@ -9,10 +9,54 @@
#include <cctype>
#include <cstdio>
#include "./io.h"
#include "./buffer_reader-inl.h"
namespace rabit {
namespace io {
namespace xgboost {
namespace utils {
/*! \brief buffer reader of the stream that allows you to get characters one at a time */
class StreamBufferReader {
public:
StreamBufferReader(size_t buffer_size)
:stream_(NULL),
read_len_(1), read_ptr_(1) {
buffer_.resize(buffer_size);
}
/*!
* \brief set input stream
*/
inline void set_stream(IStream *stream) {
stream_ = stream;
read_len_ = read_ptr_ = 1;
}
/*!
* \brief allows quick reads using GetChar
*/
inline char GetChar(void) {
while (true) {
if (read_ptr_ < read_len_) {
return buffer_[read_ptr_++];
} else {
read_len_ = stream_->Read(&buffer_[0], buffer_.length());
if (read_len_ == 0) return EOF;
read_ptr_ = 0;
}
}
}
/*! \brief whether we are reaching the end of file */
inline bool AtEnd(void) const {
return read_len_ == 0;
}
private:
/*! \brief the underlying stream */
IStream *stream_;
/*! \brief buffer to hold data */
std::string buffer_;
/*! \brief length of valid data in buffer */
size_t read_len_;
/*! \brief pointer in the buffer */
size_t read_ptr_;
};
/*! \brief namespace of base64 decoding and encoding table */
namespace base64 {
const char DecodeTable[] = {
@ -209,9 +253,11 @@ class Base64OutStream: public IStream {
if (out_buf.length() >= kBufferSize) Flush();
}
inline void Flush(void) {
fp->Write(BeginPtr(out_buf), out_buf.length());
if (out_buf.length() != 0) {
fp->Write(&out_buf[0], out_buf.length());
out_buf.clear();
}
}
};
} // namespace utils
} // namespace rabit

View File

@ -14,12 +14,10 @@
namespace xgboost {
namespace utils {
// reuse the definitions of streams
typedef rabit::IStream IStream;
typedef rabit::utils::ISeekStream ISeekStream;
typedef rabit::Stream IStream;
typedef rabit::utils::SeekStream ISeekStream;
typedef rabit::utils::MemoryFixSizeBuffer MemoryFixSizeBuffer;
typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
typedef rabit::io::Base64InStream Base64InStream;
typedef rabit::io::Base64OutStream Base64OutStream;
/*! \brief implementation of file i/o stream */
class FileStream : public ISeekStream {
@ -54,4 +52,6 @@ class FileStream : public ISeekStream {
};
} // namespace utils
} // namespace xgboost
#include "./base64-inl.h"
#endif

View File

@ -296,7 +296,17 @@ struct WXQSummary : public WQSummary<DType, RType> {
}
RType begin = src.data[0].rmax;
size_t n = maxsize - 1, nbig = 0;
const RType range = src.data[src.size - 1].rmin - begin;
RType range = src.data[src.size - 1].rmin - begin;
// prune off zero weights
if (range == 0.0f) {
// special case: contains only two effective data points
this->data[0] = src.data[0];
this->data[1] = src.data[src.size - 1];
this->size = 2;
return;
} else {
range = std::max(range, static_cast<RType>(1e-3f));
}
const RType chunk = 2 * range / n;
// minimized range
RType mrange = 0;
@ -316,7 +326,19 @@ struct WXQSummary : public WQSummary<DType, RType> {
mrange += src.data[src.size-1].rmax_prev() - src.data[bid].rmin_next();
}
}
utils::Assert(nbig < n - 1, "too many large chunk");
if (nbig >= n - 1) {
// see what was the case
fprintf(stderr, "LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n);
fprintf(stderr, "LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n",
src.size, maxsize, static_cast<double>(range),
static_cast<double>(chunk));
for (size_t i = 0; i < src.size; ++i) {
printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i,
src.data[i].rmin, src.data[i].rmax, src.data[i].wmin,
src.data[i].value, CheckLarge(src.data[i], chunk));
}
utils::Assert(nbig < n - 1, "quantile: too many large chunk");
}
this->data[0] = src.data[0];
this->size = 1;
// use smaller size
@ -619,6 +641,7 @@ class QuantileSketchTemplate {
* \param x the elemented added to the sketch
*/
inline void Push(DType x, RType w = 1) {
if (w == static_cast<RType>(0)) return;
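// a zero-weight point contributes nothing to the weighted quantile summary,
// so it is dropped before entering the queue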
if (inqueue.qtail == inqueue.queue.size()) {
// jump from lazy one value to limit_size * 2
if (inqueue.queue.size() == 1) {

View File

@ -5,7 +5,8 @@ rabit is a light weight library that provides a fault tolerant interface of Allr
* [Tutorial](guide)
* [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc)
* You can also directly read the [interface header](include/rabit.h)
* [Machine Learning Tools](rabit-learn)
* [Distributed Machine Learning Tools](https://github.com/dmlc/wormhole)
- Rabit is one of the backbone libraries supporting the wormhole machine learning tools
Features
====
@ -33,5 +34,4 @@ Contributing
Rabit is an open-source library, contributions are welcomed, including:
* The rabit core library.
* Customized tracker script for new platforms and interface of new languages.
* Toolkits, benchmarks, resource (links to related repos).
* Tutorial and examples about the library.

View File

@ -95,7 +95,7 @@ WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT =
INPUT = . dmlc
INPUT_ENCODING = UTF-8
FILE_PATTERNS =
RECURSIVE = NO

View File

This section tries to give examples of different aspects of the rabit API.
#### Structure of a Rabit Program
The following code illustrates the common structure of a rabit program. This is an abstract example;
you can also refer to [kmeans.cc](../rabit-learn/kmeans/kmeans.cc) for an example implementation of the kmeans algorithm.
you can also refer to [wormhole](https://github.com/dmlc/wormhole/blob/master/learn/kmeans/kmeans.cc) for an example implementation of the kmeans algorithm.
```c++
#include <rabit.h>

View File

@ -0,0 +1,4 @@
This folder is part of the dmlc-core library; it allows rabit to use a unified stream interface with other dmlc projects.
- Since it is only an interface dependency, dmlc-core is not required to compile rabit.
- To compile a project that uses dmlc-core functions, linking to libdmlc.a (provided by dmlc-core) is required.

View File

@ -0,0 +1,333 @@
/*!
* Copyright (c) 2015 by Contributors
* \file io.h
* \brief defines serializable interface of dmlc
*/
#ifndef DMLC_IO_H_
#define DMLC_IO_H_
#include <cstdio>
#include <string>
#include <vector>
#include <istream>
#include <ostream>
#include <streambuf>
#include <cassert>
/*! \brief namespace for dmlc */
namespace dmlc {
/*!
* \brief interface of stream I/O for serialization
*/
class Stream {
public:
/*!
* \brief reads data from a stream
* \param ptr pointer to a memory buffer
* \param size block size
* \return the size of data read
*/
virtual size_t Read(void *ptr, size_t size) = 0;
/*!
* \brief writes data to a stream
* \param ptr pointer to a memory buffer
* \param size block size
*/
virtual void Write(const void *ptr, size_t size) = 0;
/*! \brief virtual destructor */
virtual ~Stream(void) {}
/*!
* \brief generic factory function
* creates a stream; the stream will close the underlying files
* upon deletion
* \param uri the uri of the input currently we support
* hdfs://, s3://, and file://; by default file:// is used
* \param flag can be "w", "r", "a"
*/
static Stream *Create(const char *uri, const char* const flag);
// helper functions to write/read different data structures
/*!
* \brief writes a vector
* \param vec vector to be written/serialized
*/
template<typename T>
inline void Write(const std::vector<T> &vec);
/*!
* \brief loads a vector
* \param out_vec vector to be loaded/deserialized
* \return whether the load was successful
*/
template<typename T>
inline bool Read(std::vector<T> *out_vec);
/*!
* \brief writes a string
* \param str the string to be written/serialized
*/
inline void Write(const std::string &str);
/*!
* \brief loads a string
* \param out_str string to be loaded/deserialized
* \return whether the load/deserialization was successful
*/
inline bool Read(std::string *out_str);
};
/*! \brief interface of i/o stream that support seek */
class SeekStream: public Stream {
public:
// virtual destructor
virtual ~SeekStream(void) {}
/*! \brief seek to certain position of the file */
virtual void Seek(size_t pos) = 0;
/*! \brief tell the position of the stream */
virtual size_t Tell(void) = 0;
/*! \return whether we are at end of file */
virtual bool AtEnd(void) const = 0;
};
/*! \brief interface for serializable objects */
class Serializable {
public:
/*!
* \brief load the model from a stream
* \param fi stream where to load the model from
*/
virtual void Load(Stream *fi) = 0;
/*!
* \brief saves the model to a stream
* \param fo stream where to save the model to
*/
virtual void Save(Stream *fo) const = 0;
};
/*!
* \brief input split header, used to create input split on input dataset
* this class can be used to obtain filesystem invariant splits from input files
*/
class InputSplit {
public:
/*!
* \brief read next record, store into out_data
* the data in the returned record depends on the input data format:
* if the input is text data, each line is returned as a record (\n not included)
* if the input is recordio, each record is returned
* \param out_data the string that stores the record data; \n is not included
* \return true if the next record was found, false if all records have been read
*/
virtual bool ReadRecord(std::string *out_data) = 0;
/*! \brief destructor*/
virtual ~InputSplit(void) {}
/*!
* \brief factory function:
* create input split given a uri
* \param uri the uri of the input, can contain hdfs prefix
* \param part_index the part id of current input
* \param num_parts total number of splits
*/
static InputSplit* Create(const char *uri,
unsigned part_index,
unsigned num_parts);
};
/*!
 * \brief a std::ostream class that can wrap Stream objects;
 *  output written to this ostream goes to the underlying Stream
*
* Usage example:
* \code
*
* Stream *fs = Stream::Create("hdfs:///test.txt", "w");
* dmlc::ostream os(fs);
* os << "hello world" << std::endl;
* delete fs;
* \endcode
*/
class ostream : public std::basic_ostream<char> {
public:
/*!
* \brief construct std::ostream type
* \param stream the Stream output to be used
* \param buffer_size internal streambuf size
*/
explicit ostream(Stream *stream,
size_t buffer_size = 1 << 10)
: std::basic_ostream<char>(NULL), buf_(buffer_size) {
this->set_stream(stream);
}
  // explicitly synchronize the buffer
virtual ~ostream() {
buf_.pubsync();
}
/*!
* \brief set internal stream to be stream, reset states
* \param stream new stream as output
*/
inline void set_stream(Stream *stream) {
buf_.set_stream(stream);
this->rdbuf(&buf_);
}
private:
// internal streambuf
class OutBuf : public std::streambuf {
public:
explicit OutBuf(size_t buffer_size)
: stream_(NULL), buffer_(buffer_size) {
assert(buffer_.size() > 0);
}
// set stream to the buffer
inline void set_stream(Stream *stream);
private:
/*! \brief internal stream by StreamBuf */
Stream *stream_;
/*! \brief internal buffer */
std::vector<char> buffer_;
// override sync
inline int_type sync(void);
// override overflow
inline int_type overflow(int c);
};
/*! \brief buffer of the stream */
OutBuf buf_;
};
/*!
 * \brief a std::istream class that can wrap Stream objects;
 *  input read from this istream comes from the underlying Stream
*
* Usage example:
* \code
*
* Stream *fs = Stream::Create("hdfs:///test.txt", "r");
* dmlc::istream is(fs);
* is >> mydata;
* delete fs;
* \endcode
*/
class istream : public std::basic_istream<char> {
public:
/*!
   * \brief construct std::istream type
   * \param stream the Stream input to be used
* \param buffer_size internal buffer size
*/
explicit istream(Stream *stream,
size_t buffer_size = 1 << 10)
: std::basic_istream<char>(NULL), buf_(buffer_size) {
this->set_stream(stream);
}
virtual ~istream() {}
/*!
* \brief set internal stream to be stream, reset states
* \param stream new stream as output
*/
inline void set_stream(Stream *stream) {
buf_.set_stream(stream);
this->rdbuf(&buf_);
}
private:
// internal streambuf
class InBuf : public std::streambuf {
public:
explicit InBuf(size_t buffer_size)
: stream_(NULL), buffer_(buffer_size) {
assert(buffer_.size() > 0);
}
// set stream to the buffer
inline void set_stream(Stream *stream);
private:
/*! \brief internal stream by StreamBuf */
Stream *stream_;
/*! \brief internal buffer */
std::vector<char> buffer_;
// override underflow
inline int_type underflow();
};
/*! \brief input buffer */
InBuf buf_;
};
// implementations of inline functions
template<typename T>
inline void Stream::Write(const std::vector<T> &vec) {
size_t sz = vec.size();
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&vec[0], sizeof(T) * sz);
}
}
template<typename T>
inline bool Stream::Read(std::vector<T> *out_vec) {
size_t sz;
if (this->Read(&sz, sizeof(sz)) == 0) return false;
out_vec->resize(sz);
if (sz != 0) {
if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false;
}
return true;
}
inline void Stream::Write(const std::string &str) {
size_t sz = str.length();
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&str[0], sizeof(char) * sz);
}
}
inline bool Stream::Read(std::string *out_str) {
size_t sz;
if (this->Read(&sz, sizeof(sz)) == 0) return false;
out_str->resize(sz);
if (sz != 0) {
if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) {
return false;
}
}
return true;
}
// implementations for ostream
inline void ostream::OutBuf::set_stream(Stream *stream) {
if (stream_ != NULL) this->pubsync();
this->stream_ = stream;
this->setp(&buffer_[0], &buffer_[0] + buffer_.size() - 1);
}
inline int ostream::OutBuf::sync(void) {
if (stream_ == NULL) return -1;
std::ptrdiff_t n = pptr() - pbase();
stream_->Write(pbase(), n);
this->pbump(-n);
return 0;
}
inline int ostream::OutBuf::overflow(int c) {
*(this->pptr()) = c;
std::ptrdiff_t n = pptr() - pbase();
this->pbump(-n);
if (c == EOF) {
stream_->Write(pbase(), n);
} else {
stream_->Write(pbase(), n + 1);
}
return c;
}
// implementations for istream
inline void istream::InBuf::set_stream(Stream *stream) {
stream_ = stream;
this->setg(&buffer_[0], &buffer_[0], &buffer_[0]);
}
inline int istream::InBuf::underflow() {
char *bhead = &buffer_[0];
if (this->gptr() == this->egptr()) {
size_t sz = stream_->Read(bhead, buffer_.size());
this->setg(bhead, bhead, bhead + sz);
}
if (this->gptr() == this->egptr()) {
return traits_type::eof();
} else {
return traits_type::to_int_type(*gptr());
}
}
} // namespace dmlc
#endif // DMLC_IO_H_
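
As a quick illustration of the interface above, here is a minimal sketch (assuming dmlc-core is built and linked, and that `/tmp` is writable) that round-trips a vector and a line of text through a `Stream` and the `ostream`/`istream` adapters:
```c++
#include <dmlc/io.h>
#include <iostream>
#include <string>
#include <vector>

int main() {
  {
    // create a writable stream; file:// is the default scheme
    dmlc::Stream *fo = dmlc::Stream::Create("file:///tmp/demo.bin", "w");
    std::vector<float> weights(10, 1.0f);
    fo->Write(weights);        // size-prefixed vector helper
    {
      dmlc::ostream os(fo);    // std::ostream adapter over the Stream
      os << "hello world\n";
    }                          // adapter flushes to fo when it goes out of scope
    delete fo;                 // closes the underlying file
  }
  {
    // read both values back from the same file
    dmlc::Stream *fi = dmlc::Stream::Create("file:///tmp/demo.bin", "r");
    std::vector<float> weights;
    if (fi->Read(&weights)) {
      std::cout << "read " << weights.size() << " floats" << std::endl;
    }
    dmlc::istream is(fi);
    std::string token;
    is >> token;               // reads "hello"
    delete fi;
  }
  return 0;
}
```
Deleting the `Stream` closes the underlying file, as documented in the factory function above.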

View File

@ -16,7 +16,7 @@
#if __cplusplus >= 201103L
#include <functional>
#endif // C++11
// contains definition of ISerializable
// contains definition of Serializable
#include "./rabit_serializable.h"
// engine definition of rabit, defines internal implementation
// to use rabit interface, there is no need to read engine.h
@ -183,8 +183,8 @@ inline void Allreduce(DType *sendrecvbuf, size_t count,
*
* \sa CheckPoint, VersionNumber
*/
inline int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model = NULL);
inline int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = NULL);
/*!
* \brief checkpoints the model, meaning a stage of execution has finished.
* every time we call check point, a version number will be increased by one
@ -199,8 +199,8 @@ inline int LoadCheckPoint(ISerializable *global_model,
* So, only CheckPoint with the global_model if possible
* \sa LoadCheckPoint, VersionNumber
*/
inline void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model = NULL);
inline void CheckPoint(const Serializable *global_model,
const Serializable *local_model = NULL);
/*!
* \brief This function can be used to replace CheckPoint for global_model only,
* when certain condition is met (see detailed explanation).
@ -222,7 +222,7 @@ inline void CheckPoint(const ISerializable *global_model,
* is the same in every node
* \sa LoadCheckPoint, CheckPoint, VersionNumber
*/
inline void LazyCheckPoint(const ISerializable *global_model);
inline void LazyCheckPoint(const Serializable *global_model);
/*!
* \return version number of the current stored model,
* which means how many calls to CheckPoint we made so far

View File

@ -94,8 +94,8 @@ class IEngine {
*
* \sa CheckPoint, VersionNumber
*/
virtual int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model = NULL) = 0;
virtual int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = NULL) = 0;
/*!
* \brief checkpoints the model, meaning a stage of execution was finished
 * every time we call check point, the version number increases by one
@ -112,8 +112,8 @@ class IEngine {
*
* \sa LoadCheckPoint, VersionNumber
*/
virtual void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model = NULL) = 0;
virtual void CheckPoint(const Serializable *global_model,
const Serializable *local_model = NULL) = 0;
/*!
* \brief This function can be used to replace CheckPoint for global_model only,
* when certain condition is met (see detailed explanation).
@ -134,7 +134,7 @@ class IEngine {
* is the same in every node
* \sa LoadCheckPoint, CheckPoint, VersionNumber
*/
virtual void LazyCheckPoint(const ISerializable *global_model) = 0;
virtual void LazyCheckPoint(const Serializable *global_model) = 0;
/*!
* \return version number of the current stored model,
* which means how many calls to CheckPoint we made so far
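
To make the interface change concrete, here is a minimal sketch of how a model plugs into the checkpoint cycle under the new signatures (the `Model` class and the loop body are hypothetical, not part of this change):
```c++
#include <rabit.h>
#include <vector>

// hypothetical model: Load/Save now take dmlc-based Stream pointers
// (rabit::Stream and rabit::Serializable are typedefs of the dmlc types)
class Model : public rabit::Serializable {
 public:
  std::vector<float> weight;
  virtual void Load(rabit::Stream *fi) {
    fi->Read(&weight);
  }
  virtual void Save(rabit::Stream *fo) const {
    fo->Write(weight);
  }
};

int main(int argc, char *argv[]) {
  rabit::Init(argc, argv);
  Model model;
  // returns the version number of the restored model, 0 on a fresh start
  int iter = rabit::LoadCheckPoint(&model);
  if (iter == 0) {
    model.weight.resize(16, 0.0f);  // first run: initialize the model
  }
  const int max_iter = 10;          // illustrative iteration count
  for (int r = iter; r < max_iter; ++r) {
    // ... compute local statistics into model.weight, then synchronize ...
    rabit::Allreduce<rabit::op::Sum>(&model.weight[0], model.weight.size());
    rabit::CheckPoint(&model);      // increases the version number by one
  }
  rabit::Finalize();
  return 0;
}
```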

View File

@ -16,21 +16,10 @@
namespace rabit {
namespace utils {
/*! \brief interface of i/o stream that support seek */
class ISeekStream: public IStream {
public:
// virtual destructor
virtual ~ISeekStream(void) {}
/*! \brief seek to certain position of the file */
virtual void Seek(size_t pos) = 0;
/*! \brief tell the position of the stream */
virtual size_t Tell(void) = 0;
/*! \return whether we are at end of file */
virtual bool AtEnd(void) const = 0;
};
/*! \brief re-use definition of dmlc::SeekStream */
typedef dmlc::SeekStream SeekStream;
/*! \brief fixed size memory buffer */
struct MemoryFixSizeBuffer : public ISeekStream {
struct MemoryFixSizeBuffer : public SeekStream {
public:
MemoryFixSizeBuffer(void *p_buffer, size_t buffer_size)
: p_buffer_(reinterpret_cast<char*>(p_buffer)),
@ -72,7 +61,7 @@ struct MemoryFixSizeBuffer : public ISeekStream {
}; // class MemoryFixSizeBuffer
/*! \brief a in memory buffer that can be read and write as stream interface */
struct MemoryBufferStream : public ISeekStream {
struct MemoryBufferStream : public SeekStream {
public:
explicit MemoryBufferStream(std::string *p_buffer)
: p_buffer_(p_buffer) {
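
Because both in-memory buffers now derive from `dmlc::SeekStream`, they can stand in wherever a `Stream` is expected. A minimal sketch of an in-memory round-trip follows (the `rabit/io.h` include path and the `Seek(0)` rewind are assumptions based on this header):
```c++
#include <rabit/io.h>   // include path assumed for this tree
#include <string>
#include <vector>

int main() {
  std::string blob;
  rabit::utils::MemoryBufferStream fs(&blob);
  std::vector<int> data(4, 7);
  fs.Write(data);   // size-prefixed vector helper inherited from dmlc::Stream
  fs.Seek(0);       // rewind (SeekStream interface); assumed supported here
  std::vector<int> copy;
  fs.Read(&copy);   // copy now holds the same four values as data
  return 0;
}
```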

View File

@ -178,17 +178,17 @@ inline void TrackerPrintf(const char *fmt, ...) {
}
#endif
// load latest check point
inline int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model) {
inline int LoadCheckPoint(Serializable *global_model,
Serializable *local_model) {
return engine::GetEngine()->LoadCheckPoint(global_model, local_model);
}
// checkpoint the model, meaning we finished a stage of execution
inline void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model) {
inline void CheckPoint(const Serializable *global_model,
const Serializable *local_model) {
engine::GetEngine()->CheckPoint(global_model, local_model);
}
// lazy checkpoint the model, only remember the pointer to global_model
inline void LazyCheckPoint(const ISerializable *global_model) {
inline void LazyCheckPoint(const Serializable *global_model) {
engine::GetEngine()->LazyCheckPoint(global_model);
}
// return the version number of currently stored model

View File

@ -9,98 +9,19 @@
#include <vector>
#include <string>
#include "./rabit/utils.h"
#include "./dmlc/io.h"
namespace rabit {
/*!
* \brief interface of stream I/O, used by ISerializable
* \sa ISerializable
* \brief defines stream used in rabit
* see definition of Stream in dmlc/io.h
*/
class IStream {
public:
typedef dmlc::Stream Stream;
/*!
* \brief reads data from a stream
* \param ptr pointer to a memory buffer
* \param size block size
* \return the size of data read
* \brief defines serializable objects used in rabit
* see definition of Serializable in dmlc/io.h
*/
virtual size_t Read(void *ptr, size_t size) = 0;
/*!
* \brief writes data to a stream
* \param ptr pointer to a memory buffer
* \param size block size
*/
virtual void Write(const void *ptr, size_t size) = 0;
/*! \brief virtual destructor */
virtual ~IStream(void) {}
typedef dmlc::Serializable Serializable;
public:
// helper functions to write/read different data structures
/*!
* \brief writes a vector
* \param vec vector to be written/serialized
*/
template<typename T>
inline void Write(const std::vector<T> &vec) {
uint64_t sz = static_cast<uint64_t>(vec.size());
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&vec[0], sizeof(T) * sz);
}
}
/*!
* \brief loads a vector
* \param out_vec vector to be loaded/deserialized
* \return whether the load was successful
*/
template<typename T>
inline bool Read(std::vector<T> *out_vec) {
uint64_t sz;
if (this->Read(&sz, sizeof(sz)) == 0) return false;
out_vec->resize(sz);
if (sz != 0) {
if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false;
}
return true;
}
/*!
* \brief writes a string
* \param str the string to be written/serialized
*/
inline void Write(const std::string &str) {
uint64_t sz = static_cast<uint64_t>(str.length());
this->Write(&sz, sizeof(sz));
if (sz != 0) {
this->Write(&str[0], sizeof(char) * sz);
}
}
/*!
* \brief loads a string
* \param out_str string to be loaded/deserialized
* \return whether the load/deserialization was successful
*/
inline bool Read(std::string *out_str) {
uint64_t sz;
if (this->Read(&sz, sizeof(sz)) == 0) return false;
out_str->resize(sz);
if (sz != 0) {
if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false;
}
return true;
}
};
/*! \brief interface for serializable objects */
class ISerializable {
public:
/*!
* \brief load the model from a stream
* \param fi stream where to load the model from
*/
virtual void Load(IStream &fi) = 0;
/*!
* \brief saves the model to a stream
* \param fo stream where to save the model to
*/
virtual void Save(IStream &fo) const = 0;
};
} // namespace rabit
#endif // RABIT_RABIT_SERIALIZABLE_H_

View File

@ -1,2 +0,0 @@
config.mk
*.log

View File

@ -1,17 +0,0 @@
Rabit-Learn
====
This folder contains implementations of distributed machine learning algorithms built with rabit.
It also contains links to Machine Learning packages that use rabit.
* Contributions of toolkits, examples, and benchmarks are more than welcome!
Toolkits
====
* [KMeans Clustering](kmeans)
* [Linear and Logistic Regression](linear)
* [XGBoost: eXtreme Gradient Boosting](https://github.com/tqchen/xgboost/tree/master/multi-node)
  - xgboost is a very fast boosted tree (also known as GBDT) library that can run more than
    10 times faster than existing packages
  - Rabit carries xgboost to distributed environments, inheriting all the benefits of the
    single-node version of xgboost and scaling it to even larger problems

View File

@ -1,2 +0,0 @@
This folder contains the processed example dataset used by the demos.
Copyright of the dataset belongs to the original copyright holder.

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,126 +0,0 @@
0 cap-shape=bell i
1 cap-shape=conical i
2 cap-shape=convex i
3 cap-shape=flat i
4 cap-shape=knobbed i
5 cap-shape=sunken i
6 cap-surface=fibrous i
7 cap-surface=grooves i
8 cap-surface=scaly i
9 cap-surface=smooth i
10 cap-color=brown i
11 cap-color=buff i
12 cap-color=cinnamon i
13 cap-color=gray i
14 cap-color=green i
15 cap-color=pink i
16 cap-color=purple i
17 cap-color=red i
18 cap-color=white i
19 cap-color=yellow i
20 bruises?=bruises i
21 bruises?=no i
22 odor=almond i
23 odor=anise i
24 odor=creosote i
25 odor=fishy i
26 odor=foul i
27 odor=musty i
28 odor=none i
29 odor=pungent i
30 odor=spicy i
31 gill-attachment=attached i
32 gill-attachment=descending i
33 gill-attachment=free i
34 gill-attachment=notched i
35 gill-spacing=close i
36 gill-spacing=crowded i
37 gill-spacing=distant i
38 gill-size=broad i
39 gill-size=narrow i
40 gill-color=black i
41 gill-color=brown i
42 gill-color=buff i
43 gill-color=chocolate i
44 gill-color=gray i
45 gill-color=green i
46 gill-color=orange i
47 gill-color=pink i
48 gill-color=purple i
49 gill-color=red i
50 gill-color=white i
51 gill-color=yellow i
52 stalk-shape=enlarging i
53 stalk-shape=tapering i
54 stalk-root=bulbous i
55 stalk-root=club i
56 stalk-root=cup i
57 stalk-root=equal i
58 stalk-root=rhizomorphs i
59 stalk-root=rooted i
60 stalk-root=missing i
61 stalk-surface-above-ring=fibrous i
62 stalk-surface-above-ring=scaly i
63 stalk-surface-above-ring=silky i
64 stalk-surface-above-ring=smooth i
65 stalk-surface-below-ring=fibrous i
66 stalk-surface-below-ring=scaly i
67 stalk-surface-below-ring=silky i
68 stalk-surface-below-ring=smooth i
69 stalk-color-above-ring=brown i
70 stalk-color-above-ring=buff i
71 stalk-color-above-ring=cinnamon i
72 stalk-color-above-ring=gray i
73 stalk-color-above-ring=orange i
74 stalk-color-above-ring=pink i
75 stalk-color-above-ring=red i
76 stalk-color-above-ring=white i
77 stalk-color-above-ring=yellow i
78 stalk-color-below-ring=brown i
79 stalk-color-below-ring=buff i
80 stalk-color-below-ring=cinnamon i
81 stalk-color-below-ring=gray i
82 stalk-color-below-ring=orange i
83 stalk-color-below-ring=pink i
84 stalk-color-below-ring=red i
85 stalk-color-below-ring=white i
86 stalk-color-below-ring=yellow i
87 veil-type=partial i
88 veil-type=universal i
89 veil-color=brown i
90 veil-color=orange i
91 veil-color=white i
92 veil-color=yellow i
93 ring-number=none i
94 ring-number=one i
95 ring-number=two i
96 ring-type=cobwebby i
97 ring-type=evanescent i
98 ring-type=flaring i
99 ring-type=large i
100 ring-type=none i
101 ring-type=pendant i
102 ring-type=sheathing i
103 ring-type=zone i
104 spore-print-color=black i
105 spore-print-color=brown i
106 spore-print-color=buff i
107 spore-print-color=chocolate i
108 spore-print-color=green i
109 spore-print-color=orange i
110 spore-print-color=purple i
111 spore-print-color=white i
112 spore-print-color=yellow i
113 population=abundant i
114 population=clustered i
115 population=numerous i
116 population=scattered i
117 population=several i
118 population=solitary i
119 habitat=grasses i
120 habitat=leaves i
121 habitat=meadows i
122 habitat=paths i
123 habitat=urban i
124 habitat=waste i
125 habitat=woods i

View File

@ -1,58 +0,0 @@
#ifndef RABIT_LEARN_IO_BUFFER_READER_INL_H_
#define RABIT_LEARN_IO_BUFFER_READER_INL_H_
/*!
* \file buffer_reader-inl.h
* \brief implementation of stream buffer reader
* \author Tianqi Chen
*/
#include "./io.h"
namespace rabit {
namespace io {
/*! \brief buffer reader of the stream that allows you to get characters one at a time */
class StreamBufferReader {
public:
StreamBufferReader(size_t buffer_size)
:stream_(NULL),
read_len_(1), read_ptr_(1) {
buffer_.resize(buffer_size);
}
/*!
* \brief set input stream
*/
inline void set_stream(IStream *stream) {
stream_ = stream;
read_len_ = read_ptr_ = 1;
}
/*!
* \brief allows quick read using get char
*/
inline char GetChar(void) {
while (true) {
if (read_ptr_ < read_len_) {
return buffer_[read_ptr_++];
} else {
read_len_ = stream_->Read(&buffer_[0], buffer_.length());
if (read_len_ == 0) return EOF;
read_ptr_ = 0;
}
}
}
/*! \brief whether we are reaching the end of file */
inline bool AtEnd(void) const {
return read_len_ == 0;
}
private:
/*! \brief the underlying stream */
IStream *stream_;
/*! \brief buffer to hold data */
std::string buffer_;
/*! \brief length of valid data in buffer */
size_t read_len_;
/*! \brief pointer in the buffer */
size_t read_ptr_;
};
} // namespace io
} // namespace rabit
#endif // RABIT_LEARN_IO_BUFFER_READER_INL_H_

View File

@ -1,112 +0,0 @@
#ifndef RABIT_LEARN_IO_FILE_INL_H_
#define RABIT_LEARN_IO_FILE_INL_H_
/*!
* \file file-inl.h
* \brief normal filesystem I/O
* \author Tianqi Chen
*/
#include <string>
#include <vector>
#include <cstdio>
#include "./io.h"
#include "./line_split-inl.h"
/*! \brief io interface */
namespace rabit {
namespace io {
/*! \brief implementation of file i/o stream */
class FileStream : public utils::ISeekStream {
public:
explicit FileStream(const char *fname, const char *mode)
: use_stdio(false) {
using namespace std;
#ifndef RABIT_STRICT_CXX98_
if (!strcmp(fname, "stdin")) {
use_stdio = true; fp = stdin;
}
if (!strcmp(fname, "stdout")) {
use_stdio = true; fp = stdout;
}
#endif
if (!strncmp(fname, "file://", 7)) fname += 7;
if (!use_stdio) {
std::string flag = mode;
if (flag == "w") flag = "wb";
if (flag == "r") flag = "rb";
fp = utils::FopenCheck(fname, flag.c_str());
}
}
virtual ~FileStream(void) {
this->Close();
}
virtual size_t Read(void *ptr, size_t size) {
return std::fread(ptr, 1, size, fp);
}
virtual void Write(const void *ptr, size_t size) {
std::fwrite(ptr, size, 1, fp);
}
virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET);
}
virtual size_t Tell(void) {
return std::ftell(fp);
}
virtual bool AtEnd(void) const {
return std::feof(fp) != 0;
}
inline void Close(void) {
if (fp != NULL && !use_stdio) {
std::fclose(fp); fp = NULL;
}
}
private:
std::FILE *fp;
bool use_stdio;
};
/*! \brief line split from normal file system */
class FileProvider : public LineSplitter::IFileProvider {
public:
explicit FileProvider(const char *uri) {
LineSplitter::SplitNames(&fnames_, uri, "#");
std::vector<size_t> fsize;
for (size_t i = 0; i < fnames_.size(); ++i) {
if (!std::strncmp(fnames_[i].c_str(), "file://", 7)) {
std::string tmp = fnames_[i].c_str() + 7;
fnames_[i] = tmp;
}
size_t fz = GetFileSize(fnames_[i].c_str());
if (fz != 0) {
fsize_.push_back(fz);
}
}
}
  // destructor
virtual ~FileProvider(void) {}
virtual utils::ISeekStream *Open(size_t file_index) {
utils::Assert(file_index < fnames_.size(), "file index exceed bound");
return new FileStream(fnames_[file_index].c_str(), "rb");
}
virtual const std::vector<size_t> &FileSize(void) const {
return fsize_;
}
private:
// file sizes
std::vector<size_t> fsize_;
// file names
std::vector<std::string> fnames_;
// get file size
inline static size_t GetFileSize(const char *fname) {
std::FILE *fp = utils::FopenCheck(fname, "rb");
// NOTE: fseek may not be good, but serves as ok solution
std::fseek(fp, 0, SEEK_END);
size_t fsize = static_cast<size_t>(std::ftell(fp));
std::fclose(fp);
return fsize;
}
};
} // namespace io
} // namespace rabit
#endif // RABIT_LEARN_IO_FILE_INL_H_

View File

@ -1,165 +0,0 @@
#ifndef RABIT_LEARN_IO_HDFS_INL_H_
#define RABIT_LEARN_IO_HDFS_INL_H_
/*!
* \file hdfs-inl.h
* \brief HDFS I/O
* \author Tianqi Chen
*/
#include <string>
#include <cstdlib>
#include <vector>
#include <hdfs.h>
#include <errno.h>
#include "./io.h"
#include "./line_split-inl.h"
/*! \brief io interface */
namespace rabit {
namespace io {
class HDFSStream : public ISeekStream {
public:
HDFSStream(hdfsFS fs,
const char *fname,
const char *mode,
bool disconnect_when_done)
: fs_(fs), at_end_(false),
disconnect_when_done_(disconnect_when_done) {
int flag = 0;
if (!strcmp(mode, "r")) {
flag = O_RDONLY;
} else if (!strcmp(mode, "w")) {
flag = O_WRONLY;
} else if (!strcmp(mode, "a")) {
flag = O_WRONLY | O_APPEND;
} else {
utils::Error("HDFSStream: unknown flag %s", mode);
}
fp_ = hdfsOpenFile(fs_, fname, flag, 0, 0, 0);
utils::Check(fp_ != NULL,
"HDFSStream: fail to open %s", fname);
}
virtual ~HDFSStream(void) {
this->Close();
if (disconnect_when_done_) {
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
}
}
virtual size_t Read(void *ptr, size_t size) {
tSize nread = hdfsRead(fs_, fp_, ptr, size);
if (nread == -1) {
int errsv = errno;
utils::Error("HDFSStream.Read Error:%s", strerror(errsv));
}
if (nread == 0) {
at_end_ = true;
}
return static_cast<size_t>(nread);
}
virtual void Write(const void *ptr, size_t size) {
const char *buf = reinterpret_cast<const char*>(ptr);
while (size != 0) {
tSize nwrite = hdfsWrite(fs_, fp_, buf, size);
if (nwrite == -1) {
int errsv = errno;
utils::Error("HDFSStream.Write Error:%s", strerror(errsv));
}
size_t sz = static_cast<size_t>(nwrite);
buf += sz; size -= sz;
}
}
virtual void Seek(size_t pos) {
if (hdfsSeek(fs_, fp_, pos) != 0) {
int errsv = errno;
utils::Error("HDFSStream.Seek Error:%s", strerror(errsv));
}
}
virtual size_t Tell(void) {
tOffset offset = hdfsTell(fs_, fp_);
if (offset == -1) {
int errsv = errno;
utils::Error("HDFSStream.Tell Error:%s", strerror(errsv));
}
return static_cast<size_t>(offset);
}
virtual bool AtEnd(void) const {
return at_end_;
}
inline void Close(void) {
if (fp_ != NULL) {
if (hdfsCloseFile(fs_, fp_) == -1) {
int errsv = errno;
utils::Error("HDFSStream.Close Error:%s", strerror(errsv));
}
fp_ = NULL;
}
}
inline static std::string GetNameNode(void) {
const char *nn = getenv("rabit_hdfs_namenode");
if (nn == NULL) {
return std::string("default");
} else {
return std::string(nn);
}
}
private:
hdfsFS fs_;
hdfsFile fp_;
bool at_end_;
bool disconnect_when_done_;
};
/*! \brief line split from normal file system */
class HDFSProvider : public LineSplitter::IFileProvider {
public:
explicit HDFSProvider(const char *uri) {
fs_ = hdfsConnect(HDFSStream::GetNameNode().c_str(), 0);
utils::Check(fs_ != NULL, "error when connecting to default HDFS");
std::vector<std::string> paths;
LineSplitter::SplitNames(&paths, uri, "#");
// get the files
for (size_t i = 0; i < paths.size(); ++i) {
hdfsFileInfo *info = hdfsGetPathInfo(fs_, paths[i].c_str());
utils::Check(info != NULL, "path %s do not exist", paths[i].c_str());
if (info->mKind == 'D') {
int nentry;
hdfsFileInfo *files = hdfsListDirectory(fs_, info->mName, &nentry);
utils::Check(files != NULL, "error when ListDirectory %s", info->mName);
for (int i = 0; i < nentry; ++i) {
if (files[i].mKind == 'F' && files[i].mSize != 0) {
fsize_.push_back(files[i].mSize);
fnames_.push_back(std::string(files[i].mName));
}
}
hdfsFreeFileInfo(files, nentry);
} else {
if (info->mSize != 0) {
fsize_.push_back(info->mSize);
fnames_.push_back(std::string(info->mName));
}
}
hdfsFreeFileInfo(info, 1);
}
}
virtual ~HDFSProvider(void) {
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
}
virtual const std::vector<size_t> &FileSize(void) const {
return fsize_;
}
virtual ISeekStream *Open(size_t file_index) {
utils::Assert(file_index < fnames_.size(), "file index exceed bound");
return new HDFSStream(fs_, fnames_[file_index].c_str(), "r", false);
}
private:
// hdfs handle
hdfsFS fs_;
// file sizes
std::vector<size_t> fsize_;
// file names
std::vector<std::string> fnames_;
};
} // namespace io
} // namespace rabit
#endif // RABIT_LEARN_IO_HDFS_INL_H_

View File

@ -1,68 +0,0 @@
#ifndef RABIT_LEARN_IO_IO_INL_H_
#define RABIT_LEARN_IO_IO_INL_H_
/*!
* \file io-inl.h
 * \brief Input/Output utils that handle read/write
 *  of files in a distributed environment
* \author Tianqi Chen
*/
#include <cstring>
#include "./io.h"
#if RABIT_USE_HDFS
#include "./hdfs-inl.h"
#endif
#include "./file-inl.h"
namespace rabit {
namespace io {
/*!
* \brief create input split given a uri
* \param uri the uri of the input, can contain hdfs prefix
* \param part the part id of current input
* \param nsplit total number of splits
*/
inline InputSplit *CreateInputSplit(const char *uri,
unsigned part,
unsigned nsplit) {
using namespace std;
if (!strcmp(uri, "stdin")) {
return new SingleFileSplit(uri);
}
if (!strncmp(uri, "file://", 7)) {
return new LineSplitter(new FileProvider(uri), part, nsplit);
}
if (!strncmp(uri, "hdfs://", 7)) {
#if RABIT_USE_HDFS
return new LineSplitter(new HDFSProvider(uri), part, nsplit);
#else
utils::Error("Please compile with RABIT_USE_HDFS=1");
#endif
}
return new LineSplitter(new FileProvider(uri), part, nsplit);
}
/*!
 * \brief create a stream; the stream must be able to close
 *  the underlying resources (files) when deleted
*
* \param uri the uri of the input, can contain hdfs prefix
* \param mode can be 'w' or 'r' for read or write
*/
inline IStream *CreateStream(const char *uri, const char *mode) {
using namespace std;
if (!strncmp(uri, "file://", 7)) {
return new FileStream(uri + 7, mode);
}
if (!strncmp(uri, "hdfs://", 7)) {
#if RABIT_USE_HDFS
return new HDFSStream(hdfsConnect(HDFSStream::GetNameNode().c_str(), 0),
uri, mode, true);
#else
utils::Error("Please compile with RABIT_USE_HDFS=1");
#endif
}
return new FileStream(uri, mode);
}
} // namespace io
} // namespace rabit
#endif // RABIT_LEARN_IO_IO_INL_H_

View File

@ -1,62 +0,0 @@
#ifndef RABIT_LEARN_IO_IO_H_
#define RABIT_LEARN_IO_IO_H_
/*!
* \file io.h
 * \brief Input/Output utils that handle read/write
 *  of files in a distributed environment
* \author Tianqi Chen
*/
#include "../../include/rabit_serializable.h"
/*! \brief whether compile with HDFS support */
#ifndef RABIT_USE_HDFS
#define RABIT_USE_HDFS 0
#endif
/*! \brief io interface */
namespace rabit {
/*!
* \brief namespace to handle input split and filesystem interfacing
*/
namespace io {
/*! \brief reused ISeekStream's definition */
typedef utils::ISeekStream ISeekStream;
/*!
* \brief user facing input split helper,
* can be used to get the partition of data used by current node
*/
class InputSplit {
public:
/*!
* \brief get next line, store into out_data
* \param out_data the string that stores the line data,
* \n is not included
   * \return true if the next line was found, false if we have read all the lines
*/
virtual bool NextLine(std::string *out_data) = 0;
/*! \brief destructor*/
virtual ~InputSplit(void) {}
};
/*!
* \brief create input split given a uri
* \param uri the uri of the input, can contain hdfs prefix
* \param part the part id of current input
* \param nsplit total number of splits
*/
inline InputSplit *CreateInputSplit(const char *uri,
unsigned part,
unsigned nsplit);
/*!
 * \brief create a stream; the stream must be able to close
 *  the underlying resources (files) when deleted
*
* \param uri the uri of the input, can contain hdfs prefix
* \param mode can be 'w' or 'r' for read or write
*/
inline IStream *CreateStream(const char *uri, const char *mode);
} // namespace io
} // namespace rabit
#include "./io-inl.h"
#include "./base64-inl.h"
#endif // RABIT_LEARN_IO_IO_H_

View File

@ -1,206 +0,0 @@
#ifndef RABIT_LEARN_IO_LINE_SPLIT_INL_H_
#define RABIT_LEARN_IO_LINE_SPLIT_INL_H_
/*!
 * \file line_split-inl.h
* \brief base implementation of line-spliter
* \author Tianqi Chen
*/
#include <vector>
#include <utility>
#include <cstring>
#include <string>
#include "../../include/rabit.h"
#include "./io.h"
#include "./buffer_reader-inl.h"
namespace rabit {
namespace io {
/*! \brief class that split the files by line */
class LineSplitter : public InputSplit {
public:
class IFileProvider {
public:
/*!
* \brief get the seek stream of given file_index
* \return the corresponding seek stream at head of the stream
* the seek stream's resource can be freed by calling delete
*/
virtual ISeekStream *Open(size_t file_index) = 0;
/*!
* \return const reference to size of each files
*/
virtual const std::vector<size_t> &FileSize(void) const = 0;
// virtual destructor
virtual ~IFileProvider() {}
};
// constructor
explicit LineSplitter(IFileProvider *provider,
unsigned rank,
unsigned nsplit)
: provider_(provider), fs_(NULL),
reader_(kBufferSize) {
this->Init(provider_->FileSize(), rank, nsplit);
}
// destructor
virtual ~LineSplitter() {
if (fs_ != NULL) {
delete fs_; fs_ = NULL;
}
// delete provider after destructing the streams
delete provider_;
}
// get next line
virtual bool NextLine(std::string *out_data) {
if (file_ptr_ >= file_ptr_end_ &&
offset_curr_ >= offset_end_) return false;
out_data->clear();
while (true) {
char c = reader_.GetChar();
if (reader_.AtEnd()) {
if (out_data->length() != 0) return true;
file_ptr_ += 1;
if (offset_curr_ >= offset_end_) return false;
if (offset_curr_ != file_offset_[file_ptr_]) {
utils::Error("warning: file size not calculated correctly\n");
offset_curr_ = file_offset_[file_ptr_];
}
utils::Assert(file_ptr_ + 1 < file_offset_.size(),
"boundary check");
delete fs_;
fs_ = provider_->Open(file_ptr_);
reader_.set_stream(fs_);
} else {
++offset_curr_;
if (c != '\r' && c != '\n' && c != EOF) {
*out_data += c;
} else {
if (out_data->length() != 0) return true;
if (file_ptr_ >= file_ptr_end_ &&
offset_curr_ >= offset_end_) return false;
}
}
}
}
/*!
   * \brief split file names given in the uri
   * \param out_fname output file names
   * \param uri_ the input uri
   * \param dlm delimiter
*/
inline static void SplitNames(std::vector<std::string> *out_fname,
const char *uri_,
const char *dlm) {
std::string uri = uri_;
char *p = std::strtok(BeginPtr(uri), dlm);
while (p != NULL) {
out_fname->push_back(std::string(p));
p = std::strtok(NULL, dlm);
}
}
private:
/*!
* \brief initialize the line spliter,
   * \param file_size size of each file
* \param rank the current rank of the data
* \param nsplit number of split we will divide the data into
*/
inline void Init(const std::vector<size_t> &file_size,
unsigned rank, unsigned nsplit) {
file_offset_.resize(file_size.size() + 1);
file_offset_[0] = 0;
for (size_t i = 0; i < file_size.size(); ++i) {
file_offset_[i + 1] = file_offset_[i] + file_size[i];
}
size_t ntotal = file_offset_.back();
size_t nstep = (ntotal + nsplit - 1) / nsplit;
offset_begin_ = std::min(nstep * rank, ntotal);
offset_end_ = std::min(nstep * (rank + 1), ntotal);
offset_curr_ = offset_begin_;
if (offset_begin_ == offset_end_) return;
file_ptr_ = std::upper_bound(file_offset_.begin(),
file_offset_.end(),
offset_begin_) - file_offset_.begin() - 1;
file_ptr_end_ = std::upper_bound(file_offset_.begin(),
file_offset_.end(),
offset_end_) - file_offset_.begin() - 1;
fs_ = provider_->Open(file_ptr_);
reader_.set_stream(fs_);
// try to set the starting position correctly
if (file_offset_[file_ptr_] != offset_begin_) {
fs_->Seek(offset_begin_ - file_offset_[file_ptr_]);
while (true) {
char c = reader_.GetChar();
if (!reader_.AtEnd()) ++offset_curr_;
if (c == '\n' || c == '\r' || c == EOF) return;
}
}
}
private:
/*! \brief FileProvider */
IFileProvider *provider_;
/*! \brief current input stream */
utils::ISeekStream *fs_;
/*! \brief file pointer of which file to read on */
size_t file_ptr_;
/*! \brief file pointer where the end of file lies */
size_t file_ptr_end_;
/*! \brief get the current offset */
size_t offset_curr_;
/*! \brief beginning of offset */
size_t offset_begin_;
/*! \brief end of the offset */
size_t offset_end_;
/*! \brief byte-offset of each file */
std::vector<size_t> file_offset_;
/*! \brief buffer reader */
StreamBufferReader reader_;
/*! \brief buffer size */
const static size_t kBufferSize = 256;
};
/*! \brief line split from a single file */
class SingleFileSplit : public InputSplit {
public:
explicit SingleFileSplit(const char *fname) {
if (!std::strcmp(fname, "stdin")) {
#ifndef RABIT_STRICT_CXX98_
use_stdin_ = true; fp_ = stdin;
#endif
}
if (!use_stdin_) {
fp_ = utils::FopenCheck(fname, "r");
}
end_of_file_ = false;
}
virtual ~SingleFileSplit(void) {
if (!use_stdin_) std::fclose(fp_);
}
virtual bool NextLine(std::string *out_data) {
if (end_of_file_) return false;
out_data->clear();
while (true) {
char c = std::fgetc(fp_);
if (c == EOF) {
end_of_file_ = true;
}
if (c != '\r' && c != '\n' && c != EOF) {
*out_data += c;
} else {
if (out_data->length() != 0) return true;
if (end_of_file_) return false;
}
}
return false;
}
private:
std::FILE *fp_;
bool use_stdin_;
bool end_of_file_;
};
} // namespace io
} // namespace rabit
#endif // RABIT_LEARN_IO_LINE_SPLIT_INL_H_

View File

@ -1,2 +0,0 @@
kmeans
*.mpi

View File

@ -1,15 +0,0 @@
# specify tensor path
BIN = kmeans.rabit
MOCKBIN= kmeans.mock
MPIBIN = kmeans.mpi
# object files that make up the program
OBJ = kmeans.o
# common build script for programs
include ../make/common.mk
# dependencies here
kmeans.rabit: kmeans.o lib
kmeans.mock: kmeans.o lib
kmeans.mpi: kmeans.o libmpi
kmeans.o: kmeans.cc ../../src/*.h

View File

@ -1,129 +0,0 @@
Toolkit
====
This folder contains some example toolkits developed with rabit to help you get started.
KMeans
====
## Input File Format
KMeans uses the LIBSVM format to parse the input. If you are not familiar with LIBSVM, you will find more details [here](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/).
The format is the following:
<label> <index1>:<value1> <index2>:<value2> ...
where label is a dummy integer value in this case (you can add 1's to every example), index<x> is the index for feature x, and value<x> is the value of feature x.
## Output File Format
KMeans currently outputs the centroids as dense vectors. Each line in the output file corresponds to a centroid. The number of lines in the file must match the number of clusters K you specified in the command line.
## Example
Let's go over a more detailed example...
#### Preprocess
Download the smallwiki dataset used in the Machine Learning for Big Data class at University of Washington.
http://courses.cs.washington.edu/courses/cse547/14wi/datasets/smallwiki.zip
Unzip it, you should find three files:
* tfidf.txt: each row is in the form "docid||termid1:tfidf1,termid2:tfidf2,..."
* dictionary.txt: map of term to termid
* cluster0.txt: initial cluster centers. Won't be needed.
The first thing to do is to convert the tfidf file format into the input format rabit supports, i.e. LIBSVM. For that, you can use a simple python script. The following should suffice. You should redirect the output to a file, let's say tfidf.libsvm.
```python
for line in open("tfidf.txt").read().splitlines():
    example = line.split('|')[1].split(',')
    example = ' '.join(example)
    print '%s %s' % (1, example)
```
#### Compile
You will then need to build the KMeans program with ```make```, which will produce three binaries:
* kmeans.mpi: runs on MPI.
* kmeans.mock: uses a mock to simulate error conditions for testing purposes.
* kmeans.rabit: uses our C++ implementation.
#### Running with Hadoop
If you want to run it with Hadoop, you can execute the [./kmeans_hadoop.sh](./kmeans_hadoop.sh) script from the master node of your cluster.
You will have to edit the file in order to specify the path to the Hadoop Streaming jar. Afterwards, you can execute it with the following arguments (in the exact same order):
* number of worker nodes in your Hadoop cluster (i.e. number of slave nodes)
* path to the input data (HDFS path where you put the preprocessed file in libsvm format)
* number of clusters K (let's use 20 for this example)
* number of iterations to perform (let's use just 5 iterations)
* output path (HDFS path where to store the output data, must be a non-existent folder)
The current implementation runs for the number of iterations you specify in the command line argument. If you would like to add a convergence criterion (e.g. stopping when no cluster assignment changes between iterations), you will have to modify [./kmeans.cc](./kmeans.cc). We leave that as an exercise to the reader :)
You may have noticed that [./kmeans_hadoop.sh](./kmeans_hadoop.sh) uses the kmeans.rabit binary, but you can also use kmeans.mock in order to easily test your system's behavior in the presence of failures. More on that later.
Don't forget to copy the preprocessed file into HDFS and create the output folder. For example, inside the bin folder in Hadoop, you can execute the following:
```bash
$ ./hadoop fs -mkdir kmeans
$ ./hadoop fs -mkdir kmeans/in
$ ./hadoop fs -put tfidf.libsvm kmeans/in
$ ./hadoop fs -mkdir kmeans/out
```
#### Running with MPI
You will need to have an MPI implementation installed, for example OpenMPI. In order to run the program, you can use mpirun to submit the job. This is a non-fault-tolerant version, as it is backed by MPI.
#### Running with Mock
As previously mentioned, you can execute the kmeans example, and any of your own, with the mock binary. This will allow you to test error conditions while you are developing your algorithms. As explained in the [Tutorial](../guide), passing the script certain parameters (e.g. mock=0,0,1,0) will cause a certain node to exit after calling Allreduce/Broadcast in a given iteration.
You can also run this locally; you will only need to split the input file into several smaller files, each of which will be used by a particular process in the shared-memory environment. You can use a Unix command line tool such as split.
#### Processing Output
Once the program finishes running, you can fetch the output from HDFS. For example, inside the bin folder in Hadoop, you can execute the following:
```bash
$ ./hadoop fs -get kmeans/out/part-00000 kmeans.out
```
Each line of the output file is a centroid in dense format. Since the words of this dataset are listed in the dictionary.txt file, you can do some simple post-processing to recover the top 10 words of each centroid. Something like this should work:
```python
words = {}
for line in open("dictionary.txt").read().splitlines():
    word, index = line.split(' ')
    words[int(index)] = word

from collections import defaultdict
clusters = defaultdict(list)
cluster_name = 0
for line in open("kmeans.out").read().splitlines():
    line = line.split(' ')
    clusters[cluster_name].extend(line)
    cluster_name += 1

import numpy as np
for j, key in enumerate(clusters):
    elements = clusters[key]
    array = np.array(elements).astype(np.float32)
    idx = np.argsort(array)[::-1][:10]
    ws = []
    for i in idx:
        ws.append(words[i])
    print 'cluster %d = %s' % (j, ' '.join(ws))
```

View File

@ -1,165 +0,0 @@
// this is a test case to test whether rabit can recover model when
// facing an exception
#include <rabit.h>
#include <rabit/utils.h>
#include <time.h>
#include "../utils/data.h"
using namespace rabit;
// kmeans model
class Model : public rabit::ISerializable {
public:
// matrix of centroids
Matrix centroids;
// load from stream
virtual void Load(rabit::IStream &fi) {
fi.Read(&centroids.nrow, sizeof(centroids.nrow));
fi.Read(&centroids.ncol, sizeof(centroids.ncol));
fi.Read(&centroids.data);
}
/*! \brief save the model to the stream */
virtual void Save(rabit::IStream &fo) const {
fo.Write(&centroids.nrow, sizeof(centroids.nrow));
fo.Write(&centroids.ncol, sizeof(centroids.ncol));
fo.Write(centroids.data);
}
virtual void InitModel(unsigned num_cluster, unsigned feat_dim) {
centroids.Init(num_cluster, feat_dim);
}
// normalize L2 norm
inline void Normalize(void) {
for (size_t i = 0; i < centroids.nrow; ++i) {
float *row = centroids[i];
double wsum = 0.0;
for (size_t j = 0; j < centroids.ncol; ++j) {
wsum += row[j] * row[j];
}
wsum = sqrt(wsum);
if (wsum < 1e-6) return;
float winv = 1.0 / wsum;
for (size_t j = 0; j < centroids.ncol; ++j) {
row[j] *= winv;
}
}
}
};
inline void InitCentroids(const SparseMat &data, Matrix *centroids) {
int num_cluster = centroids->nrow;
for (int i = 0; i < num_cluster; ++i) {
int index = Random(data.NumRow());
SparseMat::Vector v = data[index];
for (unsigned j = 0; j < v.length; ++j) {
(*centroids)[i][v[j].findex] = v[j].fvalue;
}
}
for (int i = 0; i < num_cluster; ++i) {
int proc = Random(rabit::GetWorldSize());
rabit::Broadcast((*centroids)[i], centroids->ncol * sizeof(float), proc);
}
}
inline double Cos(const float *row,
const SparseMat::Vector &v) {
double rdot = 0.0, rnorm = 0.0;
for (unsigned i = 0; i < v.length; ++i) {
rdot += row[v[i].findex] * v[i].fvalue;
rnorm += v[i].fvalue * v[i].fvalue;
}
return rdot / sqrt(rnorm);
}
inline size_t GetCluster(const Matrix &centroids,
const SparseMat::Vector &v) {
size_t imin = 0;
double dmin = Cos(centroids[0], v);
for (size_t k = 1; k < centroids.nrow; ++k) {
double dist = Cos(centroids[k], v);
if (dist > dmin) {
dmin = dist; imin = k;
}
}
return imin;
}
int main(int argc, char *argv[]) {
if (argc < 5) {
    // initialize rabit engine
rabit::Init(argc, argv);
if (rabit::GetRank() == 0) {
rabit::TrackerPrintf("Usage: <data_dir> num_cluster max_iter <out_model>\n");
}
rabit::Finalize();
return 0;
}
clock_t tStart = clock();
srand(0);
// load the data
SparseMat data;
data.Load(argv[1]);
// set the parameters
int num_cluster = atoi(argv[2]);
int max_iter = atoi(argv[3]);
  // initialize rabit engine
rabit::Init(argc, argv);
// load model
Model model;
int iter = rabit::LoadCheckPoint(&model);
if (iter == 0) {
rabit::Allreduce<op::Max>(&data.feat_dim, 1);
model.InitModel(num_cluster, data.feat_dim);
InitCentroids(data, &model.centroids);
model.Normalize();
rabit::TrackerPrintf("[%d] start at %s\n",
rabit::GetRank(), rabit::GetProcessorName().c_str());
} else {
rabit::TrackerPrintf("[%d] restart iter=%d\n", rabit::GetRank(), iter);
}
const unsigned num_feat = data.feat_dim;
// matrix to store the result
Matrix temp;
for (int r = iter; r < max_iter; ++r) {
temp.Init(num_cluster, num_feat + 1, 0.0f);
#if __cplusplus >= 201103L
auto lazy_get_centroid = [&]()
#endif
{
// lambda function used to calculate the data if necessary
// this function may not be called when the result can be directly recovered
const size_t ndata = data.NumRow();
for (size_t i = 0; i < ndata; ++i) {
SparseMat::Vector v = data[i];
size_t k = GetCluster(model.centroids, v);
// temp[k] += v
for (size_t j = 0; j < v.length; ++j) {
temp[k][v[j].findex] += v[j].fvalue;
}
// use last column to record counts
temp[k][num_feat] += 1.0f;
}
};
// call allreduce
#if __cplusplus >= 201103L
rabit::Allreduce<op::Sum>(&temp.data[0], temp.data.size(), lazy_get_centroid);
#else
rabit::Allreduce<op::Sum>(&temp.data[0], temp.data.size());
#endif
// set number
for (int k = 0; k < num_cluster; ++k) {
float cnt = temp[k][num_feat];
utils::Check(cnt != 0.0f, "get zero sized cluster");
for (unsigned i = 0; i < num_feat; ++i) {
model.centroids[k][i] = temp[k][i] / cnt;
}
}
model.Normalize();
rabit::CheckPoint(&model);
}
// output the model file to somewhere
if (rabit::GetRank() == 0) {
model.centroids.Print(argv[4]);
}
rabit::TrackerPrintf("[%d] Time taken: %f seconds\n", rabit::GetRank(), static_cast<float>(clock() - tStart) / CLOCKS_PER_SEC);
rabit::Finalize();
return 0;
}

View File

@ -1,9 +0,0 @@
#!/bin/bash
if [ "$#" -lt 5 ];
then
echo "Usage: <nslaves> <input_data> <ncluster> <max_iteration> <output>"
exit -1
fi
#set path to hadoop streaming jar here
STREAMING_JAR=
python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -n $1 -i $2 -o $5 kmeans.rabit stdin $3 $4 stdout

View File

@ -1,2 +0,0 @@
mushroom.row*
*.model

View File

@ -1,21 +0,0 @@
ifneq ("$(wildcard ../config.mk)","")
config = ../config.mk
else
config = ../make/config.mk
endif
include $(config)
BIN = linear.rabit
MOCKBIN= linear.mock
MPIBIN =
# object files that make up the program
OBJ = linear.o
# common build script for programs
include ../make/common.mk
CFLAGS+=-fopenmp
linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h
# dependencies here
linear.rabit: linear.o lib
linear.mock: linear.o lib

View File

@ -1,48 +0,0 @@
Linear and Logistic Regression
====
* input format: LibSVM
* Local Example: [run-linear.sh](run-linear.sh)
* Running on YARN: [run-yarn.sh](run-yarn.sh)
- You will need to have YARN
- Modify ```../make/config.mk``` to set USE_HDFS=1 to compile with HDFS support
  - Run build.sh in [../../yarn](../../yarn) to build the yarn jar file
Multi-Threading Optimization
====
* The code can be multi-threaded; we encourage you to use it
  - Simply add ```nthread=k``` where k is the number of threads you want to use
* If you submit with YARN
  - Use ```--vcores``` and ```-mem``` to request CPU and memory resources
  - Some schedulers in YARN do not honor CPU requests; you can request more memory to grab working slots
* Multi-threading usually improves speed
  - You can use fewer workers and assign more resources to each worker
  - This usually means less communication overhead and faster running time
Parameters
====
All the parameters can be set by param=value
#### Important Parameters
* objective [default = logistic]
- can be linear or logistic
* base_score [default = 0.5]
- global bias, recommended set to mean value of label
* reg_L1 [default = 0]
  - l1 regularization coefficient
* reg_L2 [default = 1]
  - l2 regularization coefficient
* lbfgs_stop_tol [default = 1e-5]
- relative tolerance level of loss reduction with respect to initial loss
* max_lbfgs_iter [default = 500]
- maximum number of lbfgs iterations
### Optimization Related parameters
* min_lbfgs_iter [default = 5]
- minimum number of lbfgs iterations
* max_linesearch_iter [default = 100]
- maximum number of iterations in linesearch
* linesearch_c1 [default = 1e-4]
  - c1 coefficient in backoff linesearch
* linesarch_backoff [default = 0.5]
- backoff ratio in linesearch

View File

@ -1,227 +0,0 @@
#include "./linear.h"
#include "../io/io.h"
namespace rabit {
namespace linear {
class LinearObjFunction : public solver::IObjFunction<float> {
public:
// training threads
int nthread;
// L2 regularization
float reg_L2;
// model
LinearModel model;
// training data
SparseMat dtrain;
// solver
solver::LBFGSSolver<float> lbfgs;
// constructor
LinearObjFunction(void) {
lbfgs.SetObjFunction(this);
nthread = 1;
reg_L2 = 0.0f;
model.weight = NULL;
task = "train";
model_in = "NULL";
name_pred = "pred.txt";
model_out = "final.model";
}
virtual ~LinearObjFunction(void) {
}
// set parameters
inline void SetParam(const char *name, const char *val) {
model.param.SetParam(name, val);
lbfgs.SetParam(name, val);
if (!strcmp(name, "num_feature")) {
char ndigit[30];
sprintf(ndigit, "%lu", model.param.num_feature + 1);
lbfgs.SetParam("num_dim", ndigit);
}
if (!strcmp(name, "reg_L2")) {
reg_L2 = static_cast<float>(atof(val));
}
if (!strcmp(name, "nthread")) {
nthread = atoi(val);
}
if (!strcmp(name, "task")) task = val;
if (!strcmp(name, "model_in")) model_in = val;
if (!strcmp(name, "model_out")) model_out = val;
if (!strcmp(name, "name_pred")) name_pred = val;
}
inline void Run(void) {
if (model_in != "NULL") {
this->LoadModel(model_in.c_str());
}
if (task == "train") {
lbfgs.Run();
if (rabit::GetRank() == 0) {
this->SaveModel(model_out.c_str(), lbfgs.GetWeight());
}
} else if (task == "pred") {
this->TaskPred();
} else {
utils::Error("unknown task=%s", task.c_str());
}
}
inline void TaskPred(void) {
utils::Check(model_in != "NULL",
"must set model_in for task=pred");
FILE *fp = utils::FopenCheck(name_pred.c_str(), "w");
for (size_t i = 0; i < dtrain.NumRow(); ++i) {
float pred = model.Predict(dtrain[i]);
fprintf(fp, "%g\n", pred);
}
fclose(fp);
printf("Finishing writing to %s\n", name_pred.c_str());
}
inline void LoadModel(const char *fname) {
IStream *fi = io::CreateStream(fname, "r");
std::string header; header.resize(4);
// check header for different binary encode
// can be base64 or binary
utils::Check(fi->Read(&header[0], 4) != 0, "invalid model");
// base64 format
if (header == "bs64") {
io::Base64InStream bsin(fi);
bsin.InitPosition();
model.Load(bsin);
} else if (header == "binf") {
model.Load(*fi);
} else {
utils::Error("invalid model file");
}
delete fi;
}
inline void SaveModel(const char *fname,
const float *wptr,
bool save_base64 = false) {
IStream *fo = io::CreateStream(fname, "w");
if (save_base64 != 0 || !strcmp(fname, "stdout")) {
fo->Write("bs64\t", 5);
io::Base64OutStream bout(fo);
model.Save(bout, wptr);
bout.Finish('\n');
} else {
fo->Write("binf", 4);
model.Save(*fo, wptr);
}
delete fo;
}
inline void LoadData(const char *fname) {
dtrain.Load(fname);
}
virtual size_t InitNumDim(void) {
if (model_in == "NULL") {
size_t ndim = dtrain.feat_dim;
rabit::Allreduce<rabit::op::Max>(&ndim, 1);
model.param.num_feature = std::max(ndim, model.param.num_feature);
}
return model.param.num_feature + 1;
}
virtual void InitModel(float *weight, size_t size) {
if (model_in == "NULL") {
memset(weight, 0.0f, size * sizeof(float));
model.param.InitBaseScore();
} else {
rabit::Broadcast(model.weight, size * sizeof(float), 0);
memcpy(weight, model.weight, size * sizeof(float));
}
}
// load model
virtual void Load(rabit::IStream &fi) {
fi.Read(&model.param, sizeof(model.param));
}
virtual void Save(rabit::IStream &fo) const {
fo.Write(&model.param, sizeof(model.param));
}
virtual double Eval(const float *weight, size_t size) {
if (nthread != 0) omp_set_num_threads(nthread);
utils::Check(size == model.param.num_feature + 1,
"size consistency check");
double sum_val = 0.0;
#pragma omp parallel for schedule(static) reduction(+:sum_val)
for (size_t i = 0; i < dtrain.NumRow(); ++i) {
float py = model.param.PredictMargin(weight, dtrain[i]);
float fv = model.param.MarginToLoss(dtrain.labels[i], py);
sum_val += fv;
}
if (rabit::GetRank() == 0) {
// only add regularization once
if (reg_L2 != 0.0f) {
double sum_sqr = 0.0;
for (size_t i = 0; i < model.param.num_feature; ++i) {
sum_sqr += weight[i] * weight[i];
}
sum_val += 0.5 * reg_L2 * sum_sqr;
}
}
utils::Check(!std::isnan(sum_val), "nan occurs");
return sum_val;
}
virtual void CalcGrad(float *out_grad,
const float *weight,
size_t size) {
if (nthread != 0) omp_set_num_threads(nthread);
utils::Check(size == model.param.num_feature + 1,
"size consistency check");
memset(out_grad, 0.0f, sizeof(float) * size);
double sum_gbias = 0.0;
#pragma omp parallel for schedule(static) reduction(+:sum_gbias)
for (size_t i = 0; i < dtrain.NumRow(); ++i) {
SparseMat::Vector v = dtrain[i];
float py = model.param.Predict(weight, v);
float grad = model.param.PredToGrad(dtrain.labels[i], py);
for (index_t j = 0; j < v.length; ++j) {
out_grad[v[j].findex] += v[j].fvalue * grad;
}
sum_gbias += grad;
}
out_grad[model.param.num_feature] = static_cast<float>(sum_gbias);
if (rabit::GetRank() == 0) {
// only add regularization once
if (reg_L2 != 0.0f) {
for (size_t i = 0; i < model.param.num_feature; ++i) {
out_grad[i] += reg_L2 * weight[i];
}
}
}
}
private:
std::string task;
std::string model_in;
std::string model_out;
std::string name_pred;
};
} // namespace linear
} // namespace rabit
int main(int argc, char *argv[]) {
if (argc < 2) {
    // initialize rabit engine
rabit::Init(argc, argv);
if (rabit::GetRank() == 0) {
rabit::TrackerPrintf("Usage: <data_in> param=val\n");
}
rabit::Finalize();
return 0;
}
rabit::linear::LinearObjFunction *linear = new rabit::linear::LinearObjFunction();
if (!strcmp(argv[1], "stdin")) {
linear->LoadData(argv[1]);
rabit::Init(argc, argv);
} else {
rabit::Init(argc, argv);
linear->LoadData(argv[1]);
}
for (int i = 2; i < argc; ++i) {
char name[256], val[256];
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
linear->SetParam(name, val);
}
}
linear->Run();
delete linear;
rabit::Finalize();
return 0;
}

View File

@ -1,134 +0,0 @@
/*!
* Copyright (c) 2015 by Contributors
* \file linear.h
* \brief Linear and Logistic regression
*
* \author Tianqi Chen
*/
#ifndef RABIT_LINEAR_H_
#define RABIT_LINEAR_H_
#include <omp.h>
#include "../utils/data.h"
#include "../solver/lbfgs.h"
namespace rabit {
namespace linear {
/*! \brief simple linear model */
struct LinearModel {
struct ModelParam {
/*! \brief global bias */
float base_score;
/*! \brief number of features */
size_t num_feature;
/*! \brief loss type*/
int loss_type;
// reserved field
int reserved[16];
// constructor
ModelParam(void) {
memset(this, 0, sizeof(ModelParam));
base_score = 0.5f;
num_feature = 0;
loss_type = 1;
num_feature = 0;
}
// initialize base score
inline void InitBaseScore(void) {
utils::Check(base_score > 0.0f && base_score < 1.0f,
"base_score must be in (0,1) for logistic loss");
base_score = -std::log(1.0f / base_score - 1.0f);
}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
inline void SetParam(const char *name, const char *val) {
using namespace std;
if (!strcmp("base_score", name)) {
base_score = static_cast<float>(atof(val));
}
if (!strcmp("num_feature", name)) {
num_feature = static_cast<size_t>(atol(val));
}
if (!strcmp("objective", name)) {
if (!strcmp("linear", val)) {
loss_type = 0;
} else if (!strcmp("logistic", val)) {
loss_type = 1;
} else {
utils::Error("unknown objective type %s\n", val);
}
}
}
// transform margin to prediction
inline float MarginToPred(float margin) const {
if (loss_type == 1) {
return 1.0f / (1.0f + std::exp(-margin));
} else {
return margin;
}
}
// margin to loss
inline float MarginToLoss(float label, float margin) const {
if (loss_type == 1) {
float nlogprob;
if (margin > 0.0f) {
nlogprob = std::log(1.0f + std::exp(-margin));
} else {
nlogprob = -margin + std::log(1.0f + std::exp(margin));
}
return label * nlogprob +
(1.0f -label) * (margin + nlogprob);
} else {
float diff = margin - label;
return 0.5f * diff * diff;
}
}
inline float PredToGrad(float label, float pred) const {
return pred - label;
}
inline float PredictMargin(const float *weight,
const SparseMat::Vector &v) const {
// weight[num_feature] is bias
float sum = base_score + weight[num_feature];
for (unsigned i = 0; i < v.length; ++i) {
if (v[i].findex >= num_feature) continue;
sum += weight[v[i].findex] * v[i].fvalue;
}
return sum;
}
inline float Predict(const float *weight,
const SparseMat::Vector &v) const {
return MarginToPred(PredictMargin(weight, v));
}
};
// model parameter
ModelParam param;
// weight corresponding to the model
float *weight;
LinearModel(void) : weight(NULL) {
}
~LinearModel(void) {
if (weight != NULL) delete [] weight;
}
// load model
inline void Load(rabit::IStream &fi) {
fi.Read(&param, sizeof(param));
if (weight == NULL) {
weight = new float[param.num_feature + 1];
}
fi.Read(weight, sizeof(float) * (param.num_feature + 1));
}
inline void Save(rabit::IStream &fo, const float *wptr = NULL) {
fo.Write(&param, sizeof(param));
if (wptr == NULL) wptr = weight;
fo.Write(wptr, sizeof(float) * (param.num_feature + 1));
}
inline float Predict(const SparseMat::Vector &v) const {
return param.Predict(weight, v);
}
};
} // namespace linear
} // namespace rabit
#endif // RABIT_LINEAR_H_

View File

@ -1,20 +0,0 @@
#!/bin/bash
if [ "$#" -lt 3 ];
then
echo "Usage: <nworkers> <path_in_HDFS> [param=val]"
exit -1
fi
# put the local training file to HDFS
hadoop fs -rm -r -f $2/data
hadoop fs -rm -r -f $2/mushroom.linear.model
hadoop fs -mkdir $2/data
hadoop fs -put ../data/agaricus.txt.train $2/data
# submit to hadoop
../../tracker/rabit_hadoop_streaming.py -n $1 --vcores 1 -i $2/data/agaricus.txt.train -o $2/mushroom.linear.model linear.rabit stdin model_out=stdout "${*:3}"
# get the final model file
hadoop fs -get $2/mushroom.linear.model/part-00000 ./linear.model
./linear.rabit ../data/agaricus.txt.test task=pred model_in=linear.model

View File

@ -1,11 +0,0 @@
#!/bin/bash
if [[ $# -lt 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf *.model
k=$1
../../tracker/rabit_demo.py -n $k linear.mock ../data/agaricus.txt.train "${*:2}" reg_L1=1 mock=0,1,1,0 mock=1,1,1,0 mock=0,2,1,1

View File

@ -1,14 +0,0 @@
#!/bin/bash
if [[ $# -lt 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf *.model
k=$1
# run linear model, the program will automatically split the inputs
../../tracker/rabit_demo.py -n $k linear.rabit ../data/agaricus.txt.train reg_L1=1
./linear.rabit ../data/agaricus.txt.test task=pred model_in=final.model

View File

@ -1,20 +0,0 @@
#!/bin/bash
if [ "$#" -lt 3 ];
then
echo "Usage: <nworkers> <path_in_HDFS> [param=val]"
exit -1
fi
# put the local training file to HDFS
hadoop fs -rm -r -f $2/mushroom.linear.model
hadoop fs -mkdir $2/data
hadoop fs -put ../data/agaricus.txt.train $2/data
# submit to hadoop
../../tracker/rabit_yarn.py -n $1 --vcores 1 ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}"
# get the final model file
hadoop fs -get $2/mushroom.linear.model ./linear.model
./linear.rabit ../data/agaricus.txt.test task=pred model_in=linear.model

View File

@ -1,39 +0,0 @@
# this is the common build script for rabit programs
# you do not have to use it
export LDFLAGS= -L../../lib -pthread -lm -lrt
export CFLAGS = -Wall -msse2 -Wno-unknown-pragmas -fPIC -I../../include
# setup HDFS compile and link flags
ifeq ($(USE_HDFS),1)
CFLAGS+= -DRABIT_USE_HDFS=1 -I$(HADOOP_HDFS_HOME)/include -I$(JAVA_HOME)/include
LDFLAGS+= -L$(HADOOP_HDFS_HOME)/lib/native -L$(LIBJVM) -lhdfs -ljvm
else
CFLAGS+= -DRABIT_USE_HDFS=0
endif
.PHONY: clean all lib mpi
all: $(BIN) $(MOCKBIN)
mpi: $(MPIBIN)
lib:
cd ../..;make lib/librabit.a lib/librabit_mock.a; cd -
libmpi:
cd ../..;make lib/librabit_mpi.a;cd -
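# generic link rules: $(filter ...) keeps only source/object prerequisites,
# so library targets can appear as dependencies without being passed to $(CXX)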
$(BIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) -lrabit $(LDFLAGS)
$(MOCKBIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) -lrabit_mock $(LDFLAGS)
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
$(MPIBIN) :
$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -lrabit_mpi
clean:
$(RM) $(OBJ) $(BIN) $(MPIBIN) $(MOCKBIN) *~ ../src/*~

View File

@ -1,21 +0,0 @@
#-----------------------------------------------------
# rabit-learn: the configuration compile script
#
# This is the default configuration setup for rabit-learn
# If you want to change configuration, do the following steps:
#
# - copy this file to the root of rabit-learn folder
# - modify the configuration you want
# - run make or make -j n in each of the folders
#----------------------------------------------------
# choice of compiler
export CC = gcc
export CXX = g++
export MPICXX = mpicxx
# whether to use HDFS support during compilation
USE_HDFS = 1
# path to libjvm.so
LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server

View File

@ -1,669 +0,0 @@
/*!
* Copyright (c) 2015 by Contributors
* \file lbfgs.h
* \brief L-BFGS solver for general optimization problem
*
* \author Tianqi Chen
*/
#ifndef RABIT_LEARN_LBFGS_H_
#define RABIT_LEARN_LBFGS_H_
#include <cmath>
#include <rabit.h>
namespace rabit {
/*! \brief namespace of solver for general problems */
namespace solver {
/*!
* \brief objective function for optimizers
* the objective function can also implement Save/Load
* to persist any state parameters it needs to remember
*/
template<typename DType>
class IObjFunction : public rabit::ISerializable {
public:
// destructor
virtual ~IObjFunction(void){}
/*!
* \brief evaluate function values for a given weight
* \param weight weight of the function
* \param size size of the weight
*/
virtual double Eval(const DType *weight, size_t size) = 0;
/*!
* \return the number of feature dimensions to be allocated
* only called once during initialization
*/
virtual size_t InitNumDim(void) = 0;
/*!
* \brief initialize the weight before starting the solver
* only called once for initialization
*/
virtual void InitModel(DType *weight, size_t size) = 0;
/*!
* \brief calculate gradient for a given weight
* \param out_grad used to store the gradient value of the function
* \param weight weight of the function
* \param size size of the weight
*/
virtual void CalcGrad(DType *out_grad,
const DType *weight,
size_t size) = 0;
};
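To make the contract concrete, a hedged sketch of a toy objective; the class name QuadObj and its one-dimensional quadratic loss are illustrative assumptions, not part of the library. Note that the solver Allreduce-sums Eval and CalcGrad results across workers, so a real objective evaluates only its local data shard.

// toy objective: f(w) = 0.5 * (w - 3)^2, minimized at w = 3
class QuadObj : public rabit::solver::IObjFunction<double> {
 public:
  virtual ~QuadObj(void) {}
  virtual double Eval(const double *weight, size_t size) {
    double d = weight[0] - 3.0;
    return 0.5 * d * d;
  }
  virtual size_t InitNumDim(void) { return 1; }
  virtual void InitModel(double *weight, size_t size) { weight[0] = 0.0; }
  virtual void CalcGrad(double *out_grad, const double *weight, size_t size) {
    out_grad[0] = weight[0] - 3.0;
  }
  // ISerializable part: this toy objective has no extra state to remember
  virtual void Load(rabit::IStream &fi) {}
  virtual void Save(rabit::IStream &fo) const {}
};
// wiring it up (after rabit initialization): QuadObj obj;
// LBFGSSolver<double> solver; solver.SetObjFunction(&obj); solver.Run();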
/*! \brief a basic version of an L-BFGS solver */
template<typename DType>
class LBFGSSolver {
public:
LBFGSSolver(void) {
// set default values
reg_L1 = 0.0f;
max_linesearch_iter = 100;
linesearch_backoff = 0.5f;
linesearch_c1 = 1e-4;
min_lbfgs_iter = 5;
max_lbfgs_iter = 500;
lbfgs_stop_tol = 1e-5f;
silent = 0;
}
virtual ~LBFGSSolver(void) {}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
virtual void SetParam(const char *name, const char *val) {
if (!strcmp("num_dim", name)) {
gstate.num_dim = static_cast<size_t>(atol(val));
}
if (!strcmp("size_memory", name)) {
gstate.size_memory = static_cast<size_t>(atol(val));
}
if (!strcmp("reg_L1", name)) {
reg_L1 = static_cast<float>(atof(val));
}
if (!strcmp("lbfgs_stop_tol", name)) {
lbfgs_stop_tol = static_cast<float>(atof(val));
}
if (!strcmp("linesearch_backoff", name)) {
linesearch_backoff = static_cast<float>(atof(val));
}
if (!strcmp("max_linesearch_iter", name)) {
max_linesearch_iter = atoi(val);
}
if (!strcmp("max_lbfgs_iter", name)) {
max_lbfgs_iter = atoi(val);
}
if (!strcmp("min_lbfgs_iter", name)) {
min_lbfgs_iter = atoi(val);
}
if (!strcmp("linesearch_c1", name)) {
linesearch_c1 = static_cast<float>(atof(val));
}
}
/*!
* \brief set objective function to optimize
* the objective function only need to evaluate and calculate
* gradient with respect to current subset of data
* \param obj the objective function we are looking for
*/
virtual void SetObjFunction(IObjFunction<DType> *obj) {
gstate.obj = obj;
}
/*!
* \brief initialize the LBFGS solver
* user must already set the objective function
*/
virtual void Init(void) {
utils::Check(gstate.obj != NULL,
"LBFGSSolver.Init must SetObjFunction first");
int version = rabit::LoadCheckPoint(&gstate, &hist);
if (version == 0) {
gstate.num_dim = gstate.obj->InitNumDim();
} else {
printf("restart from version=%d\n", version);
}
{
// decide parameter partition
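// each rank owns a contiguous slice [range_begin_, range_end_) of the
// weight vector; history (s, y) vectors are stored only for that slice,
// so per-pair dot products are computed locally and summed via Allreduce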
size_t nproc = rabit::GetWorldSize();
size_t rank = rabit::GetRank();
size_t step = (gstate.num_dim + nproc - 1) / nproc;
// round step up to a multiple of 8 for alignment
step = (step + 7) / 8 * 8;
utils::Assert(step * nproc >= gstate.num_dim, "BUG");
range_begin_ = std::min(rank * step, gstate.num_dim);
range_end_ = std::min((rank + 1) * step, gstate.num_dim);
}
if (version == 0) {
gstate.Init();
hist.Init(range_end_ - range_begin_, gstate.size_memory);
gstate.obj->InitModel(gstate.weight, gstate.num_dim);
// broadcast initialize model
rabit::Broadcast(gstate.weight,
sizeof(DType) * gstate.num_dim, 0);
gstate.old_objval = this->Eval(gstate.weight);
gstate.init_objval = gstate.old_objval;
if (silent == 0 && rabit::GetRank() == 0) {
rabit::TrackerPrintf
("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu, RAM-approx=%lu\n",
gstate.num_dim, gstate.init_objval, gstate.size_memory,
gstate.MemCost() + hist.MemCost());
}
}
}
/*!
* \brief get the current weight vector
* note that once the update function is called,
* the content of the weight vector is no longer valid
* \return weight vector
*/
virtual DType *GetWeight(void) {
return gstate.weight;
}
/*!
* \brief update the weight for one L-BFGS iteration
* \return whether stopping condition is met
*/
virtual bool UpdateOneIter(void) {
bool stop = false;
GlobalState &g = gstate;
g.obj->CalcGrad(g.grad, g.weight, g.num_dim);
rabit::Allreduce<rabit::op::Sum>(g.grad, g.num_dim);
// find change direction
double vdot = FindChangeDirection(g.tempw, g.grad, g.weight);
// line-search, g.grad is now new weight
int iter = BacktrackLineSearch(g.grad, g.tempw, g.weight, vdot);
utils::Check(iter < max_linesearch_iter, "line search failed");
// swap new weight
std::swap(g.weight, g.grad);
// check stop condition
if (gstate.num_iteration > static_cast<size_t>(min_lbfgs_iter)) {
if (g.old_objval - g.new_objval < lbfgs_stop_tol * g.init_objval) {
return true;
}
}
if (silent == 0 && rabit::GetRank() == 0) {
rabit::TrackerPrintf
("[%d] L-BFGS: linesearch finishes in %d rounds, new_objval=%g, improvment=%g\n",
gstate.num_iteration, iter,
gstate.new_objval,
gstate.old_objval - gstate.new_objval);
}
gstate.old_objval = gstate.new_objval;
rabit::CheckPoint(&gstate, &hist);
return stop;
}
/*! \brief run optimization */
virtual void Run(void) {
this->Init();
while (gstate.num_iteration < static_cast<size_t>(max_lbfgs_iter)) {
if (this->UpdateOneIter()) break;
}
if (silent == 0 && rabit::GetRank() == 0) {
size_t nonzero = 0;
for (size_t i = 0; i < gstate.num_dim; ++i) {
if (gstate.weight[i] != 0.0f) nonzero += 1;
}
rabit::TrackerPrintf
("L-BFGS: finishes at iteration %d, %lu/%lu active weights\n",
gstate.num_iteration, nonzero, gstate.num_dim);
}
}
protected:
// find the delta value, given gradient
// return dot(dir, l1grad)
virtual double FindChangeDirection(DType *dir,
const DType *grad,
const DType *weight) {
int m = static_cast<int>(gstate.size_memory);
int n = static_cast<int>(hist.num_useful());
if (n < m) {
utils::Assert(hist.num_useful() == gstate.num_iteration,
"BUG2, n=%d, it=%lu", n, gstate.num_iteration);
} else {
utils::Assert(n == m, "BUG3");
}
const size_t num_dim = gstate.num_dim;
const DType *gsub = grad + range_begin_;
const size_t nsub = range_end_ - range_begin_;
double vdot = 0.0;
if (n != 0) {
// hist[m + n - 1] stores old gradient
Minus(hist[m + n - 1], gsub, hist[m + n - 1], nsub);
SetL1Dir(hist[2 * m], gsub, weight + range_begin_, nsub);
// index set for calculating results
std::vector<std::pair<size_t, size_t> > idxset;
for (int j = 0; j < n; ++j) {
idxset.push_back(std::make_pair(j, 2 * m));
idxset.push_back(std::make_pair(j, n - 1));
idxset.push_back(std::make_pair(j, m + n - 1));
}
for (int j = 0; j < n; ++j) {
idxset.push_back(std::make_pair(m + j, 2 * m));
idxset.push_back(std::make_pair(m + j, m + n - 1));
}
// calculate dot products
std::vector<double> tmp(idxset.size());
for (size_t i = 0; i < tmp.size(); ++i) {
tmp[i] = hist.CalcDot(idxset[i].first, idxset[i].second);
}
rabit::Allreduce<rabit::op::Sum>(BeginPtr(tmp), tmp.size());
for (size_t i = 0; i < tmp.size(); ++i) {
gstate.DotBuf(idxset[i].first, idxset[i].second) = tmp[i];
}
// BFGS steps, use vector-free update
// parameterize vector using basis in hist
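// vector-free L-BFGS: the search direction is represented as a linear
// combination of the m s-vectors, the m y-vectors, and the steepest
// descent direction (index 2m); delta[] holds the coefficients, so the
// classic two-loop recursion below manipulates only scalars taken from
// the precomputed dot-product table DotBuf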
std::vector<double> alpha(n);
std::vector<double> delta(2 * m + 1, 0.0);
delta[2 * m] = 1.0;
// backward step
for (int j = n - 1; j >= 0; --j) {
double vsum = 0.0;
for (size_t k = 0; k < delta.size(); ++k) {
vsum += delta[k] * gstate.DotBuf(k, j);
}
alpha[j] = vsum / gstate.DotBuf(j, m + j);
delta[m + j] = delta[m + j] - alpha[j];
}
// scale
double scale = gstate.DotBuf(n - 1, m + n - 1) /
gstate.DotBuf(m + n - 1, m + n - 1);
for (size_t k = 0; k < delta.size(); ++k) {
delta[k] *= scale;
}
// forward step
for (int j = 0; j < n; ++j) {
double vsum = 0.0;
for (size_t k = 0; k < delta.size(); ++k) {
vsum += delta[k] * gstate.DotBuf(k, m + j);
}
double beta = vsum / gstate.DotBuf(j, m + j);
delta[j] = delta[j] + (alpha[j] - beta);
}
// set all to zero
std::fill(dir, dir + num_dim, 0.0f);
DType *dirsub = dir + range_begin_;
for (int i = 0; i < n; ++i) {
AddScale(dirsub, dirsub, hist[m + i], delta[m + i], nsub);
}
AddScale(dirsub, dirsub, hist[2 * m], delta[2 * m], nsub);
for (int i = 0; i < n; ++i) {
AddScale(dirsub, dirsub, hist[i], delta[i], nsub);
}
FixDirL1Sign(dirsub, hist[2 * m], nsub);
vdot = -Dot(dirsub, hist[2 * m], nsub);
// allreduce to get full direction
rabit::Allreduce<rabit::op::Sum>(dir, num_dim);
rabit::Allreduce<rabit::op::Sum>(&vdot, 1);
} else {
SetL1Dir(dir, grad, weight, num_dim);
vdot = -Dot(dir, dir, num_dim);
}
// shift the history record
if (n < m) {
n += 1;
} else {
gstate.Shift(); hist.Shift();
}
hist.set_num_useful(n);
// copy gradient to hist[m + n - 1]
memcpy(hist[m + n - 1], gsub, nsub * sizeof(DType));
return vdot;
}
// backtracking line search along the given direction;
// returns the number of evaluations used (== max_linesearch_iter on failure)
inline int BacktrackLineSearch(DType *new_weight,
const DType *dir,
const DType *weight,
double dot_dir_l1grad) {
utils::Assert(dot_dir_l1grad < 0.0f,
"gradient error, dotv=%g", dot_dir_l1grad);
double alpha = 1.0;
double backoff = linesearch_backoff;
// unit descent direction in first iter
if (gstate.num_iteration == 0) {
utils::Assert(hist.num_useful() == 1, "hist.nuseful");
alpha = 1.0f / std::sqrt(-dot_dir_l1grad);
backoff = 0.1f;
}
int iter = 0;
double old_val = gstate.old_objval;
double c1 = this->linesearch_c1;
while (true) {
const size_t num_dim = gstate.num_dim;
if (++iter >= max_linesearch_iter) return iter;
AddScale(new_weight, weight, dir, alpha, num_dim);
this->FixWeightL1Sign(new_weight, weight, num_dim);
double new_val = this->Eval(new_weight);
if (new_val - old_val <= c1 * dot_dir_l1grad * alpha) {
gstate.new_objval = new_val; break;
}
alpha *= backoff;
}
// hist[n - 1] = new_weight - weight
Minus(hist[hist.num_useful() - 1],
new_weight + range_begin_,
weight + range_begin_,
range_end_ - range_begin_);
gstate.num_iteration += 1;
return iter;
}
// OWL-QN step for L1 regularization
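// computes the steepest-descent direction of f(w) + reg_L1 * |w|_1:
// away from zero the L1 term is differentiable; at w == 0 the
// minimum-norm subgradient is used, and the component is set to 0
// whenever -reg_L1 <= grad <= reg_L1 (no descent is possible there)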
inline void SetL1Dir(DType *dst,
const DType *grad,
const DType *weight,
size_t size) {
if (reg_L1 == 0.0) {
for (size_t i = 0; i < size; ++i) {
dst[i] = -grad[i];
}
} else {
for (size_t i = 0; i < size; ++i) {
if (weight[i] > 0.0f) {
dst[i] = -grad[i] - reg_L1;
} else if (weight[i] < 0.0f) {
dst[i] = -grad[i] + reg_L1;
} else {
if (grad[i] < -reg_L1) {
dst[i] = -grad[i] - reg_L1;
} else if (grad[i] > reg_L1) {
dst[i] = -grad[i] + reg_L1;
} else {
dst[i] = 0.0;
}
}
}
}
}
// OWL-QN step: zero out direction entries whose sign disagrees with the steepest-descent proposal
inline void FixDirL1Sign(DType *dir,
const DType *steepdir,
size_t size) {
if (reg_L1 != 0.0f) {
for (size_t i = 0; i < size; ++i) {
if (dir[i] * steepdir[i] <= 0.0f) {
dir[i] = 0.0f;
}
}
}
}
// OWL-QN step: project the new weight back to the orthant of the old weight
inline void FixWeightL1Sign(DType *new_weight,
const DType *weight,
size_t size) {
if (reg_L1 != 0.0f) {
for (size_t i = 0; i < size; ++i) {
if (new_weight[i] * weight[i] < 0.0f) {
new_weight[i] = 0.0f;
}
}
}
}
inline double Eval(const DType *weight) {
double val = gstate.obj->Eval(weight, gstate.num_dim);
rabit::Allreduce<rabit::op::Sum>(&val, 1);
if (reg_L1 != 0.0f) {
double l1norm = 0.0;
for (size_t i = 0; i < gstate.num_dim; ++i) {
l1norm += std::abs(weight[i]);
}
val += l1norm * reg_L1;
}
return val;
}
private:
// helper functions
// dst = lhs + rhs * scale
inline static void AddScale(DType *dst,
const DType *lhs,
const DType *rhs,
DType scale,
size_t size) {
for (size_t i = 0; i < size; ++i) {
dst[i] = lhs[i] + rhs[i] * scale;
}
}
// dst = lhs - rhs
inline static void Minus(DType *dst,
const DType *lhs,
const DType *rhs,
size_t size) {
for (size_t i = 0; i < size; ++i) {
dst[i] = lhs[i] - rhs[i];
}
}
// return dot(lhs, rhs)
inline static double Dot(const DType *lhs,
const DType *rhs,
size_t size) {
double res = 0.0;
for (size_t i = 0; i < size; ++i) {
res += lhs[i] * rhs[i];
}
return res;
}
// map rolling array index
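// rolling-history layout: slots [0, m) hold the s-vectors (weight deltas),
// slots [m, 2m) hold the y-vectors (gradient deltas), and slot 2m holds the
// steepest-descent direction; offset makes the first two groups circular
// buffers so Shift() costs O(1) instead of moving any data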
inline static size_t MapIndex(size_t i, size_t offset,
size_t size_memory) {
if (i == 2 * size_memory) return i;
if (i < size_memory) {
return (i + offset) % size_memory;
} else {
utils::Assert(i < 2 * size_memory,
"MapIndex: index exceed bound, i=%lu", i);
return (i + offset) % size_memory + size_memory;
}
}
// global solver state
struct GlobalState : public rabit::ISerializable {
public:
// memory size of L-BFGS
size_t size_memory;
// number of iterations passed
size_t num_iteration;
// number of features in the solver
size_t num_dim;
// initialize objective value
double init_objval;
// history objective value
double old_objval;
// new objective value
double new_objval;
// objective function
IObjFunction<DType> *obj;
// temporal storage
DType *grad, *weight, *tempw;
// constructor
GlobalState(void)
: obj(NULL), grad(NULL),
weight(NULL), tempw(NULL) {
size_memory = 10;
num_iteration = 0;
num_dim = 0;
old_objval = 0.0;
offset_ = 0;
}
~GlobalState(void) {
if (grad != NULL) {
delete [] grad;
delete [] weight;
delete [] tempw;
}
}
// initialize the space of the rolling array
inline void Init(void) {
size_t n = size_memory * 2 + 1;
data.resize(n * n, 0.0);
this->AllocSpace();
}
// memory cost
inline size_t MemCost(void) const {
return sizeof(DType) * 3 * num_dim;
}
inline double &DotBuf(size_t i, size_t j) {
if (i > j) std::swap(i, j);
return data[MapIndex(i, offset_, size_memory) * (size_memory * 2 + 1) +
MapIndex(j, offset_, size_memory)];
}
// load the shift array
virtual void Load(rabit::IStream &fi) {
fi.Read(&size_memory, sizeof(size_memory));
fi.Read(&num_iteration, sizeof(num_iteration));
fi.Read(&num_dim, sizeof(num_dim));
fi.Read(&init_objval, sizeof(init_objval));
fi.Read(&old_objval, sizeof(old_objval));
fi.Read(&offset_, sizeof(offset_));
fi.Read(&data);
this->AllocSpace();
fi.Read(weight, sizeof(DType) * num_dim);
obj->Load(fi);
}
// save the shift array
virtual void Save(rabit::IStream &fo) const {
fo.Write(&size_memory, sizeof(size_memory));
fo.Write(&num_iteration, sizeof(num_iteration));
fo.Write(&num_dim, sizeof(num_dim));
fo.Write(&init_objval, sizeof(init_objval));
fo.Write(&old_objval, sizeof(old_objval));
fo.Write(&offset_, sizeof(offset_));
fo.Write(data);
fo.Write(weight, sizeof(DType) * num_dim);
obj->Save(fo);
}
inline void Shift(void) {
offset_ = (offset_ + 1) % size_memory;
}
private:
// rolling offset in the current memory
size_t offset_;
std::vector<double> data;
// allocate space
inline void AllocSpace(void) {
if (grad == NULL) {
grad = new DType[num_dim];
weight = new DType[num_dim];
tempw = new DType[num_dim];
}
}
};
/*! \brief rolling array that carries history information */
struct HistoryArray : public rabit::ISerializable {
public:
HistoryArray(void) : dptr_(NULL) {
num_useful_ = 0;
}
~HistoryArray(void) {
if (dptr_ != NULL) delete [] dptr_;
}
// initialize the space of the rolling array
inline void Init(size_t num_col, size_t size_memory) {
if (dptr_ != NULL &&
(num_col_ != num_col || size_memory_ != size_memory)) {
delete [] dptr_;
dptr_ = NULL;
}
num_col_ = num_col;
size_memory_ = size_memory;
stride_ = num_col_;
offset_ = 0;
if (dptr_ == NULL) {
size_t n = size_memory * 2 + 1;
dptr_ = new DType[n * stride_];
}
}
// memory cost
inline size_t MemCost(void) const {
return sizeof(DType) * (size_memory_ * 2 + 1) * stride_;
}
// fetch element from rolling array
inline const DType *operator[](size_t i) const {
return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;
}
inline DType *operator[](size_t i) {
return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;
}
// shift array: arr_old -> arr_new
// for i in [0, size_memory - 1), arr_new[i] = arr_old[i + 1]
// for i in [size_memory, 2 * size_memory - 1), arr_new[i] = arr_old[i + 1]
// arr_old[0] and arr_old[size_memory] will be discarded
inline void Shift(void) {
offset_ = (offset_ + 1) % size_memory_;
}
inline double CalcDot(size_t i, size_t j) const {
return Dot((*this)[i], (*this)[j], num_col_);
}
// get number of useful memory
inline const size_t &num_useful(void) const {
return num_useful_;
}
// set number of useful memory
inline void set_num_useful(size_t num_useful) {
utils::Assert(num_useful <= size_memory_,
"num_useful exceed bound");
num_useful_ = num_useful;
}
// load the shift array
virtual void Load(rabit::IStream &fi) {
fi.Read(&num_col_, sizeof(num_col_));
fi.Read(&stride_, sizeof(stride_));
fi.Read(&size_memory_, sizeof(size_memory_));
fi.Read(&num_useful_, sizeof(num_useful_));
this->Init(num_col_, size_memory_);
for (size_t i = 0; i < num_useful_; ++i) {
fi.Read((*this)[i], num_col_ * sizeof(DType));
fi.Read((*this)[i + size_memory_], num_col_ * sizeof(DType));
}
}
// save the shift array
virtual void Save(rabit::IStream &fi) const {
fi.Write(&num_col_, sizeof(num_col_));
fi.Write(&stride_, sizeof(stride_));
fi.Write(&size_memory_, sizeof(size_memory_));
fi.Write(&num_useful_, sizeof(num_useful_));
for (size_t i = 0; i < num_useful_; ++i) {
fi.Write((*this)[i], num_col_ * sizeof(DType));
fi.Write((*this)[i + size_memory_], num_col_ * sizeof(DType));
}
}
private:
// number of columns in each of array
size_t num_col_;
// stride for each of column for alignment
size_t stride_;
// memory size of L-BFGS
size_t size_memory_;
// number of useful memory that will be used
size_t num_useful_;
// rolling offset in the current memory
size_t offset_;
// data pointer
DType *dptr_;
};
// data structure for LBFGS
GlobalState gstate;
HistoryArray hist;
// silent
int silent;
// the subrange of current node
size_t range_begin_;
size_t range_end_;
// L1 regularization co-efficient
float reg_L1;
// c1 ratio for line search
float linesearch_c1;
float linesearch_backoff;
int max_linesearch_iter;
int max_lbfgs_iter;
int min_lbfgs_iter;
float lbfgs_stop_tol;
};
} // namespace solver
} // namespace rabit
#endif // RABIT_LEARN_LBFGS_H_

View File

@ -1,138 +0,0 @@
/*!
* Copyright (c) 2015 by Contributors
* \file data.h
* \brief simple data structure that could be used by model
*
* \author Tianqi Chen
*/
#ifndef RABIT_LEARN_DATA_H_
#define RABIT_LEARN_DATA_H_
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <limits>
#include <cmath>
#include <sstream>
#include <rabit.h>
#include "../io/io.h"
namespace rabit {
// typedef index type
typedef unsigned index_t;
/*! \brief sparse matrix, CSR format */
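// CSR layout: row_ptr has NumRow() + 1 entries and the nonzeros of row i
// live in data[row_ptr[i] .. row_ptr[i + 1]) as (findex, fvalue) pairs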
struct SparseMat {
// sparse matrix entry
struct Entry {
// feature index
index_t findex;
// feature value
float fvalue;
};
// sparse vector
struct Vector {
const Entry *data;
index_t length;
inline const Entry &operator[](size_t i) const {
return data[i];
}
};
inline Vector operator[](size_t i) const {
Vector v;
v.data = &data[0] + row_ptr[i];
v.length = static_cast<index_t>(row_ptr[i + 1]-row_ptr[i]);
return v;
}
// load data from LibSVM format
inline void Load(const char *fname) {
io::InputSplit *in =
io::CreateInputSplit
(fname, rabit::GetRank(),
rabit::GetWorldSize());
row_ptr.clear();
row_ptr.push_back(0);
data.clear();
feat_dim = 0;
std::string line;
while (in->NextLine(&line)) {
float label;
std::istringstream ss(line);
ss >> label;
Entry e;
unsigned long fidx;
while (!ss.eof()) {
if (!(ss >> fidx)) break;
ss.ignore(32, ':');
if (!(ss >> e.fvalue)) break;
e.findex = static_cast<index_t>(fidx);
data.push_back(e);
feat_dim = std::max(fidx, feat_dim);
}
labels.push_back(label);
row_ptr.push_back(data.size());
}
delete in;
feat_dim += 1;
utils::Check(feat_dim < std::numeric_limits<index_t>::max(),
"feature dimension exceed limit of index_t"\
"consider change the index_t to unsigned long");
}
inline size_t NumRow(void) const {
return row_ptr.size() - 1;
}
// memory cost
inline size_t MemCost(void) const {
return data.size() * sizeof(Entry);
}
// maximum feature dimension
size_t feat_dim;
std::vector<size_t> row_ptr;
std::vector<Entry> data;
std::vector<float> labels;
};
// dense matrix
struct Matrix {
inline void Init(size_t nrow, size_t ncol, float v = 0.0f) {
this->nrow = nrow;
this->ncol = ncol;
data.resize(nrow * ncol);
std::fill(data.begin(), data.end(), v);
}
inline float *operator[](size_t i) {
return &data[0] + i * ncol;
}
inline const float *operator[](size_t i) const {
return &data[0] + i * ncol;
}
inline void Print(const char *fname) {
FILE *fo;
if (!strcmp(fname, "stdout")) {
fo = stdout;
} else {
fo = utils::FopenCheck(fname, "w");
}
for (size_t i = 0; i < data.size(); ++i) {
fprintf(fo, "%g", data[i]);
if ((i+1) % ncol == 0) {
fprintf(fo, "\n");
} else {
fprintf(fo, " ");
}
}
// close the file
if (fo != stdout) fclose(fo);
}
// number of data
size_t nrow, ncol;
std::vector<float> data;
};
/*! \brief returns a random integer in [0, value); note that rand() % value has a slight modulo bias */
inline int Random(int value) {
return rand() % value;
}
} // namespace rabit
#endif // RABIT_LEARN_DATA_H_
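As a concrete illustration of the CSR accessors above (the file name is just the demo data set used elsewhere in the examples):

// hypothetical row-iteration sketch, not part of the header
rabit::SparseMat mat;
mat.Load("../data/agaricus.txt.train");  // each rank reads its input split
for (size_t i = 0; i < mat.NumRow(); ++i) {
  rabit::SparseMat::Vector row = mat[i];
  float sum = 0.0f;
  for (rabit::index_t j = 0; j < row.length; ++j) {
    sum += row[j].fvalue;  // e.g. accumulate this row's feature values
  }
}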

View File

@ -31,6 +31,7 @@ AllreduceBase::AllreduceBase(void) {
// tracker URL
task_id = "NULL";
err_link = NULL;
dmlc_role = "worker";
this->SetParam("rabit_reduce_buffer", "256MB");
// setup possible environment variables of interest
env_vars.push_back("rabit_task_id");
@ -39,6 +40,12 @@ AllreduceBase::AllreduceBase(void) {
env_vars.push_back("rabit_reduce_ring_mincount");
env_vars.push_back("rabit_tracker_uri");
env_vars.push_back("rabit_tracker_port");
// also include dmlc support direct variables
env_vars.push_back("DMLC_TASK_ID");
env_vars.push_back("DMLC_ROLE");
env_vars.push_back("DMLC_NUM_ATTEMPT");
env_vars.push_back("DMLC_TRACKER_URI");
env_vars.push_back("DMLC_TRACKER_PORT");
}
// initialization function
@ -86,6 +93,10 @@ void AllreduceBase::Init(void) {
this->SetParam("rabit_world_size", num_task);
}
}
if (dmlc_role != "worker") {
fprintf(stderr, "Rabit module currently only works with dmlc workers; quitting this program with exit 0\n");
exit(0);
}
// clear the setting before start reconnection
this->rank = -1;
//---------------------
@ -150,6 +161,10 @@ void AllreduceBase::SetParam(const char *name, const char *val) {
if (!strcmp(name, "rabit_tracker_uri")) tracker_uri = val;
if (!strcmp(name, "rabit_tracker_port")) tracker_port = atoi(val);
if (!strcmp(name, "rabit_task_id")) task_id = val;
if (!strcmp(name, "DMLC_TRACKER_URI")) tracker_uri = val;
if (!strcmp(name, "DMLC_TRACKER_PORT")) tracker_port = atoi(val);
if (!strcmp(name, "DMLC_TASK_ID")) task_id = val;
if (!strcmp(name, "DMLC_ROLE")) dmlc_role = val;
if (!strcmp(name, "rabit_world_size")) world_size = atoi(val);
if (!strcmp(name, "rabit_hadoop_mode")) hadoop_mode = atoi(val);
if (!strcmp(name, "rabit_reduce_ring_mincount")) {

View File

@ -126,8 +126,8 @@ class AllreduceBase : public IEngine {
*
* \sa CheckPoint, VersionNumber
*/
virtual int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model = NULL) {
virtual int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = NULL) {
return 0;
}
/*!
@ -146,8 +146,8 @@ class AllreduceBase : public IEngine {
*
* \sa LoadCheckPoint, VersionNumber
*/
virtual void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model = NULL) {
virtual void CheckPoint(const Serializable *global_model,
const Serializable *local_model = NULL) {
version_number += 1;
}
/*!
@ -170,7 +170,7 @@ class AllreduceBase : public IEngine {
* is the same in all nodes
* \sa LoadCheckPoint, CheckPoint, VersionNumber
*/
virtual void LazyCheckPoint(const ISerializable *global_model) {
virtual void LazyCheckPoint(const Serializable *global_model) {
version_number += 1;
}
/*!
@ -496,6 +496,8 @@ class AllreduceBase : public IEngine {
std::string host_uri;
// uri of tracker
std::string tracker_uri;
// role in dmlc jobs
std::string dmlc_role;
// port of tracker address
int tracker_port;
// port of slave process

View File

@ -5,8 +5,8 @@
*
* \author Ignacio Cano, Tianqi Chen
*/
#ifndef RABIT_ALLREDUCE_MOCK_H
#define RABIT_ALLREDUCE_MOCK_H
#ifndef RABIT_ALLREDUCE_MOCK_H_
#define RABIT_ALLREDUCE_MOCK_H_
#include <vector>
#include <map>
#include <sstream>
@ -31,6 +31,7 @@ class AllreduceMock : public AllreduceRobust {
AllreduceRobust::SetParam(name, val);
// additional parameters
if (!strcmp(name, "rabit_num_trial")) num_trial = atoi(val);
if (!strcmp(name, "DMLC_NUM_ATTEMPT")) num_trial = atoi(val);
if (!strcmp(name, "report_stats")) report_stats = atoi(val);
if (!strcmp(name, "force_local")) force_local = atoi(val);
if (!strcmp(name, "mock")) {
@ -57,8 +58,8 @@ class AllreduceMock : public AllreduceRobust {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial), "Broadcast");
AllreduceRobust::Broadcast(sendrecvbuf_, total_size, root);
}
virtual int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model) {
virtual int LoadCheckPoint(Serializable *global_model,
Serializable *local_model) {
tsum_allreduce = 0.0;
time_checkpoint = utils::GetTime();
if (force_local == 0) {
@ -69,8 +70,8 @@ class AllreduceMock : public AllreduceRobust {
return AllreduceRobust::LoadCheckPoint(&dum, &com);
}
}
virtual void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model) {
virtual void CheckPoint(const Serializable *global_model,
const Serializable *local_model) {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial), "CheckPoint");
double tstart = utils::GetTime();
double tbet_chkpt = tstart - time_checkpoint;
@ -95,7 +96,7 @@ class AllreduceMock : public AllreduceRobust {
tsum_allreduce = 0.0;
}
virtual void LazyCheckPoint(const ISerializable *global_model) {
virtual void LazyCheckPoint(const Serializable *global_model) {
this->Verify(MockKey(rank, version_number, seq_counter, num_trial), "LazyCheckPoint");
AllreduceRobust::LazyCheckPoint(global_model);
}
@ -109,28 +110,28 @@ class AllreduceMock : public AllreduceRobust {
double time_checkpoint;
private:
struct DummySerializer : public ISerializable {
virtual void Load(IStream &fi) {
struct DummySerializer : public Serializable {
virtual void Load(Stream *fi) {
}
virtual void Save(IStream &fo) const {
virtual void Save(Stream *fo) const {
}
};
struct ComboSerializer : public ISerializable {
ISerializable *lhs;
ISerializable *rhs;
const ISerializable *c_lhs;
const ISerializable *c_rhs;
ComboSerializer(ISerializable *lhs, ISerializable *rhs)
struct ComboSerializer : public Serializable {
Serializable *lhs;
Serializable *rhs;
const Serializable *c_lhs;
const Serializable *c_rhs;
ComboSerializer(Serializable *lhs, Serializable *rhs)
: lhs(lhs), rhs(rhs), c_lhs(lhs), c_rhs(rhs) {
}
ComboSerializer(const ISerializable *lhs, const ISerializable *rhs)
ComboSerializer(const Serializable *lhs, const Serializable *rhs)
: lhs(NULL), rhs(NULL), c_lhs(lhs), c_rhs(rhs) {
}
virtual void Load(IStream &fi) {
virtual void Load(Stream *fi) {
if (lhs != NULL) lhs->Load(fi);
if (rhs != NULL) rhs->Load(fi);
}
virtual void Save(IStream &fo) const {
virtual void Save(Stream *fo) const {
if (c_lhs != NULL) c_lhs->Save(fo);
if (c_rhs != NULL) c_rhs->Save(fo);
}
@ -172,4 +173,4 @@ class AllreduceMock : public AllreduceRobust {
};
} // namespace engine
} // namespace rabit
#endif // RABIT_ALLREDUCE_MOCK_H
#endif // RABIT_ALLREDUCE_MOCK_H_

View File

@ -158,8 +158,8 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root)
*
* \sa CheckPoint, VersionNumber
*/
int AllreduceRobust::LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model) {
int AllreduceRobust::LoadCheckPoint(Serializable *global_model,
Serializable *local_model) {
// skip action in single node
if (world_size == 1) return 0;
this->LocalModelCheck(local_model != NULL);
@ -175,7 +175,7 @@ int AllreduceRobust::LoadCheckPoint(ISerializable *global_model,
// load in local model
utils::MemoryFixSizeBuffer fs(BeginPtr(local_chkpt[local_chkpt_version]),
local_rptr[local_chkpt_version][1]);
local_model->Load(fs);
local_model->Load(&fs);
} else {
utils::Assert(nlocal == 0, "[%d] local model inconsistent, nlocal=%d", rank, nlocal);
}
@ -189,7 +189,7 @@ int AllreduceRobust::LoadCheckPoint(ISerializable *global_model,
} else {
utils::Assert(fs.Read(&version_number, sizeof(version_number)) != 0,
"read in version number");
global_model->Load(fs);
global_model->Load(&fs);
utils::Assert(local_model == NULL || nlocal == num_local_replica + 1,
"local model inconsistent, nlocal=%d", nlocal);
}
@ -241,8 +241,8 @@ void AllreduceRobust::LocalModelCheck(bool with_local) {
*
* \sa CheckPoint, LazyCheckPoint
*/
void AllreduceRobust::CheckPoint_(const ISerializable *global_model,
const ISerializable *local_model,
void AllreduceRobust::CheckPoint_(const Serializable *global_model,
const Serializable *local_model,
bool lazy_checkpt) {
// never do check point in single machine mode
if (world_size == 1) {
@ -261,7 +261,7 @@ void AllreduceRobust::CheckPoint_(const ISerializable *global_model,
local_chkpt[new_version].clear();
utils::MemoryBufferStream fs(&local_chkpt[new_version]);
if (local_model != NULL) {
local_model->Save(fs);
local_model->Save(&fs);
}
local_rptr[new_version].clear();
local_rptr[new_version].push_back(0);
@ -287,7 +287,7 @@ void AllreduceRobust::CheckPoint_(const ISerializable *global_model,
global_checkpoint.resize(0);
utils::MemoryBufferStream fs(&global_checkpoint);
fs.Write(&version_number, sizeof(version_number));
global_model->Save(fs);
global_model->Save(&fs);
global_lazycheck = NULL;
}
// reset result buffer
@ -748,7 +748,7 @@ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) {
global_checkpoint.resize(0);
utils::MemoryBufferStream fs(&global_checkpoint);
fs.Write(&version_number, sizeof(version_number));
global_lazycheck->Save(fs);
global_lazycheck->Save(&fs);
global_lazycheck = NULL;
}
// recover global checkpoint

View File

@ -80,8 +80,8 @@ class AllreduceRobust : public AllreduceBase {
*
* \sa CheckPoint, VersionNumber
*/
virtual int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model = NULL);
virtual int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = NULL);
/*!
* \brief checkpoint the model, meaning we finished a stage of execution
* every time we call check point, there is a version number which will increase by one
@ -98,8 +98,8 @@ class AllreduceRobust : public AllreduceBase {
*
* \sa LoadCheckPoint, VersionNumber
*/
virtual void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model = NULL) {
virtual void CheckPoint(const Serializable *global_model,
const Serializable *local_model = NULL) {
this->CheckPoint_(global_model, local_model, false);
}
/*!
@ -122,7 +122,7 @@ class AllreduceRobust : public AllreduceBase {
* is the same in all nodes
* \sa LoadCheckPoint, CheckPoint, VersionNumber
*/
virtual void LazyCheckPoint(const ISerializable *global_model) {
virtual void LazyCheckPoint(const Serializable *global_model) {
this->CheckPoint_(global_model, NULL, true);
}
/*!
@ -318,8 +318,8 @@ class AllreduceRobust : public AllreduceBase {
*
* \sa CheckPoint, LazyCheckPoint
*/
void CheckPoint_(const ISerializable *global_model,
const ISerializable *local_model,
void CheckPoint_(const Serializable *global_model,
const Serializable *local_model,
bool lazy_checkpt);
/*!
* \brief reset the all the existing links by sending Out-of-Band message marker
@ -521,7 +521,7 @@ * the input state must contain exactly one saved state (local state of current node)
// last check point global model
std::string global_checkpoint;
// lazy checkpoint of global model
const ISerializable *global_lazycheck;
const Serializable *global_lazycheck;
// number of replica for local state/model
int num_local_replica;
// number of default local replica

View File

@ -34,15 +34,15 @@ class EmptyEngine : public IEngine {
virtual void InitAfterException(void) {
utils::Error("EmptyEngine is not fault tolerant");
}
virtual int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model = NULL) {
virtual int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = NULL) {
return 0;
}
virtual void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model = NULL) {
virtual void CheckPoint(const Serializable *global_model,
const Serializable *local_model = NULL) {
version_number += 1;
}
virtual void LazyCheckPoint(const ISerializable *global_model) {
virtual void LazyCheckPoint(const Serializable *global_model) {
version_number += 1;
}
virtual int VersionNumber(void) const {

View File

@ -37,15 +37,15 @@ class MPIEngine : public IEngine {
virtual void InitAfterException(void) {
utils::Error("MPI is not fault tolerant");
}
virtual int LoadCheckPoint(ISerializable *global_model,
ISerializable *local_model = NULL) {
virtual int LoadCheckPoint(Serializable *global_model,
Serializable *local_model = NULL) {
return 0;
}
virtual void CheckPoint(const ISerializable *global_model,
const ISerializable *local_model = NULL) {
virtual void CheckPoint(const Serializable *global_model,
const Serializable *local_model = NULL) {
version_number += 1;
}
virtual void LazyCheckPoint(const ISerializable *global_model) {
virtual void LazyCheckPoint(const Serializable *global_model) {
version_number += 1;
}
virtual int VersionNumber(void) const {

Some files were not shown because too many files have changed in this diff