Squashed 'subtree/rabit/' changes from 1bb8fe9..4db0a62
4db0a62 bugfix of lazy prepare
87017bd license
dc703e1 license
c171440 change license to bsd
7db2070 Update README.md
581fe06 add mocktest
d2f252f ok
4a5b9e5 add all
12ee049 init version of lbfgs
37a2837 complete lbfgs solver
6ade7cb complete lbfgs

git-subtree-dir: subtree/rabit
git-subtree-split: 4db0a62a068894a55f70bad5e80c33d4434fc834
This commit is contained in:
parent 3791ae5cf0
commit 13776a006a
LICENSE | 35
@@ -1,13 +1,28 @@
Copyright (c) 2014 by Contributors
All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of rabit nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -8,6 +8,8 @@ It also contains links to the Machine Learning packages that use rabit.
Toolkits
====
* [KMeans Clustering](kmeans)
* [Linear and Logistic Regression](linear)

* [XGBoost: eXtreme Gradient Boosting](https://github.com/tqchen/xgboost/tree/master/multi-node)
  - xgboost is a very fast boosted tree (also known as GBDT) library that can run more than
    10 times faster than existing packages
@@ -4,7 +4,7 @@ export CC = gcc
export CXX = g++
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm -L../../lib
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../../include -I../common
export CFLAGS = -Wall -msse2 -Wno-unknown-pragmas -fPIC -I../../include

.PHONY: clean all lib mpi
all: $(BIN) $(MOCKBIN)
rabit-learn/data/README.md | 2 (new file)
@@ -0,0 +1,2 @@
This folder contains the processed example datasets used by the demos.
Copyright of the datasets belongs to the original copyright holders.
rabit-learn/data/agaricus.txt.test | 1611 (new file)
File diff suppressed because it is too large.

rabit-learn/data/agaricus.txt.train | 6513 (new file)
File diff suppressed because it is too large.
rabit-learn/data/featmap.txt | 126 (new file)

@@ -0,0 +1,126 @@
0 cap-shape=bell i
1 cap-shape=conical i
2 cap-shape=convex i
3 cap-shape=flat i
4 cap-shape=knobbed i
5 cap-shape=sunken i
6 cap-surface=fibrous i
7 cap-surface=grooves i
8 cap-surface=scaly i
9 cap-surface=smooth i
10 cap-color=brown i
11 cap-color=buff i
12 cap-color=cinnamon i
13 cap-color=gray i
14 cap-color=green i
15 cap-color=pink i
16 cap-color=purple i
17 cap-color=red i
18 cap-color=white i
19 cap-color=yellow i
20 bruises?=bruises i
21 bruises?=no i
22 odor=almond i
23 odor=anise i
24 odor=creosote i
25 odor=fishy i
26 odor=foul i
27 odor=musty i
28 odor=none i
29 odor=pungent i
30 odor=spicy i
31 gill-attachment=attached i
32 gill-attachment=descending i
33 gill-attachment=free i
34 gill-attachment=notched i
35 gill-spacing=close i
36 gill-spacing=crowded i
37 gill-spacing=distant i
38 gill-size=broad i
39 gill-size=narrow i
40 gill-color=black i
41 gill-color=brown i
42 gill-color=buff i
43 gill-color=chocolate i
44 gill-color=gray i
45 gill-color=green i
46 gill-color=orange i
47 gill-color=pink i
48 gill-color=purple i
49 gill-color=red i
50 gill-color=white i
51 gill-color=yellow i
52 stalk-shape=enlarging i
53 stalk-shape=tapering i
54 stalk-root=bulbous i
55 stalk-root=club i
56 stalk-root=cup i
57 stalk-root=equal i
58 stalk-root=rhizomorphs i
59 stalk-root=rooted i
60 stalk-root=missing i
61 stalk-surface-above-ring=fibrous i
62 stalk-surface-above-ring=scaly i
63 stalk-surface-above-ring=silky i
64 stalk-surface-above-ring=smooth i
65 stalk-surface-below-ring=fibrous i
66 stalk-surface-below-ring=scaly i
67 stalk-surface-below-ring=silky i
68 stalk-surface-below-ring=smooth i
69 stalk-color-above-ring=brown i
70 stalk-color-above-ring=buff i
71 stalk-color-above-ring=cinnamon i
72 stalk-color-above-ring=gray i
73 stalk-color-above-ring=orange i
74 stalk-color-above-ring=pink i
75 stalk-color-above-ring=red i
76 stalk-color-above-ring=white i
77 stalk-color-above-ring=yellow i
78 stalk-color-below-ring=brown i
79 stalk-color-below-ring=buff i
80 stalk-color-below-ring=cinnamon i
81 stalk-color-below-ring=gray i
82 stalk-color-below-ring=orange i
83 stalk-color-below-ring=pink i
84 stalk-color-below-ring=red i
85 stalk-color-below-ring=white i
86 stalk-color-below-ring=yellow i
87 veil-type=partial i
88 veil-type=universal i
89 veil-color=brown i
90 veil-color=orange i
91 veil-color=white i
92 veil-color=yellow i
93 ring-number=none i
94 ring-number=one i
95 ring-number=two i
96 ring-type=cobwebby i
97 ring-type=evanescent i
98 ring-type=flaring i
99 ring-type=large i
100 ring-type=none i
101 ring-type=pendant i
102 ring-type=sheathing i
103 ring-type=zone i
104 spore-print-color=black i
105 spore-print-color=brown i
106 spore-print-color=buff i
107 spore-print-color=chocolate i
108 spore-print-color=green i
109 spore-print-color=orange i
110 spore-print-color=purple i
111 spore-print-color=white i
112 spore-print-color=yellow i
113 population=abundant i
114 population=clustered i
115 population=numerous i
116 population=scattered i
117 population=several i
118 population=solitary i
119 habitat=grasses i
120 habitat=leaves i
121 habitat=meadows i
122 habitat=paths i
123 habitat=urban i
124 habitat=waste i
125 habitat=woods i
@@ -2,8 +2,8 @@
// facing an exception
#include <rabit.h>
#include <rabit/utils.h>
#include "./toolkit_util.h"
#include <time.h>
#include "../utils/data.h"

using namespace rabit;

@@ -83,9 +83,12 @@ inline size_t GetCluster(const Matrix &centroids,

int main(int argc, char *argv[]) {
  if (argc < 5) {
    // initialize the rabit engine
    rabit::Init(argc, argv);
    if (rabit::GetRank() == 0) {
      rabit::TrackerPrintf("Usage: <data_dir> num_cluster max_iter <out_model>\n");
    }
    rabit::Finalize();
    return 0;
  }
  clock_t tStart = clock();
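Editorial note, not part of the commit: the usage check above follows the standard rabit program lifecycle. A minimal sketch using only calls that appear elsewhere in this diff (Init, GetRank, GetWorldSize, TrackerPrintf, Finalize):

#include <rabit.h>

int main(int argc, char *argv[]) {
  // every worker must initialize the engine before any communication
  rabit::Init(argc, argv);
  // print through the tracker from a single rank to avoid duplicated output
  if (rabit::GetRank() == 0) {
    rabit::TrackerPrintf("started with %d workers\n", rabit::GetWorldSize());
  }
  // shut the engine down on every exit path
  rabit::Finalize();
  return 0;
}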
rabit-learn/linear/Makefile | 14 (new file)
@@ -0,0 +1,14 @@
# specify tensor path
BIN = linear.rabit
MOCKBIN= linear.mock
MPIBIN =
# objects that make up the rabit library
OBJ = linear.o

# common build script for programs
include ../common.mk
CFLAGS+=-fopenmp
linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h
# dependencies here
linear.rabit: linear.o lib
linear.mock: linear.o lib
rabit-learn/linear/README.md | 33 (new file)
@@ -0,0 +1,33 @@
Linear and Logistic Regression
====
* input format: LibSVM
* Example: [run-linear.sh](run-linear.sh)

Parameters
===
All the parameters can be set by param=value

#### Important Parameters
* objective [default = logistic]
  - can be linear or logistic
* base_score [default = 0.5]
  - global bias, recommended to be set to the mean value of the labels
* reg_L1 [default = 0]
  - L1 regularization coefficient
* reg_L2 [default = 1]
  - L2 regularization coefficient
* lbfgs_stop_tol [default = 1e-5]
  - relative tolerance level of loss reduction with respect to the initial loss
* max_lbfgs_iter [default = 500]
  - maximum number of L-BFGS iterations

### Optimization Related Parameters
* min_lbfgs_iter [default = 5]
  - minimum number of L-BFGS iterations
* max_linesearch_iter [default = 100]
  - maximum number of iterations in line search
* linesearch_c1 [default = 1e-4]
  - c1 coefficient in backoff line search
* linesearch_backoff [default = 0.5]
  - backoff ratio in line search
rabit-learn/linear/linear.cc | 239 (new file)
@@ -0,0 +1,239 @@
#include "./linear.h"
#include "../utils/io.h"
#include "../utils/base64.h"

namespace rabit {
namespace linear {
class LinearObjFunction : public solver::IObjFunction<float> {
 public:
  // training threads
  int nthread;
  // L2 regularization
  float reg_L2;
  // model
  LinearModel model;
  // training data
  SparseMat dtrain;
  // solver
  solver::LBFGSSolver<float> lbfgs;
  // constructor
  LinearObjFunction(void) {
    lbfgs.SetObjFunction(this);
    nthread = 1;
    reg_L2 = 0.0f;
    model.weight = NULL;
    task = "train";
    model_in = "NULL";
    name_pred = "pred.txt";
    model_out = "final.model";
  }
  virtual ~LinearObjFunction(void) {
  }
  // set parameters
  inline void SetParam(const char *name, const char *val) {
    model.param.SetParam(name, val);
    lbfgs.SetParam(name, val);
    if (!strcmp(name, "num_feature")) {
      char ndigit[30];
      sprintf(ndigit, "%lu", model.param.num_feature + 1);
      lbfgs.SetParam("num_dim", ndigit);
    }
    if (!strcmp(name, "reg_L2")) {
      reg_L2 = static_cast<float>(atof(val));
    }
    if (!strcmp(name, "nthread")) {
      nthread = atoi(val);
    }
    if (!strcmp(name, "task")) task = val;
    if (!strcmp(name, "model_in")) model_in = val;
    if (!strcmp(name, "model_out")) model_out = val;
    if (!strcmp(name, "name_pred")) name_pred = val;
  }
  inline void Run(void) {
    if (model_in != "NULL") {
      this->LoadModel(model_in.c_str());
    }
    if (task == "train") {
      lbfgs.Run();
      this->SaveModel(model_out.c_str(), lbfgs.GetWeight());
    } else if (task == "pred") {
      this->TaskPred();
    } else {
      utils::Error("unknown task=%s", task.c_str());
    }
  }
  inline void TaskPred(void) {
    utils::Check(model_in != "NULL",
                 "must set model_in for task=pred");
    FILE *fp = utils::FopenCheck(name_pred.c_str(), "w");
    for (size_t i = 0; i < dtrain.NumRow(); ++i) {
      float pred = model.Predict(dtrain[i]);
      fprintf(fp, "%g\n", pred);
    }
    fclose(fp);
    printf("Finished writing predictions to %s\n", name_pred.c_str());
  }
  inline void LoadModel(const char *fname) {
    FILE *fp = utils::FopenCheck(fname, "rb");
    std::string header; header.resize(4);
    // check the header for the binary encoding used:
    // can be base64 or raw binary
    utils::FileStream fi(fp);
    utils::Check(fi.Read(&header[0], 4) != 0, "invalid model");
    // base64 format
    if (header == "bs64") {
      utils::Base64InStream bsin(fp);
      bsin.InitPosition();
      model.Load(bsin);
      fclose(fp);
      return;
    } else if (header == "binf") {
      model.Load(fi);
      fclose(fp);
      return;
    } else {
      utils::Error("invalid model file");
    }
  }
  inline void SaveModel(const char *fname,
                        const float *wptr,
                        bool save_base64 = false) {
    FILE *fp;
    bool use_stdout = false;
    if (!strcmp(fname, "stdout")) {
      fp = stdout;
      use_stdout = true;
    } else {
      fp = utils::FopenCheck(fname, "wb");
    }
    utils::FileStream fo(fp);
    if (save_base64 || use_stdout) {
      fo.Write("bs64\t", 5);
      utils::Base64OutStream bout(fp);
      model.Save(bout, wptr);
      bout.Finish('\n');
    } else {
      fo.Write("binf", 4);
      model.Save(fo, wptr);
    }
    if (!use_stdout) {
      fclose(fp);
    }
  }
  inline void LoadData(const char *fname) {
    dtrain.Load(fname);
  }
  virtual size_t InitNumDim(void) {
    if (model_in == "NULL") {
      size_t ndim = dtrain.feat_dim;
      rabit::Allreduce<rabit::op::Max>(&ndim, 1);
      model.param.num_feature = std::max(ndim, model.param.num_feature);
    }
    return model.param.num_feature + 1;
  }
  virtual void InitModel(float *weight, size_t size) {
    if (model_in == "NULL") {
      memset(weight, 0, size * sizeof(float));
      model.param.InitBaseScore();
    } else {
      rabit::Broadcast(model.weight, size * sizeof(float), 0);
      memcpy(weight, model.weight, size * sizeof(float));
    }
  }
  // load model
  virtual void Load(rabit::IStream &fi) {
    fi.Read(&model.param, sizeof(model.param));
  }
  virtual void Save(rabit::IStream &fo) const {
    fo.Write(&model.param, sizeof(model.param));
  }
  virtual double Eval(const float *weight, size_t size) {
    if (nthread != 0) omp_set_num_threads(nthread);
    utils::Check(size == model.param.num_feature + 1,
                 "size consistency check");
    double sum_val = 0.0;
    #pragma omp parallel for schedule(static) reduction(+:sum_val)
    for (size_t i = 0; i < dtrain.NumRow(); ++i) {
      float py = model.param.PredictMargin(weight, dtrain[i]);
      float fv = model.param.MarginToLoss(dtrain.labels[i], py);
      sum_val += fv;
    }
    if (rabit::GetRank() == 0) {
      // only add regularization once
      if (reg_L2 != 0.0f) {
        double sum_sqr = 0.0;
        for (size_t i = 0; i < model.param.num_feature; ++i) {
          sum_sqr += weight[i] * weight[i];
        }
        sum_val += 0.5 * reg_L2 * sum_sqr;
      }
    }
    utils::Check(!std::isnan(sum_val), "nan occurs");
    return sum_val;
  }
  virtual void CalcGrad(float *out_grad,
                        const float *weight,
                        size_t size) {
    if (nthread != 0) omp_set_num_threads(nthread);
    utils::Check(size == model.param.num_feature + 1,
                 "size consistency check");
    memset(out_grad, 0, sizeof(float) * size);
    double sum_gbias = 0.0;
    #pragma omp parallel for schedule(static) reduction(+:sum_gbias)
    for (size_t i = 0; i < dtrain.NumRow(); ++i) {
      SparseMat::Vector v = dtrain[i];
      float py = model.param.Predict(weight, v);
      float grad = model.param.PredToGrad(dtrain.labels[i], py);
      for (index_t j = 0; j < v.length; ++j) {
        out_grad[v[j].findex] += v[j].fvalue * grad;
      }
      sum_gbias += grad;
    }
    out_grad[model.param.num_feature] = static_cast<float>(sum_gbias);
    if (rabit::GetRank() == 0) {
      // only add regularization once
      if (reg_L2 != 0.0f) {
        for (size_t i = 0; i < model.param.num_feature; ++i) {
          out_grad[i] += reg_L2 * weight[i];
        }
      }
    }
  }

 private:
  std::string task;
  std::string model_in;
  std::string model_out;
  std::string name_pred;
};
}  // namespace linear
}  // namespace rabit

int main(int argc, char *argv[]) {
  if (argc < 2) {
    // initialize the rabit engine
    rabit::Init(argc, argv);
    if (rabit::GetRank() == 0) {
      rabit::TrackerPrintf("Usage: <data_in> param=val\n");
    }
    rabit::Finalize();
    return 0;
  }
  rabit::linear::LinearObjFunction linear;
  if (!strcmp(argv[1], "stdin")) {
    linear.LoadData(argv[1]);
    rabit::Init(argc, argv);
  } else {
    rabit::Init(argc, argv);
    linear.LoadData(argv[1]);
  }
  for (int i = 2; i < argc; ++i) {
    char name[256], val[256];
    if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
      linear.SetParam(name, val);
    }
  }
  linear.Run();
  rabit::Finalize();
  return 0;
}
rabit-learn/linear/linear.h | 133 (new file)
@@ -0,0 +1,133 @@
/*!
 * Copyright (c) 2015 by Contributors
 * \file linear.h
 * \brief Linear and Logistic regression
 *
 * \author Tianqi Chen
 */
#ifndef RABIT_LINEAR_H_
#define RABIT_LINEAR_H_
#include <omp.h>
#include "../utils/data.h"
#include "../solver/lbfgs.h"

namespace rabit {
namespace linear {
/*! \brief simple linear model */
struct LinearModel {
  struct ModelParam {
    /*! \brief global bias */
    float base_score;
    /*! \brief number of features */
    size_t num_feature;
    /*! \brief loss type */
    int loss_type;
    // reserved fields
    int reserved[16];
    // constructor
    ModelParam(void) {
      base_score = 0.5f;
      num_feature = 0;
      loss_type = 1;
      std::memset(reserved, 0, sizeof(reserved));
    }
    // initialize base score
    inline void InitBaseScore(void) {
      utils::Check(base_score > 0.0f && base_score < 1.0f,
                   "base_score must be in (0,1) for logistic loss");
      base_score = -std::log(1.0f / base_score - 1.0f);
    }
    /*!
     * \brief set parameters from outside
     * \param name name of the parameter
     * \param val value of the parameter
     */
    inline void SetParam(const char *name, const char *val) {
      using namespace std;
      if (!strcmp("base_score", name)) {
        base_score = static_cast<float>(atof(val));
      }
      if (!strcmp("num_feature", name)) {
        num_feature = static_cast<size_t>(atol(val));
      }
      if (!strcmp("objective", name)) {
        if (!strcmp("linear", val)) {
          loss_type = 0;
        } else if (!strcmp("logistic", val)) {
          loss_type = 1;
        } else {
          utils::Error("unknown objective type %s\n", val);
        }
      }
    }
    // transform margin to prediction
    inline float MarginToPred(float margin) const {
      if (loss_type == 1) {
        return 1.0f / (1.0f + std::exp(-margin));
      } else {
        return margin;
      }
    }
    // margin to loss
    inline float MarginToLoss(float label, float margin) const {
      if (loss_type == 1) {
        float nlogprob;
        if (margin > 0.0f) {
          nlogprob = std::log(1.0f + std::exp(-margin));
        } else {
          nlogprob = -margin + std::log(1.0f + std::exp(margin));
        }
        return label * nlogprob +
            (1.0f - label) * (margin + nlogprob);
      } else {
        float diff = margin - label;
        return 0.5f * diff * diff;
      }
    }
    inline float PredToGrad(float label, float pred) const {
      return pred - label;
    }
    inline float PredictMargin(const float *weight,
                               const SparseMat::Vector &v) const {
      // weight[num_feature] is the bias
      float sum = base_score + weight[num_feature];
      for (unsigned i = 0; i < v.length; ++i) {
        if (v[i].findex >= num_feature) continue;
        sum += weight[v[i].findex] * v[i].fvalue;
      }
      return sum;
    }
    inline float Predict(const float *weight,
                         const SparseMat::Vector &v) const {
      return MarginToPred(PredictMargin(weight, v));
    }
  };
  // model parameter
  ModelParam param;
  // weight corresponding to the model
  float *weight;
  LinearModel(void) : weight(NULL) {
  }
  ~LinearModel(void) {
    if (weight != NULL) delete [] weight;
  }
  // load model
  inline void Load(rabit::IStream &fi) {
    fi.Read(&param, sizeof(param));
    if (weight == NULL) {
      weight = new float[param.num_feature + 1];
    }
    fi.Read(weight, sizeof(float) * (param.num_feature + 1));
  }
  inline void Save(rabit::IStream &fo, const float *wptr = NULL) const {
    fo.Write(&param, sizeof(param));
    if (wptr == NULL) wptr = weight;
    fo.Write(wptr, sizeof(float) * (param.num_feature + 1));
  }
  inline float Predict(const SparseMat::Vector &v) const {
    return param.Predict(weight, v);
  }
};
}  // namespace linear
}  // namespace rabit
#endif  // RABIT_LINEAR_H_
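Editorial note, not part of the commit: MarginToLoss evaluates the negative log-likelihood of the logistic model. Writing $\sigma(m) = 1/(1+e^{-m})$ for margin $m$ and label $y \in [0,1]$,

$$ -\log p(y \mid m) = -y\log\sigma(m) - (1-y)\log(1-\sigma(m)) = y\log(1+e^{-m}) + (1-y)\left(m + \log(1+e^{-m})\right), $$

which matches `label * nlogprob + (1.0f - label) * (margin + nlogprob)` with `nlogprob` $= \log(1+e^{-m})$. The branch on the sign of the margin uses the identity $\log(1+e^{-m}) = -m + \log(1+e^{m})$, so the exponential is always taken of a non-positive argument and cannot overflow.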
rabit-learn/linear/run-linear-mock.sh | 15 (new executable file)
@@ -0,0 +1,15 @@
#!/bin/bash
if [[ $# -lt 1 ]]
then
    echo "Usage: <nprocess>"
    exit -1
fi

rm -rf mushroom.row* *.model
k=$1

# split the LibSVM file into k subfiles
python splitrows.py ../data/agaricus.txt.train mushroom $k

# run the linear mock test through the rabit tracker
../../tracker/rabit_demo.py -n $k linear.mock mushroom.row\%d "${*:2}" reg_L1=1 mock=0,1,1,0 mock=1,1,1,0 mock=0,2,1,1
rabit-learn/linear/run-linear.sh | 17 (new executable file)
@@ -0,0 +1,17 @@
#!/bin/bash
if [[ $# -lt 1 ]]
then
    echo "Usage: <nprocess>"
    exit -1
fi

rm -rf mushroom.row* *.model
k=$1

# split the LibSVM file into k subfiles
python splitrows.py ../data/agaricus.txt.train mushroom $k

# run the linear solver through the rabit tracker
../../tracker/rabit_demo.py -n $k linear.rabit mushroom.row\%d "${*:2}" reg_L1=1

./linear.rabit ../data/agaricus.txt.test task=pred model_in=final.model
rabit-learn/linear/splitrows.py | 24 (new file)
@@ -0,0 +1,24 @@
#!/usr/bin/python
import sys
import random

# split a LibSVM file into k row-wise subfiles
if len(sys.argv) < 4:
    print('Usage: <fin> <fout> k')
    exit(0)

random.seed(10)

k = int(sys.argv[3])
fi = open(sys.argv[1], 'r')
fos = []

for i in range(k):
    fos.append(open(sys.argv[2] + '.row%d' % i, 'w'))

for l in fi:
    i = random.randint(0, k - 1)
    fos[i].write(l)

fi.close()
for f in fos:
    f.close()
rabit-learn/solver/lbfgs.h | 653 (new file)
@@ -0,0 +1,653 @@
/*!
 * Copyright (c) 2015 by Contributors
 * \file lbfgs.h
 * \brief L-BFGS solver for general optimization problems
 *
 * \author Tianqi Chen
 */
#ifndef RABIT_LEARN_LBFGS_H_
#define RABIT_LEARN_LBFGS_H_
#include <cmath>
#include <rabit.h>

namespace rabit {
/*! \brief namespace of solvers for general problems */
namespace solver {
/*!
 * \brief objective function for the optimizers;
 * the objective function can also implement Save/Load
 * to persist any state parameters it needs to remember
 */
template<typename DType>
class IObjFunction : public rabit::ISerializable {
 public:
  // destructor
  virtual ~IObjFunction(void) {}
  /*!
   * \brief evaluate the function value for a given weight
   * \param weight weight of the function
   * \param size size of the weight
   */
  virtual double Eval(const DType *weight, size_t size) = 0;
  /*!
   * \return number of feature dimensions to be allocated;
   * only called once during initialization
   */
  virtual size_t InitNumDim(void) = 0;
  /*!
   * \brief initialize the weight before starting the solver;
   * only called once for initialization
   */
  virtual void InitModel(DType *weight, size_t size) = 0;
  /*!
   * \brief calculate the gradient for a given weight
   * \param out_grad used to store the gradient value of the function
   * \param weight weight of the function
   * \param size size of the weight
   */
  virtual void CalcGrad(DType *out_grad,
                        const DType *weight,
                        size_t size) = 0;
};

/*! \brief a basic version of an L-BFGS solver */
template<typename DType>
class LBFGSSolver {
 public:
  LBFGSSolver(void) {
    // set default values
    reg_L1 = 0.0f;
    max_linesearch_iter = 100;
    linesearch_backoff = 0.5f;
    linesearch_c1 = 1e-4;
    min_lbfgs_iter = 5;
    max_lbfgs_iter = 500;
    lbfgs_stop_tol = 1e-5f;
    silent = 0;
  }
  virtual ~LBFGSSolver(void) {}
  /*!
   * \brief set parameters from outside
   * \param name name of the parameter
   * \param val value of the parameter
   */
  virtual void SetParam(const char *name, const char *val) {
    if (!strcmp("num_dim", name)) {
      gstate.num_dim = static_cast<size_t>(atol(val));
    }
    if (!strcmp("size_memory", name)) {
      gstate.size_memory = static_cast<size_t>(atol(val));
    }
    if (!strcmp("reg_L1", name)) {
      reg_L1 = static_cast<float>(atof(val));
    }
    if (!strcmp("lbfgs_stop_tol", name)) {
      lbfgs_stop_tol = static_cast<float>(atof(val));
    }
    if (!strcmp("linesearch_backoff", name)) {
      linesearch_backoff = static_cast<float>(atof(val));
    }
    if (!strcmp("max_linesearch_iter", name)) {
      max_linesearch_iter = atoi(val);
    }
    if (!strcmp("max_lbfgs_iter", name)) {
      max_lbfgs_iter = atoi(val);
    }
    if (!strcmp("min_lbfgs_iter", name)) {
      min_lbfgs_iter = atoi(val);
    }
    if (!strcmp("linesearch_c1", name)) {
      linesearch_c1 = static_cast<float>(atof(val));
    }
  }
  /*!
   * \brief set the objective function to optimize;
   * the objective function only needs to evaluate and calculate
   * the gradient with respect to the current subset of data
   * \param obj the objective function to optimize
   */
  virtual void SetObjFunction(IObjFunction<DType> *obj) {
    gstate.obj = obj;
  }
  /*!
   * \brief initialize the L-BFGS solver;
   * the user must have already set the objective function
   */
  virtual void Init(void) {
    utils::Check(gstate.obj != NULL,
                 "LBFGSSolver.Init must SetObjFunction first");
    int version = rabit::LoadCheckPoint(&gstate, &hist);
    if (version == 0) {
      gstate.num_dim = gstate.obj->InitNumDim();
    } else {
      printf("restart from version=%d\n", version);
    }
    {
      // decide the parameter partition
      size_t nproc = rabit::GetWorldSize();
      size_t rank = rabit::GetRank();
      size_t step = (gstate.num_dim + nproc - 1) / nproc;
      // align upward to a multiple of 8
      step = (step + 7) / 8 * 8;
      utils::Assert(step * nproc >= gstate.num_dim, "BUG");
      range_begin_ = std::min(rank * step, gstate.num_dim);
      range_end_ = std::min((rank + 1) * step, gstate.num_dim);
    }
    if (version == 0) {
      gstate.Init();
      hist.Init(range_end_ - range_begin_, gstate.size_memory);
      gstate.obj->InitModel(gstate.weight, gstate.num_dim);
      // broadcast the initial model
      rabit::Broadcast(gstate.weight,
                       sizeof(DType) * gstate.num_dim, 0);
      gstate.old_objval = this->Eval(gstate.weight);
      gstate.init_objval = gstate.old_objval;

      if (silent == 0 && rabit::GetRank() == 0) {
        rabit::TrackerPrintf
            ("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu\n",
             gstate.num_dim, gstate.init_objval, gstate.size_memory);
      }
    }
  }
  /*!
   * \brief get the current weight vector;
   * note that if an update function is called,
   * the content of the weight vector is no longer valid
   * \return weight vector
   */
  virtual DType *GetWeight(void) {
    return gstate.weight;
  }
  /*!
   * \brief update the weight for one L-BFGS iteration
   * \return whether the stopping condition is met
   */
  virtual bool UpdateOneIter(void) {
    bool stop = false;
    GlobalState &g = gstate;
    g.obj->CalcGrad(g.grad, g.weight, g.num_dim);
    rabit::Allreduce<rabit::op::Sum>(g.grad, g.num_dim);
    // find the change direction
    double vdot = FindChangeDirection(g.tempw, g.grad, g.weight);
    // line search; g.grad now holds the new weight
    int iter = BacktrackLineSearch(g.grad, g.tempw, g.weight, vdot);
    utils::Check(iter < max_linesearch_iter, "line search failed");
    // swap in the new weight
    std::swap(g.weight, g.grad);
    // check the stopping condition
    if (gstate.num_iteration > min_lbfgs_iter) {
      if (g.old_objval - g.new_objval < lbfgs_stop_tol * g.init_objval) {
        return true;
      }
    }
    if (silent == 0 && rabit::GetRank() == 0) {
      rabit::TrackerPrintf
          ("[%d] L-BFGS: line search finishes in %d rounds, new_objval=%g, improvement=%g\n",
           gstate.num_iteration, iter,
           gstate.new_objval,
           gstate.old_objval - gstate.new_objval);
    }
    gstate.old_objval = gstate.new_objval;
    rabit::CheckPoint(&gstate, &hist);
    return stop;
  }
  /*! \brief run the optimization */
  virtual void Run(void) {
    this->Init();
    while (gstate.num_iteration < max_lbfgs_iter) {
      if (this->UpdateOneIter()) break;
    }
    if (silent == 0 && rabit::GetRank() == 0) {
      size_t nonzero = 0;
      for (size_t i = 0; i < gstate.num_dim; ++i) {
        if (gstate.weight[i] != 0.0f) nonzero += 1;
      }
      rabit::TrackerPrintf
          ("L-BFGS: finishes at iteration %d, %lu/%lu active weights\n",
           gstate.num_iteration, nonzero, gstate.num_dim);
    }
  }

 protected:
  // find the change direction, given the gradient
  // returns dot(dir, l1grad)
  virtual double FindChangeDirection(DType *dir,
                                     const DType *grad,
                                     const DType *weight) {
    int m = static_cast<int>(gstate.size_memory);
    int n = static_cast<int>(hist.num_useful());
    if (n < m) {
      utils::Assert(hist.num_useful() == gstate.num_iteration,
                    "BUG2, n=%d, it=%d", n, gstate.num_iteration);
    } else {
      utils::Assert(n == m, "BUG3");
    }
    const size_t num_dim = gstate.num_dim;
    const DType *gsub = grad + range_begin_;
    const size_t nsub = range_end_ - range_begin_;
    double vdot;
    if (n != 0) {
      // hist[m + n - 1] stores the old gradient
      Minus(hist[m + n - 1], gsub, hist[m + n - 1], nsub);
      SetL1Dir(hist[2 * m], gsub, weight + range_begin_, nsub);
      // index set for calculating the results
      std::vector<std::pair<size_t, size_t> > idxset;
      for (int j = 0; j < n; ++j) {
        idxset.push_back(std::make_pair(j, 2 * m));
        idxset.push_back(std::make_pair(j, n - 1));
        idxset.push_back(std::make_pair(j, m + n - 1));
      }
      for (int j = 0; j < n; ++j) {
        idxset.push_back(std::make_pair(m + j, 2 * m));
        idxset.push_back(std::make_pair(m + j, m + n - 1));
      }
      // calculate the dot products
      std::vector<double> tmp(idxset.size());
      for (size_t i = 0; i < tmp.size(); ++i) {
        tmp[i] = hist.CalcDot(idxset[i].first, idxset[i].second);
      }
      rabit::Allreduce<rabit::op::Sum>(BeginPtr(tmp), tmp.size());
      for (size_t i = 0; i < tmp.size(); ++i) {
        gstate.DotBuf(idxset[i].first, idxset[i].second) = tmp[i];
      }
      // BFGS steps, using the vector-free update:
      // parameterize the vector in the basis stored in hist
      std::vector<double> alpha(n);
      std::vector<double> delta(2 * m + 1, 0.0);
      delta[2 * m] = 1.0;
      // backward step
      for (int j = n - 1; j >= 0; --j) {
        double vsum = 0.0;
        for (size_t k = 0; k < delta.size(); ++k) {
          vsum += delta[k] * gstate.DotBuf(k, j);
        }
        alpha[j] = vsum / gstate.DotBuf(j, m + j);
        delta[m + j] = delta[m + j] - alpha[j];
      }
      // scale
      double scale = gstate.DotBuf(n - 1, m + n - 1) /
          gstate.DotBuf(m + n - 1, m + n - 1);
      for (size_t k = 0; k < delta.size(); ++k) {
        delta[k] *= scale;
      }
      // forward step
      for (int j = 0; j < n; ++j) {
        double vsum = 0.0;
        for (size_t k = 0; k < delta.size(); ++k) {
          vsum += delta[k] * gstate.DotBuf(k, m + j);
        }
        double beta = vsum / gstate.DotBuf(j, m + j);
        delta[j] = delta[j] + (alpha[j] - beta);
      }
      // set everything to zero
      std::fill(dir, dir + num_dim, 0.0f);
      DType *dirsub = dir + range_begin_;
      for (int i = 0; i < n; ++i) {
        AddScale(dirsub, dirsub, hist[m + i], delta[m + i], nsub);
      }
      AddScale(dirsub, dirsub, hist[2 * m], delta[2 * m], nsub);
      for (int i = 0; i < n; ++i) {
        AddScale(dirsub, dirsub, hist[i], delta[i], nsub);
      }
      FixDirL1Sign(dirsub, hist[2 * m], nsub);
      vdot = -Dot(dirsub, hist[2 * m], nsub);
      // allreduce to get the full direction
      rabit::Allreduce<rabit::op::Sum>(dir, num_dim);
      rabit::Allreduce<rabit::op::Sum>(&vdot, 1);
    } else {
      SetL1Dir(dir, grad, weight, num_dim);
      vdot = -Dot(dir, dir, num_dim);
    }
    // shift the history records
    if (n < m) {
      n += 1;
    } else {
      gstate.Shift(); hist.Shift();
    }
    hist.set_num_useful(n);
    // copy the gradient to hist[m + n - 1]
    memcpy(hist[m + n - 1], gsub, nsub * sizeof(DType));
    return vdot;
  }
  // backtracking line search along a given direction;
  // returns the number of line-search rounds used
  inline int BacktrackLineSearch(DType *new_weight,
                                 const DType *dir,
                                 const DType *weight,
                                 double dot_dir_l1grad) {
    utils::Assert(dot_dir_l1grad < 0.0f,
                  "gradient error, dotv=%g", dot_dir_l1grad);
    double alpha = 1.0;
    double backoff = linesearch_backoff;
    // unit descent direction in the first iteration
    if (gstate.num_iteration == 0) {
      utils::Assert(hist.num_useful() == 1, "hist.nuseful");
      alpha = 1.0f / std::sqrt(-dot_dir_l1grad);
      backoff = 0.1f;
    }
    int iter = 0;

    double old_val = gstate.old_objval;
    double c1 = this->linesearch_c1;
    while (true) {
      const size_t num_dim = gstate.num_dim;
      if (++iter >= max_linesearch_iter) return iter;
      AddScale(new_weight, weight, dir, alpha, num_dim);
      this->FixWeightL1Sign(new_weight, weight, num_dim);
      double new_val = this->Eval(new_weight);
      if (new_val - old_val <= c1 * dot_dir_l1grad * alpha) {
        gstate.new_objval = new_val; break;
      }
      alpha *= backoff;
    }
    // hist[n - 1] = new_weight - weight
    Minus(hist[hist.num_useful() - 1],
          new_weight + range_begin_,
          weight + range_begin_,
          range_end_ - range_begin_);
    gstate.num_iteration += 1;
    return iter;
  }
  // OWL-QN step for L1 regularization
  inline void SetL1Dir(DType *dst,
                       const DType *grad,
                       const DType *weight,
                       size_t size) {
    if (reg_L1 == 0.0) {
      for (size_t i = 0; i < size; ++i) {
        dst[i] = -grad[i];
      }
    } else {
      for (size_t i = 0; i < size; ++i) {
        if (weight[i] > 0.0f) {
          dst[i] = -grad[i] - reg_L1;
        } else if (weight[i] < 0.0f) {
          dst[i] = -grad[i] + reg_L1;
        } else {
          if (grad[i] < -reg_L1) {
            dst[i] = -grad[i] - reg_L1;
          } else if (grad[i] > reg_L1) {
            dst[i] = -grad[i] + reg_L1;
          } else {
            dst[i] = 0.0;
          }
        }
      }
    }
  }
  // OWL-QN step: fix the direction sign to be consistent with the proposal
  inline void FixDirL1Sign(DType *dir,
                           const DType *steepdir,
                           size_t size) {
    if (reg_L1 != 0.0f) {
      for (size_t i = 0; i < size; ++i) {
        if (dir[i] * steepdir[i] <= 0.0f) {
          dir[i] = 0.0f;
        }
      }
    }
  }
  // OWL-QN step: fix the weight sign so the update stays in the same orthant
  inline void FixWeightL1Sign(DType *new_weight,
                              const DType *weight,
                              size_t size) {
    if (reg_L1 != 0.0f) {
      for (size_t i = 0; i < size; ++i) {
        if (new_weight[i] * weight[i] < 0.0f) {
          new_weight[i] = 0.0f;
        }
      }
    }
  }
  inline double Eval(const DType *weight) {
    double val = gstate.obj->Eval(weight, gstate.num_dim);
    rabit::Allreduce<rabit::op::Sum>(&val, 1);
    if (reg_L1 != 0.0f) {
      double l1norm = 0.0;
      for (size_t i = 0; i < gstate.num_dim; ++i) {
        l1norm += std::abs(weight[i]);
      }
      val += l1norm * reg_L1;
    }
    return val;
  }

 private:
  // helper functions
  // dst = lhs + rhs * scale
  inline static void AddScale(DType *dst,
                              const DType *lhs,
                              const DType *rhs,
                              DType scale,
                              size_t size) {
    for (size_t i = 0; i < size; ++i) {
      dst[i] = lhs[i] + rhs[i] * scale;
    }
  }
  // dst = lhs - rhs
  inline static void Minus(DType *dst,
                           const DType *lhs,
                           const DType *rhs,
                           size_t size) {
    for (size_t i = 0; i < size; ++i) {
      dst[i] = lhs[i] - rhs[i];
    }
  }
  // returns dot(lhs, rhs)
  inline static double Dot(const DType *lhs,
                           const DType *rhs,
                           size_t size) {
    double res = 0.0;
    for (size_t i = 0; i < size; ++i) {
      res += lhs[i] * rhs[i];
    }
    return res;
  }
  // map a rolling array index
  inline static size_t MapIndex(size_t i, size_t offset,
                                size_t size_memory) {
    if (i == 2 * size_memory) return i;
    if (i < size_memory) {
      return (i + offset) % size_memory;
    } else {
      utils::Assert(i < 2 * size_memory,
                    "MapIndex: index exceeds bound, i=%lu", i);
      return (i + offset) % size_memory + size_memory;
    }
  }
  // global solver state
  struct GlobalState : public rabit::ISerializable {
   public:
    // memory size of L-BFGS
    size_t size_memory;
    // number of iterations passed
    size_t num_iteration;
    // number of features in the solver
    size_t num_dim;
    // initial objective value
    double init_objval;
    // previous objective value
    double old_objval;
    // new objective value
    double new_objval;
    // objective function
    IObjFunction<DType> *obj;
    // temporary storage
    DType *grad, *weight, *tempw;
    // constructor
    GlobalState(void)
        : obj(NULL), grad(NULL),
          weight(NULL), tempw(NULL) {
      size_memory = 10;
      num_iteration = 0;
      num_dim = 0;
      old_objval = 0.0;
    }
    ~GlobalState(void) {
      if (grad != NULL) {
        delete [] grad;
        delete [] weight;
        delete [] tempw;
      }
    }
    // initialize the space of the rolling array
    inline void Init(void) {
      size_t n = size_memory * 2 + 1;
      data.resize(n * n, 0.0);
      this->AllocSpace();
    }
    inline double &DotBuf(size_t i, size_t j) {
      if (i > j) std::swap(i, j);
      return data[MapIndex(i, offset_, size_memory) * (size_memory * 2 + 1) +
                  MapIndex(j, offset_, size_memory)];
    }
    // load the shift array
    virtual void Load(rabit::IStream &fi) {
      fi.Read(&size_memory, sizeof(size_memory));
      fi.Read(&num_iteration, sizeof(num_iteration));
      fi.Read(&num_dim, sizeof(num_dim));
      fi.Read(&init_objval, sizeof(init_objval));
      fi.Read(&old_objval, sizeof(old_objval));
      fi.Read(&offset_, sizeof(offset_));
      fi.Read(&data);
      this->AllocSpace();
      fi.Read(weight, sizeof(DType) * num_dim);
      obj->Load(fi);
    }
    // save the shift array
    virtual void Save(rabit::IStream &fo) const {
      fo.Write(&size_memory, sizeof(size_memory));
      fo.Write(&num_iteration, sizeof(num_iteration));
      fo.Write(&num_dim, sizeof(num_dim));
      fo.Write(&init_objval, sizeof(init_objval));
      fo.Write(&old_objval, sizeof(old_objval));
      fo.Write(&offset_, sizeof(offset_));
      fo.Write(data);
      fo.Write(weight, sizeof(DType) * num_dim);
      obj->Save(fo);
    }
    inline void Shift(void) {
      offset_ = (offset_ + 1) % size_memory;
    }

   private:
    // rolling offset in the current memory
    size_t offset_;
    std::vector<double> data;
    // allocate space
    inline void AllocSpace(void) {
      if (grad == NULL) {
        grad = new DType[num_dim];
        weight = new DType[num_dim];
        tempw = new DType[num_dim];
      }
    }
  };
  /*! \brief rolling array that carries the history information */
  struct HistoryArray : public rabit::ISerializable {
   public:
    HistoryArray(void) : dptr_(NULL) {
      num_useful_ = 0;
    }
    ~HistoryArray(void) {
      if (dptr_ != NULL) delete [] dptr_;
    }
    // initialize the space of the rolling array
    inline void Init(size_t num_col, size_t size_memory) {
      if (dptr_ != NULL &&
          (num_col_ != num_col || size_memory_ != size_memory)) {
        delete [] dptr_;
      }
      num_col_ = num_col;
      size_memory_ = size_memory;
      stride_ = num_col_;
      offset_ = 0;
      size_t n = size_memory * 2 + 1;
      dptr_ = new DType[n * stride_];
    }
    // fetch an element from the rolling array
    inline const DType *operator[](size_t i) const {
      return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;
    }
    inline DType *operator[](size_t i) {
      return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;
    }
    // shift the array: arr_old -> arr_new
    // for i in [0, size_memory - 1), arr_new[i] = arr_old[i + 1]
    // for i in [size_memory, 2 * size_memory - 1), arr_new[i] = arr_old[i + 1]
    // arr_old[0] and arr_old[size_memory] will be discarded
    inline void Shift(void) {
      offset_ = (offset_ + 1) % size_memory_;
    }
    inline double CalcDot(size_t i, size_t j) const {
      return Dot((*this)[i], (*this)[j], num_col_);
    }
    // get the number of useful memory slots
    inline const size_t &num_useful(void) const {
      return num_useful_;
    }
    // set the number of useful memory slots
    inline void set_num_useful(size_t num_useful) {
      utils::Assert(num_useful <= size_memory_,
                    "num_useful exceeds bound");
      num_useful_ = num_useful;
    }
    // load the shift array
    virtual void Load(rabit::IStream &fi) {
      fi.Read(&num_col_, sizeof(num_col_));
      fi.Read(&stride_, sizeof(stride_));
      fi.Read(&size_memory_, sizeof(size_memory_));
      fi.Read(&num_useful_, sizeof(num_useful_));
      this->Init(num_col_, size_memory_);
      for (size_t i = 0; i < num_useful_; ++i) {
        fi.Read((*this)[i], num_col_ * sizeof(DType));
        fi.Read((*this)[i + size_memory_], num_col_ * sizeof(DType));
      }
    }
    // save the shift array
    virtual void Save(rabit::IStream &fo) const {
      fo.Write(&num_col_, sizeof(num_col_));
      fo.Write(&stride_, sizeof(stride_));
      fo.Write(&size_memory_, sizeof(size_memory_));
      fo.Write(&num_useful_, sizeof(num_useful_));
      for (size_t i = 0; i < num_useful_; ++i) {
        fo.Write((*this)[i], num_col_ * sizeof(DType));
        fo.Write((*this)[i + size_memory_], num_col_ * sizeof(DType));
      }
    }

   private:
    // number of columns in each array
    size_t num_col_;
    // stride of each column, for alignment
    size_t stride_;
    // memory size of L-BFGS
    size_t size_memory_;
    // number of useful memory slots that will be used
    size_t num_useful_;
    // rolling offset in the current memory
    size_t offset_;
    // data pointer
    DType *dptr_;
  };
  // data structures for L-BFGS
  GlobalState gstate;
  HistoryArray hist;
  // silence the output
  int silent;
  // the subrange owned by the current node
  size_t range_begin_;
  size_t range_end_;
  // L1 regularization coefficient
  float reg_L1;
  // c1 ratio for the line search
  float linesearch_c1;
  float linesearch_backoff;
  int max_linesearch_iter;
  int max_lbfgs_iter;
  int min_lbfgs_iter;
  float lbfgs_stop_tol;
};
}  // namespace solver
}  // namespace rabit
#endif  // RABIT_LEARN_LBFGS_H_
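Editorial note, not part of the commit: FindChangeDirection is a vector-free L-BFGS update. Instead of manipulating full length-num_dim vectors, it expresses the direction in the basis of the 2m+1 stored vectors (the s-history, the y-history, and the steepest-descent vector hist[2*m]) and runs the recursion on the dot products cached in DotBuf, so only dot products need a global Allreduce. For orientation, here is the classic dense two-loop recursion it reproduces, a sketch assuming at least one curvature pair (s[j] = x_{j+1} - x_j, y[j] = g_{j+1} - g_j, oldest first); the scale step matches DotBuf(n-1, m+n-1) / DotBuf(m+n-1, m+n-1) above:

#include <cstddef>
#include <vector>

inline double VDot(const std::vector<double> &a, const std::vector<double> &b) {
  double res = 0.0;
  for (size_t i = 0; i < a.size(); ++i) res += a[i] * b[i];
  return res;
}

// returns dir = -H * grad, with H the implicit inverse-Hessian approximation
std::vector<double> TwoLoopDirection(const std::vector< std::vector<double> > &s,
                                     const std::vector< std::vector<double> > &y,
                                     const std::vector<double> &grad) {
  const size_t n = s.size(), dim = grad.size();
  std::vector<double> q = grad;
  std::vector<double> alpha(n);
  // backward pass: peel off the effect of each curvature pair, newest first
  for (size_t j = n; j-- > 0;) {
    alpha[j] = VDot(s[j], q) / VDot(s[j], y[j]);
    for (size_t i = 0; i < dim; ++i) q[i] -= alpha[j] * y[j][i];
  }
  // initial Hessian scaling H0 = (s.y / y.y) * I, taken from the newest pair
  double scale = VDot(s[n - 1], y[n - 1]) / VDot(y[n - 1], y[n - 1]);
  for (size_t i = 0; i < dim; ++i) q[i] *= scale;
  // forward pass: add the corrections back in, oldest first
  for (size_t j = 0; j < n; ++j) {
    double beta = VDot(y[j], q) / VDot(s[j], y[j]);
    for (size_t i = 0; i < dim; ++i) q[i] += (alpha[j] - beta) * s[j][i];
  }
  // negate to obtain a descent direction; the solver above instead starts
  // from the already-negated OWL-QN steepest-descent vector hist[2*m]
  for (size_t i = 0; i < dim; ++i) q[i] = -q[i];
  return q;
}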
rabit-learn/utils/base64.h | 204 (new file)
@@ -0,0 +1,204 @@
#ifndef RABIT_LEARN_UTILS_BASE64_H_
#define RABIT_LEARN_UTILS_BASE64_H_
/*!
 * \file base64.h
 * \brief data stream support for input and output from/to base64 streams;
 *   base64 is easier to store and pass around as text in MapReduce
 * \author Tianqi Chen
 */
#include <cctype>
#include <cstdio>
#include <rabit/io.h>

namespace rabit {
namespace utils {
/*! \brief namespace of base64 decoding and encoding tables */
namespace base64 {
const char DecodeTable[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  62,  // '+'
  0, 0, 0,
  63,  // '/'
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  // '0'-'9'
  0, 0, 0, 0, 0, 0, 0,
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  // 'A'-'Z'
  0, 0, 0, 0, 0, 0,
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
  39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  // 'a'-'z'
};
static const char EncodeTable[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
}  // namespace base64
/*! \brief stream that reads from base64; note we take file pointers */
class Base64InStream: public IStream {
 public:
  explicit Base64InStream(FILE *fp) : fp(fp) {
    num_prev = 0; tmp_ch = 0;
  }
  /*!
   * \brief initialize the stream position to the beginning of the next base64 stream;
   *   call this function before actually starting to read
   */
  inline void InitPosition(void) {
    // get a character
    do {
      tmp_ch = fgetc(fp);
    } while (isspace(tmp_ch));
  }
  /*! \brief whether the current position is the end of a base64 stream */
  inline bool IsEOF(void) const {
    return num_prev == 0 && (tmp_ch == EOF || isspace(tmp_ch));
  }
  virtual size_t Read(void *ptr, size_t size) {
    using base64::DecodeTable;
    if (size == 0) return 0;
    // use tlen to record the remaining size
    size_t tlen = size;
    unsigned char *cptr = static_cast<unsigned char*>(ptr);
    // if anything is left, load from the previously buffered result
    if (num_prev != 0) {
      if (num_prev == 2) {
        if (tlen >= 2) {
          *cptr++ = buf_prev[0];
          *cptr++ = buf_prev[1];
          tlen -= 2;
          num_prev = 0;
        } else {
          // assert tlen == 1
          *cptr++ = buf_prev[0]; --tlen;
          buf_prev[0] = buf_prev[1];
          num_prev = 1;
        }
      } else {
        // assert num_prev == 1
        *cptr++ = buf_prev[0]; --tlen; num_prev = 0;
      }
    }
    if (tlen == 0) return size;
    int nvalue;
    // note: everything goes in 4-character groups in base64,
    // so we process 4 characters as a unit
    while (tlen && tmp_ch != EOF && !isspace(tmp_ch)) {
      // first byte
      nvalue = DecodeTable[tmp_ch] << 18;
      {
        // second byte
        Check((tmp_ch = fgetc(fp), tmp_ch != EOF && !isspace(tmp_ch)),
              "invalid base64 format");
        nvalue |= DecodeTable[tmp_ch] << 12;
        *cptr++ = (nvalue >> 16) & 0xFF; --tlen;
      }
      {
        // third byte
        Check((tmp_ch = fgetc(fp), tmp_ch != EOF && !isspace(tmp_ch)),
              "invalid base64 format");
        // handle termination
        if (tmp_ch == '=') {
          Check((tmp_ch = fgetc(fp), tmp_ch == '='), "invalid base64 format");
          Check((tmp_ch = fgetc(fp), tmp_ch == EOF || isspace(tmp_ch)),
                "invalid base64 format");
          break;
        }
        nvalue |= DecodeTable[tmp_ch] << 6;
        if (tlen) {
          *cptr++ = (nvalue >> 8) & 0xFF; --tlen;
        } else {
          buf_prev[num_prev++] = (nvalue >> 8) & 0xFF;
        }
      }
      {
        // fourth byte
        Check((tmp_ch = fgetc(fp), tmp_ch != EOF && !isspace(tmp_ch)),
              "invalid base64 format");
        if (tmp_ch == '=') {
          Check((tmp_ch = fgetc(fp), tmp_ch == EOF || isspace(tmp_ch)),
                "invalid base64 format");
          break;
        }
        nvalue |= DecodeTable[tmp_ch];
        if (tlen) {
          *cptr++ = nvalue & 0xFF; --tlen;
        } else {
          buf_prev[num_prev++] = nvalue & 0xFF;
        }
      }
      // get the next char
      tmp_ch = fgetc(fp);
    }
    if (kStrictCheck) {
      Check(tlen == 0, "Base64InStream: read incomplete");
    }
    return size - tlen;
  }
  virtual void Write(const void *ptr, size_t size) {
    utils::Error("Base64InStream does not support write");
  }

 private:
  FILE *fp;
  int tmp_ch;
  int num_prev;
  unsigned char buf_prev[2];
  // whether we need to do a strict check
  static const bool kStrictCheck = false;
};
/*! \brief stream that writes to base64; note we take file pointers */
class Base64OutStream: public IStream {
 public:
  explicit Base64OutStream(FILE *fp) : fp(fp) {
    buf_top = 0;
  }
  virtual void Write(const void *ptr, size_t size) {
    using base64::EncodeTable;
    size_t tlen = size;
    const unsigned char *cptr = static_cast<const unsigned char*>(ptr);
    while (tlen) {
      while (buf_top < 3 && tlen != 0) {
        buf[++buf_top] = *cptr++; --tlen;
      }
      if (buf_top == 3) {
        // flush 4 characters out
        fputc(EncodeTable[buf[1] >> 2], fp);
        fputc(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F], fp);
        fputc(EncodeTable[((buf[2] << 2) | (buf[3] >> 6)) & 0x3F], fp);
        fputc(EncodeTable[buf[3] & 0x3F], fp);
        buf_top = 0;
      }
    }
  }
  virtual size_t Read(void *ptr, size_t size) {
    Error("Base64OutStream does not support read");
    return 0;
  }
  /*!
   * \brief finish writing the current base64 stream and do some postprocessing
   * \param endch character to put at the end of the stream; if it is EOF, nothing is done
   */
  inline void Finish(char endch = EOF) {
    using base64::EncodeTable;
    if (buf_top == 1) {
      fputc(EncodeTable[buf[1] >> 2], fp);
      fputc(EncodeTable[(buf[1] << 4) & 0x3F], fp);
      fputc('=', fp);
      fputc('=', fp);
    }
    if (buf_top == 2) {
      fputc(EncodeTable[buf[1] >> 2], fp);
      fputc(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F], fp);
      fputc(EncodeTable[(buf[2] << 2) & 0x3F], fp);
      fputc('=', fp);
    }
    buf_top = 0;
    if (endch != EOF) fputc(endch, fp);
  }

 private:
  FILE *fp;
  int buf_top;
  unsigned char buf[4];
};
}  // namespace utils
}  // namespace rabit
#endif  // RABIT_LEARN_UTILS_BASE64_H_
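Editorial note, not part of the commit: a minimal round-trip sketch using the two classes above, serializing a POD struct to a base64 text file and reading it back. This mirrors how linear.cc writes "bs64"-tagged models.

#include <cstdio>

struct Param { int a; float b; };

inline void Base64RoundTrip(const char *fname) {
  Param out = {42, 0.5f}, in = {0, 0.0f};
  FILE *fp = std::fopen(fname, "w");
  rabit::utils::Base64OutStream bout(fp);
  bout.Write(&out, sizeof(out));
  bout.Finish('\n');   // flush the padding and terminate the text line
  std::fclose(fp);

  fp = std::fopen(fname, "r");
  rabit::utils::Base64InStream bin(fp);
  bin.InitPosition();  // skip whitespace to the first base64 character
  bin.Read(&in, sizeof(in));
  std::fclose(fp);
}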
@@ -1,24 +1,38 @@
#include <rabit.h>
/*!
 * Copyright (c) 2015 by Contributors
 * \file data.h
 * \brief simple data structures that can be used by the model
 *
 * \author Tianqi Chen
 */
#ifndef RABIT_LEARN_DATA_H_
#define RABIT_LEARN_DATA_H_

#include <vector>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <limits>
#include <cmath>
#include <rabit.h>

namespace rabit {
// typedef the index type
typedef unsigned index_t;

/*! \brief sparse matrix, CSR format */
struct SparseMat {
  // sparse matrix entry
  struct Entry {
    // feature index
    unsigned findex;
    index_t findex;
    // feature value
    float fvalue;
  };
  // sparse vector
  struct Vector {
    const Entry *data;
    unsigned length;
    index_t length;
    inline const Entry &operator[](size_t i) const {
      return data[i];
    }
@@ -26,7 +40,7 @@ struct SparseMat {
  inline Vector operator[](size_t i) const {
    Vector v;
    v.data = &data[0] + row_ptr[i];
    v.length = static_cast<unsigned>(row_ptr[i + 1] - row_ptr[i]);
    v.length = static_cast<index_t>(row_ptr[i + 1] - row_ptr[i]);
    return v;
  }
  // load data from the LibSVM format
@@ -35,7 +49,13 @@ struct SparseMat {
    if (!strcmp(fname, "stdin")) {
      fi = stdin;
    } else {
      fi = utils::FopenCheck(fname, "r");
      if (strchr(fname, '%') != NULL) {
        char s_tmp[256];
        snprintf(s_tmp, sizeof(s_tmp), fname, rabit::GetRank());
        fi = utils::FopenCheck(s_tmp, "r");
      } else {
        fi = utils::FopenCheck(fname, "r");
      }
    }
    row_ptr.clear();
    row_ptr.push_back(0);
@@ -45,9 +65,11 @@ struct SparseMat {
    char tmp[1024];
    while (fscanf(fi, "%s", tmp) == 1) {
      Entry e;
      if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) {
      unsigned long fidx;
      if (sscanf(tmp, "%lu:%f", &fidx, &e.fvalue) == 2) {
        e.findex = static_cast<index_t>(fidx);
        data.push_back(e);
        feat_dim = std::max(e.findex, feat_dim);
        feat_dim = std::max(fidx, feat_dim);
      } else {
        if (!init) {
          labels.push_back(label);
@@ -61,6 +83,9 @@ struct SparseMat {
    labels.push_back(label);
    row_ptr.push_back(data.size());
    feat_dim += 1;
    utils::Check(feat_dim < std::numeric_limits<index_t>::max(),
                 "feature dimension exceeds the limit of index_t; "
                 "consider changing index_t to unsigned long");
    // close the file
    if (fi != stdin) fclose(fi);
  }
@@ -68,7 +93,7 @@ struct SparseMat {
    return row_ptr.size() - 1;
  }
  // maximum feature dimension
  unsigned feat_dim;
  size_t feat_dim;
  std::vector<size_t> row_ptr;
  std::vector<Entry> data;
  std::vector<float> labels;
@@ -115,3 +140,4 @@ inline int Random(int value) {
  return rand() % value;
}
}  // namespace rabit
#endif  // RABIT_LEARN_DATA_H_
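Editorial illustration, not part of the commit, of the CSR layout SparseMat uses, for the 3-row matrix {row 0: (0, 1.0), (3, 2.0); row 1: empty; row 2: (1, 0.5)}:

// row_ptr = [0, 2, 2, 3]  // row i spans data[row_ptr[i]] .. data[row_ptr[i+1]-1]
// data    = [(findex=0, fvalue=1.0), (findex=3, fvalue=2.0), (findex=1, fvalue=0.5)]
// NumRow() == row_ptr.size() - 1 == 3
// operator[](1) returns a Vector with length 0 (the empty row)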
rabit-learn/utils/io.h | 40 (new file)
@@ -0,0 +1,40 @@
#ifndef RABIT_LEARN_UTILS_IO_H_
#define RABIT_LEARN_UTILS_IO_H_
/*!
 * \file io.h
 * \brief additional stream interface
 * \author Tianqi Chen
 */
namespace rabit {
namespace utils {
/*! \brief implementation of a file I/O stream */
class FileStream : public ISeekStream {
 public:
  explicit FileStream(FILE *fp) : fp(fp) {}
  explicit FileStream(void) {
    this->fp = NULL;
  }
  virtual size_t Read(void *ptr, size_t size) {
    return std::fread(ptr, size, 1, fp);
  }
  virtual void Write(const void *ptr, size_t size) {
    std::fwrite(ptr, size, 1, fp);
  }
  virtual void Seek(size_t pos) {
    std::fseek(fp, static_cast<long>(pos), SEEK_SET);
  }
  virtual size_t Tell(void) {
    return std::ftell(fp);
  }
  inline void Close(void) {
    if (fp != NULL) {
      std::fclose(fp); fp = NULL;
    }
  }

 private:
  FILE *fp;
};
}  // namespace utils
}  // namespace rabit
#endif  // RABIT_LEARN_UTILS_IO_H_
@@ -77,7 +77,10 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
                                PreprocFunction prepare_fun,
                                void *prepare_arg) {
  // skip the action in the single-node case
  if (world_size == 1) return;
  if (world_size == 1) {
    if (prepare_fun != NULL) prepare_fun(prepare_arg);
    return;
  }
  bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter);
  // now we are free to remove the last result, if any
  if (resbuf.LastSeqNo() != -1 &&
@@ -92,6 +92,7 @@ void Allreduce_(void *sendrecvbuf,
                mpi::OpType op,
                IEngine::PreprocFunction prepare_fun,
                void *prepare_arg) {
  if (prepare_fun != NULL) prepare_fun(prepare_arg);
}

// code for reduce handle
@@ -106,6 +107,8 @@ void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t type_nbytes) {}
void ReduceHandle::Allreduce(void *sendrecvbuf,
                             size_t type_nbytes, size_t count,
                             IEngine::PreprocFunction prepare_fun,
                             void *prepare_arg) {}
                             void *prepare_arg) {
  if (prepare_fun != NULL) prepare_fun(prepare_arg);
}
}  // namespace engine
}  // namespace rabit
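Editorial note, not part of the commit: the three hunks above are the "bugfix of lazy prepare" from the squashed log. Every engine implementation must invoke prepare_fun even when the reduction itself is a no-op (single node, mock, or empty engine), because callers use the hook to fill the buffer lazily right before communication. A sketch of the calling pattern, assuming the prepare_fun/prepare_arg overload of rabit::Allreduce declared in rabit.h:

#include <cstddef>
#include <rabit.h>

struct Ctx { double *buf; size_t n; };

// invoked by the engine immediately before the allreduce runs;
// without the fix, the world_size == 1 early return skipped this call
// and buf was used uninitialized
void PrepareBuf(void *arg) {
  Ctx *ctx = static_cast<Ctx*>(arg);
  for (size_t i = 0; i < ctx->n; ++i) {
    ctx->buf[i] = static_cast<double>(i);  // stand-in for the real local computation
  }
}

void SumAcrossWorkers(Ctx &ctx) {
  rabit::Allreduce<rabit::op::Sum>(ctx.buf, ctx.n, PrepareBuf, &ctx);
}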