Merge remote-tracking branch 'upstream/master'

El Potaeto 2015-02-12 09:51:42 +01:00
commit f1f346713a
23 changed files with 9714 additions and 35 deletions

View File

@@ -10,6 +10,7 @@
#include "./updater_sync-inl.hpp"
#include "./updater_distcol-inl.hpp"
#include "./updater_histmaker-inl.hpp"
#include "./updater_skmaker-inl.hpp"
#endif
namespace xgboost {
@@ -22,6 +23,7 @@ IUpdater* CreateUpdater(const char *name) {
#ifndef XGBOOST_STRICT_CXX98_
if (!strcmp(name, "sync")) return new TreeSyncher();
if (!strcmp(name, "grow_histmaker")) return new CQHistMaker<GradStats>();
if (!strcmp(name, "grow_skmaker")) return new SketchMaker();
if (!strcmp(name, "distcol")) return new DistColMaker<GradStats>();
#endif
utils::Error("unknown updater:%s", name);

View File

@@ -8,7 +8,7 @@
*/
#include <vector>
#include <algorithm>
#include <rabit.h>
#include "../sync/sync.h"
#include "../utils/quantile.h"
#include "./updater_basemaker-inl.hpp"
@@ -123,8 +123,8 @@ class SketchMaker: public BaseMaker {
sum_hess += b.sum_hess;
}
/*! \brief same as add, reduce is used in All Reduce */
inline void Reduce(const SKStats &b) {
this->Add(b);
inline static void Reduce(SKStats &a, const SKStats &b) {
a.Add(b);
}
/*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
@@ -156,18 +156,19 @@ class SketchMaker: public BaseMaker {
batch[i].length == nrows,
&thread_sketch[omp_get_thread_num()]);
}
}
}
// setup maximum size
unsigned max_size = param.max_sketch_size();
// synchronize sketch
summary_array.Init(sketchs.size(), max_size);
summary_array.resize(sketchs.size());
for (size_t i = 0; i < sketchs.size(); ++i) {
utils::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out;
sketchs[i].GetSummary(&out);
summary_array.Set(i, out);
summary_array[i].Reserve(max_size);
summary_array[i].SetPrune(out, max_size);
}
size_t nbytes = summary_array.MemSize();;
sketch_reducer.Allreduce(&summary_array, nbytes);
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size());
}
// update sketch information in column fid
inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair,
@@ -186,7 +187,7 @@ class SketchMaker: public BaseMaker {
const unsigned wid = this->node2workindex[nid];
for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].sum_total = 0.0f;
sbuilder[3 * nid + k].sketch = &sketchs[(wid * tree.param.num_feature + fid) * 3 + k];
sbuilder[3 * nid + k].sketch = &sketchs[(wid * tree.param.num_feature + fid) * 3 + k];
}
}
if (!col_full) {
@@ -367,7 +368,7 @@ class SketchMaker: public BaseMaker {
c.sum_hess >= param.min_child_weight) {
bst_float cpt = fsplits.back();
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
best->Update(loss_chg, fid, cpt + fabsf(cpt) + 1.0f, true);
best->Update(loss_chg, fid, cpt + fabsf(cpt) + 1.0f, false);
}
}
}
@@ -380,11 +381,11 @@ class SketchMaker: public BaseMaker {
// node statistics
std::vector<SKStats> node_stats;
// summary array
WXQSketch::SummaryArray summary_array;
std::vector<WXQSketch::SummaryContainer> summary_array;
// reducer for summary
rabit::Reducer<SKStats> stats_reducer;
rabit::Reducer<SKStats, SKStats::Reduce> stats_reducer;
// reducer for summary
rabit::SerializeReducer<WXQSketch::SummaryArray> sketch_reducer;
rabit::SerializeReducer<WXQSketch::SummaryContainer> sketch_reducer;
// per node, per feature sketch
std::vector< utils::WXQuantileSketch<bst_float, bst_float> > sketchs;
};
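The Reduce signature change above is what lets rabit::Reducer bind the reduction at compile time as a plain function pointer (as in the new rabit::Reducer<SKStats, SKStats::Reduce> member). A minimal sketch with a hypothetical Stats type, mirroring the SKStats pattern rather than the original code:

#include <rabit.h>
struct Stats {
  double sum_grad, sum_hess;
  inline void Add(const Stats &b) {
    sum_grad += b.sum_grad; sum_hess += b.sum_hess;
  }
  // must be static: the reducer takes it as a template function pointer
  inline static void Reduce(Stats &a, const Stats &b) { a.Add(b); }
};
// declare once, then reduce an array of Stats across all workers:
//   rabit::Reducer<Stats, Stats::Reduce> reducer;
//   reducer.Allreduce(stats_array, num_stats);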

View File

@@ -1,13 +1,28 @@
Copyright (c) 2014 by Contributors
All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of rabit nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -8,6 +8,8 @@ It also contains links to the Machine Learning packages that use rabit.
Toolkits
====
* [KMeans Clustering](kmeans)
* [Linear and Logistic Regression](linear)
* [XGBoost: eXtreme Gradient Boosting](https://github.com/tqchen/xgboost/tree/master/multi-node)
- xgboost is a very fast boosted tree (also known as GBDT) library that can run more than
10 times faster than existing packages

View File

@@ -4,7 +4,7 @@ export CC = gcc
export CXX = g++
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm -L../../lib
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../../include -I../common
export CFLAGS = -Wall -msse2 -Wno-unknown-pragmas -fPIC -I../../include
.PHONY: clean all lib mpi
all: $(BIN) $(MOCKBIN)

View File

@@ -0,0 +1,2 @@
This folder contains the processed example dataset used by the demos.
Copyright of the dataset belongs to the original copyright holder.

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,126 @@
0 cap-shape=bell i
1 cap-shape=conical i
2 cap-shape=convex i
3 cap-shape=flat i
4 cap-shape=knobbed i
5 cap-shape=sunken i
6 cap-surface=fibrous i
7 cap-surface=grooves i
8 cap-surface=scaly i
9 cap-surface=smooth i
10 cap-color=brown i
11 cap-color=buff i
12 cap-color=cinnamon i
13 cap-color=gray i
14 cap-color=green i
15 cap-color=pink i
16 cap-color=purple i
17 cap-color=red i
18 cap-color=white i
19 cap-color=yellow i
20 bruises?=bruises i
21 bruises?=no i
22 odor=almond i
23 odor=anise i
24 odor=creosote i
25 odor=fishy i
26 odor=foul i
27 odor=musty i
28 odor=none i
29 odor=pungent i
30 odor=spicy i
31 gill-attachment=attached i
32 gill-attachment=descending i
33 gill-attachment=free i
34 gill-attachment=notched i
35 gill-spacing=close i
36 gill-spacing=crowded i
37 gill-spacing=distant i
38 gill-size=broad i
39 gill-size=narrow i
40 gill-color=black i
41 gill-color=brown i
42 gill-color=buff i
43 gill-color=chocolate i
44 gill-color=gray i
45 gill-color=green i
46 gill-color=orange i
47 gill-color=pink i
48 gill-color=purple i
49 gill-color=red i
50 gill-color=white i
51 gill-color=yellow i
52 stalk-shape=enlarging i
53 stalk-shape=tapering i
54 stalk-root=bulbous i
55 stalk-root=club i
56 stalk-root=cup i
57 stalk-root=equal i
58 stalk-root=rhizomorphs i
59 stalk-root=rooted i
60 stalk-root=missing i
61 stalk-surface-above-ring=fibrous i
62 stalk-surface-above-ring=scaly i
63 stalk-surface-above-ring=silky i
64 stalk-surface-above-ring=smooth i
65 stalk-surface-below-ring=fibrous i
66 stalk-surface-below-ring=scaly i
67 stalk-surface-below-ring=silky i
68 stalk-surface-below-ring=smooth i
69 stalk-color-above-ring=brown i
70 stalk-color-above-ring=buff i
71 stalk-color-above-ring=cinnamon i
72 stalk-color-above-ring=gray i
73 stalk-color-above-ring=orange i
74 stalk-color-above-ring=pink i
75 stalk-color-above-ring=red i
76 stalk-color-above-ring=white i
77 stalk-color-above-ring=yellow i
78 stalk-color-below-ring=brown i
79 stalk-color-below-ring=buff i
80 stalk-color-below-ring=cinnamon i
81 stalk-color-below-ring=gray i
82 stalk-color-below-ring=orange i
83 stalk-color-below-ring=pink i
84 stalk-color-below-ring=red i
85 stalk-color-below-ring=white i
86 stalk-color-below-ring=yellow i
87 veil-type=partial i
88 veil-type=universal i
89 veil-color=brown i
90 veil-color=orange i
91 veil-color=white i
92 veil-color=yellow i
93 ring-number=none i
94 ring-number=one i
95 ring-number=two i
96 ring-type=cobwebby i
97 ring-type=evanescent i
98 ring-type=flaring i
99 ring-type=large i
100 ring-type=none i
101 ring-type=pendant i
102 ring-type=sheathing i
103 ring-type=zone i
104 spore-print-color=black i
105 spore-print-color=brown i
106 spore-print-color=buff i
107 spore-print-color=chocolate i
108 spore-print-color=green i
109 spore-print-color=orange i
110 spore-print-color=purple i
111 spore-print-color=white i
112 spore-print-color=yellow i
113 population=abundant i
114 population=clustered i
115 population=numerous i
116 population=scattered i
117 population=several i
118 population=solitary i
119 habitat=grasses i
120 habitat=leaves i
121 habitat=meadows i
122 habitat=paths i
123 habitat=urban i
124 habitat=waste i
125 habitat=woods i

View File

@@ -2,8 +2,8 @@
// facing an exception
#include <rabit.h>
#include <rabit/utils.h>
#include "./toolkit_util.h"
#include <time.h>
#include "../utils/data.h"
using namespace rabit;
@@ -83,9 +83,12 @@ inline size_t GetCluster(const Matrix &centroids,
int main(int argc, char *argv[]) {
if (argc < 5) {
// initialize rabit engine
rabit::Init(argc, argv);
if (rabit::GetRank() == 0) {
rabit::TrackerPrintf("Usage: <data_dir> num_cluster max_iter <out_model>\n");
}
rabit::Finalize();
return 0;
}
clock_t tStart = clock();

View File

@@ -0,0 +1,14 @@
# specify tensor path
BIN = linear.rabit
MOCKBIN= linear.mock
MPIBIN =
# object files that make up the program
OBJ = linear.o
# common build script for programs
include ../common.mk
CFLAGS+=-fopenmp
linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h
# dependencies here
linear.rabit: linear.o lib
linear.mock: linear.o lib

View File

@@ -0,0 +1,33 @@
Linear and Logistic Regression
====
* input format: LibSVM
* Example: [run-linear.sh](run-linear.sh)
Parameters
===
All parameters can be set on the command line in the form param=value
#### Important Parameters
* objective [default = logistic]
- can be linear or logistic
* base_score [default = 0.5]
- global bias; recommended to be set to the mean value of the labels
* reg_L1 [default = 0]
- L1 regularization coefficient
* reg_L2 [default = 1]
- L2 regularization coefficient
* lbfgs_stop_tol [default = 1e-5]
- relative tolerance level of loss reduction with respect to initial loss
* max_lbfgs_iter [default = 500]
- maximum number of lbfgs iterations
### Optimization Related parameters
* min_lbfgs_iter [default = 5]
- minimum number of lbfgs iterations
* max_linesearch_iter [default = 100]
- maximum number of iterations in line search
* linesearch_c1 [default = 1e-4]
- c1 coefficient in backtracking line search
* linesearch_backoff [default = 0.5]
- backoff ratio in line search
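For example, a typical training run sets parameters directly on the command line (hypothetical input file name; all parameter names are from the list above):

    ./linear.rabit train.libsvm objective=logistic reg_L2=1 max_lbfgs_iter=200 model_out=final.model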

View File

@@ -0,0 +1,239 @@
#include "./linear.h"
#include "../utils/io.h"
#include "../utils/base64.h"
namespace rabit {
namespace linear {
class LinearObjFunction : public solver::IObjFunction<float> {
public:
// training threads
int nthread;
// L2 regularization
float reg_L2;
// model
LinearModel model;
// training data
SparseMat dtrain;
// solver
solver::LBFGSSolver<float> lbfgs;
// constructor
LinearObjFunction(void) {
lbfgs.SetObjFunction(this);
nthread = 1;
reg_L2 = 0.0f;
model.weight = NULL;
task = "train";
model_in = "NULL";
name_pred = "pred.txt";
model_out = "final.model";
}
virtual ~LinearObjFunction(void) {
}
// set parameters
inline void SetParam(const char *name, const char *val) {
model.param.SetParam(name, val);
lbfgs.SetParam(name, val);
if (!strcmp(name, "num_feature")) {
char ndigit[30];
sprintf(ndigit, "%lu", model.param.num_feature + 1);
lbfgs.SetParam("num_dim", ndigit);
}
if (!strcmp(name, "reg_L2")) {
reg_L2 = static_cast<float>(atof(val));
}
if (!strcmp(name, "nthread")) {
nthread = atoi(val);
}
if (!strcmp(name, "task")) task = val;
if (!strcmp(name, "model_in")) model_in = val;
if (!strcmp(name, "model_out")) model_out = val;
if (!strcmp(name, "name_pred")) name_pred = val;
}
inline void Run(void) {
if (model_in != "NULL") {
this->LoadModel(model_in.c_str());
}
if (task == "train") {
lbfgs.Run();
this->SaveModel(model_out.c_str(), lbfgs.GetWeight());
} else if (task == "pred") {
this->TaskPred();
} else {
utils::Error("unknown task=%s", task.c_str());
}
}
inline void TaskPred(void) {
utils::Check(model_in != "NULL",
"must set model_in for task=pred");
FILE *fp = utils::FopenCheck(name_pred.c_str(), "w");
for (size_t i = 0; i < dtrain.NumRow(); ++i) {
float pred = model.Predict(dtrain[i]);
fprintf(fp, "%g\n", pred);
}
fclose(fp);
printf("Finishing writing to %s\n", name_pred.c_str());
}
inline void LoadModel(const char *fname) {
FILE *fp = utils::FopenCheck(fname, "rb");
std::string header; header.resize(4);
// check the header to determine the binary encoding,
// which can be base64 or raw binary
utils::FileStream fi(fp);
utils::Check(fi.Read(&header[0], 4) != 0, "invalid model");
// base64 format
if (header == "bs64") {
utils::Base64InStream bsin(fp);
bsin.InitPosition();
model.Load(bsin);
fclose(fp);
return;
} else if (header == "binf") {
model.Load(fi);
fclose(fp);
return;
} else {
utils::Error("invalid model file");
}
}
inline void SaveModel(const char *fname,
const float *wptr,
bool save_base64 = false) {
FILE *fp;
bool use_stdout = false;
if (!strcmp(fname, "stdout")) {
fp = stdout;
use_stdout = true;
} else {
fp = utils::FopenCheck(fname, "wb");
}
utils::FileStream fo(fp);
if (save_base64 || use_stdout) {
fo.Write("bs64\t", 5);
utils::Base64OutStream bout(fp);
model.Save(bout, wptr);
bout.Finish('\n');
} else {
fo.Write("binf", 4);
model.Save(fo, wptr);
}
if (!use_stdout) {
fclose(fp);
}
}
inline void LoadData(const char *fname) {
dtrain.Load(fname);
}
virtual size_t InitNumDim(void) {
if (model_in == "NULL") {
size_t ndim = dtrain.feat_dim;
rabit::Allreduce<rabit::op::Max>(&ndim, 1);
model.param.num_feature = std::max(ndim, model.param.num_feature);
}
return model.param.num_feature + 1;
}
virtual void InitModel(float *weight, size_t size) {
if (model_in == "NULL") {
memset(weight, 0, size * sizeof(float));
model.param.InitBaseScore();
} else {
rabit::Broadcast(model.weight, size * sizeof(float), 0);
memcpy(weight, model.weight, size * sizeof(float));
}
}
// load model
virtual void Load(rabit::IStream &fi) {
fi.Read(&model.param, sizeof(model.param));
}
virtual void Save(rabit::IStream &fo) const {
fo.Write(&model.param, sizeof(model.param));
}
virtual double Eval(const float *weight, size_t size) {
if (nthread != 0) omp_set_num_threads(nthread);
utils::Check(size == model.param.num_feature + 1,
"size consistency check");
double sum_val = 0.0;
#pragma omp parallel for schedule(static) reduction(+:sum_val)
for (size_t i = 0; i < dtrain.NumRow(); ++i) {
float py = model.param.PredictMargin(weight, dtrain[i]);
float fv = model.param.MarginToLoss(dtrain.labels[i], py);
sum_val += fv;
}
if (rabit::GetRank() == 0) {
// only add regularization once
if (reg_L2 != 0.0f) {
double sum_sqr = 0.0;
for (size_t i = 0; i < model.param.num_feature; ++i) {
sum_sqr += weight[i] * weight[i];
}
sum_val += 0.5 * reg_L2 * sum_sqr;
}
}
utils::Check(!std::isnan(sum_val), "nan occurs");
return sum_val;
}
virtual void CalcGrad(float *out_grad,
const float *weight,
size_t size) {
if (nthread != 0) omp_set_num_threads(nthread);
utils::Check(size == model.param.num_feature + 1,
"size consistency check");
memset(out_grad, 0, sizeof(float) * size);
double sum_gbias = 0.0;
#pragma omp parallel for schedule(static) reduction(+:sum_gbias)
for (size_t i = 0; i < dtrain.NumRow(); ++i) {
SparseMat::Vector v = dtrain[i];
float py = model.param.Predict(weight, v);
float grad = model.param.PredToGrad(dtrain.labels[i], py);
for (index_t j = 0; j < v.length; ++j) {
out_grad[v[j].findex] += v[j].fvalue * grad;
}
sum_gbias += grad;
}
out_grad[model.param.num_feature] = static_cast<float>(sum_gbias);
if (rabit::GetRank() == 0) {
// only add regularization once
if (reg_L2 != 0.0f) {
for (size_t i = 0; i < model.param.num_feature; ++i) {
out_grad[i] += reg_L2 * weight[i];
}
}
}
}
private:
std::string task;
std::string model_in;
std::string model_out;
std::string name_pred;
};
} // namespace linear
} // namespace rabit
int main(int argc, char *argv[]) {
if (argc < 2) {
// initialize rabit engine
rabit::Init(argc, argv);
if (rabit::GetRank() == 0) {
rabit::TrackerPrintf("Usage: <data_in> param=val\n");
}
rabit::Finalize();
return 0;
}
rabit::linear::LinearObjFunction linear;
if (!strcmp(argv[1], "stdin")) {
linear.LoadData(argv[1]);
rabit::Init(argc, argv);
} else {
rabit::Init(argc, argv);
linear.LoadData(argv[1]);
}
for (int i = 2; i < argc; ++i) {
char name[256], val[256];
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
linear.SetParam(name, val);
}
}
linear.Run();
rabit::Finalize();
return 0;
}

View File

@@ -0,0 +1,133 @@
/*!
* Copyright (c) 2015 by Contributors
* \file linear.h
* \brief Linear and Logistic regression
*
* \author Tianqi Chen
*/
#ifndef RABIT_LINEAR_H_
#define RABIT_LINEAR_H_
#include <omp.h>
#include "../utils/data.h"
#include "../solver/lbfgs.h"
namespace rabit {
namespace linear {
/*! \brief simple linear model */
struct LinearModel {
struct ModelParam {
/*! \brief global bias */
float base_score;
/*! \brief number of features */
size_t num_feature;
/*! \brief loss type*/
int loss_type;
// reserved field
int reserved[16];
// constructor
ModelParam(void) {
base_score = 0.5f;
num_feature = 0;
loss_type = 1;
std::memset(reserved, 0, sizeof(reserved));
}
// initialize base score
inline void InitBaseScore(void) {
utils::Check(base_score > 0.0f && base_score < 1.0f,
"base_score must be in (0,1) for logistic loss");
base_score = -std::log(1.0f / base_score - 1.0f);
}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
inline void SetParam(const char *name, const char *val) {
using namespace std;
if (!strcmp("base_score", name)) {
base_score = static_cast<float>(atof(val));
}
if (!strcmp("num_feature", name)) {
num_feature = static_cast<size_t>(atol(val));
}
if (!strcmp("objective", name)) {
if (!strcmp("linear", val)) {
loss_type = 0;
} else if (!strcmp("logistic", val)) {
loss_type = 1;
} else {
utils::Error("unknown objective type %s\n", val);
}
}
}
// transform margin to prediction
inline float MarginToPred(float margin) const {
if (loss_type == 1) {
return 1.0f / (1.0f + std::exp(-margin));
} else {
return margin;
}
}
// margin to loss
inline float MarginToLoss(float label, float margin) const {
if (loss_type == 1) {
float nlogprob;
if (margin > 0.0f) {
nlogprob = std::log(1.0f + std::exp(-margin));
} else {
nlogprob = -margin + std::log(1.0f + std::exp(margin));
}
return label * nlogprob +
(1.0f - label) * (margin + nlogprob);
} else {
float diff = margin - label;
return 0.5f * diff * diff;
}
}
inline float PredToGrad(float label, float pred) const {
return pred - label;
}
inline float PredictMargin(const float *weight,
const SparseMat::Vector &v) const {
// weight[num_feature] is bias
float sum = base_score + weight[num_feature];
for (unsigned i = 0; i < v.length; ++i) {
if (v[i].findex >= num_feature) continue;
sum += weight[v[i].findex] * v[i].fvalue;
}
return sum;
}
inline float Predict(const float *weight,
const SparseMat::Vector &v) const {
return MarginToPred(PredictMargin(weight, v));
}
};
// model parameter
ModelParam param;
// weight corresponding to the model
float *weight;
LinearModel(void) : weight(NULL) {
}
~LinearModel(void) {
if (weight != NULL) delete [] weight;
}
// load model
inline void Load(rabit::IStream &fi) {
fi.Read(&param, sizeof(param));
if (weight == NULL) {
weight = new float[param.num_feature + 1];
}
fi.Read(weight, sizeof(float) * (param.num_feature + 1));
}
inline void Save(rabit::IStream &fo, const float *wptr = NULL) const {
fo.Write(&param, sizeof(param));
if (wptr == NULL) wptr = weight;
fo.Write(wptr, sizeof(float) * (param.num_feature + 1));
}
inline float Predict(const SparseMat::Vector &v) const {
return param.Predict(weight, v);
}
};
} // namespace linear
} // namespace rabit
#endif // RABIT_LINEAR_H_
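The branching in MarginToLoss above is the numerically stable form of the logistic loss: for label $y \in \{0, 1\}$ and margin $m$,

$$\ell(y, m) = y \log(1 + e^{-m}) + (1 - y)\left(m + \log(1 + e^{-m})\right),$$

where $\log(1 + e^{-m})$ is computed directly when $m > 0$ and as $-m + \log(1 + e^{m})$ otherwise, so the exponent is never positive and the exp cannot overflow. The two terms are exactly $-\log\sigma(m)$ and $-\log(1 - \sigma(m))$ for the sigmoid $\sigma(m) = 1 / (1 + e^{-m})$.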

View File

@@ -0,0 +1,15 @@
#!/bin/bash
if [[ $# -lt 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf mushroom.row* *.model
k=$1
# split the LibSVM file into k row-wise subfiles
python splitrows.py ../data/agaricus.txt.train mushroom $k
# run the linear demo through the rabit tracker with mock workers that inject failures
../../tracker/rabit_demo.py -n $k linear.mock mushroom.row\%d "${*:2}" reg_L1=1 mock=0,1,1,0 mock=1,1,1,0 mock=0,2,1,1

View File

@@ -0,0 +1,17 @@
#!/bin/bash
if [[ $# -lt 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf mushroom.row* *.model
k=$1
# split the LibSVM file into k row-wise subfiles
python splitrows.py ../data/agaricus.txt.train mushroom $k
# run the linear demo through the rabit tracker
../../tracker/rabit_demo.py -n $k linear.rabit mushroom.row\%d "${*:2}" reg_L1=1
./linear.rabit ../data/agaricus.txt.test task=pred model_in=final.model

View File

@@ -0,0 +1,24 @@
#!/usr/bin/python
import sys
import random
# randomly split a LibSVM file into k row-wise subfiles
if len(sys.argv) < 4:
    print('Usage: <fin> <fout> k')
    exit(0)
random.seed(10)
k = int(sys.argv[3])
fos = []
for i in range(k):
    fos.append(open(sys.argv[2] + '.row%d' % i, 'w'))
for l in open(sys.argv[1]):
    i = random.randint(0, k - 1)
    fos[i].write(l)
for f in fos:
    f.close()

View File

@@ -0,0 +1,653 @@
/*!
* Copyright (c) 2015 by Contributors
* \file lbfgs.h
* \brief L-BFGS solver for general optimization problem
*
* \author Tianqi Chen
*/
#ifndef RABIT_LEARN_LBFGS_H_
#define RABIT_LEARN_LBFGS_H_
#include <cmath>
#include <rabit.h>
namespace rabit {
/*! \brief namespace of solver for general problems */
namespace solver {
/*!
* \brief objective function for optimizers
* the objective function can also implement save/load
* to persist state parameters that need to survive a checkpoint restart
*/
template<typename DType>
class IObjFunction : public rabit::ISerializable {
public:
// destructor
virtual ~IObjFunction(void){}
/*!
* \brief evaluate function values for a given weight
* \param weight weight of the function
* \param size size of the weight
*/
virtual double Eval(const DType *weight, size_t size) = 0;
/*!
* \return number of feature dimension to be allocated
* only called once during initialization
*/
virtual size_t InitNumDim(void) = 0;
/*!
* \brief initialize the weight before starting the solver
* only called once for initialization
*/
virtual void InitModel(DType *weight, size_t size) = 0;
/*!
* \brief calculate gradient for a given weight
* \param out_grad used to store the gradient value of the function
* \param weight weight of the function
* \param size size of the weight
*/
virtual void CalcGrad(DType *out_grad,
const DType *weight,
size_t size) = 0;
};
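// a minimal sketch (hypothetical, illustration only): a toy objective
// f(w) = 0.5 * ||w||^2 implementing the interface above; its gradient
// is simply w, and it carries no extra state beyond what the solver
// checkpoints (a real objective would evaluate only its local data shard)
template<typename DType>
class QuadraticObjExample : public IObjFunction<DType> {
 public:
  virtual ~QuadraticObjExample(void) {}
  virtual double Eval(const DType *weight, size_t size) {
    double sum = 0.0;
    for (size_t i = 0; i < size; ++i) sum += 0.5 * weight[i] * weight[i];
    return sum;
  }
  virtual size_t InitNumDim(void) { return 16; }  // fixed toy dimension
  virtual void InitModel(DType *weight, size_t size) {
    for (size_t i = 0; i < size; ++i) weight[i] = 1;  // start away from optimum
  }
  virtual void CalcGrad(DType *out_grad, const DType *weight, size_t size) {
    for (size_t i = 0; i < size; ++i) out_grad[i] = weight[i];
  }
  // nothing extra to persist
  virtual void Load(rabit::IStream &fi) {}
  virtual void Save(rabit::IStream &fo) const {}
};
// would be used as: LBFGSSolver<float> s; s.SetObjFunction(&obj); s.Run();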
/*! \brief a basic version L-BFGS solver */
template<typename DType>
class LBFGSSolver {
public:
LBFGSSolver(void) {
// set default values
reg_L1 = 0.0f;
max_linesearch_iter = 100;
linesearch_backoff = 0.5f;
linesearch_c1 = 1e-4;
min_lbfgs_iter = 5;
max_lbfgs_iter = 500;
lbfgs_stop_tol = 1e-5f;
silent = 0;
}
virtual ~LBFGSSolver(void) {}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
virtual void SetParam(const char *name, const char *val) {
if (!strcmp("num_dim", name)) {
gstate.num_dim = static_cast<size_t>(atol(val));
}
if (!strcmp("size_memory", name)) {
gstate.size_memory = static_cast<size_t>(atol(val));
}
if (!strcmp("reg_L1", name)) {
reg_L1 = static_cast<float>(atof(val));
}
if (!strcmp("lbfgs_stop_tol", name)) {
lbfgs_stop_tol = static_cast<float>(atof(val));
}
if (!strcmp("linesearch_backoff", name)) {
linesearch_backoff = static_cast<float>(atof(val));
}
if (!strcmp("max_linesearch_iter", name)) {
max_linesearch_iter = atoi(val);
}
if (!strcmp("max_lbfgs_iter", name)) {
max_lbfgs_iter = atoi(val);
}
if (!strcmp("min_lbfgs_iter", name)) {
min_lbfgs_iter = atoi(val);
}
if (!strcmp("linesearch_c1", name)) {
linesearch_c1 = static_cast<float>(atof(val));
}
}
/*!
* \brief set objective function to optimize
* the objective function only need to evaluate and calculate
* gradient with respect to current subset of data
* \param obj the objective function we are looking for
*/
virtual void SetObjFunction(IObjFunction<DType> *obj) {
gstate.obj = obj;
}
/*!
* \brief initialize the LBFGS solver
* user must already set the objective function
*/
virtual void Init(void) {
utils::Check(gstate.obj != NULL,
"LBFGSSolver.Init must SetObjFunction first");
int version = rabit::LoadCheckPoint(&gstate, &hist);
if (version == 0) {
gstate.num_dim = gstate.obj->InitNumDim();
} else {
printf("restart from version=%d\n", version);
}
{
// decide parameter partition
size_t nproc = rabit::GetWorldSize();
size_t rank = rabit::GetRank();
size_t step = (gstate.num_dim + nproc - 1) / nproc;
// upper align
step = (step + 7) / 8 * 8;
utils::Assert(step * nproc >= gstate.num_dim, "BUG");
range_begin_ = std::min(rank * step, gstate.num_dim);
range_end_ = std::min((rank + 1) * step, gstate.num_dim);
}
if (version == 0) {
gstate.Init();
hist.Init(range_end_ - range_begin_, gstate.size_memory);
gstate.obj->InitModel(gstate.weight, gstate.num_dim);
// broadcast initialize model
rabit::Broadcast(gstate.weight,
sizeof(DType) * gstate.num_dim, 0);
gstate.old_objval = this->Eval(gstate.weight);
gstate.init_objval = gstate.old_objval;
if (silent == 0 && rabit::GetRank() == 0) {
rabit::TrackerPrintf
("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu\n",
gstate.num_dim, gstate.init_objval, gstate.size_memory);
}
}
}
/*!
* \brief get the current weight vector
* note that if update function is called
* the content of weight vector is no longer valid
* \return weight vector
*/
virtual DType *GetWeight(void) {
return gstate.weight;
}
/*!
* \brief update the weight for one L-BFGS iteration
* \return whether stopping condition is met
*/
virtual bool UpdateOneIter(void) {
bool stop = false;
GlobalState &g = gstate;
g.obj->CalcGrad(g.grad, g.weight, g.num_dim);
rabit::Allreduce<rabit::op::Sum>(g.grad, g.num_dim);
// find change direction
double vdot = FindChangeDirection(g.tempw, g.grad, g.weight);
// line-search, g.grad is now new weight
int iter = BacktrackLineSearch(g.grad, g.tempw, g.weight, vdot);
utils::Check(iter < max_linesearch_iter, "line search failed");
// swap new weight
std::swap(g.weight, g.grad);
// check stop condition
if (gstate.num_iteration > min_lbfgs_iter) {
if (g.old_objval - g.new_objval < lbfgs_stop_tol * g.init_objval) {
return true;
}
}
if (silent == 0 && rabit::GetRank() == 0) {
rabit::TrackerPrintf
("[%d] L-BFGS: linesearch finishes in %d rounds, new_objval=%g, improvment=%g\n",
gstate.num_iteration, iter,
gstate.new_objval,
gstate.old_objval - gstate.new_objval);
}
gstate.old_objval = gstate.new_objval;
rabit::CheckPoint(&gstate, &hist);
return stop;
}
/*! \brief run optimization */
virtual void Run(void) {
this->Init();
while (gstate.num_iteration < max_lbfgs_iter) {
if (this->UpdateOneIter()) break;
}
if (silent == 0 && rabit::GetRank() == 0) {
size_t nonzero = 0;
for (size_t i = 0; i < gstate.num_dim; ++i) {
if (gstate.weight[i] != 0.0f) nonzero += 1;
}
rabit::TrackerPrintf
("L-BFGS: finishes at iteration %d, %lu/%lu active weights\n",
gstate.num_iteration, nonzero, gstate.num_dim);
}
}
protected:
// find the delta value, given gradient
// return dot(dir, l1grad)
virtual double FindChangeDirection(DType *dir,
const DType *grad,
const DType *weight) {
int m = static_cast<int>(gstate.size_memory);
int n = static_cast<int>(hist.num_useful());
if (n < m) {
utils::Assert(hist.num_useful() == gstate.num_iteration,
"BUG2, n=%d, it=%d", n, gstate.num_iteration);
} else {
utils::Assert(n == m, "BUG3");
}
const size_t num_dim = gstate.num_dim;
const DType *gsub = grad + range_begin_;
const size_t nsub = range_end_ - range_begin_;
double vdot;
if (n != 0) {
// hist[m + n - 1] stores old gradient
Minus(hist[m + n - 1], gsub, hist[m + n - 1], nsub);
SetL1Dir(hist[2 * m], gsub, weight + range_begin_, nsub);
// index set for calculating results
std::vector<std::pair<size_t, size_t> > idxset;
for (int j = 0; j < n; ++j) {
idxset.push_back(std::make_pair(j, 2 * m));
idxset.push_back(std::make_pair(j, n - 1));
idxset.push_back(std::make_pair(j, m + n - 1));
}
for (int j = 0; j < n; ++j) {
idxset.push_back(std::make_pair(m + j, 2 * m));
idxset.push_back(std::make_pair(m + j, m + n - 1));
}
// calculate dot products
std::vector<double> tmp(idxset.size());
for (size_t i = 0; i < tmp.size(); ++i) {
tmp[i] = hist.CalcDot(idxset[i].first, idxset[i].second);
}
rabit::Allreduce<rabit::op::Sum>(BeginPtr(tmp), tmp.size());
for (size_t i = 0; i < tmp.size(); ++i) {
gstate.DotBuf(idxset[i].first, idxset[i].second) = tmp[i];
}
// BFGS steps, use vector-free update
// parameterize vector using basis in hist
std::vector<double> alpha(n);
std::vector<double> delta(2 * m + 1, 0.0);
delta[2 * m] = 1.0;
// backward step
for (int j = n - 1; j >= 0; --j) {
double vsum = 0.0;
for (size_t k = 0; k < delta.size(); ++k) {
vsum += delta[k] * gstate.DotBuf(k, j);
}
alpha[j] = vsum / gstate.DotBuf(j, m + j);
delta[m + j] = delta[m + j] - alpha[j];
}
// scale
double scale = gstate.DotBuf(n - 1, m + n - 1) /
gstate.DotBuf(m + n - 1, m + n - 1);
for (size_t k = 0; k < delta.size(); ++k) {
delta[k] *= scale;
}
// forward step
for (int j = 0; j < n; ++j) {
double vsum = 0.0;
for (size_t k = 0; k < delta.size(); ++k) {
vsum += delta[k] * gstate.DotBuf(k, m + j);
}
double beta = vsum / gstate.DotBuf(j, m + j);
delta[j] = delta[j] + (alpha[j] - beta);
}
// set all to zero
std::fill(dir, dir + num_dim, 0.0f);
DType *dirsub = dir + range_begin_;
for (int i = 0; i < n; ++i) {
AddScale(dirsub, dirsub, hist[m + i], delta[m + i], nsub);
}
AddScale(dirsub, dirsub, hist[2 * m], delta[2 * m], nsub);
for (int i = 0; i < n; ++i) {
AddScale(dirsub, dirsub, hist[i], delta[i], nsub);
}
FixDirL1Sign(dirsub, hist[2 * m], nsub);
vdot = -Dot(dirsub, hist[2 * m], nsub);
// allreduce to get full direction
rabit::Allreduce<rabit::op::Sum>(dir, num_dim);
rabit::Allreduce<rabit::op::Sum>(&vdot, 1);
} else {
SetL1Dir(dir, grad, weight, num_dim);
vdot = -Dot(dir, dir, num_dim);
}
// shift the history record
if (n < m) {
n += 1;
} else {
gstate.Shift(); hist.Shift();
}
hist.set_num_useful(n);
// copy gradient to hist[m + n - 1]
memcpy(hist[m + n - 1], gsub, nsub * sizeof(DType));
return vdot;
}
// line search for given direction
// return whether there is a descent
inline int BacktrackLineSearch(DType *new_weight,
const DType *dir,
const DType *weight,
double dot_dir_l1grad) {
utils::Assert(dot_dir_l1grad < 0.0f,
"gradient error, dotv=%g", dot_dir_l1grad);
double alpha = 1.0;
double backoff = linesearch_backoff;
// unit descent direction in first iter
if (gstate.num_iteration == 0) {
utils::Assert(hist.num_useful() == 1, "hist.nuseful");
alpha = 1.0f / std::sqrt(-dot_dir_l1grad);
backoff = 0.1f;
}
int iter = 0;
double old_val = gstate.old_objval;
double c1 = this->linesearch_c1;
while (true) {
const size_t num_dim = gstate.num_dim;
if (++iter >= max_linesearch_iter) return iter;
AddScale(new_weight, weight, dir, alpha, num_dim);
this->FixWeightL1Sign(new_weight, weight, num_dim);
double new_val = this->Eval(new_weight);
if (new_val - old_val <= c1 * dot_dir_l1grad * alpha) {
gstate.new_objval = new_val; break;
}
alpha *= backoff;
}
// hist[n - 1] = new_weight - weight
Minus(hist[hist.num_useful() - 1],
new_weight + range_begin_,
weight + range_begin_,
range_end_ - range_begin_);
gstate.num_iteration += 1;
return iter;
}
// OWL-QN step for L1 regularization
inline void SetL1Dir(DType *dst,
const DType *grad,
const DType *weight,
size_t size) {
if (reg_L1 == 0.0) {
for (size_t i = 0; i < size; ++i) {
dst[i] = -grad[i];
}
} else {
for (size_t i = 0; i < size; ++i) {
if (weight[i] > 0.0f) {
dst[i] = -grad[i] - reg_L1;
} else if (weight[i] < 0.0f) {
dst[i] = -grad[i] + reg_L1;
} else {
if (grad[i] < -reg_L1) {
dst[i] = -grad[i] - reg_L1;
} else if (grad[i] > reg_L1) {
dst[i] = -grad[i] + reg_L1;
} else {
dst[i] = 0.0;
}
}
}
}
}
// OWL-QN step: fix direction sign to be consistent with proposal
inline void FixDirL1Sign(DType *dir,
const DType *steepdir,
size_t size) {
if (reg_L1 != 0.0f) {
for (size_t i = 0; i < size; ++i) {
if (dir[i] * steepdir[i] <= 0.0f) {
dir[i] = 0.0f;
}
}
}
}
// OWL-QN step: project the new weight back onto the orthant of the old weight
inline void FixWeightL1Sign(DType *new_weight,
const DType *weight,
size_t size) {
if (reg_L1 != 0.0f) {
for (size_t i = 0; i < size; ++i) {
if (new_weight[i] * weight[i] < 0.0f) {
new_weight[i] = 0.0f;
}
}
}
}
inline double Eval(const DType *weight) {
double val = gstate.obj->Eval(weight, gstate.num_dim);
rabit::Allreduce<rabit::op::Sum>(&val, 1);
if (reg_L1 != 0.0f) {
double l1norm = 0.0;
for (size_t i = 0; i < gstate.num_dim; ++i) {
l1norm += std::abs(weight[i]);
}
val += l1norm * reg_L1;
}
return val;
}
private:
// helper functions
// dst = lhs + rhs * scale
inline static void AddScale(DType *dst,
const DType *lhs,
const DType *rhs,
DType scale,
size_t size) {
for (size_t i = 0; i < size; ++i) {
dst[i] = lhs[i] + rhs[i] * scale;
}
}
// dst = lhs - rhs
inline static void Minus(DType *dst,
const DType *lhs,
const DType *rhs,
size_t size) {
for (size_t i = 0; i < size; ++i) {
dst[i] = lhs[i] - rhs[i];
}
}
// return dot(lhs, rhs)
inline static double Dot(const DType *lhs,
const DType *rhs,
size_t size) {
double res = 0.0;
for (size_t i = 0; i < size; ++i) {
res += lhs[i] * rhs[i];
}
return res;
}
// map rolling array index
inline static size_t MapIndex(size_t i, size_t offset,
size_t size_memory) {
if (i == 2 * size_memory) return i;
if (i < size_memory) {
return (i + offset) % size_memory;
} else {
utils::Assert(i < 2 * size_memory,
"MapIndex: index exceed bound, i=%lu", i);
return (i + offset) % size_memory + size_memory;
}
}
// global solver state
struct GlobalState : public rabit::ISerializable {
public:
// memory size of L-BFGS
size_t size_memory;
// number of iterations passed
size_t num_iteration;
// number of features in the solver
size_t num_dim;
// initialize objective value
double init_objval;
// history objective value
double old_objval;
// new objective value
double new_objval;
// objective function
IObjFunction<DType> *obj;
// temporal storage
DType *grad, *weight, *tempw;
// constructor
GlobalState(void)
: obj(NULL), grad(NULL),
weight(NULL), tempw(NULL) {
size_memory = 10;
num_iteration = 0;
num_dim = 0;
old_objval = 0.0;
}
~GlobalState(void) {
if (grad != NULL) {
delete [] grad;
delete [] weight;
delete [] tempw;
}
}
// initialize the space of the rolling array
inline void Init(void) {
size_t n = size_memory * 2 + 1;
data.resize(n * n, 0.0);
this->AllocSpace();
}
inline double &DotBuf(size_t i, size_t j) {
if (i > j) std::swap(i, j);
return data[MapIndex(i, offset_, size_memory) * (size_memory * 2 + 1) +
MapIndex(j, offset_, size_memory)];
}
// load the shift array
virtual void Load(rabit::IStream &fi) {
fi.Read(&size_memory, sizeof(size_memory));
fi.Read(&num_iteration, sizeof(num_iteration));
fi.Read(&num_dim, sizeof(num_dim));
fi.Read(&init_objval, sizeof(init_objval));
fi.Read(&old_objval, sizeof(old_objval));
fi.Read(&offset_, sizeof(offset_));
fi.Read(&data);
this->AllocSpace();
fi.Read(weight, sizeof(DType) * num_dim);
obj->Load(fi);
}
// save the shift array
virtual void Save(rabit::IStream &fo) const {
fo.Write(&size_memory, sizeof(size_memory));
fo.Write(&num_iteration, sizeof(num_iteration));
fo.Write(&num_dim, sizeof(num_dim));
fo.Write(&init_objval, sizeof(init_objval));
fo.Write(&old_objval, sizeof(old_objval));
fo.Write(&offset_, sizeof(offset_));
fo.Write(data);
fo.Write(weight, sizeof(DType) * num_dim);
obj->Save(fo);
}
inline void Shift(void) {
offset_ = (offset_ + 1) % size_memory;
}
private:
// rolling offset in the current memory
size_t offset_;
std::vector<double> data;
// allocate space
inline void AllocSpace(void) {
if (grad == NULL) {
grad = new DType[num_dim];
weight = new DType[num_dim];
tempw = new DType[num_dim];
}
}
};
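// a concrete check of MapIndex above (hypothetical values): with
// size_memory m = 3 and offset = 1, slot 2*m = 6 maps to itself (the
// steepest-descent slot), slot i in [0, 3) maps to (i + 1) % 3, and
// slot i in [3, 6) maps to (i + 1) % 3 + 3 -- so the s-history and
// y-history roll in lockstep each time Shift() advances the offset,
// and DotBuf entries keep addressing the same logical vectors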
/*! \brief rolling array that carries history information */
struct HistoryArray : public rabit::ISerializable {
public:
HistoryArray(void) : dptr_(NULL) {
num_useful_ = 0;
}
~HistoryArray(void) {
if (dptr_ != NULL) delete [] dptr_;
}
// initialize the space of the rolling array
inline void Init(size_t num_col, size_t size_memory) {
if (dptr_ != NULL &&
(num_col_ != num_col || size_memory_ != size_memory)) {
delete [] dptr_;
}
num_col_ = num_col;
size_memory_ = size_memory;
stride_ = num_col_;
offset_ = 0;
size_t n = size_memory * 2 + 1;
dptr_ = new DType[n * stride_];
}
// fetch element from rolling array
inline const DType *operator[](size_t i) const {
return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;
}
inline DType *operator[](size_t i) {
return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;
}
// shift array: arr_old -> arr_new
// for i in [0, size_memory - 1), arr_new[i] = arr_old[i + 1]
// for i in [size_memory, 2 * size_memory - 1), arr_new[i] = arr_old[i + 1]
// arr_old[0] and arr_old[size_memory] will be discarded
inline void Shift(void) {
offset_ = (offset_ + 1) % size_memory_;
}
inline double CalcDot(size_t i, size_t j) const {
return Dot((*this)[i], (*this)[j], num_col_);
}
// set number of useful memory
inline const size_t &num_useful(void) const {
return num_useful_;
}
// set number of useful memory
inline void set_num_useful(size_t num_useful) {
utils::Assert(num_useful <= size_memory_,
"num_useful exceed bound");
num_useful_ = num_useful;
}
// load the shift array
virtual void Load(rabit::IStream &fi) {
fi.Read(&num_col_, sizeof(num_col_));
fi.Read(&stride_, sizeof(stride_));
fi.Read(&size_memory_, sizeof(size_memory_));
fi.Read(&num_useful_, sizeof(num_useful_));
this->Init(num_col_, size_memory_);
for (size_t i = 0; i < num_useful_; ++i) {
fi.Read((*this)[i], num_col_ * sizeof(DType));
fi.Read((*this)[i + size_memory_], num_col_ * sizeof(DType));
}
}
// save the shift array
virtual void Save(rabit::IStream &fi) const {
fi.Write(&num_col_, sizeof(num_col_));
fi.Write(&stride_, sizeof(stride_));
fi.Write(&size_memory_, sizeof(size_memory_));
fi.Write(&num_useful_, sizeof(num_useful_));
for (size_t i = 0; i < num_useful_; ++i) {
fi.Write((*this)[i], num_col_ * sizeof(DType));
fi.Write((*this)[i + size_memory_], num_col_ * sizeof(DType));
}
}
private:
// number of columns in each of array
size_t num_col_;
// stride for each of column for alignment
size_t stride_;
// memory size of L-BFGS
size_t size_memory_;
// number of useful memory that will be used
size_t num_useful_;
// rolling offset in the current memory
size_t offset_;
// data pointer
DType *dptr_;
};
// data structure for LBFGS
GlobalState gstate;
HistoryArray hist;
// silent
int silent;
// the subrange of current node
size_t range_begin_;
size_t range_end_;
// L1 regularization co-efficient
float reg_L1;
// c1 ratio for line search
float linesearch_c1;
float linesearch_backoff;
int max_linesearch_iter;
int max_lbfgs_iter;
int min_lbfgs_iter;
float lbfgs_stop_tol;
};
} // namespace solver
} // namespace rabit
#endif // RABIT_LEARN_LBFGS_H_
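For reference, FindChangeDirection above is a vector-free form of the standard L-BFGS two-loop recursion. Writing $s_i$ for the stored weight deltas (history slots $[0, m)$), $y_i$ for the gradient deltas (slots $[m, 2m)$), $\rho_i = 1 / (y_i^\top s_i)$, and initializing $q$ to the (L1-adjusted) steepest-descent direction kept in slot $2m$:

$$\alpha_i = \rho_i\, s_i^\top q, \qquad q \leftarrow q - \alpha_i y_i \qquad (i = n-1, \dots, 0)$$
$$q \leftarrow \frac{s_{n-1}^\top y_{n-1}}{y_{n-1}^\top y_{n-1}}\; q$$
$$\beta = \rho_i\, y_i^\top q, \qquad q \leftarrow q + (\alpha_i - \beta)\, s_i \qquad (i = 0, \dots, n-1)$$

Rather than updating $q$ directly, the code tracks its coefficients (the delta array) over the basis $\{s_i, y_i, \text{steepest-descent dir}\}$, so only the $O(m^2)$ pairwise dot products ever need an Allreduce.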

View File

@@ -0,0 +1,204 @@
#ifndef RABIT_LEARN_UTILS_BASE64_H_
#define RABIT_LEARN_UTILS_BASE64_H_
/*!
* \file base64.h
* \brief data stream support to input and output from/to base64 stream
* base64 is easier to store and pass as text format in mapreduce
* \author Tianqi Chen
*/
#include <cctype>
#include <cstdio>
#include <rabit/io.h>
namespace rabit {
namespace utils {
/*! \brief namespace of base64 decoding and encoding table */
namespace base64 {
const char DecodeTable[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62, // '+'
0, 0, 0,
63, // '/'
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
0, 0, 0, 0, 0, 0,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
};
static const char EncodeTable[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
} // namespace base64
/*! \brief the stream that reads from base64, note we take from file pointers */
class Base64InStream: public IStream {
public:
explicit Base64InStream(FILE *fp) : fp(fp) {
num_prev = 0; tmp_ch = 0;
}
/*!
* \brief initialize the stream position to beginning of next base64 stream
* call this function before actually start read
*/
inline void InitPosition(void) {
// get a character
do {
tmp_ch = fgetc(fp);
} while (isspace(tmp_ch));
}
/*! \brief whether current position is end of a base64 stream */
inline bool IsEOF(void) const {
return num_prev == 0 && (tmp_ch == EOF || isspace(tmp_ch));
}
virtual size_t Read(void *ptr, size_t size) {
using base64::DecodeTable;
if (size == 0) return 0;
// use tlen to record left size
size_t tlen = size;
unsigned char *cptr = static_cast<unsigned char*>(ptr);
// if anything left, load from previous buffered result
if (num_prev != 0) {
if (num_prev == 2) {
if (tlen >= 2) {
*cptr++ = buf_prev[0];
*cptr++ = buf_prev[1];
tlen -= 2;
num_prev = 0;
} else {
// assert tlen == 1
*cptr++ = buf_prev[0]; --tlen;
buf_prev[0] = buf_prev[1];
num_prev = 1;
}
} else {
// assert num_prev == 1
*cptr++ = buf_prev[0]; --tlen; num_prev = 0;
}
}
if (tlen == 0) return size;
int nvalue;
// note: everything goes with 4 bytes in Base64
// so we process 4 bytes a unit
while (tlen && tmp_ch != EOF && !isspace(tmp_ch)) {
// first byte
nvalue = DecodeTable[tmp_ch] << 18;
{
// second byte
Check((tmp_ch = fgetc(fp), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
nvalue |= DecodeTable[tmp_ch] << 12;
*cptr++ = (nvalue >> 16) & 0xFF; --tlen;
}
{
// third byte
Check((tmp_ch = fgetc(fp), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
// handle termination
if (tmp_ch == '=') {
Check((tmp_ch = fgetc(fp), tmp_ch == '='), "invalid base64 format");
Check((tmp_ch = fgetc(fp), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
break;
}
nvalue |= DecodeTable[tmp_ch] << 6;
if (tlen) {
*cptr++ = (nvalue >> 8) & 0xFF; --tlen;
} else {
buf_prev[num_prev++] = (nvalue >> 8) & 0xFF;
}
}
{
// fourth byte
Check((tmp_ch = fgetc(fp), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format");
if (tmp_ch == '=') {
Check((tmp_ch = fgetc(fp), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format");
break;
}
nvalue |= DecodeTable[tmp_ch];
if (tlen) {
*cptr++ = nvalue & 0xFF; --tlen;
} else {
buf_prev[num_prev ++] = nvalue & 0xFF;
}
}
// get next char
tmp_ch = fgetc(fp);
}
if (kStrictCheck) {
Check(tlen == 0, "Base64InStream: read incomplete");
}
return size - tlen;
}
virtual void Write(const void *ptr, size_t size) {
utils::Error("Base64InStream do not support write");
}
private:
FILE *fp;
int tmp_ch;
int num_prev;
unsigned char buf_prev[2];
// whether we need to do strict check
static const bool kStrictCheck = false;
};
/*! \brief the stream that write to base64, note we take from file pointers */
class Base64OutStream: public IStream {
public:
explicit Base64OutStream(FILE *fp) : fp(fp) {
buf_top = 0;
}
virtual void Write(const void *ptr, size_t size) {
using base64::EncodeTable;
size_t tlen = size;
const unsigned char *cptr = static_cast<const unsigned char*>(ptr);
while (tlen) {
while (buf_top < 3 && tlen != 0) {
buf[++buf_top] = *cptr++; --tlen;
}
if (buf_top == 3) {
// flush 4 bytes out
fputc(EncodeTable[buf[1] >> 2], fp);
fputc(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F], fp);
fputc(EncodeTable[((buf[2] << 2) | (buf[3] >> 6)) & 0x3F], fp);
fputc(EncodeTable[buf[3] & 0x3F], fp);
buf_top = 0;
}
}
}
virtual size_t Read(void *ptr, size_t size) {
Error("Base64OutStream do not support read");
return 0;
}
/*!
* \brief finish writing of all current base64 stream, do some post processing
* \param endch character to put at the end of stream; if it is EOF, nothing will be done
*/
inline void Finish(char endch = EOF) {
using base64::EncodeTable;
if (buf_top == 1) {
fputc(EncodeTable[buf[1] >> 2], fp);
fputc(EncodeTable[(buf[1] << 4) & 0x3F], fp);
fputc('=', fp);
fputc('=', fp);
}
if (buf_top == 2) {
fputc(EncodeTable[buf[1] >> 2], fp);
fputc(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F], fp);
fputc(EncodeTable[(buf[2] << 2) & 0x3F], fp);
fputc('=', fp);
}
buf_top = 0;
if (endch != EOF) fputc(endch, fp);
}
private:
FILE *fp;
int buf_top;
unsigned char buf[4];
};
} // namespace utils
} // namespace rabit
#endif // RABIT_LEARN_UTILS_BASE64_H_
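A minimal round-trip sketch of the two streams (hypothetical file name; only APIs defined in this header are assumed):

#include <cstdio>
#include "base64.h"  // this header
int main(void) {
  int x = 42;
  FILE *fo = std::fopen("demo.b64", "w");
  rabit::utils::Base64OutStream bout(fo);
  bout.Write(&x, sizeof(x));
  bout.Finish('\n');   // flush the '=' padding and terminate the stream
  std::fclose(fo);
  FILE *fi = std::fopen("demo.b64", "r");
  rabit::utils::Base64InStream bin(fi);
  bin.InitPosition();  // skip whitespace to the first base64 character
  int y = 0;
  bin.Read(&y, sizeof(y));
  std::fclose(fi);
  return y == x ? 0 : 1;  // the value should survive the round trip
}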

View File

@@ -1,24 +1,38 @@
#include <rabit.h>
/*!
* Copyright (c) 2015 by Contributors
* \file data.h
* \brief simple data structure that could be used by model
*
* \author Tianqi Chen
*/
#ifndef RABIT_LEARN_DATA_H_
#define RABIT_LEARN_DATA_H_
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <limits>
#include <cmath>
#include <rabit.h>
namespace rabit {
// typedef index type
typedef unsigned index_t;
/*! \brief sparse matrix, CSR format */
struct SparseMat {
// sparse matrix entry
struct Entry {
// feature index
unsigned findex;
index_t findex;
// feature value
float fvalue;
};
// sparse vector
struct Vector {
const Entry *data;
unsigned length;
index_t length;
inline const Entry &operator[](size_t i) const {
return data[i];
}
@@ -26,7 +40,7 @@ struct SparseMat {
inline Vector operator[](size_t i) const {
Vector v;
v.data = &data[0] + row_ptr[i];
v.length = static_cast<unsigned>(row_ptr[i + 1]-row_ptr[i]);
v.length = static_cast<index_t>(row_ptr[i + 1]-row_ptr[i]);
return v;
}
// load data from LibSVM format
@@ -35,7 +49,13 @@ struct SparseMat {
if (!strcmp(fname, "stdin")) {
fi = stdin;
} else {
fi = utils::FopenCheck(fname, "r");
if (strchr(fname, '%') != NULL) {
char s_tmp[256];
snprintf(s_tmp, sizeof(s_tmp), fname, rabit::GetRank());
fi = utils::FopenCheck(s_tmp, "r");
} else {
fi = utils::FopenCheck(fname, "r");
}
}
row_ptr.clear();
row_ptr.push_back(0);
@@ -45,9 +65,11 @@ struct SparseMat {
char tmp[1024];
while (fscanf(fi, "%s", tmp) == 1) {
Entry e;
if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) {
unsigned long fidx;
if (sscanf(tmp, "%lu:%f", &fidx, &e.fvalue) == 2) {
e.findex = static_cast<index_t>(fidx);
data.push_back(e);
feat_dim = std::max(e.findex, feat_dim);
feat_dim = std::max(fidx, feat_dim);
} else {
if (!init) {
labels.push_back(label);
@@ -61,6 +83,9 @@ struct SparseMat {
labels.push_back(label);
row_ptr.push_back(data.size());
feat_dim += 1;
utils::Check(feat_dim < std::numeric_limits<index_t>::max(),
"feature dimension exceeds the limit of index_t, "\
"consider changing index_t to unsigned long");
// close the file
if (fi != stdin) fclose(fi);
}
@@ -68,7 +93,7 @@
return row_ptr.size() - 1;
}
// maximum feature dimension
unsigned feat_dim;
size_t feat_dim;
std::vector<size_t> row_ptr;
std::vector<Entry> data;
std::vector<float> labels;
@@ -115,3 +140,4 @@ inline int Random(int value) {
return rand() % value;
}
} // namespace rabit
#endif // RABIT_LEARN_DATA_H_
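A small illustration of the CSR fields above (hypothetical two-row matrix):

// row 0 = {1:0.5, 3:2.0}, row 1 = {2:1.5}
// data    = [(1, 0.5), (3, 2.0), (2, 1.5)]   // Entry pairs findex:fvalue
// row_ptr = [0, 2, 3]
// mat[1] yields a Vector with data = &data[0] + 2 and length = 3 - 2 = 1
// Load("mushroom.row%d") substitutes rabit::GetRank() for the %d, so
// worker 3 reads "mushroom.row3" -- matching the shards splitrows.py writes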

View File

@@ -0,0 +1,40 @@
#ifndef RABIT_LEARN_UTILS_IO_H_
#define RABIT_LEARN_UTILS_IO_H_
/*!
* \file io.h
* \brief additional stream interface
* \author Tianqi Chen
*/
#include <cstdio>
#include <rabit/io.h>
namespace rabit {
namespace utils {
/*! \brief implementation of file i/o stream */
class FileStream : public ISeekStream {
public:
explicit FileStream(FILE *fp) : fp(fp) {}
explicit FileStream(void) {
this->fp = NULL;
}
virtual size_t Read(void *ptr, size_t size) {
return std::fread(ptr, 1, size, fp);  // return the number of bytes read
}
virtual void Write(const void *ptr, size_t size) {
std::fwrite(ptr, size, 1, fp);
}
virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET);
}
virtual size_t Tell(void) {
return std::ftell(fp);
}
inline void Close(void) {
if (fp != NULL){
std::fclose(fp); fp = NULL;
}
}
private:
FILE *fp;
};
} // namespace utils
} // namespace rabit
#endif // RABIT_LEARN_UTILS_IO_H_

View File

@@ -77,7 +77,10 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
PreprocFunction prepare_fun,
void *prepare_arg) {
// skip action in single node
if (world_size == 1) return;
if (world_size == 1) {
if (prepare_fun != NULL) prepare_fun(prepare_arg);
return;
}
bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter);
// now we are free to remove the last result, if any
if (resbuf.LastSeqNo() != -1 &&

View File

@@ -92,6 +92,7 @@ void Allreduce_(void *sendrecvbuf,
mpi::OpType op,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg) {
if (prepare_fun != NULL) prepare_fun(prepare_arg);
}
// code for reduce handle
@@ -106,6 +107,8 @@ void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t type_nbytes) {}
void ReduceHandle::Allreduce(void *sendrecvbuf,
size_t type_nbytes, size_t count,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg) {}
void *prepare_arg) {
if (prepare_fun != NULL) prepare_fun(prepare_arg);
}
} // namespace engine
} // namespace rabit
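Both engine changes close the same gap: a caller may pass a lazy prepare_fun to Allreduce that fills the buffer only when the result cannot be recovered from a checkpoint, and that callback must still fire when no real reduction happens (single node, or the empty/MPI engine stubs). A minimal usage sketch (hypothetical data; the prepare_fun/prepare_arg parameters are those shown in the engine code above):

#include <rabit.h>
static float buf[4];
static void Prepare(void *arg) {
  // fill the buffer lazily, immediately before the reduction runs
  for (int i = 0; i < 4; ++i) buf[i] = 1.0f;
}
int main(int argc, char *argv[]) {
  rabit::Init(argc, argv);
  rabit::Allreduce<rabit::op::Sum>(buf, 4, Prepare, NULL);
  rabit::Finalize();
  return 0;
}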