[LIBXGBOOST] pass demo running.

tqchen 2016-01-05 21:49:48 -08:00
parent cee148ed64
commit d75e3ed05d
59 changed files with 1611 additions and 1845 deletions

.gitignore

@@ -70,3 +70,5 @@ nb-configuration*
.settings/
build
config.mk
xgboost
*.data

Makefile

@@ -37,7 +37,7 @@ ifeq ($(OS), Windows_NT)
endif
export LDFLAGS= -pthread -lm $(ADD_LDFLAGS) $(DMLC_LDFLAGS)
export CFLAGS= -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops -fPIC -Iinclude $(ADD_CFLAGS)
export CFLAGS= -std=c++0x -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops -fPIC -Iinclude $(ADD_CFLAGS)
CFLAGS += -I$(DMLC_CORE)/include -I$(RABIT)/include
ifndef LINT_LANG
@@ -65,16 +65,27 @@ $(DMLC_CORE)/libdmlc.a:
$(RABIT)/lib/$(LIB_RABIT):
+ cd $(RABIT); make lib/$(LIB_RABIT); cd $(ROOTDIR)
SRC = $(wildcard src/*.cc src/*/*.cc)
ALL_OBJ = $(patsubst src/%.cc, build/%.o, $(SRC))
AMALGA_OBJ = amalgamation/xgboost-all0.o
LIB_DEP = $(DMLC_CORE)/libdmlc.a $(RABIT)/lib/$(LIB_RABIT)
ALL_DEP = $(filter-out build/cli_main.o, $(ALL_OBJ)) $(LIB_DEP)
CLI_OBJ = build/cli_main.o
build/%.o: src/%.cc
@mkdir -p $(@D)
$(CXX) -std=c++0x $(CFLAGS) -MM -MT build/$*.o $< >build/$*.d
$(CXX) -std=c++0x -c $(CFLAGS) -c $< -o $@
$(CXX) $(CFLAGS) -MM -MT build/$*.o $< >build/$*.d
$(CXX) $(CFLAGS) -c $< -o $@
# This should be equivalent to $(ALL_OBJ) except for build/cli_main.o
amalgamation/xgboost-all0.o: amalgamation/xgboost-all0.cc
$(CXX) $(CFLAGS) -c $< -o $@
# Equivalent to lib/libxgboost.so, but built from the amalgamation
lib/libxgboost_all.so: $(AMALGA_OBJ) $(LIB_DEP)
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
lib/libxgboost.a: $(ALL_DEP)
@mkdir -p $(@D)
@@ -84,14 +95,14 @@ lib/libxgboost.so: $(ALL_DEP)
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
xgboost: lib/libxgboost.a $(CLI_OBJ) $(LIB_DEP)
$(CXX) $(CFLAGS) -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
xgboost: $(CLI_OBJ) lib/libxgboost.a $(LIB_DEP)
$(CXX) $(CFLAGS) -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
lint:
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src
clean:
$(RM) -r build lib bin *~ */*~ */*/*~ */*/*/*~
$(RM) -r build lib bin *~ */*~ */*/*~ */*/*/*~ $(AMALGA_OBJ)
clean_all: clean
cd $(DMLC_CORE); make clean; cd -

amalgamation/xgboost-all0.cc (new file)

@@ -0,0 +1,51 @@
/*!
* Copyright 2015 by Contributors.
* \brief XGBoost Amalgamation.
* This offers an alternative way to compile the entire library from this single file.
*
* Example usage command.
* - $(CXX) -std=c++0x -fopenmp -shared -o libxgboost.so xgboost-all0.cc -ldmlc -lrabit
*
* \author Tianqi Chen.
*/
// metrics
#include "../src/metric/metric.cc"
#include "../src/metric/elementwise_metric.cc"
#include "../src/metric/multiclass_metric.cc"
#include "../src/metric/rank_metric.cc"
// objectives
#include "../src/objective/objective.cc"
#include "../src/objective/regression_obj.cc"
#include "../src/objective/multiclass_obj.cc"
#include "../src/objective/rank_obj.cc"
// gbms
#include "../src/gbm/gbm.cc"
#include "../src/gbm/gbtree.cc"
#include "../src/gbm/gblinear.cc"
// data
#include "../src/data/data.cc"
#include "../src/data/simple_csr_source.cc"
#include "../src/data/simple_dmatrix.cc"
// trees
#include "../src/tree/tree_model.cc"
#include "../src/tree/tree_updater.cc"
#include "../src/tree/updater_colmaker.cc"
#include "../src/tree/updater_prune.cc"
#include "../src/tree/updater_refresh.cc"
#include "../src/tree/updater_sync.cc"
#include "../src/tree/updater_histmaker.cc"
#include "../src/tree/updater_skmaker.cc"
// global
#include "../src/learner.cc"
#include "../src/logging.cc"
#include "../src/common/common.cc"
// c_api
#include "../src/c_api/c_api.cc"
#include "../src/c_api/c_api_error.cc"

demo/guide-python/runall.sh

@@ -1,4 +1,5 @@
#!/bin/bash
export PYTHONPATH=${PYTHONPATH}:../../python-package
python basic_walkthrough.py
python custom_objective.py
python boost_from_prediction.py
@@ -9,4 +10,4 @@ python predict_leaf_indices.py
python sklearn_examples.py
python sklearn_parallel.py
python external_memory.py
rm -rf *~ *.model *.buffer
rm -rf *~ *.model *.buffer

dmlc-core

@@ -1 +1 @@
Subproject commit ec454218564fee8e531aee02b8943a4634330ce1
Subproject commit c0325077a3ceda08fe04b2aa115e004a3520630a

include/xgboost/base.h

@@ -16,6 +16,15 @@
#define XGBOOST_STRICT_R_MODE 0
#endif
/*!
* \brief Whether to always log console messages with time.
* When enabled, a timestamp is prepended to the head of each message, e.g.
* "[21:47:50] 6513x126 matrix with 143286 entries loaded from ../data/agaricus.txt.train"
*/
#ifndef XGBOOST_LOG_WITH_TIME
#define XGBOOST_LOG_WITH_TIME 0
#endif
/*! \brief namespace of xgboost */
namespace xgboost {
/*!
@@ -23,6 +32,8 @@ namespace xgboost {
* used for feature index and row index.
*/
typedef uint32_t bst_uint;
/*! \brief long integers */
typedef unsigned long bst_ulong; // NOLINT(*)
/*! \brief float type, used for storing statistics */
typedef float bst_float;
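
bst_ulong is the unsigned integer type the C API uses for lengths and counts. A hedged sketch of the calling convention (dmat is assumed to be a valid DMatrixHandle):

xgboost::bst_ulong nrow = 0, ncol = 0;
XGDMatrixNumRow(dmat, &nrow);  // every size out-parameter in the C API is a bst_ulong
XGDMatrixNumCol(dmat, &ncol);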

include/xgboost/c_api.h

@@ -36,13 +36,6 @@ typedef void *BoosterHandle;
*/
XGB_DLL const char *XGBGetLastError();
/*!
* \brief Entry point of CLI program.
* \param argc The number of arguments.
* \param argv The command line arguments.
*/
XGB_DLL int XGBoostCLIMain(int argc, char* argv[])
/*!
* \brief load a data matrix
* \param fname the name of the file

include/xgboost/data.h

@@ -59,7 +59,7 @@ struct MetaInfo {
/*! \brief version flag, used to check version of this info */
static const int kVersion = 1;
/*! \brief default constructor */
MetaInfo() : num_row(0), num_col(0) {}
MetaInfo() : num_row(0), num_col(0), num_nonzero(0) {}
/*!
* \brief Get weight of each instances.
* \param i Instance index.
@@ -96,14 +96,6 @@
* \param num Number of elements in the source array.
*/
void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num);
/*!
* \brief Get information from meta info.
* \param key The key of the information.
* \param dptr The output data pointer of the source array.
* \param dtype The output data type of the information array.
* \param num Number of elements in the array.
*/
void GetInfo(const char* key, const void** dptr, DataType* dtype, size_t* num) const;
};
/*! \brief read-only sparse instance batch in CSR format */
@@ -259,11 +251,14 @@ class DMatrix {
* \param uri The URI of input.
* \param silent Whether to print information during loading.
* \param load_row_split Flag to read in part of rows, divided among the workers in distributed mode.
* \param file_format The format type of the file, used for dmlc::Parser::Create.
* By default "auto" can load both local binary and text files.
* \return The created DMatrix.
*/
static DMatrix* Load(const std::string& uri,
bool silent,
bool load_row_split);
bool load_row_split,
const std::string& file_format = "auto");
/*!
* \brief create a new DMatrix, by wrapping a row_iterator, and meta info.
* \param source The source iterator of the data, the create function takes ownership of the source.
@@ -273,7 +268,7 @@
* \return a created DMatrix.
*/
static DMatrix* Create(std::unique_ptr<DataSource>&& source,
const char* cache_prefix = nullptr);
const std::string& cache_prefix = "");
/*!
* \brief Create a DMatrix by loading data from a parser.
* The parser can be deleted after the DMatrix is created.
@@ -287,7 +282,7 @@
* \return A created DMatrix.
*/
static DMatrix* Create(dmlc::Parser<uint32_t>* parser,
const char* cache_prefix = nullptr);
const std::string& cache_prefix = "");
private:
// allow learner class to access this field.

include/xgboost/gbm.h

@@ -163,7 +163,7 @@ struct GradientBoosterReg
*/
#define XGBOOST_REGISTER_GBM(UniqueId, Name) \
static ::xgboost::GradientBoosterReg & __make_ ## GradientBoosterReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->__REGISTER__(#Name)
::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_GBM_H_
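
Dropping the # stringization means the macro now takes the registered name as a string literal instead of deriving it from the token; the metric, objective, and tree-updater registries below get the same change. A hedged registration sketch (GBTree is the booster class assumed to be defined in src/gbm/gbtree.cc):

XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree-based gradient boosting model.")
.set_body([]() { return new GBTree(); });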

include/xgboost/learner.h

@@ -36,6 +36,8 @@ namespace xgboost {
*/
class Learner : public rabit::Serializable {
public:
/*! \brief virtual destructor */
virtual ~Learner() {}
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
@@ -51,6 +53,11 @@ class Learner : public rabit::Serializable {
* \param cfg configurations on both training and model parameters.
*/
virtual void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) = 0;
/*!
* \brief Initialize the model using the specified configurations via Configure.
* A model has to be either loaded or initialized before Update/Predict/Save can be called.
*/
virtual void InitModel() = 0;
/*!
* \brief load model from stream
* \param fi input stream.
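
A hedged sketch of the lifecycle this adds: a fresh Learner must be Configure()d and then InitModel()ed, while a deserialized one only needs Load() (cache_mats, cfg, num_round, and dtrain are assumed to exist):

std::unique_ptr<xgboost::Learner> learner(xgboost::Learner::Create(cache_mats));
learner->Configure(cfg);   // training and model parameters
learner->InitModel();      // or learner->Load(fi.get()) to resume an existing model
for (int iter = 0; iter < num_round; ++iter) {
  learner->UpdateOneIter(iter, dtrain.get());
}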

include/xgboost/logging.h (new file)

@@ -0,0 +1,50 @@
/*!
* Copyright (c) 2015 by Contributors
* \file logging.h
* \brief defines console logging options for xgboost.
* Used to enforce unified print behavior.
* For debug loggers, use LOG(INFO) and LOG(ERROR).
*/
#ifndef XGBOOST_LOGGING_H_
#define XGBOOST_LOGGING_H_
#include <dmlc/logging.h>
#include <sstream>
#include "./base.h"
namespace xgboost {
class BaseLogger {
public:
BaseLogger() {
#if XGBOOST_LOG_WITH_TIME
log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
#endif
}
std::ostream& stream() { return log_stream_; }
protected:
std::ostringstream log_stream_;
};
class ConsoleLogger : public BaseLogger {
public:
~ConsoleLogger();
};
class TrackerLogger : public BaseLogger {
public:
~TrackerLogger();
};
// redefine the logging macro if it does not already exist
#ifndef LOG
#define LOG(severity) LOG_##severity.stream()
#endif
// Enable LOG(CONSOLE) for printing messages to the console.
#define LOG_CONSOLE ::xgboost::ConsoleLogger()
// Enable LOG(TRACKER) for printing messages to the tracker.
#define LOG_TRACKER ::xgboost::TrackerLogger()
} // namespace xgboost.
#endif // XGBOOST_LOGGING_H_
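
A hedged usage sketch of the two sinks defined above (Report is a hypothetical helper; the message mirrors the example in base.h):

#include <xgboost/logging.h>

void Report(int nrow, int ncol, size_t nentry, const char* fname) {
  // with XGBOOST_LOG_WITH_TIME set to 1 this prints "[hh:mm:ss] ..." on the console
  LOG(CONSOLE) << nrow << "x" << ncol << " matrix with "
               << nentry << " entries loaded from " << fname;
  LOG(TRACKER) << "worker finished loading " << fname;  // routed to the job tracker
}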

include/xgboost/metric.h

@@ -70,7 +70,7 @@ struct MetricReg
* \endcode
*/
#define XGBOOST_REGISTER_METRIC(UniqueId, Name) \
static ::xgboost::MetricReg & __make_ ## MetricReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::MetricReg>::Get()->__REGISTER__(#Name)
::xgboost::MetricReg& __make_ ## MetricReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::MetricReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_METRIC_H_

include/xgboost/objective.h

@@ -106,6 +106,6 @@ struct ObjFunctionReg
*/
#define XGBOOST_REGISTER_OBJECTIVE(UniqueId, Name) \
static ::xgboost::ObjFunctionReg & __make_ ## ObjFunctionReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->__REGISTER__(#Name)
::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_OBJECTIVE_H_

include/xgboost/tree_model.h

@@ -8,7 +8,6 @@
#define XGBOOST_TREE_MODEL_H_
#include <dmlc/io.h>
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <limits>
#include <vector>
@@ -17,6 +16,7 @@
#include <algorithm>
#include "./base.h"
#include "./data.h"
#include "./logging.h"
#include "./feature_map.h"
namespace xgboost {

include/xgboost/tree_updater.h

@@ -79,7 +79,7 @@ struct TreeUpdaterReg
*/
#define XGBOOST_REGISTER_TREE_UPDATER(UniqueId, Name) \
static ::xgboost::TreeUpdaterReg& __make_ ## TreeUpdaterReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->__REGISTER__(#Name)
::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->__REGISTER__(Name)
} // namespace xgboost
#endif // XGBOOST_TREE_UPDATER_H_

(deleted file: a minimal local-mode implementation of dmlc::InputSplit and dmlc::Stream)

@@ -1,229 +0,0 @@
// Copyright by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX
#include <string>
#include "../utils/io.h"
// implements a single no split version of DMLC
// in case we want to avoid dependency on dmlc-core
namespace xgboost {
namespace utils {
/*!
* \brief line split implementation from single FILE
* simply returns lines of files, used for stdin
*/
class SingleFileSplit : public dmlc::InputSplit {
public:
explicit SingleFileSplit(const char *fname)
: use_stdin_(false),
chunk_begin_(NULL), chunk_end_(NULL) {
if (!std::strcmp(fname, "stdin")) {
#ifndef XGBOOST_STRICT_CXX98_
use_stdin_ = true; fp_ = stdin;
#endif
}
if (!use_stdin_) {
fp_ = utils::FopenCheck(fname, "rb");
}
buffer_.resize(kBufferSize);
}
virtual ~SingleFileSplit(void) {
if (!use_stdin_) std::fclose(fp_);
}
virtual size_t Read(void *ptr, size_t size) {
return std::fread(ptr, 1, size, fp_);
}
virtual void Write(const void *ptr, size_t size) {
utils::Error("cannot do write in inputsplit");
}
virtual void BeforeFirst(void) {
std::fseek(fp_, 0, SEEK_SET);
}
virtual bool NextRecord(Blob *out_rec) {
if (chunk_begin_ == chunk_end_) {
if (!LoadChunk()) return false;
}
char *next = FindNextRecord(chunk_begin_,
chunk_end_);
out_rec->dptr = chunk_begin_;
out_rec->size = next - chunk_begin_;
chunk_begin_ = next;
return true;
}
virtual bool NextChunk(Blob *out_chunk) {
if (chunk_begin_ == chunk_end_) {
if (!LoadChunk()) return false;
}
out_chunk->dptr = chunk_begin_;
out_chunk->size = chunk_end_ - chunk_begin_;
chunk_begin_ = chunk_end_;
return true;
}
inline bool ReadChunk(void *buf, size_t *size) {
size_t max_size = *size;
if (max_size <= overflow_.length()) {
*size = 0; return true;
}
if (overflow_.length() != 0) {
std::memcpy(buf, BeginPtr(overflow_), overflow_.length());
}
size_t olen = overflow_.length();
overflow_.resize(0);
size_t nread = this->Read(reinterpret_cast<char*>(buf) + olen,
max_size - olen);
nread += olen;
if (nread == 0) return false;
if (nread != max_size) {
*size = nread;
return true;
} else {
const char *bptr = reinterpret_cast<const char*>(buf);
// return the last position where a record starts
const char *bend = this->FindLastRecordBegin(bptr, bptr + max_size);
*size = bend - bptr;
overflow_.resize(max_size - *size);
if (overflow_.length() != 0) {
std::memcpy(BeginPtr(overflow_), bend, overflow_.length());
}
return true;
}
}
protected:
inline const char* FindLastRecordBegin(const char *begin,
const char *end) {
if (begin == end) return begin;
for (const char *p = end - 1; p != begin; --p) {
if (*p == '\n' || *p == '\r') return p + 1;
}
return begin;
}
inline char* FindNextRecord(char *begin, char *end) {
char *p;
for (p = begin; p != end; ++p) {
if (*p == '\n' || *p == '\r') break;
}
for (; p != end; ++p) {
if (*p != '\n' && *p != '\r') return p;
}
return end;
}
inline bool LoadChunk(void) {
while (true) {
size_t size = buffer_.length();
if (!ReadChunk(BeginPtr(buffer_), &size)) return false;
if (size == 0) {
buffer_.resize(buffer_.length() * 2);
} else {
chunk_begin_ = reinterpret_cast<char *>(BeginPtr(buffer_));
chunk_end_ = chunk_begin_ + size;
break;
}
}
return true;
}
private:
// buffer size
static const size_t kBufferSize = 1 << 18UL;
// file
std::FILE *fp_;
bool use_stdin_;
// internal overflow
std::string overflow_;
// internal buffer
std::string buffer_;
// beginning of chunk
char *chunk_begin_;
// end of chunk
char *chunk_end_;
};
class StdFile : public dmlc::Stream {
public:
explicit StdFile(std::FILE *fp, bool use_stdio)
: fp(fp), use_stdio(use_stdio) {
}
virtual ~StdFile(void) {
this->Close();
}
virtual size_t Read(void *ptr, size_t size) {
return std::fread(ptr, 1, size, fp);
}
virtual void Write(const void *ptr, size_t size) {
Check(std::fwrite(ptr, size, 1, fp) == 1, "StdFile::Write: fwrite error!");
}
virtual void Seek(size_t pos) {
std::fseek(fp, static_cast<long>(pos), SEEK_SET); // NOLINT(*)
}
virtual size_t Tell(void) {
return std::ftell(fp);
}
virtual bool AtEnd(void) const {
return std::feof(fp) != 0;
}
inline void Close(void) {
if (fp != NULL && !use_stdio) {
std::fclose(fp); fp = NULL;
}
}
private:
std::FILE *fp;
bool use_stdio;
};
} // namespace utils
} // namespace xgboost
namespace dmlc {
InputSplit* InputSplit::Create(const char *uri,
unsigned part,
unsigned nsplit,
const char *type) {
using namespace std;
using namespace xgboost;
const char *msg = "xgboost is compiled in local mode\n"\
"to use hdfs, s3 or distributed version, compile with make dmlc=1";
utils::Check(strncmp(uri, "s3://", 5) != 0, msg);
utils::Check(strncmp(uri, "hdfs://", 7) != 0, msg);
utils::Check(nsplit == 1, msg);
return new utils::SingleFileSplit(uri);
}
Stream *Stream::Create(const char *fname, const char * const mode, bool allow_null) {
using namespace std;
using namespace xgboost;
const char *msg = "xgboost is compiled in local mode\n"\
"to use hdfs, s3 or distributed version, compile with make dmlc=1";
utils::Check(strncmp(fname, "s3://", 5) != 0, msg);
utils::Check(strncmp(fname, "hdfs://", 7) != 0, msg);
std::FILE *fp = NULL;
bool use_stdio = false;
using namespace std;
#ifndef XGBOOST_STRICT_CXX98_
if (!strcmp(fname, "stdin")) {
use_stdio = true; fp = stdin;
}
if (!strcmp(fname, "stdout")) {
use_stdio = true; fp = stdout;
}
#endif
if (!strncmp(fname, "file://", 7)) fname += 7;
if (!use_stdio) {
std::string flag = mode;
if (flag == "w") flag = "wb";
if (flag == "r") flag = "rb";
fp = fopen64(fname, flag.c_str());
}
if (fp != NULL) {
return new utils::StdFile(fp, use_stdio);
} else {
utils::Check(allow_null, "fail to open file %s", fname);
return NULL;
}
}
} // namespace dmlc

src/io/libsvm_parser.h (deleted file)

@@ -1,212 +0,0 @@
/*!
* Copyright (c) 2015 by Contributors
* \file libsvm_parser.h
* \brief iterator parser to parse libsvm format
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_LIBSVM_PARSER_H_
#define XGBOOST_IO_LIBSVM_PARSER_H_
#define NOMINMAX
#include <vector>
#include <cstring>
#include <cctype>
#include <algorithm>
#include "../utils/omp.h"
#include "../utils/utils.h"
#include "../sync/sync.h"
#include "../utils/thread_buffer.h"
#include "./sparse_batch_page.h"
namespace xgboost {
namespace io {
/*! \brief page returned by libsvm parser */
struct LibSVMPage : public SparsePage {
std::vector<float> label;
// overload clear
inline void Clear() {
SparsePage::Clear();
label.clear();
}
};
/*!
* \brief libsvm parser that parses the input lines
* and returns rows in input data
* factory that was used by threadbuffer template
*/
class LibSVMPageFactory {
public:
LibSVMPageFactory()
: bytes_read_(0), at_head_(true) {
}
inline bool Init(void) {
return true;
}
inline void Setup(dmlc::InputSplit *source,
int nthread) {
source_ = source;
int maxthread;
#pragma omp parallel
{
maxthread = omp_get_num_procs();
}
maxthread = std::max(maxthread / 2, 1);
nthread_ = std::min(maxthread, nthread);
}
inline void SetParam(const char *name, const char *val) {}
inline bool LoadNext(std::vector<LibSVMPage> *data) {
return FillData(data);
}
inline void FreeSpace(std::vector<LibSVMPage> *a) {
delete a;
}
inline std::vector<LibSVMPage> *Create(void) {
return new std::vector<LibSVMPage>();
}
inline void BeforeFirst(void) {
utils::Assert(at_head_, "cannot call beforefirst");
}
inline void Destroy(void) {
delete source_;
}
inline size_t bytes_read(void) const {
return bytes_read_;
}
protected:
inline bool FillData(std::vector<LibSVMPage> *data) {
dmlc::InputSplit::Blob chunk;
if (!source_->NextChunk(&chunk)) return false;
int nthread;
#pragma omp parallel num_threads(nthread_)
{
nthread = omp_get_num_threads();
}
// reserve space for data
data->resize(nthread);
bytes_read_ += chunk.size;
utils::Assert(chunk.size != 0, "LibSVMParser.FileData");
char *head = reinterpret_cast<char*>(chunk.dptr);
#pragma omp parallel num_threads(nthread_)
{
// threadid
int tid = omp_get_thread_num();
size_t nstep = (chunk.size + nthread - 1) / nthread;
size_t sbegin = std::min(tid * nstep, chunk.size);
size_t send = std::min((tid + 1) * nstep, chunk.size);
char *pbegin = BackFindEndLine(head + sbegin, head);
char *pend;
if (tid + 1 == nthread) {
pend = head + send;
} else {
pend = BackFindEndLine(head + send, head);
}
ParseBlock(pbegin, pend, &(*data)[tid]);
}
return true;
}
/*!
* \brief parse data into out
* \param begin beginning of buffer
* \param end end of buffer
*/
inline void ParseBlock(char *begin,
char *end,
LibSVMPage *out) {
using namespace std;
out->Clear();
char *p = begin;
while (p != end) {
while (isspace(*p) && p != end) ++p;
if (p == end) break;
char *head = p;
while (isdigit(*p) && p != end) ++p;
if (*p == ':') {
out->data.push_back(SparseBatch::Entry(atol(head),
static_cast<bst_float>(atof(p + 1))));
} else {
if (out->label.size() != 0) {
out->offset.push_back(out->data.size());
}
out->label.push_back(static_cast<float>(atof(head)));
}
while (!isspace(*p) && p != end) ++p;
}
if (out->label.size() != 0) {
out->offset.push_back(out->data.size());
}
utils::Check(out->label.size() + 1 == out->offset.size(),
"LibSVMParser inconsistent");
}
/*!
* \brief start from bptr, go backward and find first endof line
* \param bptr end position to go backward
* \param begin the beginning position of buffer
* \return position of first endof line going backward
*/
inline char* BackFindEndLine(char *bptr,
char *begin) {
for (; bptr != begin; --bptr) {
if (*bptr == '\n' || *bptr == '\r') return bptr;
}
return begin;
}
private:
// nthread
int nthread_;
// number of bytes read
size_t bytes_read_;
// at beginning, at end of stream
bool at_head_;
// source split that provides the data
dmlc::InputSplit *source_;
};
class LibSVMParser : public utils::IIterator<LibSVMPage> {
public:
explicit LibSVMParser(dmlc::InputSplit *source,
int nthread)
: at_end_(false), data_ptr_(0), data_(NULL) {
itr.SetParam("buffer_size", "2");
itr.get_factory().Setup(source, nthread);
itr.Init();
}
virtual void BeforeFirst(void) {
itr.BeforeFirst();
}
virtual bool Next(void) {
if (at_end_) return false;
while (true) {
if (data_ == NULL || data_ptr_ >= data_->size()) {
if (!itr.Next(data_)) {
at_end_ = true; return false;
} else {
data_ptr_ = 0;
}
}
while (data_ptr_ < data_->size()) {
data_ptr_ += 1;
if ((*data_)[data_ptr_ - 1].Size() != 0) {
return true;
}
}
}
return true;
}
virtual const LibSVMPage &Value(void) const {
return (*data_)[data_ptr_ - 1];
}
inline size_t bytes_read(void) const {
return itr.get_factory().bytes_read();
}
private:
bool at_end_;
size_t data_ptr_;
std::vector<LibSVMPage> *data_;
utils::ThreadBuffer<std::vector<LibSVMPage>*, LibSVMPageFactory> itr;
};
} // namespace io
} // namespace xgboost
#endif // XGBOOST_IO_LIBSVM_PARSER_H_

src/io/simple_fmatrix-inl.hpp (deleted file)

@@ -1,374 +0,0 @@
/*!
* Copyright 2014 by Contributors
* \file simple_fmatrix-inl.hpp
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
#include <limits>
#include <algorithm>
#include <vector>
#include "../data.h"
#include "../utils/utils.h"
#include "../utils/random.h"
#include "../utils/omp.h"
#include "../learner/dmatrix.h"
#include "../utils/group_data.h"
#include "./sparse_batch_page.h"
namespace xgboost {
namespace io {
/*!
* \brief sparse matrix that support column access, CSC
*/
class FMatrixS : public IFMatrix {
public:
typedef SparseBatch::Entry Entry;
/*! \brief constructor */
FMatrixS(utils::IIterator<RowBatch> *iter,
const learner::MetaInfo &info)
: info_(info) {
this->iter_ = iter;
}
// destructor
virtual ~FMatrixS(void) {
if (iter_ != NULL) delete iter_;
}
/*! \return whether column access is enabled */
virtual bool HaveColAccess(void) const {
return col_size_.size() != 0;
}
/*! \brief get number of columns */
virtual size_t NumCol(void) const {
utils::Check(this->HaveColAccess(), "NumCol:need column access");
return col_size_.size();
}
/*! \brief get number of buffered rows */
virtual const std::vector<bst_uint> &buffered_rowset(void) const {
return buffered_rowset_;
}
/*! \brief get column size */
virtual size_t GetColSize(size_t cidx) const {
return col_size_[cidx];
}
/*! \brief get column density */
virtual float GetColDensity(size_t cidx) const {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
}
virtual void InitColAccess(const std::vector<bool> &enabled,
float pkeep, size_t max_row_perbatch) {
if (this->HaveColAccess()) return;
this->InitColData(enabled, pkeep, max_row_perbatch);
}
/*!
* \brief get the row iterator associated with FMatrix
*/
virtual utils::IIterator<RowBatch>* RowIterator(void) {
iter_->BeforeFirst();
return iter_;
}
/*!
* \brief get the column based iterator
*/
virtual utils::IIterator<ColBatch>* ColIterator(void) {
size_t ncol = this->NumCol();
col_iter_.col_index_.resize(ncol);
for (size_t i = 0; i < ncol; ++i) {
col_iter_.col_index_[i] = static_cast<bst_uint>(i);
}
col_iter_.BeforeFirst();
return &col_iter_;
}
/*!
* \brief column based iterator
*/
virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {
size_t ncol = this->NumCol();
col_iter_.col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]);
}
col_iter_.BeforeFirst();
return &col_iter_;
}
/*!
* \brief save column access data into stream
* \param fo output stream to save to
*/
inline void SaveColAccess(utils::IStream &fo) const { // NOLINT(*)
size_t n = 0;
fo.Write(&n, sizeof(n));
}
/*!
* \brief load column access data from stream
* \param fo output stream to load from
*/
inline void LoadColAccess(utils::IStream &fi) { // NOLINT(*)
// do nothing in load col access
}
protected:
/*!
* \brief initialize column data
* \param enabled the list of enabled columns
* \param pkeep probability to keep a row
* \param max_row_perbatch maximum row per batch
*/
inline void InitColData(const std::vector<bool> &enabled,
float pkeep, size_t max_row_perbatch) {
col_iter_.Clear();
if (info_.num_row() < max_row_perbatch) {
SparsePage *page = new SparsePage();
this->MakeOneBatch(enabled, pkeep, page);
col_iter_.cpages_.push_back(page);
} else {
this->MakeManyBatch(enabled, pkeep, max_row_perbatch);
}
// setup col-size
col_size_.resize(info_.num_col());
std::fill(col_size_.begin(), col_size_.end(), 0);
for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) {
SparsePage *pcol = col_iter_.cpages_[i];
for (size_t j = 0; j < pcol->Size(); ++j) {
col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
}
}
}
/*!
* \brief make column page from iterator
* \param pkeep probability to keep a row
* \param pcol the target column
*/
inline void MakeOneBatch(const std::vector<bool> &enabled,
float pkeep,
SparsePage *pcol) {
// clear rowset
buffered_rowset_.clear();
// bit map
int nthread;
std::vector<bool> bmap;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
pcol->Clear();
utils::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info_.num_col(), nthread);
// start working
iter_->BeforeFirst();
while (iter_->Next()) {
const RowBatch &batch = iter_->Value();
bmap.resize(bmap.size() + batch.size, true);
long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
buffered_rowset_.push_back(ridx);
} else {
bmap[i] = false;
}
}
#pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.AddBudget(inst[j].index, tid);
}
}
}
}
}
builder.InitStorage();
iter_->BeforeFirst();
while (iter_->Next()) {
const RowBatch &batch = iter_->Value();
#pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.Push(inst[j].index,
Entry((bst_uint)(batch.base_rowid+i),
inst[j].fvalue), tid);
}
}
}
}
}
utils::Assert(pcol->Size() == info_.num_col(),
"inconsistent col data");
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
std::sort(BeginPtr(pcol->data) + pcol->offset[i],
BeginPtr(pcol->data) + pcol->offset[i + 1],
SparseBatch::Entry::CmpValue);
}
}
}
inline void MakeManyBatch(const std::vector<bool> &enabled,
float pkeep, size_t max_row_perbatch) {
size_t btop = 0;
buffered_rowset_.clear();
// internal temp cache
SparsePage tmp; tmp.Clear();
iter_->BeforeFirst();
while (iter_->Next()) {
const RowBatch &batch = iter_->Value();
for (size_t i = 0; i < batch.size; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
buffered_rowset_.push_back(ridx);
tmp.Push(batch[i]);
}
if (tmp.Size() >= max_row_perbatch) {
SparsePage *page = new SparsePage();
this->MakeColPage(tmp.GetRowBatch(0),
BeginPtr(buffered_rowset_) + btop,
enabled, page);
col_iter_.cpages_.push_back(page);
btop = buffered_rowset_.size();
tmp.Clear();
}
}
}
if (tmp.Size() != 0) {
SparsePage *page = new SparsePage();
this->MakeColPage(tmp.GetRowBatch(0),
BeginPtr(buffered_rowset_) + btop,
enabled, page);
col_iter_.cpages_.push_back(page);
}
}
// make column page from subset of rowbatchs
inline void MakeColPage(const RowBatch &batch,
const bst_uint *ridx,
const std::vector<bool> &enabled,
SparsePage *pcol) {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
if (nthread > max_nthread) {
nthread = max_nthread;
}
}
pcol->Clear();
utils::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info_.num_col(), nthread);
bst_omp_uint ndata = static_cast<bst_uint>(batch.size);
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
int tid = omp_get_thread_num();
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
const SparseBatch::Entry &e = inst[j];
if (enabled[e.index]) {
builder.AddBudget(e.index, tid);
}
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
int tid = omp_get_thread_num();
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
const SparseBatch::Entry &e = inst[j];
builder.Push(e.index,
SparseBatch::Entry(ridx[i], e.fvalue),
tid);
}
}
utils::Assert(pcol->Size() == info_.num_col(), "inconsistent col data");
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
std::sort(BeginPtr(pcol->data) + pcol->offset[i],
BeginPtr(pcol->data) + pcol->offset[i + 1],
SparseBatch::Entry::CmpValue);
}
}
}
private:
// one batch iterator that return content in the matrix
struct ColBatchIter: utils::IIterator<ColBatch> {
ColBatchIter(void) : data_ptr_(0) {}
virtual ~ColBatchIter(void) {
this->Clear();
}
virtual void BeforeFirst(void) {
data_ptr_ = 0;
}
virtual bool Next(void) {
if (data_ptr_ >= cpages_.size()) return false;
data_ptr_ += 1;
SparsePage *pcol = cpages_[data_ptr_ - 1];
batch_.size = col_index_.size();
col_data_.resize(col_index_.size(), SparseBatch::Inst(NULL, 0));
for (size_t i = 0; i < col_data_.size(); ++i) {
const bst_uint ridx = col_index_[i];
col_data_[i] = SparseBatch::Inst
(BeginPtr(pcol->data) + pcol->offset[ridx],
static_cast<bst_uint>(pcol->offset[ridx + 1] - pcol->offset[ridx]));
}
batch_.col_index = BeginPtr(col_index_);
batch_.col_data = BeginPtr(col_data_);
return true;
}
virtual const ColBatch &Value(void) const {
return batch_;
}
inline void Clear(void) {
for (size_t i = 0; i < cpages_.size(); ++i) {
delete cpages_[i];
}
cpages_.clear();
}
// data content
std::vector<bst_uint> col_index_;
// column content
std::vector<ColBatch::Inst> col_data_;
// column sparse pages
std::vector<SparsePage*> cpages_;
// data pointer
size_t data_ptr_;
// temporal space for batch
ColBatch batch_;
};
// --- data structure used to support InitColAccess --
// column iterator
ColBatchIter col_iter_;
// shared meta info with DMatrix
const learner::MetaInfo &info_;
// row iterator
utils::IIterator<RowBatch> *iter_;
/*! \brief list of row index that are buffered */
std::vector<bst_uint> buffered_rowset_;
// count for column data
std::vector<size_t> col_size_;
};
} // namespace io
} // namespace xgboost
#endif  // XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_

src/learner/dmatrix.h (deleted file)

@@ -1,176 +0,0 @@
/*!
* Copyright 2014 by Contributors
* \file dmatrix.h
* \brief meta data and template data structure
* used for regression/classification/ranking
* \author Tianqi Chen
*/
#ifndef XGBOOST_LEARNER_DMATRIX_H_
#define XGBOOST_LEARNER_DMATRIX_H_
#include <vector>
#include <cstring>
#include "../data.h"
#include "../utils/io.h"
namespace xgboost {
namespace learner {
/*!
* \brief meta information needed in training, including label, weight
*/
struct MetaInfo {
/*!
* \brief information needed by booster
* BoosterInfo does not implement save and load,
* all serialization is done in MetaInfo
*/
BoosterInfo info;
/*! \brief label of each instance */
std::vector<float> labels;
/*!
* \brief the index of begin and end of a group
* needed when the learning task is ranking
*/
std::vector<bst_uint> group_ptr;
/*! \brief weights of each instance, optional */
std::vector<float> weights;
/*!
* \brief initialized margins,
* if specified, xgboost will start from this initial margin
* can be used to specify initial prediction to boost from
*/
std::vector<float> base_margin;
/*! \brief version flag, used to check version of this info */
static const int kVersion = 0;
// constructor
MetaInfo(void) {}
/*! \return number of rows in dataset */
inline size_t num_row(void) const {
return info.num_row;
}
/*! \return number of columns in dataset */
inline size_t num_col(void) const {
return info.num_col;
}
/*! \brief clear all the information */
inline void Clear(void) {
labels.clear();
group_ptr.clear();
weights.clear();
info.root_index.clear();
base_margin.clear();
info.num_row = info.num_col = 0;
}
/*! \brief get weight of each instances */
inline float GetWeight(size_t i) const {
if (weights.size() != 0) {
return weights[i];
} else {
return 1.0f;
}
}
inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*)
int version = kVersion;
fo.Write(&version, sizeof(version));
fo.Write(&info.num_row, sizeof(info.num_row));
fo.Write(&info.num_col, sizeof(info.num_col));
fo.Write(labels);
fo.Write(group_ptr);
fo.Write(weights);
fo.Write(info.root_index);
fo.Write(base_margin);
}
inline void LoadBinary(utils::IStream &fi) { // NOLINT(*)
int version;
utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&info.num_col, sizeof(info.num_col)) != 0, "MetaInfo: invalid format");
utils::Check(fi.Read(&labels), "MetaInfo: invalid format");
utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format");
utils::Check(fi.Read(&weights), "MetaInfo: invalid format");
utils::Check(fi.Read(&info.root_index), "MetaInfo: invalid format");
utils::Check(fi.Read(&base_margin), "MetaInfo: invalid format");
}
// try to load group information from file, if exists
inline bool TryLoadGroup(const char* fname, bool silent = false) {
using namespace std;
FILE *fi = fopen64(fname, "r");
if (fi == NULL) return false;
group_ptr.push_back(0);
unsigned nline;
while (fscanf(fi, "%u", &nline) == 1) {
group_ptr.push_back(group_ptr.back()+nline);
}
if (!silent) {
utils::Printf("%u groups are loaded from %s\n",
static_cast<unsigned>(group_ptr.size()-1), fname);
}
fclose(fi);
return true;
}
inline std::vector<float>& GetFloatInfo(const char *field) {
using namespace std;
if (!strcmp(field, "label")) return labels;
if (!strcmp(field, "weight")) return weights;
if (!strcmp(field, "base_margin")) return base_margin;
utils::Error("unknown field %s", field);
return labels;
}
inline const std::vector<float>& GetFloatInfo(const char *field) const {
return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*)
}
inline std::vector<unsigned> &GetUIntInfo(const char *field) {
using namespace std;
if (!strcmp(field, "root_index")) return info.root_index;
if (!strcmp(field, "fold_index")) return info.fold_index;
utils::Error("unknown field %s", field);
return info.root_index;
}
inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
return ((MetaInfo*)this)->GetUIntInfo(field); // NOLINT(*)
}
// try to load weight information from file, if exists
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
using namespace std;
std::vector<float> &data = this->GetFloatInfo(field);
FILE *fi = fopen64(fname, "r");
if (fi == NULL) return false;
float wt;
while (fscanf(fi, "%f", &wt) == 1) {
data.push_back(wt);
}
if (!silent) {
utils::Printf("loading %s from %s\n", field, fname);
}
fclose(fi);
return true;
}
};
/*!
* \brief data object used for learning,
* \tparam FMatrix type of feature data source
*/
struct DMatrix {
/*!
* \brief magic number associated with this object
* used to check if it is specific instance
*/
const int magic;
/*! \brief meta information about the dataset */
MetaInfo info;
/*!
* \brief cache pointer to verify if the data structure is cached in some learner
* used to verify if DMatrix is cached
*/
void *cache_learner_ptr_;
/*! \brief default constructor */
explicit DMatrix(int magic) : magic(magic), cache_learner_ptr_(NULL) {}
/*! \brief get feature matrix about data content */
virtual IFMatrix *fmat(void) const = 0;
// virtual destructor
virtual ~DMatrix(void){}
};
} // namespace learner
} // namespace xgboost
#endif // XGBOOST_LEARNER_DMATRIX_H_

python-package/xgboost/libpath.py

@@ -20,8 +20,8 @@ def find_lib_path():
"""
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
# make pythonpack hack: copy this directory one level upper for setup.py
dll_path = [curr_path, os.path.join(curr_path, '../../wrapper/'),
os.path.join(curr_path, './wrapper/')]
dll_path = [curr_path, os.path.join(curr_path, '../../lib/'),
os.path.join(curr_path, './lib/')]
if os.name == 'nt':
if platform.architecture()[0] == '64bit':
dll_path.append(os.path.join(curr_path, '../../windows/x64/Release/'))
@@ -32,9 +32,9 @@ def find_lib_path():
# hack for pip installation when copy all parent source directory here
dll_path.append(os.path.join(curr_path, './windows/Release/'))
if os.name == 'nt':
dll_path = [os.path.join(p, 'xgboost_wrapper.dll') for p in dll_path]
dll_path = [os.path.join(p, 'libxgboost.dll') for p in dll_path]
else:
dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path]
dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
# From GitHub issues, most installation errors come from machines w/o compilers
if len(lib_path) == 0 and not os.environ.get('XGBOOST_BUILD_DOC', False):

rabit

@@ -1 +1 @@
Subproject commit bed63208af736c4aa289b629fbe5396bd9f513d9
Subproject commit 05b958c178b16d707ff16b4b05506be124087e13

src/c_api/c_api.cc (new file)

@@ -0,0 +1,528 @@
// Copyright (c) 2014 by Contributors
#include <xgboost/data.h>
#include <xgboost/learner.h>
#include <xgboost/c_api.h>
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include <memory>
#include "./c_api_error.h"
#include "../data/simple_csr_source.h"
#include "../common/thread_local.h"
#include "../common/math.h"
#include "../common/io.h"
#include "../common/group_data.h"
namespace xgboost {
// booster wrapper, kept for backward compatibility.
class Booster {
public:
explicit Booster(const std::vector<DMatrix*>& cache_mats)
: configured_(false),
initialized_(false),
learner_(Learner::Create(cache_mats)) {}
inline Learner* learner() {
return learner_.get();
}
inline void SetParam(const std::string& name, const std::string& val) {
cfg_.push_back(std::make_pair(name, val));
if (configured_) {
learner_->Configure(cfg_);
}
}
inline void LazyInit() {
if (!configured_) {
learner_->Configure(cfg_);
configured_ = true;
}
if (!initialized_) {
learner_->InitModel();
initialized_ = true;
}
}
inline void LoadModel(dmlc::Stream* fi) {
learner_->Load(fi);
initialized_ = true;
}
public:
bool configured_;
bool initialized_;
std::unique_ptr<Learner> learner_;
std::vector<std::pair<std::string, std::string> > cfg_;
};
} // namespace xgboost
using namespace xgboost; // NOLINT(*);
/*! \brief entry to easily hold returned information */
struct XGBAPIThreadLocalEntry {
/*! \brief result holder for returning string */
std::string ret_str;
/*! \brief result holder for returning strings */
std::vector<std::string> ret_vec_str;
/*! \brief result holder for returning string pointers */
std::vector<const char *> ret_vec_charp;
/*! \brief returning float vector. */
std::vector<float> ret_vec_float;
/*! \brief temp variable of gradient pairs. */
std::vector<bst_gpair> tmp_gpair;
};
// define the threadlocal store.
typedef xgboost::common::ThreadLocalStore<XGBAPIThreadLocalEntry> XGBAPIThreadLocalStore;
int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out) {
API_BEGIN();
*out = DMatrix::Load(
fname, silent != 0, false);
API_END();
}
int XGDMatrixCreateFromCSR(const bst_ulong* indptr,
const unsigned *indices,
const float* data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
data::SimpleCSRSource& mat = *source;
mat.row_ptr_.resize(nindptr);
for (bst_ulong i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.row_data_.resize(nelem);
for (bst_ulong i = 0; i < nelem; ++i) {
mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]);
mat.info.num_col = std::max(mat.info.num_col,
static_cast<size_t>(indices[i] + 1));
}
mat.info.num_row = nindptr - 1;
mat.info.num_nonzero = static_cast<uint64_t>(nelem);
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixCreateFromCSC(const bst_ulong* col_ptr,
const unsigned* indices,
const float* data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
data::SimpleCSRSource& mat = *source;
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread);
long ncol = static_cast<long>(nindptr - 1); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.AddBudget(indices[j], tid);
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.Push(indices[j],
RowBatch::Entry(static_cast<bst_uint>(i), data[j]),
tid);
}
}
mat.info.num_row = mat.row_ptr_.size() - 1;
mat.info.num_col = static_cast<uint64_t>(ncol);
mat.info.num_nonzero = nelem;
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixCreateFromMat(const float* data,
bst_ulong nrow,
bst_ulong ncol,
float missing,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
data::SimpleCSRSource& mat = *source;
bool nan_missing = common::CheckNAN(missing);
mat.info.num_row = nrow;
mat.info.num_col = ncol;
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
bst_ulong nelem = 0;
for (bst_ulong j = 0; j < ncol; ++j) {
if (common::CheckNAN(data[j])) {
CHECK(nan_missing)
<< "There are NAN in the matrix, however, you did not set missing=NAN";
} else {
if (nan_missing || data[j] != missing) {
mat.row_data_.push_back(RowBatch::Entry(j, data[j]));
++nelem;
}
}
}
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
}
mat.info.num_nonzero = mat.row_data_.size();
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int* idxset,
bst_ulong len,
DMatrixHandle* out) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
API_BEGIN();
data::SimpleCSRSource src;
src.CopyFrom(static_cast<DMatrix*>(handle));
data::SimpleCSRSource& ret = *source;
CHECK_EQ(src.info.group_ptr.size(), 0)
<< "slice does not support group structure";
ret.Clear();
ret.info.num_row = len;
ret.info.num_col = src.info.num_col;
dmlc::DataIter<RowBatch>* iter = &src;
iter->BeforeFirst();
CHECK(iter->Next());
const RowBatch& batch = iter->Value();
for (bst_ulong i = 0; i < len; ++i) {
const int ridx = idxset[i];
RowBatch::Inst inst = batch[ridx];
CHECK_LT(static_cast<bst_ulong>(ridx), batch.size);
ret.row_data_.resize(ret.row_data_.size() + inst.length);
std::memcpy(dmlc::BeginPtr(ret.row_data_) + ret.row_ptr_.back(), inst.data,
sizeof(RowBatch::Entry) * inst.length);
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
ret.info.num_nonzero += inst.length;
if (src.info.labels.size() != 0) {
ret.info.labels.push_back(src.info.labels[ridx]);
}
if (src.info.weights.size() != 0) {
ret.info.weights.push_back(src.info.weights[ridx]);
}
if (src.info.root_index.size() != 0) {
ret.info.root_index.push_back(src.info.root_index[ridx]);
}
}
*out = DMatrix::Create(std::move(source));
API_END();
}
int XGDMatrixFree(DMatrixHandle handle) {
API_BEGIN();
delete static_cast<DMatrix*>(handle);
API_END();
}
int XGDMatrixSaveBinary(DMatrixHandle handle,
const char* fname,
int silent) {
API_BEGIN();
static_cast<DMatrix*>(handle)->SaveToLocalFile(fname);
API_END();
}
int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char* field,
const float* info,
bst_ulong len) {
API_BEGIN();
static_cast<DMatrix*>(handle)->info().SetInfo(field, info, kFloat32, len);
API_END();
}
int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char* field,
const unsigned* info,
bst_ulong len) {
API_BEGIN();
static_cast<DMatrix*>(handle)->info().SetInfo(field, info, kUInt32, len);
API_END();
}
int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned* group,
bst_ulong len) {
API_BEGIN();
DMatrix *pmat = static_cast<DMatrix*>(handle);
MetaInfo& info = pmat->info();
info.group_ptr.resize(len + 1);
info.group_ptr[0] = 0;
for (uint64_t i = 0; i < len; ++i) {
info.group_ptr[i + 1] = info.group_ptr[i] + group[i];
}
API_END();
}
int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char* field,
bst_ulong* out_len,
const float** out_dptr) {
API_BEGIN();
const MetaInfo& info = static_cast<const DMatrix*>(handle)->info();
const std::vector<float>* vec = nullptr;
if (!std::strcmp(field, "label")) {
vec = &info.labels;
} else if (!std::strcmp(field, "weight")) {
vec = &info.weights;
} else if (!std::strcmp(field, "base_margin")) {
vec = &info.base_margin;
} else {
LOG(FATAL) << "Unknown float field name " << field;
}
*out_len = static_cast<bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
API_END();
}
int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const unsigned **out_dptr) {
API_BEGIN();
const MetaInfo& info = static_cast<const DMatrix*>(handle)->info();
const std::vector<unsigned>* vec = nullptr;
if (!std::strcmp(field, "root_index")) {
vec = &info.root_index;
} else {
LOG(FATAL) << "Unknown uint field name " << field;
}
*out_len = static_cast<bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
API_END();
}
int XGDMatrixNumRow(const DMatrixHandle handle,
bst_ulong *out) {
API_BEGIN();
*out = static_cast<bst_ulong>(static_cast<const DMatrix*>(handle)->info().num_row);
API_END();
}
int XGDMatrixNumCol(const DMatrixHandle handle,
bst_ulong *out) {
API_BEGIN();
*out = static_cast<size_t>(static_cast<const DMatrix*>(handle)->info().num_col);
API_END();
}
// xgboost implementation
int XGBoosterCreate(DMatrixHandle dmats[],
bst_ulong len,
BoosterHandle *out) {
API_BEGIN();
std::vector<DMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
mats.push_back(static_cast<DMatrix*>(dmats[i]));
}
*out = new Booster(mats);
API_END();
}
int XGBoosterFree(BoosterHandle handle) {
API_BEGIN();
delete static_cast<Booster*>(handle);
API_END();
}
int XGBoosterSetParam(BoosterHandle handle,
const char *name,
const char *value) {
API_BEGIN();
static_cast<Booster*>(handle)->SetParam(name, value);
API_END();
}
int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain) {
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
bst->LazyInit();
bst->learner()->UpdateOneIter(iter, dtr);
API_END();
}
int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len) {
std::vector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
DMatrix* dtr = static_cast<DMatrix*>(dtrain);
tmp_gpair.resize(len);
for (bst_ulong i = 0; i < len; ++i) {
tmp_gpair[i] = bst_gpair(grad[i], hess[i]);
}
bst->LazyInit();
bst->learner()->BoostOneIter(0, dtr, &tmp_gpair);
API_END();
}
int XGBoosterEvalOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dmats[],
const char* evnames[],
bst_ulong len,
const char** out_str) {
std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
std::vector<DMatrix*> data_sets;
std::vector<std::string> data_names;
for (bst_ulong i = 0; i < len; ++i) {
data_sets.push_back(static_cast<DMatrix*>(dmats[i]));
data_names.push_back(std::string(evnames[i]));
}
bst->LazyInit();
eval_str = bst->learner()->EvalOneIter(iter, data_sets, data_names);
*out_str = eval_str.c_str();
API_END();
}
int XGBoosterPredict(BoosterHandle handle,
DMatrixHandle dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *len,
const float **out_result) {
std::vector<float>& preds = XGBAPIThreadLocalStore::Get()->ret_vec_float;
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Predict(
static_cast<DMatrix*>(dmat),
(option_mask & 1) != 0,
&preds, ntree_limit,
(option_mask & 2) != 0);
*out_result = dmlc::BeginPtr(preds);
*len = static_cast<bst_ulong>(preds.size());
API_END();
}
int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
API_BEGIN();
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r"));
static_cast<Booster*>(handle)->LoadModel(fi.get());
API_END();
}
int XGBoosterSaveModel(BoosterHandle handle, const char* fname) {
API_BEGIN();
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w"));
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Save(fo.get());
API_END();
}
int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
const void* buf,
bst_ulong len) {
API_BEGIN();
common::MemoryFixSizeBuffer fs((void*)buf, len); // NOLINT(*)
static_cast<Booster*>(handle)->LoadModel(&fs);
API_END();
}
int XGBoosterGetModelRaw(BoosterHandle handle,
bst_ulong* out_len,
const char** out_dptr) {
std::string& raw_str = XGBAPIThreadLocalStore::Get()->ret_str;
raw_str.resize(0);
API_BEGIN();
common::MemoryBufferStream fo(&raw_str);
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Save(&fo);
*out_dptr = dmlc::BeginPtr(raw_str);
*out_len = static_cast<bst_ulong>(raw_str.length());
API_END();
}
inline void XGBoostDumpModelImpl(
BoosterHandle handle,
const FeatureMap& fmap,
int with_stats,
bst_ulong* len,
const char*** out_models) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
Booster *bst = static_cast<Booster*>(handle);
bst->LazyInit();
str_vecs = bst->learner()->Dump2Text(fmap, with_stats != 0);
charp_vecs.resize(str_vecs.size());
for (size_t i = 0; i < str_vecs.size(); ++i) {
charp_vecs[i] = str_vecs[i].c_str();
}
*out_models = dmlc::BeginPtr(charp_vecs);
*len = static_cast<bst_ulong>(charp_vecs.size());
}
int XGBoosterDumpModel(BoosterHandle handle,
const char* fmap,
int with_stats,
bst_ulong* len,
const char*** out_models) {
API_BEGIN();
FeatureMap featmap;
if (strlen(fmap) != 0) {
std::unique_ptr<dmlc::Stream> fs(
dmlc::Stream::Create(fmap, "r"));
dmlc::istream is(fs.get());
featmap.LoadText(is);
}
XGBoostDumpModelImpl(handle, featmap, with_stats, len, out_models);
API_END();
}
int XGBoosterDumpModelWithFeatures(BoosterHandle handle,
int fnum,
const char** fname,
const char** ftype,
int with_stats,
bst_ulong* len,
const char*** out_models) {
API_BEGIN();
FeatureMap featmap;
for (int i = 0; i < fnum; ++i) {
featmap.PushBack(i, fname[i], ftype[i]);
}
XGBoostDumpModelImpl(handle, featmap, with_stats, len, out_models);
API_END();
}
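
Taken together, the entry points above already cover the full train-and-predict round trip from C that the demo run exercises. A hedged end-to-end sketch (the paths, parameter value, and round count are illustrative):

#include <xgboost/c_api.h>

int main() {
  DMatrixHandle dtrain;
  XGDMatrixCreateFromFile("../data/agaricus.txt.train", 1, &dtrain);
  BoosterHandle booster;
  XGBoosterCreate(&dtrain, 1, &booster);
  XGBoosterSetParam(booster, "objective", "binary:logistic");
  for (int iter = 0; iter < 10; ++iter) {
    XGBoosterUpdateOneIter(booster, iter, dtrain);  // LazyInit runs Configure + InitModel once
  }
  bst_ulong len = 0;
  const float* preds = nullptr;
  XGBoosterPredict(booster, dtrain, 0, 0, &len, &preds);
  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}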

src/c_api/c_api_error.cc (new file)

@@ -0,0 +1,21 @@
/*!
* Copyright (c) 2015 by Contributors
* \file c_api_error.cc
* \brief C error handling
*/
#include "./c_api_error.h"
#include "../common/thread_local.h"
struct XGBAPIErrorEntry {
std::string last_error;
};
typedef xgboost::common::ThreadLocalStore<XGBAPIErrorEntry> XGBAPIErrorStore;
const char *XGBGetLastError() {
return XGBAPIErrorStore::Get()->last_error.c_str();
}
void XGBAPISetLastError(const char* msg) {
XGBAPIErrorStore::Get()->last_error = msg;
}

src/c_api/c_api_error.h (new file)

@@ -0,0 +1,39 @@
/*!
* Copyright (c) 2015 by Contributors
* \file c_api_error.h
* \brief Error handling for C API.
*/
#ifndef XGBOOST_C_API_C_API_ERROR_H_
#define XGBOOST_C_API_C_API_ERROR_H_
#include <dmlc/base.h>
#include <dmlc/logging.h>
#include <xgboost/c_api.h>
/*! \brief macro to guard beginning and end section of all functions */
#define API_BEGIN() try {
/*! \brief every function starts with API_BEGIN();
and finishes with API_END() or API_END_HANDLE_ERROR */
#define API_END() } catch(dmlc::Error &_except_) { return XGBAPIHandleException(_except_); } return 0; // NOLINT(*)
/*!
* \brief every function starts with API_BEGIN();
* and finishes with API_END() or API_END_HANDLE_ERROR
* The finally clause contains procedure to cleanup states when an error happens.
*/
#define API_END_HANDLE_ERROR(Finalize) } catch(dmlc::Error &_except_) { Finalize; return XGBAPIHandleException(_except_); } return 0; // NOLINT(*)
/*!
* \brief Set the last error message needed by C API
* \param msg The error message to set.
*/
void XGBAPISetLastError(const char* msg);
/*!
* \brief handle a thrown exception
* \param e the exception
* \return the return value of API after exception is handled
*/
inline int XGBAPIHandleException(const dmlc::Error &e) {
XGBAPISetLastError(e.what());
return -1;
}
#endif // XGBOOST_C_API_C_API_ERROR_H_
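
A hedged sketch of how a C API entry point composes these macros (XGExampleNumRow is hypothetical; the real XGDMatrixNumRow in c_api.cc follows the same pattern):

int XGExampleNumRow(DMatrixHandle handle, bst_ulong *out) {
  API_BEGIN();  // opens the try block
  CHECK(handle != nullptr) << "invalid DMatrix handle";  // throws dmlc::Error on failure
  *out = static_cast<const xgboost::DMatrix*>(handle)->info().num_row;
  API_END();    // catches dmlc::Error, stores the message, returns -1; otherwise 0
}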

src/cli_main.cc

@@ -11,8 +11,9 @@
#include <xgboost/learner.h>
#include <xgboost/data.h>
#include <dmlc/logging.h>
#include <xgboost/logging.h>
#include <dmlc/timer.h>
#include <iomanip>
#include <ctime>
#include <string>
#include <cstdio>
@@ -107,6 +108,8 @@ struct CLIParam : public dmlc::Parameter<CLIParam> {
.describe("Data split mode.");
DMLC_DECLARE_FIELD(ntree_limit).set_default(0).set_lower_bound(0)
.describe("Number of trees used for prediction, 0 means use all trees.");
DMLC_DECLARE_FIELD(pred_margin).set_default(false)
.describe("Whether to predict margin value instead of probability.");
DMLC_DECLARE_FIELD(dump_stats).set_default(false)
.describe("Whether dump the model statistics.");
DMLC_DECLARE_FIELD(name_fmap).set_default("NULL")
@@ -115,7 +118,8 @@ struct CLIParam : public dmlc::Parameter<CLIParam> {
.describe("Name of the output dump text file.");
// alias
DMLC_DECLARE_ALIAS(train_path, data);
DMLC_DECLARE_ALIAS(test_path, "test:data");
DMLC_DECLARE_ALIAS(test_path, test:data);
DMLC_DECLARE_ALIAS(name_fmap, fmap);
}
// customized configure function of CLIParam
inline void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) {
@@ -149,7 +153,7 @@ DMLC_REGISTER_PARAMETER(CLIParam);
void CLITrain(const CLIParam& param) {
if (rabit::IsDistributed()) {
std::string pname = rabit::GetProcessorName();
LOG(INFO) << "start " << pname << ":" << rabit::GetRank();
LOG(CONSOLE) << "start " << pname << ":" << rabit::GetRank();
}
// load in data.
std::unique_ptr<DMatrix> dtrain(
@@ -178,6 +182,8 @@ void CLITrain(const CLIParam& param) {
std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get());
} else {
learner->InitModel();
}
}
// start training.
@@ -186,7 +192,7 @@ void CLITrain(const CLIParam& param) {
double elapsed = dmlc::GetTime() - start;
if (version % 2 == 0) {
if (param.silent == 0) {
LOG(INFO) << "boosting round " << i << ", " << elapsed << " sec elapsed";
LOG(CONSOLE) << "boosting round " << i << ", " << elapsed << " sec elapsed";
}
learner->UpdateOneIter(i, dtrain.get());
if (learner->AllowLazyCheckPoint()) {
@@ -200,16 +206,18 @@ void CLITrain(const CLIParam& param) {
std::string res = learner->EvalOneIter(i, eval_datasets, eval_data_names);
if (rabit::IsDistributed()) {
if (rabit::GetRank() == 0) {
rabit::TrackerPrint(res + "\n");
LOG(TRACKER) << res;
}
} else {
if (param.silent < 2) {
LOG(INFO) << res;
LOG(CONSOLE) << res;
}
}
if (param.save_period != 0 && (i + 1) % param.save_period == 0) {
std::ostringstream os;
os << param.model_dir << '/' << i + 1 << ".model";
os << param.model_dir << '/'
<< std::setfill('0') << std::setw(4)
<< i + 1 << ".model";
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(os.str().c_str(), "w"));
learner->Save(fo.get());
@ -228,7 +236,9 @@ void CLITrain(const CLIParam& param) {
param.model_out != "NONE") {
std::ostringstream os;
if (param.model_out == "NULL") {
os << param.model_dir << '/' << param.num_round << ".model";
os << param.model_dir << '/'
<< std::setfill('0') << std::setw(4)
<< param.num_round << ".model";
} else {
os << param.model_out;
}
@ -239,7 +249,7 @@ void CLITrain(const CLIParam& param) {
if (param.silent == 0) {
double elapsed = dmlc::GetTime() - start;
LOG(INFO) << "update end, " << elapsed << " sec in all";
LOG(CONSOLE) << "update end, " << elapsed << " sec in all";
}
}
@ -272,6 +282,8 @@ void CLIDump2Text(const CLIParam& param) {
}
void CLIPredict(const CLIParam& param) {
CHECK_NE(param.test_path, "NULL")
<< "Test dataset parameter test:data must be specified.";
// load data
std::unique_ptr<DMatrix> dtest(
DMatrix::Load(param.test_path, param.silent != 0, param.dsplit == 2));
@ -284,12 +296,12 @@ void CLIPredict(const CLIParam& param) {
learner->Load(fi.get());
if (param.silent == 0) {
LOG(INFO) << "start prediction...";
LOG(CONSOLE) << "start prediction...";
}
std::vector<float> preds;
learner->Predict(dtest.get(), param.pred_margin, &preds, param.ntree_limit);
if (param.silent == 0) {
LOG(INFO) << "writing prediction to " << param.name_pred;
LOG(CONSOLE) << "writing prediction to " << param.name_pred;
}
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(param.name_pred.c_str(), "w"));

View File

@ -8,7 +8,7 @@
#ifndef XGBOOST_COMMON_BASE64_H_
#define XGBOOST_COMMON_BASE64_H_
#include <dmlc/logging.h>
#include <xgboost/logging.h>
#include <cctype>
#include <cstdio>
#include <string>

15
src/common/common.cc Normal file
View File

@ -0,0 +1,15 @@
/*!
* Copyright 2015 by Contributors
* \file common.cc
* \brief Definitions of global variables used in common.
*/
#include "./random.h"
namespace xgboost {
namespace common {
RandomEngine& GlobalRandom() {
static RandomEngine inst;
return inst;
}
}  // namespace common
} // namespace xgboost
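
A short usage sketch of the shared engine, mirroring the row-subsampling code in simple_dmatrix.cc; KeepRow is an illustrative helper, not part of this file:

#include <random>
#include "./random.h"

// Illustrative helper: draw from the process-wide engine. Seeding it once
// per iteration (as the learner does) makes every consumer deterministic.
inline bool KeepRow(float pkeep) {
  auto& rnd = xgboost::common::GlobalRandom();
  std::bernoulli_distribution coin_flip(pkeep);
  return pkeep == 1.0f || coin_flip(rnd);
}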

View File

@ -8,7 +8,7 @@
#define XGBOOST_COMMON_QUANTILE_H_
#include <dmlc/base.h>
#include <dmlc/logging.h>
#include <xgboost/logging.h>
#include <cmath>
#include <vector>
#include <cstring>

77
src/common/thread_local.h Normal file
View File

@ -0,0 +1,77 @@
/*!
* Copyright (c) 2015 by Contributors
* \file thread_local.h
* \brief Common utility for thread local storage.
*/
#ifndef XGBOOST_COMMON_THREAD_LOCAL_H_
#define XGBOOST_COMMON_THREAD_LOCAL_H_
#include <mutex>
#include <memory>
#include <vector>
namespace xgboost {
namespace common {
// macro handling for thread-local variables
#ifdef __GNUC__
#define MX_TREAD_LOCAL __thread
#elif __STDC_VERSION__ >= 201112L
#define MX_TREAD_LOCAL _Thread_local
#elif defined(_MSC_VER)
#define MX_TREAD_LOCAL __declspec(thread)
#endif
#ifndef MX_TREAD_LOCAL
#pragma message("Warning: Threadlocal is not enabled")
#endif
/*!
* \brief A threadlocal store to store threadlocal variables.
* Will return a thread local singleton of type T
* \tparam T the type to store
*/
template<typename T>
class ThreadLocalStore {
public:
/*! \return get a thread local singleton */
static T* Get() {
static MX_TREAD_LOCAL T* ptr = nullptr;
if (ptr == nullptr) {
ptr = new T();
Singleton()->RegisterDelete(ptr);
}
return ptr;
}
private:
/*! \brief constructor */
ThreadLocalStore() {}
/*! \brief destructor */
~ThreadLocalStore() {
for (size_t i = 0; i < data_.size(); ++i) {
delete data_[i];
}
}
/*! \return singleton of the store */
static ThreadLocalStore<T> *Singleton() {
static ThreadLocalStore<T> inst;
return &inst;
}
/*!
* \brief register a pointer for internal deletion
* \param str the pointer to be deleted on teardown
*/
void RegisterDelete(T *str) {
std::unique_lock<std::mutex> lock(mutex_);
data_.push_back(str);
lock.unlock();
}
/*! \brief internal mutex */
std::mutex mutex_;
/*!\brief internal data */
std::vector<T*> data_;
};
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_THREAD_LOCAL_H_
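
A usage sketch under the assumption of a caller-defined entry type (ScratchEntry below is illustrative): Get() lazily constructs one instance per calling thread and registers it with the singleton store, so all instances are freed at process teardown.

#include <string>
#include <vector>
#include "./thread_local.h"

// Hypothetical per-thread scratch buffers, e.g. for C API return values.
struct ScratchEntry {
  std::string ret_str;
  std::vector<float> ret_vec;
};

void UseScratch() {
  // Every calling thread receives its own lazily constructed entry.
  ScratchEntry* e = xgboost::common::ThreadLocalStore<ScratchEntry>::Get();
  e->ret_str = "thread-local value";
}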

View File

@ -3,7 +3,12 @@
* \file data.cc
*/
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <cstring>
#include "./sparse_batch_page.h"
#include "./simple_dmatrix.h"
#include "./simple_csr_source.h"
#include "../common/io.h"
namespace xgboost {
// implementation of inline functions
@ -83,4 +88,83 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
}
}
DMatrix* DMatrix::Load(const std::string& uri,
bool silent,
bool load_row_split,
const std::string& file_format) {
std::string fname, cache_file;
size_t dlm_pos = uri.find('#');
if (dlm_pos != std::string::npos) {
cache_file = uri.substr(dlm_pos + 1, uri.length());
fname = uri.substr(0, dlm_pos);
CHECK_EQ(cache_file.find('#'), std::string::npos)
<< "Only one `#` is allowed in file path for cache file specification.";
if (load_row_split) {
std::ostringstream os;
os << cache_file << ".r" << rabit::GetRank();
cache_file = os.str();
}
} else {
fname = uri;
}
int partid = 0, npart = 1;
if (load_row_split) {
partid = rabit::GetRank();
npart = rabit::GetWorldSize();
}
// legacy handling of binary data loading
if (file_format == "auto" && !load_row_split) {
int magic;
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
common::PeekableInStream is(fi.get());
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) &&
magic == data::SimpleCSRSource::kMagic) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
source->LoadBinary(&is);
DMatrix* dmat = DMatrix::Create(std::move(source), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
}
return dmat;
}
}
std::string ftype = file_format;
if (file_format == "auto") ftype = "libsvm";
std::unique_ptr<dmlc::Parser<uint32_t> > parser(
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, ftype.c_str()));
DMatrix* dmat = DMatrix::Create(parser.get(), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
}
return dmat;
}
DMatrix* DMatrix::Create(dmlc::Parser<uint32_t>* parser,
const std::string& cache_prefix) {
if (cache_prefix.length() == 0) {
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
source->CopyFrom(parser);
return DMatrix::Create(std::move(source), cache_prefix);
} else {
LOG(FATAL) << "external memory not yet implemented";
return nullptr;
}
}
void DMatrix::SaveToLocalFile(const std::string& fname) {
data::SimpleCSRSource source;
source.CopyFrom(this);
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
source.SaveBinary(fo.get());
}
DMatrix* DMatrix::Create(std::unique_ptr<DataSource>&& source,
const std::string& cache_prefix) {
return new data::SimpleDMatrix(std::move(source));
}
} // namespace xgboost
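
A loading sketch against the interface defined above; the file names are placeholders:

#include <memory>
#include <xgboost/data.h>

void LoadExample() {
  // "auto" first probes for the SimpleCSRSource binary magic, then falls
  // back to the libsvm parser.
  std::unique_ptr<xgboost::DMatrix> dtrain(
      xgboost::DMatrix::Load("train.libsvm", /*silent=*/false,
                             /*load_row_split=*/false, "auto"));
  // A URI such as "train.libsvm#dtrain.cache" is split into the data path
  // and a cache_file prefix (".r<rank>" is appended under row splitting),
  // but the external-memory branch in Create() still ends in LOG(FATAL).
}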

View File

@ -3,7 +3,7 @@
* \file simple_csr_source.cc
*/
#include <dmlc/base.h>
#include <dmlc/logging.h>
#include <xgboost/logging.h>
#include "./simple_csr_source.h"
namespace xgboost {
@ -80,7 +80,7 @@ void SimpleCSRSource::SaveBinary(dmlc::Stream* fo) const {
}
void SimpleCSRSource::BeforeFirst() {
at_first_ = false;
at_first_ = true;
}
bool SimpleCSRSource::Next() {

265
src/data/simple_dmatrix.cc Normal file
View File

@ -0,0 +1,265 @@
/*!
* Copyright 2014 by Contributors
* \file simple_dmatrix.cc
* \brief the input data structure for gradient boosting
* \author Tianqi Chen
*/
#include <xgboost/data.h>
#include <limits>
#include <algorithm>
#include <vector>
#include "./simple_dmatrix.h"
#include "../common/random.h"
#include "../common/group_data.h"
namespace xgboost {
namespace data {
bool SimpleDMatrix::ColBatchIter::Next() {
if (data_ptr_ >= cpages_.size()) return false;
data_ptr_ += 1;
SparsePage* pcol = cpages_[data_ptr_ - 1].get();
batch_.size = col_index_.size();
col_data_.resize(col_index_.size(), SparseBatch::Inst(NULL, 0));
for (size_t i = 0; i < col_data_.size(); ++i) {
const bst_uint ridx = col_index_[i];
col_data_[i] = SparseBatch::Inst
(dmlc::BeginPtr(pcol->data) + pcol->offset[ridx],
static_cast<bst_uint>(pcol->offset[ridx + 1] - pcol->offset[ridx]));
}
batch_.col_index = dmlc::BeginPtr(col_index_);
batch_.col_data = dmlc::BeginPtr(col_data_);
return true;
}
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
size_t ncol = this->info().num_col;
col_iter_.col_index_.resize(ncol);
for (size_t i = 0; i < ncol; ++i) {
col_iter_.col_index_[i] = static_cast<bst_uint>(i);
}
col_iter_.BeforeFirst();
return &col_iter_;
}
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator(const std::vector<bst_uint>&fset) {
size_t ncol = this->info().num_col;
col_iter_.col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]);
}
col_iter_.BeforeFirst();
return &col_iter_;
}
void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
float pkeep,
size_t max_row_perbatch) {
if (this->HaveColAccess()) return;
col_iter_.cpages_.clear();
if (info().num_row < max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeOneBatch(enabled, pkeep, page.get());
col_iter_.cpages_.push_back(std::move(page));
} else {
this->MakeManyBatch(enabled, pkeep, max_row_perbatch);
}
// setup col-size
col_size_.resize(info().num_col);
std::fill(col_size_.begin(), col_size_.end(), 0);
for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) {
SparsePage *pcol = col_iter_.cpages_[i].get();
for (size_t j = 0; j < pcol->Size(); ++j) {
col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
}
}
}
// internal function to make one batch from row iter.
void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled,
float pkeep,
SparsePage *pcol) {
// clear rowset
buffered_rowset_.clear();
// bit map
int nthread;
std::vector<bool> bmap;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
// start working
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch& batch = iter->Value();
bmap.resize(bmap.size() + batch.size, true);
std::bernoulli_distribution coin_flip(pkeep);
auto& rnd = common::GlobalRandom();
long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
} else {
bmap[ridx] = false;  // index by global row id: bmap spans all batches seen so far
}
}
#pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.AddBudget(inst[j].index, tid);
}
}
}
}
}
builder.InitStorage();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch& batch = iter->Value();
#pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.Push(inst[j].index,
SparseBatch::Entry((bst_uint)(batch.base_rowid+i),
inst[j].fvalue), tid);
}
}
}
}
}
CHECK_EQ(pcol->Size(), info().num_col);
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
std::sort(dmlc::BeginPtr(pcol->data) + pcol->offset[i],
dmlc::BeginPtr(pcol->data) + pcol->offset[i + 1],
SparseBatch::Entry::CmpValue);
}
}
}
void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
float pkeep,
size_t max_row_perbatch) {
size_t btop = 0;
std::bernoulli_distribution coin_flip(pkeep);
auto& rnd = common::GlobalRandom();
buffered_rowset_.clear();
// internal temp cache
SparsePage tmp; tmp.Clear();
// start working
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
tmp.Push(batch[i]);
}
if (tmp.Size() >= max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeColPage(tmp.GetRowBatch(0),
dmlc::BeginPtr(buffered_rowset_) + btop,
enabled, page.get());
col_iter_.cpages_.push_back(std::move(page));
btop = buffered_rowset_.size();
tmp.Clear();
}
}
}
if (tmp.Size() != 0) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeColPage(tmp.GetRowBatch(0),
dmlc::BeginPtr(buffered_rowset_) + btop,
enabled, page.get());
col_iter_.cpages_.push_back(std::move(page));
}
}
// make column page from a subset of row batches
void SimpleDMatrix::MakeColPage(const RowBatch& batch,
const bst_uint* ridx,
const std::vector<bool>& enabled,
SparsePage* pcol) {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
if (nthread > max_nthread) {
nthread = max_nthread;
}
}
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
bst_omp_uint ndata = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
int tid = omp_get_thread_num();
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
const SparseBatch::Entry &e = inst[j];
if (enabled[e.index]) {
builder.AddBudget(e.index, tid);
}
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
int tid = omp_get_thread_num();
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
const SparseBatch::Entry &e = inst[j];
if (enabled[e.index]) {  // keep consistent with the budget phase above
builder.Push(e.index,
SparseBatch::Entry(ridx[i], e.fvalue),
tid);
}
}
}
CHECK_EQ(pcol->Size(), info().num_col);
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
std::sort(dmlc::BeginPtr(pcol->data) + pcol->offset[i],
dmlc::BeginPtr(pcol->data) + pcol->offset[i + 1],
SparseBatch::Entry::CmpValue);
}
}
}
bool SimpleDMatrix::SingleColBlock() const {
return col_iter_.cpages_.size() <= 1;
}
} // namespace data
} // namespace xgboost

119
src/data/simple_dmatrix.h Normal file
View File

@ -0,0 +1,119 @@
/*!
* Copyright 2015 by Contributors
* \file simple_dmatrix.h
* \brief In-memory version of DMatrix.
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_H_
#define XGBOOST_DATA_SIMPLE_DMATRIX_H_
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <vector>
#include <algorithm>
#include <cstring>
#include "./sparse_batch_page.h"
namespace xgboost {
namespace data {
class SimpleDMatrix : public DMatrix {
public:
explicit SimpleDMatrix(std::unique_ptr<DataSource>&& source)
: source_(std::move(source)) {}
MetaInfo& info() override {
return source_->info;
}
const MetaInfo& info() const override {
return source_->info;
}
dmlc::DataIter<RowBatch>* RowIterator() override {
dmlc::DataIter<RowBatch>* iter = source_.get();
iter->BeforeFirst();
return iter;
}
bool HaveColAccess() const override {
return col_size_.size() != 0;
}
const std::vector<bst_uint>& buffered_rowset() const override {
return buffered_rowset_;
}
size_t GetColSize(size_t cidx) const {
return col_size_[cidx];
}
float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
}
dmlc::DataIter<ColBatch>* ColIterator() override;
dmlc::DataIter<ColBatch>* ColIterator(const std::vector<bst_uint>& fset) override;
void InitColAccess(const std::vector<bool>& enabled,
float subsample,
size_t max_row_perbatch) override;
bool SingleColBlock() const override;
private:
// in-memory column batch iterator.
struct ColBatchIter: dmlc::DataIter<ColBatch> {
public:
ColBatchIter() : data_ptr_(0) {}
void BeforeFirst() override {
data_ptr_ = 0;
}
const ColBatch &Value() const override {
return batch_;
}
bool Next() override;
private:
// allow SimpleDMatrix to access it.
friend class SimpleDMatrix;
// column index set
std::vector<bst_uint> col_index_;
// column content
std::vector<ColBatch::Inst> col_data_;
// column sparse pages
std::vector<std::unique_ptr<SparsePage> > cpages_;
// data pointer
size_t data_ptr_;
// temporary space for the batch
ColBatch batch_;
};
// source data pointer.
std::unique_ptr<DataSource> source_;
// column iterator
ColBatchIter col_iter_;
// list of row indices that are buffered.
std::vector<bst_uint> buffered_rowset_;
/*! \brief size of each column */
std::vector<size_t> col_size_;
// internal function to make one batch from row iter.
void MakeOneBatch(const std::vector<bool>& enabled,
float pkeep,
SparsePage *pcol);
void MakeManyBatch(const std::vector<bool>& enabled,
float pkeep,
size_t max_row_perbatch);
void MakeColPage(const RowBatch& batch,
const bst_uint* ridx,
const std::vector<bool>& enabled,
SparsePage* pcol);
};
} // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_SIMPLE_DMATRIX_H_
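
A small sketch against the iterator contract visible in this header; CountEntries is an illustrative helper:

#include <xgboost/data.h>

// Illustrative: stream row batches from any DMatrix and count stored entries.
inline size_t CountEntries(xgboost::DMatrix* dmat) {
  dmlc::DataIter<xgboost::RowBatch>* it = dmat->RowIterator();
  it->BeforeFirst();
  size_t n = 0;
  while (it->Next()) {
    const xgboost::RowBatch& batch = it->Value();
    for (size_t i = 0; i < batch.size; ++i) {
      n += batch[i].length;  // batch[i] yields a RowBatch::Inst
    }
  }
  return n;
}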

View File

@ -6,17 +6,18 @@
* use in external memory computation
* \author Tianqi Chen
*/
#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#ifndef XGBOOST_DATA_SPARSE_BATCH_PAGE_H_
#define XGBOOST_DATA_SPARSE_BATCH_PAGE_H_
#include <xgboost/data.h>
#include <dmlc/io.h>
#include <vector>
#include <algorithm>
#include "../data.h"
namespace xgboost {
namespace io {
namespace data {
/*!
* \brief storage unit of sparse batch
* \brief in-memory storage unit of sparse batch
*/
class SparsePage {
public:
@ -24,6 +25,7 @@ class SparsePage {
std::vector<size_t> offset;
/*! \brief the data of the segments */
std::vector<SparseBatch::Entry> data;
/*! \brief constructor */
SparsePage() {
this->Clear();
@ -38,14 +40,14 @@ class SparsePage {
* \param sorted_index_set sorted index of segments we are interested in
* \return true if the loading was successful, false if the end of file was reached
*/
inline bool Load(utils::ISeekStream *fi,
inline bool Load(dmlc::SeekStream *fi,
const std::vector<bst_uint> &sorted_index_set) {
if (!fi->Read(&disk_offset_)) return false;
// setup the offset
offset.clear(); offset.push_back(0);
for (size_t i = 0; i < sorted_index_set.size(); ++i) {
bst_uint fid = sorted_index_set[i];
utils::Check(fid + 1 < disk_offset_.size(), "bad col.blob format");
CHECK_LT(fid + 1, disk_offset_.size());
size_t size = disk_offset_[fid + 1] - disk_offset_[fid];
offset.push_back(offset.back() + size);
}
@ -56,7 +58,7 @@ class SparsePage {
for (size_t i = 0; i < sorted_index_set.size();) {
bst_uint fid = sorted_index_set[i];
if (disk_offset_[fid] != curr_offset) {
utils::Assert(disk_offset_[fid] > curr_offset, "fset index was not sorted");
CHECK_GT(disk_offset_[fid], curr_offset);
fi->Seek(begin + disk_offset_[fid] * sizeof(SparseBatch::Entry));
curr_offset = disk_offset_[fid];
}
@ -68,10 +70,12 @@ class SparsePage {
break;
}
}
if (size_to_read != 0) {
utils::Check(fi->Read(BeginPtr(data) + offset[i],
size_to_read * sizeof(SparseBatch::Entry)) != 0,
"Invalid SparsePage file");
CHECK_EQ(fi->Read(dmlc::BeginPtr(data) + offset[i],
size_to_read * sizeof(SparseBatch::Entry)),
size_to_read * sizeof(SparseBatch::Entry))
<< "Invalid SparsePage file";
curr_offset += size_to_read;
}
i = j;
@ -87,13 +91,14 @@ class SparsePage {
* \param fi the input stream of the file
* \return true if the loading was successful, false if the end of file was reached
*/
inline bool Load(utils::IStream *fi) {
inline bool Load(dmlc::Stream *fi) {
if (!fi->Read(&offset)) return false;
utils::Check(offset.size() != 0, "Invalid SparsePage file");
CHECK_NE(offset.size(), 0) << "Invalid SparsePage file";
data.resize(offset.back());
if (data.size() != 0) {
utils::Check(fi->Read(BeginPtr(data), data.size() * sizeof(SparseBatch::Entry)) != 0,
"Invalid SparsePage file");
CHECK_EQ(fi->Read(dmlc::BeginPtr(data), data.size() * sizeof(SparseBatch::Entry)),
data.size() * sizeof(SparseBatch::Entry))
<< "Invalid SparsePage file";
}
return true;
}
@ -102,12 +107,12 @@ class SparsePage {
* to disk it must contain all the elements in the
* \param fo output stream
*/
inline void Save(utils::IStream *fo) const {
utils::Assert(offset.size() != 0 && offset[0] == 0, "bad offset");
utils::Assert(offset.back() == data.size(), "in consistent SparsePage");
inline void Save(dmlc::Stream *fo) const {
CHECK(offset.size() != 0 && offset[0] == 0);
CHECK_EQ(offset.back(), data.size());
fo->Write(offset);
if (data.size() != 0) {
fo->Write(BeginPtr(data), data.size() * sizeof(SparseBatch::Entry));
fo->Write(dmlc::BeginPtr(data), data.size() * sizeof(SparseBatch::Entry));
}
}
/*! \return estimation of memory cost of this page */
@ -125,13 +130,14 @@ class SparsePage {
* \param fi the input stream of the file
* \return true if the loading was successful, false if the end of file was reached
*/
inline bool PushLoad(utils::IStream *fi) {
inline bool PushLoad(dmlc::Stream *fi) {
if (!fi->Read(&disk_offset_)) return false;
data.resize(offset.back() + disk_offset_.back());
if (disk_offset_.back() != 0) {
utils::Check(fi->Read(BeginPtr(data) + offset.back(),
disk_offset_.back() * sizeof(SparseBatch::Entry)) != 0,
"Invalid SparsePage file");
CHECK_EQ(fi->Read(dmlc::BeginPtr(data) + offset.back(),
disk_offset_.back() * sizeof(SparseBatch::Entry)),
disk_offset_.back() * sizeof(SparseBatch::Entry))
<< "Invalid SparsePage file";
}
size_t top = offset.back();
size_t begin = offset.size();
@ -147,7 +153,7 @@ class SparsePage {
*/
inline void Push(const RowBatch &batch) {
data.resize(offset.back() + batch.ind_ptr[batch.size]);
std::memcpy(BeginPtr(data) + offset.back(),
std::memcpy(dmlc::BeginPtr(data) + offset.back(),
batch.data_ptr + batch.ind_ptr[0],
sizeof(SparseBatch::Entry) * batch.ind_ptr[batch.size]);
size_t top = offset.back();
@ -164,8 +170,8 @@ class SparsePage {
inline void Push(const SparsePage &batch) {
size_t top = offset.back();
data.resize(top + batch.data.size());
std::memcpy(BeginPtr(data) + top,
BeginPtr(batch.data),
std::memcpy(dmlc::BeginPtr(data) + top,
dmlc::BeginPtr(batch.data),
sizeof(SparseBatch::Entry) * batch.data.size());
size_t begin = offset.size();
offset.resize(begin + batch.Size());
@ -182,7 +188,7 @@ class SparsePage {
size_t begin = data.size();
data.resize(begin + inst.length);
if (inst.length != 0) {
std::memcpy(BeginPtr(data) + begin, inst.data,
std::memcpy(dmlc::BeginPtr(data) + begin, inst.data,
sizeof(SparseBatch::Entry) * inst.length);
}
}
@ -193,8 +199,8 @@ class SparsePage {
inline RowBatch GetRowBatch(size_t base_rowid) const {
RowBatch out;
out.base_rowid = base_rowid;
out.ind_ptr = BeginPtr(offset);
out.data_ptr = BeginPtr(data);
out.ind_ptr = dmlc::BeginPtr(offset);
out.data_ptr = dmlc::BeginPtr(data);
out.size = offset.size() - 1;
return out;
}
@ -203,70 +209,6 @@ class SparsePage {
/*! \brief external memory column offset */
std::vector<size_t> disk_offset_;
};
/*!
* \brief factory class for SparsePage,
* used in threadbuffer template
*/
class SparsePageFactory {
public:
SparsePageFactory(void)
: action_load_all_(true), set_load_all_(true) {}
inline void SetFile(const utils::FileStream &fi,
size_t file_begin = 0) {
fi_ = fi;
file_begin_ = file_begin;
}
inline const std::vector<bst_uint> &index_set(void) const {
return action_index_set_;
}
// set index set, will be used after next before first
inline void SetIndexSet(const std::vector<bst_uint> &index_set,
bool load_all) {
set_load_all_ = load_all;
if (!set_load_all_) {
set_index_set_ = index_set;
std::sort(set_index_set_.begin(), set_index_set_.end());
}
}
inline bool Init(void) {
return true;
}
inline void SetParam(const char *name, const char *val) {}
inline bool LoadNext(SparsePage *val) {
if (!action_load_all_) {
if (action_index_set_.size() == 0) {
return false;
} else {
return val->Load(&fi_, action_index_set_);
}
} else {
return val->Load(&fi_);
}
}
inline SparsePage *Create(void) {
return new SparsePage();
}
inline void FreeSpace(SparsePage *a) {
delete a;
}
inline void Destroy(void) {
fi_.Close();
}
inline void BeforeFirst(void) {
fi_.Seek(file_begin_);
action_load_all_ = set_load_all_;
if (!set_load_all_) {
action_index_set_ = set_index_set_;
}
}
private:
bool action_load_all_, set_load_all_;
size_t file_begin_;
utils::FileStream fi_;
std::vector<bst_uint> action_index_set_;
std::vector<bst_uint> set_index_set_;
};
} // namespace io
} // namespace data
} // namespace xgboost
#endif // XGBOOST_IO_SPARSE_BATCH_PAGE_H_
#endif // XGBOOST_DATA_SPARSE_BATCH_PAGE_H_
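
A hedged round-trip sketch for the in-memory page; "page.bin" is a placeholder path and RoundTrip is illustrative:

#include <memory>
#include <dmlc/io.h>
#include <dmlc/logging.h>
#include "./sparse_batch_page.h"

// Illustrative: serialize a page and read it back via dmlc streams.
void RoundTrip(const xgboost::data::SparsePage& page) {
  {
    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create("page.bin", "w"));
    page.Save(fo.get());  // CHECKs the offset/data invariants before writing
  }
  xgboost::data::SparsePage loaded;
  std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create("page.bin", "r"));
  CHECK(loaded.Load(fi.get())) << "unexpected end of file";
}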

View File

@ -5,10 +5,10 @@
* the update rule is parallel coordinate descent (shotgun)
* \author Tianqi Chen
*/
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <xgboost/gbm.h>
#include <xgboost/logging.h>
#include <vector>
#include <string>
#include <sstream>
@ -17,6 +17,9 @@
namespace xgboost {
namespace gbm {
DMLC_REGISTRY_FILE_TAG(gblinear);
// model parameter
struct GBLinearModelParam :public dmlc::Parameter<GBLinearModelParam> {
// number of feature dimension
@ -168,6 +171,9 @@ class GBLinear : public GradientBooster {
int64_t buffer_offset,
std::vector<float> *out_preds,
unsigned ntree_limit) override {
if (model.weight.size() == 0) {
model.InitModel();
}
CHECK_EQ(ntree_limit, 0)
<< "GBLinear::Predict ntrees is only valid for gbtree predictor";
std::vector<float> &preds = *out_preds;
@ -293,4 +299,3 @@ XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
});
} // namespace gbm
} // namespace xgboost

29
src/gbm/gbm.cc Normal file
View File

@ -0,0 +1,29 @@
/*!
* Copyright 2015 by Contributors
* \file gbm.cc
* \brief Registry of gradient boosters.
*/
#include <xgboost/gbm.h>
#include <dmlc/registry.h>
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
} // namespace dmlc
namespace xgboost {
GradientBooster* GradientBooster::Create(const std::string& name) {
auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown gbm type " << name;
}
return (e->body)();
}
} // namespace xgboost
namespace xgboost {
namespace gbm {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(gblinear);
DMLC_REGISTRY_LINK_TAG(gbtree);
} // namespace gbm
} // namespace xgboost
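
Construction now goes through this factory; a one-line sketch using "gbtree", one of the two names force-linked above (MakeBooster is illustrative):

#include <memory>
#include <xgboost/gbm.h>

void MakeBooster() {
  // The name must match a XGBOOST_REGISTER_GBM registration.
  std::unique_ptr<xgboost::GradientBooster> gbm(
      xgboost::GradientBooster::Create("gbtree"));
}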

View File

@ -4,9 +4,9 @@
* \brief gradient boosted tree implementation.
* \author Tianqi Chen
*/
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <xgboost/logging.h>
#include <xgboost/gbm.h>
#include <xgboost/tree_updater.h>
@ -19,6 +19,8 @@
namespace xgboost {
namespace gbm {
DMLC_REGISTRY_FILE_TAG(gbtree);
/*! \brief training parameters */
struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
/*! \brief number of threads */
@ -482,4 +484,3 @@ XGBOOST_REGISTER_GBM(GBTree, "gbtree")
});
} // namespace gbm
} // namespace xgboost

View File

@ -1,72 +0,0 @@
/*!
* Copyright 2015 by Contributors
* \file global.cc
* \brief Enable all kinds of global static registry and variables.
*/
#include <xgboost/objective.h>
#include <xgboost/metric.h>
#include <xgboost/tree_updater.h>
#include <xgboost/gbm.h>
#include "./common/random.h"
#include "./common/base64.h"
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
} // namespace dmlc
namespace xgboost {
// implement factory functions
ObjFunction* ObjFunction::Create(const std::string& name) {
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown objective function " << name;
}
return (e->body)();
}
Metric* Metric::Create(const std::string& name) {
std::string buf = name;
std::string prefix = name;
auto pos = buf.find('@');
if (pos == std::string::npos) {
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown objective function " << name;
}
return (e->body)(nullptr);
} else {
std::string prefix = buf.substr(0, pos);
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
if (e == nullptr) {
LOG(FATAL) << "Unknown objective function " << name;
}
return (e->body)(buf.substr(pos + 1, buf.length()).c_str());
}
}
TreeUpdater* TreeUpdater::Create(const std::string& name) {
auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown tree updater " << name;
}
return (e->body)();
}
GradientBooster* GradientBooster::Create(const std::string& name) {
auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown gbm type " << name;
}
return (e->body)();
}
namespace common {
RandomEngine& GlobalRandom() {
static RandomEngine inst;
return inst;
}
}
} // namespace xgboost

View File

@ -11,6 +11,7 @@
#include <string>
#include <sstream>
#include <limits>
#include <iomanip>
#include "./common/io.h"
#include "./common/random.h"
@ -94,6 +95,9 @@ struct LearnerTrainParam
}
};
DMLC_REGISTER_PARAMETER(LearnerModelParam);
DMLC_REGISTER_PARAMETER(LearnerTrainParam);
/*!
* \brief learner that performs gradient boosting for a specific objective function.
* It does training and prediction.
@ -144,6 +148,9 @@ class LearnerImpl : public Learner {
if (cfg_.count("num_class") != 0) {
cfg_["num_output_group"] = cfg_["num_class"];
if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
cfg_["objective"] = "multi:softmax";
}
}
if (cfg_.count("max_delta_step") == 0 &&
@ -187,6 +194,10 @@ class LearnerImpl : public Learner {
}
}
void InitModel() override {
this->LazyInitModel();
}
void Load(dmlc::Stream* fi) override {
// TODO(tqchen) mark deprecation of old format.
common::PeekableInStream fp(fi);
@ -202,7 +213,6 @@ class LearnerImpl : public Learner {
}
// use the peekable reader.
fi = &fp;
std::string name_gbm, name_obj;
// read parameter
CHECK_EQ(fi->Read(&mparam, sizeof(mparam)), sizeof(mparam))
<< "BoostLearner: wrong model format";
@ -218,7 +228,7 @@ class LearnerImpl : public Learner {
len = len >> static_cast<uint64_t>(32UL);
}
if (len != 0) {
name_obj.resize(len);
name_obj_.resize(len);
CHECK_EQ(fi->Read(&name_obj_[0], len), len)
<<"BoostLearner: wrong model format";
}
@ -226,8 +236,10 @@ class LearnerImpl : public Learner {
CHECK(fi->Read(&name_gbm_))
<< "BoostLearner: wrong model format";
// duplicated code with LazyInitModel
obj_.reset(ObjFunction::Create(cfg_.at(name_obj_)));
gbm_.reset(GradientBooster::Create(cfg_.at(name_gbm_)));
obj_.reset(ObjFunction::Create(name_obj_));
gbm_.reset(GradientBooster::Create(name_gbm_));
gbm_->Load(fi);
if (metrics_.size() == 0) {
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric()));
}
@ -246,11 +258,12 @@ class LearnerImpl : public Learner {
}
void UpdateOneIter(int iter, DMatrix* train) override {
CHECK(ModelInitialized())
<< "Always call InitModel or LoadModel before update";
if (tparam.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
}
this->LazyInitDMatrix(train);
this->LazyInitModel();
this->PredictRaw(train, &preds_);
obj_->GetGradient(preds_, train->info(), iter, &gpair_);
gbm_->DoBoost(train, this->FindBufferOffset(train), &gpair_);
@ -262,6 +275,7 @@ class LearnerImpl : public Learner {
if (tparam.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
}
this->LazyInitDMatrix(train);
gbm_->DoBoost(train, this->FindBufferOffset(train), in_gpair);
}
@ -269,7 +283,8 @@ class LearnerImpl : public Learner {
const std::vector<DMatrix*>& data_sets,
const std::vector<std::string>& data_names) override {
std::ostringstream os;
os << '[' << iter << ']';
os << '[' << iter << ']'
<< std::setiosflags(std::ios::fixed);
for (size_t i = 0; i < data_sets.size(); ++i) {
this->PredictRaw(data_sets[i], &preds_);
obj_->EvalTransform(&preds_);
@ -347,8 +362,6 @@ class LearnerImpl : public Learner {
if (num_feature > mparam.num_feature) {
mparam.num_feature = num_feature;
}
// reset the base score
mparam.base_score = obj_->ProbToMargin(mparam.base_score);
// setup
cfg_["num_feature"] = ToString(mparam.num_feature);
@ -357,9 +370,13 @@ class LearnerImpl : public Learner {
gbm_.reset(GradientBooster::Create(name_gbm_));
gbm_->Configure(cfg_.begin(), cfg_.end());
obj_->Configure(cfg_.begin(), cfg_.end());
// reset the base score
mparam.base_score = obj_->ProbToMargin(mparam.base_score);
if (metrics_.size() == 0) {
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric()));
}
this->base_score_ = mparam.base_score;
gbm_->ResetPredBuffer(pred_buffer_size_);
}
@ -373,6 +390,8 @@ class LearnerImpl : public Learner {
inline void PredictRaw(DMatrix* data,
std::vector<float>* out_preds,
unsigned ntree_limit = 0) const {
CHECK(gbm_.get() != nullptr)
<< "Predict must happen after Load or InitModel";
gbm_->Predict(data,
this->FindBufferOffset(data),
out_preds,

20
src/logging.cc Normal file
View File

@ -0,0 +1,20 @@
/*!
* Copyright 2015 by Contributors
* \file logging.cc
* \brief Implementation of loggers.
* \author Tianqi Chen
*/
#include <xgboost/logging.h>
#include <iostream>
#include "./common/sync.h"
namespace xgboost {
ConsoleLogger::~ConsoleLogger() {
std::cout << log_stream_.str() << std::endl;
}
TrackerLogger::~TrackerLogger() {
log_stream_ << '\n';
rabit::TrackerPrint(log_stream_.str());
}
} // namespace xgboost
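
A sketch of the two severities these destructors implement, assuming xgboost/logging.h maps LOG(CONSOLE) and LOG(TRACKER) onto these classes as the call sites in cli_main.cc suggest; ReportProgress is illustrative:

#include <xgboost/logging.h>

void ReportProgress(int iter, double elapsed) {
  // Flushed to std::cout when the temporary ConsoleLogger is destroyed.
  LOG(CONSOLE) << "boosting round " << iter << ", " << elapsed << " sec elapsed";
  // Forwarded through rabit::TrackerPrint; cli_main.cc emits this on rank 0 only.
  LOG(TRACKER) << "eval results for round " << iter;
}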

View File

@ -5,12 +5,16 @@
* \author Kailong Chen, Tianqi Chen
*/
#include <xgboost/metric.h>
#include <dmlc/registry.h>
#include <cmath>
#include "../common/math.h"
#include "../common/sync.h"
namespace xgboost {
namespace metric {
// tag this file, used for forced static linking later.
DMLC_REGISTRY_FILE_TAG(elementwise_metric);
/*!
* \brief base class of element-wise evaluation
* \tparam Derived the name of subclass
@ -124,4 +128,3 @@ XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
} // namespace metric
} // namespace xgboost

42
src/metric/metric.cc Normal file
View File

@ -0,0 +1,42 @@
/*!
* Copyright 2015 by Contributors
* \file metric.cc
* \brief Registry of evaluation metrics.
*/
#include <xgboost/metric.h>
#include <dmlc/registry.h>
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
}  // namespace dmlc
namespace xgboost {
Metric* Metric::Create(const std::string& name) {
std::string buf = name;
auto pos = buf.find('@');
if (pos == std::string::npos) {
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown metric function " << name;
}
return (e->body)(nullptr);
} else {
std::string prefix = buf.substr(0, pos);
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
if (e == nullptr) {
LOG(FATAL) << "Unknown metric function " << name;
}
return (e->body)(buf.substr(pos + 1, buf.length()).c_str());
}
}
} // namespace xgboost
namespace xgboost {
namespace metric {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(elementwise_metric);
DMLC_REGISTRY_LINK_TAG(multiclass_metric);
DMLC_REGISTRY_LINK_TAG(rank_metric);
} // namespace metric
} // namespace xgboost
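
The '@' handling above lets a metric name carry an argument; a sketch assuming "rmse" and "ndcg" are registered in elementwise_metric.cc and rank_metric.cc respectively (MakeMetrics is illustrative):

#include <memory>
#include <xgboost/metric.h>

void MakeMetrics() {
  // Plain name: the factory body receives nullptr.
  std::unique_ptr<xgboost::Metric> rmse(xgboost::Metric::Create("rmse"));
  // Parameterized name: prefix "ndcg" selects the factory and "5" is
  // passed to its body as the parameter string.
  std::unique_ptr<xgboost::Metric> ndcg5(xgboost::Metric::Create("ndcg@5"));
}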

View File

@ -11,6 +11,9 @@
namespace xgboost {
namespace metric {
// tag this file, used for forced static linking later.
DMLC_REGISTRY_FILE_TAG(multiclass_metric);
/*!
* \brief base class of multi-class evaluation
* \tparam Derived the name of subclass
@ -114,4 +117,3 @@ XGBOOST_REGISTER_METRIC(MultiLogLoss, "mlogloss")
.set_body([](const char* param) { return new EvalMultiLogLoss(); });
} // namespace metric
} // namespace xgboost

View File

@ -5,12 +5,16 @@
* \author Kailong Chen, Tianqi Chen
*/
#include <xgboost/metric.h>
#include <dmlc/registry.h>
#include <cmath>
#include "../common/sync.h"
#include "../common/math.h"
namespace xgboost {
namespace metric {
// tag this file, used for forced static linking later.
DMLC_REGISTRY_FILE_TAG(rank_metric);
/*! \brief AMS: also records best threshold */
struct EvalAMS : public Metric {
public:

View File

@ -4,9 +4,9 @@
* \brief Definition of multi-class classification objectives.
* \author Tianqi Chen
*/
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <xgboost/logging.h>
#include <xgboost/objective.h>
#include <vector>
#include <algorithm>
@ -16,6 +16,8 @@
namespace xgboost {
namespace obj {
DMLC_REGISTRY_FILE_TAG(multiclass_obj);
struct SoftmaxMultiClassParam : public dmlc::Parameter<SoftmaxMultiClassParam> {
int num_class;
// declare parameters

34
src/objective/objective.cc Normal file
View File

@ -0,0 +1,34 @@
/*!
* Copyright 2015 by Contributors
* \file objective.cc
* \brief Registry of all objective functions.
*/
#include <xgboost/objective.h>
#include <dmlc/registry.h>
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
} // namespace dmlc
namespace xgboost {
// implement factory functions
ObjFunction* ObjFunction::Create(const std::string& name) {
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
if (e == nullptr) {
for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {
LOG(INFO) << "Objective candidate: " << entry->name;
}
LOG(FATAL) << "Unknown objective function " << name;
}
return (e->body)();
}
} // namespace xgboost
namespace xgboost {
namespace obj {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(regression_obj);
DMLC_REGISTRY_LINK_TAG(multiclass_obj);
DMLC_REGISTRY_LINK_TAG(rank_obj);
} // namespace obj
} // namespace xgboost
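
A matching sketch for the objective factory, assuming "reg:linear" is registered in regression_obj.cc; on an unknown name the loop above lists every registered candidate before LOG(FATAL). MakeObjective is illustrative:

#include <memory>
#include <xgboost/objective.h>

void MakeObjective() {
  std::unique_ptr<xgboost::ObjFunction> obj(
      xgboost::ObjFunction::Create("reg:linear"));
}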

View File

@ -4,8 +4,8 @@
* \brief Definition of rank loss.
* \author Tianqi Chen, Kailong Chen
*/
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <xgboost/logging.h>
#include <xgboost/objective.h>
#include <vector>
#include <algorithm>
@ -16,6 +16,8 @@
namespace xgboost {
namespace obj {
DMLC_REGISTRY_FILE_TAG(rank_obj);
struct LambdaRankParam : public dmlc::Parameter<LambdaRankParam> {
int num_pairsample;
float fix_list_weight;
@ -324,4 +326,3 @@ XGBOOST_REGISTER_OBJECTIVE(LambdaRankObjMAP, "rank:map")
} // namespace obj
} // namespace xgboost

View File

@ -4,8 +4,8 @@
* \brief Definition of single-value regression and classification objectives.
* \author Tianqi Chen, Kailong Chen
*/
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <xgboost/logging.h>
#include <xgboost/objective.h>
#include <vector>
#include <algorithm>
@ -14,6 +14,9 @@
namespace xgboost {
namespace obj {
DMLC_REGISTRY_FILE_TAG(regression_obj);
// common regressions
// linear regression
struct LinearSquareLoss {
@ -84,7 +87,9 @@ class RegLossObj : public ObjFunction {
int iter,
std::vector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided";
CHECK_EQ(preds.size(), info.labels.size())
<< "labels are not correctly provided"
<< "preds.size=" << preds.size() << ", label.size=" << info.labels.size();
out_gpair->resize(preds.size());
// check if label in range
bool label_correct = true;
@ -95,7 +100,7 @@ class RegLossObj : public ObjFunction {
float p = Loss::PredTransform(preds[i]);
float w = info.GetWeight(i);
if (info.labels[i] == 1.0f) w *= param_.scale_pos_weight;
if (Loss::CheckLabel(info.labels[i])) label_correct = false;
if (!Loss::CheckLabel(info.labels[i])) label_correct = false;
out_gpair->at(i) = bst_gpair(Loss::FirstOrderGradient(p, info.labels[i]) * w,
Loss::SecondOrderGradient(p, info.labels[i]) * w);
}

View File

@ -71,7 +71,7 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
.describe("L2 regularization on leaf weight");
DMLC_DECLARE_FIELD(reg_alpha).set_lower_bound(0.0f).set_default(0.0f)
.describe("L1 regularization on leaf weight");
DMLC_DECLARE_FIELD(default_direction)
DMLC_DECLARE_FIELD(default_direction).set_default(0)
.add_enum("learn", 0)
.add_enum("left", 1)
.add_enum("right", 2)

35
src/tree/tree_updater.cc Normal file
View File

@ -0,0 +1,35 @@
/*!
* Copyright 2015 by Contributors
* \file tree_updater.cc
* \brief Registry of tree updaters.
*/
#include <xgboost/tree_updater.h>
#include <dmlc/registry.h>
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
} // namespace dmlc
namespace xgboost {
TreeUpdater* TreeUpdater::Create(const std::string& name) {
auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown tree updater " << name;
}
return (e->body)();
}
} // namespace xgboost
namespace xgboost {
namespace tree {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(updater_colmaker);
DMLC_REGISTRY_LINK_TAG(updater_skmaker);
DMLC_REGISTRY_LINK_TAG(updater_refresh);
DMLC_REGISTRY_LINK_TAG(updater_prune);
DMLC_REGISTRY_LINK_TAG(updater_histmaker);
DMLC_REGISTRY_LINK_TAG(updater_sync);
} // namespace tree
} // namespace xgboost
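
Updaters are created the same way and are typically chained by the tree booster; a sketch assuming "grow_colmaker" and "prune" are the names registered in updater_colmaker.cc and updater_prune.cc (MakeUpdaterChain is illustrative):

#include <memory>
#include <vector>
#include <xgboost/tree_updater.h>

void MakeUpdaterChain() {
  // Grow trees column-wise, then prune them.
  std::vector<std::unique_ptr<xgboost::TreeUpdater> > updaters;
  updaters.emplace_back(xgboost::TreeUpdater::Create("grow_colmaker"));
  updaters.emplace_back(xgboost::TreeUpdater::Create("prune"));
}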

View File

@ -15,6 +15,9 @@
namespace xgboost {
namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_colmaker);
/*! \brief column-wise update to construct a tree */
template<typename TStats>
class ColMaker: public TreeUpdater {
@ -891,4 +894,3 @@ XGBOOST_REGISTER_TREE_UPDATER(DistColMaker, "distcol")
});
} // namespace tree
} // namespace xgboost

View File

@ -15,6 +15,9 @@
namespace xgboost {
namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_histmaker);
template<typename TStats>
class HistMaker: public BaseMaker {
public:

View File

@ -14,6 +14,9 @@
namespace xgboost {
namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_prune);
/*! \brief pruner that prunes a tree after growing finishes */
class TreePruner: public TreeUpdater {
public:

View File

@ -14,6 +14,9 @@
namespace xgboost {
namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_refresh);
/*! \brief updater that refreshes the statistics stored in a tree */
template<typename TStats>
class TreeRefresher: public TreeUpdater {

View File

@ -18,6 +18,8 @@
namespace xgboost {
namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_skmaker);
class SketchMaker: public BaseMaker {
public:
void Update(const std::vector<bst_gpair> &gpair,
@ -399,4 +401,3 @@ XGBOOST_REGISTER_TREE_UPDATER(SketchMaker, "grow_skmaker")
});
} // namespace tree
} // namespace xgboost

View File

@ -12,6 +12,9 @@
namespace xgboost {
namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_sync);
/*!
* \brief synchronizer that synchronizes the tree across all distributed nodes;
* it can implement various strategies, but so far it always broadcasts node 0's tree

3
wrapper/.gitignore vendored
View File

@ -1,3 +0,0 @@
build
dist
*.egg*

View File

@ -1,9 +0,0 @@
XGBoost Wrappers
================
This folder provides wrappers to create xgboost packages for other languages.
***Supported Language Packages***
* [Python package](../python-package)
* [R-package](../R-package)
* [Java Package](../java)
* [Julia Package](https://github.com/antinucleon/XGBoost.jl)

View File

@ -1,599 +0,0 @@
// Copyright (c) 2014 by Contributors
// implementations in ctypes
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include <cmath>
#include <algorithm>
#include <exception>
// include all std functions
using namespace std;
#include "./xgboost_wrapper.h"
#include "../src/data.h"
#include "../src/learner/learner-inl.hpp"
#include "../src/io/io.h"
#include "../src/utils/utils.h"
#include "../src/utils/math.h"
#include "../src/utils/group_data.h"
#include "../src/io/simple_dmatrix-inl.hpp"
using namespace xgboost;
using namespace xgboost::io;
namespace xgboost {
namespace wrapper {
// booster wrapper class
class Booster: public learner::BoostLearner {
public:
explicit Booster(const std::vector<DataMatrix*>& mats) {
this->silent = 1;
this->init_model = false;
this->SetCacheData(mats);
}
inline const float *Pred(const DataMatrix &dmat, int option_mask,
unsigned ntree_limit, bst_ulong *len) {
this->CheckInitModel();
this->Predict(dmat, (option_mask&1) != 0, &this->preds_,
ntree_limit, (option_mask&2) != 0);
*len = static_cast<bst_ulong>(this->preds_.size());
return BeginPtr(this->preds_);
}
inline void BoostOneIter(const DataMatrix &train,
float *grad, float *hess, bst_ulong len) {
this->gpair_.resize(len);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(len);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
gpair_[j] = bst_gpair(grad[j], hess[j]);
}
gbm_->DoBoost(train.fmat(), this->FindBufferOffset(train), train.info.info, &gpair_);
}
inline void CheckInitModel(void) {
if (!init_model) {
this->InitModel(); init_model = true;
}
}
inline void LoadModel(const char *fname) {
learner::BoostLearner::LoadModel(fname);
this->init_model = true;
}
inline void LoadModelFromBuffer(const void *buf, size_t size) {
utils::MemoryFixSizeBuffer fs((void*)buf, size); // NOLINT(*)
learner::BoostLearner::LoadModel(fs, true);
this->init_model = true;
}
inline const char *GetModelRaw(bst_ulong *out_len) {
this->CheckInitModel();
model_str.resize(0);
utils::MemoryBufferStream fs(&model_str);
learner::BoostLearner::SaveModel(fs, false);
*out_len = static_cast<bst_ulong>(model_str.length());
if (*out_len == 0) {
return NULL;
} else {
return &model_str[0];
}
}
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, bst_ulong *len) {
model_dump = this->DumpModel(fmap, with_stats);
model_dump_cptr.resize(model_dump.size());
for (size_t i = 0; i < model_dump.size(); ++i) {
model_dump_cptr[i] = model_dump[i].c_str();
}
*len = static_cast<bst_ulong>(model_dump.size());
return BeginPtr(model_dump_cptr);
}
// temporary fields
// temporary data to save evaluation dump
std::string eval_str;
// temporary data to save model dump
std::string model_str;
// temporary space to save model dump
std::vector<std::string> model_dump;
std::vector<const char*> model_dump_cptr;
private:
bool init_model;
};
} // namespace wrapper
} // namespace xgboost
using namespace xgboost::wrapper;
#ifndef XGBOOST_STRICT_CXX98_
namespace xgboost {
namespace wrapper {
// helper to support threadlocal
struct ThreadLocalStore {
std::vector<std::string*> data;
// allocate a string
inline std::string *Alloc() {
mutex.Lock();
data.push_back(new std::string());
std::string *ret = data.back();
mutex.Unlock();
return ret;
}
ThreadLocalStore() {
mutex.Init();
}
~ThreadLocalStore() {
for (size_t i = 0; i < data.size(); ++i) {
delete data[i];
}
mutex.Destroy();
}
utils::Mutex mutex;
};
static ThreadLocalStore thread_local_store;
} // namespace wrapper
} // namespace xgboost
/*! \brief macro to guard beginning and end section of all functions */
#define API_BEGIN() try {
/*!
* \brief every function starts with API_BEGIN(); and finishes with API_END();
* \param Finalize optionally put in a finalizer
*/
#define API_END_FINALIZE(Finalize) } catch(std::exception &e) { \
Finalize; return XGBHandleException(e); \
} return 0;
/*! \brief API End with no finalization */
#define API_END() API_END_FINALIZE(;)
// do not use threadlocal on OSX since it is not always available
#ifndef DISABLE_THREAD_LOCAL
#ifdef __GNUC__
#define XGB_TREAD_LOCAL __thread
#elif __STDC_VERSION__ >= 201112L
#define XGB_TREAD_LOCAL _Thread_local
#elif defined(_MSC_VER)
#define XGB_TREAD_LOCAL __declspec(thread)
#endif
#endif
#ifndef XGB_TREAD_LOCAL
#pragma message("Warning: Threadlocal not enabled, used single thread error handling")
#define XGB_TREAD_LOCAL
#endif
/*!
* \brief a helper function for error handling
* will set the last error to be str_set when it is not NULL
* \param str_set the error to set
* \return a pointer message to last error
*/
const char *XGBSetGetLastError_(const char *str_set) {
// use last_error to record last error
static XGB_TREAD_LOCAL std::string *last_error = NULL;
if (last_error == NULL) {
last_error = thread_local_store.Alloc();
}
if (str_set != NULL) {
*last_error = str_set;
}
return last_error->c_str();
}
#else
// crippled implementation for solaris case
// exception handling is not needed for R, so it is OK.
#define API_BEGIN()
#define API_END_FINALIZE(Finalize) return 0
#define API_END() return 0
const char *XGBSetGetLastError_(const char *str_set) {
return NULL;
}
#endif // XGBOOST_STRICT_CXX98_
/*! \brief return str message of the last error */
const char *XGBGetLastError() {
return XGBSetGetLastError_(NULL);
}
/*!
* \brief handle a thrown exception
* \param e the exception
* \return the return value of API after exception is handled
*/
int XGBHandleException(const std::exception &e) {
XGBSetGetLastError_(e.what());
return -1;
}
int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out) {
API_BEGIN();
*out = LoadDataMatrix(fname, silent != 0, false, false);
API_END();
}
int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr);
for (bst_ulong i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.row_data_.resize(nelem);
for (bst_ulong i = 0; i < nelem; ++i) {
mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]);
mat.info.info.num_col = std::max(mat.info.info.num_col,
static_cast<size_t>(indices[i]+1));
}
mat.info.info.num_row = nindptr - 1;
*out = p_mat;
API_END_FINALIZE(delete p_mat);
}
int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
const unsigned *indices,
const float *data,
bst_ulong nindptr,
bst_ulong nelem,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread);
long ncol = static_cast<long>(nindptr - 1); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.AddBudget(indices[j], tid);
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static)
for (long i = 0; i < ncol; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.Push(indices[j],
RowBatch::Entry(static_cast<bst_uint>(i), data[j]),
tid);
}
}
mat.info.info.num_row = mat.row_ptr_.size() - 1;
mat.info.info.num_col = static_cast<size_t>(ncol);
*out = p_mat;
API_END_FINALIZE(delete p_mat);
}
int XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
p_mat = new DMatrixSimple();
bool nan_missing = utils::CheckNAN(missing);
DMatrixSimple &mat = *p_mat;
mat.info.info.num_row = nrow;
mat.info.info.num_col = ncol;
for (bst_ulong i = 0; i < nrow; ++i, data += ncol) {
bst_ulong nelem = 0;
for (bst_ulong j = 0; j < ncol; ++j) {
if (utils::CheckNAN(data[j])) {
utils::Check(nan_missing,
"There are NAN in the matrix, however, you did not set missing=NAN");
} else {
if (nan_missing || data[j] != missing) {
mat.row_data_.push_back(RowBatch::Entry(j, data[j]));
++nelem;
}
}
}
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
}
*out = p_mat;
API_END_FINALIZE(delete p_mat);
}
int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int *idxset,
bst_ulong len,
DMatrixHandle *out) {
DMatrixSimple *p_ret = NULL;
API_BEGIN();
DMatrixSimple tmp;
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
if (dsrc.magic != DMatrixSimple::kMagic) {
tmp.CopyFrom(dsrc);
}
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
*static_cast<DMatrixSimple*>(handle): tmp);
p_ret = new DMatrixSimple();
DMatrixSimple &ret = *p_ret;
utils::Check(src.info.group_ptr.size() == 0,
"slice does not support group structure");
ret.Clear();
ret.info.info.num_row = len;
ret.info.info.num_col = src.info.num_col();
utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
iter->BeforeFirst();
utils::Assert(iter->Next(), "slice");
const RowBatch &batch = iter->Value();
for (bst_ulong i = 0; i < len; ++i) {
const int ridx = idxset[i];
RowBatch::Inst inst = batch[ridx];
utils::Check(static_cast<bst_ulong>(ridx) < batch.size, "slice index exceeds number of rows");
ret.row_data_.resize(ret.row_data_.size() + inst.length);
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
sizeof(RowBatch::Entry) * inst.length);
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
if (src.info.labels.size() != 0) {
ret.info.labels.push_back(src.info.labels[ridx]);
}
if (src.info.weights.size() != 0) {
ret.info.weights.push_back(src.info.weights[ridx]);
}
if (src.info.info.root_index.size() != 0) {
ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
}
if (src.info.info.fold_index.size() != 0) {
ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]);
}
}
*out = p_ret;
API_END_FINALIZE(delete p_ret);
}
int XGDMatrixFree(DMatrixHandle handle) {
API_BEGIN();
delete static_cast<DataMatrix*>(handle);
API_END();
}
int XGDMatrixSaveBinary(DMatrixHandle handle,
const char *fname,
int silent) {
API_BEGIN();
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent != 0);
API_END();
}
int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char *field,
const float *info,
bst_ulong len) {
API_BEGIN();
std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(float) * len);
API_END();
}
int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char *field,
const unsigned *info,
bst_ulong len) {
API_BEGIN();
std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
API_END();
}
int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len) {
API_BEGIN();
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
pmat->info.group_ptr.resize(len + 1);
pmat->info.group_ptr[0] = 0;
for (uint64_t i = 0; i < len; ++i) {
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i];
}
API_END();
}
int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const float **out_dptr) {
API_BEGIN();
const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*out_len = static_cast<bst_ulong>(vec.size());
*out_dptr = BeginPtr(vec);
API_END();
}
int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const unsigned **out_dptr) {
API_BEGIN();
const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*out_len = static_cast<bst_ulong>(vec.size());
*out_dptr = BeginPtr(vec);
API_END();
}
int XGDMatrixNumRow(const DMatrixHandle handle,
bst_ulong *out) {
API_BEGIN();
*out = static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
API_END();
}
int XGDMatrixNumCol(const DMatrixHandle handle,
bst_ulong *out) {
API_BEGIN();
*out = static_cast<size_t>(static_cast<const DataMatrix*>(handle)->info.num_col());
API_END();
}
// xgboost implementation
int XGBoosterCreate(DMatrixHandle dmats[],
bst_ulong len,
BoosterHandle *out) {
API_BEGIN();
std::vector<DataMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
mats.push_back(dtr);
}
*out = new Booster(mats);
API_END();
}
int XGBoosterFree(BoosterHandle handle) {
API_BEGIN();
delete static_cast<Booster*>(handle);
API_END();
}
int XGBoosterSetParam(BoosterHandle handle,
const char *name, const char *value) {
API_BEGIN();
static_cast<Booster*>(handle)->SetParam(name, value);
API_END();
}
int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
bst->CheckInit(dtr);
bst->UpdateOneIter(iter, *dtr);
API_END();
}
int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel();
bst->CheckInit(dtr);
bst->BoostOneIter(*dtr, grad, hess, len);
API_END();
}
int XGBoosterEvalOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dmats[],
const char *evnames[],
bst_ulong len,
const char **out_str) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names;
std::vector<const DataMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i]));
}
bst->CheckInitModel();
bst->eval_str = bst->EvalOneIter(iter, mats, names);
*out_str = bst->eval_str.c_str();
API_END();
}
int XGBoosterPredict(BoosterHandle handle,
DMatrixHandle dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *len,
const float **out_result) {
API_BEGIN();
*out_result = static_cast<Booster*>(handle)->
Pred(*static_cast<DataMatrix*>(dmat),
option_mask, ntree_limit, len);
API_END();
}
int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {
API_BEGIN();
static_cast<Booster*>(handle)->LoadModel(fname);
API_END();
}
int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
bst->CheckInitModel();
bst->SaveModel(fname, false);
API_END();
}
int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
const void *buf,
bst_ulong len) {
API_BEGIN();
static_cast<Booster*>(handle)->LoadModelFromBuffer(buf, len);
API_END();
}
int XGBoosterGetModelRaw(BoosterHandle handle,
bst_ulong *out_len,
const char **out_dptr) {
API_BEGIN();
*out_dptr = static_cast<Booster*>(handle)->GetModelRaw(out_len);
API_END();
}
int XGBoosterDumpModel(BoosterHandle handle,
const char *fmap,
int with_stats,
bst_ulong *len,
const char ***out_models) {
API_BEGIN();
utils::FeatMap featmap;
if (strlen(fmap) != 0) {
featmap.LoadText(fmap);
}
*out_models = static_cast<Booster*>(handle)->GetModelDump(
featmap, with_stats != 0, len);
API_END();
}
int XGBoosterDumpModelWithFeatures(BoosterHandle handle,
int fnum,
const char **fname,
const char **ftype,
int with_stats,
bst_ulong *len,
const char ***out_models) {
API_BEGIN();
utils::FeatMap featmap;
for (int i = 0; i < fnum; ++i) {
featmap.PushBack(i, fname[i], ftype[i]);
}
*out_models = static_cast<Booster*>(handle)->GetModelDump(
featmap, with_stats != 0, len);
API_END();
}