From 301685e0a431e11fdf05286c95d7394733c651c3 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Sun, 17 Aug 2014 18:43:25 -0700
Subject: [PATCH] python module pass basic test

---
 python/example/demo.py        |  49 +++-----
 python/xgboost.py             | 212 ++++++++++++++++++++--------------
 python/xgboost_wrapper.cpp    |  17 +++
 src/data.h                    |   2 +-
 src/io/io.cpp                 |   7 +-
 src/io/simple_dmatrix-inl.hpp |   2 +-
 src/learner/dmatrix.h         |   2 +-
 7 files changed, 170 insertions(+), 121 deletions(-)

diff --git a/python/example/demo.py b/python/example/demo.py
index 389f139ff..e14c806aa 100755
--- a/python/example/demo.py
+++ b/python/example/demo.py
@@ -17,36 +17,17 @@ param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logisti
 # specify validations set to watch performance
 evallist  = [(dtest,'eval'), (dtrain,'train')]
 num_round = 2
-bst = xgb.train( param, dtrain, num_round, evallist )
+bst = xgb.train(param, dtrain, num_round, evallist)
 
 # this is prediction
-preds = bst.predict( dtest )
+preds = bst.predict(dtest)
 labels = dtest.get_label()
 print ('error=%f' % (  sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
 bst.save_model('0001.model')
 # dump model
 bst.dump_model('dump.raw.txt')
 # dump model with feature map
-bst.dump_model('dump.raw.txt','featmap.txt')
-
-###
-# build dmatrix in python iteratively
-#
-print ('start running example of build DMatrix in python')
-dtrain = xgb.DMatrix()
-labels = []
-for l in open('agaricus.txt.train'):
-    arr = l.split()
-    labels.append( int(arr[0]))
-    feats = []
-    for it in arr[1:]:
-        k,v = it.split(':')
-        feats.append( (int(k), float(v)) )
-    dtrain.add_row( feats )
-dtrain.set_label( labels )
-evallist  = [(dtest,'eval'), (dtrain,'train')]
-
-bst = xgb.train( param, dtrain, num_round, evallist )
+bst.dump_model('dump.nice.txt','featmap.txt')
 
 ###
 # build dmatrix from scipy.sparse
@@ -61,7 +42,6 @@ for l in open('agaricus.txt.train'):
         k,v = it.split(':')
         row.append(i); col.append(int(k)); dat.append(float(v))
     i += 1
-
 csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
 dtrain = xgb.DMatrix( csr )
 dtrain.set_label(labels)
@@ -71,7 +51,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
 print ('start running example of build DMatrix from numpy array')
 # NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
 npymat = csr.todense()
-dtrain = xgb.DMatrix( npymat )
+dtrain = xgb.DMatrix( npymat)
 dtrain.set_label(labels)
 evallist  = [(dtest,'eval'), (dtrain,'train')]
 bst = xgb.train( param, dtrain, num_round, evallist )
@@ -81,16 +61,25 @@ bst = xgb.train( param, dtrain, num_round, evallist )
 # 
 print ('start running example to used cutomized objective function')
 
-# note: set objective= binary:logistic means the prediction will get logistic transformed
-#       in most case, we may want to leave it as default
-param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
+# note: for customized objective function, we leave objective as default
+# note: what we are getting is margin value in prediction
+# you must know what you are doing
+param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
 
 # user define objective function, given prediction, return gradient and second order gradient
-def logregobj( preds, dtrain ):
+# this is loglikelihood loss
+def logregobj(preds, dtrain):
     labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))
     grad = preds - labels
     hess = preds * (1.0-preds)
     return grad, hess
 
-# training with customized objective, we can also do step by step training, simply look at xgboost.py's implementation of train
-bst = xgb.train( param, dtrain, num_round, evallist, logregobj )
+# user defined evaluation function, returns a (metric_name, result) pair; preds are margins, so the cut is 0.0
+def evalerror(preds, dtrain):
+    labels = dtrain.get_label()
+    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
+
+# training with customized objective, we can also do step by step training
+# simply look at xgboost.py's implementation of train
+bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
diff --git a/python/xgboost.py b/python/xgboost.py
index 2e8deefa8..c7a04d4c3 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -4,6 +4,7 @@ import ctypes
 import os
 # optinally have scipy sparse, though not necessary
 import numpy
+import sys
 import numpy.ctypeslib 
 import scipy.sparse as scp
 
@@ -13,33 +14,39 @@ XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
 # load in xgboost library
 xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
 
-xglib.XGDMatrixCreate.restype = ctypes.c_void_p
+xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
+xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
+xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
+xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
+xglib.XGDMatrixGetLabel.restype = ctypes.POINTER(ctypes.c_float)
+xglib.XGDMatrixGetWeight.restype = ctypes.POINTER(ctypes.c_float)
 xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
-xglib.XGDMatrixGetLabel.restype =  ctypes.POINTER( ctypes.c_float )
-xglib.XGDMatrixGetWeight.restype =  ctypes.POINTER( ctypes.c_float )
-xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
-xglib.XGBoosterCreate.restype = ctypes.c_void_p
-xglib.XGBoosterPredict.restype = ctypes.POINTER( ctypes.c_float ) 
 
-def ctypes2numpy( cptr, length ):
+xglib.XGBoosterCreate.restype = ctypes.c_void_p
+xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
+xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
+xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
+
+
+def ctypes2numpy(cptr, length):
     # convert a ctypes pointer array to numpy
-    assert isinstance( cptr, ctypes.POINTER( ctypes.c_float ) )
-    res = numpy.zeros( length, dtype='float32' )
-    assert ctypes.memmove( res.ctypes.data, cptr, length * res.strides[0] )
+    assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
+    res = numpy.zeros(length, dtype='float32')
+    assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0])
     return res
 
 # data matrix used in xgboost
 class DMatrix:
     # constructor
-    def __init__(self, data=None, label=None, missing=0.0, weight = None):
+    def __init__(self, data, label=None, missing=0.0, weight = None):
         # force into void_p, mac need to pass things in as void_p
-        self.handle = ctypes.c_void_p( xglib.XGDMatrixCreate() )
         if data == None:
+            self.handle = None
             return
-        if isinstance(data,str):
-            xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data.encode('utf-8')), 1)             
-        elif isinstance(data,scp.csr_matrix):
-            self.__init_from_csr(data)
+        if isinstance(data, str):
+            self.handle = xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1)
+        elif isinstance(data, scp.csr_matrix):
+            self.__init_from_csr(data)            
         elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
             self.__init_from_npy2d(data, missing)
         else:
@@ -52,77 +59,68 @@ class DMatrix:
             self.set_label(label)
         if weight !=None:
             self.set_weight(weight)
-
     # convert data from csr matrix
-    def __init_from_csr(self,csr):
+    def __init_from_csr(self, csr):
         assert len(csr.indices) == len(csr.data)
-        xglib.XGDMatrixParseCSR( self.handle, 
-                                 ( ctypes.c_ulong  * len(csr.indptr) )(*csr.indptr),
-                                 ( ctypes.c_uint  * len(csr.indices) )(*csr.indices),
-                                 ( ctypes.c_float * len(csr.data) )(*csr.data),
-                                 len(csr.indptr), len(csr.data) )
+        # keep the handle as c_void_p so ctypes does not truncate it to a C int on later calls
+        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR(
+            (ctypes.c_ulong  * len(csr.indptr))(*csr.indptr),
+            (ctypes.c_uint  * len(csr.indices))(*csr.indices),
+            (ctypes.c_float * len(csr.data))(*csr.data), len(csr.indptr), len(csr.data)))
     # convert data from numpy matrix
     def __init_from_npy2d(self,mat,missing):
-        data = numpy.array( mat.reshape(mat.size), dtype='float32' )
-        xglib.XGDMatrixParseMat( self.handle, 
-                                 data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), 
-                                 mat.shape[0], mat.shape[1], ctypes.c_float(missing) )
+        data = numpy.array(mat.reshape(mat.size), dtype='float32')
+        # keep the handle as c_void_p so ctypes does not truncate it to a C int on later calls
+        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat(
+            data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), mat.shape[0], mat.shape[1], ctypes.c_float(missing)))
     # destructor
     def __del__(self):
-        xglib.XGDMatrixFree(self.handle)    
-    # load data from file 
-    def load(self, fname, silent=True):
-        xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
+        xglib.XGDMatrixFree(self.handle)
     # load data from file 
     def save_binary(self, fname, silent=True):
         xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
     # set label of dmatrix
     def set_label(self, label):
-        xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) )
+        xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label))
     # set group size of dmatrix, used for rank
     def set_group(self, group):
-        xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group) )
+        xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group))
     # set weight of each instances
     def set_weight(self, weight):
-        xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight) )
+        xglib.XGDMatrixSetWeight(self.handle, (ctypes.c_float*len(weight))(*weight), len(weight))
     # get label from dmatrix
     def get_label(self):
         length = ctypes.c_ulong()
         labels = xglib.XGDMatrixGetLabel(self.handle, ctypes.byref(length))
-        return ctypes2numpy( labels, length.value );
+        return ctypes2numpy(labels, length.value)
     # get weight from dmatrix
     def get_weight(self):
         length = ctypes.c_ulong()
         weights = xglib.XGDMatrixGetWeight(self.handle, ctypes.byref(length))
-        return ctypes2numpy( weights, length.value );
-    # clear everything
-    def clear(self):
-        xglib.XGDMatrixClear(self.handle)
+        return ctypes2numpy(weights, length.value)
     def num_row(self):
         return xglib.XGDMatrixNumRow(self.handle)
-    # append a row to DMatrix
-    def add_row(self, row):
-        xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row) )
-    # get n-throw from DMatrix
-    def __getitem__(self, ridx):
-        length = ctypes.c_ulong()
-        row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) );
-        return [ (int(row[i].findex),row[i].fvalue) for i in range(length.value) ]
+    # return a new DMatrix that only contains rindex; handle is kept as c_void_p (no C int truncation)
+    def slice(self, rindex):
+        res = DMatrix(None)
+        res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix(
+            self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex)))
+        return res
 
 class Booster:
     """learner class """
     def __init__(self, params={}, cache=[]):
         """ constructor, param: """    
         for d in cache:
-            assert isinstance(d,DMatrix)
-        dmats = ( ctypes.c_void_p  * len(cache) )(*[ d.handle for d in cache])
-        self.handle = ctypes.c_void_p( xglib.XGBoosterCreate( dmats, len(cache) ) )
-        self.set_param( {'seed':0} )
-        self.set_param( params )
+            assert isinstance(d, DMatrix)
+        dmats = (ctypes.c_void_p  * len(cache))(*[ d.handle for d in cache])
+        self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache)))
+        self.set_param({'seed':0})
+        self.set_param(params)
     def __del__(self):
         xglib.XGBoosterFree(self.handle) 
     def set_param(self, params, pv=None):
-        if isinstance(params,dict):
+        if isinstance(params, dict):
             for k, v in params.items():
                 xglib.XGBoosterSetParam(
                     self.handle, ctypes.c_char_p(k.encode('utf-8')), 
@@ -130,72 +128,112 @@ class Booster:
         elif isinstance(params,str) and pv != None:
             xglib.XGBoosterSetParam(
                 self.handle, ctypes.c_char_p(params.encode('utf-8')),
-                ctypes.c_char_p(str(pv).encode('utf-8')) )
+                ctypes.c_char_p(str(pv).encode('utf-8')))
         else:
             for k, v in params:
                 xglib.XGBoosterSetParam(
                     self.handle, ctypes.c_char_p(k.encode('utf-8')),
-                    ctypes.c_char_p(str(v).encode('utf-8')) )             
-    def update(self, dtrain):
+                    ctypes.c_char_p(str(v).encode('utf-8')))
+    def update(self, dtrain, it):
         """ update """
         assert isinstance(dtrain, DMatrix)
-        xglib.XGBoosterUpdateOneIter( self.handle, dtrain.handle )
-    def boost(self, dtrain, grad, hess, bst_group = -1):
+        xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle)
+    def boost(self, dtrain, grad, hess):
         """ update """
         assert len(grad) == len(hess)
         assert isinstance(dtrain, DMatrix)
-        xglib.XGBoosterBoostOneIter( self.handle, dtrain.handle,
-                                     (ctypes.c_float*len(grad))(*grad),
-                                     (ctypes.c_float*len(hess))(*hess),
-                                     len(grad), bst_group )
-    def update_interact(self, dtrain, action, booster_index=None):
-        """ beta: update with specified action"""
-        assert isinstance(dtrain, DMatrix)
-        if booster_index != None:
-            self.set_param('interact:booster_index', str(booster_index))
-        xglib.XGBoosterUpdateInteract(
-            self.handle, dtrain.handle, ctypes.c_char_p(str(action)) )
+        xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
+                                    (ctypes.c_float*len(grad))(*grad),
+                                    (ctypes.c_float*len(hess))(*hess),
+                                    len(grad))
     def eval_set(self, evals, it = 0):
         for d in evals:
             assert isinstance(d[0], DMatrix)
             assert isinstance(d[1], str)
-        dmats = ( ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
-        evnames = ( ctypes.c_char_p * len(evals) )(
-            *[ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
-        xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) )
-    def eval(self, mat, name = 'eval', it = 0 ):
-        self.eval_set( [(mat,name)], it)
-    def predict(self, data, bst_group = -1):
+        dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
+        evnames = (ctypes.c_char_p * len(evals))(
+            * [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
+        return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))        
+    def eval(self, mat, name = 'eval', it = 0):
+        return self.eval_set( [(mat,name)], it)
+    def predict(self, data):
         length = ctypes.c_ulong()
-        preds = xglib.XGBoosterPredict( self.handle, data.handle, ctypes.byref(length), bst_group)
-        return ctypes2numpy( preds, length.value )
+        preds = xglib.XGBoosterPredict(self.handle, data.handle, ctypes.byref(length))
+        return ctypes2numpy(preds, length.value)
     def save_model(self, fname):
         """ save model to file """
         xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))
     def load_model(self, fname):
         """load model from file"""
         xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) )
-    def dump_model(self, fname, fmap=''):
+    def dump_model(self, fo, fmap=''):
         """dump model into text file"""
-        xglib.XGBoosterDumpModel(
-            self.handle, ctypes.c_char_p(fname.encode('utf-8')), 
-            ctypes.c_char_p(fmap.encode('utf-8')))
+        need_close = isinstance(fo, str)
+        if need_close:
+            fo = open(fo, 'w')
+        try:
+            # fo may be a path (opened and closed here) or a writable file object left open
+            for i, tree in enumerate(self.get_dump(fmap)):
+                fo.write('booster[%d]:\n' % i)
+                fo.write(tree)
+        finally:
+            if need_close:
+                fo.close()
+    def get_dump(self, fmap=''):
+        """get dump of model as list of strings """
+        length = ctypes.c_ulong()
+        sarr = xglib.XGBoosterDumpModel(self.handle, ctypes.c_char_p(fmap.encode('utf-8')), ctypes.byref(length))
+        # c_char_p items are bytes on Python 3, where str(b'..') yields "b'..'"; decode those instead
+        raw = [sarr[i] for i in range(length.value)]
+        res = [s if isinstance(s, str) else s.decode('utf-8') for s in raw]
+        return res
+    def get_fscore(self, fmap=''):
+        """ get feature importance of each feature, as a dict: feature id -> number of lines using it """
+        trees = self.get_dump(fmap)
+        fscore = {}
+        for tree in trees:
+            # lines without '[' are skipped; otherwise the id is the text after the first '[' up to ']' or '<'
+            for l in tree.split('\n'):
+                arr = l.split('[')
+                if len(arr) == 1:
+                    continue
+                fid = arr[1].split(']')[0]
+                fid = fid.split('<')[0]
+                if fid not in fscore:
+                    fscore[fid] = 1
+                else:
+                    fscore[fid]+= 1
+        return fscore
 
-def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
+def evaluate(bst, evals, it, feval = None):
+    """evaluate bst on evals: use feval(preds, dmat) -> (name, value) if given, else bst.eval_set"""
+    if feval is None:
+        # no custom metric: return whatever the booster's own eval_set produces
+        return bst.eval_set(evals, it)
+    # custom metric: build '[it]' followed by one '\tevname-metric:value' field per eval set
+    fields = ['[%d]' % it]
+    for dm, evname in evals:
+        name, val = feval(bst.predict(dm), dm)
+        fields.append('\t%s-%s:%f' % (evname, name, val))
+    return ''.join(fields)
+
+def train(params, dtrain, num_boost_round = 10, evals = [], obj=None, feval=None):
     """ train a booster with given paramaters """
     bst = Booster(params, [dtrain]+[ d[0] for d in evals ] )
     if obj == None:
         for i in range(num_boost_round):
-            bst.update( dtrain )
+            bst.update( dtrain, i )
             if len(evals) != 0:
-                bst.eval_set( evals, i )
+                sys.stderr.write(evaluate(bst, evals, i, feval)+'\n')
     else:
+        if len(evals) != 0 and feval is None:
+            # parenthesized single-argument print behaves the same on Python 2 and Python 3
+            print('you need to provide your own evaluation function')
         # try customized objective function
         for i in range(num_boost_round):
             pred = bst.predict( dtrain )
             grad, hess = obj( pred, dtrain )
             bst.boost( dtrain, grad, hess )
             if len(evals) != 0:
-                bst.eval_set( evals, i )        
+                sys.stderr.write(evaluate(bst, evals, i, feval)+'\n')
     return bst
-
diff --git a/python/xgboost_wrapper.cpp b/python/xgboost_wrapper.cpp
index e43095920..478d74936 100644
--- a/python/xgboost_wrapper.cpp
+++ b/python/xgboost_wrapper.cpp
@@ -20,9 +20,11 @@ class Booster: public learner::BoostLearner<FMatrixS> {
  public:
   explicit Booster(const std::vector<DataMatrix*>& mats) {
     this->silent = 1;
+    this->init_model = false;
     this->SetCacheData(mats);
   }
   const float *Pred(const DataMatrix &dmat, size_t *len) {
+    this->CheckInitModel();
     this->Predict(dmat, &this->preds_);
     *len = this->preds_.size();
     return &this->preds_[0];
@@ -37,6 +39,15 @@ class Booster: public learner::BoostLearner<FMatrixS> {
     }
     gbm_->DoBoost(gpair_, train.fmat, train.info.root_index);
   }
+  // run InitModel() once, the first time the model is needed; later calls do nothing
+  inline void CheckInitModel(void) {
+    if (init_model) return;
+    this->InitModel(); init_model = true;
+  }
+  // load via the base learner, then set init_model so CheckInitModel() skips InitModel()
+  inline void LoadModel(const char *fname) {
+    learner::BoostLearner<FMatrixS>::LoadModel(fname); init_model = true;
+  }
   inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
     model_dump = this->DumpModel(fmap, with_stats);
     model_dump_cptr.resize(model_dump.size()); 
@@ -52,6 +63,9 @@ class Booster: public learner::BoostLearner<FMatrixS> {
   // temporal space to save model dump
   std::vector<std::string> model_dump;
   std::vector<const char*> model_dump_cptr;
+
+ private:
+  bool init_model;
 };
 }  // namespace wrapper
 }  // namespace xgboost
@@ -199,6 +213,7 @@ extern "C"{
   void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
     Booster *bst = static_cast<Booster*>(handle);
     DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
+    bst->CheckInitModel();
     bst->CheckInit(dtr);
     bst->UpdateOneIter(iter, *dtr);
   }
@@ -206,6 +221,7 @@ extern "C"{
                              float *grad, float *hess, size_t len) {
     Booster *bst = static_cast<Booster*>(handle);
     DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
+    bst->CheckInitModel();
     bst->CheckInit(dtr);
     bst->BoostOneIter(*dtr, grad, hess, len);
   }
@@ -217,6 +233,7 @@ extern "C"{
       mats.push_back(static_cast<DataMatrix*>(dmats[i]));
       names.push_back(std::string(evnames[i]));
     }
+    bst->CheckInitModel();
     bst->eval_str = bst->EvalOneIter(iter, mats, names);
     return bst->eval_str.c_str();
   }
diff --git a/src/data.h b/src/data.h
index c60b58b8a..df43551e3 100644
--- a/src/data.h
+++ b/src/data.h
@@ -242,7 +242,7 @@ class FMatrixS : public FMatrixInterface<FMatrixS>{
    * \brief save column access data into stream
    * \param fo output stream to save to
    */
-  inline void SaveColAccess(utils::IStream &fo) {
+  inline void SaveColAccess(utils::IStream &fo) const {
     fo.Write(&num_buffered_row_, sizeof(num_buffered_row_));
     if (num_buffered_row_ != 0) {
       SaveBinary(fo, col_ptr_, col_data_);
diff --git a/src/io/io.cpp b/src/io/io.cpp
index 4ddf61eb0..7689a4560 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -15,7 +15,12 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
 }
 
 void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
-  utils::Error("not implemented");
+  // only a DMatrixSimple, recognized by its magic number, is saved here; anything else is an error
+  if (dmat.magic != DMatrixSimple::kMagic) {
+    utils::Error("not implemented");
+    return;
+  }
+  static_cast<const DMatrixSimple&>(dmat).SaveBinary(fname, silent);
 }
 
 }  // namespace io
diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp
index f996b8d8c..b8b15adce 100644
--- a/src/io/simple_dmatrix-inl.hpp
+++ b/src/io/simple_dmatrix-inl.hpp
@@ -148,7 +148,7 @@ class DMatrixSimple : public DataMatrix {
    * \param fname name of binary data
    * \param silent whether print information or not
    */
-  inline void SaveBinary(const char* fname, bool silent = false) {
+  inline void SaveBinary(const char* fname, bool silent = false) const {
     utils::FileStream fs(utils::FopenCheck(fname, "wb"));
     int magic = kMagic;
     fs.Write(&magic, sizeof(magic));
diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h
index b558b070b..144b1a44e 100644
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -58,7 +58,7 @@ struct MetaInfo {
       return 0;
     }
   }
-  inline void SaveBinary(utils::IStream &fo) {
+  inline void SaveBinary(utils::IStream &fo) const {
     fo.Write(&num_row, sizeof(num_row));
     fo.Write(&num_col, sizeof(num_col));
     fo.Write(labels);