From cc767add881b5cc91376f67d01a5e164c5a0fadb Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 4 Jul 2015 18:12:44 -0700 Subject: [PATCH] API refactor to make fault handling easy --- Makefile | 4 + R-package/src/xgboost_R.cpp | 105 ++++--- src/utils/utils.h | 4 +- wrapper/xgboost.py | 166 ++++++----- wrapper/xgboost_wrapper.cpp | 306 ++++++++++++++++----- wrapper/xgboost_wrapper.h | 534 +++++++++++++++++++++--------------- 6 files changed, 725 insertions(+), 394 deletions(-) diff --git a/Makefile b/Makefile index 7d2ff5273..a24bea327 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,10 @@ else TARGET = $(BIN) endif +ifndef LINT_LANG + LINT_LANG= "all" +endif + .PHONY: clean all mpi python Rpack lint all: $(TARGET) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 436faaa5a..a8084b206 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -59,6 +59,10 @@ inline void _WrapperEnd(void) { PutRNGstate(); } +// do nothing, check error +inline void CheckErr(int ret) { +} + extern "C" { SEXP XGCheckNullPtr_R(SEXP handle) { return ScalarLogical(R_ExternalPtrAddr(handle) == NULL); @@ -70,7 +74,8 @@ extern "C" { } SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { _WrapperBegin(); - void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent)); + DMatrixHandle handle; + CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -91,7 +96,8 @@ extern "C" { data[i * ncol +j] = din[i + nrow * j]; } } - void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing)); + DMatrixHandle handle; + CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ 
-119,8 +125,10 @@ extern "C" { indices_[i] = static_cast(p_indices[i]); data_[i] = static_cast(p_data[i]); } - void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), - BeginPtr(data_), nindptr, ndata); + DMatrixHandle handle; + CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), + BeginPtr(data_), nindptr, ndata, + &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -134,7 +142,10 @@ extern "C" { for (int i = 0; i < len; ++i) { idxvec[i] = INTEGER(idxset)[i] - 1; } - void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len); + DMatrixHandle res; + CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), + BeginPtr(idxvec), len, + &res)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -143,8 +154,8 @@ extern "C" { } void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { _WrapperBegin(); - XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), - CHAR(asChar(fname)), asInteger(silent)); + CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), + CHAR(asChar(fname)), asInteger(silent))); _WrapperEnd(); } void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { @@ -157,24 +168,27 @@ extern "C" { for (int i = 0; i < len; ++i) { vec[i] = static_cast(INTEGER(array)[i]); } - XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len); + CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len)); } else { std::vector vec(len); #pragma omp parallel for schedule(static) for (int i = 0; i < len; ++i) { vec[i] = REAL(array)[i]; } - XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), - CHAR(asChar(field)), - BeginPtr(vec), len); + CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + BeginPtr(vec), len)); } _WrapperEnd(); } SEXP XGDMatrixGetInfo_R(SEXP handle, 
SEXP field) { _WrapperBegin(); bst_ulong olen; - const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), - CHAR(asChar(field)), &olen); + const float *res; + CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + &olen, + &res)); _WrapperEnd(); SEXP ret = PROTECT(allocVector(REALSXP, olen)); for (size_t i = 0; i < olen; ++i) { @@ -184,13 +198,14 @@ extern "C" { return ret; } SEXP XGDMatrixNumRow_R(SEXP handle) { - bst_ulong nrow = XGDMatrixNumRow(R_ExternalPtrAddr(handle)); + bst_ulong nrow; + CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow)); return ScalarInteger(static_cast(nrow)); } // functions related to booster void _BoosterFinalizer(SEXP ext) { if (R_ExternalPtrAddr(ext) == NULL) return; - XGBoosterFree(R_ExternalPtrAddr(ext)); + CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext))); R_ClearExternalPtr(ext); } SEXP XGBoosterCreate_R(SEXP dmats) { @@ -200,7 +215,8 @@ extern "C" { for (int i = 0; i < len; ++i) { dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); } - void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size()); + BoosterHandle handle; + CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); @@ -209,16 +225,16 @@ extern "C" { } void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { _WrapperBegin(); - XGBoosterSetParam(R_ExternalPtrAddr(handle), - CHAR(asChar(name)), - CHAR(asChar(val))); + CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle), + CHAR(asChar(name)), + CHAR(asChar(val)))); _WrapperEnd(); } void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { _WrapperBegin(); - XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), - asInteger(iter), - R_ExternalPtrAddr(dtrain)); + CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + R_ExternalPtrAddr(dtrain))); _WrapperEnd(); } void 
XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { @@ -231,9 +247,10 @@ extern "C" { tgrad[j] = REAL(grad)[j]; thess[j] = REAL(hess)[j]; } - XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), - R_ExternalPtrAddr(dtrain), - BeginPtr(tgrad), BeginPtr(thess), len); + CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dtrain), + BeginPtr(tgrad), BeginPtr(thess), + len)); _WrapperEnd(); } SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { @@ -250,21 +267,24 @@ extern "C" { for (int i = 0; i < len; ++i) { vec_sptr.push_back(vec_names[i].c_str()); } - const char *ret = - XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), - asInteger(iter), - BeginPtr(vec_dmats), BeginPtr(vec_sptr), len); + const char *ret; + CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + BeginPtr(vec_dmats), + BeginPtr(vec_sptr), + len, &ret)); _WrapperEnd(); return mkString(ret); } SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) { _WrapperBegin(); bst_ulong olen; - const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), - R_ExternalPtrAddr(dmat), - asInteger(option_mask), - asInteger(ntree_limit), - &olen); + const float *res; + CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dmat), + asInteger(option_mask), + asInteger(ntree_limit), + &olen, &res)); _WrapperEnd(); SEXP ret = PROTECT(allocVector(REALSXP, olen)); for (size_t i = 0; i < olen; ++i) { @@ -275,12 +295,12 @@ extern "C" { } void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { _WrapperBegin(); - XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); _WrapperEnd(); } void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { _WrapperBegin(); - XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); 
_WrapperEnd(); } void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { @@ -293,7 +313,8 @@ extern "C" { SEXP XGBoosterModelToRaw_R(SEXP handle) { bst_ulong olen; _WrapperBegin(); - const char *raw = XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen); + const char *raw; + CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw)); _WrapperEnd(); SEXP ret = PROTECT(allocVector(RAWSXP, olen)); if (olen != 0) { @@ -305,11 +326,11 @@ extern "C" { SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) { _WrapperBegin(); bst_ulong olen; - const char **res = - XGBoosterDumpModel(R_ExternalPtrAddr(handle), - CHAR(asChar(fmap)), - asInteger(with_stats), - &olen); + const char **res; + CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle), + CHAR(asChar(fmap)), + asInteger(with_stats), + &olen, &res)); _WrapperEnd(); SEXP out = PROTECT(allocVector(STRSXP, olen)); for (size_t i = 0; i < olen; ++i) { diff --git a/src/utils/utils.h b/src/utils/utils.h index 2066634d6..7a8f18390 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef XGBOOST_STRICT_CXX98_ #include @@ -73,8 +74,7 @@ inline void HandleAssertError(const char *msg) { * \param msg error message */ inline void HandleCheckError(const char *msg) { - fprintf(stderr, "%s\n", msg); - exit(-1); + throw std::runtime_error(msg); } inline void HandlePrint(const char *msg) { printf("%s", msg); diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index a009ad81b..96f6c2573 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -44,7 +44,6 @@ else: # pylint: disable=invalid-name STRING_TYPES = basestring, - def load_xglib(): """Load the xgboost library.""" curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) @@ -63,30 +62,27 @@ def load_xglib(): raise XGBoostLibraryNotFound( 'cannot find find the files in the candicate path ' + str(dll_path)) lib = ctypes.cdll.LoadLibrary(lib_path[0]) - - # DMatrix functions - 
lib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p - lib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p - lib.XGDMatrixCreateFromCSC.restype = ctypes.c_void_p - lib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p - lib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p - lib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float) - lib.XGDMatrixGetUIntInfo.restype = ctypes.POINTER(ctypes.c_uint) - lib.XGDMatrixNumRow.restype = ctypes.c_ulong - - # Booster functions - lib.XGBoosterCreate.restype = ctypes.c_void_p - lib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float) - lib.XGBoosterEvalOneIter.restype = ctypes.c_char_p - lib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p) - lib.XGBoosterGetModelRaw.restype = ctypes.POINTER(ctypes.c_char) - lib.XGBoosterLoadModelFromBuffer.restype = ctypes.c_void_p + lib.XGBGetLastError.restype = ctypes.c_char_p return lib # load the XGBoost library globally _LIB = load_xglib() +def _check_call(ret): + """Check the return value of C API call + + This function will raise exception when error occurs. + Wrap every API call with this function + + Parameters + ---------- + ret : int + return value from API calls + """ + if ret != 0: + raise XGBoostError(_LIB.XGBGetLastError()) + def ctypes2numpy(cptr, length, dtype): """Convert a ctypes pointer array to a numpy array. 
@@ -145,7 +141,10 @@ class DMatrix(object): self.handle = None return if isinstance(data, STRING_TYPES): - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromFile(c_str(data), int(silent))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromFile(c_str(data), + int(silent), + ctypes.byref(self.handle))) elif isinstance(data, scipy.sparse.csr_matrix): self._init_from_csr(data) elif isinstance(data, scipy.sparse.csc_matrix): @@ -169,11 +168,12 @@ class DMatrix(object): """ if len(csr.indices) != len(csr.data): raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data))) - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSR( - c_array(ctypes.c_ulong, csr.indptr), - c_array(ctypes.c_uint, csr.indices), - c_array(ctypes.c_float, csr.data), - len(csr.indptr), len(csr.data))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromCSR(c_array(ctypes.c_ulong, csr.indptr), + c_array(ctypes.c_uint, csr.indices), + c_array(ctypes.c_float, csr.data), + len(csr.indptr), len(csr.data), + ctypes.byref(self.handle))) def _init_from_csc(self, csc): """ @@ -181,23 +181,26 @@ class DMatrix(object): """ if len(csc.indices) != len(csc.data): raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data))) - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSC( - c_array(ctypes.c_ulong, csc.indptr), - c_array(ctypes.c_uint, csc.indices), - c_array(ctypes.c_float, csc.data), - len(csc.indptr), len(csc.data))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromCSC(c_array(ctypes.c_ulong, csc.indptr), + c_array(ctypes.c_uint, csc.indices), + c_array(ctypes.c_float, csc.data), + len(csc.indptr), len(csc.data), + ctypes.byref(self.handle))) def _init_from_npy2d(self, mat, missing): """ Initialize data from a 2-D numpy matrix. 
""" data = np.array(mat.reshape(mat.size), dtype=np.float32) - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromMat( - data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), - mat.shape[0], mat.shape[1], ctypes.c_float(missing))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), + mat.shape[0], mat.shape[1], + ctypes.c_float(missing), + ctypes.byref(self.handle))) def __del__(self): - _LIB.XGDMatrixFree(self.handle) + _check_call(_LIB.XGDMatrixFree(self.handle)) def get_float_info(self, field): """Get float property from the DMatrix. @@ -213,7 +216,11 @@ class DMatrix(object): a numpy array of float information of the data """ length = ctypes.c_ulong() - ret = _LIB.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = ctypes.POINTER(ctypes.c_float)() + _check_call(_LIB.XGDMatrixGetFloatInfo(self.handle, + c_str(field), + ctypes.byref(length), + ctypes.byref(ret))) return ctypes2numpy(ret, length.value, np.float32) def get_uint_info(self, field): @@ -230,7 +237,11 @@ class DMatrix(object): a numpy array of float information of the data """ length = ctypes.c_ulong() - ret = _LIB.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = ctypes.POINTER(ctypes.c_uint)() + _check_call(_LIB.XGDMatrixGetUIntInfo(self.handle, + c_str(field), + ctypes.byref(length), + ctypes.byref(ret))) return ctypes2numpy(ret, length.value, np.uint32) def set_float_info(self, field, data): @@ -244,8 +255,10 @@ class DMatrix(object): data: numpy array The array ofdata to be set """ - _LIB.XGDMatrixSetFloatInfo(self.handle, c_str(field), - c_array(ctypes.c_float, data), len(data)) + _check_call(_LIB.XGDMatrixSetFloatInfo(self.handle, + c_str(field), + c_array(ctypes.c_float, data), + len(data))) def set_uint_info(self, field, data): """Set uint type property into the DMatrix. 
@@ -258,8 +271,10 @@ class DMatrix(object): data: numpy array The array ofdata to be set """ - _LIB.XGDMatrixSetUIntInfo(self.handle, c_str(field), - c_array(ctypes.c_uint, data), len(data)) + _check_call(_LIB.XGDMatrixSetUIntInfo(self.handle, + c_str(field), + c_array(ctypes.c_uint, data), + len(data))) def save_binary(self, fname, silent=True): """Save DMatrix to an XGBoost buffer. @@ -271,7 +286,9 @@ class DMatrix(object): silent : bool (optional; default: True) If set, the output is suppressed. """ - _LIB.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent)) + _check_call(_LIB.XGDMatrixSaveBinary(self.handle, + c_str(fname), + int(silent))) def set_label(self, label): """Set label of dmatrix @@ -317,7 +334,9 @@ class DMatrix(object): group : array like Group size of each group """ - _LIB.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group)) + _check_call(_LIB.XGDMatrixSetGroup(self.handle, + c_array(ctypes.c_uint, group), + len(group))) def get_label(self): """Get the label of the DMatrix. @@ -353,7 +372,10 @@ class DMatrix(object): ------- number of rows : int """ - return _LIB.XGDMatrixNumRow(self.handle) + ret = ctypes.c_ulong() + _check_call(_LIB.XGDMatrixNumRow(self.handle, + ctypes.byref(ret))) + return ret.value def slice(self, rindex): """Slice the DMatrix and return a new DMatrix that only contains `rindex`. @@ -369,8 +391,11 @@ class DMatrix(object): A new DMatrix containing only selected indices. 
""" res = DMatrix(None) - res.handle = ctypes.c_void_p(_LIB.XGDMatrixSliceDMatrix( - self.handle, c_array(ctypes.c_int, rindex), len(rindex))) + res.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixSliceDMatrix(self.handle, + c_array(ctypes.c_int, rindex), + len(rindex), + ctypes.byref(res.handle))) return res @@ -394,7 +419,8 @@ class Booster(object): if not isinstance(d, DMatrix): raise TypeError('invalid cache item: {}'.format(type(d).__name__)) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) - self.handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, len(cache))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGBoosterCreate(dmats, len(cache), ctypes.byref(self.handle))) self.set_param({'seed': 0}) self.set_param(params or {}) if model_file is not None: @@ -419,10 +445,11 @@ class Booster(object): if handle is not None: buf = handle dmats = c_array(ctypes.c_void_p, []) - handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, 0)) + handle = ctypes.c_void_p() + _check_call(_LIB.XGBoosterCreate(dmats, 0, ctypes.byref(handle))) length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - _LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length) + _check_call(_LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length)) state['handle'] = handle self.__dict__.update(state) self.set_param({'seed': 0}) @@ -449,7 +476,7 @@ class Booster(object): elif isinstance(params, STRING_TYPES) and value is not None: params = [(params, value)] for key, val in params: - _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) + _check_call(_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val)))) def update(self, dtrain, iteration, fobj=None): """ @@ -467,7 +494,7 @@ class Booster(object): if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) if fobj is None: - _LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle) + 
_check_call(_LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle)) else: pred = self.predict(dtrain) grad, hess = fobj(pred, dtrain) @@ -490,10 +517,10 @@ class Booster(object): raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess))) if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) - _LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, - c_array(ctypes.c_float, grad), - c_array(ctypes.c_float, hess), - len(grad)) + _check_call(_LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, + c_array(ctypes.c_float, grad), + c_array(ctypes.c_float, hess), + len(grad))) def eval_set(self, evals, iteration=0, feval=None): # pylint: disable=invalid-name @@ -520,7 +547,11 @@ class Booster(object): raise TypeError('expected string, got {}'.format(type(d[1]).__name__)) dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals]) evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals]) - return _LIB.XGBoosterEvalOneIter(self.handle, iteration, dmats, evnames, len(evals)) + msg = ctypes.c_char_p() + _check_call(_LIB.XGBoosterEvalOneIter(self.handle, iteration, + dmats, evnames, len(evals), + ctypes.byref(msg))) + return msg.value else: res = '[%d]' % iteration for dmat, evname in evals: @@ -582,8 +613,11 @@ class Booster(object): if pred_leaf: option_mask |= 0x02 length = ctypes.c_ulong() - preds = _LIB.XGBoosterPredict(self.handle, data.handle, - option_mask, ntree_limit, ctypes.byref(length)) + preds = ctypes.POINTER(ctypes.c_float)() + _check_call(_LIB.XGBoosterPredict(self.handle, data.handle, + option_mask, ntree_limit, + ctypes.byref(length), + ctypes.byref(preds))) preds = ctypes2numpy(preds, length.value, np.float32) if pred_leaf: preds = preds.astype(np.int32) @@ -602,7 +636,7 @@ class Booster(object): Output file name """ if isinstance(fname, STRING_TYPES): # assume file name - _LIB.XGBoosterSaveModel(self.handle, c_str(fname)) + 
_check_call(_LIB.XGBoosterSaveModel(self.handle, c_str(fname))) else: raise TypeError("fname must be a string") @@ -615,8 +649,10 @@ class Booster(object): a in memory buffer represetation of the model """ length = ctypes.c_ulong() - cptr = _LIB.XGBoosterGetModelRaw(self.handle, - ctypes.byref(length)) + cptr = ctypes.POINTER(ctypes.c_char)() + _check_call(_LIB.XGBoosterGetModelRaw(self.handle, + ctypes.byref(length), + ctypes.byref(cptr))) return ctypes2buffer(cptr, length.value) def load_model(self, fname): @@ -634,7 +670,7 @@ class Booster(object): buf = fname length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - _LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length) + _check_call(_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length)) def dump_model(self, fout, fmap='', with_stats=False): """ @@ -666,8 +702,12 @@ class Booster(object): Returns the dump the model as a list of strings. """ length = ctypes.c_ulong() - sarr = _LIB.XGBoosterDumpModel(self.handle, c_str(fmap), - int(with_stats), ctypes.byref(length)) + sarr = ctypes.POINTER(ctypes.c_char_p)() + _check_call(_LIB.XGBoosterDumpModel(self.handle, + c_str(fmap), + int(with_stats), + ctypes.byref(length), + ctypes.byref(sarr))) res = [] for i in range(length.value): res.append(str(sarr[i].decode('ascii'))) diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 8572316f0..e1ce01119 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -8,6 +8,7 @@ #include #include #include +#include // include all std functions using namespace std; #include "./xgboost_wrapper.h" @@ -102,15 +103,79 @@ class Booster: public learner::BoostLearner { using namespace xgboost::wrapper; -void* XGDMatrixCreateFromFile(const char *fname, int silent) { - return LoadDataMatrix(fname, silent != 0, false, false); +/*! \brief macro to guard beginning and end section of all functions */ +#define API_BEGIN() try { +/*! 
+ * \brief every function starts with API_BEGIN(); and finishes with API_END(); + * \param Finalize optionally put in a finalizer + */ +#define API_END(Finalize) } catch(std::exception &e) { \ + Finalize; return XGBHandleException(e); \ + } return 0; + +// do not use threadlocal on OSX since it is not always available +#ifndef DISABLE_THREAD_LOCAL +#ifdef __GNUC__ + #define XGB_TREAD_LOCAL __thread +#elif __STDC_VERSION__ >= 201112L + #define XGB_TREAD_LOCAL _Thread_local +#elif defined(_MSC_VER) + #define XGB_TREAD_LOCAL __declspec(thread) +#endif +#endif + +#ifndef XGB_TREAD_LOCAL +#pragma message("Warning: Threadlocal not enabled, used single thread error handling") +#define XGB_TREAD_LOCAL +#endif + +/*! + * \brief a helper function for error handling + * will set the last error to be str_set when it is not NULL + * \param str_set the error to set + * \return a pointer message to last error + */ +const char *XGBSetGetLastError_(const char *str_set) { + // use last_error to record last error + static XGB_TREAD_LOCAL std::string last_error; + if (str_set != NULL) { + last_error = str_set; + } + return last_error.c_str(); } -void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem) { - DMatrixSimple *p_mat = new DMatrixSimple(); + +/*! \brief return str message of the last error */ +const char *XGBGetLastError() { + return XGBSetGetLastError_(NULL); +} + +/*! 
+ * \brief handle exception throwed out + * \param e the exception + * \return the return value of API after exception is handled + */ +int XGBHandleException(const std::exception &e) { + XGBSetGetLastError_(e.what()); + return -1; +} + +int XGDMatrixCreateFromFile(const char *fname, + int silent, + DMatrixHandle *out) { + API_BEGIN(); + *out = LoadDataMatrix(fname, silent != 0, false, false); + API_END(); +} + +int XGDMatrixCreateFromCSR(const bst_ulong *indptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem, + DMatrixHandle *out) { + DMatrixSimple *p_mat = NULL; + API_BEGIN(); + p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.row_ptr_.resize(nindptr); for (bst_ulong i = 0; i < nindptr; ++i) { @@ -123,20 +188,24 @@ void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, static_cast(indices[i]+1)); } mat.info.info.num_row = nindptr - 1; - return p_mat; + *out = p_mat; + API_END(delete p_mat); } -void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem) { + +int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem, + DMatrixHandle *out) { + DMatrixSimple *p_mat = NULL; + API_BEGIN(); int nthread; #pragma omp parallel { nthread = omp_get_num_threads(); } - - DMatrixSimple *p_mat = new DMatrixSimple(); + p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; utils::ParallelGroupBuilder builder(&mat.row_ptr_, &mat.row_data_); builder.InitBudget(0, nthread); @@ -160,14 +229,19 @@ void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, } mat.info.info.num_row = mat.row_ptr_.size() - 1; mat.info.info.num_col = static_cast(ncol); - return p_mat; + *out = p_mat; + API_END(delete p_mat); } -void* XGDMatrixCreateFromMat(const float *data, - bst_ulong nrow, - bst_ulong ncol, - float missing) { + +int XGDMatrixCreateFromMat(const float *data, + bst_ulong nrow, 
+ bst_ulong ncol, + float missing, + DMatrixHandle *out) { + DMatrixSimple *p_mat = NULL; + API_BEGIN(); + p_mat = new DMatrixSimple(); bool nan_missing = utils::CheckNAN(missing); - DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.info.info.num_row = nrow; mat.info.info.num_col = ncol; @@ -186,11 +260,16 @@ void* XGDMatrixCreateFromMat(const float *data, } mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem); } - return p_mat; + *out = p_mat; + API_END(delete p_mat); } -void* XGDMatrixSliceDMatrix(void *handle, - const int *idxset, - bst_ulong len) { + +int XGDMatrixSliceDMatrix(DMatrixHandle handle, + const int *idxset, + bst_ulong len, + DMatrixHandle *out) { + DMatrixSimple *p_ret = NULL; + API_BEGIN(); DMatrixSimple tmp; DataMatrix &dsrc = *static_cast(handle); if (dsrc.magic != DMatrixSimple::kMagic) { @@ -198,7 +277,7 @@ void* XGDMatrixSliceDMatrix(void *handle, } DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ? *static_cast(handle): tmp); - DMatrixSimple *p_ret = new DMatrixSimple(); + p_ret = new DMatrixSimple(); DMatrixSimple &ret = *p_ret; utils::Check(src.info.group_ptr.size() == 0, @@ -232,82 +311,151 @@ void* XGDMatrixSliceDMatrix(void *handle, ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]); } } - return p_ret; + *out = p_ret; + API_END(delete p_ret); } -void XGDMatrixFree(void *handle) { + +int XGDMatrixFree(DMatrixHandle handle) { + API_BEGIN(); delete static_cast(handle); + API_END(); } -void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { + +int XGDMatrixSaveBinary(DMatrixHandle handle, + const char *fname, + int silent) { + API_BEGIN(); SaveDataMatrix(*static_cast(handle), fname, silent != 0); + API_END(); } -void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) { + +int XGDMatrixSetFloatInfo(DMatrixHandle handle, + const char *field, + const float *info, + bst_ulong len) { + API_BEGIN(); std::vector &vec = 
static_cast(handle)->info.GetFloatInfo(field); vec.resize(len); memcpy(BeginPtr(vec), info, sizeof(float) * len); + API_END(); } -void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) { + +int XGDMatrixSetUIntInfo(DMatrixHandle handle, + const char *field, + const unsigned *info, + bst_ulong len) { + API_BEGIN(); std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); vec.resize(len); memcpy(BeginPtr(vec), info, sizeof(unsigned) * len); + API_END(); } -void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) { + +int XGDMatrixSetGroup(DMatrixHandle handle, + const unsigned *group, + bst_ulong len) { + API_BEGIN(); DataMatrix *pmat = static_cast(handle); pmat->info.group_ptr.resize(len + 1); pmat->info.group_ptr[0] = 0; for (uint64_t i = 0; i < len; ++i) { pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i]; } + API_END(); } -const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) { + +int XGDMatrixGetFloatInfo(const DMatrixHandle handle, + const char *field, + bst_ulong *out_len, + const float **out_dptr) { + API_BEGIN(); const std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); - *len = static_cast(vec.size()); - return BeginPtr(vec); + *out_len = static_cast(vec.size()); + *out_dptr = BeginPtr(vec); + API_END(); } -const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) { + +int XGDMatrixGetUIntInfo(const DMatrixHandle handle, + const char *field, + bst_ulong *out_len, + const unsigned **out_dptr) { + API_BEGIN(); const std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); - *len = static_cast(vec.size()); - return BeginPtr(vec); + *out_len = static_cast(vec.size()); + *out_dptr = BeginPtr(vec); + API_END(); } -bst_ulong XGDMatrixNumRow(const void *handle) { - return static_cast(static_cast(handle)->info.num_row()); +int XGDMatrixNumRow(const DMatrixHandle handle, + bst_ulong *out) { + 
API_BEGIN(); + *out = static_cast(static_cast(handle)->info.num_row()); + API_END(); } // xgboost implementation -void *XGBoosterCreate(void *dmats[], bst_ulong len) { +int XGBoosterCreate(DMatrixHandle dmats[], + bst_ulong len, + BoosterHandle *out) { + API_BEGIN(); std::vector mats; for (bst_ulong i = 0; i < len; ++i) { DataMatrix *dtr = static_cast(dmats[i]); mats.push_back(dtr); } - return new Booster(mats); + *out = new Booster(mats); + API_END(); } -void XGBoosterFree(void *handle) { + +int XGBoosterFree(BoosterHandle handle) { + API_BEGIN(); delete static_cast(handle); + API_END(); } -void XGBoosterSetParam(void *handle, const char *name, const char *value) { + +int XGBoosterSetParam(BoosterHandle handle, + const char *name, const char *value) { + API_BEGIN(); static_cast(handle)->SetParam(name, value); + API_END(); } -void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) { + +int XGBoosterUpdateOneIter(BoosterHandle handle, + int iter, + DMatrixHandle dtrain) { + API_BEGIN(); Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); bst->CheckInitModel(); bst->CheckInit(dtr); bst->UpdateOneIter(iter, *dtr); + API_END(); } -void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, bst_ulong len) { + +int XGBoosterBoostOneIter(BoosterHandle handle, + DMatrixHandle dtrain, + float *grad, + float *hess, + bst_ulong len) { + API_BEGIN(); Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); bst->CheckInitModel(); bst->CheckInit(dtr); bst->BoostOneIter(*dtr, grad, hess, len); + API_END(); } -const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], bst_ulong len) { + +int XGBoosterEvalOneIter(BoosterHandle handle, + int iter, + DMatrixHandle dmats[], + const char *evnames[], + bst_ulong len, + const char **out_str) { + API_BEGIN(); Booster *bst = static_cast(handle); std::vector names; std::vector mats; @@ -317,32 +465,64 @@ const char* 
XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], } bst->CheckInitModel(); bst->eval_str = bst->EvalOneIter(iter, mats, names); - return bst->eval_str.c_str(); + *out_str = bst->eval_str.c_str(); + API_END(); } -const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, - unsigned ntree_limit, bst_ulong *len) { - return static_cast(handle)->Pred(*static_cast(dmat), - option_mask, ntree_limit, len); + +int XGBoosterPredict(BoosterHandle handle, + DMatrixHandle dmat, + int option_mask, + unsigned ntree_limit, + bst_ulong *len, + const float **out_result) { + API_BEGIN(); + *out_result = static_cast(handle)-> + Pred(*static_cast(dmat), + option_mask, ntree_limit, len); + API_END(); } -void XGBoosterLoadModel(void *handle, const char *fname) { + +int XGBoosterLoadModel(BoosterHandle handle, const char *fname) { + API_BEGIN(); static_cast(handle)->LoadModel(fname); + API_END(); } -void XGBoosterSaveModel(void *handle, const char *fname) { + +int XGBoosterSaveModel(BoosterHandle handle, const char *fname) { + API_BEGIN(); Booster *bst = static_cast(handle); bst->CheckInitModel(); bst->SaveModel(fname, false); + API_END(); } -void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len) { + +int XGBoosterLoadModelFromBuffer(BoosterHandle handle, + const void *buf, + bst_ulong len) { + API_BEGIN(); static_cast(handle)->LoadModelFromBuffer(buf, len); + API_END(); } -const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len) { - return static_cast(handle)->GetModelRaw(out_len); + +int XGBoosterGetModelRaw(BoosterHandle handle, + bst_ulong *out_len, + const char **out_dptr) { + API_BEGIN(); + *out_dptr = static_cast(handle)->GetModelRaw(out_len); + API_END(); } -const char** XGBoosterDumpModel(void *handle, const char *fmap, - int with_stats, bst_ulong *len) { + +int XGBoosterDumpModel(BoosterHandle handle, + const char *fmap, + int with_stats, + bst_ulong *len, + const char ***out_models) { + API_BEGIN(); utils::FeatMap 
featmap; if (strlen(fmap) != 0) { featmap.LoadText(fmap); } - return static_cast(handle)->GetModelDump(featmap, with_stats != 0, len); + *out_models = static_cast(handle)->GetModelDump( + featmap, with_stats != 0, len); + API_END(); } diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index 3540a3be0..6d3a619fb 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -8,234 +8,320 @@ #ifndef XGBOOST_WRAPPER_H_ #define XGBOOST_WRAPPER_H_ +#ifdef __cplusplus +#define XGB_EXTERN_C extern "C" +#endif + #if defined(_MSC_VER) || defined(_WIN32) -#define XGB_DLL __declspec(dllexport) +#define XGB_DLL XGB_EXTERN_C __declspec(dllexport) #else -#define XGB_DLL +#define XGB_DLL XGB_EXTERN_C #endif // manually define unsign long typedef unsigned long bst_ulong; // NOLINT(*) -#ifdef __cplusplus -extern "C" { -#endif - /*! - * \brief load a data matrix - * \param fname the name of the file - * \param silent whether print messages during loading - * \return a loaded data matrix - */ - XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent); - /*! - * \brief create a matrix content from csr format - * \param indptr pointer to row headers - * \param indices findex - * \param data fvalue - * \param nindptr number of rows in the matix + 1 - * \param nelem number of nonzero elements in the matrix - * \return created dmatrix - */ - XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem); - /*! - * \brief create a matrix content from CSC format - * \param col_ptr pointer to col headers - * \param indices findex - * \param data fvalue - * \param nindptr number of rows in the matix + 1 - * \param nelem number of nonzero elements in the matrix - * \return created dmatrix - */ - XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem); - /*! 
- * \brief create matrix content from dense matrix - * \param data pointer to the data space - * \param nrow number of rows - * \param ncol number columns - * \param missing which value to represent missing value - * \return created dmatrix - */ - XGB_DLL void* XGDMatrixCreateFromMat(const float *data, - bst_ulong nrow, - bst_ulong ncol, - float missing); - /*! - * \brief create a new dmatrix from sliced content of existing matrix - * \param handle instance of data matrix to be sliced - * \param idxset index set - * \param len length of index set - * \return a sliced new matrix - */ - XGB_DLL void* XGDMatrixSliceDMatrix(void *handle, - const int *idxset, - bst_ulong len); - /*! - * \brief free space in data matrix - */ - XGB_DLL void XGDMatrixFree(void *handle); - /*! - * \brief load a data matrix into binary file - * \param handle a instance of data matrix - * \param fname file name - * \param silent print statistics when saving - */ - XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent); - /*! - * \brief set float vector to a content in info - * \param handle a instance of data matrix - * \param field field name, can be label, weight - * \param array pointer to float vector - * \param len length of array - */ - XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, - const float *array, bst_ulong len); - /*! - * \brief set uint32 vector to a content in info - * \param handle a instance of data matrix - * \param field field name - * \param array pointer to float vector - * \param len length of array - */ - XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, - const unsigned *array, bst_ulong len); - /*! - * \brief set label of the training matrix - * \param handle a instance of data matrix - * \param group pointer to group size - * \param len length of array - */ - XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len); - /*! 
- * \brief get float info vector from matrix - * \param handle a instance of data matrix - * \param field field name - * \param out_len used to set result length - * \return pointer to the result - */ - XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, - const char *field, bst_ulong* out_len); - /*! - * \brief get uint32 info vector from matrix - * \param handle a instance of data matrix - * \param field field name - * \param out_len used to set result length - * \return pointer to the result - */ - XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, - const char *field, bst_ulong* out_len); - /*! - * \brief return number of rows - */ - XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle); - // --- start XGBoost class - /*! - * \brief create xgboost learner - * \param dmats matrices that are set to be cached - * \param len length of dmats - */ - XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len); - /*! - * \brief free obj in handle - * \param handle handle to be freed - */ - XGB_DLL void XGBoosterFree(void* handle); - /*! - * \brief set parameters - * \param handle handle - * \param name parameter name - * \param val value of parameter - */ - XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value); - /*! - * \brief update the model in one round using dtrain - * \param handle handle - * \param iter current iteration rounds - * \param dtrain training data - */ - XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain); - /*! - * \brief update the model, by directly specify gradient and second order gradient, - * this can be used to replace UpdateOneIter, to support customized loss function - * \param handle handle - * \param dtrain training data - * \param grad gradient statistics - * \param hess second order gradient statistics - * \param len length of grad/hess array - */ - XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, bst_ulong len); - /*! 
- * \brief get evaluation statistics for xgboost - * \param handle handle - * \param iter current iteration rounds - * \param dmats pointers to data to be evaluated - * \param evnames pointers to names of each data - * \param len length of dmats - * \return the string containing evaluation stati - */ - XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], bst_ulong len); - /*! - * \brief make prediction based on dmat - * \param handle handle - * \param dmat data matrix - * \param option_mask bit-mask of options taken in prediction, possible values - * 0:normal prediction - * 1:output margin instead of transformed value - * 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree - * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees - * when the parameter is set to 0, we will use all the trees - * \param len used to store length of returning result - */ - XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, - int option_mask, - unsigned ntree_limit, - bst_ulong *len); - /*! - * \brief load model from existing file - * \param handle handle - * \param fname file name - */ - XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname); - /*! - * \brief save model into existing file - * \param handle handle - * \param fname file name - */ - XGB_DLL void XGBoosterSaveModel(void *handle, const char *fname); - /*! - * \brief load model from in memory buffer - * \param handle handle - * \param buf pointer to the buffer - * \param len the length of the buffer - */ - XGB_DLL void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len); - /*! 
- * \brief save model into binary raw bytes, return header of the array - * user must copy the result out, before next xgboost call - * \param handle handle - * \param out_len the argument to hold the output length - * \return the pointer to the beginning of binary buffer - */ - XGB_DLL const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len); - /*! - * \brief dump model, return array of strings representing model dump - * \param handle handle - * \param fmap name to fmap can be empty string - * \param with_stats whether to dump with statistics - * \param out_len length of output array - * \return char *data[], representing dump of each model - */ - XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap, - int with_stats, bst_ulong *out_len); -#ifdef __cplusplus -} -#endif +/*! \brief handle to DMatrix */ +typedef void *DMatrixHandle; +/*! \brief handle to Booster */ +typedef void *BoosterHandle; + +/*! + * \brief get string message of the last error + * + * all functions in this file will return 0 when success + * and -1 when an error occurred, + * XGBGetLastError can be called to retrieve the error + * + * this function is threadsafe and can be called by different threads + * \return const char* error information + */ +XGB_DLL const char *XGBGetLastError(); + +/*! + * \brief load a data matrix + * \param fname the name of the file + * \param silent whether to print messages during loading + * \param out a loaded data matrix + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixCreateFromFile(const char *fname, + int silent, + DMatrixHandle *out); + +/*!
+ * \brief create a matrix content from CSR format + * \param indptr pointer to row headers + * \param indices findex + * \param data fvalue + * \param nindptr number of rows in the matrix + 1 + * \param nelem number of nonzero elements in the matrix + * \param out created dmatrix + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixCreateFromCSR(const bst_ulong *indptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem, + DMatrixHandle *out); +/*! + * \brief create a matrix content from CSC format + * \param col_ptr pointer to col headers + * \param indices findex + * \param data fvalue + * \param nindptr number of rows in the matrix + 1 + * \param nelem number of nonzero elements in the matrix + * \param out created dmatrix + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem, + DMatrixHandle *out); +/*! + * \brief create matrix content from dense matrix + * \param data pointer to the data space + * \param nrow number of rows + * \param ncol number of columns + * \param missing which value to represent missing value + * \param out created dmatrix + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixCreateFromMat(const float *data, + bst_ulong nrow, + bst_ulong ncol, + float missing, + DMatrixHandle *out); +/*! + * \brief create a new dmatrix from sliced content of existing matrix + * \param handle instance of data matrix to be sliced + * \param idxset index set + * \param len length of index set + * \param out a sliced new matrix + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, + const int *idxset, + bst_ulong len, + DMatrixHandle *out); +/*!
+ * \brief free space in data matrix + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixFree(void *handle); +/*! + * \brief save a data matrix into binary file + * \param handle an instance of data matrix + * \param fname file name + * \param silent print statistics when saving + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, + const char *fname, int silent); +/*! + * \brief set float vector to a content in info + * \param handle an instance of data matrix + * \param field field name, can be label, weight + * \param array pointer to float vector + * \param len length of array + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, + const char *field, + const float *array, + bst_ulong len); +/*! + * \brief set uint32 vector to a content in info + * \param handle an instance of data matrix + * \param field field name + * \param array pointer to unsigned vector + * \param len length of array + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, + const char *field, + const unsigned *array, + bst_ulong len); +/*! + * \brief set group of the training matrix + * \param handle an instance of data matrix + * \param group pointer to group size + * \param len length of array + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, + const unsigned *group, + bst_ulong len); +/*! + * \brief get float info vector from matrix + * \param handle an instance of data matrix + * \param field field name + * \param out_len used to set result length + * \param out_dptr pointer to the result + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, + const char *field, + bst_ulong* out_len, + const float **out_dptr); +/*!
+ * \brief get uint32 info vector from matrix + * \param handle an instance of data matrix + * \param field field name + * \param out_dptr pointer to the result + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle, + const char *field, + bst_ulong* out_len, + const unsigned **out_dptr); +/*! + * \brief get number of rows + * \param handle the handle to the DMatrix + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, + bst_ulong *out); +// --- start XGBoost class +/*! + * \brief create xgboost learner + * \param dmats matrices that are set to be cached + * \param len length of dmats + * \param out handle to the result booster + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterCreate(void* dmats[], + bst_ulong len, + BoosterHandle *out); +/*! + * \brief free obj in handle + * \param handle handle to be freed + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterFree(BoosterHandle handle); + +/*! + * \brief set parameters + * \param handle handle + * \param name parameter name + * \param value value of parameter + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterSetParam(BoosterHandle handle, + const char *name, + const char *value); +/*! + * \brief update the model in one round using dtrain + * \param handle handle + * \param iter current iteration rounds + * \param dtrain training data + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, + int iter, + DMatrixHandle dtrain); +/*!
+ * \brief update the model, by directly specify gradient and second order gradient, + * this can be used to replace UpdateOneIter, to support customized loss function + * \param handle handle + * \param dtrain training data + * \param grad gradient statistics + * \param hess second order gradient statistics + * \param len length of grad/hess array + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, + DMatrixHandle dtrain, + float *grad, + float *hess, + bst_ulong len); +/*! + * \brief get evaluation statistics for xgboost + * \param handle handle + * \param iter current iteration rounds + * \param dmats pointers to data to be evaluated + * \param evnames pointers to names of each data + * \param len length of dmats + * \param out_result the string containing evaluation statistics + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, + int iter, + DMatrixHandle dmats[], + const char *evnames[], + bst_ulong len, + const char **out_result); +/*! + * \brief make prediction based on dmat + * \param handle handle + * \param dmat data matrix + * \param option_mask bit-mask of options taken in prediction, possible values + * 0:normal prediction + * 1:output margin instead of transformed value + * 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree + * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees + * when the parameter is set to 0, we will use all the trees + * \param out_len used to store length of returning result + * \param out_result used to set a pointer to array + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterPredict(BoosterHandle handle, + DMatrixHandle dmat, + int option_mask, + unsigned ntree_limit, + bst_ulong *out_len, + const float **out_result); +/*! 
+ * \brief load model from existing file + * \param handle handle + * \param fname file name + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, + const char *fname); +/*! + * \brief save model into existing file + * \param handle handle + * \param fname file name + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, + const char *fname); +/*! + * \brief load model from in-memory buffer + * \param handle handle + * \param buf pointer to the buffer + * \param len the length of the buffer + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, + const void *buf, + bst_ulong len); +/*! + * \brief save model into binary raw bytes, return header of the array + * user must copy the result out, before next xgboost call + * \param handle handle + * \param out_len the argument to hold the output length + * \param out_dptr the argument to hold the output data pointer + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, + bst_ulong *out_len, + const char **out_dptr); +/*! + * \brief dump model, return array of strings representing model dump + * \param handle handle + * \param fmap name to fmap can be empty string + * \param with_stats whether to dump with statistics + * \param out_len length of output array + * \param out_dump_array pointer to hold the array of strings representing the dump of each model + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterDumpModel(BoosterHandle handle, + const char *fmap, + int with_stats, + bst_ulong *out_len, + const char ***out_dump_array); #endif // XGBOOST_WRAPPER_H_