API refactor to make fault handling easy

This commit is contained in:
tqchen 2015-07-04 18:12:44 -07:00
parent 4d436a3cb0
commit cc767add88
6 changed files with 725 additions and 394 deletions

View File

@ -69,6 +69,10 @@ else
TARGET = $(BIN) TARGET = $(BIN)
endif endif
# Give LINT_LANG a default value of "all" only when it is not already set,
# so a value supplied by the caller takes precedence.
# NOTE(review): presumably consumed by the `lint` target -- confirm.
ifndef LINT_LANG
LINT_LANG= "all"
endif
.PHONY: clean all mpi python Rpack lint .PHONY: clean all mpi python Rpack lint
all: $(TARGET) all: $(TARGET)

View File

@ -59,6 +59,10 @@ inline void _WrapperEnd(void) {
PutRNGstate(); PutRNGstate();
} }
/*!
 * \brief hook wrapped around the status code of every C API call in this file
 *  It is intentionally a no-op: the status code is accepted and discarded.
 * \param ret status code returned by the wrapped API call
 */
inline void CheckErr(int ret) {
  (void)ret;  // deliberately ignored
}
extern "C" { extern "C" {
SEXP XGCheckNullPtr_R(SEXP handle) { SEXP XGCheckNullPtr_R(SEXP handle) {
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL); return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
@ -70,7 +74,8 @@ extern "C" {
} }
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
_WrapperBegin(); _WrapperBegin();
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent)); DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -91,7 +96,8 @@ extern "C" {
data[i * ncol +j] = din[i + nrow * j]; data[i * ncol +j] = din[i + nrow * j];
} }
} }
void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing)); DMatrixHandle handle;
CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -119,8 +125,10 @@ extern "C" {
indices_[i] = static_cast<unsigned>(p_indices[i]); indices_[i] = static_cast<unsigned>(p_indices[i]);
data_[i] = static_cast<float>(p_data[i]); data_[i] = static_cast<float>(p_data[i]);
} }
void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), DMatrixHandle handle;
BeginPtr(data_), nindptr, ndata); CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_),
BeginPtr(data_), nindptr, ndata,
&handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -134,7 +142,10 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1; idxvec[i] = INTEGER(idxset)[i] - 1;
} }
void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len); DMatrixHandle res;
CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
@ -143,8 +154,8 @@ extern "C" {
} }
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
_WrapperBegin(); _WrapperBegin();
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
CHAR(asChar(fname)), asInteger(silent)); CHAR(asChar(fname)), asInteger(silent)));
_WrapperEnd(); _WrapperEnd();
} }
void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
@ -157,24 +168,27 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]); vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
} }
XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len); CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len));
} else { } else {
std::vector<float> vec(len); std::vector<float> vec(len);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
vec[i] = REAL(array)[i]; vec[i] = REAL(array)[i];
} }
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), CHAR(asChar(field)),
BeginPtr(vec), len); BeginPtr(vec), len));
} }
_WrapperEnd(); _WrapperEnd();
} }
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
_WrapperBegin(); _WrapperBegin();
bst_ulong olen; bst_ulong olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), const float *res;
CHAR(asChar(field)), &olen); CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)),
&olen,
&res));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen)); SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) { for (size_t i = 0; i < olen; ++i) {
@ -184,13 +198,14 @@ extern "C" {
return ret; return ret;
} }
SEXP XGDMatrixNumRow_R(SEXP handle) { SEXP XGDMatrixNumRow_R(SEXP handle) {
bst_ulong nrow = XGDMatrixNumRow(R_ExternalPtrAddr(handle)); bst_ulong nrow;
CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
return ScalarInteger(static_cast<int>(nrow)); return ScalarInteger(static_cast<int>(nrow));
} }
// functions related to booster // functions related to booster
void _BoosterFinalizer(SEXP ext) { void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return; if (R_ExternalPtrAddr(ext) == NULL) return;
XGBoosterFree(R_ExternalPtrAddr(ext)); CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext)));
R_ClearExternalPtr(ext); R_ClearExternalPtr(ext);
} }
SEXP XGBoosterCreate_R(SEXP dmats) { SEXP XGBoosterCreate_R(SEXP dmats) {
@ -200,7 +215,8 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
} }
void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size()); BoosterHandle handle;
CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
@ -209,16 +225,16 @@ extern "C" {
} }
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
_WrapperBegin(); _WrapperBegin();
XGBoosterSetParam(R_ExternalPtrAddr(handle), CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)), CHAR(asChar(name)),
CHAR(asChar(val))); CHAR(asChar(val))));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
_WrapperBegin(); _WrapperBegin();
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
asInteger(iter), asInteger(iter),
R_ExternalPtrAddr(dtrain)); R_ExternalPtrAddr(dtrain)));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
@ -231,9 +247,10 @@ extern "C" {
tgrad[j] = REAL(grad)[j]; tgrad[j] = REAL(grad)[j];
thess[j] = REAL(hess)[j]; thess[j] = REAL(hess)[j];
} }
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dtrain), R_ExternalPtrAddr(dtrain),
BeginPtr(tgrad), BeginPtr(thess), len); BeginPtr(tgrad), BeginPtr(thess),
len));
_WrapperEnd(); _WrapperEnd();
} }
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
@ -250,21 +267,24 @@ extern "C" {
for (int i = 0; i < len; ++i) { for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str()); vec_sptr.push_back(vec_names[i].c_str());
} }
const char *ret = const char *ret;
XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter), asInteger(iter),
BeginPtr(vec_dmats), BeginPtr(vec_sptr), len); BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
_WrapperEnd(); _WrapperEnd();
return mkString(ret); return mkString(ret);
} }
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) { SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
_WrapperBegin(); _WrapperBegin();
bst_ulong olen; bst_ulong olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), const float *res;
CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat), R_ExternalPtrAddr(dmat),
asInteger(option_mask), asInteger(option_mask),
asInteger(ntree_limit), asInteger(ntree_limit),
&olen); &olen, &res));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(allocVector(REALSXP, olen)); SEXP ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) { for (size_t i = 0; i < olen; ++i) {
@ -275,12 +295,12 @@ extern "C" {
} }
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
_WrapperBegin(); _WrapperBegin();
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
_WrapperBegin(); _WrapperBegin();
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
_WrapperEnd(); _WrapperEnd();
} }
void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
@ -293,7 +313,8 @@ extern "C" {
SEXP XGBoosterModelToRaw_R(SEXP handle) { SEXP XGBoosterModelToRaw_R(SEXP handle) {
bst_ulong olen; bst_ulong olen;
_WrapperBegin(); _WrapperBegin();
const char *raw = XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen); const char *raw;
CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw));
_WrapperEnd(); _WrapperEnd();
SEXP ret = PROTECT(allocVector(RAWSXP, olen)); SEXP ret = PROTECT(allocVector(RAWSXP, olen));
if (olen != 0) { if (olen != 0) {
@ -305,11 +326,11 @@ extern "C" {
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) { SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) {
_WrapperBegin(); _WrapperBegin();
bst_ulong olen; bst_ulong olen;
const char **res = const char **res;
XGBoosterDumpModel(R_ExternalPtrAddr(handle), CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)), CHAR(asChar(fmap)),
asInteger(with_stats), asInteger(with_stats),
&olen); &olen, &res));
_WrapperEnd(); _WrapperEnd();
SEXP out = PROTECT(allocVector(STRSXP, olen)); SEXP out = PROTECT(allocVector(STRSXP, olen));
for (size_t i = 0; i < olen; ++i) { for (size_t i = 0; i < olen; ++i) {

View File

@ -12,6 +12,7 @@
#include <string> #include <string>
#include <cstdlib> #include <cstdlib>
#include <vector> #include <vector>
#include <stdexcept>
#ifndef XGBOOST_STRICT_CXX98_ #ifndef XGBOOST_STRICT_CXX98_
#include <cstdarg> #include <cstdarg>
@ -73,8 +74,7 @@ inline void HandleAssertError(const char *msg) {
* \param msg error message * \param msg error message
*/ */
inline void HandleCheckError(const char *msg) { inline void HandleCheckError(const char *msg) {
fprintf(stderr, "%s\n", msg); throw std::runtime_error(msg);
exit(-1);
} }
inline void HandlePrint(const char *msg) { inline void HandlePrint(const char *msg) {
printf("%s", msg); printf("%s", msg);

View File

@ -44,7 +44,6 @@ else:
# pylint: disable=invalid-name # pylint: disable=invalid-name
STRING_TYPES = basestring, STRING_TYPES = basestring,
def load_xglib(): def load_xglib():
"""Load the xgboost library.""" """Load the xgboost library."""
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
@ -63,30 +62,27 @@ def load_xglib():
raise XGBoostLibraryNotFound( raise XGBoostLibraryNotFound(
'cannot find find the files in the candicate path ' + str(dll_path)) 'cannot find find the files in the candicate path ' + str(dll_path))
lib = ctypes.cdll.LoadLibrary(lib_path[0]) lib = ctypes.cdll.LoadLibrary(lib_path[0])
lib.XGBGetLastError.restype = ctypes.c_char_p
# DMatrix functions
lib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
lib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
lib.XGDMatrixCreateFromCSC.restype = ctypes.c_void_p
lib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
lib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
lib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
lib.XGDMatrixGetUIntInfo.restype = ctypes.POINTER(ctypes.c_uint)
lib.XGDMatrixNumRow.restype = ctypes.c_ulong
# Booster functions
lib.XGBoosterCreate.restype = ctypes.c_void_p
lib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
lib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
lib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
lib.XGBoosterGetModelRaw.restype = ctypes.POINTER(ctypes.c_char)
lib.XGBoosterLoadModelFromBuffer.restype = ctypes.c_void_p
return lib return lib
# load the XGBoost library globally # load the XGBoost library globally
_LIB = load_xglib() _LIB = load_xglib()
def _check_call(ret):
"""Check the return value of C API call
This function will raise exception when error occurs.
Wrap every API call with this function
Parameters
----------
ret : int
return value from API calls
"""
if ret != 0:
raise XGBoostError(_LIB.XGBGetLastError())
def ctypes2numpy(cptr, length, dtype): def ctypes2numpy(cptr, length, dtype):
"""Convert a ctypes pointer array to a numpy array. """Convert a ctypes pointer array to a numpy array.
@ -145,7 +141,10 @@ class DMatrix(object):
self.handle = None self.handle = None
return return
if isinstance(data, STRING_TYPES): if isinstance(data, STRING_TYPES):
self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromFile(c_str(data), int(silent))) self.handle = ctypes.c_void_p()
_check_call(_LIB.XGDMatrixCreateFromFile(c_str(data),
int(silent),
ctypes.byref(self.handle)))
elif isinstance(data, scipy.sparse.csr_matrix): elif isinstance(data, scipy.sparse.csr_matrix):
self._init_from_csr(data) self._init_from_csr(data)
elif isinstance(data, scipy.sparse.csc_matrix): elif isinstance(data, scipy.sparse.csc_matrix):
@ -169,11 +168,12 @@ class DMatrix(object):
""" """
if len(csr.indices) != len(csr.data): if len(csr.indices) != len(csr.data):
raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data))) raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data)))
self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSR( self.handle = ctypes.c_void_p()
c_array(ctypes.c_ulong, csr.indptr), _check_call(_LIB.XGDMatrixCreateFromCSR(c_array(ctypes.c_ulong, csr.indptr),
c_array(ctypes.c_uint, csr.indices), c_array(ctypes.c_uint, csr.indices),
c_array(ctypes.c_float, csr.data), c_array(ctypes.c_float, csr.data),
len(csr.indptr), len(csr.data))) len(csr.indptr), len(csr.data),
ctypes.byref(self.handle)))
def _init_from_csc(self, csc): def _init_from_csc(self, csc):
""" """
@ -181,23 +181,26 @@ class DMatrix(object):
""" """
if len(csc.indices) != len(csc.data): if len(csc.indices) != len(csc.data):
raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data))) raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSC( self.handle = ctypes.c_void_p()
c_array(ctypes.c_ulong, csc.indptr), _check_call(_LIB.XGDMatrixCreateFromCSC(c_array(ctypes.c_ulong, csc.indptr),
c_array(ctypes.c_uint, csc.indices), c_array(ctypes.c_uint, csc.indices),
c_array(ctypes.c_float, csc.data), c_array(ctypes.c_float, csc.data),
len(csc.indptr), len(csc.data))) len(csc.indptr), len(csc.data),
ctypes.byref(self.handle)))
def _init_from_npy2d(self, mat, missing): def _init_from_npy2d(self, mat, missing):
""" """
Initialize data from a 2-D numpy matrix. Initialize data from a 2-D numpy matrix.
""" """
data = np.array(mat.reshape(mat.size), dtype=np.float32) data = np.array(mat.reshape(mat.size), dtype=np.float32)
self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromMat( self.handle = ctypes.c_void_p()
data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), _check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
mat.shape[0], mat.shape[1], ctypes.c_float(missing))) mat.shape[0], mat.shape[1],
ctypes.c_float(missing),
ctypes.byref(self.handle)))
def __del__(self): def __del__(self):
_LIB.XGDMatrixFree(self.handle) _check_call(_LIB.XGDMatrixFree(self.handle))
def get_float_info(self, field): def get_float_info(self, field):
"""Get float property from the DMatrix. """Get float property from the DMatrix.
@ -213,7 +216,11 @@ class DMatrix(object):
a numpy array of float information of the data a numpy array of float information of the data
""" """
length = ctypes.c_ulong() length = ctypes.c_ulong()
ret = _LIB.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length)) ret = ctypes.POINTER(ctypes.c_float)()
_check_call(_LIB.XGDMatrixGetFloatInfo(self.handle,
c_str(field),
ctypes.byref(length),
ctypes.byref(ret)))
return ctypes2numpy(ret, length.value, np.float32) return ctypes2numpy(ret, length.value, np.float32)
def get_uint_info(self, field): def get_uint_info(self, field):
@ -230,7 +237,11 @@ class DMatrix(object):
a numpy array of float information of the data a numpy array of float information of the data
""" """
length = ctypes.c_ulong() length = ctypes.c_ulong()
ret = _LIB.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length)) ret = ctypes.POINTER(ctypes.c_uint)()
_check_call(_LIB.XGDMatrixGetUIntInfo(self.handle,
c_str(field),
ctypes.byref(length),
ctypes.byref(ret)))
return ctypes2numpy(ret, length.value, np.uint32) return ctypes2numpy(ret, length.value, np.uint32)
def set_float_info(self, field, data): def set_float_info(self, field, data):
@ -244,8 +255,10 @@ class DMatrix(object):
data: numpy array data: numpy array
The array ofdata to be set The array ofdata to be set
""" """
_LIB.XGDMatrixSetFloatInfo(self.handle, c_str(field), _check_call(_LIB.XGDMatrixSetFloatInfo(self.handle,
c_array(ctypes.c_float, data), len(data)) c_str(field),
c_array(ctypes.c_float, data),
len(data)))
def set_uint_info(self, field, data): def set_uint_info(self, field, data):
"""Set uint type property into the DMatrix. """Set uint type property into the DMatrix.
@ -258,8 +271,10 @@ class DMatrix(object):
data: numpy array data: numpy array
The array ofdata to be set The array ofdata to be set
""" """
_LIB.XGDMatrixSetUIntInfo(self.handle, c_str(field), _check_call(_LIB.XGDMatrixSetUIntInfo(self.handle,
c_array(ctypes.c_uint, data), len(data)) c_str(field),
c_array(ctypes.c_uint, data),
len(data)))
def save_binary(self, fname, silent=True): def save_binary(self, fname, silent=True):
"""Save DMatrix to an XGBoost buffer. """Save DMatrix to an XGBoost buffer.
@ -271,7 +286,9 @@ class DMatrix(object):
silent : bool (optional; default: True) silent : bool (optional; default: True)
If set, the output is suppressed. If set, the output is suppressed.
""" """
_LIB.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent)) _check_call(_LIB.XGDMatrixSaveBinary(self.handle,
c_str(fname),
int(silent)))
def set_label(self, label): def set_label(self, label):
"""Set label of dmatrix """Set label of dmatrix
@ -317,7 +334,9 @@ class DMatrix(object):
group : array like group : array like
Group size of each group Group size of each group
""" """
_LIB.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group)) _check_call(_LIB.XGDMatrixSetGroup(self.handle,
c_array(ctypes.c_uint, group),
len(group)))
def get_label(self): def get_label(self):
"""Get the label of the DMatrix. """Get the label of the DMatrix.
@ -353,7 +372,10 @@ class DMatrix(object):
------- -------
number of rows : int number of rows : int
""" """
return _LIB.XGDMatrixNumRow(self.handle) ret = ctypes.c_ulong()
_check_call(_LIB.XGDMatrixNumRow(self.handle,
ctypes.byref(ret)))
return ret.value
def slice(self, rindex): def slice(self, rindex):
"""Slice the DMatrix and return a new DMatrix that only contains `rindex`. """Slice the DMatrix and return a new DMatrix that only contains `rindex`.
@ -369,8 +391,11 @@ class DMatrix(object):
A new DMatrix containing only selected indices. A new DMatrix containing only selected indices.
""" """
res = DMatrix(None) res = DMatrix(None)
res.handle = ctypes.c_void_p(_LIB.XGDMatrixSliceDMatrix( res.handle = ctypes.c_void_p()
self.handle, c_array(ctypes.c_int, rindex), len(rindex))) _check_call(_LIB.XGDMatrixSliceDMatrix(self.handle,
c_array(ctypes.c_int, rindex),
len(rindex),
ctypes.byref(res.handle)))
return res return res
@ -394,7 +419,8 @@ class Booster(object):
if not isinstance(d, DMatrix): if not isinstance(d, DMatrix):
raise TypeError('invalid cache item: {}'.format(type(d).__name__)) raise TypeError('invalid cache item: {}'.format(type(d).__name__))
dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache])
self.handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, len(cache))) self.handle = ctypes.c_void_p()
_check_call(_LIB.XGBoosterCreate(dmats, len(cache), ctypes.byref(self.handle)))
self.set_param({'seed': 0}) self.set_param({'seed': 0})
self.set_param(params or {}) self.set_param(params or {})
if model_file is not None: if model_file is not None:
@ -419,10 +445,11 @@ class Booster(object):
if handle is not None: if handle is not None:
buf = handle buf = handle
dmats = c_array(ctypes.c_void_p, []) dmats = c_array(ctypes.c_void_p, [])
handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, 0)) handle = ctypes.c_void_p()
_check_call(_LIB.XGBoosterCreate(dmats, 0, ctypes.byref(handle)))
length = ctypes.c_ulong(len(buf)) length = ctypes.c_ulong(len(buf))
ptr = (ctypes.c_char * len(buf)).from_buffer(buf) ptr = (ctypes.c_char * len(buf)).from_buffer(buf)
_LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length) _check_call(_LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length))
state['handle'] = handle state['handle'] = handle
self.__dict__.update(state) self.__dict__.update(state)
self.set_param({'seed': 0}) self.set_param({'seed': 0})
@ -449,7 +476,7 @@ class Booster(object):
elif isinstance(params, STRING_TYPES) and value is not None: elif isinstance(params, STRING_TYPES) and value is not None:
params = [(params, value)] params = [(params, value)]
for key, val in params: for key, val in params:
_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) _check_call(_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))))
def update(self, dtrain, iteration, fobj=None): def update(self, dtrain, iteration, fobj=None):
""" """
@ -467,7 +494,7 @@ class Booster(object):
if not isinstance(dtrain, DMatrix): if not isinstance(dtrain, DMatrix):
raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__))
if fobj is None: if fobj is None:
_LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle) _check_call(_LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle))
else: else:
pred = self.predict(dtrain) pred = self.predict(dtrain)
grad, hess = fobj(pred, dtrain) grad, hess = fobj(pred, dtrain)
@ -490,10 +517,10 @@ class Booster(object):
raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess))) raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess)))
if not isinstance(dtrain, DMatrix): if not isinstance(dtrain, DMatrix):
raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__))
_LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, _check_call(_LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle,
c_array(ctypes.c_float, grad), c_array(ctypes.c_float, grad),
c_array(ctypes.c_float, hess), c_array(ctypes.c_float, hess),
len(grad)) len(grad)))
def eval_set(self, evals, iteration=0, feval=None): def eval_set(self, evals, iteration=0, feval=None):
# pylint: disable=invalid-name # pylint: disable=invalid-name
@ -520,7 +547,11 @@ class Booster(object):
raise TypeError('expected string, got {}'.format(type(d[1]).__name__)) raise TypeError('expected string, got {}'.format(type(d[1]).__name__))
dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals]) dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals])
evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals]) evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals])
return _LIB.XGBoosterEvalOneIter(self.handle, iteration, dmats, evnames, len(evals)) msg = ctypes.c_char_p()
_check_call(_LIB.XGBoosterEvalOneIter(self.handle, iteration,
dmats, evnames, len(evals),
ctypes.byref(msg)))
return msg.value
else: else:
res = '[%d]' % iteration res = '[%d]' % iteration
for dmat, evname in evals: for dmat, evname in evals:
@ -582,8 +613,11 @@ class Booster(object):
if pred_leaf: if pred_leaf:
option_mask |= 0x02 option_mask |= 0x02
length = ctypes.c_ulong() length = ctypes.c_ulong()
preds = _LIB.XGBoosterPredict(self.handle, data.handle, preds = ctypes.POINTER(ctypes.c_float)()
option_mask, ntree_limit, ctypes.byref(length)) _check_call(_LIB.XGBoosterPredict(self.handle, data.handle,
option_mask, ntree_limit,
ctypes.byref(length),
ctypes.byref(preds)))
preds = ctypes2numpy(preds, length.value, np.float32) preds = ctypes2numpy(preds, length.value, np.float32)
if pred_leaf: if pred_leaf:
preds = preds.astype(np.int32) preds = preds.astype(np.int32)
@ -602,7 +636,7 @@ class Booster(object):
Output file name Output file name
""" """
if isinstance(fname, STRING_TYPES): # assume file name if isinstance(fname, STRING_TYPES): # assume file name
_LIB.XGBoosterSaveModel(self.handle, c_str(fname)) _check_call(_LIB.XGBoosterSaveModel(self.handle, c_str(fname)))
else: else:
raise TypeError("fname must be a string") raise TypeError("fname must be a string")
@ -615,8 +649,10 @@ class Booster(object):
a in memory buffer represetation of the model a in memory buffer represetation of the model
""" """
length = ctypes.c_ulong() length = ctypes.c_ulong()
cptr = _LIB.XGBoosterGetModelRaw(self.handle, cptr = ctypes.POINTER(ctypes.c_char)()
ctypes.byref(length)) _check_call(_LIB.XGBoosterGetModelRaw(self.handle,
ctypes.byref(length),
ctypes.byref(cptr)))
return ctypes2buffer(cptr, length.value) return ctypes2buffer(cptr, length.value)
def load_model(self, fname): def load_model(self, fname):
@ -634,7 +670,7 @@ class Booster(object):
buf = fname buf = fname
length = ctypes.c_ulong(len(buf)) length = ctypes.c_ulong(len(buf))
ptr = (ctypes.c_char * len(buf)).from_buffer(buf) ptr = (ctypes.c_char * len(buf)).from_buffer(buf)
_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length) _check_call(_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length))
def dump_model(self, fout, fmap='', with_stats=False): def dump_model(self, fout, fmap='', with_stats=False):
""" """
@ -666,8 +702,12 @@ class Booster(object):
Returns the dump the model as a list of strings. Returns the dump the model as a list of strings.
""" """
length = ctypes.c_ulong() length = ctypes.c_ulong()
sarr = _LIB.XGBoosterDumpModel(self.handle, c_str(fmap), sarr = ctypes.POINTER(ctypes.c_char_p)()
int(with_stats), ctypes.byref(length)) _check_call(_LIB.XGBoosterDumpModel(self.handle,
c_str(fmap),
int(with_stats),
ctypes.byref(length),
ctypes.byref(sarr)))
res = [] res = []
for i in range(length.value): for i in range(length.value):
res.append(str(sarr[i].decode('ascii'))) res.append(str(sarr[i].decode('ascii')))

View File

@ -8,6 +8,7 @@
#include <cstring> #include <cstring>
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
#include <exception>
// include all std functions // include all std functions
using namespace std; using namespace std;
#include "./xgboost_wrapper.h" #include "./xgboost_wrapper.h"
@ -102,15 +103,79 @@ class Booster: public learner::BoostLearner {
using namespace xgboost::wrapper; using namespace xgboost::wrapper;
void* XGDMatrixCreateFromFile(const char *fname, int silent) { /*! \brief macro to guard beginning and end section of all functions */
return LoadDataMatrix(fname, silent != 0, false, false); #define API_BEGIN() try {
/*!
 * \brief every function starts with API_BEGIN(); and finishes with API_END();
 *  API_END closes the try block opened by API_BEGIN. When a std::exception
 *  is caught it runs Finalize and returns the value of XGBHandleException;
 *  otherwise the enclosing function returns 0.
 *  The exception is caught by const reference, matching the parameter type
 *  of XGBHandleException.
 * \param Finalize optionally put in a finalizer
 */
#define API_END(Finalize) } catch(const std::exception &e) { \
Finalize; return XGBHandleException(e); \
} return 0;
// Select the thread-local storage qualifier used for the last-error buffer.
// Define DISABLE_THREAD_LOCAL to opt out entirely:
// do not use threadlocal on OSX since it is not always available
//
// The C++11 keyword is preferred because the macro is applied to a
// std::string; the compiler-specific __thread qualifier is documented as
// unsupported for types that need dynamic initialization or a non-trivial
// destructor. The older spellings remain as fallbacks for pre-C++11 builds.
#ifndef DISABLE_THREAD_LOCAL
#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900)
#define XGB_TREAD_LOCAL thread_local
#elif defined(__GNUC__)
#define XGB_TREAD_LOCAL __thread
#elif __STDC_VERSION__ >= 201112L
#define XGB_TREAD_LOCAL _Thread_local
#elif defined(_MSC_VER)
#define XGB_TREAD_LOCAL __declspec(thread)
#endif
#endif
// No qualifier available (or disabled): fall back to an empty macro.
#ifndef XGB_TREAD_LOCAL
#pragma message("Warning: Threadlocal not enabled, used single thread error handling")
#define XGB_TREAD_LOCAL
#endif
/*!
* \brief a helper function for error handling
* will set the last error to be str_set when it is not NULL
* \param str_set the error to set
* \return a pointer message to last error
*/
const char *XGBSetGetLastError_(const char *str_set) {
// use last_error to record last error
static XGB_TREAD_LOCAL std::string last_error;
if (str_set != NULL) {
last_error = str_set;
} }
void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, return last_error.c_str();
}
/*!
 * \brief fetch the message of the last recorded error
 *  Passes NULL to XGBSetGetLastError_ so that the stored message is only
 *  read, not replaced.
 * \return pointer to the last error message
 */
const char *XGBGetLastError() {
  const char *last_msg = XGBSetGetLastError_(NULL);
  return last_msg;
}
/*!
 * \brief handle an exception thrown inside an API function
 *  Records the exception's message as the last error.
 * \param e the exception
 * \return the return value of API after exception is handled, always -1
 */
int XGBHandleException(const std::exception &e) {
  const char *what_msg = e.what();
  XGBSetGetLastError_(what_msg);
  return -1;
}
/*!
 * \brief create a data matrix by loading it from a file
 * \param fname name of the file handed to LoadDataMatrix
 * \param silent integer flag; converted to bool (non-zero means true)
 * \param out receives the handle of the loaded matrix
 * \return 0 on success; the result of XGBHandleException when a
 *  std::exception is caught by the API_BEGIN/API_END guard
 */
int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out) {
API_BEGIN();
const bool silent_flag = (silent != 0);
*out = LoadDataMatrix(fname, silent_flag, false, false);
API_END();
}
int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices, const unsigned *indices,
const float *data, const float *data,
bst_ulong nindptr, bst_ulong nindptr,
bst_ulong nelem) { bst_ulong nelem,
DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat; DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr); mat.row_ptr_.resize(nindptr);
for (bst_ulong i = 0; i < nindptr; ++i) { for (bst_ulong i = 0; i < nindptr; ++i) {
@ -123,20 +188,24 @@ void* XGDMatrixCreateFromCSR(const bst_ulong *indptr,
static_cast<size_t>(indices[i]+1)); static_cast<size_t>(indices[i]+1));
} }
mat.info.info.num_row = nindptr - 1; mat.info.info.num_row = nindptr - 1;
return p_mat; *out = p_mat;
API_END(delete p_mat);
} }
void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
const unsigned *indices, const unsigned *indices,
const float *data, const float *data,
bst_ulong nindptr, bst_ulong nindptr,
bst_ulong nelem) { bst_ulong nelem,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
int nthread; int nthread;
#pragma omp parallel #pragma omp parallel
{ {
nthread = omp_get_num_threads(); nthread = omp_get_num_threads();
} }
p_mat = new DMatrixSimple();
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat; DMatrixSimple &mat = *p_mat;
utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_); utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread); builder.InitBudget(0, nthread);
@ -160,14 +229,19 @@ void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
} }
mat.info.info.num_row = mat.row_ptr_.size() - 1; mat.info.info.num_row = mat.row_ptr_.size() - 1;
mat.info.info.num_col = static_cast<size_t>(ncol); mat.info.info.num_col = static_cast<size_t>(ncol);
return p_mat; *out = p_mat;
API_END(delete p_mat);
} }
void* XGDMatrixCreateFromMat(const float *data,
int XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow, bst_ulong nrow,
bst_ulong ncol, bst_ulong ncol,
float missing) { float missing,
DMatrixHandle *out) {
DMatrixSimple *p_mat = NULL;
API_BEGIN();
p_mat = new DMatrixSimple();
bool nan_missing = utils::CheckNAN(missing); bool nan_missing = utils::CheckNAN(missing);
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat; DMatrixSimple &mat = *p_mat;
mat.info.info.num_row = nrow; mat.info.info.num_row = nrow;
mat.info.info.num_col = ncol; mat.info.info.num_col = ncol;
@ -186,11 +260,16 @@ void* XGDMatrixCreateFromMat(const float *data,
} }
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem); mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
} }
return p_mat; *out = p_mat;
API_END(delete p_mat);
} }
void* XGDMatrixSliceDMatrix(void *handle,
int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int *idxset, const int *idxset,
bst_ulong len) { bst_ulong len,
DMatrixHandle *out) {
DMatrixSimple *p_ret = NULL;
API_BEGIN();
DMatrixSimple tmp; DMatrixSimple tmp;
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle); DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
if (dsrc.magic != DMatrixSimple::kMagic) { if (dsrc.magic != DMatrixSimple::kMagic) {
@ -198,7 +277,7 @@ void* XGDMatrixSliceDMatrix(void *handle,
} }
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ? DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
*static_cast<DMatrixSimple*>(handle): tmp); *static_cast<DMatrixSimple*>(handle): tmp);
DMatrixSimple *p_ret = new DMatrixSimple(); p_ret = new DMatrixSimple();
DMatrixSimple &ret = *p_ret; DMatrixSimple &ret = *p_ret;
utils::Check(src.info.group_ptr.size() == 0, utils::Check(src.info.group_ptr.size() == 0,
@ -232,82 +311,151 @@ void* XGDMatrixSliceDMatrix(void *handle,
ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]); ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]);
} }
} }
return p_ret; *out = p_ret;
API_END(delete p_ret);
} }
void XGDMatrixFree(void *handle) {
int XGDMatrixFree(DMatrixHandle handle) {
API_BEGIN();
delete static_cast<DataMatrix*>(handle); delete static_cast<DataMatrix*>(handle);
API_END();
} }
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
int XGDMatrixSaveBinary(DMatrixHandle handle,
const char *fname,
int silent) {
API_BEGIN();
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent != 0); SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent != 0);
API_END();
} }
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) {
int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char *field,
const float *info,
bst_ulong len) {
API_BEGIN();
std::vector<float> &vec = std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field); static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len); vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(float) * len); memcpy(BeginPtr(vec), info, sizeof(float) * len);
API_END();
} }
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) {
int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char *field,
const unsigned *info,
bst_ulong len) {
API_BEGIN();
std::vector<unsigned> &vec = std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field); static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len); vec.resize(len);
memcpy(BeginPtr(vec), info, sizeof(unsigned) * len); memcpy(BeginPtr(vec), info, sizeof(unsigned) * len);
API_END();
} }
void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) {
int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len) {
API_BEGIN();
DataMatrix *pmat = static_cast<DataMatrix*>(handle); DataMatrix *pmat = static_cast<DataMatrix*>(handle);
pmat->info.group_ptr.resize(len + 1); pmat->info.group_ptr.resize(len + 1);
pmat->info.group_ptr[0] = 0; pmat->info.group_ptr[0] = 0;
for (uint64_t i = 0; i < len; ++i) { for (uint64_t i = 0; i < len; ++i) {
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i]; pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i];
} }
API_END();
} }
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) {
int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const float **out_dptr) {
API_BEGIN();
const std::vector<float> &vec = const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field); static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*len = static_cast<bst_ulong>(vec.size()); *out_len = static_cast<bst_ulong>(vec.size());
return BeginPtr(vec); *out_dptr = BeginPtr(vec);
API_END();
} }
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) {
int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
const char *field,
bst_ulong *out_len,
const unsigned **out_dptr) {
API_BEGIN();
const std::vector<unsigned> &vec = const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field); static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*len = static_cast<bst_ulong>(vec.size()); *out_len = static_cast<bst_ulong>(vec.size());
return BeginPtr(vec); *out_dptr = BeginPtr(vec);
API_END();
} }
bst_ulong XGDMatrixNumRow(const void *handle) { int XGDMatrixNumRow(const DMatrixHandle handle,
return static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row()); bst_ulong *out) {
API_BEGIN();
*out = static_cast<bst_ulong>(static_cast<const DataMatrix*>(handle)->info.num_row());
API_END();
} }
// xgboost implementation // xgboost implementation
void *XGBoosterCreate(void *dmats[], bst_ulong len) { int XGBoosterCreate(DMatrixHandle dmats[],
bst_ulong len,
BoosterHandle *out) {
API_BEGIN();
std::vector<DataMatrix*> mats; std::vector<DataMatrix*> mats;
for (bst_ulong i = 0; i < len; ++i) { for (bst_ulong i = 0; i < len; ++i) {
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]); DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
mats.push_back(dtr); mats.push_back(dtr);
} }
return new Booster(mats); *out = new Booster(mats);
API_END();
} }
void XGBoosterFree(void *handle) {
int XGBoosterFree(BoosterHandle handle) {
API_BEGIN();
delete static_cast<Booster*>(handle); delete static_cast<Booster*>(handle);
API_END();
} }
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
int XGBoosterSetParam(BoosterHandle handle,
const char *name, const char *value) {
API_BEGIN();
static_cast<Booster*>(handle)->SetParam(name, value); static_cast<Booster*>(handle)->SetParam(name, value);
API_END();
} }
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle); Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain); DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel(); bst->CheckInitModel();
bst->CheckInit(dtr); bst->CheckInit(dtr);
bst->UpdateOneIter(iter, *dtr); bst->UpdateOneIter(iter, *dtr);
API_END();
} }
void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, bst_ulong len) { int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle); Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain); DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel(); bst->CheckInitModel();
bst->CheckInit(dtr); bst->CheckInit(dtr);
bst->BoostOneIter(*dtr, grad, hess, len); bst->BoostOneIter(*dtr, grad, hess, len);
API_END();
} }
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], bst_ulong len) { int XGBoosterEvalOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dmats[],
const char *evnames[],
bst_ulong len,
const char **out_str) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle); Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names; std::vector<std::string> names;
std::vector<const DataMatrix*> mats; std::vector<const DataMatrix*> mats;
@ -317,32 +465,64 @@ const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
} }
bst->CheckInitModel(); bst->CheckInitModel();
bst->eval_str = bst->EvalOneIter(iter, mats, names); bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str(); *out_str = bst->eval_str.c_str();
API_END();
} }
const float *XGBoosterPredict(void *handle, void *dmat, int option_mask,
unsigned ntree_limit, bst_ulong *len) { int XGBoosterPredict(BoosterHandle handle,
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), DMatrixHandle dmat,
int option_mask,
unsigned ntree_limit,
bst_ulong *len,
const float **out_result) {
API_BEGIN();
*out_result = static_cast<Booster*>(handle)->
Pred(*static_cast<DataMatrix*>(dmat),
option_mask, ntree_limit, len); option_mask, ntree_limit, len);
API_END();
} }
void XGBoosterLoadModel(void *handle, const char *fname) {
int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {
API_BEGIN();
static_cast<Booster*>(handle)->LoadModel(fname); static_cast<Booster*>(handle)->LoadModel(fname);
API_END();
} }
void XGBoosterSaveModel(void *handle, const char *fname) {
int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle); Booster *bst = static_cast<Booster*>(handle);
bst->CheckInitModel(); bst->CheckInitModel();
bst->SaveModel(fname, false); bst->SaveModel(fname, false);
API_END();
} }
void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len) {
int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
const void *buf,
bst_ulong len) {
API_BEGIN();
static_cast<Booster*>(handle)->LoadModelFromBuffer(buf, len); static_cast<Booster*>(handle)->LoadModelFromBuffer(buf, len);
API_END();
} }
const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len) {
return static_cast<Booster*>(handle)->GetModelRaw(out_len); int XGBoosterGetModelRaw(BoosterHandle handle,
bst_ulong *out_len,
const char **out_dptr) {
API_BEGIN();
*out_dptr = static_cast<Booster*>(handle)->GetModelRaw(out_len);
API_END();
} }
const char** XGBoosterDumpModel(void *handle, const char *fmap,
int with_stats, bst_ulong *len) { int XGBoosterDumpModel(BoosterHandle handle,
const char *fmap,
int with_stats,
bst_ulong *len,
const char ***out_models) {
API_BEGIN();
utils::FeatMap featmap; utils::FeatMap featmap;
if (strlen(fmap) != 0) { if (strlen(fmap) != 0) {
featmap.LoadText(fmap); featmap.LoadText(fmap);
} }
return static_cast<Booster*>(handle)->GetModelDump(featmap, with_stats != 0, len); *out_models = static_cast<Booster*>(handle)->GetModelDump(
featmap, with_stats != 0, len);
API_END();
} }

View File

@ -8,24 +8,46 @@
#ifndef XGBOOST_WRAPPER_H_ #ifndef XGBOOST_WRAPPER_H_
#define XGBOOST_WRAPPER_H_ #define XGBOOST_WRAPPER_H_
/*
 * XGB_EXTERN_C gives the exported declarations C linkage when the header
 * is compiled as C++. It must also be defined (as nothing) for plain C,
 * because XGB_DLL expands to it unconditionally; otherwise a C consumer
 * would see an unknown XGB_EXTERN_C token in every declaration.
 */
#ifdef __cplusplus
#define XGB_EXTERN_C extern "C"
#else
#define XGB_EXTERN_C
#endif
#if defined(_MSC_VER) || defined(_WIN32) #if defined(_MSC_VER) || defined(_WIN32)
#define XGB_DLL __declspec(dllexport) #define XGB_DLL XGB_EXTERN_C __declspec(dllexport)
#else #else
#define XGB_DLL #define XGB_DLL XGB_EXTERN_C
#endif #endif
// manually define unsigned long // manually define unsigned long
typedef unsigned long bst_ulong; // NOLINT(*) typedef unsigned long bst_ulong; // NOLINT(*)
#ifdef __cplusplus /*! \brief handle to DMatrix */
extern "C" { typedef void *DMatrixHandle;
#endif /*! \brief handle to Booster */
typedef void *BoosterHandle;
/*!
* \brief get string message of the last error
*
* all functions in this file return 0 on success
* and -1 when an error occurred,
* XGBGetLastError can be called to retrieve the error
*
* this function is threadsafe and can be called by different thread
* \return const char* error information
*/
XGB_DLL const char *XGBGetLastError();
/*! /*!
* \brief load a data matrix * \brief load a data matrix
* \param fname the name of the file * \param fname the name of the file
* \param silent whether print messages during loading * \param silent whether print messages during loading
* \return a loaded data matrix * \param out a loaded data matrix
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent); XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
int silent,
DMatrixHandle *out);
/*! /*!
* \brief create a matrix content from csr format * \brief create a matrix content from csr format
* \param indptr pointer to row headers * \param indptr pointer to row headers
@ -33,13 +55,15 @@ extern "C" {
* \param data fvalue * \param data fvalue
* \param nindptr number of rows in the matrix + 1 * \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix * \param nelem number of nonzero elements in the matrix
* \return created dmatrix * \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, XGB_DLL int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
const unsigned *indices, const unsigned *indices,
const float *data, const float *data,
bst_ulong nindptr, bst_ulong nindptr,
bst_ulong nelem); bst_ulong nelem,
DMatrixHandle *out);
/*! /*!
* \brief create a matrix content from CSC format * \brief create a matrix content from CSC format
* \param col_ptr pointer to col headers * \param col_ptr pointer to col headers
@ -47,119 +71,157 @@ extern "C" {
* \param data fvalue * \param data fvalue
* \param nindptr number of rows in the matrix + 1 * \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix * \param nelem number of nonzero elements in the matrix
* \return created dmatrix * \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, XGB_DLL int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
const unsigned *indices, const unsigned *indices,
const float *data, const float *data,
bst_ulong nindptr, bst_ulong nindptr,
bst_ulong nelem); bst_ulong nelem,
DMatrixHandle *out);
/*! /*!
* \brief create matrix content from dense matrix * \brief create matrix content from dense matrix
* \param data pointer to the data space * \param data pointer to the data space
* \param nrow number of rows * \param nrow number of rows
* \param ncol number columns * \param ncol number columns
* \param missing which value to represent missing value * \param missing which value to represent missing value
* \return created dmatrix * \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void* XGDMatrixCreateFromMat(const float *data, XGB_DLL int XGDMatrixCreateFromMat(const float *data,
bst_ulong nrow, bst_ulong nrow,
bst_ulong ncol, bst_ulong ncol,
float missing); float missing,
DMatrixHandle *out);
/*! /*!
* \brief create a new dmatrix from sliced content of existing matrix * \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced * \param handle instance of data matrix to be sliced
* \param idxset index set * \param idxset index set
* \param len length of index set * \param len length of index set
* \return a sliced new matrix * \param out a sliced new matrix
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void* XGDMatrixSliceDMatrix(void *handle, XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int *idxset, const int *idxset,
bst_ulong len); bst_ulong len,
DMatrixHandle *out);
/*! /*!
* \brief free space in data matrix * \brief free space in data matrix
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGDMatrixFree(void *handle); XGB_DLL int XGDMatrixFree(void *handle);
/*! /*!
* \brief save a data matrix into a binary file * \brief save a data matrix into a binary file
* \param handle a instance of data matrix * \param handle a instance of data matrix
* \param fname file name * \param fname file name
* \param silent print statistics when saving * \param silent print statistics when saving
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent); XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle,
const char *fname, int silent);
/*! /*!
* \brief set float vector to a content in info * \brief set float vector to a content in info
* \param handle a instance of data matrix * \param handle a instance of data matrix
* \param field field name, can be label, weight * \param field field name, can be label, weight
* \param array pointer to float vector * \param array pointer to float vector
* \param len length of array * \param len length of array
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const float *array, bst_ulong len); const char *field,
const float *array,
bst_ulong len);
/*! /*!
* \brief set uint32 vector to a content in info * \brief set uint32 vector to a content in info
* \param handle a instance of data matrix * \param handle a instance of data matrix
* \param field field name * \param field field name
* \param array pointer to unsigned vector * \param array pointer to unsigned vector
* \param len length of array * \param len length of array
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const unsigned *array, bst_ulong len); const char *field,
const unsigned *array,
bst_ulong len);
/*! /*!
* \brief set group sizes of the training matrix * \brief set group sizes of the training matrix
* \param handle a instance of data matrix * \param handle a instance of data matrix
* \param group pointer to group size * \param group pointer to group size
* \param len length of array * \param len length of array
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len); XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len);
/*! /*!
* \brief get float info vector from matrix * \brief get float info vector from matrix
* \param handle a instance of data matrix * \param handle a instance of data matrix
* \param field field name * \param field field name
* \param out_len used to set result length * \param out_len used to set result length
* \return pointer to the result * \param out_dptr pointer to the result
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
const char *field, bst_ulong* out_len); const char *field,
bst_ulong* out_len,
const float **out_dptr);
/*! /*!
* \brief get uint32 info vector from matrix * \brief get uint32 info vector from matrix
* \param handle a instance of data matrix * \param handle a instance of data matrix
* \param field field name * \param field field name
* \param out_len used to set result length * \param out_dptr pointer to the result
* \return pointer to the result * \return 0 when success, -1 when failure happens
*/ */
XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
const char *field, bst_ulong* out_len); const char *field,
bst_ulong* out_len,
const unsigned **out_dptr);
/*! /*!
* \brief return number of rows * \brief get number of rows
* \param handle the handle to the DMatrix
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle); XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
bst_ulong *out);
// --- start XGBoost class // --- start XGBoost class
/*! /*!
* \brief create xgboost learner * \brief create xgboost learner
* \param dmats matrices that are set to be cached * \param dmats matrices that are set to be cached
* \param len length of dmats * \param len length of dmats
* \param out handle to the result booster
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len); XGB_DLL int XGBoosterCreate(void* dmats[],
bst_ulong len,
BoosterHandle *out);
/*! /*!
* \brief free obj in handle * \brief free obj in handle
* \param handle handle to be freed * \param handle handle to be freed
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGBoosterFree(void* handle); XGB_DLL int XGBoosterFree(BoosterHandle handle);
/*! /*!
* \brief set parameters * \brief set parameters
* \param handle handle * \param handle handle
* \param name parameter name * \param name parameter name
* \param value value of parameter * \param value value of parameter
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value); XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
const char *name,
const char *value);
/*! /*!
* \brief update the model in one round using dtrain * \brief update the model in one round using dtrain
* \param handle handle * \param handle handle
* \param iter current iteration rounds * \param iter current iteration rounds
* \param dtrain training data * \param dtrain training data
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain); XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain);
/*! /*!
* \brief update the model, by directly specify gradient and second order gradient, * \brief update the model, by directly specify gradient and second order gradient,
* this can be used to replace UpdateOneIter, to support customized loss function * this can be used to replace UpdateOneIter, to support customized loss function
@ -168,9 +230,13 @@ extern "C" {
* \param grad gradient statistics * \param grad gradient statistics
* \param hess second order gradient statistics * \param hess second order gradient statistics
* \param len length of grad/hess array * \param len length of grad/hess array
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain, XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
float *grad, float *hess, bst_ulong len); DMatrixHandle dtrain,
float *grad,
float *hess,
bst_ulong len);
/*! /*!
* \brief get evaluation statistics for xgboost * \brief get evaluation statistics for xgboost
* \param handle handle * \param handle handle
@ -178,10 +244,15 @@ extern "C" {
* \param dmats pointers to data to be evaluated * \param dmats pointers to data to be evaluated
* \param evnames pointers to names of each data * \param evnames pointers to names of each data
* \param len length of dmats * \param len length of dmats
* \return the string containing evaluation stati * \param out_result the string containing evaluation statistics
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
const char *evnames[], bst_ulong len); int iter,
DMatrixHandle dmats[],
const char *evnames[],
bst_ulong len,
const char **out_result);
/*! /*!
* \brief make prediction based on dmat * \brief make prediction based on dmat
* \param handle handle * \param handle handle
@ -192,50 +263,65 @@ extern "C" {
* 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree * 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
* when the parameter is set to 0, we will use all the trees * when the parameter is set to 0, we will use all the trees
* \param len used to store length of returning result * \param out_len used to store length of returning result
* \param out_result used to set a pointer to array
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, XGB_DLL int XGBoosterPredict(BoosterHandle handle,
DMatrixHandle dmat,
int option_mask, int option_mask,
unsigned ntree_limit, unsigned ntree_limit,
bst_ulong *len); bst_ulong *out_len,
const float **out_result);
/*! /*!
* \brief load model from existing file * \brief load model from existing file
* \param handle handle * \param handle handle
* \param fname file name * \param fname file name
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname); XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
const char *fname);
/*! /*!
* \brief save model into existing file * \brief save model into existing file
* \param handle handle * \param handle handle
* \param fname file name * \param fname file name
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGBoosterSaveModel(void *handle, const char *fname); XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
const char *fname);
/*! /*!
* \brief load model from in memory buffer * \brief load model from in memory buffer
* \param handle handle * \param handle handle
* \param buf pointer to the buffer * \param buf pointer to the buffer
* \param len the length of the buffer * \param len the length of the buffer
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len); XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
const void *buf,
bst_ulong len);
/*! /*!
* \brief save model into binary raw bytes, return header of the array * \brief save model into binary raw bytes, return header of the array
* user must copy the result out, before next xgboost call * user must copy the result out, before next xgboost call
* \param handle handle * \param handle handle
* \param out_len the argument to hold the output length * \param out_len the argument to hold the output length
* \return the pointer to the beginning of binary buffer * \param out_dptr the argument to hold the output data pointer
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len); XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
bst_ulong *out_len,
const char **out_dptr);
/*! /*!
* \brief dump model, return array of strings representing model dump * \brief dump model, return array of strings representing model dump
* \param handle handle * \param handle handle
* \param fmap name to fmap can be empty string * \param fmap name to fmap can be empty string
* \param with_stats whether to dump with statistics * \param with_stats whether to dump with statistics
* \param out_len length of output array * \param out_len length of output array
* \return char *data[], representing dump of each model * \param out_dump_array pointer to hold representing dump of each model
* \return 0 when success, -1 when failure happens
*/ */
XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap, XGB_DLL int XGBoosterDumpModel(BoosterHandle handle,
int with_stats, bst_ulong *out_len); const char *fmap,
#ifdef __cplusplus int with_stats,
} bst_ulong *out_len,
#endif const char ***out_dump_array);
#endif // XGBOOST_WRAPPER_H_ #endif // XGBOOST_WRAPPER_H_