chg size_t to uint64_t

This commit is contained in:
tqchen 2014-08-26 19:12:51 -07:00
parent 3c1ed847fb
commit 97467fe807
3 changed files with 55 additions and 51 deletions

View File

@ -119,7 +119,7 @@ extern "C" {
} }
} }
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
size_t olen; uint64_t olen;
const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle),
CHAR(asChar(field)), &olen); CHAR(asChar(field)), &olen);
SEXP ret = PROTECT(allocVector(REALSXP, olen)); SEXP ret = PROTECT(allocVector(REALSXP, olen));
@ -188,7 +188,7 @@ extern "C" {
&vec_dmats[0], &vec_sptr[0], len)); &vec_dmats[0], &vec_sptr[0], len));
} }
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) { SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
size_t olen; uint64_t olen;
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle), const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat), R_ExternalPtrAddr(dmat),
asInteger(output_margin), asInteger(output_margin),
@ -207,7 +207,7 @@ extern "C" {
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
} }
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) { void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
size_t olen; uint64_t olen;
const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle), const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)), CHAR(asChar(fmap)),
&olen); &olen);

View File

@ -23,14 +23,14 @@ class Booster: public learner::BoostLearner<FMatrixS> {
this->init_model = false; this->init_model = false;
this->SetCacheData(mats); this->SetCacheData(mats);
} }
const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) { const float *Pred(const DataMatrix &dmat, int output_margin, uint64_t *len) {
this->CheckInitModel(); this->CheckInitModel();
this->Predict(dmat, output_margin, &this->preds_); this->Predict(dmat, output_margin, &this->preds_);
*len = this->preds_.size(); *len = this->preds_.size();
return &this->preds_[0]; return &this->preds_[0];
} }
inline void BoostOneIter(const DataMatrix &train, inline void BoostOneIter(const DataMatrix &train,
float *grad, float *hess, size_t len) { float *grad, float *hess, uint64_t len) {
this->gpair_.resize(len); this->gpair_.resize(len);
const unsigned ndata = static_cast<unsigned>(len); const unsigned ndata = static_cast<unsigned>(len);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
@ -48,7 +48,7 @@ class Booster: public learner::BoostLearner<FMatrixS> {
learner::BoostLearner<FMatrixS>::LoadModel(fname); learner::BoostLearner<FMatrixS>::LoadModel(fname);
this->init_model = true; this->init_model = true;
} }
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) { inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, uint64_t *len) {
model_dump = this->DumpModel(fmap, with_stats); model_dump = this->DumpModel(fmap, with_stats);
model_dump_cptr.resize(model_dump.size()); model_dump_cptr.resize(model_dump.size());
for (size_t i = 0; i < model_dump.size(); ++i) { for (size_t i = 0; i < model_dump.size(); ++i) {
@ -76,35 +76,37 @@ extern "C"{
void* XGDMatrixCreateFromFile(const char *fname, int silent) { void* XGDMatrixCreateFromFile(const char *fname, int silent) {
return LoadDataMatrix(fname, silent, false); return LoadDataMatrix(fname, silent, false);
} }
void* XGDMatrixCreateFromCSR(const size_t *indptr, void* XGDMatrixCreateFromCSR(const uint64_t *indptr,
const unsigned *indices, const unsigned *indices,
const float *data, const float *data,
size_t nindptr, uint64_t nindptr,
size_t nelem) { uint64_t nelem) {
DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat; DMatrixSimple &mat = *p_mat;
mat.row_ptr_.resize(nindptr); mat.row_ptr_.resize(nindptr);
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr); for (uint64_t i = 0; i < nindptr; ++i) {
mat.row_ptr_[i] = static_cast<size_t>(indptr[i]);
}
mat.row_data_.resize(nelem); mat.row_data_.resize(nelem);
for (size_t i = 0; i < nelem; ++i) { for (uint64_t i = 0; i < nelem; ++i) {
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]); mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
mat.info.info.num_col = std::max(mat.info.info.num_col, mat.info.info.num_col = std::max(mat.info.info.num_col,
static_cast<size_t>(indices[i]+1)); static_cast<uint64_t>(indices[i]+1));
} }
mat.info.info.num_row = nindptr - 1; mat.info.info.num_row = nindptr - 1;
return p_mat; return p_mat;
} }
void* XGDMatrixCreateFromMat(const float *data, void* XGDMatrixCreateFromMat(const float *data,
size_t nrow, uint64_t nrow,
size_t ncol, uint64_t ncol,
float missing) { float missing) {
DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat; DMatrixSimple &mat = *p_mat;
mat.info.info.num_row = nrow; mat.info.info.num_row = nrow;
mat.info.info.num_col = ncol; mat.info.info.num_col = ncol;
for (size_t i = 0; i < nrow; ++i, data += ncol) { for (uint64_t i = 0; i < nrow; ++i, data += ncol) {
size_t nelem = 0; uint64_t nelem = 0;
for (size_t j = 0; j < ncol; ++j) { for (uint64_t j = 0; j < ncol; ++j) {
if (data[j] != missing) { if (data[j] != missing) {
mat.row_data_.push_back(SparseBatch::Entry(j, data[j])); mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
++nelem; ++nelem;
@ -116,7 +118,7 @@ extern "C"{
} }
void* XGDMatrixSliceDMatrix(void *handle, void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset, const int *idxset,
size_t len) { uint64_t len) {
DMatrixSimple tmp; DMatrixSimple tmp;
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle); DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
if (dsrc.magic != DMatrixSimple::kMagic) { if (dsrc.magic != DMatrixSimple::kMagic) {
@ -137,10 +139,10 @@ extern "C"{
iter->BeforeFirst(); iter->BeforeFirst();
utils::Assert(iter->Next(), "slice"); utils::Assert(iter->Next(), "slice");
const SparseBatch &batch = iter->Value(); const SparseBatch &batch = iter->Value();
for (size_t i = 0; i < len; ++i) { for (uint64_t i = 0; i < len; ++i) {
const int ridx = idxset[i]; const int ridx = idxset[i];
SparseBatch::Inst inst = batch[ridx]; SparseBatch::Inst inst = batch[ridx];
utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceed number of rows"); utils::Check(static_cast<uint64_t>(ridx) < batch.size, "slice index exceed number of rows");
ret.row_data_.resize(ret.row_data_.size() + inst.length); ret.row_data_.resize(ret.row_data_.size() + inst.length);
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
sizeof(SparseBatch::Entry) * inst.length); sizeof(SparseBatch::Entry) * inst.length);
@ -163,46 +165,46 @@ extern "C"{
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent); SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
} }
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) { void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, uint64_t len) {
std::vector<float> &vec = std::vector<float> &vec =
static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field); static_cast<DataMatrix*>(handle)->info.GetFloatInfo(field);
vec.resize(len); vec.resize(len);
memcpy(&vec[0], info, sizeof(float) * len); memcpy(&vec[0], info, sizeof(float) * len);
} }
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, size_t len) { void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, uint64_t len) {
std::vector<unsigned> &vec = std::vector<unsigned> &vec =
static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field); static_cast<DataMatrix*>(handle)->info.GetUIntInfo(field);
vec.resize(len); vec.resize(len);
memcpy(&vec[0], info, sizeof(unsigned) * len); memcpy(&vec[0], info, sizeof(unsigned) * len);
} }
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) { void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len) {
DataMatrix *pmat = static_cast<DataMatrix*>(handle); DataMatrix *pmat = static_cast<DataMatrix*>(handle);
pmat->info.group_ptr.resize(len + 1); pmat->info.group_ptr.resize(len + 1);
pmat->info.group_ptr[0] = 0; pmat->info.group_ptr[0] = 0;
for (size_t i = 0; i < len; ++i) { for (uint64_t i = 0; i < len; ++i) {
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i]; pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
} }
} }
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) { const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* len) {
const std::vector<float> &vec = const std::vector<float> &vec =
static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field); static_cast<const DataMatrix*>(handle)->info.GetFloatInfo(field);
*len = vec.size(); *len = vec.size();
return &vec[0]; return &vec[0];
} }
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, size_t* len) { const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* len) {
const std::vector<unsigned> &vec = const std::vector<unsigned> &vec =
static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field); static_cast<const DataMatrix*>(handle)->info.GetUIntInfo(field);
*len = vec.size(); *len = vec.size();
return &vec[0]; return &vec[0];
} }
size_t XGDMatrixNumRow(const void *handle) { uint64_t XGDMatrixNumRow(const void *handle) {
return static_cast<const DataMatrix*>(handle)->info.num_row(); return static_cast<const DataMatrix*>(handle)->info.num_row();
} }
// xgboost implementation // xgboost implementation
void *XGBoosterCreate(void *dmats[], size_t len) { void *XGBoosterCreate(void *dmats[], uint64_t len) {
std::vector<DataMatrix*> mats; std::vector<DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) { for (uint64_t i = 0; i < len; ++i) {
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]); DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
mats.push_back(dtr); mats.push_back(dtr);
} }
@ -222,7 +224,7 @@ extern "C"{
bst->UpdateOneIter(iter, *dtr); bst->UpdateOneIter(iter, *dtr);
} }
void XGBoosterBoostOneIter(void *handle, void *dtrain, void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len) { float *grad, float *hess, uint64_t len) {
Booster *bst = static_cast<Booster*>(handle); Booster *bst = static_cast<Booster*>(handle);
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain); DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
bst->CheckInitModel(); bst->CheckInitModel();
@ -230,11 +232,11 @@ extern "C"{
bst->BoostOneIter(*dtr, grad, hess, len); bst->BoostOneIter(*dtr, grad, hess, len);
} }
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len) { const char *evnames[], uint64_t len) {
Booster *bst = static_cast<Booster*>(handle); Booster *bst = static_cast<Booster*>(handle);
std::vector<std::string> names; std::vector<std::string> names;
std::vector<const DataMatrix*> mats; std::vector<const DataMatrix*> mats;
for (size_t i = 0; i < len; ++i) { for (uint64_t i = 0; i < len; ++i) {
mats.push_back(static_cast<DataMatrix*>(dmats[i])); mats.push_back(static_cast<DataMatrix*>(dmats[i]));
names.push_back(std::string(evnames[i])); names.push_back(std::string(evnames[i]));
} }
@ -242,7 +244,7 @@ extern "C"{
bst->eval_str = bst->EvalOneIter(iter, mats, names); bst->eval_str = bst->EvalOneIter(iter, mats, names);
return bst->eval_str.c_str(); return bst->eval_str.c_str();
} }
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) { const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len) {
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len); return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
} }
void XGBoosterLoadModel(void *handle, const char *fname) { void XGBoosterLoadModel(void *handle, const char *fname) {
@ -251,7 +253,7 @@ extern "C"{
void XGBoosterSaveModel(const void *handle, const char *fname) { void XGBoosterSaveModel(const void *handle, const char *fname) {
static_cast<const Booster*>(handle)->SaveModel(fname); static_cast<const Booster*>(handle)->SaveModel(fname);
} }
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){ const char** XGBoosterDumpModel(void *handle, const char *fmap, uint64_t *len){
utils::FeatMap featmap; utils::FeatMap featmap;
if (strlen(fmap) != 0) { if (strlen(fmap) != 0) {
featmap.LoadText(fmap); featmap.LoadText(fmap);

View File

@ -7,6 +7,8 @@
* can be used to create wrapper of other languages * can be used to create wrapper of other languages
*/ */
#include <cstdio> #include <cstdio>
// define uint64_t to be unsigned long
typedef unsigned long uint64_t;
extern "C" { extern "C" {
/*! /*!
@ -23,11 +25,11 @@ extern "C" {
* \param nelem number of nonzero elements in the matrix * \param nelem number of nonzero elements in the matrix
* \return created dmatrix * \return created dmatrix
*/ */
void* XGDMatrixCreateFromCSR(const size_t *indptr, void* XGDMatrixCreateFromCSR(const uint64_t *indptr,
const unsigned *indices, const unsigned *indices,
const float *data, const float *data,
size_t nindptr, uint64_t nindptr,
size_t nelem); uint64_t nelem);
/*! /*!
* \brief create matrix content from dense matrix * \brief create matrix content from dense matrix
* \param data pointer to the data space * \param data pointer to the data space
@ -37,8 +39,8 @@ extern "C" {
* \return created dmatrix * \return created dmatrix
*/ */
void* XGDMatrixCreateFromMat(const float *data, void* XGDMatrixCreateFromMat(const float *data,
size_t nrow, uint64_t nrow,
size_t ncol, uint64_t ncol,
float missing); float missing);
/*! /*!
* \brief create a new dmatrix from sliced content of existing matrix * \brief create a new dmatrix from sliced content of existing matrix
@ -49,7 +51,7 @@ extern "C" {
*/ */
void* XGDMatrixSliceDMatrix(void *handle, void* XGDMatrixSliceDMatrix(void *handle,
const int *idxset, const int *idxset,
size_t len); uint64_t len);
/*! /*!
* \brief free space in data matrix * \brief free space in data matrix
*/ */
@ -68,7 +70,7 @@ extern "C" {
* \param array pointer to float vector * \param array pointer to float vector
* \param len length of array * \param len length of array
*/ */
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len); void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, uint64_t len);
/*! /*!
* \brief set uint32 vector to a content in info * \brief set uint32 vector to a content in info
* \param handle an instance of data matrix * \param handle an instance of data matrix
@ -76,14 +78,14 @@ extern "C" {
* \param array pointer to unsigned vector * \param array pointer to unsigned vector
* \param len length of array * \param len length of array
*/ */
void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, size_t len); void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, uint64_t len);
/*! /*!
* \brief set group sizes of the training matrix * \brief set group sizes of the training matrix
* \param handle an instance of data matrix * \param handle an instance of data matrix
* \param group pointer to group size * \param group pointer to group size
* \param len length of array * \param len length of array
*/ */
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len); void XGDMatrixSetGroup(void *handle, const unsigned *group, uint64_t len);
/*! /*!
* \brief get float info vector from matrix * \brief get float info vector from matrix
* \param handle an instance of data matrix * \param handle an instance of data matrix
@ -91,7 +93,7 @@ extern "C" {
* \param out_len used to set result length * \param out_len used to set result length
* \return pointer to the result * \return pointer to the result
*/ */
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len); const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, uint64_t* out_len);
/*! /*!
* \brief get uint32 info vector from matrix * \brief get uint32 info vector from matrix
* \param handle an instance of data matrix * \param handle an instance of data matrix
@ -99,18 +101,18 @@ extern "C" {
* \param out_len used to set result length * \param out_len used to set result length
* \return pointer to the result * \return pointer to the result
*/ */
const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, size_t* out_len); const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, uint64_t* out_len);
/*! /*!
* \brief return number of rows * \brief return number of rows
*/ */
size_t XGDMatrixNumRow(const void *handle); uint64_t XGDMatrixNumRow(const void *handle);
// --- start XGBoost class // --- start XGBoost class
/*! /*!
* \brief create xgboost learner * \brief create xgboost learner
* \param dmats matrices that are set to be cached * \param dmats matrices that are set to be cached
* \param len length of dmats * \param len length of dmats
*/ */
void *XGBoosterCreate(void* dmats[], size_t len); void *XGBoosterCreate(void* dmats[], uint64_t len);
/*! /*!
* \brief free obj in handle * \brief free obj in handle
* \param handle handle to be freed * \param handle handle to be freed
@ -140,7 +142,7 @@ extern "C" {
* \param len length of grad/hess array * \param len length of grad/hess array
*/ */
void XGBoosterBoostOneIter(void *handle, void *dtrain, void XGBoosterBoostOneIter(void *handle, void *dtrain,
float *grad, float *hess, size_t len); float *grad, float *hess, uint64_t len);
/*! /*!
* \brief get evaluation statistics for xgboost * \brief get evaluation statistics for xgboost
* \param handle handle * \param handle handle
@ -151,7 +153,7 @@ extern "C" {
* \return the string containing evaluation statistics * \return the string containing evaluation statistics
*/ */
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
const char *evnames[], size_t len); const char *evnames[], uint64_t len);
/*! /*!
* \brief make prediction based on dmat * \brief make prediction based on dmat
* \param handle handle * \param handle handle
@ -159,7 +161,7 @@ extern "C" {
* \param output_margin whether only output raw margin value * \param output_margin whether only output raw margin value
* \param len used to store length of returning result * \param len used to store length of returning result
*/ */
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len); const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, uint64_t *len);
/*! /*!
* \brief load model from existing file * \brief load model from existing file
* \param handle handle * \param handle handle
@ -180,6 +182,6 @@ extern "C" {
* \return char *data[], representing dump of each model * \return char *data[], representing dump of each model
*/ */
const char **XGBoosterDumpModel(void *handle, const char *fmap, const char **XGBoosterDumpModel(void *handle, const char *fmap,
size_t *out_len); uint64_t *out_len);
}; };
#endif // XGBOOST_WRAPPER_H_ #endif // XGBOOST_WRAPPER_H_