diff --git a/.travis.yml b/.travis.yml index 3eafab2a0..af06f2f27 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,8 @@ env: - TASK=python_lightweight_test # java package test - TASK=java_test + # cmake test + - TASK=cmake_test os: - linux diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..e80d92153 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,72 @@ +cmake_minimum_required (VERSION 2.6) +project (xgboost) +find_package(OpenMP) + +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} -fPIC") + +# Make sure we are using C++11 +include(CheckCXXCompilerFlag) +CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) +CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) +if(COMPILER_SUPPORTS_CXX11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +elseif(COMPILER_SUPPORTS_CXX0X) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") +else() + message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") +endif() + + +#Make sure we are using the static runtime +if(MSVC) + set(variables + CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_RELWITHDEBINFO + ) + foreach(variable ${variables}) + if(${variable} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${variable} "${${variable}}") + endif() + endforeach() +endif() + +include_directories ( + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/dmlc-core/include + ${PROJECT_SOURCE_DIR}/rabit/include + ) + +file(GLOB SOURCES + src/c_api/*.cc + src/common/*.cc + src/data/*.cc + src/gbm/*.cc + src/metric/*.cc + src/objective/*.cc + src/tree/*.cc + src/*.cc +) + +set(RABIT_SOURCES + rabit/src/allreduce_base.cc + rabit/src/allreduce_robust.cc + rabit/src/engine.cc + rabit/src/c_api.cc +) + + +add_subdirectory(dmlc-core) + +add_library(rabit STATIC ${RABIT_SOURCES}) + +add_executable(xgboost ${SOURCES}) +add_library(libxgboost SHARED ${SOURCES}) + +target_link_libraries(xgboost dmlccore rabit) +target_link_libraries(libxgboost dmlccore rabit) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index e75ed2cfe..eeec464b0 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -22,6 +22,7 @@ XGB_EXTERN_C { #else #define XGB_DLL XGB_EXTERN_C #endif + // manually define unsign long typedef unsigned long bst_ulong; // NOLINT(*) diff --git a/include/xgboost/tree_updater.h b/include/xgboost/tree_updater.h index ecace6571..abbc7f3cb 100644 --- a/include/xgboost/tree_updater.h +++ b/include/xgboost/tree_updater.h @@ -16,6 +16,10 @@ #include "./data.h" #include "./tree_model.h" +#ifdef _MSC_VER +#include +#endif + namespace xgboost { /*! * \brief interface of tree update module, that performs update of a tree. diff --git a/jvm-packages/README.md b/jvm-packages/README.md index a9aded6f8..80e194fd8 100644 --- a/jvm-packages/README.md +++ b/jvm-packages/README.md @@ -16,6 +16,11 @@ and power xgboost into JVM ecosystem. You can find more about XGBoost on [Documentation](https://xgboost.readthedocs.org/en/latest/jvm/index.html) and [Resource Page](../demo/README.md). ## Hello World +**NOTE on LIBSVM Format**: +- Use *1-based* ascending indexes for the LIBSVM format in distributed training mode - + - Spark does the internal conversion, and does not accept formats that are 0-based +- Whereas, use *0-based* indexes format when predicting in normal mode - for instance, while using the saved model in the Python package + ### XGBoost Scala ```scala import ml.dmlc.xgboost4j.scala.DMatrix diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index d659b85ce..03ec0e42e 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -228,22 +228,22 @@ int XGDMatrixCreateFromDataIter( API_END(); } -int XGDMatrixCreateFromCSR(const bst_ulong* indptr, +XGB_DLL int XGDMatrixCreateFromCSR(const xgboost::bst_ulong* indptr, const unsigned *indices, const float* data, - bst_ulong nindptr, - bst_ulong nelem, + xgboost::bst_ulong nindptr, + xgboost::bst_ulong nelem, DMatrixHandle* out) { std::unique_ptr source(new data::SimpleCSRSource()); API_BEGIN(); data::SimpleCSRSource& mat = *source; mat.row_ptr_.resize(nindptr); - for (bst_ulong i = 0; i < nindptr; ++i) { + for (xgboost::bst_ulong i = 0; i < nindptr; ++i) { mat.row_ptr_[i] = static_cast(indptr[i]); } mat.row_data_.resize(nelem); - for (bst_ulong i = 0; i < nelem; ++i) { + for (xgboost::bst_ulong i = 0; i < nelem; ++i) { mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]); mat.info.num_col = std::max(mat.info.num_col, static_cast(indices[i] + 1)); @@ -254,11 +254,11 @@ int XGDMatrixCreateFromCSR(const bst_ulong* indptr, API_END(); } -int XGDMatrixCreateFromCSC(const bst_ulong* col_ptr, +XGB_DLL int XGDMatrixCreateFromCSC(const xgboost::bst_ulong* col_ptr, const unsigned* indices, const float* data, - bst_ulong nindptr, - bst_ulong nelem, + xgboost::bst_ulong nindptr, + xgboost::bst_ulong nelem, DMatrixHandle* out) { std::unique_ptr source(new data::SimpleCSRSource()); @@ -296,9 +296,9 @@ int XGDMatrixCreateFromCSC(const bst_ulong* col_ptr, API_END(); } -int XGDMatrixCreateFromMat(const float* data, - bst_ulong nrow, - bst_ulong ncol, +XGB_DLL int XGDMatrixCreateFromMat(const float* data, + xgboost::bst_ulong nrow, + xgboost::bst_ulong ncol, float missing, DMatrixHandle* out) { std::unique_ptr source(new data::SimpleCSRSource()); @@ -308,9 +308,9 @@ int XGDMatrixCreateFromMat(const float* data, bool nan_missing = common::CheckNAN(missing); mat.info.num_row = nrow; mat.info.num_col = ncol; - for (bst_ulong i = 0; i < nrow; ++i, data += ncol) { - bst_ulong nelem = 0; - for (bst_ulong j = 0; j < ncol; ++j) { + for (xgboost::bst_ulong i = 0; i < nrow; ++i, data += ncol) { + xgboost::bst_ulong nelem = 0; + for (xgboost::bst_ulong j = 0; j < ncol; ++j) { if (common::CheckNAN(data[j])) { CHECK(nan_missing) << "There are NAN in the matrix, however, you did not set missing=NAN"; @@ -328,9 +328,9 @@ int XGDMatrixCreateFromMat(const float* data, API_END(); } -int XGDMatrixSliceDMatrix(DMatrixHandle handle, +XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int* idxset, - bst_ulong len, + xgboost::bst_ulong len, DMatrixHandle* out) { std::unique_ptr source(new data::SimpleCSRSource()); @@ -351,10 +351,10 @@ int XGDMatrixSliceDMatrix(DMatrixHandle handle, CHECK(iter->Next()); const RowBatch& batch = iter->Value(); - for (bst_ulong i = 0; i < len; ++i) { + for (xgboost::bst_ulong i = 0; i < len; ++i) { const int ridx = idxset[i]; RowBatch::Inst inst = batch[ridx]; - CHECK_LT(static_cast(ridx), batch.size); + CHECK_LT(static_cast(ridx), batch.size); ret.row_data_.resize(ret.row_data_.size() + inst.length); std::memcpy(dmlc::BeginPtr(ret.row_data_) + ret.row_ptr_.back(), inst.data, sizeof(RowBatch::Entry) * inst.length); @@ -375,13 +375,13 @@ int XGDMatrixSliceDMatrix(DMatrixHandle handle, API_END(); } -int XGDMatrixFree(DMatrixHandle handle) { +XGB_DLL int XGDMatrixFree(DMatrixHandle handle) { API_BEGIN(); delete static_cast(handle); API_END(); } -int XGDMatrixSaveBinary(DMatrixHandle handle, +XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, const char* fname, int silent) { API_BEGIN(); @@ -389,27 +389,27 @@ int XGDMatrixSaveBinary(DMatrixHandle handle, API_END(); } -int XGDMatrixSetFloatInfo(DMatrixHandle handle, +XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char* field, const float* info, - bst_ulong len) { + xgboost::bst_ulong len) { API_BEGIN(); static_cast(handle)->info().SetInfo(field, info, kFloat32, len); API_END(); } -int XGDMatrixSetUIntInfo(DMatrixHandle handle, +XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char* field, const unsigned* info, - bst_ulong len) { + xgboost::bst_ulong len) { API_BEGIN(); static_cast(handle)->info().SetInfo(field, info, kUInt32, len); API_END(); } -int XGDMatrixSetGroup(DMatrixHandle handle, +XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned* group, - bst_ulong len) { + xgboost::bst_ulong len) { API_BEGIN(); DMatrix *pmat = static_cast(handle); MetaInfo& info = pmat->info(); @@ -421,9 +421,9 @@ int XGDMatrixSetGroup(DMatrixHandle handle, API_END(); } -int XGDMatrixGetFloatInfo(const DMatrixHandle handle, +XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, const char* field, - bst_ulong* out_len, + xgboost::bst_ulong* out_len, const float** out_dptr) { API_BEGIN(); const MetaInfo& info = static_cast(handle)->info(); @@ -437,14 +437,14 @@ int XGDMatrixGetFloatInfo(const DMatrixHandle handle, } else { LOG(FATAL) << "Unknown float field name " << field; } - *out_len = static_cast(vec->size()); + *out_len = static_cast(vec->size()); *out_dptr = dmlc::BeginPtr(*vec); API_END(); } -int XGDMatrixGetUIntInfo(const DMatrixHandle handle, +XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle, const char *field, - bst_ulong *out_len, + xgboost::bst_ulong *out_len, const unsigned **out_dptr) { API_BEGIN(); const MetaInfo& info = static_cast(handle)->info(); @@ -454,45 +454,45 @@ int XGDMatrixGetUIntInfo(const DMatrixHandle handle, } else { LOG(FATAL) << "Unknown uint field name " << field; } - *out_len = static_cast(vec->size()); + *out_len = static_cast(vec->size()); *out_dptr = dmlc::BeginPtr(*vec); API_END(); } -int XGDMatrixNumRow(const DMatrixHandle handle, - bst_ulong *out) { +XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle, + xgboost::bst_ulong *out) { API_BEGIN(); - *out = static_cast(static_cast(handle)->info().num_row); + *out = static_cast(static_cast(handle)->info().num_row); API_END(); } -int XGDMatrixNumCol(const DMatrixHandle handle, - bst_ulong *out) { +XGB_DLL int XGDMatrixNumCol(const DMatrixHandle handle, + xgboost::bst_ulong *out) { API_BEGIN(); *out = static_cast(static_cast(handle)->info().num_col); API_END(); } // xgboost implementation -int XGBoosterCreate(const DMatrixHandle dmats[], - bst_ulong len, +XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], + xgboost::bst_ulong len, BoosterHandle *out) { API_BEGIN(); std::vector mats; - for (bst_ulong i = 0; i < len; ++i) { + for (xgboost::bst_ulong i = 0; i < len; ++i) { mats.push_back(static_cast(dmats[i])); } *out = new Booster(mats); API_END(); } -int XGBoosterFree(BoosterHandle handle) { +XGB_DLL int XGBoosterFree(BoosterHandle handle) { API_BEGIN(); delete static_cast(handle); API_END(); } -int XGBoosterSetParam(BoosterHandle handle, +XGB_DLL int XGBoosterSetParam(BoosterHandle handle, const char *name, const char *value) { API_BEGIN(); @@ -500,7 +500,7 @@ int XGBoosterSetParam(BoosterHandle handle, API_END(); } -int XGBoosterUpdateOneIter(BoosterHandle handle, +XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain) { API_BEGIN(); @@ -512,17 +512,17 @@ int XGBoosterUpdateOneIter(BoosterHandle handle, API_END(); } -int XGBoosterBoostOneIter(BoosterHandle handle, +XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, float *grad, float *hess, - bst_ulong len) { + xgboost::bst_ulong len) { std::vector& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair; API_BEGIN(); Booster* bst = static_cast(handle); DMatrix* dtr = static_cast(dtrain); tmp_gpair.resize(len); - for (bst_ulong i = 0; i < len; ++i) { + for (xgboost::bst_ulong i = 0; i < len; ++i) { tmp_gpair[i] = bst_gpair(grad[i], hess[i]); } @@ -531,11 +531,11 @@ int XGBoosterBoostOneIter(BoosterHandle handle, API_END(); } -int XGBoosterEvalOneIter(BoosterHandle handle, +XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle dmats[], const char* evnames[], - bst_ulong len, + xgboost::bst_ulong len, const char** out_str) { std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str; API_BEGIN(); @@ -543,7 +543,7 @@ int XGBoosterEvalOneIter(BoosterHandle handle, std::vector data_sets; std::vector data_names; - for (bst_ulong i = 0; i < len; ++i) { + for (xgboost::bst_ulong i = 0; i < len; ++i) { data_sets.push_back(static_cast(dmats[i])); data_names.push_back(std::string(evnames[i])); } @@ -554,11 +554,11 @@ int XGBoosterEvalOneIter(BoosterHandle handle, API_END(); } -int XGBoosterPredict(BoosterHandle handle, +XGB_DLL int XGBoosterPredict(BoosterHandle handle, DMatrixHandle dmat, int option_mask, unsigned ntree_limit, - bst_ulong *len, + xgboost::bst_ulong *len, const float **out_result) { std::vector& preds = XGBAPIThreadLocalStore::Get()->ret_vec_float; API_BEGIN(); @@ -570,18 +570,18 @@ int XGBoosterPredict(BoosterHandle handle, &preds, ntree_limit, (option_mask & 2) != 0); *out_result = dmlc::BeginPtr(preds); - *len = static_cast(preds.size()); + *len = static_cast(preds.size()); API_END(); } -int XGBoosterLoadModel(BoosterHandle handle, const char* fname) { +XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) { API_BEGIN(); std::unique_ptr fi(dmlc::Stream::Create(fname, "r")); static_cast(handle)->LoadModel(fi.get()); API_END(); } -int XGBoosterSaveModel(BoosterHandle handle, const char* fname) { +XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char* fname) { API_BEGIN(); std::unique_ptr fo(dmlc::Stream::Create(fname, "w")); Booster *bst = static_cast(handle); @@ -590,17 +590,17 @@ int XGBoosterSaveModel(BoosterHandle handle, const char* fname) { API_END(); } -int XGBoosterLoadModelFromBuffer(BoosterHandle handle, +XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void* buf, - bst_ulong len) { + xgboost::bst_ulong len) { API_BEGIN(); common::MemoryFixSizeBuffer fs((void*)buf, len); // NOLINT(*) static_cast(handle)->LoadModel(&fs); API_END(); } -int XGBoosterGetModelRaw(BoosterHandle handle, - bst_ulong* out_len, +XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, + xgboost::bst_ulong* out_len, const char** out_dptr) { std::string& raw_str = XGBAPIThreadLocalStore::Get()->ret_str; raw_str.resize(0); @@ -611,7 +611,7 @@ int XGBoosterGetModelRaw(BoosterHandle handle, bst->LazyInit(); bst->learner()->Save(&fo); *out_dptr = dmlc::BeginPtr(raw_str); - *out_len = static_cast(raw_str.length()); + *out_len = static_cast(raw_str.length()); API_END(); } @@ -619,7 +619,7 @@ inline void XGBoostDumpModelImpl( BoosterHandle handle, const FeatureMap& fmap, int with_stats, - bst_ulong* len, + xgboost::bst_ulong* len, const char*** out_models) { std::vector& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; @@ -631,12 +631,12 @@ inline void XGBoostDumpModelImpl( charp_vecs[i] = str_vecs[i].c_str(); } *out_models = dmlc::BeginPtr(charp_vecs); - *len = static_cast(charp_vecs.size()); + *len = static_cast(charp_vecs.size()); } -int XGBoosterDumpModel(BoosterHandle handle, +XGB_DLL int XGBoosterDumpModel(BoosterHandle handle, const char* fmap, int with_stats, - bst_ulong* len, + xgboost::bst_ulong* len, const char*** out_models) { API_BEGIN(); FeatureMap featmap; @@ -650,12 +650,12 @@ int XGBoosterDumpModel(BoosterHandle handle, API_END(); } -int XGBoosterDumpModelWithFeatures(BoosterHandle handle, +XGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle, int fnum, const char** fname, const char** ftype, int with_stats, - bst_ulong* len, + xgboost::bst_ulong* len, const char*** out_models) { API_BEGIN(); FeatureMap featmap; @@ -666,7 +666,7 @@ int XGBoosterDumpModelWithFeatures(BoosterHandle handle, API_END(); } -int XGBoosterGetAttr(BoosterHandle handle, +XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char* key, const char** out, int* success) { @@ -683,7 +683,7 @@ int XGBoosterGetAttr(BoosterHandle handle, API_END(); } -int XGBoosterSetAttr(BoosterHandle handle, +XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char* key, const char* value) { Booster* bst = static_cast(handle); @@ -696,8 +696,8 @@ int XGBoosterSetAttr(BoosterHandle handle, API_END(); } -int XGBoosterGetAttrNames(BoosterHandle handle, - bst_ulong* out_len, +XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, + xgboost::bst_ulong* out_len, const char*** out) { std::vector& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; @@ -709,11 +709,11 @@ int XGBoosterGetAttrNames(BoosterHandle handle, charp_vecs[i] = str_vecs[i].c_str(); } *out = dmlc::BeginPtr(charp_vecs); - *out_len = static_cast(charp_vecs.size()); + *out_len = static_cast(charp_vecs.size()); API_END(); } -int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, +XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, int* version) { API_BEGIN(); Booster* bst = static_cast(handle); @@ -724,7 +724,7 @@ int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, API_END(); } -int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) { +XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) { API_BEGIN(); Booster* bst = static_cast(handle); if (bst->learner()->AllowLazyCheckPoint()) { diff --git a/src/data/sparse_batch_page.h b/src/data/sparse_batch_page.h index 81dbe1368..85f762727 100644 --- a/src/data/sparse_batch_page.h +++ b/src/data/sparse_batch_page.h @@ -23,6 +23,10 @@ #include #endif +#ifdef _MSC_VER +#include +#endif + namespace xgboost { namespace data { /*! diff --git a/src/tree/param.h b/src/tree/param.h index e9de3ac14..23d0c5b81 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -9,6 +9,7 @@ #include #include +#include namespace xgboost { namespace tree { diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh index e00cb6e01..b4af3743a 100755 --- a/tests/travis/run_test.sh +++ b/tests/travis/run_test.sh @@ -81,3 +81,10 @@ if [ ${TASK} == "java_test" ]; then mvn clean install -DskipTests=true mvn test fi + +if [ ${TASK} == "cmake_test" ]; then + mkdir build + cd build + cmake .. + make +fi