diff --git a/doc/dev-guide/contribute.md b/doc/dev-guide/contribute.md index 03060ab59..988e9493b 100644 --- a/doc/dev-guide/contribute.md +++ b/doc/dev-guide/contribute.md @@ -9,5 +9,4 @@ Contributing Code * The C++ code follows Google C++ style * We follow numpy style to document our python module * Tools to precheck codestyle - - clone https://github.com/dmlc/dmlc-core into root directory - type ```make lint``` and fix possible errors. diff --git a/make/minimum_parallel.mk b/make/minimum_parallel.mk new file mode 100644 index 000000000..c41158aaf --- /dev/null +++ b/make/minimum_parallel.mk @@ -0,0 +1,23 @@ +#------------------------------------------------------------------------ +# xgboost: minimum dependency configuration with Parallelization. +# This configuration is standard but cannot run distributed computing. +# +# see config.mk for template. +#------------------------------------------------------------------------ + +# Whether enable openmp support, needed for multi-threading. +USE_OPENMP = 1 + +# whether use HDFS support during compile +USE_HDFS = 0 + +# whether use AWS S3 support during compile +USE_S3 = 0 + +# whether use Azure blob support during compile +USE_AZURE = 0 + +# Rabit library version, +# - librabit.a Normal distributed version. +# - librabit_empty.a Non distributed mock version, +LIB_RABIT = librabit_empty.a diff --git a/old_src/README.md b/old_src/README.md deleted file mode 100644 index 4b8420306..000000000 --- a/old_src/README.md +++ /dev/null @@ -1,26 +0,0 @@ -Coding Guide -====== -This file is intended to be notes about code structure in xgboost - -Project Logical Layout -======= -* Dependency order: io->learner->gbm->tree - - All module depends on data.h -* tree are implementations of tree construction algorithms. -* gbm is gradient boosting interface, that takes trees and other base learner to do boosting. - - gbm only takes gradient as sufficient statistics, it does not compute the gradient. 
-* learner is learning module that computes gradient for specific object, and pass it to GBM - -File Naming Convention -======= -* .h files are data structures and interface, which are needed to use functions in that layer. -* -inl.hpp files are implementations of interface, like cpp file in most project. - - You only need to understand the interface file to understand the usage of that layer -* In each folder, there can be a .cpp file, that compiles the module of that layer - -How to Hack the Code -====== -* Add objective function: add to learner/objective-inl.hpp and register it in learner/objective.h ```CreateObjFunction``` - - You can also directly do it in python -* Add new evaluation metric: add to learner/evaluation-inl.hpp and register it in learner/evaluation.h ```CreateEvaluator``` -* Add wrapper for a new language, most likely you can do it by taking the functions in python/xgboost_wrapper.h, which is purely C based, and call these C functions to use xgboost diff --git a/old_src/io/io.cpp b/old_src/io/io.cpp deleted file mode 100644 index b3713f0c5..000000000 --- a/old_src/io/io.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2014 by Contributors -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE -#define NOMINMAX -#include -#include "./io.h" -#include "../utils/io.h" -#include "../utils/utils.h" -#include "simple_dmatrix-inl.hpp" -#include "page_dmatrix-inl.hpp" - -namespace xgboost { -namespace io { -DataMatrix* LoadDataMatrix(const char *fname, - bool silent, - bool savebuffer, - bool loadsplit, - const char *cache_file) { - using namespace std; - std::string fname_ = fname; - - const char *dlm = strchr(fname, '#'); - if (dlm != NULL) { - utils::Check(strchr(dlm + 1, '#') == NULL, - "only one `#` is allowed in file path for cachefile specification"); - utils::Check(cache_file == NULL, - "can only specify the cachefile with `#` or argument, not both"); - fname_ = std::string(fname, dlm - fname); - fname = fname_.c_str(); - cache_file = 
dlm +1; - } - - if (cache_file == NULL) { - if (!std::strcmp(fname, "stdin") || - !std::strncmp(fname, "s3://", 5) || - !std::strncmp(fname, "hdfs://", 7) || - loadsplit) { - DMatrixSimple *dmat = new DMatrixSimple(); - dmat->LoadText(fname, silent, loadsplit); - return dmat; - } - int magic; - utils::FileStream fs(utils::FopenCheck(fname, "rb")); - utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format"); - fs.Seek(0); - if (magic == DMatrixSimple::kMagic) { - DMatrixSimple *dmat = new DMatrixSimple(); - dmat->LoadBinary(fs, silent, fname); - fs.Close(); - return dmat; - } - fs.Close(); - DMatrixSimple *dmat = new DMatrixSimple(); - dmat->CacheLoad(fname, silent, savebuffer); - return dmat; - } else { - std::string cache_fname = cache_file; - if (loadsplit) { - std::ostringstream os; - os << cache_file << ".r" << rabit::GetRank(); - cache_fname = os.str(); - cache_file = cache_fname.c_str(); - } - FILE *fi = fopen64(cache_file, "rb"); - if (fi != NULL) { - DMatrixPage *dmat = new DMatrixPage(); - utils::FileStream fs(fi); - dmat->LoadBinary(fs, silent, cache_file); - fs.Close(); - return dmat; - } else { - if (fname[0] == '!') { - DMatrixHalfRAM *dmat = new DMatrixHalfRAM(); - dmat->LoadText(fname + 1, cache_file, false, loadsplit); - return dmat; - } else { - DMatrixPage *dmat = new DMatrixPage(); - dmat->LoadText(fname, cache_file, false, loadsplit); - return dmat; - } - } - } -} - -void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) { - if (dmat.magic == DMatrixSimple::kMagic) { - const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat); - p_dmat->SaveBinary(fname, silent); - } else { - DMatrixSimple smat; - smat.CopyFrom(dmat); - smat.SaveBinary(fname, silent); - } -} - -} // namespace io -} // namespace xgboost diff --git a/old_src/io/page_dmatrix-inl.hpp b/src/data/page_csr_source.h similarity index 100% rename from old_src/io/page_dmatrix-inl.hpp rename to src/data/page_csr_source.h diff --git 
a/old_src/io/page_fmatrix-inl.hpp b/src/data/page_dmatrix.cc similarity index 100% rename from old_src/io/page_fmatrix-inl.hpp rename to src/data/page_dmatrix.cc