[REFACTOR] completely remove old src
This commit is contained in:
parent
d75e3ed05d
commit
5f28617d7d
@ -9,5 +9,4 @@ Contributing Code
|
|||||||
* The C++ code follows Google C++ style
|
* The C++ code follows Google C++ style
|
||||||
* We follow numpy style to document our python module
|
* We follow numpy style to document our python module
|
||||||
* Tools to precheck codestyle
|
* Tools to precheck codestyle
|
||||||
- clone https://github.com/dmlc/dmlc-core into root directory
|
|
||||||
- type ```make lint``` and fix possible errors.
|
- type ```make lint``` and fix possible errors.
|
||||||
|
|||||||
23
make/minimum_parallel.mk
Normal file
23
make/minimum_parallel.mk
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
#------------------------------------------------------------------------
|
||||||
|
# xgboost: minimum dependency configuration with parallelization.
|
||||||
|
# This configuration is standard but cannot run distributed computing.
|
||||||
|
#
|
||||||
|
# see config.mk for template.
|
||||||
|
#------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Whether to enable OpenMP support, needed for multi-threading.
|
||||||
|
USE_OPENMP = 1
|
||||||
|
|
||||||
|
# whether to use HDFS support during compile
|
||||||
|
USE_HDFS = 0
|
||||||
|
|
||||||
|
# whether to use AWS S3 support during compile
|
||||||
|
USE_S3 = 0
|
||||||
|
|
||||||
|
# whether to use Azure blob support during compile
|
||||||
|
USE_AZURE = 0
|
||||||
|
|
||||||
|
# Rabit library version,
|
||||||
|
# - librabit.a Normal distributed version.
|
||||||
|
# - librabit_empty.a Non distributed mock version,
|
||||||
|
LIB_RABIT = librabit_empty.a
|
||||||
@ -1,26 +0,0 @@
|
|||||||
Coding Guide
|
|
||||||
======
|
|
||||||
This file is intended to be notes about code structure in xgboost
|
|
||||||
|
|
||||||
Project Logical Layout
|
|
||||||
=======
|
|
||||||
* Dependency order: io->learner->gbm->tree
|
|
||||||
- All modules depend on data.h
|
|
||||||
* tree are implementations of tree construction algorithms.
|
|
||||||
* gbm is the gradient boosting interface, which takes trees and other base learners to do boosting.
|
|
||||||
- gbm only takes gradient as sufficient statistics, it does not compute the gradient.
|
|
||||||
* learner is the learning module that computes the gradient for a specific objective and passes it to GBM
|
|
||||||
|
|
||||||
File Naming Convention
|
|
||||||
=======
|
|
||||||
* .h files are data structures and interface, which are needed to use functions in that layer.
|
|
||||||
* -inl.hpp files are implementations of the interfaces, like .cpp files in most projects.
|
|
||||||
- You only need to understand the interface file to understand the usage of that layer
|
|
||||||
* In each folder, there can be a .cpp file, that compiles the module of that layer
|
|
||||||
|
|
||||||
How to Hack the Code
|
|
||||||
======
|
|
||||||
* Add objective function: add to learner/objective-inl.hpp and register it in learner/objective.h ```CreateObjFunction```
|
|
||||||
- You can also directly do it in python
|
|
||||||
* Add new evaluation metric: add to learner/evaluation-inl.hpp and register it in learner/evaluation.h ```CreateEvaluator```
|
|
||||||
* Add a wrapper for a new language: most likely you can do it by taking the functions in python/xgboost_wrapper.h, which is purely C based, and calling these C functions to use xgboost
|
|
||||||
@ -1,97 +0,0 @@
|
|||||||
// Copyright 2014 by Contributors
|
|
||||||
#define _CRT_SECURE_NO_WARNINGS
|
|
||||||
#define _CRT_SECURE_NO_DEPRECATE
|
|
||||||
#define NOMINMAX
|
|
||||||
#include <string>
|
|
||||||
#include "./io.h"
|
|
||||||
#include "../utils/io.h"
|
|
||||||
#include "../utils/utils.h"
|
|
||||||
#include "simple_dmatrix-inl.hpp"
|
|
||||||
#include "page_dmatrix-inl.hpp"
|
|
||||||
|
|
||||||
namespace xgboost {
|
|
||||||
namespace io {
|
|
||||||
// Load a data matrix from `fname`, picking the implementation from the path
// and the flags.
//
// `fname` may carry a cache file name after a single '#' ("data#cache"); this
// is mutually exclusive with passing a non-NULL `cache_file`.
//
// Without a cache file:
//   - "stdin", "s3://..." and "hdfs://..." paths, or loadsplit == true, are
//     loaded through DMatrixSimple::LoadText;
//   - otherwise the leading int of the file is compared with
//     DMatrixSimple::kMagic: on a match the file goes to
//     DMatrixSimple::LoadBinary, anything else to DMatrixSimple::CacheLoad
//     (the only consumer of `savebuffer`).
// With a cache file:
//   - when loadsplit is set, ".r<rabit::GetRank()>" is appended to its name;
//   - if the cache file can be opened, it is loaded with
//     DMatrixPage::LoadBinary;
//   - otherwise the text input is loaded with the cache name, through
//     DMatrixHalfRAM when `fname` starts with '!' (the '!' is skipped) and
//     through DMatrixPage in every other case.
//
// Returns an object allocated with `new`; presumably the caller takes
// ownership and must delete it (TODO confirm against callers).
DataMatrix* LoadDataMatrix(const char *fname,
                           bool silent,
                           bool savebuffer,
                           bool loadsplit,
                           const char *cache_file) {
  using namespace std;
  // Guard before building a std::string from the pointer: constructing one
  // from NULL is undefined behaviour.
  utils::Check(fname != NULL, "file name must not be NULL");
  std::string fname_ = fname;

  // Split "path#cache" into the data path and the cache file name.
  const char *dlm = strchr(fname, '#');
  if (dlm != NULL) {
    utils::Check(strchr(dlm + 1, '#') == NULL,
                 "only one `#` is allowed in file path for cachefile specification");
    utils::Check(cache_file == NULL,
                 "can only specify the cachefile with `#` or argument, not both");
    fname_ = std::string(fname, dlm - fname);
    // fname now points into fname_, which lives until the function returns;
    // dlm still points into the caller's original buffer.
    fname = fname_.c_str();
    cache_file = dlm + 1;
  }

  if (cache_file == NULL) {
    if (!std::strcmp(fname, "stdin") ||
        !std::strncmp(fname, "s3://", 5) ||
        !std::strncmp(fname, "hdfs://", 7) ||
        loadsplit) {
      DMatrixSimple *dmat = new DMatrixSimple();
      dmat->LoadText(fname, silent, loadsplit);
      return dmat;
    }
    // Zero-initialised so that a read shorter than sizeof(magic) cannot leave
    // indeterminate bytes in the value compared against kMagic below.
    int magic = 0;
    utils::FileStream fs(utils::FopenCheck(fname, "rb"));
    utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
    // Rewind so the selected loader sees the file from its first byte.
    fs.Seek(0);
    if (magic == DMatrixSimple::kMagic) {
      DMatrixSimple *dmat = new DMatrixSimple();
      dmat->LoadBinary(fs, silent, fname);
      fs.Close();
      return dmat;
    }
    fs.Close();
    DMatrixSimple *dmat = new DMatrixSimple();
    dmat->CacheLoad(fname, silent, savebuffer);
    return dmat;
  } else {
    std::string cache_fname = cache_file;
    if (loadsplit) {
      // Give every rank its own cache file: "<cache_file>.r<rank>".
      std::ostringstream os;
      os << cache_file << ".r" << rabit::GetRank();
      cache_fname = os.str();
      cache_file = cache_fname.c_str();
    }
    FILE *fi = fopen64(cache_file, "rb");
    if (fi != NULL) {
      // The cache file can be opened: load it instead of the text input.
      DMatrixPage *dmat = new DMatrixPage();
      utils::FileStream fs(fi);
      dmat->LoadBinary(fs, silent, cache_file);
      fs.Close();
      return dmat;
    } else {
      // NOTE(review): both LoadText calls below pass a literal `false` where
      // `silent` might be expected - confirm this is intentional.
      if (fname[0] == '!') {
        DMatrixHalfRAM *dmat = new DMatrixHalfRAM();
        dmat->LoadText(fname + 1, cache_file, false, loadsplit);
        return dmat;
      } else {
        DMatrixPage *dmat = new DMatrixPage();
        dmat->LoadText(fname, cache_file, false, loadsplit);
        return dmat;
      }
    }
  }
}
|
|
||||||
|
|
||||||
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
|
|
||||||
if (dmat.magic == DMatrixSimple::kMagic) {
|
|
||||||
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
|
|
||||||
p_dmat->SaveBinary(fname, silent);
|
|
||||||
} else {
|
|
||||||
DMatrixSimple smat;
|
|
||||||
smat.CopyFrom(dmat);
|
|
||||||
smat.SaveBinary(fname, silent);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace io
|
|
||||||
} // namespace xgboost
|
|
||||||
Loading…
x
Reference in New Issue
Block a user