[REFACTOR] completely remove old src

This commit is contained in:
tqchen 2016-01-09 22:53:36 -08:00
parent d75e3ed05d
commit 5f28617d7d
6 changed files with 23 additions and 124 deletions

View File

@ -9,5 +9,4 @@ Contributing Code
* The C++ code follows Google C++ style
* We follow numpy style to document our python module
* Tools to precheck codestyle
- clone https://github.com/dmlc/dmlc-core into root directory
- type ```make lint``` and fix possible errors.

23
make/minimum_parallel.mk Normal file
View File

@ -0,0 +1,23 @@
#------------------------------------------------------------------------
# xgboost: minumum dependency configuration with Parallelization.
# This configuration is standard but cannot run distributed computing.
#
# see config.mk for template.
#------------------------------------------------------------------------
# Whether enable openmp support, needed for multi-threading.
USE_OPENMP = 1
# whether use HDFS support during compile
USE_HDFS = 0
# whether use AWS S3 support during compile
USE_S3 = 0
# whether use Azure blob support during compile
USE_AZURE = 0
# Rabit library version,
# - librabit.a Normal distributed version.
# - librabit_empty.a Non distributed mock version,
LIB_RABIT = librabit_empty.a

View File

@ -1,26 +0,0 @@
Coding Guide
======
This file is intended to be notes about code structure in xgboost
Project Logical Layout
=======
* Dependency order: io->learner->gbm->tree
- All module depends on data.h
* tree are implementations of tree construction algorithms.
* gbm is gradient boosting interface, that takes trees and other base learner to do boosting.
- gbm only takes gradient as sufficient statistics, it does not compute the gradient.
* learner is learning module that computes gradient for specific object, and pass it to GBM
File Naming Convention
=======
* .h files are data structures and interface, which are needed to use functions in that layer.
* -inl.hpp files are implementations of interface, like cpp file in most project.
- You only need to understand the interface file to understand the usage of that layer
* In each folder, there can be a .cpp file, that compiles the module of that layer
How to Hack the Code
======
* Add objective function: add to learner/objective-inl.hpp and register it in learner/objective.h ```CreateObjFunction```
- You can also directly do it in python
* Add new evaluation metric: add to learner/evaluation-inl.hpp and register it in learner/evaluation.h ```CreateEvaluator```
* Add wrapper for a new language, most likely you can do it by taking the functions in python/xgboost_wrapper.h, which is purely C based, and call these C functions to use xgboost

View File

@ -1,97 +0,0 @@
// Copyright 2014 by Contributors
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#define NOMINMAX
#include <string>
#include "./io.h"
#include "../utils/io.h"
#include "../utils/utils.h"
#include "simple_dmatrix-inl.hpp"
#include "page_dmatrix-inl.hpp"
namespace xgboost {
namespace io {
DataMatrix* LoadDataMatrix(const char *fname,
bool silent,
bool savebuffer,
bool loadsplit,
const char *cache_file) {
using namespace std;
std::string fname_ = fname;
const char *dlm = strchr(fname, '#');
if (dlm != NULL) {
utils::Check(strchr(dlm + 1, '#') == NULL,
"only one `#` is allowed in file path for cachefile specification");
utils::Check(cache_file == NULL,
"can only specify the cachefile with `#` or argument, not both");
fname_ = std::string(fname, dlm - fname);
fname = fname_.c_str();
cache_file = dlm +1;
}
if (cache_file == NULL) {
if (!std::strcmp(fname, "stdin") ||
!std::strncmp(fname, "s3://", 5) ||
!std::strncmp(fname, "hdfs://", 7) ||
loadsplit) {
DMatrixSimple *dmat = new DMatrixSimple();
dmat->LoadText(fname, silent, loadsplit);
return dmat;
}
int magic;
utils::FileStream fs(utils::FopenCheck(fname, "rb"));
utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
fs.Seek(0);
if (magic == DMatrixSimple::kMagic) {
DMatrixSimple *dmat = new DMatrixSimple();
dmat->LoadBinary(fs, silent, fname);
fs.Close();
return dmat;
}
fs.Close();
DMatrixSimple *dmat = new DMatrixSimple();
dmat->CacheLoad(fname, silent, savebuffer);
return dmat;
} else {
std::string cache_fname = cache_file;
if (loadsplit) {
std::ostringstream os;
os << cache_file << ".r" << rabit::GetRank();
cache_fname = os.str();
cache_file = cache_fname.c_str();
}
FILE *fi = fopen64(cache_file, "rb");
if (fi != NULL) {
DMatrixPage *dmat = new DMatrixPage();
utils::FileStream fs(fi);
dmat->LoadBinary(fs, silent, cache_file);
fs.Close();
return dmat;
} else {
if (fname[0] == '!') {
DMatrixHalfRAM *dmat = new DMatrixHalfRAM();
dmat->LoadText(fname + 1, cache_file, false, loadsplit);
return dmat;
} else {
DMatrixPage *dmat = new DMatrixPage();
dmat->LoadText(fname, cache_file, false, loadsplit);
return dmat;
}
}
}
}
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
if (dmat.magic == DMatrixSimple::kMagic) {
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
p_dmat->SaveBinary(fname, silent);
} else {
DMatrixSimple smat;
smat.CopyFrom(dmat);
smat.SaveBinary(fname, silent);
}
}
} // namespace io
} // namespace xgboost