mv code into src

This commit is contained in:
tqchen
2014-08-15 21:04:23 -07:00
parent 3589e8252f
commit 34dd409c5b
25 changed files with 1 additions and 28 deletions

196
src/utils/config.h Normal file
View File

@@ -0,0 +1,196 @@
#ifndef XGBOOST_UTILS_CONFIG_H_
#define XGBOOST_UTILS_CONFIG_H_
/*!
* \file config.h
* \brief helper class to load in configures from file
* \author Tianqi Chen
*/
#include <cstdio>
#include <cstring>
#include <string>
#include <istream>
#include <fstream>
#include "./utils.h"
namespace xgboost {
namespace utils {
/*!
* \brief base implementation of config reader
*/
class ConfigReaderBase {
public:
/*!
* \brief get current name, called after Next returns true
* \return current parameter name
*/
inline const char *name(void) const {
return s_name;
}
/*!
* \brief get current value, called after Next returns true
* \return current parameter value
*/
inline const char *val(void) const {
return s_val;
}
/*!
* \brief move iterator to next position
* \return true if there is value in next position
*/
inline bool Next(void) {
while (!this->IsEnd()) {
GetNextToken(s_name);
if (s_name[0] == '=') return false;
if (GetNextToken( s_buf ) || s_buf[0] != '=') return false;
if (GetNextToken( s_val ) || s_val[0] == '=') return false;
return true;
}
return false;
}
// called before usage
inline void Init(void) {
ch_buf = this->GetChar();
}
protected:
/*!
* \brief to be implemented by subclass,
* get next token, return EOF if end of file
*/
virtual char GetChar(void) = 0;
/*! \brief to be implemented by child, check if end of stream */
virtual bool IsEnd(void) = 0;
private:
char ch_buf;
char s_name[100000], s_val[100000], s_buf[100000];
inline void SkipLine(void) {
do {
ch_buf = this->GetChar();
} while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r');
}
inline void ParseStr(char tok[]) {
int i = 0;
while ((ch_buf = this->GetChar()) != EOF) {
switch (ch_buf) {
case '\\': tok[i++] = this->GetChar(); break;
case '\"': tok[i++] = '\0'; return;
case '\r':
case '\n': Error("ConfigReader: unterminated string");
default: tok[i++] = ch_buf;
}
}
Error("ConfigReader: unterminated string");
}
inline void ParseStrML(char tok[]) {
int i = 0;
while ((ch_buf = this->GetChar()) != EOF) {
switch (ch_buf) {
case '\\': tok[i++] = this->GetChar(); break;
case '\'': tok[i++] = '\0'; return;
default: tok[i++] = ch_buf;
}
}
Error("unterminated string");
}
// return newline
inline bool GetNextToken(char tok[]) {
int i = 0;
bool new_line = false;
while (ch_buf != EOF) {
switch (ch_buf) {
case '#' : SkipLine(); new_line = true; break;
case '\"':
if (i == 0) {
ParseStr(tok); ch_buf = this->GetChar(); return new_line;
} else {
Error("ConfigReader: token followed directly by string");
}
case '\'':
if (i == 0) {
ParseStrML( tok ); ch_buf = this->GetChar(); return new_line;
} else {
Error("ConfigReader: token followed directly by string");
}
case '=':
if (i == 0) {
ch_buf = this->GetChar();
tok[0] = '=';
tok[1] = '\0';
} else {
tok[i] = '\0';
}
return new_line;
case '\r':
case '\n':
if (i == 0) new_line = true;
case '\t':
case ' ' :
ch_buf = this->GetChar();
if (i > 0) {
tok[i] = '\0';
return new_line;
}
break;
default:
tok[i++] = ch_buf;
ch_buf = this->GetChar();
break;
}
}
return true;
}
};
/*!
 * \brief config reader that takes its characters from a std::istream,
 *  so any kind of input stream can be used as the source
 */
class ConfigStreamReader: public ConfigReaderBase {
 public:
  /*!
   * \brief constructor
   * \param fin input stream to read from, only a reference to it is kept
   */
  explicit ConfigStreamReader(std::istream &fin) : fin(fin) {}

 protected:
  /*! \brief return the result of the stream's get(), converted to char */
  virtual char GetChar(void) {
    const std::istream::int_type next = fin.get();
    return static_cast<char>(next);
  }
  /*! \brief end of input is reached once the stream reports eof() */
  virtual bool IsEnd(void) {
    const bool reached_eof = fin.eof();
    return reached_eof;
  }

 private:
  /*! \brief the stream the characters are read from */
  std::istream &fin;
};
/*!
 * \brief holder of the file stream used by ConfigIterator
 *  Kept in a base class of its own because base classes are constructed
 *  before data members: this way the stream already exists when a reference
 *  to it is handed to the ConfigStreamReader constructor.
 */
struct ConfigFileStreamHolder {
  /*! \brief stream of the configure file */
  std::ifstream fi;
};
/*!
 * \brief an iterator that iterates over a configure file and gets the configures
 */
class ConfigIterator: private ConfigFileStreamHolder, public ConfigStreamReader {
 public:
  /*!
   * \brief constructor, opens the file and reads the first character
   * \param fname name of configure file; an error is reported through
   *  utils::Error when it can not be opened
   */
  explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) {
    fi.open(fname);
    if (fi.fail()) {
      utils::Error("cannot open file %s", fname);
    }
    ConfigReaderBase::Init();
  }
  /*! \brief destructor, closes the file */
  ~ConfigIterator(void) {
    fi.close();
  }
};
} // namespace utils
} // namespace xgboost
#endif // XGBOOST_UTILS_CONFIG_H_

80
src/utils/fmap.h Normal file
View File

@@ -0,0 +1,80 @@
#ifndef XGBOOST_UTILS_FMAP_H_
#define XGBOOST_UTILS_FMAP_H_
/*!
* \file fmap.h
* \brief helper class that holds the feature names and interpretations
* \author Tianqi Chen
*/
#include <vector>
#include <string>
#include <cstring>
#include "./utils.h"
namespace xgboost {
namespace utils {
/*! \brief helper class that holds the feature names and interpretations */
class FeatMap {
public:
enum Type {
kIndicator = 0,
kQuantitive = 1,
kInteger = 2,
kFloat = 3
};
// function definitions
/*! \brief load feature map from text format */
inline void LoadText(const char *fname) {
FILE *fi = utils::FopenCheck(fname, "r");
this->LoadText(fi);
fclose(fi);
}
/*! \brief load feature map from text format */
inline void LoadText(FILE *fi) {
int fid;
char fname[1256], ftype[1256];
while (fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3) {
this->PushBack(fid, fname, ftype);
}
}
/*!\brief push back feature map */
inline void PushBack(int fid, const char *fname, const char *ftype) {
utils::Check(fid == static_cast<int>(names_.size()), "invalid fmap format");
names_.push_back(std::string(fname));
types_.push_back(GetType(ftype));
}
inline void Clear(void) {
names_.clear(); types_.clear();
}
/*! \brief number of known features */
size_t size(void) const {
return names_.size();
}
/*! \brief return name of specific feature */
const char* name(size_t idx) const {
utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
return names_[idx].c_str();
}
/*! \brief return type of specific feature */
const Type& type(size_t idx) const {
utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
return types_[idx];
}
private:
inline static Type GetType(const char *tname) {
if (!strcmp("i", tname)) return kIndicator;
if (!strcmp("q", tname)) return kQuantitive;
if (!strcmp("int", tname)) return kInteger;
if (!strcmp("float", tname)) return kFloat;
utils::Error("unknown feature type, use i for indicator and q for quantity");
return kIndicator;
}
/*! \brief name of the feature */
std::vector<std::string> names_;
/*! \brief type of the feature */
std::vector<Type> types_;
};
} // namespace utils
} // namespace xgboost
#endif // XGBOOST_UTILS_FMAP_H_

104
src/utils/io.h Normal file
View File

@@ -0,0 +1,104 @@
#ifndef XGBOOST_UTILS_IO_H
#define XGBOOST_UTILS_IO_H
#include <cstdio>
#include <vector>
#include <string>
#include "./utils.h"
/*!
* \file io.h
* \brief general stream interface for serialization, I/O
* \author Tianqi Chen
*/
namespace xgboost {
namespace utils {
/*!
 * \brief interface of stream I/O, used to serialize model
 */
class IStream {
 public:
  /*!
   * \brief read data from stream
   * \param ptr pointer to memory buffer
   * \param size size of block
   * \return non-zero on success; the helpers below treat 0 as failure
   */
  virtual size_t Read(void *ptr, size_t size) = 0;
  /*!
   * \brief write data to stream
   * \param ptr pointer to memory buffer
   * \param size size of block
   */
  virtual void Write(const void *ptr, size_t size) = 0;
  /*! \brief virtual destructor */
  virtual ~IStream(void) {}

 public:
  // helper functions to write various of data structures
  /*!
   * \brief binary serialize a vector: a 64 bit element count followed by the
   *  raw bytes of the elements
   * \param vec vector to be serialized
   */
  template<typename T>
  inline void Write(const std::vector<T> &vec) {
    uint64_t sz = static_cast<uint64_t>(vec.size());
    this->Write(&sz, sizeof(sz));
    // an empty vector has no element whose address could be taken
    if (sz != 0) {
      this->Write(&vec[0], sizeof(T) * vec.size());
    }
  }
  /*!
   * \brief binary load a vector
   * \param out_vec vector to be loaded
   * \return whether load is successful, an empty vector loads successfully
   */
  template<typename T>
  inline bool Read(std::vector<T> *out_vec) {
    uint64_t sz;
    if (this->Read(&sz, sizeof(sz)) == 0) return false;
    out_vec->resize(static_cast<size_t>(sz));
    // nothing follows the count of an empty vector; a zero byte read would
    // return 0 and be mistaken for a failure
    if (sz != 0) {
      if (this->Read(&(*out_vec)[0], sizeof(T) * out_vec->size()) == 0) return false;
    }
    return true;
  }
  /*!
   * \brief binary serialize a string: a 64 bit length followed by the characters
   * \param str the string to be serialized
   */
  inline void Write(const std::string &str) {
    uint64_t sz = static_cast<uint64_t>(str.length());
    this->Write(&sz, sizeof(sz));
    if (sz != 0) {
      this->Write(&str[0], sizeof(char) * str.length());
    }
  }
  /*!
   * \brief binary load a string
   * \param out_str string to be loaded
   * \return whether load is successful, an empty string loads successfully
   */
  inline bool Read(std::string *out_str) {
    uint64_t sz;
    if (this->Read(&sz, sizeof(sz)) == 0) return false;
    out_str->resize(static_cast<size_t>(sz));
    if (sz != 0) {
      if (this->Read(&(*out_str)[0], sizeof(char) * out_str->length()) == 0) return false;
    }
    return true;
  }
};
/*! \brief implementation of file i/o stream on top of a C FILE handle */
class FileStream : public IStream {
 private:
  /*! \brief the wrapped file handle */
  FILE *fp;

 public:
  /*!
   * \brief constructor
   * \param fp opened file handle to operate on
   */
  explicit FileStream(FILE *fp) : fp(fp) {}
  /*!
   * \brief read one block of size bytes
   * \return result of fread with an item count of 1
   */
  virtual size_t Read(void *ptr, size_t size) {
    const size_t n_read = fread(ptr, size, 1, fp);
    return n_read;
  }
  /*! \brief write one block of size bytes, the result of fwrite is not checked */
  virtual void Write(const void *ptr, size_t size) {
    fwrite(ptr, size, 1, fp);
  }
  /*! \brief close the file handle */
  inline void Close(void) {
    fclose(fp);
  }
};
} // namespace utils
} // namespace xgboost
#endif

40
src/utils/iterator.h Normal file
View File

@@ -0,0 +1,40 @@
#ifndef XGBOOST_UTILS_ITERATOR_H
#define XGBOOST_UTILS_ITERATOR_H
#include <cstdio>
/*!
* \file iterator.h
* \brief iterator interface
* \author Tianqi Chen
*/
namespace xgboost {
namespace utils {
/*!
* \brief abstract iterator interface, all operations are pure virtual
* \tparam DType data type of the items handed out by Value
*/
template<typename DType>
class IIterator {
public:
/*!
* \brief set the parameter
* \param name name of parameter
* \param val value of parameter
*/
virtual void SetParam(const char *name, const char *val) = 0;
/*! \brief initialize the iterator so that we can use the iterator */
virtual void Init(void) = 0;
/*! \brief set before first of the item */
virtual void BeforeFirst(void) = 0;
/*!
* \brief move to next item
* \return NOTE(review): presumably true while an item is available - confirm against the implementations
*/
virtual bool Next(void) = 0;
/*! \brief get current data */
virtual const DType &Value(void) const = 0;
public:
/*! \brief virtual destructor */
virtual ~IIterator(void) {}
};
} // namespace utils
} // namespace xgboost
#endif

123
src/utils/matrix_csr.h Normal file
View File

@@ -0,0 +1,123 @@
#ifndef XGBOOST_UTILS_MATRIX_CSR_H_
#define XGBOOST_UTILS_MATRIX_CSR_H_
/*!
* \file matrix_csr.h
* \brief this file defines some easy to use STL based class for in memory sparse CSR matrix
* \author Tianqi Chen
*/
#include <vector>
#include <algorithm>
#include "./utils.h"
namespace xgboost {
namespace utils {
/*!
 * \brief a class used to help construct CSR format matrix,
 *  can be used to convert row major CSR to column major CSR
 *
 *  The builder works on vectors owned by the caller and is driven in steps:
 *  InitBudget, AddBudget for every element, InitStorage, PushElem for every
 *  element and, only when the active list is used, Cleanup.
 * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
 * \tparam UseAcList whether enabling the usage of aclist, this option must be enabled manually
 */
template<typename IndexType, bool UseAcList = false>
struct SparseCSRMBuilder {
 private:
  /*! \brief placeholder that aclist refers to when no active list is supplied */
  std::vector<size_t> dummy_aclist;
  /*! \brief pointer to each of the row */
  std::vector<size_t> &rptr;
  /*! \brief index of nonzero entries in each row */
  std::vector<IndexType> &findex;
  /*! \brief a list of active rows, used when many rows are empty */
  std::vector<size_t> &aclist;

 public:
  /*!
   * \brief constructor without active list
   * \param p_rptr row pointer vector to be filled
   * \param p_findex index vector to be filled
   */
  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
                    std::vector<IndexType> &p_findex)
      : rptr(p_rptr), findex(p_findex), aclist(dummy_aclist) {
    Assert(!UseAcList, "enabling bug");
  }
  /*! \brief constructor with active list, use with caution! rptr must be cleaned before use */
  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
                    std::vector<IndexType> &p_findex,
                    std::vector<size_t> &p_aclist)
      : rptr(p_rptr), findex(p_findex), aclist(p_aclist) {
    Assert(UseAcList, "must manually enable the option use aclist");
  }

 public:
  /*!
   * \brief step 1: initialize the number of rows in the data, not necessary exact
   * \param nrows number of rows in the matrix, can be smaller than expected
   */
  inline void InitBudget(size_t nrows = 0) {
    if (UseAcList) {
      // with an active list rptr is reused, only the touched rows are reset
      Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
      this->Cleanup();
      return;
    }
    rptr.clear();
    rptr.resize(nrows + 1, 0);
  }
  /*!
   * \brief step 2: add budget to each rows
   * \param row_id the id of the row
   * \param nelem number of element budget add to this row
   */
  inline void AddBudget(size_t row_id, size_t nelem = 1) {
    const size_t required = row_id + 2;
    if (rptr.size() < required) {
      rptr.resize(required, 0);
    }
    size_t &budget = rptr[row_id + 1];
    // a row enters the active list when it receives its first budget
    if (UseAcList && budget == 0) {
      aclist.push_back(row_id);
    }
    budget += nelem;
  }
  /*! \brief step 3: initialize the necessary storage */
  inline void InitStorage(void) {
    // turn the per-row budgets into the start offset of each row
    size_t offset = 0;
    if (UseAcList) {
      // case with active list
      std::sort(aclist.begin(), aclist.end());
      for (size_t k = 0; k < aclist.size(); ++k) {
        const size_t row = aclist[k];
        const size_t len = rptr[row + 1];
        rptr[row + 1] = offset;
        // the slot of the row itself is only set when the row before it is not active
        const bool follows_active = (k != 0) && (row == aclist[k - 1] + 1);
        if (!follows_active) rptr[row] = offset;
        offset += len;
      }
    } else {
      for (size_t i = 1; i < rptr.size(); ++i) {
        const size_t len = rptr[i];
        rptr[i] = offset;
        offset += len;
      }
    }
    findex.resize(offset);
  }
  /*!
   * \brief step 4:
   *  used in indicator matrix construction, add new
   *  element to each row, the number of calls shall be exactly same as add_budget
   * \param row_id the id of the row
   * \param col_id the index stored for the new element
   */
  inline void PushElem(size_t row_id, IndexType col_id) {
    // rptr[row_id + 1] is the write position of the row and moves with every element
    findex[rptr[row_id + 1]++] = col_id;
  }
  /*!
   * \brief step 5: only needed when aclist is used
   *  clean up the rptr for next usage
   */
  inline void Cleanup(void) {
    Assert(UseAcList, "this function can only be called use AcList");
    size_t k = 0;
    while (k < aclist.size()) {
      const size_t row = aclist[k];
      rptr[row] = 0;
      rptr[row + 1] = 0;
      ++k;
    }
    aclist.clear();
  }
};
} // namespace utils
} // namespace xgboost
#endif

16
src/utils/omp.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef XGBOOST_UTILS_OMP_H_
#define XGBOOST_UTILS_OMP_H_
/*!
* \file omp.h
* \brief header to handle OpenMP compatibility issues
* \author Tianqi Chen
*/
#if defined(_OPENMP)
#include <omp.h>
#else
#warning "OpenMP is not available, compile to single thread code"
// single thread replacements for the OpenMP runtime functions
/*! \brief id of the calling thread, always 0 */
inline int omp_get_thread_num(void) {
  return 0;
}
/*! \brief number of threads, always 1 */
inline int omp_get_num_threads(void) {
  return 1;
}
/*! \brief accepted for compatibility, the requested thread count is ignored */
inline void omp_set_num_threads(int /*nthread*/) {
}
#endif
#endif // XGBOOST_UTILS_OMP_H_

102
src/utils/random.h Normal file
View File

@@ -0,0 +1,102 @@
#ifndef XGBOOST_UTILS_RANDOM_H_
#define XGBOOST_UTILS_RANDOM_H_
/*!
* \file random.h
* \brief PRNG to support random number generation
* \author Tianqi Chen: tianqi.tchen@gmail.com
*
* Use standard PRNG from stdlib
*/
#include <cmath>
#include <cstdlib>
#include <vector>
#include <algorithm>
#include "./utils.h"
/*! namespace of PRNG */
namespace xgboost {
namespace random {
/*!
 * \brief seed the PRNG of the C library
 * \param seed value handed to srand
 */
inline void Seed(uint32_t seed) {
  const unsigned int value = seed;
  srand(value);
}
/*! \brief return a real number uniform in [0,1), drawn with rand() */
inline double NextDouble(void) {
  const double draw = static_cast<double>(rand());
  const double range = static_cast<double>(RAND_MAX) + 1.0;
  return draw / range;
}
/*! \brief return a real number uniform in (0,1), both ends are excluded */
inline double NextDouble2(void) {
  const double draw = static_cast<double>(rand()) + 1.0;
  const double range = static_cast<double>(RAND_MAX) + 2.0;
  return draw / range;
}
/*! \brief return a random number, the result of rand() converted to uint32_t */
inline uint32_t NextUInt32(void) {
  const int draw = rand();
  return static_cast<uint32_t>(draw);
}
/*!
 * \brief return a random number in [0, n)
 * \param n exclusive upper bound, NextDouble() is scaled by it and rounded down
 */
inline uint32_t NextUInt32(uint32_t n) {
  const double scaled = NextDouble() * n;
  return static_cast<uint32_t>(floor(scaled));
}
/*!
 * \brief return x~N(0,1)
 *  Points (x, y) are drawn from the square (-1,1)x(-1,1) until one lies
 *  inside the unit circle and is not the origin.
 */
inline double SampleNormal() {
  for (;;) {
    const double x = 2 * NextDouble2() - 1.0;
    const double y = 2 * NextDouble2() - 1.0;
    const double s = x*x + y*y;
    if (!(s >= 1.0 || s == 0.0)) {
      return x * sqrt(-2.0 * log(s) / s);
    }
  }
}
/*!
 * \brief return iid x,y ~N(0,1)
 * \param xx receives the first sample
 * \param yy receives the second sample
 */
inline void SampleNormal2D(double &xx, double &yy) {
  for (;;) {
    const double x = 2 * NextDouble2() - 1.0;
    const double y = 2 * NextDouble2() - 1.0;
    const double s = x*x + y*y;
    // retry while the point is outside the unit circle or is the origin
    if (s >= 1.0 || s == 0.0) continue;
    const double scale = sqrt(-2.0 * log(s) / s);
    xx = x * scale;
    yy = y * scale;
    return;
  }
}
/*!
 * \brief return x~N(mu,sigma^2)
 * \param mu value added to the scaled standard normal sample
 * \param sigma factor the standard normal sample is multiplied with
 */
inline double SampleNormal(double mu, double sigma) {
  const double z = SampleNormal();
  return z * sigma + mu;
}
/*!
 * \brief return 1 with probability p, coin flip
 * \param p threshold the uniform draw from NextDouble() is compared against
 */
inline int SampleBinary(double p) {
  const double u = NextDouble();
  return (u < p) ? 1 : 0;
}
/*!
 * \brief randomly shuffle an array in place, requires the global PRNG
 *  Walks from the last position down to position 1 and swaps each position
 *  with a position drawn by NextUInt32 from those not above it.
 * \param data pointer to the first element
 * \param sz number of elements, converted to uint32_t before use
 */
template<typename T>
inline void Shuffle(T *data, size_t sz) {
  if (sz == 0) return;
  uint32_t last = static_cast<uint32_t>(sz) - 1;
  while (last > 0) {
    const uint32_t pick = NextUInt32(last + 1);
    std::swap(data[last], data[pick]);
    --last;
  }
}
/*!
 * \brief randomly shuffle the data inside a vector, requires the global PRNG
 * \param data vector to be shuffled in place, an empty vector is left untouched
 */
template<typename T>
inline void Shuffle(std::vector<T> &data) {
  // an empty vector has no first element whose address could be taken
  if (data.empty()) return;
  Shuffle(&data[0], data.size());
}
/*!
 * \brief random number generator with independent random number seed,
 *  the state is kept in rseed and advanced by rand_r
 */
struct Random{
  /*!
   * \brief set random number seed
   * \param sd new value of the generator state
   */
  inline void Seed(unsigned sd) {
    rseed = sd;
  }
  /*! \brief return a real number uniform in [0,1) */
  inline double RandDouble(void) {
    const int draw = rand_r(&rseed);
    const double range = static_cast<double>(RAND_MAX) + 1.0;
    return static_cast<double>(draw) / range;
  }
  /*! \brief random number seed, not set until Seed is called */
  unsigned rseed;
};
} // namespace random
} // namespace xgboost
#endif // XGBOOST_UTILS_RANDOM_H_

94
src/utils/utils.h Normal file
View File

@@ -0,0 +1,94 @@
#ifndef XGBOOST_UTILS_UTILS_H_
#define XGBOOST_UTILS_UTILS_H_
/*!
* \file utils.h
* \brief simple utils to support the code
* \author Tianqi Chen
*/
// platform setup: makes the name fopen64 (used by FopenCheck) available and
// requests 64 bit file offsets on non-MSVC builds
// MSVC macro that turns off its warnings about the classic C library functions
#define _CRT_SECURE_NO_WARNINGS
#ifdef _MSC_VER
// MSVC: fopen64 is mapped to plain fopen
#define fopen64 fopen
#else
// warn when the build already asked for 32 bit file offsets
#ifdef _FILE_OFFSET_BITS
#if _FILE_OFFSET_BITS == 32
#warning "FILE OFFSET BITS defined to be 32 bit"
#endif
#endif
#ifdef __APPLE__
// Apple: the 64 suffixed names are mapped to the plain ones
#define off64_t off_t
#define fopen64 fopen
#endif
// NOTE(review): defined unconditionally, so this redefines a value set by the build - confirm this is intended
#define _FILE_OFFSET_BITS 64
extern "C" {
#include <sys/types.h>
};
#endif
// fixed width integer types: declared by hand for MSVC, taken from
// <inttypes.h> everywhere else
#ifdef _MSC_VER
typedef unsigned char uint8_t;
typedef unsigned short int uint16_t;
typedef unsigned int uint32_t;
// long is only 32 bits wide under MSVC, the 64 bit types need __int64
typedef unsigned __int64 uint64_t;
typedef __int64 int64_t;
#else
#include <inttypes.h>
#endif
#include <cstdio>
#include <cstdarg>
#include <cstdlib>
namespace xgboost {
/*! \brief namespace for helper utils of the project */
namespace utils {
/*!
 * \brief assert a condition is true, use this to handle debug information
 *  On failure "AssertError:" and the formatted message are written to stderr
 *  and the program exits with code -1.
 * \param exp the condition that is expected to hold
 * \param fmt printf style format of the message, followed by its arguments
 */
inline void Assert(bool exp, const char *fmt, ...) {
  if (exp) return;
  fprintf(stderr, "AssertError:");
  va_list args;
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  fprintf(stderr, "\n");
  exit(-1);
}
/*!
 * \brief same as assert, but this is intended to be used as message for user
 *  On failure the formatted message is written to stderr and the program
 *  exits with code -1.
 * \param exp the condition that is expected to hold
 * \param fmt printf style format of the message, followed by its arguments
 */
inline void Check(bool exp, const char *fmt, ...) {
  if (exp) return;
  va_list args;
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  fprintf(stderr, "\n");
  exit(-1);
}
/*!
 * \brief report error message, same as check with a condition that failed:
 *  writes the formatted message to stderr and exits with code -1
 * \param fmt printf style format of the message, followed by its arguments
 */
inline void Error(const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  fprintf(stderr, "\n");
  exit(-1);
}
/*!
 * \brief replace fopen, report error when the file open fails
 * \param fname name of the file
 * \param flag open mode, passed on to fopen64
 * \return the opened file handle
 */
inline FILE *FopenCheck(const char *fname, const char *flag) {
  FILE *const handle = fopen64(fname, flag);
  const bool opened = (handle != NULL);
  Check(opened, "can not open file \"%s\"\n", fname);
  return handle;
}
} // namespace utils
} // namespace xgboost
#endif // XGBOOST_UTILS_UTILS_H_