fix all utils

This commit is contained in:
tqchen 2015-07-03 18:44:01 -07:00
parent 0162bb7034
commit 1581de08da
11 changed files with 164 additions and 139 deletions

View File

@ -1,13 +1,16 @@
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
/*! /*!
* Copyright 2014 by Contributors
* \file base64.h * \file base64.h
* \brief data stream support to input and output from/to base64 stream * \brief data stream support to input and output from/to base64 stream
* base64 is easier to store and pass as text format in mapreduce * base64 is easier to store and pass as text format in mapreduce
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_BASE64_INL_H_
#define XGBOOST_UTILS_BASE64_INL_H_
#include <cctype> #include <cctype>
#include <cstdio> #include <cstdio>
#include <string>
#include "./io.h" #include "./io.h"
namespace xgboost { namespace xgboost {
@ -15,7 +18,7 @@ namespace utils {
/*! \brief buffer reader of the stream that allows you to get */ /*! \brief buffer reader of the stream that allows you to get */
class StreamBufferReader { class StreamBufferReader {
public: public:
StreamBufferReader(size_t buffer_size) explicit StreamBufferReader(size_t buffer_size)
:stream_(NULL), :stream_(NULL),
read_len_(1), read_ptr_(1) { read_len_(1), read_ptr_(1) {
buffer_.resize(buffer_size); buffer_.resize(buffer_size);
@ -45,7 +48,7 @@ class StreamBufferReader {
inline bool AtEnd(void) const { inline bool AtEnd(void) const {
return read_len_ == 0; return read_len_ == 0;
} }
private: private:
/*! \brief the underlying stream */ /*! \brief the underlying stream */
IStream *stream_; IStream *stream_;
@ -75,7 +78,7 @@ const char DecodeTable[] = {
}; };
static const char EncodeTable[] = static const char EncodeTable[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
} // namespace base64 } // namespace base64
/*! \brief the stream that reads from base64, note we take from file pointers */ /*! \brief the stream that reads from base64, note we take from file pointers */
class Base64InStream: public IStream { class Base64InStream: public IStream {
public: public:
@ -83,8 +86,8 @@ class Base64InStream: public IStream {
reader_.set_stream(fs); reader_.set_stream(fs);
num_prev = 0; tmp_ch = 0; num_prev = 0; tmp_ch = 0;
} }
/*! /*!
* \brief initialize the stream position to beginning of next base64 stream * \brief initialize the stream position to beginning of next base64 stream
* call this function before actually start read * call this function before actually start read
*/ */
inline void InitPosition(void) { inline void InitPosition(void) {
@ -132,19 +135,19 @@ class Base64InStream: public IStream {
{ {
// second byte // second byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
nvalue |= DecodeTable[tmp_ch] << 12; nvalue |= DecodeTable[tmp_ch] << 12;
*cptr++ = (nvalue >> 16) & 0xFF; --tlen; *cptr++ = (nvalue >> 16) & 0xFF; --tlen;
} }
{ {
// third byte // third byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
// handle termination // handle termination
if (tmp_ch == '=') { if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format"); utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
break; break;
} }
nvalue |= DecodeTable[tmp_ch] << 6; nvalue |= DecodeTable[tmp_ch] << 6;
@ -157,10 +160,10 @@ class Base64InStream: public IStream {
{ {
// fourth byte // fourth byte
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
if (tmp_ch == '=') { if (tmp_ch == '=') {
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
"invalid base64 format"); "invalid base64 format");
break; break;
} }
nvalue |= DecodeTable[tmp_ch]; nvalue |= DecodeTable[tmp_ch];
@ -240,13 +243,13 @@ class Base64OutStream: public IStream {
if (endch != EOF) PutChar(endch); if (endch != EOF) PutChar(endch);
this->Flush(); this->Flush();
} }
private: private:
IStream *fp; IStream *fp;
int buf_top; int buf_top;
unsigned char buf[4]; unsigned char buf[4];
std::string out_buf; std::string out_buf;
const static size_t kBufferSize = 256; static const size_t kBufferSize = 256;
inline void PutChar(char ch) { inline void PutChar(char ch) {
out_buf += ch; out_buf += ch;
@ -260,5 +263,5 @@ class Base64OutStream: public IStream {
} }
}; };
} // namespace utils } // namespace utils
} // namespace rabit } // namespace xgboost
#endif // RABIT_LEARN_UTILS_BASE64_INL_H_ #endif // XGBOOST_UTILS_BASE64_INL_H_

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_UTILS_BITMAP_H_
#define XGBOOST_UTILS_BITMAP_H_
/*! /*!
* Copyright 2014 by Contributors
* \file bitmap.h * \file bitmap.h
* \brief a simple implementation of bitmap * \brief a simple implementation of bitmap
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap * NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_BITMAP_H_
#define XGBOOST_UTILS_BITMAP_H_
#include <vector> #include <vector>
#include "./utils.h" #include "./utils.h"
#include "./omp.h" #include "./omp.h"
@ -16,22 +18,22 @@ namespace utils {
struct BitMap { struct BitMap {
/*! \brief internal data structure */ /*! \brief internal data structure */
std::vector<uint32_t> data; std::vector<uint32_t> data;
/*! /*!
* \brief resize the bitmap to be certain size * \brief resize the bitmap to be certain size
* \param size the size of bitmap * \param size the size of bitmap
*/ */
inline void Resize(size_t size) { inline void Resize(size_t size) {
data.resize((size + 31U) >> 5, 0); data.resize((size + 31U) >> 5, 0);
} }
/*! /*!
* \brief query the i-th position of bitmap * \brief query the i-th position of bitmap
* \param i the position in * \param i the position in
*/ */
inline bool Get(size_t i) const { inline bool Get(size_t i) const {
return (data[i >> 5] >> (i & 31U)) & 1U; return (data[i >> 5] >> (i & 31U)) & 1U;
} }
/*! /*!
* \brief set i-th position to true * \brief set i-th position to true
* \param i position index * \param i position index
*/ */
inline void SetTrue(size_t i) { inline void SetTrue(size_t i) {
@ -63,4 +65,4 @@ struct BitMap {
}; };
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_BITMAP_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_FMAP_H_
#define XGBOOST_UTILS_FMAP_H_
/*! /*!
* Copyright 2014 by Contributors
* \file fmap.h * \file fmap.h
* \brief helper class that holds the feature names and interpretations * \brief helper class that holds the feature names and interpretations
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_FMAP_H_
#define XGBOOST_UTILS_FMAP_H_
#include <vector> #include <vector>
#include <string> #include <string>
#include <cstring> #include <cstring>
@ -78,4 +80,4 @@ class FeatMap {
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_FMAP_H_ #endif // XGBOOST_UTILS_FMAP_H_

View File

@ -111,5 +111,4 @@ struct ParallelGroupBuilder {
}; };
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_GROUP_DATA_H_

View File

@ -1,11 +1,13 @@
#ifndef XGBOOST_UTILS_ITERATOR_H
#define XGBOOST_UTILS_ITERATOR_H
#include <cstdio>
/*! /*!
* Copyright 2014 by Contributors
* \file iterator.h * \file iterator.h
* \brief iterator interface * \brief iterator interface
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_ITERATOR_H_
#define XGBOOST_UTILS_ITERATOR_H_
#include <cstdio>
namespace xgboost { namespace xgboost {
namespace utils { namespace utils {
/*! /*!
@ -16,7 +18,7 @@ template<typename DType>
class IIterator { class IIterator {
public: public:
/*! /*!
* \brief set the parameter * \brief set the parameter
* \param name name of parameter * \param name name of parameter
* \param val value of parameter * \param val value of parameter
*/ */
@ -36,5 +38,5 @@ class IIterator {
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_ITERATOR_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_MATH_H_
#define XGBOOST_UTILS_MATH_H_
/*! /*!
* Copyright 2014 by Contributors
* \file math.h * \file math.h
* \brief support additional math * \brief support additional math
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_MATH_H_
#define XGBOOST_UTILS_MATH_H_
#include <cmath> #include <cmath>
namespace xgboost { namespace xgboost {
@ -28,7 +30,8 @@ inline T LogGamma(T v) {
#if _MSC_VER >= 1800 #if _MSC_VER >= 1800
return lgamma(v); return lgamma(v);
#else #else
#pragma message ("Warning: lgamma function was not available until VS2013, poisson regression will be disabled") #pragma message("Warning: lgamma function was not available until VS2013"\
", poisson regression will be disabled")
utils::Error("lgamma function was not available until VS2013"); utils::Error("lgamma function was not available until VS2013");
return static_cast<T>(1.0); return static_cast<T>(1.0);
#endif #endif

View File

@ -1,16 +1,20 @@
#ifndef XGBOOST_UTILS_OMP_H_
#define XGBOOST_UTILS_OMP_H_
/*! /*!
* Copyright 2014 by Contributors
* \file omp.h * \file omp.h
* \brief header to handle OpenMP compatibility issues * \brief header to handle OpenMP compatibility issues
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_OMP_H_
#define XGBOOST_UTILS_OMP_H_
#if defined(_OPENMP) #if defined(_OPENMP)
#include <omp.h> #include <omp.h>
#else #else
#ifndef DISABLE_OPENMP #ifndef DISABLE_OPENMP
// use pragma message instead of warning // use pragma message instead of warning
#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading") #pragma message("Warning: OpenMP is not available,"\
"xgboost will be compiled into single-thread code."\
"Use OpenMP-enabled compiler to get benefit of multi-threading")
#endif #endif
inline int omp_get_thread_num() { return 0; } inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; } inline int omp_get_num_threads() { return 1; }
@ -25,6 +29,6 @@ typedef int bst_omp_uint;
#else #else
typedef unsigned bst_omp_uint; typedef unsigned bst_omp_uint;
#endif #endif
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_UTILS_OMP_H_ #endif // XGBOOST_UTILS_OMP_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_QUANTILE_H_
#define XGBOOST_UTILS_QUANTILE_H_
/*! /*!
* Copyright 2014 by Contributors
* \file quantile.h * \file quantile.h
* \brief util to compute quantiles * \brief util to compute quantiles
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_QUANTILE_H_
#define XGBOOST_UTILS_QUANTILE_H_
#include <cmath> #include <cmath>
#include <vector> #include <vector>
#include <cstring> #include <cstring>
@ -37,8 +39,8 @@ struct WQSummary {
// constructor // constructor
Entry(RType rmin, RType rmax, RType wmin, DType value) Entry(RType rmin, RType rmax, RType wmin, DType value)
: rmin(rmin), rmax(rmax), wmin(wmin), value(value) {} : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
/*! /*!
* \brief debug function, check Valid * \brief debug function, check Valid
* \param eps the tolerance level for violating the relation * \param eps the tolerance level for violating the relation
*/ */
inline void CheckValid(RType eps = 0) const { inline void CheckValid(RType eps = 0) const {
@ -65,7 +67,7 @@ struct WQSummary {
// default constructor // default constructor
QEntry(void) {} QEntry(void) {}
// constructor // constructor
QEntry(DType value, RType weight) QEntry(DType value, RType weight)
: value(value), weight(weight) {} : value(value), weight(weight) {}
// comparator on value // comparator on value
inline bool operator<(const QEntry &b) const { inline bool operator<(const QEntry &b) const {
@ -83,11 +85,11 @@ struct WQSummary {
} else { } else {
queue[qtail - 1].weight += w; queue[qtail - 1].weight += w;
} }
} }
inline void MakeSummary(WQSummary *out) { inline void MakeSummary(WQSummary *out) {
std::sort(queue.begin(), queue.begin() + qtail); std::sort(queue.begin(), queue.begin() + qtail);
out->size = 0; out->size = 0;
// start update sketch // start update sketch
RType wsum = 0; RType wsum = 0;
// construct data with unique weights // construct data with unique weights
for (size_t i = 0; i < qtail;) { for (size_t i = 0; i < qtail;) {
@ -106,7 +108,7 @@ struct WQSummary {
/*! \brief number of elements in the summary */ /*! \brief number of elements in the summary */
size_t size; size_t size;
// constructor // constructor
WQSummary(Entry *data, size_t size) WQSummary(Entry *data, size_t size)
: data(data), size(size) {} : data(data), size(size) {}
/*! /*!
* \return the maximum error of the Summary * \return the maximum error of the Summary
@ -119,12 +121,12 @@ struct WQSummary {
} }
return res; return res;
} }
/*! /*!
* \brief query qvalue, start from istart * \brief query qvalue, start from istart
* \param qvalue the value we query for * \param qvalue the value we query for
* \param istart starting position * \param istart starting position
*/ */
inline Entry Query(DType qvalue, size_t &istart) const { inline Entry Query(DType qvalue, size_t &istart) const { // NOLINT(*)
while (istart < size && qvalue > data[istart].value) { while (istart < size && qvalue > data[istart].value) {
++istart; ++istart;
} }
@ -136,7 +138,7 @@ struct WQSummary {
return data[istart]; return data[istart];
} else { } else {
if (istart == 0) { if (istart == 0) {
return Entry(0.0f, 0.0f, 0.0f, qvalue); return Entry(0.0f, 0.0f, 0.0f, qvalue);
} else { } else {
return Entry(data[istart - 1].rmin_next(), return Entry(data[istart - 1].rmin_next(),
data[istart].rmax_prev(), data[istart].rmax_prev(),
@ -154,12 +156,12 @@ struct WQSummary {
*/ */
inline void CopyFrom(const WQSummary &src) { inline void CopyFrom(const WQSummary &src) {
size = src.size; size = src.size;
std::memcpy(data, src.data, sizeof(Entry) * size); std::memcpy(data, src.data, sizeof(Entry) * size);
} }
/*! /*!
* \brief debug function, validate whether the summary * \brief debug function, validate whether the summary
* run consistency check to check if it is a valid summary * run consistency check to check if it is a valid summary
* \param eps the tolerated error level, used when RType is floating point and * \param eps the tolerated error level, used when RType is floating point and
* some inconsistency could occur due to rounding error * some inconsistency could occur due to rounding error
*/ */
inline void CheckValid(RType eps) const { inline void CheckValid(RType eps) const {
@ -199,8 +201,8 @@ struct WQSummary {
size_t i = 1, lastidx = 0; size_t i = 1, lastidx = 0;
for (size_t k = 1; k < n; ++k) { for (size_t k = 1; k < n; ++k) {
RType dx2 = 2 * ((k * range) / n + begin); RType dx2 = 2 * ((k * range) / n + begin);
// find first i such that d < (rmax[i+1] + rmin[i+1]) / 2 // find first i such that d < (rmax[i+1] + rmin[i+1]) / 2
while (i < src.size - 1 while (i < src.size - 1
&& dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
utils::Assert(i != src.size - 1, "this cannot happen"); utils::Assert(i != src.size - 1, "this cannot happen");
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
@ -217,7 +219,7 @@ struct WQSummary {
data[size++] = src.data[src.size - 1]; data[size++] = src.data[src.size - 1];
} }
} }
/*! /*!
* \brief set current summary to be merged summary of sa and sb * \brief set current summary to be merged summary of sa and sb
* \param sa first input summary to be merged * \param sa first input summary to be merged
* \param sb second input summary to be merged * \param sb second input summary to be merged
@ -230,7 +232,7 @@ struct WQSummary {
if (sb.size == 0) { if (sb.size == 0) {
this->CopyFrom(sa); return; this->CopyFrom(sa); return;
} }
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
const Entry *a = sa.data, *a_end = sa.data + sa.size; const Entry *a = sa.data, *a_end = sa.data + sa.size;
const Entry *b = sb.data, *b_end = sb.data + sb.size; const Entry *b = sb.data, *b_end = sb.data + sb.size;
// extended rmin value // extended rmin value
@ -297,7 +299,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
RType begin = src.data[0].rmax; RType begin = src.data[0].rmax;
size_t n = maxsize - 1, nbig = 0; size_t n = maxsize - 1, nbig = 0;
RType range = src.data[src.size - 1].rmin - begin; RType range = src.data[src.size - 1].rmin - begin;
// prune off zero weights // prune off zero weights
if (range == 0.0f) { if (range == 0.0f) {
// special case, contain only two effective data pts // special case, contain only two effective data pts
this->data[0] = src.data[0]; this->data[0] = src.data[0];
@ -331,7 +333,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
utils::Printf("LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n); utils::Printf("LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n);
utils::Printf("LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n", utils::Printf("LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n",
src.size, maxsize, static_cast<double>(range), src.size, maxsize, static_cast<double>(range),
static_cast<double>(chunk)); static_cast<double>(chunk));
for (size_t i = 0; i < src.size; ++i) { for (size_t i = 0; i < src.size; ++i) {
utils::Printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i, utils::Printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i,
src.data[i].rmin, src.data[i].rmax, src.data[i].wmin, src.data[i].rmin, src.data[i].rmax, src.data[i].wmin,
@ -352,7 +354,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
RType maxdx2 = src.data[end].rmax_prev() * 2; RType maxdx2 = src.data[end].rmax_prev() * 2;
for (; k < n; ++k) { for (; k < n; ++k) {
RType dx2 = 2 * ((k * mrange) / n + begin); RType dx2 = 2 * ((k * mrange) / n + begin);
if (dx2 >= maxdx2) break; if (dx2 >= maxdx2) break;
while (i < end && while (i < end &&
dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
@ -371,13 +373,13 @@ struct WXQSummary : public WQSummary<DType, RType> {
lastidx = end; lastidx = end;
} }
bid = end; bid = end;
// shift base by the gap // shift base by the gap
begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev(); begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev();
} }
} }
} }
}; };
/*! /*!
* \brief traditional GK summary * \brief traditional GK summary
*/ */
template<typename DType, typename RType> template<typename DType, typename RType>
@ -405,7 +407,7 @@ struct GKSummary {
// push data to the queue // push data to the queue
inline void Push(DType x, RType w) { inline void Push(DType x, RType w) {
queue[qtail++] = x; queue[qtail++] = x;
} }
inline void MakeSummary(GKSummary *out) { inline void MakeSummary(GKSummary *out) {
std::sort(queue.begin(), queue.begin() + qtail); std::sort(queue.begin(), queue.begin() + qtail);
out->size = qtail; out->size = qtail;
@ -419,7 +421,7 @@ struct GKSummary {
/*! \brief number of elements in the summary */ /*! \brief number of elements in the summary */
size_t size; size_t size;
GKSummary(Entry *data, size_t size) GKSummary(Entry *data, size_t size)
: data(data), size(size) {} : data(data), size(size) {}
/*! \brief the maximum error of the summary */ /*! \brief the maximum error of the summary */
inline RType MaxError(void) const { inline RType MaxError(void) const {
RType res = 0; RType res = 0;
@ -432,7 +434,7 @@ struct GKSummary {
inline RType MaxRank(void) const { inline RType MaxRank(void) const {
return data[size - 1].rmax; return data[size - 1].rmax;
} }
/*! /*!
* \brief copy content from src * \brief copy content from src
* \param src source sketch * \param src source sketch
*/ */
@ -450,8 +452,8 @@ struct GKSummary {
<< "[" << data[i].rmin << "," << data[i].rmax << "]" << "[" << data[i].rmin << "," << data[i].rmax << "]"
<< std::endl; << std::endl;
} }
} }
/*! /*!
* \brief set current summary to be pruned summary of src * \brief set current summary to be pruned summary of src
* assume data field is already allocated to be at least maxsize * assume data field is already allocated to be at least maxsize
* \param src source summary * \param src source summary
@ -486,8 +488,8 @@ struct GKSummary {
} }
if (sb.size == 0) { if (sb.size == 0) {
this->CopyFrom(sa); return; this->CopyFrom(sa); return;
} }
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
const Entry *a = sa.data, *a_end = sa.data + sa.size; const Entry *a = sa.data, *a_end = sa.data + sa.size;
const Entry *b = sb.data, *b_end = sb.data + sb.size; const Entry *b = sb.data, *b_end = sb.data + sb.size;
this->size = sa.size + sb.size; this->size = sa.size + sb.size;
@ -500,7 +502,7 @@ struct GKSummary {
aprev_rmin = a->rmin; aprev_rmin = a->rmin;
++dst; ++a; ++dst; ++a;
} else { } else {
*dst = Entry(aprev_rmin + b->rmin, *dst = Entry(aprev_rmin + b->rmin,
b->rmax + a->rmax - 1, b->value); b->rmax + a->rmax - 1, b->value);
bprev_rmin = b->rmin; bprev_rmin = b->rmin;
++dst; ++b; ++dst; ++b;
@ -537,15 +539,15 @@ class QuantileSketchTemplate {
/*! \brief type of summary type */ /*! \brief type of summary type */
typedef TSummary Summary; typedef TSummary Summary;
/*! \brief the entry type */ /*! \brief the entry type */
typedef typename Summary::Entry Entry; typedef typename Summary::Entry Entry;
/*! \brief same as summary, but use STL to backup the space */ /*! \brief same as summary, but use STL to backup the space */
struct SummaryContainer : public Summary { struct SummaryContainer : public Summary {
std::vector<Entry> space; std::vector<Entry> space;
SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) { SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
this->space = src.space; this->space = src.space;
this->data = BeginPtr(this->space); this->data = BeginPtr(this->space);
} }
SummaryContainer(void) : Summary(NULL, 0) { SummaryContainer(void) : Summary(NULL, 0) {
} }
/*! \brief reserve space for summary */ /*! \brief reserve space for summary */
inline void Reserve(size_t size) { inline void Reserve(size_t size) {
@ -554,7 +556,7 @@ class QuantileSketchTemplate {
this->data = BeginPtr(space); this->data = BeginPtr(space);
} }
} }
/*! /*!
* \brief set the space to be merge of all Summary arrays * \brief set the space to be merge of all Summary arrays
* \param begin beginning position in the summary array * \param begin beginning position in the summary array
* \param end ending position in the Summary array * \param end ending position in the Summary array
@ -597,7 +599,7 @@ class QuantileSketchTemplate {
} }
/*! \brief save the data structure into stream */ /*! \brief save the data structure into stream */
template<typename TStream> template<typename TStream>
inline void Save(TStream &fo) const { inline void Save(TStream &fo) const { // NOLINT(*)
fo.Write(&(this->size), sizeof(this->size)); fo.Write(&(this->size), sizeof(this->size));
if (this->size != 0) { if (this->size != 0) {
fo.Write(this->data, this->size * sizeof(Entry)); fo.Write(this->data, this->size * sizeof(Entry));
@ -605,15 +607,16 @@ class QuantileSketchTemplate {
} }
/*! \brief load data structure from input stream */ /*! \brief load data structure from input stream */
template<typename TStream> template<typename TStream>
inline void Load(TStream &fi) { inline void Load(TStream &fi) { // NOLINT(*)
utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1"); utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
this->Reserve(this->size); this->Reserve(this->size);
if (this->size != 0) { if (this->size != 0) {
utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2"); utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0,
"invalid SummaryArray 2");
} }
} }
}; };
/*! /*!
* \brief initialize the quantile sketch, given the performance specification * \brief initialize the quantile sketch, given the performance specification
* \param maxn maximum number of data points that can be fed into sketch * \param maxn maximum number of data points that can be fed into sketch
* \param eps accuracy level of summary * \param eps accuracy level of summary
@ -741,8 +744,8 @@ class QuantileSketchTemplate {
* \tparam DType type of data content * \tparam DType type of data content
* \tparam RType type of rank * \tparam RType type of rank
*/ */
template<typename DType, typename RType=unsigned> template<typename DType, typename RType = unsigned>
class WQuantileSketch : class WQuantileSketch :
public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{ public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{
}; };
@ -751,8 +754,8 @@ class WQuantileSketch :
* \tparam DType type of data content * \tparam DType type of data content
* \tparam RType type of rank * \tparam RType type of rank
*/ */
template<typename DType, typename RType=unsigned> template<typename DType, typename RType = unsigned>
class WXQuantileSketch : class WXQuantileSketch :
public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{ public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{
}; };
/*! /*!
@ -760,11 +763,11 @@ class WXQuantileSketch :
* \tparam DType type of data content * \tparam DType type of data content
* \tparam RType type of rank * \tparam RType type of rank
*/ */
template<typename DType, typename RType=unsigned> template<typename DType, typename RType = unsigned>
class GKQuantileSketch : class GKQuantileSketch :
public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{ public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{
}; };
} // utils } // namespace utils
} // xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_QUANTILE_H_

View File

@ -1,12 +1,14 @@
#ifndef XGBOOST_UTILS_RANDOM_H_
#define XGBOOST_UTILS_RANDOM_H_
/*! /*!
* Copyright 2014 by Contributors
* \file xgboost_random.h * \file xgboost_random.h
* \brief PRNG to support random number generation * \brief PRNG to support random number generation
* \author Tianqi Chen: tianqi.tchen@gmail.com * \author Tianqi Chen: tianqi.tchen@gmail.com
* *
* Use standard PRNG from stdlib * Use standard PRNG from stdlib
*/ */
#ifndef XGBOOST_UTILS_RANDOM_H_
#define XGBOOST_UTILS_RANDOM_H_
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <vector> #include <vector>
@ -23,11 +25,11 @@ inline void Seed(unsigned seed) {
} }
/*! \brief basic function, uniform */ /*! \brief basic function, uniform */
inline double Uniform(void) { inline double Uniform(void) {
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); // NOLINT(*)
} }
/*! \brief return a real number uniform in (0,1) */ /*! \brief return a real number uniform in (0,1) */
inline double NextDouble2(void) { inline double NextDouble2(void) {
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); // NOLINT(*)
} }
/*! \brief return x~N(0,1) */ /*! \brief return x~N(0,1) */
inline double Normal(void) { inline double Normal(void) {
@ -73,7 +75,7 @@ inline void Shuffle(T *data, size_t sz) {
} }
// random shuffle the data inside, require PRNG // random shuffle the data inside, require PRNG
template<typename T> template<typename T>
inline void Shuffle(std::vector<T> &data) { inline void Shuffle(std::vector<T> &data) { // NOLINT(*)
Shuffle(&data[0], data.size()); Shuffle(&data[0], data.size());
} }
@ -81,17 +83,18 @@ inline void Shuffle(std::vector<T> &data) {
struct Random{ struct Random{
/*! \brief set random number seed */ /*! \brief set random number seed */
inline void Seed(unsigned sd) { inline void Seed(unsigned sd) {
this->rseed = sd; this->rseed = sd;
#if defined(_MSC_VER)||defined(_WIN32) #if defined(_MSC_VER) || defined(_WIN32)
::xgboost::random::Seed(sd); ::xgboost::random::Seed(sd);
#endif #endif
} }
/*! \brief return a real number uniform in [0,1) */ /*! \brief return a real number uniform in [0,1) */
inline double RandDouble(void) { inline double RandDouble(void) {
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe // use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
// For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general // For cygwin and mingw, this can slows down parallelism,
// todo, replace with another PRNG // but rand_r is only used in objective-inl.hpp, won't affect speed in general
#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_) // todo, replace with another PRNG
#if defined(_MSC_VER) || defined(_WIN32) || defined(XGBOOST_STRICT_CXX98_)
return Uniform(); return Uniform();
#else #else
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0); return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
#define XGBOOST_UTILS_THREAD_BUFFER_H_
/*! /*!
* Copyright 2014 by Contributors
* \file thread_buffer.h * \file thread_buffer.h
* \brief multi-thread buffer, iterator, can be used to create parallel pipeline * \brief multi-thread buffer, iterator, can be used to create parallel pipeline
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
#define XGBOOST_UTILS_THREAD_BUFFER_H_
#include <vector> #include <vector>
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>
@ -27,7 +29,7 @@ class ThreadBuffer {
this->buf_size = 30; this->buf_size = 30;
} }
~ThreadBuffer(void) { ~ThreadBuffer(void) {
if(init_end) this->Destroy(); if (init_end) this->Destroy();
} }
/*!\brief set parameter, will also pass the parameter to factory */ /*!\brief set parameter, will also pass the parameter to factory */
inline void SetParam(const char *name, const char *val) { inline void SetParam(const char *name, const char *val) {
@ -38,7 +40,7 @@ class ThreadBuffer {
/*! /*!
* \brief initialize the buffered iterator * \brief initialize the buffered iterator
* \param param an initialization parameter that will be passed to the factory, ignore it if not necessary * \param param an initialization parameter that will be passed to the factory, ignore it if not necessary
* \return false if the initialization can't be done, e.g. buffer file hasn't been created * \return false if the initialization can't be done, e.g. buffer file hasn't been created
*/ */
inline bool Init(void) { inline bool Init(void) {
if (!factory.Init()) return false; if (!factory.Init()) return false;
@ -49,7 +51,7 @@ class ThreadBuffer {
this->init_end = true; this->init_end = true;
this->StartLoader(); this->StartLoader();
return true; return true;
} }
/*!\brief place the iterator before first value */ /*!\brief place the iterator before first value */
inline void BeforeFirst(void) { inline void BeforeFirst(void) {
// wait till last loader end // wait till last loader end
@ -70,7 +72,7 @@ class ThreadBuffer {
loading_need.Post(); loading_need.Post();
// set buffer value // set buffer value
buf_index = 0; buf_index = 0;
} }
/*! \brief destroy the buffer iterator, will deallocate the buffer */ /*! \brief destroy the buffer iterator, will deallocate the buffer */
inline void Destroy(void) { inline void Destroy(void) {
// wait until the signal is consumed // wait until the signal is consumed
@ -78,7 +80,7 @@ class ThreadBuffer {
loading_need.Post(); loading_need.Post();
loader_thread.Join(); loader_thread.Join();
loading_need.Destroy(); loading_need.Destroy();
loading_end.Destroy(); loading_end.Destroy();
for (size_t i = 0; i < bufA.size(); ++i) { for (size_t i = 0; i < bufA.size(); ++i) {
factory.FreeSpace(bufA[i]); factory.FreeSpace(bufA[i]);
} }
@ -88,37 +90,38 @@ class ThreadBuffer {
bufA.clear(); bufB.clear(); bufA.clear(); bufB.clear();
factory.Destroy(); factory.Destroy();
this->init_end = false; this->init_end = false;
} }
/*! /*!
* \brief get the next element needed in buffer * \brief get the next element needed in buffer
* \param elem element to store into * \param elem element to store into
* \return whether reaches end of data * \return whether reaches end of data
*/ */
inline bool Next(Elem &elem) { inline bool Next(Elem &elem) { // NOLINT(*)
// end of buffer try to switch // end of buffer try to switch
if (buf_index == buf_size) { if (buf_index == buf_size) {
this->SwitchBuffer(); this->SwitchBuffer();
buf_index = 0; buf_index = 0;
} }
if (buf_index >= (current_buf ? endA : endB)) { if (buf_index >= (current_buf ? endA : endB)) {
return false; return false;
} }
std::vector<Elem> &buf = current_buf ? bufA : bufB; std::vector<Elem> &buf = current_buf ? bufA : bufB;
elem = buf[buf_index]; elem = buf[buf_index];
++buf_index; ++buf_index;
return true; return true;
} }
/*! /*!
* \brief get the factory object * \brief get the factory object
*/ */
inline ElemFactory &get_factory(void) { inline ElemFactory &get_factory(void) {
return factory; return factory;
} }
inline const ElemFactory &get_factory(void) const{ inline const ElemFactory &get_factory(void) const {
return factory; return factory;
} }
// size of buffer // size of buffer
int buf_size; int buf_size;
private: private:
// factory object used to load configures // factory object used to load configures
ElemFactory factory; ElemFactory factory;
@ -147,15 +150,15 @@ class ThreadBuffer {
* this implementation is like producer-consumer style * this implementation is like producer-consumer style
*/ */
inline void RunLoader(void) { inline void RunLoader(void) {
while(!destroy_signal) { while (!destroy_signal) {
// sleep until loading is needed // sleep until loading is needed
loading_need.Wait(); loading_need.Wait();
std::vector<Elem> &buf = current_buf ? bufB : bufA; std::vector<Elem> &buf = current_buf ? bufB : bufA;
int i; int i;
for (i = 0; i < buf_size ; ++i) { for (i = 0; i < buf_size ; ++i) {
if (!factory.LoadNext(buf[i])) { if (!factory.LoadNext(buf[i])) {
int &end = current_buf ? endB : endA; int &end = current_buf ? endB : endA;
end = i; // marks the termination end = i; // marks the termination
break; break;
} }
} }
@ -166,14 +169,14 @@ class ThreadBuffer {
} }
/*!\brief entry point of loader thread */ /*!\brief entry point of loader thread */
inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) { inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) {
static_cast< ThreadBuffer<Elem,ElemFactory>* >(pthread)->RunLoader(); static_cast< ThreadBuffer<Elem, ElemFactory>* >(pthread)->RunLoader();
return NULL; return NULL;
} }
/*!\brief start loader thread */ /*!\brief start loader thread */
inline void StartLoader(void) { inline void StartLoader(void) {
destroy_signal = false; destroy_signal = false;
// set param // set param
current_buf = 1; current_buf = 1;
loading_need.Init(1); loading_need.Init(1);
loading_end .Init(0); loading_end .Init(0);
// reset terminate limit // reset terminate limit
@ -185,8 +188,8 @@ class ThreadBuffer {
current_buf = 0; current_buf = 0;
// wake loader for next part // wake loader for next part
data_loaded = false; data_loaded = false;
loading_need.Post(); loading_need.Post();
buf_index = 0; buf_index = 0;
} }
/*!\brief switch double buffer */ /*!\brief switch double buffer */
inline void SwitchBuffer(void) { inline void SwitchBuffer(void) {
@ -198,7 +201,6 @@ class ThreadBuffer {
loading_need.Post(); loading_need.Post();
} }
}; };
} // namespace utils } // namespace utils
} // namespace xgboost } // namespace xgboost
#endif #endif // XGBOOST_UTILS_THREAD_BUFFER_H_

View File

@ -1,10 +1,12 @@
#ifndef XGBOOST_UTILS_UTILS_H_
#define XGBOOST_UTILS_UTILS_H_
/*! /*!
* Copyright 2014 by Contributors
* \file utils.h * \file utils.h
* \brief simple utils to support the code * \brief simple utils to support the code
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#ifndef XGBOOST_UTILS_UTILS_H_
#define XGBOOST_UTILS_UTILS_H_
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <cstdio> #include <cstdio>
#include <string> #include <string>
@ -19,18 +21,18 @@
#define fopen64 std::fopen #define fopen64 std::fopen
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
// NOTE: sprintf_s is not equivalent to snprintf, // NOTE: sprintf_s is not equivalent to snprintf,
// they are equivalent when success, which is sufficient for our case // they are equivalent when success, which is sufficient for our case
#define snprintf sprintf_s #define snprintf sprintf_s
#define vsnprintf vsprintf_s #define vsnprintf vsprintf_s
#else #else
#ifdef _FILE_OFFSET_BITS #ifdef _FILE_OFFSET_BITS
#if _FILE_OFFSET_BITS == 32 #if _FILE_OFFSET_BITS == 32
#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit") #pragma message("Warning: FILE OFFSET BITS defined to be 32 bit")
#endif #endif
#endif #endif
#ifdef __APPLE__ #ifdef __APPLE__
#define off64_t off_t #define off64_t off_t
#define fopen64 std::fopen #define fopen64 std::fopen
#endif #endif
@ -58,17 +60,17 @@ namespace utils {
const int kPrintBuffer = 1 << 12; const int kPrintBuffer = 1 << 12;
#ifndef XGBOOST_CUSTOMIZE_MSG_ #ifndef XGBOOST_CUSTOMIZE_MSG_
/*! /*!
* \brief handling of Assert error, caused by in-apropriate input * \brief handling of Assert error, caused by in-apropriate input
* \param msg error message * \param msg error message
*/ */
inline void HandleAssertError(const char *msg) { inline void HandleAssertError(const char *msg) {
fprintf(stderr, "AssertError:%s\n", msg); fprintf(stderr, "AssertError:%s\n", msg);
exit(-1); exit(-1);
} }
/*! /*!
* \brief handling of Check error, caused by in-apropriate input * \brief handling of Check error, caused by in-apropriate input
* \param msg error message * \param msg error message
*/ */
inline void HandleCheckError(const char *msg) { inline void HandleCheckError(const char *msg) {
fprintf(stderr, "%s\n", msg); fprintf(stderr, "%s\n", msg);
@ -158,7 +160,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) {
// easy utils that can be directly acessed in xgboost // easy utils that can be directly acessed in xgboost
/*! \brief get the beginning address of a vector */ /*! \brief get the beginning address of a vector */
template<typename T> template<typename T>
inline T *BeginPtr(std::vector<T> &vec) { inline T *BeginPtr(std::vector<T> &vec) { // NOLINT(*)
if (vec.size() == 0) { if (vec.size() == 0) {
return NULL; return NULL;
} else { } else {
@ -174,7 +176,7 @@ inline const T *BeginPtr(const std::vector<T> &vec) {
return &vec[0]; return &vec[0];
} }
} }
inline char* BeginPtr(std::string &str) { inline char* BeginPtr(std::string &str) { // NOLINT(*)
if (str.length() == 0) return NULL; if (str.length() == 0) return NULL;
return &str[0]; return &str[0];
} }