fix all utils
This commit is contained in:
parent
0162bb7034
commit
1581de08da
@ -1,13 +1,16 @@
|
|||||||
#ifndef XGBOOST_UTILS_BASE64_INL_H_
|
|
||||||
#define XGBOOST_UTILS_BASE64_INL_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file base64.h
|
* \file base64.h
|
||||||
* \brief data stream support to input and output from/to base64 stream
|
* \brief data stream support to input and output from/to base64 stream
|
||||||
* base64 is easier to store and pass as text format in mapreduce
|
* base64 is easier to store and pass as text format in mapreduce
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_BASE64_INL_H_
|
||||||
|
#define XGBOOST_UTILS_BASE64_INL_H_
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
#include "./io.h"
|
#include "./io.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
@ -15,7 +18,7 @@ namespace utils {
|
|||||||
/*! \brief buffer reader of the stream that allows you to get */
|
/*! \brief buffer reader of the stream that allows you to get */
|
||||||
class StreamBufferReader {
|
class StreamBufferReader {
|
||||||
public:
|
public:
|
||||||
StreamBufferReader(size_t buffer_size)
|
explicit StreamBufferReader(size_t buffer_size)
|
||||||
:stream_(NULL),
|
:stream_(NULL),
|
||||||
read_len_(1), read_ptr_(1) {
|
read_len_(1), read_ptr_(1) {
|
||||||
buffer_.resize(buffer_size);
|
buffer_.resize(buffer_size);
|
||||||
@ -45,7 +48,7 @@ class StreamBufferReader {
|
|||||||
inline bool AtEnd(void) const {
|
inline bool AtEnd(void) const {
|
||||||
return read_len_ == 0;
|
return read_len_ == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/*! \brief the underlying stream */
|
/*! \brief the underlying stream */
|
||||||
IStream *stream_;
|
IStream *stream_;
|
||||||
@ -75,7 +78,7 @@ const char DecodeTable[] = {
|
|||||||
};
|
};
|
||||||
static const char EncodeTable[] =
|
static const char EncodeTable[] =
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||||
} // namespace base64
|
} // namespace base64
|
||||||
/*! \brief the stream that reads from base64, note we take from file pointers */
|
/*! \brief the stream that reads from base64, note we take from file pointers */
|
||||||
class Base64InStream: public IStream {
|
class Base64InStream: public IStream {
|
||||||
public:
|
public:
|
||||||
@ -83,8 +86,8 @@ class Base64InStream: public IStream {
|
|||||||
reader_.set_stream(fs);
|
reader_.set_stream(fs);
|
||||||
num_prev = 0; tmp_ch = 0;
|
num_prev = 0; tmp_ch = 0;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief initialize the stream position to beginning of next base64 stream
|
* \brief initialize the stream position to beginning of next base64 stream
|
||||||
* call this function before actually start read
|
* call this function before actually start read
|
||||||
*/
|
*/
|
||||||
inline void InitPosition(void) {
|
inline void InitPosition(void) {
|
||||||
@ -132,19 +135,19 @@ class Base64InStream: public IStream {
|
|||||||
{
|
{
|
||||||
// second byte
|
// second byte
|
||||||
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
|
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
|
||||||
"invalid base64 format");
|
"invalid base64 format");
|
||||||
nvalue |= DecodeTable[tmp_ch] << 12;
|
nvalue |= DecodeTable[tmp_ch] << 12;
|
||||||
*cptr++ = (nvalue >> 16) & 0xFF; --tlen;
|
*cptr++ = (nvalue >> 16) & 0xFF; --tlen;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// third byte
|
// third byte
|
||||||
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
|
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
|
||||||
"invalid base64 format");
|
"invalid base64 format");
|
||||||
// handle termination
|
// handle termination
|
||||||
if (tmp_ch == '=') {
|
if (tmp_ch == '=') {
|
||||||
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
|
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format");
|
||||||
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
|
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
|
||||||
"invalid base64 format");
|
"invalid base64 format");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nvalue |= DecodeTable[tmp_ch] << 6;
|
nvalue |= DecodeTable[tmp_ch] << 6;
|
||||||
@ -157,10 +160,10 @@ class Base64InStream: public IStream {
|
|||||||
{
|
{
|
||||||
// fourth byte
|
// fourth byte
|
||||||
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
|
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)),
|
||||||
"invalid base64 format");
|
"invalid base64 format");
|
||||||
if (tmp_ch == '=') {
|
if (tmp_ch == '=') {
|
||||||
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
|
utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)),
|
||||||
"invalid base64 format");
|
"invalid base64 format");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nvalue |= DecodeTable[tmp_ch];
|
nvalue |= DecodeTable[tmp_ch];
|
||||||
@ -240,13 +243,13 @@ class Base64OutStream: public IStream {
|
|||||||
if (endch != EOF) PutChar(endch);
|
if (endch != EOF) PutChar(endch);
|
||||||
this->Flush();
|
this->Flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IStream *fp;
|
IStream *fp;
|
||||||
int buf_top;
|
int buf_top;
|
||||||
unsigned char buf[4];
|
unsigned char buf[4];
|
||||||
std::string out_buf;
|
std::string out_buf;
|
||||||
const static size_t kBufferSize = 256;
|
static const size_t kBufferSize = 256;
|
||||||
|
|
||||||
inline void PutChar(char ch) {
|
inline void PutChar(char ch) {
|
||||||
out_buf += ch;
|
out_buf += ch;
|
||||||
@ -260,5 +263,5 @@ class Base64OutStream: public IStream {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
} // namespace rabit
|
} // namespace xgboost
|
||||||
#endif // RABIT_LEARN_UTILS_BASE64_INL_H_
|
#endif // XGBOOST_UTILS_BASE64_INL_H_
|
||||||
|
|||||||
@ -1,11 +1,13 @@
|
|||||||
#ifndef XGBOOST_UTILS_BITMAP_H_
|
|
||||||
#define XGBOOST_UTILS_BITMAP_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file bitmap.h
|
* \file bitmap.h
|
||||||
* \brief a simple implementation of a bitmap
|
* \brief a simple implementation of a bitmap
|
||||||
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
|
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_BITMAP_H_
|
||||||
|
#define XGBOOST_UTILS_BITMAP_H_
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "./utils.h"
|
#include "./utils.h"
|
||||||
#include "./omp.h"
|
#include "./omp.h"
|
||||||
@ -16,22 +18,22 @@ namespace utils {
|
|||||||
struct BitMap {
|
struct BitMap {
|
||||||
/*! \brief internal data structure */
|
/*! \brief internal data structure */
|
||||||
std::vector<uint32_t> data;
|
std::vector<uint32_t> data;
|
||||||
/*!
|
/*!
|
||||||
* \brief resize the bitmap to be certain size
|
* \brief resize the bitmap to be certain size
|
||||||
* \param size the size of bitmap
|
* \param size the size of bitmap
|
||||||
*/
|
*/
|
||||||
inline void Resize(size_t size) {
|
inline void Resize(size_t size) {
|
||||||
data.resize((size + 31U) >> 5, 0);
|
data.resize((size + 31U) >> 5, 0);
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief query the i-th position of bitmap
|
* \brief query the i-th position of bitmap
|
||||||
* \param i the position in the bitmap to query
|
* \param i the position in the bitmap to query
|
||||||
*/
|
*/
|
||||||
inline bool Get(size_t i) const {
|
inline bool Get(size_t i) const {
|
||||||
return (data[i >> 5] >> (i & 31U)) & 1U;
|
return (data[i >> 5] >> (i & 31U)) & 1U;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief set i-th position to true
|
* \brief set i-th position to true
|
||||||
* \param i position index
|
* \param i position index
|
||||||
*/
|
*/
|
||||||
inline void SetTrue(size_t i) {
|
inline void SetTrue(size_t i) {
|
||||||
@ -63,4 +65,4 @@ struct BitMap {
|
|||||||
};
|
};
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif
|
#endif // XGBOOST_UTILS_BITMAP_H_
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
#ifndef XGBOOST_UTILS_FMAP_H_
|
|
||||||
#define XGBOOST_UTILS_FMAP_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file fmap.h
|
* \file fmap.h
|
||||||
* \brief helper class that holds the feature names and interpretations
|
* \brief helper class that holds the feature names and interpretations
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_FMAP_H_
|
||||||
|
#define XGBOOST_UTILS_FMAP_H_
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@ -78,4 +80,4 @@ class FeatMap {
|
|||||||
|
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif // XGBOOST_FMAP_H_
|
#endif // XGBOOST_UTILS_FMAP_H_
|
||||||
|
|||||||
@ -111,5 +111,4 @@ struct ParallelGroupBuilder {
|
|||||||
};
|
};
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif
|
#endif // XGBOOST_UTILS_GROUP_DATA_H_
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +1,13 @@
|
|||||||
#ifndef XGBOOST_UTILS_ITERATOR_H
|
|
||||||
#define XGBOOST_UTILS_ITERATOR_H
|
|
||||||
#include <cstdio>
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file iterator.h
|
* \file iterator.h
|
||||||
* \brief iterator interface
|
* \brief iterator interface
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_ITERATOR_H_
|
||||||
|
#define XGBOOST_UTILS_ITERATOR_H_
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace utils {
|
namespace utils {
|
||||||
/*!
|
/*!
|
||||||
@ -16,7 +18,7 @@ template<typename DType>
|
|||||||
class IIterator {
|
class IIterator {
|
||||||
public:
|
public:
|
||||||
/*!
|
/*!
|
||||||
* \brief set the parameter
|
* \brief set the parameter
|
||||||
* \param name name of parameter
|
* \param name name of parameter
|
||||||
* \param val value of parameter
|
* \param val value of parameter
|
||||||
*/
|
*/
|
||||||
@ -36,5 +38,5 @@ class IIterator {
|
|||||||
|
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif
|
#endif // XGBOOST_UTILS_ITERATOR_H_
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
#ifndef XGBOOST_UTILS_MATH_H_
|
|
||||||
#define XGBOOST_UTILS_MATH_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file math.h
|
* \file math.h
|
||||||
* \brief support additional math
|
* \brief support additional math
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_MATH_H_
|
||||||
|
#define XGBOOST_UTILS_MATH_H_
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
@ -28,7 +30,8 @@ inline T LogGamma(T v) {
|
|||||||
#if _MSC_VER >= 1800
|
#if _MSC_VER >= 1800
|
||||||
return lgamma(v);
|
return lgamma(v);
|
||||||
#else
|
#else
|
||||||
#pragma message ("Warning: lgamma function was not available until VS2013, poisson regression will be disabled")
|
#pragma message("Warning: lgamma function was not available until VS2013"\
|
||||||
|
", poisson regression will be disabled")
|
||||||
utils::Error("lgamma function was not available until VS2013");
|
utils::Error("lgamma function was not available until VS2013");
|
||||||
return static_cast<T>(1.0);
|
return static_cast<T>(1.0);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -1,16 +1,20 @@
|
|||||||
#ifndef XGBOOST_UTILS_OMP_H_
|
|
||||||
#define XGBOOST_UTILS_OMP_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file omp.h
|
* \file omp.h
|
||||||
* \brief header to handle OpenMP compatibility issues
|
* \brief header to handle OpenMP compatibility issues
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_OMP_H_
|
||||||
|
#define XGBOOST_UTILS_OMP_H_
|
||||||
|
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#else
|
#else
|
||||||
#ifndef DISABLE_OPENMP
|
#ifndef DISABLE_OPENMP
|
||||||
// use pragma message instead of warning
|
// use pragma message instead of warning
|
||||||
#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. Use OpenMP-enabled compiler to get benefit of multi-threading")
|
#pragma message("Warning: OpenMP is not available,"\
|
||||||
|
"xgboost will be compiled into single-thread code."\
|
||||||
|
"Use OpenMP-enabled compiler to get benefit of multi-threading")
|
||||||
#endif
|
#endif
|
||||||
inline int omp_get_thread_num() { return 0; }
|
inline int omp_get_thread_num() { return 0; }
|
||||||
inline int omp_get_num_threads() { return 1; }
|
inline int omp_get_num_threads() { return 1; }
|
||||||
@ -25,6 +29,6 @@ typedef int bst_omp_uint;
|
|||||||
#else
|
#else
|
||||||
typedef unsigned bst_omp_uint;
|
typedef unsigned bst_omp_uint;
|
||||||
#endif
|
#endif
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|
||||||
#endif // XGBOOST_UTILS_OMP_H_
|
#endif // XGBOOST_UTILS_OMP_H_
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
#ifndef XGBOOST_UTILS_QUANTILE_H_
|
|
||||||
#define XGBOOST_UTILS_QUANTILE_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file quantile.h
|
* \file quantile.h
|
||||||
* \brief util to compute quantiles
|
* \brief util to compute quantiles
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_QUANTILE_H_
|
||||||
|
#define XGBOOST_UTILS_QUANTILE_H_
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@ -37,8 +39,8 @@ struct WQSummary {
|
|||||||
// constructor
|
// constructor
|
||||||
Entry(RType rmin, RType rmax, RType wmin, DType value)
|
Entry(RType rmin, RType rmax, RType wmin, DType value)
|
||||||
: rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
|
: rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
|
||||||
/*!
|
/*!
|
||||||
* \brief debug function, check Valid
|
* \brief debug function, check Valid
|
||||||
* \param eps the tolerance level for violating the relation
|
* \param eps the tolerance level for violating the relation
|
||||||
*/
|
*/
|
||||||
inline void CheckValid(RType eps = 0) const {
|
inline void CheckValid(RType eps = 0) const {
|
||||||
@ -65,7 +67,7 @@ struct WQSummary {
|
|||||||
// default constructor
|
// default constructor
|
||||||
QEntry(void) {}
|
QEntry(void) {}
|
||||||
// constructor
|
// constructor
|
||||||
QEntry(DType value, RType weight)
|
QEntry(DType value, RType weight)
|
||||||
: value(value), weight(weight) {}
|
: value(value), weight(weight) {}
|
||||||
// comparator on value
|
// comparator on value
|
||||||
inline bool operator<(const QEntry &b) const {
|
inline bool operator<(const QEntry &b) const {
|
||||||
@ -83,11 +85,11 @@ struct WQSummary {
|
|||||||
} else {
|
} else {
|
||||||
queue[qtail - 1].weight += w;
|
queue[qtail - 1].weight += w;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline void MakeSummary(WQSummary *out) {
|
inline void MakeSummary(WQSummary *out) {
|
||||||
std::sort(queue.begin(), queue.begin() + qtail);
|
std::sort(queue.begin(), queue.begin() + qtail);
|
||||||
out->size = 0;
|
out->size = 0;
|
||||||
// start update sketch
|
// start update sketch
|
||||||
RType wsum = 0;
|
RType wsum = 0;
|
||||||
// construct data with unique weights
|
// construct data with unique weights
|
||||||
for (size_t i = 0; i < qtail;) {
|
for (size_t i = 0; i < qtail;) {
|
||||||
@ -106,7 +108,7 @@ struct WQSummary {
|
|||||||
/*! \brief number of elements in the summary */
|
/*! \brief number of elements in the summary */
|
||||||
size_t size;
|
size_t size;
|
||||||
// constructor
|
// constructor
|
||||||
WQSummary(Entry *data, size_t size)
|
WQSummary(Entry *data, size_t size)
|
||||||
: data(data), size(size) {}
|
: data(data), size(size) {}
|
||||||
/*!
|
/*!
|
||||||
* \return the maximum error of the Summary
|
* \return the maximum error of the Summary
|
||||||
@ -119,12 +121,12 @@ struct WQSummary {
|
|||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief query qvalue, start from istart
|
* \brief query qvalue, start from istart
|
||||||
* \param qvalue the value we query for
|
* \param qvalue the value we query for
|
||||||
* \param istart starting position
|
* \param istart starting position
|
||||||
*/
|
*/
|
||||||
inline Entry Query(DType qvalue, size_t &istart) const {
|
inline Entry Query(DType qvalue, size_t &istart) const { // NOLINT(*)
|
||||||
while (istart < size && qvalue > data[istart].value) {
|
while (istart < size && qvalue > data[istart].value) {
|
||||||
++istart;
|
++istart;
|
||||||
}
|
}
|
||||||
@ -136,7 +138,7 @@ struct WQSummary {
|
|||||||
return data[istart];
|
return data[istart];
|
||||||
} else {
|
} else {
|
||||||
if (istart == 0) {
|
if (istart == 0) {
|
||||||
return Entry(0.0f, 0.0f, 0.0f, qvalue);
|
return Entry(0.0f, 0.0f, 0.0f, qvalue);
|
||||||
} else {
|
} else {
|
||||||
return Entry(data[istart - 1].rmin_next(),
|
return Entry(data[istart - 1].rmin_next(),
|
||||||
data[istart].rmax_prev(),
|
data[istart].rmax_prev(),
|
||||||
@ -154,12 +156,12 @@ struct WQSummary {
|
|||||||
*/
|
*/
|
||||||
inline void CopyFrom(const WQSummary &src) {
|
inline void CopyFrom(const WQSummary &src) {
|
||||||
size = src.size;
|
size = src.size;
|
||||||
std::memcpy(data, src.data, sizeof(Entry) * size);
|
std::memcpy(data, src.data, sizeof(Entry) * size);
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief debug function, validate whether the summary
|
* \brief debug function, validate whether the summary
|
||||||
* run consistency check to check if it is a valid summary
|
* run consistency check to check if it is a valid summary
|
||||||
* \param eps the tolerated error level, used when RType is floating point and
|
* \param eps the tolerated error level, used when RType is floating point and
|
||||||
* some inconsistency could occur due to rounding error
|
* some inconsistency could occur due to rounding error
|
||||||
*/
|
*/
|
||||||
inline void CheckValid(RType eps) const {
|
inline void CheckValid(RType eps) const {
|
||||||
@ -199,8 +201,8 @@ struct WQSummary {
|
|||||||
size_t i = 1, lastidx = 0;
|
size_t i = 1, lastidx = 0;
|
||||||
for (size_t k = 1; k < n; ++k) {
|
for (size_t k = 1; k < n; ++k) {
|
||||||
RType dx2 = 2 * ((k * range) / n + begin);
|
RType dx2 = 2 * ((k * range) / n + begin);
|
||||||
// find first i such that d < (rmax[i+1] + rmin[i+1]) / 2
|
// find first i such that d < (rmax[i+1] + rmin[i+1]) / 2
|
||||||
while (i < src.size - 1
|
while (i < src.size - 1
|
||||||
&& dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
|
&& dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
|
||||||
utils::Assert(i != src.size - 1, "this cannot happen");
|
utils::Assert(i != src.size - 1, "this cannot happen");
|
||||||
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
|
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
|
||||||
@ -217,7 +219,7 @@ struct WQSummary {
|
|||||||
data[size++] = src.data[src.size - 1];
|
data[size++] = src.data[src.size - 1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief set current summary to be merged summary of sa and sb
|
* \brief set current summary to be merged summary of sa and sb
|
||||||
* \param sa first input summary to be merged
|
* \param sa first input summary to be merged
|
||||||
* \param sb second input summary to be merged
|
* \param sb second input summary to be merged
|
||||||
@ -230,7 +232,7 @@ struct WQSummary {
|
|||||||
if (sb.size == 0) {
|
if (sb.size == 0) {
|
||||||
this->CopyFrom(sa); return;
|
this->CopyFrom(sa); return;
|
||||||
}
|
}
|
||||||
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
|
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
|
||||||
const Entry *a = sa.data, *a_end = sa.data + sa.size;
|
const Entry *a = sa.data, *a_end = sa.data + sa.size;
|
||||||
const Entry *b = sb.data, *b_end = sb.data + sb.size;
|
const Entry *b = sb.data, *b_end = sb.data + sb.size;
|
||||||
// extended rmin value
|
// extended rmin value
|
||||||
@ -297,7 +299,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
|
|||||||
RType begin = src.data[0].rmax;
|
RType begin = src.data[0].rmax;
|
||||||
size_t n = maxsize - 1, nbig = 0;
|
size_t n = maxsize - 1, nbig = 0;
|
||||||
RType range = src.data[src.size - 1].rmin - begin;
|
RType range = src.data[src.size - 1].rmin - begin;
|
||||||
// prune off zero weights
|
// prune off zero weights
|
||||||
if (range == 0.0f) {
|
if (range == 0.0f) {
|
||||||
// special case, contain only two effective data pts
|
// special case, contain only two effective data pts
|
||||||
this->data[0] = src.data[0];
|
this->data[0] = src.data[0];
|
||||||
@ -331,7 +333,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
|
|||||||
utils::Printf("LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n);
|
utils::Printf("LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n);
|
||||||
utils::Printf("LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n",
|
utils::Printf("LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n",
|
||||||
src.size, maxsize, static_cast<double>(range),
|
src.size, maxsize, static_cast<double>(range),
|
||||||
static_cast<double>(chunk));
|
static_cast<double>(chunk));
|
||||||
for (size_t i = 0; i < src.size; ++i) {
|
for (size_t i = 0; i < src.size; ++i) {
|
||||||
utils::Printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i,
|
utils::Printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i,
|
||||||
src.data[i].rmin, src.data[i].rmax, src.data[i].wmin,
|
src.data[i].rmin, src.data[i].rmax, src.data[i].wmin,
|
||||||
@ -352,7 +354,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
|
|||||||
RType maxdx2 = src.data[end].rmax_prev() * 2;
|
RType maxdx2 = src.data[end].rmax_prev() * 2;
|
||||||
for (; k < n; ++k) {
|
for (; k < n; ++k) {
|
||||||
RType dx2 = 2 * ((k * mrange) / n + begin);
|
RType dx2 = 2 * ((k * mrange) / n + begin);
|
||||||
if (dx2 >= maxdx2) break;
|
if (dx2 >= maxdx2) break;
|
||||||
while (i < end &&
|
while (i < end &&
|
||||||
dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
|
dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
|
||||||
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
|
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
|
||||||
@ -371,13 +373,13 @@ struct WXQSummary : public WQSummary<DType, RType> {
|
|||||||
lastidx = end;
|
lastidx = end;
|
||||||
}
|
}
|
||||||
bid = end;
|
bid = end;
|
||||||
// shift base by the gap
|
// shift base by the gap
|
||||||
begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev();
|
begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
/*!
|
/*!
|
||||||
* \brief traditional GK summary
|
* \brief traditional GK summary
|
||||||
*/
|
*/
|
||||||
template<typename DType, typename RType>
|
template<typename DType, typename RType>
|
||||||
@ -405,7 +407,7 @@ struct GKSummary {
|
|||||||
// push data to the queue
|
// push data to the queue
|
||||||
inline void Push(DType x, RType w) {
|
inline void Push(DType x, RType w) {
|
||||||
queue[qtail++] = x;
|
queue[qtail++] = x;
|
||||||
}
|
}
|
||||||
inline void MakeSummary(GKSummary *out) {
|
inline void MakeSummary(GKSummary *out) {
|
||||||
std::sort(queue.begin(), queue.begin() + qtail);
|
std::sort(queue.begin(), queue.begin() + qtail);
|
||||||
out->size = qtail;
|
out->size = qtail;
|
||||||
@ -419,7 +421,7 @@ struct GKSummary {
|
|||||||
/*! \brief number of elements in the summary */
|
/*! \brief number of elements in the summary */
|
||||||
size_t size;
|
size_t size;
|
||||||
GKSummary(Entry *data, size_t size)
|
GKSummary(Entry *data, size_t size)
|
||||||
: data(data), size(size) {}
|
: data(data), size(size) {}
|
||||||
/*! \brief the maximum error of the summary */
|
/*! \brief the maximum error of the summary */
|
||||||
inline RType MaxError(void) const {
|
inline RType MaxError(void) const {
|
||||||
RType res = 0;
|
RType res = 0;
|
||||||
@ -432,7 +434,7 @@ struct GKSummary {
|
|||||||
inline RType MaxRank(void) const {
|
inline RType MaxRank(void) const {
|
||||||
return data[size - 1].rmax;
|
return data[size - 1].rmax;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief copy content from src
|
* \brief copy content from src
|
||||||
* \param src source sketch
|
* \param src source sketch
|
||||||
*/
|
*/
|
||||||
@ -450,8 +452,8 @@ struct GKSummary {
|
|||||||
<< "[" << data[i].rmin << "," << data[i].rmax << "]"
|
<< "[" << data[i].rmin << "," << data[i].rmax << "]"
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief set current summary to be pruned summary of src
|
* \brief set current summary to be pruned summary of src
|
||||||
* assume data field is already allocated to be at least maxsize
|
* assume data field is already allocated to be at least maxsize
|
||||||
* \param src source summary
|
* \param src source summary
|
||||||
@ -486,8 +488,8 @@ struct GKSummary {
|
|||||||
}
|
}
|
||||||
if (sb.size == 0) {
|
if (sb.size == 0) {
|
||||||
this->CopyFrom(sa); return;
|
this->CopyFrom(sa); return;
|
||||||
}
|
}
|
||||||
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
|
utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge");
|
||||||
const Entry *a = sa.data, *a_end = sa.data + sa.size;
|
const Entry *a = sa.data, *a_end = sa.data + sa.size;
|
||||||
const Entry *b = sb.data, *b_end = sb.data + sb.size;
|
const Entry *b = sb.data, *b_end = sb.data + sb.size;
|
||||||
this->size = sa.size + sb.size;
|
this->size = sa.size + sb.size;
|
||||||
@ -500,7 +502,7 @@ struct GKSummary {
|
|||||||
aprev_rmin = a->rmin;
|
aprev_rmin = a->rmin;
|
||||||
++dst; ++a;
|
++dst; ++a;
|
||||||
} else {
|
} else {
|
||||||
*dst = Entry(aprev_rmin + b->rmin,
|
*dst = Entry(aprev_rmin + b->rmin,
|
||||||
b->rmax + a->rmax - 1, b->value);
|
b->rmax + a->rmax - 1, b->value);
|
||||||
bprev_rmin = b->rmin;
|
bprev_rmin = b->rmin;
|
||||||
++dst; ++b;
|
++dst; ++b;
|
||||||
@ -537,15 +539,15 @@ class QuantileSketchTemplate {
|
|||||||
/*! \brief type of summary type */
|
/*! \brief type of summary type */
|
||||||
typedef TSummary Summary;
|
typedef TSummary Summary;
|
||||||
/*! \brief the entry type */
|
/*! \brief the entry type */
|
||||||
typedef typename Summary::Entry Entry;
|
typedef typename Summary::Entry Entry;
|
||||||
/*! \brief same as summary, but use STL to backup the space */
|
/*! \brief same as summary, but use STL to backup the space */
|
||||||
struct SummaryContainer : public Summary {
|
struct SummaryContainer : public Summary {
|
||||||
std::vector<Entry> space;
|
std::vector<Entry> space;
|
||||||
SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
|
SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
|
||||||
this->space = src.space;
|
this->space = src.space;
|
||||||
this->data = BeginPtr(this->space);
|
this->data = BeginPtr(this->space);
|
||||||
}
|
}
|
||||||
SummaryContainer(void) : Summary(NULL, 0) {
|
SummaryContainer(void) : Summary(NULL, 0) {
|
||||||
}
|
}
|
||||||
/*! \brief reserve space for summary */
|
/*! \brief reserve space for summary */
|
||||||
inline void Reserve(size_t size) {
|
inline void Reserve(size_t size) {
|
||||||
@ -554,7 +556,7 @@ class QuantileSketchTemplate {
|
|||||||
this->data = BeginPtr(space);
|
this->data = BeginPtr(space);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief set the space to be merge of all Summary arrays
|
* \brief set the space to be merge of all Summary arrays
|
||||||
* \param begin beginning position in the Summary array
|
* \param begin beginning position in the Summary array
|
||||||
* \param end ending position in the Summary array
|
* \param end ending position in the Summary array
|
||||||
@ -597,7 +599,7 @@ class QuantileSketchTemplate {
|
|||||||
}
|
}
|
||||||
/*! \brief save the data structure into stream */
|
/*! \brief save the data structure into stream */
|
||||||
template<typename TStream>
|
template<typename TStream>
|
||||||
inline void Save(TStream &fo) const {
|
inline void Save(TStream &fo) const { // NOLINT(*)
|
||||||
fo.Write(&(this->size), sizeof(this->size));
|
fo.Write(&(this->size), sizeof(this->size));
|
||||||
if (this->size != 0) {
|
if (this->size != 0) {
|
||||||
fo.Write(this->data, this->size * sizeof(Entry));
|
fo.Write(this->data, this->size * sizeof(Entry));
|
||||||
@ -605,15 +607,16 @@ class QuantileSketchTemplate {
|
|||||||
}
|
}
|
||||||
/*! \brief load data structure from input stream */
|
/*! \brief load data structure from input stream */
|
||||||
template<typename TStream>
|
template<typename TStream>
|
||||||
inline void Load(TStream &fi) {
|
inline void Load(TStream &fi) { // NOLINT(*)
|
||||||
utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
|
utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
|
||||||
this->Reserve(this->size);
|
this->Reserve(this->size);
|
||||||
if (this->size != 0) {
|
if (this->size != 0) {
|
||||||
utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2");
|
utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0,
|
||||||
|
"invalid SummaryArray 2");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
/*!
|
/*!
|
||||||
* \brief initialize the quantile sketch, given the performance specification
|
* \brief initialize the quantile sketch, given the performance specification
|
||||||
* \param maxn maximum number of data points that can be fed into the sketch
|
* \param maxn maximum number of data points that can be fed into the sketch
|
||||||
* \param eps accuracy level of summary
|
* \param eps accuracy level of summary
|
||||||
@ -741,8 +744,8 @@ class QuantileSketchTemplate {
|
|||||||
* \tparam DType type of data content
|
* \tparam DType type of data content
|
||||||
* \tparam RType type of rank
|
* \tparam RType type of rank
|
||||||
*/
|
*/
|
||||||
template<typename DType, typename RType=unsigned>
|
template<typename DType, typename RType = unsigned>
|
||||||
class WQuantileSketch :
|
class WQuantileSketch :
|
||||||
public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{
|
public QuantileSketchTemplate<DType, RType, WQSummary<DType, RType> >{
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -751,8 +754,8 @@ class WQuantileSketch :
|
|||||||
* \tparam DType type of data content
|
* \tparam DType type of data content
|
||||||
* \tparam RType type of rank
|
* \tparam RType type of rank
|
||||||
*/
|
*/
|
||||||
template<typename DType, typename RType=unsigned>
|
template<typename DType, typename RType = unsigned>
|
||||||
class WXQuantileSketch :
|
class WXQuantileSketch :
|
||||||
public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{
|
public QuantileSketchTemplate<DType, RType, WXQSummary<DType, RType> >{
|
||||||
};
|
};
|
||||||
/*!
|
/*!
|
||||||
@ -760,11 +763,11 @@ class WXQuantileSketch :
|
|||||||
* \tparam DType type of data content
|
* \tparam DType type of data content
|
||||||
* \tparam RType type of rank
|
* \tparam RType type of rank
|
||||||
*/
|
*/
|
||||||
template<typename DType, typename RType=unsigned>
|
template<typename DType, typename RType = unsigned>
|
||||||
class GKQuantileSketch :
|
class GKQuantileSketch :
|
||||||
public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{
|
public QuantileSketchTemplate<DType, RType, GKSummary<DType, RType> >{
|
||||||
};
|
};
|
||||||
|
|
||||||
} // utils
|
} // namespace utils
|
||||||
} // xgboost
|
} // namespace xgboost
|
||||||
#endif
|
#endif // XGBOOST_UTILS_QUANTILE_H_
|
||||||
|
|||||||
@ -1,12 +1,14 @@
|
|||||||
#ifndef XGBOOST_UTILS_RANDOM_H_
|
|
||||||
#define XGBOOST_UTILS_RANDOM_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file xgboost_random.h
|
* \file xgboost_random.h
|
||||||
* \brief PRNG to support random number generation
|
* \brief PRNG to support random number generation
|
||||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
*
|
*
|
||||||
* Use standard PRNG from stdlib
|
* Use standard PRNG from stdlib
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_RANDOM_H_
|
||||||
|
#define XGBOOST_UTILS_RANDOM_H_
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -23,11 +25,11 @@ inline void Seed(unsigned seed) {
|
|||||||
}
|
}
|
||||||
/*! \brief basic function, uniform */
|
/*! \brief basic function, uniform */
|
||||||
inline double Uniform(void) {
|
inline double Uniform(void) {
|
||||||
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
|
return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); // NOLINT(*)
|
||||||
}
|
}
|
||||||
/*! \brief return a real number uniform in (0,1) */
|
/*! \brief return a real number uniform in (0,1) */
|
||||||
inline double NextDouble2(void) {
|
inline double NextDouble2(void) {
|
||||||
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
|
return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); // NOLINT(*)
|
||||||
}
|
}
|
||||||
/*! \brief return x~N(0,1) */
|
/*! \brief return x~N(0,1) */
|
||||||
inline double Normal(void) {
|
inline double Normal(void) {
|
||||||
@ -73,7 +75,7 @@ inline void Shuffle(T *data, size_t sz) {
|
|||||||
}
|
}
|
||||||
// random shuffle the data inside, require PRNG
|
// random shuffle the data inside, require PRNG
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline void Shuffle(std::vector<T> &data) {
|
inline void Shuffle(std::vector<T> &data) { // NOLINT(*)
|
||||||
Shuffle(&data[0], data.size());
|
Shuffle(&data[0], data.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,17 +83,18 @@ inline void Shuffle(std::vector<T> &data) {
|
|||||||
struct Random{
|
struct Random{
|
||||||
/*! \brief set random number seed */
|
/*! \brief set random number seed */
|
||||||
inline void Seed(unsigned sd) {
|
inline void Seed(unsigned sd) {
|
||||||
this->rseed = sd;
|
this->rseed = sd;
|
||||||
#if defined(_MSC_VER)||defined(_WIN32)
|
#if defined(_MSC_VER) || defined(_WIN32)
|
||||||
::xgboost::random::Seed(sd);
|
::xgboost::random::Seed(sd);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
/*! \brief return a real number uniform in [0,1) */
|
/*! \brief return a real number uniform in [0,1) */
|
||||||
inline double RandDouble(void) {
|
inline double RandDouble(void) {
|
||||||
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
|
// use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe
|
||||||
// For cygwin and mingw, this can slow down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general
|
// For cygwin and mingw, this can slow down parallelism,
|
||||||
// todo, replace with another PRNG
|
// but rand_r is only used in objective-inl.hpp, won't affect speed in general
|
||||||
#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_)
|
// todo, replace with another PRNG
|
||||||
|
#if defined(_MSC_VER) || defined(_WIN32) || defined(XGBOOST_STRICT_CXX98_)
|
||||||
return Uniform();
|
return Uniform();
|
||||||
#else
|
#else
|
||||||
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
|
return static_cast<double>(rand_r(&rseed)) / (static_cast<double>(RAND_MAX) + 1.0);
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
|
|
||||||
#define XGBOOST_UTILS_THREAD_BUFFER_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file thread_buffer.h
|
* \file thread_buffer.h
|
||||||
* \brief multi-thread buffer, iterator, can be used to create parallel pipeline
|
* \brief multi-thread buffer, iterator, can be used to create parallel pipeline
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_
|
||||||
|
#define XGBOOST_UTILS_THREAD_BUFFER_H_
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
@ -27,7 +29,7 @@ class ThreadBuffer {
|
|||||||
this->buf_size = 30;
|
this->buf_size = 30;
|
||||||
}
|
}
|
||||||
~ThreadBuffer(void) {
|
~ThreadBuffer(void) {
|
||||||
if(init_end) this->Destroy();
|
if (init_end) this->Destroy();
|
||||||
}
|
}
|
||||||
/*!\brief set parameter, will also pass the parameter to factory */
|
/*!\brief set parameter, will also pass the parameter to factory */
|
||||||
inline void SetParam(const char *name, const char *val) {
|
inline void SetParam(const char *name, const char *val) {
|
||||||
@ -38,7 +40,7 @@ class ThreadBuffer {
|
|||||||
/*!
|
/*!
|
||||||
* \brief initialize the buffered iterator
|
* \brief initialize the buffered iterator
|
||||||
* \param param a initialize parameter that will pass to factory, ignore it if not necessary
|
* \param param a initialize parameter that will pass to factory, ignore it if not necessary
|
||||||
* \return false if the initialization can't be done, e.g. buffer file hasn't been created
|
* \return false if the initialization can't be done, e.g. buffer file hasn't been created
|
||||||
*/
|
*/
|
||||||
inline bool Init(void) {
|
inline bool Init(void) {
|
||||||
if (!factory.Init()) return false;
|
if (!factory.Init()) return false;
|
||||||
@ -49,7 +51,7 @@ class ThreadBuffer {
|
|||||||
this->init_end = true;
|
this->init_end = true;
|
||||||
this->StartLoader();
|
this->StartLoader();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
/*!\brief place the iterator before first value */
|
/*!\brief place the iterator before first value */
|
||||||
inline void BeforeFirst(void) {
|
inline void BeforeFirst(void) {
|
||||||
// wait till last loader end
|
// wait till last loader end
|
||||||
@ -70,7 +72,7 @@ class ThreadBuffer {
|
|||||||
loading_need.Post();
|
loading_need.Post();
|
||||||
// set buffer value
|
// set buffer value
|
||||||
buf_index = 0;
|
buf_index = 0;
|
||||||
}
|
}
|
||||||
/*! \brief destroy the buffer iterator, will deallocate the buffer */
|
/*! \brief destroy the buffer iterator, will deallocate the buffer */
|
||||||
inline void Destroy(void) {
|
inline void Destroy(void) {
|
||||||
// wait until the signal is consumed
|
// wait until the signal is consumed
|
||||||
@ -78,7 +80,7 @@ class ThreadBuffer {
|
|||||||
loading_need.Post();
|
loading_need.Post();
|
||||||
loader_thread.Join();
|
loader_thread.Join();
|
||||||
loading_need.Destroy();
|
loading_need.Destroy();
|
||||||
loading_end.Destroy();
|
loading_end.Destroy();
|
||||||
for (size_t i = 0; i < bufA.size(); ++i) {
|
for (size_t i = 0; i < bufA.size(); ++i) {
|
||||||
factory.FreeSpace(bufA[i]);
|
factory.FreeSpace(bufA[i]);
|
||||||
}
|
}
|
||||||
@ -88,37 +90,38 @@ class ThreadBuffer {
|
|||||||
bufA.clear(); bufB.clear();
|
bufA.clear(); bufB.clear();
|
||||||
factory.Destroy();
|
factory.Destroy();
|
||||||
this->init_end = false;
|
this->init_end = false;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief get the next element needed in buffer
|
* \brief get the next element needed in buffer
|
||||||
* \param elem element to store into
|
* \param elem element to store into
|
||||||
* \return whether reaches end of data
|
* \return whether reaches end of data
|
||||||
*/
|
*/
|
||||||
inline bool Next(Elem &elem) {
|
inline bool Next(Elem &elem) { // NOLINT(*)
|
||||||
// end of buffer try to switch
|
// end of buffer try to switch
|
||||||
if (buf_index == buf_size) {
|
if (buf_index == buf_size) {
|
||||||
this->SwitchBuffer();
|
this->SwitchBuffer();
|
||||||
buf_index = 0;
|
buf_index = 0;
|
||||||
}
|
}
|
||||||
if (buf_index >= (current_buf ? endA : endB)) {
|
if (buf_index >= (current_buf ? endA : endB)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::vector<Elem> &buf = current_buf ? bufA : bufB;
|
std::vector<Elem> &buf = current_buf ? bufA : bufB;
|
||||||
elem = buf[buf_index];
|
elem = buf[buf_index];
|
||||||
++buf_index;
|
++buf_index;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief get the factory object
|
* \brief get the factory object
|
||||||
*/
|
*/
|
||||||
inline ElemFactory &get_factory(void) {
|
inline ElemFactory &get_factory(void) {
|
||||||
return factory;
|
return factory;
|
||||||
}
|
}
|
||||||
inline const ElemFactory &get_factory(void) const{
|
inline const ElemFactory &get_factory(void) const {
|
||||||
return factory;
|
return factory;
|
||||||
}
|
}
|
||||||
// size of buffer
|
// size of buffer
|
||||||
int buf_size;
|
int buf_size;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// factory object used to load configures
|
// factory object used to load configures
|
||||||
ElemFactory factory;
|
ElemFactory factory;
|
||||||
@ -147,15 +150,15 @@ class ThreadBuffer {
|
|||||||
* this implementation is like producer-consumer style
|
* this implementation is like producer-consumer style
|
||||||
*/
|
*/
|
||||||
inline void RunLoader(void) {
|
inline void RunLoader(void) {
|
||||||
while(!destroy_signal) {
|
while (!destroy_signal) {
|
||||||
// sleep until loading is needed
|
// sleep until loading is needed
|
||||||
loading_need.Wait();
|
loading_need.Wait();
|
||||||
std::vector<Elem> &buf = current_buf ? bufB : bufA;
|
std::vector<Elem> &buf = current_buf ? bufB : bufA;
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < buf_size ; ++i) {
|
for (i = 0; i < buf_size ; ++i) {
|
||||||
if (!factory.LoadNext(buf[i])) {
|
if (!factory.LoadNext(buf[i])) {
|
||||||
int &end = current_buf ? endB : endA;
|
int &end = current_buf ? endB : endA;
|
||||||
end = i; // marks the termination
|
end = i; // marks the termination
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -166,14 +169,14 @@ class ThreadBuffer {
|
|||||||
}
|
}
|
||||||
/*!\brief entry point of loader thread */
|
/*!\brief entry point of loader thread */
|
||||||
inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) {
|
inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) {
|
||||||
static_cast< ThreadBuffer<Elem,ElemFactory>* >(pthread)->RunLoader();
|
static_cast< ThreadBuffer<Elem, ElemFactory>* >(pthread)->RunLoader();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
/*!\brief start loader thread */
|
/*!\brief start loader thread */
|
||||||
inline void StartLoader(void) {
|
inline void StartLoader(void) {
|
||||||
destroy_signal = false;
|
destroy_signal = false;
|
||||||
// set param
|
// set param
|
||||||
current_buf = 1;
|
current_buf = 1;
|
||||||
loading_need.Init(1);
|
loading_need.Init(1);
|
||||||
loading_end .Init(0);
|
loading_end .Init(0);
|
||||||
// reset terminate limit
|
// reset terminate limit
|
||||||
@ -185,8 +188,8 @@ class ThreadBuffer {
|
|||||||
current_buf = 0;
|
current_buf = 0;
|
||||||
// wake loader for next part
|
// wake loader for next part
|
||||||
data_loaded = false;
|
data_loaded = false;
|
||||||
loading_need.Post();
|
loading_need.Post();
|
||||||
buf_index = 0;
|
buf_index = 0;
|
||||||
}
|
}
|
||||||
/*!\brief switch double buffer */
|
/*!\brief switch double buffer */
|
||||||
inline void SwitchBuffer(void) {
|
inline void SwitchBuffer(void) {
|
||||||
@ -198,7 +201,6 @@ class ThreadBuffer {
|
|||||||
loading_need.Post();
|
loading_need.Post();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace utils
|
} // namespace utils
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif
|
#endif // XGBOOST_UTILS_THREAD_BUFFER_H_
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
#ifndef XGBOOST_UTILS_UTILS_H_
|
|
||||||
#define XGBOOST_UTILS_UTILS_H_
|
|
||||||
/*!
|
/*!
|
||||||
|
* Copyright 2014 by Contributors
|
||||||
* \file utils.h
|
* \file utils.h
|
||||||
* \brief simple utils to support the code
|
* \brief simple utils to support the code
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
*/
|
*/
|
||||||
|
#ifndef XGBOOST_UTILS_UTILS_H_
|
||||||
|
#define XGBOOST_UTILS_UTILS_H_
|
||||||
|
|
||||||
#define _CRT_SECURE_NO_WARNINGS
|
#define _CRT_SECURE_NO_WARNINGS
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -19,18 +21,18 @@
|
|||||||
#define fopen64 std::fopen
|
#define fopen64 std::fopen
|
||||||
#endif
|
#endif
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
// NOTE: sprintf_s is not equivalent to snprintf,
|
// NOTE: sprintf_s is not equivalent to snprintf,
|
||||||
// they are equivalent when success, which is sufficient for our case
|
// they are equivalent when success, which is sufficient for our case
|
||||||
#define snprintf sprintf_s
|
#define snprintf sprintf_s
|
||||||
#define vsnprintf vsprintf_s
|
#define vsnprintf vsprintf_s
|
||||||
#else
|
#else
|
||||||
#ifdef _FILE_OFFSET_BITS
|
#ifdef _FILE_OFFSET_BITS
|
||||||
#if _FILE_OFFSET_BITS == 32
|
#if _FILE_OFFSET_BITS == 32
|
||||||
#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit")
|
#pragma message("Warning: FILE OFFSET BITS defined to be 32 bit")
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#define off64_t off_t
|
#define off64_t off_t
|
||||||
#define fopen64 std::fopen
|
#define fopen64 std::fopen
|
||||||
#endif
|
#endif
|
||||||
@ -58,17 +60,17 @@ namespace utils {
|
|||||||
const int kPrintBuffer = 1 << 12;
|
const int kPrintBuffer = 1 << 12;
|
||||||
|
|
||||||
#ifndef XGBOOST_CUSTOMIZE_MSG_
|
#ifndef XGBOOST_CUSTOMIZE_MSG_
|
||||||
/*!
|
/*!
|
||||||
* \brief handling of Assert error, caused by inappropriate input
|
* \brief handling of Assert error, caused by inappropriate input
|
||||||
* \param msg error message
|
* \param msg error message
|
||||||
*/
|
*/
|
||||||
inline void HandleAssertError(const char *msg) {
|
inline void HandleAssertError(const char *msg) {
|
||||||
fprintf(stderr, "AssertError:%s\n", msg);
|
fprintf(stderr, "AssertError:%s\n", msg);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief handling of Check error, caused by inappropriate input
|
* \brief handling of Check error, caused by inappropriate input
|
||||||
* \param msg error message
|
* \param msg error message
|
||||||
*/
|
*/
|
||||||
inline void HandleCheckError(const char *msg) {
|
inline void HandleCheckError(const char *msg) {
|
||||||
fprintf(stderr, "%s\n", msg);
|
fprintf(stderr, "%s\n", msg);
|
||||||
@ -158,7 +160,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) {
|
|||||||
// easy utils that can be directly accessed in xgboost
|
// easy utils that can be directly accessed in xgboost
|
||||||
/*! \brief get the beginning address of a vector */
|
/*! \brief get the beginning address of a vector */
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline T *BeginPtr(std::vector<T> &vec) {
|
inline T *BeginPtr(std::vector<T> &vec) { // NOLINT(*)
|
||||||
if (vec.size() == 0) {
|
if (vec.size() == 0) {
|
||||||
return NULL;
|
return NULL;
|
||||||
} else {
|
} else {
|
||||||
@ -174,7 +176,7 @@ inline const T *BeginPtr(const std::vector<T> &vec) {
|
|||||||
return &vec[0];
|
return &vec[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline char* BeginPtr(std::string &str) {
|
inline char* BeginPtr(std::string &str) { // NOLINT(*)
|
||||||
if (str.length() == 0) return NULL;
|
if (str.length() == 0) return NULL;
|
||||||
return &str[0];
|
return &str[0];
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user