lint all
parent aba41d07cd
commit 1123253f79
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_DATA_H
-#define XGBOOST_DATA_H
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file data.h
  * \brief the input data structure for gradient boosting
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_DATA_H_
+#define XGBOOST_DATA_H_
+
 #include <cstdio>
 #include <vector>
 #include "utils/utils.h"
@@ -161,4 +163,4 @@ class IFMatrix {
   virtual ~IFMatrix(void){}
 };
 }  // namespace xgboost
-#endif  // XGBOOST_DATA_H
+#endif  // XGBOOST_DATA_H_
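The data.h hunks above show the two recurring guard fixes in this commit: the Doxygen/license block moves above the include guard, and the guard macro (plus the comment on the closing #endif) gains the trailing underscore that cpplint's build/header_guard check expects. A minimal sketch of the resulting layout, using a hypothetical example.h rather than any file in the diff, would look roughly like this:

/*!
 * Copyright (c) 2014 by Contributors
 * \file example.h
 * \brief illustration of the lint-clean header layout (hypothetical file)
 */
#ifndef XGBOOST_EXAMPLE_H_   // guard derived from the path, trailing underscore
#define XGBOOST_EXAMPLE_H_

#include <vector>            // system headers before project headers

namespace xgboost {
/*! \brief placeholder type, for illustration only */
struct Example {
  std::vector<int> data;
};
}  // namespace xgboost
#endif  // XGBOOST_EXAMPLE_H_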
@@ -1,6 +1,8 @@
+// Copyright by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
+#include <string>
 #include "../utils/io.h"

 // implements a single no split version of DMLC
@@ -154,7 +156,7 @@ class StdFile : public dmlc::Stream {
     std::fwrite(ptr, size, 1, fp);
   }
   virtual void Seek(size_t pos) {
-    std::fseek(fp, static_cast<long>(pos), SEEK_SET);
+    std::fseek(fp, static_cast<long>(pos), SEEK_SET);  // NOLINT(*)
   }
   virtual size_t Tell(void) {
     return std::ftell(fp);
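Most of the remaining changes simply append // NOLINT(*) to individual lines. The (*) form asks cpplint to suppress every warning category on that one line only (here the runtime/int complaint about plain long), leaving the rest of the file checked. A rough illustration, using a hypothetical SeekDemo helper that is not part of the diff:

#include <cstddef>
#include <cstdio>

// Hypothetical snippet: a trailing NOLINT comment scopes the suppression
// to a single line; surrounding lines are still linted normally.
void SeekDemo(std::FILE *fp, std::size_t pos) {
  // cpplint would normally flag the use of 'long' (runtime/int);
  // NOLINT(*) silences every category on this line only.
  std::fseek(fp, static_cast<long>(pos), SEEK_SET);  // NOLINT(*)
}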
@@ -1,3 +1,4 @@
+// Copyright 2014 by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
@@ -35,7 +35,7 @@ struct LibSVMPage : public SparsePage {
  */
 class LibSVMPageFactory {
  public:
-  explicit LibSVMPageFactory()
+  LibSVMPageFactory()
       : bytes_read_(0), at_head_(true) {
   }
   inline bool Init(void) {
@@ -199,6 +199,7 @@ class LibSVMParser : public utils::IIterator<LibSVMPage> {
   inline size_t bytes_read(void) const {
     return itr.get_factory().bytes_read();
   }
+
  private:
   bool at_end_;
   size_t data_ptr_;
@@ -1,11 +1,15 @@
-#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
-#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file page_dmatrix-inl.hpp
  * row iterator based on sparse page
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
+#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_
+
 #include <vector>
+#include <string>
+#include <algorithm>
 #include "../data.h"
 #include "../utils/iterator.h"
 #include "../utils/thread_buffer.h"
@@ -94,12 +98,12 @@ class DMatrixPageBase : public DataMatrix {
     fbin.Close();
     if (!silent) {
       utils::Printf("DMatrixPage: %lux%lu is saved to %s\n",
-                    static_cast<unsigned long>(mat.info.num_row()),
-                    static_cast<unsigned long>(mat.info.num_col()), fname_);
+                    static_cast<unsigned long>(mat.info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(mat.info.num_col()), fname_);  // NOLINT(*)
     }
   }
   /*! \brief load and initialize the iterator with fi */
-  inline void LoadBinary(utils::FileStream &fi,
+  inline void LoadBinary(utils::FileStream &fi,  // NOLINT(*)
                          bool silent,
                          const char *fname_) {
     this->set_cache_file(fname_);
@@ -114,8 +118,8 @@ class DMatrixPageBase : public DataMatrix {
     iter_->Load(fs);
     if (!silent) {
       utils::Printf("DMatrixPage: %lux%lu matrix is loaded",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()));
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()));  // NOLINT(*)
       if (fname_ != NULL) {
         utils::Printf(" from %s\n", fname_);
       } else {
@@ -188,8 +192,8 @@ class DMatrixPageBase : public DataMatrix {
     fs.Close();
     if (!silent) {
       utils::Printf("DMatrixPage: %lux%lu is parsed from %s\n",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
                     uri);
     }
   }
@@ -1,10 +1,16 @@
-#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
-#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file page_fmatrix-inl.hpp
  * col iterator based on sparse page
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
+#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
+
+#include <vector>
+#include <string>
+#include <algorithm>
+
 namespace xgboost {
 namespace io {
 /*! \brief thread buffer iterator */
@@ -96,7 +102,7 @@ struct ColConvertFactory {
         return true;
       }
     }
-    if (tmp_.Size() != 0){
+    if (tmp_.Size() != 0) {
       this->MakeColPage(tmp_, BeginPtr(*buffered_rowset_) + btop,
                         *enabled_, val);
       return true;
@@ -1,6 +1,5 @@
-#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
-#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file simple_dmatrix-inl.hpp
  * \brief simple implementation of DMatrixS that can be used
  * the data format of xgboost is templatized, which means it can accept
@@ -8,6 +7,9 @@
  * this file is a specific implementation of input data structure that can be used by BoostLearner
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
+#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_
+
 #include <string>
 #include <cstring>
 #include <vector>
@@ -123,9 +125,9 @@ class DMatrixSimple : public DataMatrix {
     }
     if (!silent) {
       utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
-                    static_cast<unsigned long>(row_data_.size()), uri);
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
+                    static_cast<unsigned long>(row_data_.size()), uri);  // NOLINT(*)
     }
     // try to load in additional file
     if (!loadsplit) {
@@ -165,10 +167,11 @@ class DMatrixSimple : public DataMatrix {
   * \param silent whether print information during loading
   * \param fname file name, used to print message
   */
-  inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {
+  inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) {  // NOLINT(*)
     int tmagic;
     utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
-    utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch", fname == NULL ? "" : fname);
+    utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch",
+                 fname == NULL ? "" : fname);

     info.LoadBinary(fs);
     LoadBinary(fs, &row_ptr_, &row_data_);
@@ -176,9 +179,9 @@ class DMatrixSimple : public DataMatrix {

     if (!silent) {
       utils::Printf("%lux%lu matrix with %lu entries is loaded",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
-                    static_cast<unsigned long>(row_data_.size()));
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
+                    static_cast<unsigned long>(row_data_.size()));  // NOLINT(*)
       if (fname != NULL) {
         utils::Printf(" from %s\n", fname);
       } else {
@@ -205,9 +208,9 @@ class DMatrixSimple : public DataMatrix {

     if (!silent) {
       utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n",
-                    static_cast<unsigned long>(info.num_row()),
-                    static_cast<unsigned long>(info.num_col()),
-                    static_cast<unsigned long>(row_data_.size()), fname);
+                    static_cast<unsigned long>(info.num_row()),  // NOLINT(*)
+                    static_cast<unsigned long>(info.num_col()),  // NOLINT(*)
+                    static_cast<unsigned long>(row_data_.size()), fname);  // NOLINT(*)
       if (info.group_ptr.size() != 0) {
         utils::Printf("data contains %u groups\n",
                       static_cast<unsigned>(info.group_ptr.size()-1));
@@ -256,7 +259,7 @@ class DMatrixSimple : public DataMatrix {
   * \param ptr pointer data
   * \param data data content
   */
-  inline static void SaveBinary(utils::IStream &fo,
+  inline static void SaveBinary(utils::IStream &fo,  // NOLINT(*)
                                 const std::vector<size_t> &ptr,
                                 const std::vector<RowBatch::Entry> &data) {
     size_t nrow = ptr.size() - 1;
@@ -272,7 +275,7 @@ class DMatrixSimple : public DataMatrix {
   * \param out_ptr pointer data
   * \param out_data data content
   */
-  inline static void LoadBinary(utils::IStream &fi,
+  inline static void LoadBinary(utils::IStream &fi,  // NOLINT(*)
                                 std::vector<size_t> *out_ptr,
                                 std::vector<RowBatch::Entry> *out_data) {
     size_t nrow;
@@ -1,11 +1,15 @@
-#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
-#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file simple_fmatrix-inl.hpp
  * \brief the input data structure for gradient boosting
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
+#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_
+
 #include <limits>
+#include <algorithm>
+#include <vector>
 #include "../data.h"
 #include "../utils/utils.h"
 #include "../utils/random.h"
@@ -94,7 +98,7 @@ class FMatrixS : public IFMatrix {
   * \brief save column access data into stream
   * \param fo output stream to save to
   */
-  inline void SaveColAccess(utils::IStream &fo) const {
+  inline void SaveColAccess(utils::IStream &fo) const {  // NOLINT(*)
     size_t n = 0;
     fo.Write(&n, sizeof(n));
   }
@@ -102,7 +106,7 @@ class FMatrixS : public IFMatrix {
   * \brief load column access data from stream
   * \param fo output stream to load from
   */
-  inline void LoadColAccess(utils::IStream &fi) {
+  inline void LoadColAccess(utils::IStream &fi) {  // NOLINT(*)
     // do nothing in load col access
   }

@@ -159,8 +163,8 @@ class FMatrixS : public IFMatrix {
     while (iter_->Next()) {
       const RowBatch &batch = iter_->Value();
       bmap.resize(bmap.size() + batch.size, true);
-      long batch_size = static_cast<long>(batch.size);
-      for (long i = 0; i < batch_size; ++i) {
+      long batch_size = static_cast<long>(batch.size);  // NOLINT(*)
+      for (long i = 0; i < batch_size; ++i) {  // NOLINT(*)
         bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
         if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
           buffered_rowset_.push_back(ridx);
@@ -169,13 +173,13 @@ class FMatrixS : public IFMatrix {
         }
       }
       #pragma omp parallel for schedule(static)
-      for (long i = 0; i < batch_size; ++i) {
+      for (long i = 0; i < batch_size; ++i) {  // NOLINT(*)
         int tid = omp_get_thread_num();
         bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
         if (bmap[ridx]) {
           RowBatch::Inst inst = batch[i];
           for (bst_uint j = 0; j < inst.length; ++j) {
-            if (enabled[inst[j].index]){
+            if (enabled[inst[j].index]) {
               builder.AddBudget(inst[j].index, tid);
             }
           }
@@ -188,7 +192,7 @@ class FMatrixS : public IFMatrix {
     while (iter_->Next()) {
       const RowBatch &batch = iter_->Value();
       #pragma omp parallel for schedule(static)
-      for (long i = 0; i < static_cast<long>(batch.size); ++i) {
+      for (long i = 0; i < static_cast<long>(batch.size); ++i) {  // NOLINT(*)
         int tid = omp_get_thread_num();
         bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
         if (bmap[ridx]) {
@@ -367,4 +371,4 @@ class FMatrixS : public IFMatrix {
 };
 }  // namespace io
 }  // namespace xgboost
-#endif  // XGBOOST_IO_SLICE_FMATRIX_INL_HPP
+#endif  // XGBOOST_IO_SLICE_FMATRIX_INL_HPP_
@@ -1,12 +1,16 @@
-#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
-#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
 /*!
+ * Copyright (c) 2014 by Contributors
  * \file sparse_batch_page.h
  * content holder of sparse batch that can be saved to disk
  * the representation can be effectively
  * use in external memory computation
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_
+#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_
+
+#include <vector>
+#include <algorithm>
 #include "../data.h"

 namespace xgboost {
@@ -1,14 +1,16 @@
+// Copyright 2014 by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
 #include <ctime>
 #include <string>
 #include <cstring>
+#include <vector>
 #include "./sync/sync.h"
-#include "io/io.h"
-#include "utils/utils.h"
-#include "utils/config.h"
-#include "learner/learner-inl.hpp"
+#include "./io/io.h"
+#include "./utils/utils.h"
+#include "./utils/config.h"
+#include "./learner/learner-inl.hpp"

 namespace xgboost {
 /*!
@@ -90,12 +92,14 @@ class BoostLearnTask {
     if (!strcmp("save_pbuffer", name)) save_with_pbuffer = atoi(val);
     if (!strncmp("eval[", name, 5)) {
       char evname[256];
-      utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display");
+      utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1,
+                    "must specify evaluation name for display");
       eval_data_names.push_back(std::string(evname));
       eval_data_paths.push_back(std::string(val));
     }
     learner.SetParam(name, val);
   }
+
  public:
   BoostLearnTask(void) {
     // default parameters
@@ -119,12 +123,13 @@ class BoostLearnTask {
     save_with_pbuffer = 0;
     data = NULL;
   }
-  ~BoostLearnTask(void){
-    for (size_t i = 0; i < deval.size(); i++){
+  ~BoostLearnTask(void) {
+    for (size_t i = 0; i < deval.size(); i++) {
       delete deval[i];
     }
     if (data != NULL) delete data;
   }
+
  private:
   inline void InitData(void) {
     if (strchr(train_path.c_str(), '%') != NULL) {
@@ -153,7 +158,7 @@ class BoostLearnTask {
     }

     std::vector<io::DataMatrix *> dcache(1, data);
-    for (size_t i = 0; i < deval.size(); ++ i) {
+    for (size_t i = 0; i < deval.size(); ++i) {
       dcache.push_back(deval[i]);
     }
     // set cache data to be all training and evaluation data
@@ -178,12 +183,12 @@ class BoostLearnTask {
     int version = rabit::LoadCheckPoint(&learner);
     if (version == 0) this->InitLearner();
     const time_t start = time(NULL);
-    unsigned long elapsed = 0;
+    unsigned long elapsed = 0;  // NOLINT(*)
     learner.CheckInit(data);

     bool allow_lazy = learner.AllowLazyCheckPoint();
     for (int i = version / 2; i < num_round; ++i) {
-      elapsed = (unsigned long)(time(NULL) - start);
+      elapsed = (unsigned long)(time(NULL) - start);  // NOLINT(*)
       if (version % 2 == 0) {
         if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
         learner.UpdateOneIter(i, *data);
@@ -196,7 +201,7 @@ class BoostLearnTask {
       }
       utils::Assert(version == rabit::VersionNumber(), "consistent check");
       std::string res = learner.EvalOneIter(i, devalall, eval_data_names);
-      if (rabit::IsDistributed()){
+      if (rabit::IsDistributed()) {
         if (rabit::GetRank() == 0) {
           rabit::TrackerPrintf("%s\n", res.c_str());
         }
@@ -215,29 +220,29 @@ class BoostLearnTask {
       }
       version += 1;
       utils::Assert(version == rabit::VersionNumber(), "consistent check");
-      elapsed = (unsigned long)(time(NULL) - start);
+      elapsed = (unsigned long)(time(NULL) - start);  // NOLINT(*)
     }
     // always save final round
     if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE") {
-      if (model_out == "NULL"){
+      if (model_out == "NULL") {
         this->SaveModel(num_round - 1);
       } else {
         this->SaveModel(model_out.c_str());
       }
     }
-    if (!silent){
+    if (!silent) {
       printf("\nupdating end, %lu sec in all\n", elapsed);
     }
   }
   inline void TaskEval(void) {
     learner.EvalOneIter(0, devalall, eval_data_names);
   }
-  inline void TaskDump(void){
+  inline void TaskDump(void) {
     FILE *fo = utils::FopenCheck(name_dump.c_str(), "w");
     std::vector<std::string> dump = learner.DumpModel(fmap, dump_model_stats != 0);
-    for (size_t i = 0; i < dump.size(); ++ i) {
-      fprintf(fo,"booster[%lu]:\n", i);
-      fprintf(fo,"%s", dump[i].c_str());
+    for (size_t i = 0; i < dump.size(); ++i) {
+      fprintf(fo, "booster[%lu]:\n", i);
+      fprintf(fo, "%s", dump[i].c_str());
     }
     fclose(fo);
   }
@@ -247,7 +252,8 @@ class BoostLearnTask {
   }
   inline void SaveModel(int i) const {
     char fname[256];
-    sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1);
+    utils::SPrintf(fname, sizeof(fname),
+                   "%s/%04d.model", model_dir_path.c_str(), i + 1);
     this->SaveModel(fname);
   }
   inline void TaskPred(void) {
@@ -266,6 +272,7 @@ class BoostLearnTask {
     }
     if (fo != stdout) fclose(fo);
   }
+
  private:
   /*! \brief whether silent */
   int silent;
@@ -309,6 +316,7 @@ class BoostLearnTask {
   std::vector<std::string> eval_data_paths;
   /*! \brief the names of the evaluation data used in output log */
   std::vector<std::string> eval_data_names;
+
  private:
   io::DataMatrix* data;
   std::vector<io::DataMatrix*> deval;
@@ -316,9 +324,9 @@ class BoostLearnTask {
   utils::FeatMap fmap;
   learner::BoostLearner learner;
 };
-}
+}  // namespace xgboost

-int main(int argc, char *argv[]){
+int main(int argc, char *argv[]) {
   xgboost::BoostLearnTask tsk;
   tsk.SetParam("seed", "0");
   int ret = tsk.Run(argc, argv);
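The one behavior-adjacent change in this commit is in SaveModel(int), where the unbounded sprintf into fname[256] is replaced by utils::SPrintf(fname, sizeof(fname), ...). The diff does not show that helper's definition; a plausible sketch of such a bounded wrapper (an assumption for illustration, not xgboost's actual utils::SPrintf implementation) is:

#include <cstdarg>
#include <cstddef>
#include <cstdio>

// Hypothetical bounded formatter in the spirit of utils::SPrintf:
// formats into a fixed-size buffer and never writes past its end.
inline void SPrintf(char *buf, std::size_t size, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  std::vsnprintf(buf, size, fmt, args);  // truncates instead of overflowing
  va_end(args);
}

// Usage mirroring the call site in the diff:
//   char fname[256];
//   SPrintf(fname, sizeof(fname), "%s/%04d.model", model_dir_path.c_str(), i + 1);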