Prevent copying SimpleDMatrix. (#5453)

* Set default dtor for SimpleDMatrix to initialize default copy ctor, which is
deleted due to unique ptr.

* Remove commented code.
* Remove warning for calling host function (std::max).
* Remove warning for initialization order.
* Remove warning for unused variables.
This commit is contained in:
Jiaming Yuan 2020-04-02 07:01:49 +08:00 committed by GitHub
parent e86030c360
commit 29c6ad943a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 36 additions and 50 deletions

View File

@ -64,6 +64,11 @@ inline std::vector<std::string> Split(const std::string& s, char delim) {
return ret;
}
template <typename T>
XGBOOST_DEVICE T Max(T a, T b) {
return a < b ? b : a;
}
// simple routine to convert any data to string
template<typename T>
inline std::string ToString(const T& data) {

View File

@ -8,6 +8,8 @@
#include <cstddef>
#include <algorithm>
#include "common.h"
#ifdef __CUDACC__
#include "device_helpers.cuh"
#endif // __CUDACC__
@ -29,12 +31,12 @@ inline void ClearBit(CompressedByteT *byte, int bit_idx) {
*byte &= ~(1 << bit_idx);
}
static const int kPadding = 4; // Assign padding so we can read slightly off
// the beginning of the array
// the beginning of the array
// The number of bits required to represent a given unsigned range
inline XGBOOST_DEVICE size_t SymbolBits(size_t num_symbols) {
auto bits = std::ceil(log2(static_cast<double>(num_symbols)));
return std::max(static_cast<size_t>(bits), size_t(1));
return common::Max(static_cast<size_t>(bits), size_t(1));
}
} // namespace detail

View File

@ -297,9 +297,9 @@ struct Index {
std::vector<uint8_t> data_;
std::vector<uint32_t> offset_; // size of this field is equal to number of features
void* data_ptr_;
uint32_t* offset_ptr_;
size_t p_;
BinTypeSize binTypeSize_;
size_t p_;
uint32_t* offset_ptr_;
Func func_;
};

View File

@ -710,7 +710,7 @@ class QuantileSketchTemplate {
// check invariant
size_t n = (1ULL << nlevel);
CHECK(n * limit_size >= maxn) << "invalid init parameter";
CHECK(nlevel <= std::max(1, static_cast<int>(limit_size * eps)))
CHECK(nlevel <= std::max(static_cast<size_t>(1), static_cast<size_t>(limit_size * eps)))
<< "invalid init parameter";
}

View File

@ -427,25 +427,6 @@ DMatrix* DMatrix::Load(const std::string& uri,
return dmat;
}
/*
DMatrix* DMatrix::Create(std::unique_ptr<DataSource<SparsePage>>&& source,
const std::string& cache_prefix) {
if (cache_prefix.length() == 0) {
// Data split mode is fixed to be row right now.
rabit::Allreduce<rabit::op::Max>(&source->info.num_col_, 1);
return new data::SimpleDMatrix(std::move(source));
} else {
#if DMLC_ENABLE_STD_THREAD
return new data::SparsePageDMatrix(std::move(source), cache_prefix);
#else
LOG(FATAL) << "External memory is not enabled in mingw";
return nullptr;
#endif // DMLC_ENABLE_STD_THREAD
}
}
*/
template <typename AdapterT>
DMatrix* DMatrix::Create(AdapterT* adapter, float missing, int nthread,
const std::string& cache_prefix, size_t page_size ) {

View File

@ -23,6 +23,7 @@ class SimpleDMatrix : public DMatrix {
explicit SimpleDMatrix(AdapterT* adapter, float missing, int nthread);
explicit SimpleDMatrix(dmlc::Stream* in_stream);
~SimpleDMatrix() override = default;
void SaveToLocalFile(const std::string& fname);

View File

@ -30,7 +30,6 @@ class SparsePageDMatrix : public DMatrix {
row_source_.reset(new data::SparsePageSource(adapter, missing, nthread,
cache_prefix, page_size));
}
// Set number of threads but keep old value so we can reset it after
~SparsePageDMatrix() override = default;
MetaInfo& Info() override;

View File

@ -403,7 +403,6 @@ TEST(hist_util, IndexBinData) {
size_t constexpr kRows = 100;
size_t constexpr kCols = 10;
size_t bin_id = 0;
for (auto max_bin : kBinSizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
common::GHistIndexMatrix hmat;
@ -434,7 +433,6 @@ TEST(hist_util, SparseIndexBinData) {
size_t constexpr kRows = 100;
size_t constexpr kCols = 10;
size_t bin_id = 0;
for (auto max_bin : bin_sizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatix();
common::GHistIndexMatrix hmat;

View File

@ -72,32 +72,32 @@ TEST(SimpleDMatrix, Empty) {
data::CSRAdapter csr_adapter(row_ptr.data(), feature_idx.data(), data.data(),
0, 0, 0);
data::SimpleDMatrix dmat(&csr_adapter,
std::numeric_limits<float>::quiet_NaN(), 1);
CHECK_EQ(dmat.Info().num_nonzero_, 0);
CHECK_EQ(dmat.Info().num_row_, 0);
CHECK_EQ(dmat.Info().num_col_, 0);
for (auto &batch : dmat.GetBatches<SparsePage>()) {
std::unique_ptr<data::SimpleDMatrix> dmat(new data::SimpleDMatrix(
&csr_adapter, std::numeric_limits<float>::quiet_NaN(), 1));
CHECK_EQ(dmat->Info().num_nonzero_, 0);
CHECK_EQ(dmat->Info().num_row_, 0);
CHECK_EQ(dmat->Info().num_col_, 0);
for (auto &batch : dmat->GetBatches<SparsePage>()) {
CHECK_EQ(batch.Size(), 0);
}
data::DenseAdapter dense_adapter(nullptr, 0, 0);
dmat = data::SimpleDMatrix(&dense_adapter,
std::numeric_limits<float>::quiet_NaN(), 1);
CHECK_EQ(dmat.Info().num_nonzero_, 0);
CHECK_EQ(dmat.Info().num_row_, 0);
CHECK_EQ(dmat.Info().num_col_, 0);
for (auto &batch : dmat.GetBatches<SparsePage>()) {
dmat.reset( new data::SimpleDMatrix(&dense_adapter,
std::numeric_limits<float>::quiet_NaN(), 1) );
CHECK_EQ(dmat->Info().num_nonzero_, 0);
CHECK_EQ(dmat->Info().num_row_, 0);
CHECK_EQ(dmat->Info().num_col_, 0);
for (auto &batch : dmat->GetBatches<SparsePage>()) {
CHECK_EQ(batch.Size(), 0);
}
data::CSCAdapter csc_adapter(nullptr, nullptr, nullptr, 0, 0);
dmat = data::SimpleDMatrix(&csc_adapter,
std::numeric_limits<float>::quiet_NaN(), 1);
CHECK_EQ(dmat.Info().num_nonzero_, 0);
CHECK_EQ(dmat.Info().num_row_, 0);
CHECK_EQ(dmat.Info().num_col_, 0);
for (auto &batch : dmat.GetBatches<SparsePage>()) {
dmat.reset(new data::SimpleDMatrix(
&csc_adapter, std::numeric_limits<float>::quiet_NaN(), 1));
CHECK_EQ(dmat->Info().num_nonzero_, 0);
CHECK_EQ(dmat->Info().num_row_, 0);
CHECK_EQ(dmat->Info().num_col_, 0);
for (auto &batch : dmat->GetBatches<SparsePage>()) {
CHECK_EQ(batch.Size(), 0);
}
}
@ -109,11 +109,11 @@ TEST(SimpleDMatrix, MissingData) {
data::CSRAdapter adapter(row_ptr.data(), feature_idx.data(), data.data(), 2,
3, 2);
data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),
1);
CHECK_EQ(dmat.Info().num_nonzero_, 2);
dmat = data::SimpleDMatrix(&adapter, 1.0, 1);
CHECK_EQ(dmat.Info().num_nonzero_, 1);
std::unique_ptr<data::SimpleDMatrix> dmat{new data::SimpleDMatrix{
&adapter, std::numeric_limits<float>::quiet_NaN(), 1}};
CHECK_EQ(dmat->Info().num_nonzero_, 2);
dmat.reset(new data::SimpleDMatrix(&adapter, 1.0, 1));
CHECK_EQ(dmat->Info().num_nonzero_, 1);
}
TEST(SimpleDMatrix, EmptyRow) {