Cleanup data generator. (#8094)
- Avoid duplicated definition of data shape.
- Explicitly define numpy iterator for CPU data.
This commit is contained in:
parent
5156be0f49
commit
ef11b024e8
@ -27,8 +27,8 @@ void TestEquivalent(float sparsity) {
|
|||||||
offset += num_elements;
|
offset += num_elements;
|
||||||
}
|
}
|
||||||
auto from_iter = page_concatenated->GetDeviceAccessor(0);
|
auto from_iter = page_concatenated->GetDeviceAccessor(0);
|
||||||
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::kCols);
|
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());
|
||||||
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::kRows);
|
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());
|
||||||
|
|
||||||
std::string interface_str = iter.AsArray();
|
std::string interface_str = iter.AsArray();
|
||||||
auto adapter = CupyAdapter(interface_str);
|
auto adapter = CupyAdapter(interface_str);
|
||||||
@ -98,8 +98,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
|
|||||||
auto impl = ellpack.Impl();
|
auto impl = ellpack.Impl();
|
||||||
common::CompressedIterator<uint32_t> iterator(
|
common::CompressedIterator<uint32_t> iterator(
|
||||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||||
auto cols = CudaArrayIterForTest::kCols;
|
auto cols = CudaArrayIterForTest::Cols();
|
||||||
auto rows = CudaArrayIterForTest::kRows;
|
auto rows = CudaArrayIterForTest::Rows();
|
||||||
|
|
||||||
auto j_interface =
|
auto j_interface =
|
||||||
Json::Load({interface_str.c_str(), interface_str.size()});
|
Json::Load({interface_str.c_str(), interface_str.size()});
|
||||||
|
|||||||
@ -1,25 +1,27 @@
|
|||||||
/*!
|
/*!
|
||||||
* Copyright 2016-2022 by XGBoost contributors
|
* Copyright 2016-2022 by XGBoost contributors
|
||||||
*/
|
*/
|
||||||
|
#include "helpers.h"
|
||||||
|
|
||||||
#include <dmlc/filesystem.h>
|
#include <dmlc/filesystem.h>
|
||||||
#include <xgboost/logging.h>
|
#include <gtest/gtest.h>
|
||||||
#include <xgboost/objective.h>
|
|
||||||
#include <xgboost/metric.h>
|
|
||||||
#include <xgboost/learner.h>
|
|
||||||
#include <xgboost/gbm.h>
|
#include <xgboost/gbm.h>
|
||||||
#include <xgboost/json.h>
|
#include <xgboost/json.h>
|
||||||
#include <gtest/gtest.h>
|
#include <xgboost/learner.h>
|
||||||
|
#include <xgboost/logging.h>
|
||||||
|
#include <xgboost/metric.h>
|
||||||
|
#include <xgboost/objective.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <random>
|
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
#include "helpers.h"
|
|
||||||
#include "xgboost/c_api.h"
|
|
||||||
#include "../../src/data/adapter.h"
|
#include "../../src/data/adapter.h"
|
||||||
|
#include "../../src/data/iterative_dmatrix.h"
|
||||||
#include "../../src/data/simple_dmatrix.h"
|
#include "../../src/data/simple_dmatrix.h"
|
||||||
#include "../../src/data/sparse_page_dmatrix.h"
|
#include "../../src/data/sparse_page_dmatrix.h"
|
||||||
#include "../../src/gbm/gbtree_model.h"
|
#include "../../src/gbm/gbtree_model.h"
|
||||||
|
#include "xgboost/c_api.h"
|
||||||
#include "xgboost/predictor.h"
|
#include "xgboost/predictor.h"
|
||||||
|
|
||||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||||
@ -379,6 +381,30 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a data::IterativeDMatrix from a NumpyArrayIterForTest that is constructed with this
// generator's sparsity_, rows_ and cols_, and returns it as a std::shared_ptr<DMatrix>.
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix() {
|
||||||
|
// The last argument requests a single batch; the iterator is a local of this function.
NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
|
||||||
|
auto m = std::make_shared<data::IterativeDMatrix>(
|
||||||
|
// NOTE(review): NaN is presumably the missing-value marker, 0 the thread count and bins_ the
// max-bin setting -- confirm against the IterativeDMatrix constructor. Also confirm the
// matrix never touches &iter after construction, because iter is destroyed on return.
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Constructs the iterator: forwards sparsity, rows, cols and batches to the ArrayIterForTest
// base, selects Context::kCpuId on rng_, fills batches_ and interface_ from
// GenerateArrayInterfaceBatch, then calls Reset().
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
|
||||||
|
size_t batches)
|
||||||
|
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||||
|
rng_->Device(Context::kCpuId);
|
||||||
|
// &data_ is handed over as an output argument (presumably the storage the generated batches
// refer to -- TODO confirm); the call yields the per-batch strings and the interface string.
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||||
|
this->Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hands the next batch to the proxy DMatrix.
// Returns 0 once iter_ has reached n_batches_; otherwise passes batches_[iter_] to the proxy,
// advances iter_ and returns 1.
int NumpyArrayIterForTest::Next() {
|
||||||
|
if (iter_ == n_batches_) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// NOTE(review): the value returned by XGProxyDMatrixSetDataDense is discarded here --
// confirm whether a failure should be surfaced to the test.
XGProxyDMatrixSetDataDense(proxy_, batches_[iter_].c_str());
|
||||||
|
iter_++;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
std::shared_ptr<DMatrix>
|
std::shared_ptr<DMatrix>
|
||||||
GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
|
GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
|
||||||
data::DenseAdapter adapter(x.data(), num_rows, num_columns);
|
data::DenseAdapter adapter(x.data(), num_rows, num_columns);
|
||||||
@ -389,7 +415,7 @@ GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
|
|||||||
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
|
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
|
||||||
size_t n_batches, std::string prefix) {
|
size_t n_batches, std::string prefix) {
|
||||||
CHECK_GE(n_samples, n_batches);
|
CHECK_GE(n_samples, n_batches);
|
||||||
ArrayIterForTest iter(0, n_samples, n_features, n_batches);
|
NumpyArrayIterForTest iter(0, n_samples, n_features, n_batches);
|
||||||
|
|
||||||
std::unique_ptr<DMatrix> dmat{
|
std::unique_ptr<DMatrix> dmat{
|
||||||
DMatrix::Create(static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
|
DMatrix::Create(static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
|
||||||
@ -416,7 +442,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(size_t n_entries,
|
|||||||
std::string prefix) {
|
std::string prefix) {
|
||||||
size_t n_columns = 3;
|
size_t n_columns = 3;
|
||||||
size_t n_rows = n_entries / n_columns;
|
size_t n_rows = n_entries / n_columns;
|
||||||
ArrayIterForTest iter(0, n_rows, n_columns, 2);
|
NumpyArrayIterForTest iter(0, n_rows, n_columns, 2);
|
||||||
|
|
||||||
std::unique_ptr<DMatrix> dmat{DMatrix::Create(
|
std::unique_ptr<DMatrix> dmat{DMatrix::Create(
|
||||||
static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
|
static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
|
||||||
@ -563,18 +589,6 @@ ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols,
|
|||||||
|
|
||||||
ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }
|
ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }
|
||||||
|
|
||||||
int ArrayIterForTest::Next() {
|
|
||||||
if (iter_ == n_batches_) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
XGProxyDMatrixSetDataDense(proxy_, batches_[iter_].c_str());
|
|
||||||
iter_++;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t constexpr ArrayIterForTest::kRows;
|
|
||||||
size_t constexpr ArrayIterForTest::kCols;
|
|
||||||
|
|
||||||
void DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,
|
void DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,
|
||||||
std::vector<size_t> *p_row_ptr,
|
std::vector<size_t> *p_row_ptr,
|
||||||
std::vector<bst_feature_t> *p_cids) {
|
std::vector<bst_feature_t> *p_cids) {
|
||||||
|
|||||||
@ -15,10 +15,6 @@ CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
|
|||||||
this->Reset();
|
this->Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t constexpr CudaArrayIterForTest::kRows;
|
|
||||||
size_t constexpr CudaArrayIterForTest::kCols;
|
|
||||||
size_t constexpr CudaArrayIterForTest::kBatches;
|
|
||||||
|
|
||||||
int CudaArrayIterForTest::Next() {
|
int CudaArrayIterForTest::Next() {
|
||||||
if (iter_ == n_batches_) {
|
if (iter_ == n_batches_) {
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@ -298,6 +298,7 @@ class RandomDataGenerator {
|
|||||||
#if defined(XGBOOST_USE_CUDA)
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
std::shared_ptr<DMatrix> GenerateDeviceDMatrix();
|
std::shared_ptr<DMatrix> GenerateDeviceDMatrix();
|
||||||
#endif
|
#endif
|
||||||
|
std::shared_ptr<DMatrix> GenerateQuantileDMatrix();
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::vector<float>
|
inline std::vector<float>
|
||||||
@ -401,38 +402,38 @@ class ArrayIterForTest {
|
|||||||
size_t n_batches_;
|
size_t n_batches_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
size_t static constexpr kRows { 1000 };
|
size_t static constexpr Rows() { return 1024; }
|
||||||
size_t static constexpr kBatches { 100 };
|
size_t static constexpr Batches() { return 100; }
|
||||||
size_t static constexpr kCols { 13 };
|
size_t static constexpr Cols() { return 13; }
|
||||||
|
|
||||||
std::string AsArray() const {
|
public:
|
||||||
return interface_;
|
std::string AsArray() const { return interface_; }
|
||||||
}
|
|
||||||
|
|
||||||
virtual int Next();
|
virtual int Next() = 0;
|
||||||
virtual void Reset() {
|
virtual void Reset() { iter_ = 0; }
|
||||||
iter_ = 0;
|
|
||||||
}
|
|
||||||
size_t Iter() const { return iter_; }
|
size_t Iter() const { return iter_; }
|
||||||
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
||||||
|
|
||||||
explicit ArrayIterForTest(float sparsity, size_t rows = kRows,
|
explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
|
||||||
size_t cols = kCols, size_t batches = kBatches);
|
|
||||||
virtual ~ArrayIterForTest();
|
virtual ~ArrayIterForTest();
|
||||||
};
|
};
|
||||||
|
|
||||||
class CudaArrayIterForTest : public ArrayIterForTest {
|
class CudaArrayIterForTest : public ArrayIterForTest {
|
||||||
public:
|
public:
|
||||||
size_t static constexpr kRows{1000};
|
explicit CudaArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
|
||||||
size_t static constexpr kBatches{100};
|
size_t batches = Batches());
|
||||||
size_t static constexpr kCols{13};
|
|
||||||
|
|
||||||
explicit CudaArrayIterForTest(float sparsity, size_t rows = kRows,
|
|
||||||
size_t cols = kCols, size_t batches = kBatches);
|
|
||||||
int Next() override;
|
int Next() override;
|
||||||
~CudaArrayIterForTest() override = default;
|
~CudaArrayIterForTest() override = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ArrayIterForTest subclass that supplies its own Next() override.
class NumpyArrayIterForTest : public ArrayIterForTest {
|
||||||
|
public:
|
||||||
|
// sparsity is required; rows, cols and batches default to Rows(), Cols() and Batches().
explicit NumpyArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
|
||||||
|
size_t batches = Batches());
|
||||||
|
int Next() override;
|
||||||
|
~NumpyArrayIterForTest() override = default;
|
||||||
|
};
|
||||||
|
|
||||||
void DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,
|
void DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,
|
||||||
std::vector<size_t> *p_row_ptr,
|
std::vector<size_t> *p_row_ptr,
|
||||||
std::vector<bst_feature_t> *p_cids);
|
std::vector<bst_feature_t> *p_cids);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user