Refactor tests with data generator. (#5439)

Jiaming Yuan 2020-03-27 06:44:44 +08:00 committed by GitHub
parent 7146b91d5a
commit 4942da64ae
26 changed files with 334 additions and 259 deletions
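// Aside -- the pattern this commit applies across every file below: the old
// CreateDMatrix helper returned a heap-allocated std::shared_ptr<DMatrix>*
// that each test had to dereference and manually delete, while the new
// RandomDataGenerator returns the shared_ptr by value so cleanup is automatic.
// A minimal standalone sketch of that ownership change (Widget is a stand-in
// for DMatrix):
#include <memory>

struct Widget {};

// Old style: caller receives a pointer to a shared_ptr and must delete it.
std::shared_ptr<Widget>* CreateOld() {
  return new std::shared_ptr<Widget>(new Widget);
}
// New style: value semantics; the last owner cleans up.
std::shared_ptr<Widget> CreateNew() { return std::make_shared<Widget>(); }

int main() {
  auto pp = CreateOld();
  auto p = *pp;  // every call site needed this dereference...
  delete pp;     // ...and this delete -- the lines removed throughout this diff
  auto q = CreateNew();  // the replacement: nothing to delete
  return 0;
}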


@ -82,8 +82,7 @@ TEST(c_api, Version) {
TEST(c_api, ConfigIO) {
size_t constexpr kRows = 10;
auto pp_dmat = CreateDMatrix(kRows, 10, 0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatix();
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
std::vector<bst_float> labels(kRows);
for (size_t i = 0; i < labels.size(); ++i) {
@ -110,16 +109,13 @@ TEST(c_api, ConfigIO) {
auto config_1 = Json::Load({config_str_1.c_str(), config_str_1.size()});
ASSERT_EQ(config_0, config_1);
delete pp_dmat;
}
TEST(c_api, JsonModelIO) {
size_t constexpr kRows = 10;
dmlc::TemporaryDirectory tempdir;
auto pp_dmat = CreateDMatrix(kRows, 10, 0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatix();
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
std::vector<bst_float> labels(kRows);
for (size_t i = 0; i < labels.size(); ++i) {
@ -144,6 +140,5 @@ TEST(c_api, JsonModelIO) {
ASSERT_EQ(model_str_0.front(), '{');
ASSERT_EQ(model_str_0, model_str_1);
delete pp_dmat;
}
} // namespace xgboost


@ -9,26 +9,25 @@ namespace xgboost {
namespace common {
TEST(DenseColumn, Test) {
auto dmat = CreateDMatrix(100, 10, 0.0);
auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatix();
GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), 256);
gmat.Init(dmat.get(), 256);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.2);
for (auto i = 0ull; i < (*dmat)->Info().num_row_; i++) {
for (auto j = 0ull; j < (*dmat)->Info().num_col_; j++) {
for (auto i = 0ull; i < dmat->Info().num_row_; i++) {
for (auto j = 0ull; j < dmat->Info().num_col_; j++) {
auto col = column_matrix.GetColumn(j);
ASSERT_EQ(gmat.index[i * (*dmat)->Info().num_col_ + j],
ASSERT_EQ(gmat.index[i * dmat->Info().num_col_ + j],
col.GetGlobalBinIdx(i));
}
}
delete dmat;
}
TEST(SparseColumn, Test) {
auto dmat = CreateDMatrix(100, 1, 0.85);
auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatix();
GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), 256);
gmat.Init(dmat.get(), 256);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.5);
auto col = column_matrix.GetColumn(0);
@ -37,13 +36,12 @@ TEST(SparseColumn, Test) {
ASSERT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
col.GetGlobalBinIdx(i));
}
delete dmat;
}
TEST(DenseColumnWithMissing, Test) {
auto dmat = CreateDMatrix(100, 1, 0.5);
auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatix();
GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), 256);
gmat.Init(dmat.get(), 256);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.2);
auto col = column_matrix.GetColumn(0);
@ -52,7 +50,6 @@ TEST(DenseColumnWithMissing, Test) {
EXPECT_EQ(gmat.index[gmat.row_ptr[col.GetRowIdx(i)]],
col.GetGlobalBinIdx(i));
}
delete dmat;
}
void TestGHistIndexMatrixCreation(size_t nthreads) {


@ -128,8 +128,7 @@ TEST(CutsBuilder, SearchGroupInd) {
size_t constexpr kRows = 17;
size_t constexpr kCols = 15;
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
std::shared_ptr<DMatrix> p_mat {*pp_dmat};
auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
std::vector<bst_int> group(kNumGroups);
group[0] = 2;
@ -149,8 +148,6 @@ TEST(CutsBuilder, SearchGroupInd) {
ASSERT_EQ(group_ind, 2);
EXPECT_ANY_THROW(CutsBuilder::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));
delete pp_dmat;
}
TEST(SparseCuts, SingleThreadedBuild) {
@ -158,8 +155,7 @@ TEST(SparseCuts, SingleThreadedBuild) {
size_t constexpr kCols = 31;
size_t constexpr kBins = 256;
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
std::shared_ptr<DMatrix> p_fmat {*pp_dmat};
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
common::GHistIndexMatrix hmat;
hmat.Init(p_fmat.get(), kBins);
@ -173,8 +169,6 @@ TEST(SparseCuts, SingleThreadedBuild) {
ASSERT_EQ(hmat.cut.Ptrs(), cuts.Ptrs());
ASSERT_EQ(hmat.cut.Values(), cuts.Values());
ASSERT_EQ(hmat.cut.MinValues(), cuts.MinValues());
delete pp_dmat;
}
TEST(SparseCuts, MultiThreadedBuild) {
@ -212,17 +206,13 @@ TEST(SparseCuts, MultiThreadedBuild) {
};
{
auto pp_mat = CreateDMatrix(kRows, kCols, 0);
DMatrix* p_fmat = (*pp_mat).get();
Compare(p_fmat);
delete pp_mat;
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
Compare(p_fmat.get());
}
{
auto pp_mat = CreateDMatrix(kRows, kCols, 0.0001);
DMatrix* p_fmat = (*pp_mat).get();
Compare(p_fmat);
delete pp_mat;
auto p_fmat = RandomDataGenerator(kRows, kCols, 0.0001).GenerateDMatix();
Compare(p_fmat.get());
}
omp_set_num_threads(ori_nthreads);


@ -128,7 +128,7 @@ inline void TestRank(const std::vector<float>& cuts,
// Ignore the last cut, it's special
double sum_weight = 0.0;
size_t j = 0;
for (auto i = 0; i < cuts.size() - 1; i++) {
for (size_t i = 0; i < cuts.size() - 1; i++) {
while (cuts[i] > sorted_x[j]) {
sum_weight += sorted_weights[j];
j++;
@ -142,7 +142,7 @@ inline void TestRank(const std::vector<float>& cuts,
inline void ValidateColumn(const HistogramCuts& cuts, int column_idx,
const std::vector<float>& sorted_column,
const std::vector<float>& sorted_weights,
int num_bins) {
size_t num_bins) {
// Check the endpoints are correct
EXPECT_LT(cuts.MinValues()[column_idx], sorted_column.front());


@ -68,8 +68,7 @@ TEST(Adapter, CSCAdapterColsMoreThanRows) {
}
TEST(c_api, DMatrixSliceAdapterFromSimpleDMatrix) {
auto pp_dmat = CreateDMatrix(6, 2, 1.0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(6, 2, 1.0).GenerateDMatix();
std::vector<int> ridx_set = {1, 3, 5};
data::DMatrixSliceAdapter adapter(p_dmat.get(),
@ -91,8 +90,6 @@ TEST(c_api, DMatrixSliceAdapterFromSimpleDMatrix) {
}
}
}
delete pp_dmat;
}
// A mock for JVM data iterator.


@ -16,7 +16,7 @@ namespace xgboost {
TEST(EllpackPage, EmptyDMatrix) {
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
constexpr float kSparsity = 0;
auto dmat = *CreateDMatrix(kNRows, kNCols, kSparsity);
auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatix();
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
auto impl = page.Impl();
ASSERT_EQ(impl->row_stride, 0);


@ -220,8 +220,7 @@ TEST(SimpleDMatrix, FromFile) {
TEST(SimpleDMatrix, Slice) {
const int kRows = 6;
const int kCols = 2;
auto pp_dmat = CreateDMatrix(kRows, kCols, 1.0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(kRows, kCols, 1.0).GenerateDMatix();
auto &labels = p_dmat->Info().labels_.HostVector();
auto &weights = p_dmat->Info().weights_.HostVector();
auto &base_margin = p_dmat->Info().base_margin_.HostVector();
@ -257,8 +256,6 @@ TEST(SimpleDMatrix, Slice) {
EXPECT_EQ(old_inst[j], new_inst[j]);
}
}
delete pp_dmat;
};
TEST(SimpleDMatrix, SaveLoadBinary) {


@ -55,8 +55,7 @@ TEST(GBTree, WrongUpdater) {
size_t constexpr kRows = 17;
size_t constexpr kCols = 15;
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
p_dmat->Info().labels_.Resize(kRows);
@ -64,7 +63,6 @@ TEST(GBTree, WrongUpdater) {
// Hist can not be used for updating tree.
learner->SetParams(Args{{"tree_method", "hist"}, {"process_type", "update"}});
ASSERT_THROW(learner->UpdateOneIter(0, p_dmat), dmlc::Error);
delete pp_dmat;
}
#ifdef XGBOOST_USE_CUDA
@ -72,8 +70,7 @@ TEST(GBTree, ChoosePredictor) {
size_t constexpr kRows = 17;
size_t constexpr kCols = 15;
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
p_dmat->Info().labels_.Resize(kRows);
@ -117,8 +114,6 @@ TEST(GBTree, ChoosePredictor) {
}
// data is not pulled back into host
ASSERT_FALSE(data.HostCanWrite());
delete pp_dmat;
}
#endif // XGBOOST_USE_CUDA
@ -200,8 +195,7 @@ TEST(Dart, JsonIO) {
TEST(Dart, Prediction) {
size_t constexpr kRows = 16, kCols = 10;
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
auto& p_mat = *pp_dmat;
auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
std::vector<bst_float> labels (kRows);
for (size_t i = 0; i < kRows; ++i) {
@ -230,7 +224,5 @@ TEST(Dart, Prediction) {
// Inference doesn't drop tree.
ASSERT_GT(std::abs(h_predts_training[i] - h_predts_inference[i]), kRtEps);
}
delete pp_dmat;
}
} // namespace xgboost


@ -16,7 +16,7 @@
#include "helpers.h"
#include "xgboost/c_api.h"
#include "../../src/data/adapter.h"
#include "../../src/gbm/gbtree_model.h"
#include "xgboost/predictor.h"
@ -155,26 +155,112 @@ SimpleLCG::StateType SimpleLCG::Max() const {
return max_value_;
}
std::shared_ptr<xgboost::DMatrix>* CreateDMatrix(int rows, int columns,
float sparsity, int seed) {
const float missing_value = -1;
std::vector<float> test_data(rows * columns);
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
SimpleLCG lcg{seed_};
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
CHECK(out);
xgboost::SimpleLCG gen(seed);
SimpleRealUniformDistribution<float> dis(0.0f, 1.0f);
for (auto &e : test_data) {
if (dis(&gen) < sparsity) {
e = missing_value;
out->Resize(rows_ * cols_, 0);
auto &h_data = out->HostVector();
float sparsity = sparsity_ * (upper_ - lower_) + lower_;
for (auto &v : h_data) {
auto g = dist(&lcg);
if (g < sparsity) {
v = std::numeric_limits<float>::quiet_NaN();
} else {
e = dis(&gen);
v = dist(&lcg);
}
}
if (device_ >= 0) {
out->SetDevice(device_);
out->DeviceSpan();
}
}
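// Aside -- why the threshold `sparsity_ * (upper_ - lower_) + lower_` above
// yields the requested missing fraction: draws are uniform on [lower, upper),
// so exactly sparsity_ of the probability mass lies below that point. A
// standalone check (std::mt19937 stands in for SimpleLCG):
#include <iostream>
#include <random>

int main() {
  float lower = -2.f, upper = 3.f, sparsity = 0.4f;
  float threshold = sparsity * (upper - lower) + lower;  // -2 + 0.4 * 5 = 0
  std::mt19937 gen(0);
  std::uniform_real_distribution<float> dist(lower, upper);
  int missing = 0, n = 100000;
  for (int i = 0; i < n; ++i) {
    missing += dist(gen) < threshold ? 1 : 0;
  }
  std::cout << static_cast<double>(missing) / n << "\n";  // ~0.4
  return 0;
}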
DMatrixHandle handle;
XGDMatrixCreateFromMat(test_data.data(), rows, columns, missing_value,
&handle);
return static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
void RandomDataGenerator::GenerateArrayInterface(
HostDeviceVector<float> *storage, std::string *out) const {
CHECK(out);
this->GenerateDense(storage);
Json array_interface {Object()};
array_interface["data"] = std::vector<Json>(2);
array_interface["data"][0] = Integer(reinterpret_cast<int64_t>(storage->DevicePointer()));
array_interface["data"][1] = Boolean(false);
array_interface["shape"] = std::vector<Json>(2);
array_interface["shape"][0] = rows_;
array_interface["shape"][1] = cols_;
array_interface["typestr"] = String("<f4");
array_interface["version"] = 1;
Json::Dump(array_interface, out);
}
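// Aside -- GenerateArrayInterface emits a CUDA-array-interface style JSON
// descriptor over the generated device buffer. A sketch of the string shape
// it produces (key order may differ from Json::Dump's actual output; `ptr`
// stands in for storage->DevicePointer()):
#include <cstdint>
#include <iostream>
#include <sstream>

int main() {
  std::int64_t ptr = 140234;  // hypothetical device address
  std::ostringstream os;
  os << R"({"data":[)" << ptr << R"(,false],"shape":[16,8],)"
     << R"("typestr":"<f4","version":1})";
  std::cout << os.str() << "\n";
  return 0;
}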
void RandomDataGenerator::GenerateCSR(
HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
HostDeviceVector<bst_feature_t>* columns) const {
auto& h_value = value->HostVector();
auto& h_rptr = row_ptr->HostVector();
auto& h_cols = columns->HostVector();
SimpleLCG lcg{seed_};
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
float sparsity = sparsity_ * (upper_ - lower_) + lower_;
h_rptr.emplace_back(0);
for (size_t i = 0; i < rows_; ++i) {
size_t rptr = h_rptr.back();
for (size_t j = 0; j < cols_; ++j) {
auto g = dist(&lcg);
if (g >= sparsity) {
g = dist(&lcg);
h_value.emplace_back(g);
rptr++;
h_cols.emplace_back(j);
}
}
h_rptr.emplace_back(rptr);
}
if (device_ >= 0) {
value->SetDevice(device_);
value->DeviceSpan();
row_ptr->SetDevice(device_);
row_ptr->DeviceSpan();
columns->SetDevice(device_);
columns->DeviceSpan();
}
CHECK_LE(h_value.size(), rows_ * cols_);
CHECK_EQ(value->Size(), h_rptr.back());
CHECK_EQ(columns->Size(), value->Size());
}
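// Aside -- the invariants GenerateCSR maintains (and CHECKs above), shown on a
// hand-written 3x3 example with three missing entries:
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // dense:  [1, _, 2]
  //         [_, 3, _]
  //         [4, 5, 6]
  std::vector<float> value{1, 2, 3, 4, 5, 6};
  std::vector<std::size_t> rptr{0, 2, 3, 6};  // rptr[i+1] - rptr[i] = nnz of row i
  std::vector<std::size_t> cidx{0, 2, 1, 0, 1, 2};
  assert(value.size() == rptr.back());  // mirrors CHECK_EQ(value->Size(), h_rptr.back())
  assert(cidx.size() == value.size());  // mirrors CHECK_EQ(columns->Size(), value->Size())
  assert(value.size() <= 3 * 3);        // mirrors CHECK_LE(h_value.size(), rows_ * cols_)
  return 0;
}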
std::shared_ptr<DMatrix>
RandomDataGenerator::GenerateDMatix(bool with_label, bool float_label,
size_t classes) const {
HostDeviceVector<float> data;
HostDeviceVector<bst_row_t> rptrs;
HostDeviceVector<bst_feature_t> columns;
this->GenerateCSR(&data, &rptrs, &columns);
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(),
data.HostPointer(), rows_, data.Size(), cols_);
std::shared_ptr<DMatrix> out{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
if (with_label) {
RandomDataGenerator gen(rows_, 1, 0);
if (!float_label) {
gen.Lower(0).Upper(classes).GenerateDense(&out->Info().labels_);
auto& h_labels = out->Info().labels_.HostVector();
for (auto& v : h_labels) {
v = static_cast<float>(static_cast<uint32_t>(v));
}
} else {
gen.GenerateDense(&out->Info().labels_);
}
}
return out;
}
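// Aside -- how the integer-label branch above turns uniform floats into class
// ids: labels are drawn on [0, classes) and truncated. Standalone sketch:
#include <cstdint>
#include <iostream>
#include <random>

int main() {
  std::size_t classes = 3;
  std::mt19937 gen(0);
  std::uniform_real_distribution<float> dist(0.0f, static_cast<float>(classes));
  for (int i = 0; i < 5; ++i) {
    float v = dist(gen);
    // same cast chain as in GenerateDMatix: float -> uint32_t -> float
    std::cout << static_cast<float>(static_cast<uint32_t>(v)) << " ";  // 0, 1 or 2
  }
  std::cout << "\n";
  return 0;
}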
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(
@ -290,8 +376,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
std::unique_ptr<GradientBooster> gbm {
GradientBooster::Create(name, generic_param, learner_model_param)};
gbm->Configure(kwargs);
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
std::vector<float> labels(kRows);
for (size_t i = 0; i < kRows; ++i) {
@ -309,7 +394,6 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
gbm->DoBoost(p_dmat.get(), &gpair, &predts);
delete pp_dmat;
return gbm;
}


@ -144,7 +144,7 @@ class SimpleRealUniformDistribution {
"Result type must be floating point.");
long double const r = (static_cast<long double>(rng->Max())
- static_cast<long double>(rng->Min())) + 1.0L;
size_t const log2r = std::log(r) / std::log(2.0L);
auto const log2r = static_cast<size_t>(std::log(r) / std::log(2.0L));
size_t m = std::max<size_t>(1UL, (Bits + log2r - 1UL) / log2r);
ResultT sum_value = 0, r_k = 1;
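// Aside -- the log2r change above only makes the float-to-integer narrowing
// explicit; the computed value is unchanged. Equivalent standalone check:
#include <cmath>
#include <cstddef>

int main() {
  long double r = 1024.0L;
  std::size_t a = std::log(r) / std::log(2.0L);                     // old: implicit
  auto b = static_cast<std::size_t>(std::log(r) / std::log(2.0L));  // new: explicit
  return a == b ? 0 : 1;  // always 0: both truncate the same quotient
}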
@ -169,20 +169,49 @@ class SimpleRealUniformDistribution {
}
};
/**
* \fn std::shared_ptr<xgboost::DMatrix> CreateDMatrix(int rows, int columns, float sparsity, int seed);
*
* \brief Creates dmatrix with uniform random data between 0-1.
*
* \param rows The rows.
* \param columns The columns.
* \param sparsity The sparsity.
* \param seed The seed.
*
* \return The new d matrix.
*/
std::shared_ptr<xgboost::DMatrix> *CreateDMatrix(int rows, int columns,
float sparsity, int seed = 0);
// Generate in-memory random data without using DMatrix.
class RandomDataGenerator {
bst_row_t rows_;
size_t cols_;
float sparsity_;
float lower_;
float upper_;
int32_t device_;
int32_t seed_;
public:
RandomDataGenerator(bst_row_t rows, size_t cols, float sparsity)
: rows_{rows}, cols_{cols}, sparsity_{sparsity}, lower_{0.0f}, upper_{1.0f},
device_{-1}, seed_{0} {}
RandomDataGenerator &Lower(float v) {
lower_ = v;
return *this;
}
RandomDataGenerator& Upper(float v) {
upper_ = v;
return *this;
}
RandomDataGenerator& Device(int32_t d) {
device_ = d;
return *this;
}
RandomDataGenerator& Seed(int32_t s) {
seed_ = s;
return *this;
}
void GenerateDense(HostDeviceVector<float>* out) const;
void GenerateArrayInterface(HostDeviceVector<float>* storage, std::string* out) const;
void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
HostDeviceVector<bst_feature_t>* columns) const;
std::shared_ptr<DMatrix> GenerateDMatix(bool with_label = false,
bool float_label = true,
size_t classes = 1) const;
};
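// Aside -- a sketch of the fluent interface declared above, as used across the
// updated tests (assumes compilation inside the test tree; CHECK_EQ comes from
// dmlc logging via the xgboost headers):
#include "helpers.h"

int main() {
  auto p_dmat = xgboost::RandomDataGenerator{128, 16, /*sparsity=*/0.3f}
                    .Lower(-1.0f)
                    .Upper(1.0f)
                    .Seed(7)
                    .GenerateDMatix(/*with_label=*/true);
  CHECK_EQ(p_dmat->Info().num_row_, 128);
  return 0;  // p_dmat is a shared_ptr -- no manual delete, unlike CreateDMatrix
}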
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(
size_t n_entries, size_t page_size, std::string tmp_file);
@ -257,8 +286,8 @@ class HistogramCutsWrapper : public common::HistogramCuts {
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
int n_rows, int n_cols, bst_float sparsity= 0) {
auto dmat = CreateDMatrix(n_rows, n_cols, sparsity, 3);
const SparsePage& batch = *(*dmat)->GetBatches<xgboost::SparsePage>().begin();
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatix();
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
HistogramCutsWrapper cmat;
cmat.SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
@ -280,9 +309,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
}
auto page = std::unique_ptr<EllpackPageImpl>(
new EllpackPageImpl(0, cmat, batch, (*dmat)->IsDense(), row_stride));
delete dmat;
new EllpackPageImpl(0, cmat, batch, dmat->IsDense(), row_stride));
return page;
}


@ -15,8 +15,7 @@ TEST(Linear, Shotgun) {
size_t constexpr kRows = 10;
size_t constexpr kCols = 10;
auto pp_dmat = xgboost::CreateDMatrix(kRows, kCols, 0);
auto p_fmat {*pp_dmat};
auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
LearnerModelParam mparam;
@ -42,8 +41,6 @@ TEST(Linear, Shotgun) {
xgboost::LinearUpdater::Create("shotgun", &lparam));
EXPECT_ANY_THROW(updater->Configure({{"feature_selector", "random"}}));
}
delete pp_dmat;
}
TEST(Shotgun, JsonIO) {
@ -54,8 +51,7 @@ TEST(Linear, coordinate) {
size_t constexpr kRows = 10;
size_t constexpr kCols = 10;
auto pp_dmat = xgboost::CreateDMatrix(kRows, kCols, 0);
auto p_fmat {*pp_dmat};
auto p_fmat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
LearnerModelParam mparam;
@ -73,8 +69,6 @@ TEST(Linear, coordinate) {
updater->Update(&gpair, p_fmat.get(), &model, gpair.Size());
ASSERT_EQ(model.bias()[0], 5.0f);
delete pp_dmat;
}
TEST(Coordinate, JsonIO){


@ -12,7 +12,7 @@ TEST(Linear, GPUCoordinate) {
size_t constexpr kRows = 10;
size_t constexpr kCols = 10;
auto mat = xgboost::CreateDMatrix(kRows, kCols, 0);
auto mat = xgboost::RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
auto lparam = CreateEmptyGenericParam(GPUIDX);
LearnerModelParam mparam;
@ -24,15 +24,13 @@ TEST(Linear, GPUCoordinate) {
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
mat->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model{&mparam};
model.LazyInitModel();
updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
updater->Update(&gpair, mat.get(), &model, gpair.Size());
ASSERT_EQ(model.bias()[0], 5.0f);
delete mat;
}
TEST(GPUCoordinate, JsonIO) {


@ -259,14 +259,14 @@ TEST(Objective, CPU_vs_CUDA) {
constexpr size_t kRows = 400;
constexpr size_t kCols = 100;
auto ppdmat = CreateDMatrix(kRows, kCols, 0, 0);
auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatix();
HostDeviceVector<float> preds;
preds.Resize(kRows);
auto& h_preds = preds.HostVector();
for (size_t i = 0; i < h_preds.size(); ++i) {
h_preds[i] = static_cast<float>(i);
}
auto& info = (*ppdmat)->Info();
auto& info = pdmat->Info();
info.labels_.Resize(kRows);
auto& h_labels = info.labels_.HostVector();
@ -297,7 +297,6 @@ TEST(Objective, CPU_vs_CUDA) {
ASSERT_NEAR(sgrad, 0.0f, kRtEps);
ASSERT_NEAR(shess, 0.0f, kRtEps);
delete ppdmat;
delete obj;
}
#endif


@ -24,11 +24,11 @@ TEST(CpuPredictor, Basic) {
gbm::GBTreeModel model = CreateTestModel(&param);
auto dmat = CreateDMatrix(kRows, kCols, 0);
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
// Test predict batch
PredictionCacheEntry out_predictions;
cpu_predictor->PredictBatch((*dmat).get(), &out_predictions, model, 0);
cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
ASSERT_EQ(model.trees.size(), out_predictions.version);
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
@ -36,7 +36,7 @@ TEST(CpuPredictor, Basic) {
}
// Test predict instance
auto &batch = *(*dmat)->GetBatches<xgboost::SparsePage>().begin();
auto const &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
for (size_t i = 0; i < batch.Size(); i++) {
std::vector<float> instance_out_predictions;
cpu_predictor->PredictInstance(batch[i], &instance_out_predictions, model);
@ -45,14 +45,14 @@ TEST(CpuPredictor, Basic) {
// Test predict leaf
std::vector<float> leaf_out_predictions;
cpu_predictor->PredictLeaf((*dmat).get(), &leaf_out_predictions, model);
cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
for (auto v : leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
// Test predict contribution
std::vector<float> out_contribution;
cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model);
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
@ -64,7 +64,7 @@ TEST(CpuPredictor, Basic) {
}
}
// Test predict contribution (approximate method)
cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model, 0, nullptr, true);
cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model, 0, nullptr, true);
for (size_t i = 0; i < out_contribution.size(); ++i) {
auto const& contri = out_contribution[i];
// shift 1 for bias, as the test tree is a decision stump, only the global bias is filled with LeafValue().
@ -74,8 +74,6 @@ TEST(CpuPredictor, Basic) {
ASSERT_EQ(contri, 0);
}
}
delete dmat;
}
TEST(CpuPredictor, ExternalMemory) {


@ -30,7 +30,7 @@ TEST(GPUPredictor, Basic) {
for (size_t i = 1; i < 33; i *= 2) {
int n_row = i, n_col = i;
auto dmat = CreateDMatrix(n_row, n_col, 0);
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatix();
LearnerModelParam param;
param.num_feature = n_col;
@ -43,9 +43,9 @@ TEST(GPUPredictor, Basic) {
PredictionCacheEntry gpu_out_predictions;
PredictionCacheEntry cpu_out_predictions;
gpu_predictor->PredictBatch((*dmat).get(), &gpu_out_predictions, model, 0);
gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
ASSERT_EQ(model.trees.size(), gpu_out_predictions.version);
cpu_predictor->PredictBatch((*dmat).get(), &cpu_out_predictions, model, 0);
cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.predictions.HostVector();
std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.predictions.HostVector();
@ -53,7 +53,6 @@ TEST(GPUPredictor, Basic) {
for (int j = 0; j < gpu_out_predictions.predictions.Size(); j++) {
ASSERT_NEAR(gpu_out_predictions_h[j], cpu_out_predictions_h[j], abs_tolerance);
}
delete dmat;
}
}


@ -21,11 +21,9 @@ TEST(Predictor, PredictionCache) {
DMatrix* m;
// Add a cache that is immediately expired.
auto add_cache = [&]() {
auto *pp_dmat = CreateDMatrix(kRows, kCols, 0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
container.Cache(p_dmat, GenericParameter::kCpuId);
m = p_dmat.get();
delete pp_dmat;
};
add_cache();
@ -42,8 +40,7 @@ void TestTrainingPrediction(size_t rows, std::string tree_method) {
std::unique_ptr<Learner> learner;
auto train = [&](std::string predictor, HostDeviceVector<float>* out) {
auto pp_m = CreateDMatrix(rows, kCols, 0);
auto p_m = *pp_m;
auto p_m = RandomDataGenerator(rows, kCols, 0).GenerateDMatix();
auto &h_label = p_m->Info().labels_.HostVector();
h_label.resize(rows);
@ -64,7 +61,6 @@ void TestTrainingPrediction(size_t rows, std::string tree_method) {
learner->UpdateOneIter(i, p_m);
}
learner->Predict(p_m, false, out);
delete pp_m;
};
// Alternate the predictor, CPU predictor can not use ellpack while GPU predictor can
// not use CPU histogram index. So it's guaranteed one of the following is not


@ -25,15 +25,13 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, int32_t bins
gbm::GBTreeModel model = CreateTestModel(&param, kClasses);
{
auto pp_ellpack = CreateDMatrix(rows, kCols, 0);
auto p_ellpack = *pp_ellpack;
auto p_ellpack = RandomDataGenerator(rows, kCols, 0).GenerateDMatix();
// Use same number of bins as rows.
for (auto const &page DMLC_ATTRIBUTE_UNUSED :
p_ellpack->GetBatches<Page>({0, static_cast<int32_t>(bins), 0})) {
}
auto pp_precise = CreateDMatrix(rows, kCols, 0);
auto p_precise = *pp_precise;
auto p_precise = RandomDataGenerator(rows, kCols, 0).GenerateDMatix();
PredictionCacheEntry approx_out_predictions;
predictor->PredictBatch(p_ellpack.get(), &approx_out_predictions, model, 0);
@ -45,21 +43,16 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, int32_t bins
CHECK_EQ(approx_out_predictions.predictions.HostVector()[i],
precise_out_predictions.predictions.HostVector()[i]);
}
delete pp_precise;
delete pp_ellpack;
}
{
// Predictor should never try to create the histogram index by itself, as only the
// histogram index from the training data is valid and the predictor doesn't know
// which matrix is used for training.
auto pp_dmat = CreateDMatrix(rows, kCols, 0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(rows, kCols, 0).GenerateDMatix();
PredictionCacheEntry precise_out_predictions;
predictor->PredictBatch(p_dmat.get(), &precise_out_predictions, model, 0);
ASSERT_FALSE(p_dmat->PageExists<Page>());
delete pp_dmat;
}
}

tests/cpp/test_helpers.cc (new file, 44 lines)

@ -0,0 +1,44 @@
#include <gtest/gtest.h>
#include <algorithm>
#include "helpers.h"
namespace xgboost {
TEST(RandomDataGenerator, DMatrix) {
size_t constexpr kRows { 16 }, kCols { 32 };
float constexpr kSparsity { 0.4f };
auto p_dmatrix = RandomDataGenerator{kRows, kCols, kSparsity}.GenerateDMatix();
HostDeviceVector<float> csr_value;
HostDeviceVector<bst_row_t> csr_rptr;
HostDeviceVector<bst_feature_t> csr_cidx;
RandomDataGenerator{kRows, kCols, kSparsity}.GenerateCSR(&csr_value, &csr_rptr, &csr_cidx);
HostDeviceVector<float> dense_data;
RandomDataGenerator{kRows, kCols, kSparsity}.GenerateDense(&dense_data);
auto it = std::copy_if(
dense_data.HostVector().begin(), dense_data.HostVector().end(),
dense_data.HostVector().begin(), [](float v) { return !std::isnan(v); });
CHECK_EQ(p_dmatrix->Info().num_row_, kRows);
CHECK_EQ(p_dmatrix->Info().num_col_, kCols);
for (auto const& page : p_dmatrix->GetBatches<SparsePage>()) {
size_t n_elements = page.data.Size();
CHECK_EQ(n_elements, it - dense_data.HostVector().begin());
CHECK_EQ(n_elements, csr_value.Size());
for (size_t i = 0; i < n_elements; ++i) {
CHECK_EQ(dense_data.HostVector()[i], csr_value.HostVector()[i]);
CHECK_EQ(dense_data.HostVector()[i], page.data.HostVector()[i].fvalue);
CHECK_EQ(page.data.HostVector()[i].index, csr_cidx.HostVector()[i]);
}
CHECK_EQ(page.offset.Size(), csr_rptr.Size());
for (size_t i = 0; i < p_dmatrix->Info().num_row_; ++i) {
CHECK_EQ(page.offset.HostVector()[i], csr_rptr.HostVector()[i]);
}
}
}
} // namespace xgboost
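// Aside -- one nit in the new test above: std::copy_if with an output range
// that overlaps its input is formally undefined behaviour; std::remove_if
// expresses the same in-place compaction safely. Standalone sketch:
#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <vector>

int main() {
  float nan = std::numeric_limits<float>::quiet_NaN();
  std::vector<float> v{1.f, nan, 2.f, nan, 3.f};
  // Shift non-NaN values to the front; `it` marks the end of the kept range.
  auto it = std::remove_if(v.begin(), v.end(),
                           [](float x) { return std::isnan(x); });
  assert(it - v.begin() == 3);  // 1, 2, 3 survive, in order
  return 0;
}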


@ -16,12 +16,10 @@ namespace xgboost {
TEST(Learner, Basic) {
using Arg = std::pair<std::string, std::string>;
auto args = {Arg("tree_method", "exact")};
auto mat_ptr = CreateDMatrix(10, 10, 0);
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
auto mat_ptr = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatix();
auto learner = std::unique_ptr<Learner>(Learner::Create({mat_ptr}));
learner->SetParams(args);
delete mat_ptr;
auto major = XGBOOST_VER_MAJOR;
auto minor = XGBOOST_VER_MINOR;
@ -36,8 +34,7 @@ TEST(Learner, ParameterValidation) {
ConsoleLogger::Configure({{"verbosity", "2"}});
size_t constexpr kRows = 1;
size_t constexpr kCols = 1;
auto pp_mat = CreateDMatrix(kRows, kCols, 0);
auto& p_mat = *pp_mat;
auto p_mat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatix();
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
learner->SetParam("validate_parameters", "1");
@ -50,17 +47,16 @@ TEST(Learner, ParameterValidation) {
std::string output = testing::internal::GetCapturedStderr();
ASSERT_TRUE(output.find("Parameters: { Knock Knock, Silence }") != std::string::npos);
delete pp_mat;
}
TEST(Learner, CheckGroup) {
using Arg = std::pair<std::string, std::string>;
size_t constexpr kNumGroups = 4;
size_t constexpr kNumRows = 17;
size_t constexpr kNumCols = 15;
bst_feature_t constexpr kNumCols = 15;
auto pp_mat = CreateDMatrix(kNumRows, kNumCols, 0);
auto& p_mat = *pp_mat;
std::shared_ptr<DMatrix> p_mat{
RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatix()};
std::vector<bst_float> weight(kNumGroups);
std::vector<bst_int> group(kNumGroups);
group[0] = 2;
@ -88,8 +84,6 @@ TEST(Learner, CheckGroup) {
group[4] = 1;
p_mat->Info().SetInfo("group", group.data(), DataType::kUInt32, kNumGroups+1);
EXPECT_ANY_THROW(learner->UpdateOneIter(0, p_mat));
delete pp_mat;
}
TEST(Learner, SLOW_CheckMultiBatch) {
@ -142,8 +136,8 @@ TEST(Learner, JsonModelIO) {
size_t constexpr kRows = 8;
int32_t constexpr kIters = 4;
auto pp_dmat = CreateDMatrix(kRows, 10, 0);
std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
std::shared_ptr<DMatrix> p_dmat{
RandomDataGenerator{kRows, 10, 0}.GenerateDMatix()};
p_dmat->Info().labels_.Resize(kRows);
CHECK_NE(p_dmat->Info().num_col_, 0);
@ -180,15 +174,12 @@ TEST(Learner, JsonModelIO) {
ASSERT_EQ(get<Object>(out["learner"]["attributes"]).size(), 1);
ASSERT_EQ(out, new_in);
}
delete pp_dmat;
}
TEST(Learner, BinaryModelIO) {
size_t constexpr kRows = 8;
int32_t constexpr kIters = 4;
auto pp_dmat = CreateDMatrix(kRows, 10, 0);
std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
auto p_dmat = RandomDataGenerator{kRows, 10, 0}.GenerateDMatix();
p_dmat->Info().labels_.Resize(kRows);
std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
@ -215,8 +206,6 @@ TEST(Learner, BinaryModelIO) {
Json::Dump(config, &config_str);
ASSERT_NE(config_str.find("rmsle"), std::string::npos);
ASSERT_EQ(config_str.find("WARNING"), std::string::npos);
delete pp_dmat;
}
#if defined(XGBOOST_USE_CUDA)
@ -224,8 +213,7 @@ TEST(Learner, BinaryModelIO) {
TEST(Learner, GPUConfiguration) {
using Arg = std::pair<std::string, std::string>;
size_t constexpr kRows = 10;
auto pp_dmat = CreateDMatrix(kRows, 10, 0);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatix();
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
std::vector<bst_float> labels(kRows);
for (size_t i = 0; i < labels.size(); ++i) {
@ -270,8 +258,6 @@ TEST(Learner, GPUConfiguration) {
learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
}
delete pp_dmat;
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost


@ -150,19 +150,16 @@ class SerializationTest : public ::testing::Test {
protected:
size_t constexpr static kRows = 10;
size_t constexpr static kCols = 10;
std::shared_ptr<DMatrix>* pp_dmat_;
std::shared_ptr<DMatrix> p_dmat_;
FeatureMap fmap_;
protected:
~SerializationTest() override {
delete pp_dmat_;
}
~SerializationTest() override = default;
void SetUp() override {
pp_dmat_ = CreateDMatrix(kRows, kCols, .5f);
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatix();
std::shared_ptr<DMatrix> p_dmat{*pp_dmat_};
p_dmat->Info().labels_.Resize(kRows);
auto &h_labels = p_dmat->Info().labels_.HostVector();
p_dmat_->Info().labels_.Resize(kRows);
auto &h_labels = p_dmat_->Info().labels_.HostVector();
xgboost::SimpleLCG gen(0);
SimpleRealUniformDistribution<float> dis(0.0f, 1.0f);
@ -183,7 +180,7 @@ TEST_F(SerializationTest, Exact) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"},
@ -192,7 +189,7 @@ TEST_F(SerializationTest, Exact) {
{"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"seed", "0"},
@ -200,7 +197,7 @@ TEST_F(SerializationTest, Exact) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(SerializationTest, Approx) {
@ -210,7 +207,7 @@ TEST_F(SerializationTest, Approx) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"},
@ -219,7 +216,7 @@ TEST_F(SerializationTest, Approx) {
{"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"seed", "0"},
@ -227,7 +224,7 @@ TEST_F(SerializationTest, Approx) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(SerializationTest, Hist) {
@ -237,7 +234,7 @@ TEST_F(SerializationTest, Hist) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"},
@ -246,7 +243,7 @@ TEST_F(SerializationTest, Hist) {
{"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"seed", "0"},
@ -254,7 +251,7 @@ TEST_F(SerializationTest, Hist) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(SerializationTest, CPU_CoordDescent) {
@ -263,7 +260,7 @@ TEST_F(SerializationTest, CPU_CoordDescent) {
{"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "coord_descent"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
#if defined(XGBOOST_USE_CUDA)
@ -274,7 +271,7 @@ TEST_F(SerializationTest, GPU_Hist) {
{"nthread", "1"},
{"max_depth", "2"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"},
@ -283,7 +280,7 @@ TEST_F(SerializationTest, GPU_Hist) {
{"max_depth", "2"},
{"num_parallel_tree", "4"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"seed", "0"},
@ -291,11 +288,11 @@ TEST_F(SerializationTest, GPU_Hist) {
{"nthread", "1"},
{"max_depth", "2"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(SerializationTest, ConfigurationCount) {
auto& p_dmat = *pp_dmat_;
auto& p_dmat = p_dmat_;
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_dmat};
xgboost::ConsoleLogger::Configure({{"verbosity", "3"}});
@ -347,7 +344,7 @@ TEST_F(SerializationTest, GPU_CoordDescent) {
{"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "gpu_coord_descent"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
#endif // defined(XGBOOST_USE_CUDA)
@ -355,9 +352,9 @@ TEST_F(SerializationTest, GPU_CoordDescent) {
class LogitSerializationTest : public SerializationTest {
protected:
void SetUp() override {
pp_dmat_ = CreateDMatrix(kRows, kCols, .5f);
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatix();
std::shared_ptr<DMatrix> p_dmat{*pp_dmat_};
std::shared_ptr<DMatrix> p_dmat{p_dmat_};
p_dmat->Info().labels_.Resize(kRows);
auto &h_labels = p_dmat->Info().labels_.HostVector();
@ -382,7 +379,7 @@ TEST_F(LogitSerializationTest, Exact) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"objective", "binary:logistic"},
@ -391,7 +388,7 @@ TEST_F(LogitSerializationTest, Exact) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(LogitSerializationTest, Approx) {
@ -402,7 +399,7 @@ TEST_F(LogitSerializationTest, Approx) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"objective", "binary:logistic"},
@ -411,7 +408,7 @@ TEST_F(LogitSerializationTest, Approx) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(LogitSerializationTest, Hist) {
@ -422,7 +419,7 @@ TEST_F(LogitSerializationTest, Hist) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"objective", "binary:logistic"},
@ -431,7 +428,7 @@ TEST_F(LogitSerializationTest, Hist) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(LogitSerializationTest, CPU_CoordDescent) {
@ -440,7 +437,7 @@ TEST_F(LogitSerializationTest, CPU_CoordDescent) {
{"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "coord_descent"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
#if defined(XGBOOST_USE_CUDA)
@ -452,7 +449,7 @@ TEST_F(LogitSerializationTest, GPU_Hist) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"objective", "binary:logistic"},
@ -462,7 +459,7 @@ TEST_F(LogitSerializationTest, GPU_Hist) {
{"max_depth", "2"},
{"num_parallel_tree", "4"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"objective", "binary:logistic"},
@ -471,7 +468,7 @@ TEST_F(LogitSerializationTest, GPU_Hist) {
{"max_depth", "2"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(LogitSerializationTest, GPU_CoordDescent) {
@ -481,7 +478,7 @@ TEST_F(LogitSerializationTest, GPU_CoordDescent) {
{"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "gpu_coord_descent"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
#endif // defined(XGBOOST_USE_CUDA)
@ -490,9 +487,9 @@ class MultiClassesSerializationTest : public SerializationTest {
size_t constexpr static kClasses = 4;
void SetUp() override {
pp_dmat_ = CreateDMatrix(kRows, kCols, .5f);
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatix();
std::shared_ptr<DMatrix> p_dmat{*pp_dmat_};
std::shared_ptr<DMatrix> p_dmat{p_dmat_};
p_dmat->Info().labels_.Resize(kRows);
auto &h_labels = p_dmat->Info().labels_.HostVector();
@ -517,7 +514,7 @@ TEST_F(MultiClassesSerializationTest, Exact) {
{"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"num_class", std::to_string(kClasses)},
@ -527,7 +524,7 @@ TEST_F(MultiClassesSerializationTest, Exact) {
{"num_parallel_tree", "4"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"num_class", std::to_string(kClasses)},
@ -536,7 +533,7 @@ TEST_F(MultiClassesSerializationTest, Exact) {
{"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "exact"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(MultiClassesSerializationTest, Approx) {
@ -547,7 +544,7 @@ TEST_F(MultiClassesSerializationTest, Approx) {
{"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"num_class", std::to_string(kClasses)},
@ -556,7 +553,7 @@ TEST_F(MultiClassesSerializationTest, Approx) {
{"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "approx"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(MultiClassesSerializationTest, Hist) {
@ -567,7 +564,7 @@ TEST_F(MultiClassesSerializationTest, Hist) {
{"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"num_class", std::to_string(kClasses)},
@ -577,7 +574,7 @@ TEST_F(MultiClassesSerializationTest, Hist) {
{"enable_experimental_json_serialization", "1"},
{"num_parallel_tree", "4"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"num_class", std::to_string(kClasses)},
@ -586,7 +583,7 @@ TEST_F(MultiClassesSerializationTest, Hist) {
{"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(MultiClassesSerializationTest, CPU_CoordDescent) {
@ -595,7 +592,7 @@ TEST_F(MultiClassesSerializationTest, CPU_CoordDescent) {
{"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "coord_descent"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
#if defined(XGBOOST_USE_CUDA)
@ -611,7 +608,7 @@ TEST_F(MultiClassesSerializationTest, GPU_Hist) {
{"predictor", "gpu_predictor"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "gbtree"},
{"num_class", std::to_string(kClasses)},
@ -623,7 +620,7 @@ TEST_F(MultiClassesSerializationTest, GPU_Hist) {
{"num_parallel_tree", "3"},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
TestLearnerSerialization({{"booster", "dart"},
{"num_class", std::to_string(kClasses)},
@ -632,7 +629,7 @@ TEST_F(MultiClassesSerializationTest, GPU_Hist) {
{"max_depth", std::to_string(kClasses)},
{"enable_experimental_json_serialization", "1"},
{"tree_method", "gpu_hist"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
TEST_F(MultiClassesSerializationTest, GPU_CoordDescent) {
@ -642,7 +639,7 @@ TEST_F(MultiClassesSerializationTest, GPU_CoordDescent) {
{"nthread", "1"},
{"enable_experimental_json_serialization", "1"},
{"updater", "gpu_coord_descent"}},
fmap_, *pp_dmat_);
fmap_, p_dmat_);
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost


@ -11,11 +11,10 @@ void TestDeterminsticHistogram() {
size_t constexpr kBins = 24, kCols = 8, kRows = 32768, kRounds = 16;
float constexpr kLower = -1e-2, kUpper = 1e2;
auto pp_m = CreateDMatrix(kRows, kCols, 0.5);
auto& matrix = **pp_m;
auto matrix = RandomDataGenerator(kRows, kCols, 0.5).GenerateDMatix();
BatchParam batch_param{0, static_cast<int32_t>(kBins), 0};
for (auto const& batch : matrix.GetBatches<EllpackPage>(batch_param)) {
for (auto const& batch : matrix->GetBatches<EllpackPage>(batch_param)) {
auto* page = batch.Impl();
tree::RowPartitioner row_partitioner(0, kRows);
@ -58,7 +57,6 @@ void TestDeterminsticHistogram() {
}
}
}
delete pp_m;
}
TEST(Histogram, GPUDeterminstic) {


@ -313,22 +313,21 @@ TEST(GpuHist, MinSplitLoss) {
constexpr size_t kRows = 32;
constexpr size_t kCols = 16;
constexpr float kSparsity = 0.6;
auto dmat = CreateDMatrix(kRows, kCols, kSparsity, 3);
auto dmat = RandomDataGenerator(kRows, kCols, kSparsity).Seed(3).GenerateDMatix();
auto gpair = GenerateRandomGradients(kRows);
{
int32_t n_nodes = TestMinSplitLoss((*dmat).get(), 0.01, &gpair);
int32_t n_nodes = TestMinSplitLoss(dmat.get(), 0.01, &gpair);
// This is not strictly verified, meaning the number `2` is whatever GPU_Hist returned
// when writing this test; it is only used to verify that a larger gamma (below) does
// prevent building the tree.
ASSERT_EQ(n_nodes, 2);
}
{
int32_t n_nodes = TestMinSplitLoss((*dmat).get(), 100.0, &gpair);
int32_t n_nodes = TestMinSplitLoss(dmat.get(), 100.0, &gpair);
// No new nodes with gamma == 100.
ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
}
delete dmat;
}
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,


@ -15,8 +15,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
GenericParameter param;
param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto pp_dmat = CreateDMatrix(kRows, kCols, 0.6, 3);
auto p_dmat = *pp_dmat;
auto p_dmat = RandomDataGenerator{kRows, kCols, 0.6f}.Seed(3).GenerateDMatix();
HostDeviceVector<GradientPair> gradients (kRows);
std::vector<GradientPair>& h_gradients = gradients.HostVector();
@ -62,7 +61,6 @@ TEST(GrowHistMaker, InteractionConstraint) {
ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
}
delete pp_dmat;
}
} // namespace tree


@ -28,7 +28,8 @@ TEST(Updater, Prune) {
HostDeviceVector<GradientPair> gpair =
{ {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f},
{0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f} };
auto dmat = CreateDMatrix(32, kCols, 0.4, 3);
std::shared_ptr<DMatrix> p_dmat {
RandomDataGenerator{32, 10, 0}.GenerateDMatix() };
auto lparam = CreateEmptyGenericParam(GPUIDX);
@ -42,19 +43,19 @@ TEST(Updater, Prune) {
// loss_chg < min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 0);
// loss_chg > min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 11.0f, 0.0f);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
// loss_chg == min_split_loss;
tree.Stat(0).loss_chg = 10;
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
@ -68,7 +69,7 @@ TEST(Updater, Prune) {
/*loss_chg=*/19.0f, 0.0f);
cfg.emplace_back(std::make_pair("max_depth", "1"));
pruner->Configure(cfg);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
@ -77,10 +78,8 @@ TEST(Updater, Prune) {
/*loss_chg=*/18.0f, 0.0f);
cfg.emplace_back(std::make_pair("min_split_loss", "0"));
pruner->Configure(cfg);
pruner->Update(&gpair, dmat->get(), trees);
pruner->Update(&gpair, p_dmat.get(), trees);
ASSERT_EQ(tree.NumExtraNodes(), 2);
delete dmat;
}
} // namespace tree
} // namespace xgboost


@ -139,23 +139,23 @@ class QuantileHistMock : public QuantileHistMaker {
{ {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
{0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} };
size_t constexpr kMaxBins = 4;
auto dmat = CreateDMatrix(kNRows, kNCols, 0, 3);
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatix();
// dense, no missing values
common::GHistIndexMatrix gmat;
gmat.Init((*dmat).get(), kMaxBins);
gmat.Init(dmat.get(), kMaxBins);
RealImpl::InitData(gmat, row_gpairs, *(*dmat), tree);
RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
hist_.AddHistRow(0);
BuildHist(row_gpairs, row_set_collection_[0],
gmat, quantile_index_block, hist_[0]);
RealImpl::InitNewNode(0, gmat, row_gpairs, *(*dmat), tree);
RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree);
/* Compute correct split (best_split) using the computed histogram */
const size_t num_row = dmat->get()->Info().num_row_;
const size_t num_feature = dmat->get()->Info().num_col_;
const size_t num_row = dmat->Info().num_row_;
const size_t num_feature = dmat->Info().num_col_;
CHECK_EQ(num_row, row_gpairs.size());
// Compute total gradient for all data points
GradientPairPrecise total_gpair;
@ -216,8 +216,6 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::EvaluateSplits({node}, gmat, hist_, tree);
ASSERT_EQ(snode_[0].best.SplitIndex(), best_split_feature);
ASSERT_EQ(snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]);
delete dmat;
}
void TestEvaluateSplitParallel(const GHistIndexBlockMatrix &quantile_index_block,
@ -230,7 +228,7 @@ class QuantileHistMock : public QuantileHistMaker {
};
int static constexpr kNRows = 8, kNCols = 16;
std::shared_ptr<xgboost::DMatrix> *dmat_;
std::shared_ptr<xgboost::DMatrix> dmat_;
const std::vector<std::pair<std::string, std::string> > cfg_;
std::shared_ptr<BuilderMock> builder_;
@ -240,23 +238,23 @@ class QuantileHistMock : public QuantileHistMaker {
cfg_{args} {
QuantileHistMaker::Configure(args);
spliteval_->Init(&param_);
dmat_ = CreateDMatrix(kNRows, kNCols, 0.8, 3);
dmat_ = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatix();
builder_.reset(
new BuilderMock(
param_,
std::move(pruner_),
std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
int_constraint_,
dmat_->get()));
dmat_.get()));
}
~QuantileHistMock() override { delete dmat_; }
~QuantileHistMock() override = default;
static size_t GetNumColumns() { return kNCols; }
void TestInitData() {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init((*dmat_).get(), kMaxBins);
gmat.Init(dmat_.get(), kMaxBins);
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);
@ -265,7 +263,7 @@ class QuantileHistMock : public QuantileHistMaker {
{ {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
{0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
builder_->TestInitData(gmat, gpair, dmat_->get(), tree);
builder_->TestInitData(gmat, gpair, dmat_.get(), tree);
}
void TestBuildHist() {
@ -274,9 +272,9 @@ class QuantileHistMock : public QuantileHistMaker {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init((*dmat_).get(), kMaxBins);
gmat.Init(dmat_.get(), kMaxBins);
builder_->TestBuildHist(0, gmat, *(*dmat_).get(), tree);
builder_->TestBuildHist(0, gmat, *dmat_, tree);
}
void TestEvaluateSplit() {


@ -15,16 +15,18 @@ namespace xgboost {
namespace tree {
TEST(Updater, Refresh) {
int constexpr kNRows = 8, kNCols = 16;
bst_row_t constexpr kRows = 8;
bst_feature_t constexpr kCols = 16;
HostDeviceVector<GradientPair> gpair =
{ {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
{0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
auto dmat = CreateDMatrix(kNRows, kNCols, 0.4, 3);
std::vector<std::pair<std::string, std::string>> cfg {
{"reg_alpha", "0.0"},
{"num_feature", std::to_string(kNCols)},
{"reg_lambda", "1"}};
std::shared_ptr<DMatrix> p_dmat{
RandomDataGenerator{kRows, kCols, 0.4f}.Seed(3).GenerateDMatix()};
std::vector<std::pair<std::string, std::string>> cfg{
{"reg_alpha", "0.0"},
{"num_feature", std::to_string(kCols)},
{"reg_lambda", "1"}};
RegTree tree = RegTree();
auto lparam = CreateEmptyGenericParam(GPUIDX);
@ -40,7 +42,7 @@ TEST(Updater, Refresh) {
tree.Stat(cright).base_weight = 1.3;
refresher->Configure(cfg);
refresher->Update(&gpair, dmat->get(), trees);
refresher->Update(&gpair, p_dmat.get(), trees);
bst_float constexpr kEps = 1e-6;
ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
@ -48,8 +50,6 @@ TEST(Updater, Refresh) {
ASSERT_NEAR(0, tree.Stat(cleft).loss_chg, kEps);
ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);
ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);
delete dmat;
}
} // namespace tree