Use Booster context in DMatrix. (#8896)
- Pass context from booster to DMatrix. - Use context instead of integer for `n_threads`. - Check the consistency configuration for `max_bin`. - Test for all combinations of initialization options.
This commit is contained in:
@@ -156,6 +156,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DenseCutsCategorical) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
int categorical_sizes[] = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
int sizes[] = {25, 100, 1000};
|
||||
@@ -165,7 +166,7 @@ TEST(HistUtil, DenseCutsCategorical) {
|
||||
std::vector<float> x_sorted(x);
|
||||
std::sort(x_sorted.begin(), x_sorted.end());
|
||||
auto dmat = GetDMatrixFromData(x, n, 1);
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
auto cuts_from_sketch = cuts.Values();
|
||||
EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
|
||||
EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
|
||||
@@ -176,6 +177,7 @@ TEST(HistUtil, DenseCutsCategorical) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, DenseCutsAccuracyTest) {
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100};
|
||||
int num_columns = 5;
|
||||
@@ -183,7 +185,7 @@ TEST(HistUtil, DenseCutsAccuracyTest) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -193,6 +195,7 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
@@ -200,11 +203,11 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
|
||||
dmat->Info().weights_.HostVector() = w;
|
||||
for (auto num_bins : bin_sizes) {
|
||||
{
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), true);
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, true);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
{
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), false);
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, false);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -215,6 +218,7 @@ void TestQuantileWithHessian(bool use_sorted) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
@@ -225,15 +229,13 @@ void TestQuantileWithHessian(bool use_sorted) {
|
||||
dmat->Info().weights_.HostVector() = w;
|
||||
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts_hess =
|
||||
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted, hessian);
|
||||
HistogramCuts cuts_hess = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted, hessian);
|
||||
for (size_t i = 0; i < w.size(); ++i) {
|
||||
dmat->Info().weights_.HostVector()[i] = w[i] * hessian[i];
|
||||
}
|
||||
ValidateCuts(cuts_hess, dmat.get(), num_bins);
|
||||
|
||||
HistogramCuts cuts_wh =
|
||||
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted);
|
||||
HistogramCuts cuts_wh = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted);
|
||||
ValidateCuts(cuts_wh, dmat.get(), num_bins);
|
||||
|
||||
ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());
|
||||
@@ -255,12 +257,13 @@ TEST(HistUtil, DenseCutsExternalMemory) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
|
||||
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
|
||||
ValidateCuts(cuts, dmat.get(), num_bins);
|
||||
}
|
||||
}
|
||||
@@ -275,12 +278,12 @@ TEST(HistUtil, IndexBinBound) {
|
||||
kUint32BinsTypeSize};
|
||||
size_t constexpr kRows = 100;
|
||||
size_t constexpr kCols = 10;
|
||||
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
size_t bin_id = 0;
|
||||
for (auto max_bin : bin_sizes) {
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
|
||||
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
|
||||
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
|
||||
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
|
||||
EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize());
|
||||
}
|
||||
@@ -300,10 +303,11 @@ TEST(HistUtil, IndexBinData) {
|
||||
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
|
||||
size_t constexpr kRows = 100;
|
||||
size_t constexpr kCols = 10;
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
|
||||
for (auto max_bin : kBinSizes) {
|
||||
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
|
||||
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
|
||||
uint32_t const* offsets = hmat.index.Offset();
|
||||
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
|
||||
switch (max_bin) {
|
||||
@@ -327,10 +331,10 @@ void TestSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
if (with_group) {
|
||||
h_weights.resize(kGroups);
|
||||
@@ -363,7 +367,7 @@ void TestSketchFromWeights(bool with_group) {
|
||||
|
||||
if (with_group) {
|
||||
m->Info().weights_ = decltype(m->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
|
||||
HistogramCuts non_weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
@@ -382,7 +386,7 @@ void TestSketchFromWeights(bool with_group) {
|
||||
for (size_t i = 0; i < h_weights.size(); ++i) {
|
||||
h_weights[i] = static_cast<float>(i + 1) / static_cast<float>(kGroups);
|
||||
}
|
||||
HistogramCuts weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
|
||||
HistogramCuts weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
ValidateCuts(weighted, m.get(), kBins);
|
||||
}
|
||||
}
|
||||
@@ -393,11 +397,12 @@ TEST(HistUtil, SketchFromWeights) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, SketchCategoricalFeatures) {
|
||||
TestCategoricalSketch(1000, 256, 32, false, [](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
|
||||
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
|
||||
TestCategoricalSketch(1000, 256, 32, false, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
|
||||
});
|
||||
TestCategoricalSketch(1000, 256, 32, true, [](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
|
||||
TestCategoricalSketch(1000, 256, 32, true, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
|
||||
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
|
||||
});
|
||||
}
|
||||
} // namespace common
|
||||
|
||||
Reference in New Issue
Block a user