/*!
 * Copyright 2018-2021 by Contributors
 */
// NOTE(review): in this copy of the file the six #include directives below
// have no header name, and several template parameter/argument lists further
// down are missing too (for example after `template`, `std::unique_ptr` and
// `std::vector`). It looks as if everything between angle brackets was lost;
// restore those pieces from version control before building.
#include
#include
#include
#include
#include
#include
#include "../helpers.h"
#include "../../../src/tree/param.h"
#include "../../../src/tree/updater_quantile_hist.h"
#include "../../../src/tree/split_evaluator.h"
#include "xgboost/data.h"

namespace xgboost {
namespace tree {

/*
 * Test harness derived from QuantileHistMaker. The nested BuilderMock calls
 * members of the real builder (RealImpl) directly and checks the results with
 * gtest assertions.
 */
class QuantileHistMock : public QuantileHistMaker {
  // Absolute tolerance passed to ASSERT_NEAR when histograms are compared.
  static double constexpr kEps = 1e-6;

  // Builder subclass whose Test* methods each exercise one builder step.
  template struct BuilderMock : public QuantileHistMaker::Builder {
    using RealImpl = QuantileHistMaker::Builder;
    using ExpandEntryT = typename RealImpl::ExpandEntry;
    using GHistRowT = typename RealImpl::GHistRowT;

    // Forwards all arguments to RealImpl; the leading literal 1 is RealImpl's
    // first constructor argument (its meaning is not visible in this file).
    BuilderMock(const TrainParam& param,
                std::unique_ptr pruner,
                FeatureInteractionConstraintHost int_constraint,
                DMatrix const* fmat)
        : RealImpl(1, param, std::move(pruner), std::move(int_constraint), fmat) {}

   public:
    /*
     * Runs RealImpl::InitData and then validates the quantile cuts in
     * gmat.cut and the bin indices in gmat against the rows of p_fmat.
     */
    void TestInitData(const GHistIndexMatrix& gmat,
                      std::vector* gpair,
                      DMatrix* p_fmat,
                      const RegTree& tree) {
      RealImpl::InitData(gmat, *p_fmat, tree, gpair);
      ASSERT_EQ(this->data_layout_, RealImpl::DataLayout::kSparseData);

      /* The creation of HistCutMatrix and GHistIndexMatrix are not technically
       * part of QuantileHist updater logic, but we include it here because
       * QuantileHist updater object currently stores GHistIndexMatrix
       * internally. According to https://github.com/dmlc/xgboost/pull/3803,
       * we should eventually move GHistIndexMatrix out of the QuantileHist
       * updater. */

      const size_t num_row = p_fmat->Info().num_row_;
      const size_t num_col = p_fmat->Info().num_col_;
      /* Validate HistCutMatrix */
      ASSERT_EQ(gmat.cut.Ptrs().size(), num_col + 1);
      for (size_t fid = 0; fid < num_col; ++fid) {
        // [ibegin, iend) is the range of cut values that belongs to feature fid.
        const size_t ibegin = gmat.cut.Ptrs()[fid];
        const size_t iend = gmat.cut.Ptrs()[fid + 1];
        // Ordered, but empty feature is allowed.
ASSERT_LE(ibegin, iend); for (size_t i = ibegin; i < iend - 1; ++i) { // Quantile points must be sorted in ascending order // No duplicates allowed ASSERT_LT(gmat.cut.Values()[i], gmat.cut.Values()[i + 1]) << "ibegin: " << ibegin << ", " << "iend: " << iend; } } /* Validate GHistIndexMatrix */ ASSERT_EQ(gmat.row_ptr.size(), num_row + 1); ASSERT_LT(*std::max_element(gmat.index.begin(), gmat.index.end()), gmat.cut.Ptrs().back()); for (const auto& batch : p_fmat->GetBatches()) { auto page = batch.GetView(); for (size_t i = 0; i < batch.Size(); ++i) { const size_t rid = batch.base_rowid + i; ASSERT_LT(rid, num_row); const size_t gmat_row_offset = gmat.row_ptr[rid]; ASSERT_LT(gmat_row_offset, gmat.index.Size()); SparsePage::Inst inst = page[i]; ASSERT_EQ(gmat.row_ptr[rid] + inst.size(), gmat.row_ptr[rid + 1]); for (size_t j = 0; j < inst.size(); ++j) { // Each entry of GHistIndexMatrix represents a bin ID const size_t bin_id = gmat.index[gmat_row_offset + j]; const size_t fid = inst[j].index; // The bin ID must correspond to correct feature ASSERT_GE(bin_id, gmat.cut.Ptrs()[fid]); ASSERT_LT(bin_id, gmat.cut.Ptrs()[fid + 1]); // The bin ID must correspond to a region between two // suitable quantile points ASSERT_LT(inst[j].fvalue, gmat.cut.Values()[bin_id]); if (bin_id > gmat.cut.Ptrs()[fid]) { ASSERT_GE(inst[j].fvalue, gmat.cut.Values()[bin_id - 1]); } else { ASSERT_GE(inst[j].fvalue, gmat.cut.MinValues()[fid]); } } } } } void TestInitDataSampling(const GHistIndexMatrix& gmat, std::vector* gpair, DMatrix* p_fmat, const RegTree& tree) { // check SimpleSkip size_t initial_seed = 777; std::linear_congruential_engine(1) << 63 > eng_first(initial_seed); for (size_t i = 0; i < 100; ++i) { eng_first(); } uint64_t initial_seed_th = RandomReplace::SimpleSkip(100, initial_seed, 16807, RandomReplace::kMod); std::linear_congruential_engine eng_second(initial_seed_th); ASSERT_EQ(eng_first(), eng_second()); const size_t nthreads = omp_get_num_threads(); // save state of global rng 
engine auto initial_rnd = common::GlobalRandom(); std::vector unused_rows_cpy = this->unused_rows_; RealImpl::InitData(gmat, *p_fmat, tree, gpair); std::vector row_indices_initial = *(this->row_set_collection_.Data()); std::vector unused_row_indices_initial = this->unused_rows_; ASSERT_EQ(row_indices_initial.size(), p_fmat->Info().num_row_); auto check_each_row_occurs_in_one_of_arrays = [](const std::vector& first, const std::vector& second, size_t nrows) { ASSERT_EQ(first.size(), nrows); ASSERT_EQ(second.size(), 0); }; check_each_row_occurs_in_one_of_arrays(row_indices_initial, unused_row_indices_initial, p_fmat->Info().num_row_); for (size_t i_nthreads = 1; i_nthreads < 4; ++i_nthreads) { omp_set_num_threads(i_nthreads); // return initial state of global rng engine common::GlobalRandom() = initial_rnd; this->unused_rows_ = unused_rows_cpy; RealImpl::InitData(gmat, *p_fmat, tree, gpair); std::vector& row_indices = *(this->row_set_collection_.Data()); ASSERT_EQ(row_indices_initial.size(), row_indices.size()); for (size_t i = 0; i < row_indices_initial.size(); ++i) { ASSERT_EQ(row_indices_initial[i], row_indices[i]); } std::vector& unused_row_indices = this->unused_rows_; ASSERT_EQ(unused_row_indices_initial.size(), unused_row_indices.size()); for (size_t i = 0; i < unused_row_indices_initial.size(); ++i) { ASSERT_EQ(unused_row_indices_initial[i], unused_row_indices[i]); } check_each_row_occurs_in_one_of_arrays(row_indices, unused_row_indices, p_fmat->Info().num_row_); } omp_set_num_threads(nthreads); } void TestAddHistRows(const GHistIndexMatrix& gmat, std::vector* gpair, DMatrix* p_fmat, RegTree* tree) { RealImpl::InitData(gmat, *p_fmat, *tree, gpair); int starting_index = std::numeric_limits::max(); int sync_count = 0; this->nodes_for_explicit_hist_build_.clear(); this->nodes_for_subtraction_trick_.clear(); tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); 
tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); this->nodes_for_explicit_hist_build_.emplace_back(3, 4, tree->GetDepth(3), 0.0f, 0); this->nodes_for_explicit_hist_build_.emplace_back(4, 3, tree->GetDepth(4), 0.0f, 0); this->nodes_for_subtraction_trick_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); this->nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); ASSERT_EQ(sync_count, 2); ASSERT_EQ(starting_index, 3); for (const ExpandEntryT& node : this->nodes_for_explicit_hist_build_) { ASSERT_EQ(this->hist_.RowExists(node.nid), true); } for (const ExpandEntryT& node : this->nodes_for_subtraction_trick_) { ASSERT_EQ(this->hist_.RowExists(node.nid), true); } } void TestSyncHistograms(const GHistIndexMatrix& gmat, std::vector* gpair, DMatrix* p_fmat, RegTree* tree) { // init RealImpl::InitData(gmat, *p_fmat, *tree, gpair); int starting_index = std::numeric_limits::max(); int sync_count = 0; this->nodes_for_explicit_hist_build_.clear(); this->nodes_for_subtraction_trick_.clear(); // level 0 this->nodes_for_explicit_hist_build_.emplace_back(0, -1, tree->GetDepth(0), 0.0f, 0); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); tree->ExpandNode(0, 0, 0, false, 0, 0, 0, 0, 0, 0, 0); this->nodes_for_explicit_hist_build_.clear(); this->nodes_for_subtraction_trick_.clear(); // level 1 this->nodes_for_explicit_hist_build_.emplace_back((*tree)[0].LeftChild(), (*tree)[0].RightChild(), tree->GetDepth(1), 0.0f, 0); this->nodes_for_subtraction_trick_.emplace_back((*tree)[0].RightChild(), (*tree)[0].LeftChild(), tree->GetDepth(2), 0.0f, 0); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); tree->ExpandNode((*tree)[0].LeftChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); tree->ExpandNode((*tree)[0].RightChild(), 0, 0, false, 0, 0, 0, 0, 0, 0, 0); this->nodes_for_explicit_hist_build_.clear(); 
this->nodes_for_subtraction_trick_.clear(); // level 2 this->nodes_for_explicit_hist_build_.emplace_back(3, 4, tree->GetDepth(3), 0.0f, 0); this->nodes_for_subtraction_trick_.emplace_back(4, 3, tree->GetDepth(4), 0.0f, 0); this->nodes_for_explicit_hist_build_.emplace_back(5, 6, tree->GetDepth(5), 0.0f, 0); this->nodes_for_subtraction_trick_.emplace_back(6, 5, tree->GetDepth(6), 0.0f, 0); this->hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, tree); const size_t n_nodes = this->nodes_for_explicit_hist_build_.size(); ASSERT_EQ(n_nodes, 2ul); this->row_set_collection_.AddSplit(0, (*tree)[0].LeftChild(), (*tree)[0].RightChild(), 4, 4); this->row_set_collection_.AddSplit(1, (*tree)[1].LeftChild(), (*tree)[1].RightChild(), 2, 2); this->row_set_collection_.AddSplit(2, (*tree)[2].LeftChild(), (*tree)[2].RightChild(), 2, 2); common::BlockedSpace2d space(n_nodes, [&](size_t node) { const int32_t nid = this->nodes_for_explicit_hist_build_[node].nid; return this->row_set_collection_[nid].Size(); }, 256); std::vector target_hists(n_nodes); for (size_t i = 0; i < this->nodes_for_explicit_hist_build_.size(); ++i) { const int32_t nid = this->nodes_for_explicit_hist_build_[i].nid; target_hists[i] = this->hist_[nid]; } const size_t nbins = this->hist_builder_.GetNumBins(); // set values to specific nodes hist std::vector n_ids = {1, 2}; for (size_t i : n_ids) { auto this_hist = this->hist_[i]; GradientSumT* p_hist = reinterpret_cast(this_hist.data()); for (size_t bin_id = 0; bin_id < 2*nbins; ++bin_id) { p_hist[bin_id] = 2*bin_id; } } n_ids[0] = 3; n_ids[1] = 5; for (size_t i : n_ids) { auto this_hist = this->hist_[i]; GradientSumT* p_hist = reinterpret_cast(this_hist.data()); for (size_t bin_id = 0; bin_id < 2*nbins; ++bin_id) { p_hist[bin_id] = bin_id; } } this->hist_buffer_.Reset(1, n_nodes, space, target_hists); // sync hist this->hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, tree); auto check_hist = [] (const GHistRowT parent, const 
GHistRowT left, const GHistRowT right, size_t begin, size_t end) { const GradientSumT* p_parent = reinterpret_cast(parent.data()); const GradientSumT* p_left = reinterpret_cast(left.data()); const GradientSumT* p_right = reinterpret_cast(right.data()); for (size_t i = 2 * begin; i < 2 * end; ++i) { ASSERT_EQ(p_parent[i], p_left[i] + p_right[i]); } }; for (const ExpandEntryT& node : this->nodes_for_explicit_hist_build_) { auto this_hist = this->hist_[node.nid]; const size_t parent_id = (*tree)[node.nid].Parent(); auto parent_hist = this->hist_[parent_id]; auto sibling_hist = this->hist_[node.sibling_nid]; check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); } for (const ExpandEntryT& node : this->nodes_for_subtraction_trick_) { auto this_hist = this->hist_[node.nid]; const size_t parent_id = (*tree)[node.nid].Parent(); auto parent_hist = this->hist_[parent_id]; auto sibling_hist = this->hist_[node.sibling_nid]; check_hist(parent_hist, this_hist, sibling_hist, 0, nbins); } } void TestBuildHist(int nid, const GHistIndexMatrix& gmat, const DMatrix& fmat, const RegTree& tree) { std::vector gpair = { {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f}, {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} }; RealImpl::InitData(gmat, fmat, tree, &gpair); GHistIndexBlockMatrix dummy; this->hist_.AddHistRow(nid); this->hist_.AllocateAllData(); this->BuildHist(gpair, this->row_set_collection_[nid], gmat, dummy, this->hist_[nid]); // Check if number of histogram bins is correct ASSERT_EQ(this->hist_[nid].size(), gmat.cut.Ptrs().back()); std::vector histogram_expected(this->hist_[nid].size()); // Compute the correct histogram (histogram_expected) const size_t num_row = fmat.Info().num_row_; CHECK_EQ(gpair.size(), num_row); for (size_t rid = 0; rid < num_row; ++rid) { const size_t ibegin = gmat.row_ptr[rid]; const size_t iend = gmat.row_ptr[rid + 1]; for (size_t i = ibegin; i < iend; ++i) { const size_t bin_id = gmat.index[i]; 
histogram_expected[bin_id] += GradientPairPrecise(gpair[rid]); } } // Now validate the computed histogram returned by BuildHist for (size_t i = 0; i < this->hist_[nid].size(); ++i) { GradientPairPrecise sol = histogram_expected[i]; ASSERT_NEAR(sol.GetGrad(), this->hist_[nid][i].GetGrad(), kEps); ASSERT_NEAR(sol.GetHess(), this->hist_[nid][i].GetHess(), kEps); } } void TestEvaluateSplit(const GHistIndexBlockMatrix& quantile_index_block, const RegTree& tree) { std::vector row_gpairs = { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f}, {0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} }; size_t constexpr kMaxBins = 4; auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix(); // dense, no missing values common::GHistIndexMatrix gmat; gmat.Init(dmat.get(), kMaxBins); RealImpl::InitData(gmat, *dmat, tree, &row_gpairs); this->hist_.AddHistRow(0); this->hist_.AllocateAllData(); this->BuildHist(row_gpairs, this->row_set_collection_[0], gmat, quantile_index_block, this->hist_[0]); RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree); /* Compute correct split (best_split) using the computed histogram */ const size_t num_row = dmat->Info().num_row_; const size_t num_feature = dmat->Info().num_col_; CHECK_EQ(num_row, row_gpairs.size()); // Compute total gradient for all data points GradientPairPrecise total_gpair; for (const auto& e : row_gpairs) { total_gpair += GradientPairPrecise(e); } // Now enumerate all feature*threshold combination to get best split // To simplify logic, we make some assumptions: // 1) no missing values in data // 2) no regularization, i.e. set min_child_weight, reg_lambda, reg_alpha, // and max_delta_step to 0. 
bst_float best_split_gain = 0.0f; size_t best_split_threshold = std::numeric_limits::max(); size_t best_split_feature = std::numeric_limits::max(); // Enumerate all features for (size_t fid = 0; fid < num_feature; ++fid) { const size_t bin_id_min = gmat.cut.Ptrs()[fid]; const size_t bin_id_max = gmat.cut.Ptrs()[fid + 1]; // Enumerate all bin ID in [bin_id_min, bin_id_max), i.e. every possible // choice of thresholds for feature fid for (size_t split_thresh = bin_id_min; split_thresh < bin_id_max; ++split_thresh) { // left_sum, right_sum: Gradient sums for data points whose feature // value is left/right side of the split threshold GradientPairPrecise left_sum, right_sum; for (size_t rid = 0; rid < num_row; ++rid) { for (size_t offset = gmat.row_ptr[rid]; offset < gmat.row_ptr[rid + 1]; ++offset) { const size_t bin_id = gmat.index[offset]; if (bin_id >= bin_id_min && bin_id < bin_id_max) { if (bin_id <= split_thresh) { left_sum += GradientPairPrecise(row_gpairs[rid]); } else { right_sum += GradientPairPrecise(row_gpairs[rid]); } } } } // Now compute gain (change in loss) auto evaluator = this->tree_evaluator_.GetEvaluator(); const auto split_gain = evaluator.CalcSplitGain( this->param_, 0, fid, GradStats(left_sum), GradStats(right_sum)); if (split_gain > best_split_gain) { best_split_gain = split_gain; best_split_feature = fid; best_split_threshold = split_thresh; } } } /* Now compare against result given by EvaluateSplit() */ typename RealImpl::ExpandEntry node(RealImpl::ExpandEntry::kRootNid, RealImpl::ExpandEntry::kEmptyNid, tree.GetDepth(0), this->snode_[0].best.loss_chg, 0); RealImpl::EvaluateSplits({node}, gmat, this->hist_, tree); ASSERT_EQ(this->snode_[0].best.SplitIndex(), best_split_feature); ASSERT_EQ(this->snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]); } void TestEvaluateSplitParallel(const GHistIndexBlockMatrix &quantile_index_block, const RegTree &tree) { omp_set_num_threads(2); TestEvaluateSplit(quantile_index_block, tree); 
omp_set_num_threads(1); } void TestApplySplit(const GHistIndexBlockMatrix& quantile_index_block, const RegTree& tree) { std::vector row_gpairs = { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f}, {0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} }; size_t constexpr kMaxBins = 4; // try out different sparsity to get different number of missing values for (double sparsity : {0.0, 0.1, 0.2}) { // kNRows samples with kNCols features auto dmat = RandomDataGenerator(kNRows, kNCols, sparsity).Seed(3).GenerateDMatrix(); common::GHistIndexMatrix gmat; gmat.Init(dmat.get(), kMaxBins); ColumnMatrix cm; // treat everything as dense, as this is what we intend to test here cm.Init(gmat, 0.0); RealImpl::InitData(gmat, *dmat, tree, &row_gpairs); this->hist_.AddHistRow(0); this->hist_.AllocateAllData(); RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree); const size_t num_row = dmat->Info().num_row_; // split by feature 0 const size_t bin_id_min = gmat.cut.Ptrs()[0]; const size_t bin_id_max = gmat.cut.Ptrs()[1]; // attempt to split at different bins for (size_t split = 0; split < 4; split++) { size_t left_cnt = 0, right_cnt = 0; // manually compute how many samples go left or right for (size_t rid = 0; rid < num_row; ++rid) { for (size_t offset = gmat.row_ptr[rid]; offset < gmat.row_ptr[rid + 1]; ++offset) { const size_t bin_id = gmat.index[offset]; if (bin_id >= bin_id_min && bin_id < bin_id_max) { if (bin_id <= split) { left_cnt++; } else { right_cnt++; } } } } // if any were missing due to sparsity, we add them to the left or to the right size_t missing = kNRows - left_cnt - right_cnt; if (tree[0].DefaultLeft()) { left_cnt += missing; } else { right_cnt += missing; } // have one node with kNRows (=8 at the moment) rows, just one task RealImpl::partition_builder_.Init(1, 1, [&](size_t node_in_set) { return 1; }); const size_t task_id = RealImpl::partition_builder_.GetTaskIdx(0, 0); RealImpl::partition_builder_.AllocateForTask(task_id); 
this->template PartitionKernel(0, 0, common::Range1d(0, kNRows),
                                         split, cm, tree);
          RealImpl::partition_builder_.CalculateRowOffsets();
          // The kernel's left/right counts must match the hand-computed ones.
          ASSERT_EQ(RealImpl::partition_builder_.GetNLeftElems(0), left_cnt);
          ASSERT_EQ(RealImpl::partition_builder_.GetNRightElems(0), right_cnt);
        }
      }
    }
  };

  // Shape of the generated test matrix: kNRows rows, kNCols features.
  int static constexpr kNRows = 8, kNCols = 16;
  // Data generated in the constructor and shared by all Test* wrappers.
  std::shared_ptr dmat_;
  // Copy of the constructor arguments; also applied to each test's RegTree.
  const std::vector > cfg_;
  // Exactly one of the two builders is created by the constructor.
  std::shared_ptr > float_builder_;
  std::shared_ptr > double_builder_;

 public:
  /*
   * Configures the base maker with `args`, generates dmat_ and creates one
   * builder.
   *
   * args                        configuration pairs, forwarded to Configure()
   * single_precision_histogram  true creates float_builder_, false creates
   *                             double_builder_
   * batch                       true installs the Batch* synchronizer/rows
   *                             adder, false the Distributed* ones
   */
  explicit QuantileHistMock(
      const std::vector >& args,
      const bool single_precision_histogram = false, bool batch = true) :
      cfg_{args} {
    QuantileHistMaker::Configure(args);
    dmat_ = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
    if (single_precision_histogram) {
      // pruner_ is moved into the builder here.
      float_builder_.reset(
          new BuilderMock(
              param_,
              std::move(pruner_),
              int_constraint_,
              dmat_.get()));
      if (batch) {
        float_builder_->SetHistSynchronizer(new BatchHistSynchronizer());
        float_builder_->SetHistRowsAdder(new BatchHistRowsAdder());
      } else {
        float_builder_->SetHistSynchronizer(new DistributedHistSynchronizer());
        float_builder_->SetHistRowsAdder(new DistributedHistRowsAdder());
      }
    } else {
      // pruner_ is moved into the builder here.
      double_builder_.reset(
          new BuilderMock(
              param_,
              std::move(pruner_),
              int_constraint_,
              dmat_.get()));
      if (batch) {
        double_builder_->SetHistSynchronizer(new BatchHistSynchronizer());
        double_builder_->SetHistRowsAdder(new BatchHistRowsAdder());
      } else {
        double_builder_->SetHistSynchronizer(new DistributedHistSynchronizer());
        double_builder_->SetHistRowsAdder(new DistributedHistRowsAdder());
      }
    }
  }
  ~QuantileHistMock() override = default;

  // Number of feature columns in the generated test matrix.
  static size_t GetNumColumns() { return kNCols; }

  // Builds the bin index for dmat_ with 4 bins per feature, applies cfg_ to a
  // fresh tree and runs the builder's TestInitData on whichever builder exists.
  void TestInitData() {
    size_t constexpr kMaxBins = 4;
    common::GHistIndexMatrix gmat;
    gmat.Init(dmat_.get(), kMaxBins);

    RegTree tree = RegTree();
    tree.param.UpdateAllowUnknown(cfg_);

    std::vector gpair =
        { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
          {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
    if (double_builder_) {
double_builder_->TestInitData(gmat, &gpair, dmat_.get(), tree); } else { float_builder_->TestInitData(gmat, &gpair, dmat_.get(), tree); } } void TestInitDataSampling() { size_t constexpr kMaxBins = 4; common::GHistIndexMatrix gmat; gmat.Init(dmat_.get(), kMaxBins); RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); std::vector gpair = { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; if (double_builder_) { double_builder_->TestInitDataSampling(gmat, &gpair, dmat_.get(), tree); } else { float_builder_->TestInitDataSampling(gmat, &gpair, dmat_.get(), tree); } } void TestAddHistRows() { size_t constexpr kMaxBins = 4; common::GHistIndexMatrix gmat; gmat.Init(dmat_.get(), kMaxBins); RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); std::vector gpair = { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; if (double_builder_) { double_builder_->TestAddHistRows(gmat, &gpair, dmat_.get(), &tree); } else { float_builder_->TestAddHistRows(gmat, &gpair, dmat_.get(), &tree); } } void TestSyncHistograms() { size_t constexpr kMaxBins = 4; common::GHistIndexMatrix gmat; gmat.Init(dmat_.get(), kMaxBins); RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); std::vector gpair = { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; if (double_builder_) { double_builder_->TestSyncHistograms(gmat, &gpair, dmat_.get(), &tree); } else { float_builder_->TestSyncHistograms(gmat, &gpair, dmat_.get(), &tree); } } void TestBuildHist() { RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); size_t constexpr kMaxBins = 4; common::GHistIndexMatrix gmat; gmat.Init(dmat_.get(), kMaxBins); if (double_builder_) { double_builder_->TestBuildHist(0, gmat, *dmat_, tree); } else { float_builder_->TestBuildHist(0, gmat, *dmat_, 
tree); } } void TestEvaluateSplit() { RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); if (double_builder_) { double_builder_->TestEvaluateSplit(gmatb_, tree); } else { float_builder_->TestEvaluateSplit(gmatb_, tree); } } void TestApplySplit() { RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); if (double_builder_) { double_builder_->TestApplySplit(gmatb_, tree); } else { float_builder_->TestEvaluateSplit(gmatb_, tree); } } }; TEST(QuantileHist, InitData) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; QuantileHistMock maker(cfg); maker.TestInitData(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestInitData(); } TEST(QuantileHist, InitDataSampling) { const float subsample = 0.5; std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}, {"subsample", std::to_string(subsample)}}; QuantileHistMock maker(cfg); maker.TestInitDataSampling(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestInitDataSampling(); } TEST(QuantileHist, AddHistRows) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; QuantileHistMock maker(cfg); maker.TestAddHistRows(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestAddHistRows(); } TEST(QuantileHist, SyncHistograms) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; QuantileHistMock maker(cfg); maker.TestSyncHistograms(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestSyncHistograms(); } TEST(QuantileHist, DistributedAddHistRows) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; QuantileHistMock maker(cfg, false); 
maker.TestAddHistRows(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestAddHistRows(); } TEST(QuantileHist, DistributedSyncHistograms) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; QuantileHistMock maker(cfg, false); maker.TestSyncHistograms(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestSyncHistograms(); } TEST(QuantileHist, BuildHist) { // Don't enable feature grouping std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}, {"enable_feature_grouping", std::to_string(0)}}; QuantileHistMock maker(cfg); maker.TestBuildHist(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestBuildHist(); } TEST(QuantileHist, EvalSplits) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}, {"split_evaluator", "elastic_net"}, {"reg_lambda", "0"}, {"reg_alpha", "0"}, {"max_delta_step", "0"}, {"min_child_weight", "0"}}; QuantileHistMock maker(cfg); maker.TestEvaluateSplit(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestEvaluateSplit(); } TEST(QuantileHist, ApplySplit) { std::vector> cfg {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}, {"split_evaluator", "elastic_net"}, {"reg_lambda", "0"}, {"reg_alpha", "0"}, {"max_delta_step", "0"}, {"min_child_weight", "0"}}; QuantileHistMock maker(cfg); maker.TestApplySplit(); const bool single_precision_histogram = true; QuantileHistMock maker_float(cfg, single_precision_histogram); maker_float.TestApplySplit(); } } // namespace tree } // namespace xgboost