/**
 * Copyright 2024, XGBoost Contributors
 */
#include "quantile_dmatrix.h"

#include <numeric>      // for accumulate, partial_sum
#include <type_traits>  // for underlying_type_t

#include "../collective/allreduce.h"         // for Allreduce
#include "../collective/communicator-inl.h"  // for IsDistributed
#include "../common/threading_utils.h"       // for ParallelFor
#include "gradient_index.h"                  // for GHistIndexMatrix
#include "xgboost/collective/result.h"       // for SafeColl
#include "xgboost/linalg.h"                  // for Tensor

namespace xgboost::data {
void GetCutsFromRef(Context const* ctx, std::shared_ptr<DMatrix> ref, bst_feature_t n_features,
                    BatchParam p, common::HistogramCuts* p_cuts) {
  CHECK(ref);
  CHECK(p_cuts);

  p.forbid_regen = true;
  // Fetch cuts from GIDX.
  auto csr = [&] {
    for (auto const& page : ref->GetBatches<GHistIndexMatrix>(ctx, p)) {
      *p_cuts = page.cut;
      break;
    }
  };
  // Fetch cuts from Ellpack.
  auto ellpack = [&] {
    for (auto const& page : ref->GetBatches<EllpackPage>(ctx, p)) {
      GetCutsFromEllpack(page, p_cuts);
      break;
    }
  };

  if (ref->PageExists<GHistIndexMatrix>() && ref->PageExists<EllpackPage>()) {
    // Both pages exist; use the one matching the current device.
    if (ctx->IsCUDA()) {
      ellpack();
    } else {
      csr();
    }
  } else if (ref->PageExists<GHistIndexMatrix>()) {
    csr();
  } else if (ref->PageExists<EllpackPage>()) {
    ellpack();
  } else {
    // Neither page exists; generate the one matching the current device.
    if (ctx->IsCUDA()) {
      ellpack();
    } else {
      csr();
    }
  }
  CHECK_EQ(ref->Info().num_col_, n_features)
      << "Invalid ref DMatrix, different number of features.";
}

#if !defined(XGBOOST_USE_CUDA)
void GetCutsFromEllpack(EllpackPage const&, common::HistogramCuts*) { common::AssertGPUSupport(); }
#endif  // !defined(XGBOOST_USE_CUDA)

namespace cpu_impl {
// Synchronize the feature types across workers in case of an empty DMatrix.
void SyncFeatureType(Context const* ctx, std::vector<FeatureType>* p_h_ft) {
  if (!collective::IsDistributed()) {
    return;
  }
  auto& h_ft = *p_h_ft;
  bst_idx_t n_ft = h_ft.size();
  // Agree on the number of features first.
  collective::SafeColl(collective::Allreduce(ctx, &n_ft, collective::Op::kMax));
  if (!h_ft.empty()) {
    // Check the size is correct if this is not an empty DMatrix.
    CHECK_EQ(h_ft.size(), n_ft);
  }
  if (n_ft > 0) {
    h_ft.resize(n_ft);
    // FeatureType is an enum class, so reduce over its underlying integer type instead.
    auto ptr = reinterpret_cast<std::underlying_type_t<FeatureType>*>(h_ft.data());
    collective::SafeColl(
        collective::Allreduce(ctx, linalg::MakeVec(ptr, h_ft.size()), collective::Op::kMax));
  }
}

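// A worked example of the synchronization above (illustrative; the two-worker
// values are assumed): worker 0 holds an empty DMatrix with h_ft = {}, while
// worker 1 has h_ft = {kNumerical, kCategorical}. The first allreduce agrees
// on n_ft = max(0, 2) = 2, worker 0 resizes to two value-initialized entries
// (kNumerical, underlying value 0), and the element-wise kMax reduction over
// the underlying integers restores {kNumerical, kCategorical} on both workers.
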
void GetDataShape(Context const* ctx, DMatrixProxy* proxy,
                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext> iter, float missing,
                  ExternalDataInfo* p_info) {
  auto& info = *p_info;
  auto const is_valid = data::IsValidFunctor{missing};

  auto nnz_cnt = [&]() {
    return HostAdapterDispatch(proxy, [&](auto const& value) {
      bst_idx_t n_threads = ctx->Threads();
      bst_idx_t n_features = info.column_sizes.size();
      // Accumulate the column sizes per thread to avoid contention on shared counters.
      linalg::Tensor<bst_idx_t, 2> column_sizes_tloc({n_threads, n_features}, DeviceOrd::CPU());
      column_sizes_tloc.Data()->Fill(0ul);
      auto view = column_sizes_tloc.HostView();
      common::ParallelFor(value.Size(), n_threads, common::Sched::Static(256), [&](auto i) {
        auto const& line = value.GetLine(i);
        for (bst_idx_t j = 0; j < line.Size(); ++j) {
          data::COOTuple const& elem = line.GetElement(j);
          if (is_valid(elem)) {
            view(omp_get_thread_num(), elem.column_idx)++;
          }
        }
      });
      auto ptr = column_sizes_tloc.Data()->HostPointer();
      auto result = std::accumulate(ptr, ptr + column_sizes_tloc.Size(), static_cast<bst_idx_t>(0));
      // Reduce the thread-local counts into the global column sizes.
      for (bst_idx_t tidx = 0; tidx < n_threads; ++tidx) {
        for (bst_idx_t fidx = 0; fidx < n_features; ++fidx) {
          info.column_sizes[fidx] += view(tidx, fidx);
        }
      }
      return result;
    });
  };

  /**
   * The CPU implementation needs an additional loop for accumulating the column sizes.
   */
  do {
    // Use a do-while loop here since the first batch is already fetched in the ctor.
    if (info.n_features == 0) {
      info.n_features = BatchColumns(proxy);
      collective::SafeColl(collective::Allreduce(ctx, &info.n_features, collective::Op::kMax));
      info.column_sizes.clear();
      info.column_sizes.resize(info.n_features, 0);
    } else {
      CHECK_EQ(info.n_features, BatchColumns(proxy)) << "Inconsistent number of columns.";
    }
    bst_idx_t batch_size = BatchSamples(proxy);
    info.batch_nnz.push_back(nnz_cnt());
    info.base_rows.push_back(batch_size);
    info.nnz += info.batch_nnz.back();
    info.accumulated_rows += batch_size;
    info.n_batches++;
  } while (iter.Next());
  iter.Reset();

  // Turn the per-batch row counts into cumulative base row offsets.
  std::partial_sum(info.base_rows.cbegin(), info.base_rows.cend(), info.base_rows.begin());
}

void MakeSketches(Context const* ctx,
                  DataIterProxy<DataIterResetCallback, XGDMatrixCallbackNext>* iter,
                  DMatrixProxy* proxy, std::shared_ptr<DMatrix> ref, float missing,
                  common::HistogramCuts* cuts, BatchParam const& p, MetaInfo const& info,
                  ExternalDataInfo const& ext_info, std::vector<FeatureType>* p_h_ft) {
  std::unique_ptr<common::HostSketchContainer> p_sketch;
  auto& h_ft = *p_h_ft;
  bst_idx_t accumulated_rows = 0;
  if (ref) {
    // Reuse the cuts and feature types from the reference DMatrix.
    GetCutsFromRef(ctx, ref, info.num_col_, p, cuts);
    h_ft = ref->Info().feature_types.HostVector();
  } else {
    size_t i = 0;
    while (iter->Next()) {
      if (!p_sketch) {
        h_ft = proxy->Info().feature_types.ConstHostVector();
        cpu_impl::SyncFeatureType(ctx, &h_ft);
        p_sketch = std::make_unique<common::HostSketchContainer>(
            ctx, p.max_bin, h_ft, ext_info.column_sizes, !proxy->Info().group_ptr_.empty());
      }
      HostAdapterDispatch(proxy, [&](auto const& batch) {
        proxy->Info().num_nonzero_ = ext_info.batch_nnz[i];
        // We don't need the base row idx here since the Info is from the proxy, and the
        // number of rows in it is consistent with the data batch.
        p_sketch->PushAdapterBatch(batch, 0, proxy->Info(), missing);
      });
      accumulated_rows += BatchSamples(proxy);
      ++i;
    }
    iter->Reset();
    CHECK_EQ(accumulated_rows, info.num_row_);

    CHECK(p_sketch);
    p_sketch->MakeCuts(ctx, info, cuts);
  }
  if (!h_ft.empty()) {
    CHECK_EQ(h_ft.size(), ext_info.n_features);
  }
}
}  // namespace cpu_impl
}  // namespace xgboost::data
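
/**
 * Illustrative call sequence (a sketch only; the consolidation step between
 * the two passes is elided since it lives in the caller): a
 * QuantileDMatrix-style constructor first walks the iterator once with
 * GetDataShape() to learn the shape (features, per-batch nnz, and row
 * offsets), then walks it again with MakeSketches() to build the histogram
 * cuts, either from a reference DMatrix or via the host sketch container:
 *
 *   ExternalDataInfo ext_info;
 *   cpu_impl::GetDataShape(ctx, proxy, iter, missing, &ext_info);
 *   // ... copy the accumulated shape from ext_info into the MetaInfo ...
 *   std::vector<FeatureType> h_ft;
 *   common::HistogramCuts cuts;
 *   cpu_impl::MakeSketches(ctx, &iter, proxy, ref, missing, &cuts, p, info,
 *                          ext_info, &h_ft);
 *
 * The names mirror the parameters of the functions defined above.
 */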