Support exporting cut values (#9356)
This commit is contained in:
@@ -455,7 +455,7 @@ class ArrayInterface {
|
||||
|
||||
// Construct from a std::string by wrapping it in a StringView and delegating to
// the StringView constructor.
explicit ArrayInterface(std::string const &str) : ArrayInterface{StringView{str}} {}
|
||||
|
||||
explicit ArrayInterface(StringView str) : ArrayInterface<D>{Json::Load(str)} {}
|
||||
// Construct from a StringView: the text is passed to Json::Load and the result is
// handed to another ArrayInterface constructor (delegating constructor).
explicit ArrayInterface(StringView str) : ArrayInterface{Json::Load(str)} {}
|
||||
|
||||
void AssignType(StringView typestr) {
|
||||
using T = ArrayInterfaceHandler::Type;
|
||||
|
||||
@@ -3,12 +3,20 @@
|
||||
*/
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
|
||||
#include "ellpack_page.h"
|
||||
|
||||
#include <xgboost/data.h>
|
||||
|
||||
// dummy implementation of EllpackPage in case CUDA is not used
|
||||
namespace xgboost {
|
||||
|
||||
class EllpackPageImpl {};
|
||||
// Placeholder EllpackPageImpl for the dummy (non-CUDA) EllpackPage implementation.
// It only stores a HistogramCuts object and exposes accessors for it.
class EllpackPageImpl {
|
||||
// Histogram cuts held by this placeholder.
common::HistogramCuts cuts_;
|
||||
|
||||
public:
|
||||
// Mutable access to the stored cuts.
[[nodiscard]] common::HistogramCuts& Cuts() { return cuts_; }
|
||||
// Read-only access to the stored cuts.
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cuts_; }
|
||||
};
|
||||
|
||||
EllpackPage::EllpackPage() = default;
|
||||
|
||||
@@ -32,6 +40,17 @@ size_t EllpackPage::Size() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Mutable accessor for the page's histogram cuts. This definition first logs a
// fatal "not compiled with CUDA" error.
// NOTE(review): the trailing return presumably exists only to satisfy the return
// type, assuming LOG(FATAL) does not continue past the log statement — confirm.
[[nodiscard]] common::HistogramCuts& EllpackPage::Cuts() {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
|
||||
"EllpackPage is required";
|
||||
return impl_->Cuts();
|
||||
}
|
||||
|
||||
// Read-only accessor for the page's histogram cuts. This definition first logs a
// fatal "not compiled with CUDA" error.
// NOTE(review): the trailing return presumably exists only to satisfy the return
// type, assuming LOG(FATAL) does not continue past the log statement — confirm.
[[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {
|
||||
LOG(FATAL) << "Internal Error: XGBoost is not compiled with CUDA but "
|
||||
"EllpackPage is required";
|
||||
return impl_->Cuts();
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_USE_CUDA
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
#include <thrust/iterator/discard_iterator.h>
|
||||
#include <thrust/iterator/transform_output_iterator.h>
|
||||
|
||||
#include <algorithm> // for copy
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../common/categorical.h"
|
||||
#include "../common/cuda_context.cuh"
|
||||
#include "../common/hist_util.cuh"
|
||||
@@ -11,6 +15,7 @@
|
||||
#include "../common/transform_iterator.h" // MakeIndexTransformIter
|
||||
#include "./ellpack_page.cuh"
|
||||
#include "device_adapter.cuh" // for HasInfInData
|
||||
#include "ellpack_page.h"
|
||||
#include "gradient_index.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
@@ -29,6 +34,16 @@ size_t EllpackPage::Size() const { return impl_->Size(); }
|
||||
|
||||
void EllpackPage::SetBaseRowId(std::size_t row_id) { impl_->SetBaseRowId(row_id); }
|
||||
|
||||
// Mutable accessor for the histogram cuts: checks that impl_ is set, then forwards
// to the implementation object's Cuts().
[[nodiscard]] common::HistogramCuts& EllpackPage::Cuts() {
|
||||
CHECK(impl_);
|
||||
return impl_->Cuts();
|
||||
}
|
||||
|
||||
// Read-only accessor for the histogram cuts: checks that impl_ is set, then
// forwards to the implementation object's const Cuts().
[[nodiscard]] common::HistogramCuts const& EllpackPage::Cuts() const {
|
||||
CHECK(impl_);
|
||||
return impl_->Cuts();
|
||||
}
|
||||
|
||||
// Bin each input data entry, store the bin indices in compressed form.
|
||||
__global__ void CompressBinEllpackKernel(
|
||||
common::CompressedBufferWriter wr,
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
/*!
|
||||
* Copyright 2019 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
|
||||
#ifndef XGBOOST_DATA_ELLPACK_PAGE_H_
|
||||
#define XGBOOST_DATA_ELLPACK_PAGE_H_
|
||||
#ifndef XGBOOST_DATA_ELLPACK_PAGE_CUH_
|
||||
#define XGBOOST_DATA_ELLPACK_PAGE_CUH_
|
||||
|
||||
#include <thrust/binary_search.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../common/categorical.h"
|
||||
#include "../common/compressed_iterator.h"
|
||||
#include "../common/device_helpers.cuh"
|
||||
#include "../common/hist_util.h"
|
||||
#include "../common/categorical.h"
|
||||
#include <thrust/binary_search.h>
|
||||
#include "ellpack_page.h"
|
||||
|
||||
namespace xgboost {
|
||||
/** \brief Struct for accessing and manipulating an ELLPACK matrix on the
|
||||
@@ -194,8 +195,8 @@ class EllpackPageImpl {
|
||||
base_rowid = row_id;
|
||||
}
|
||||
|
||||
common::HistogramCuts& Cuts() { return cuts_; }
|
||||
common::HistogramCuts const& Cuts() const { return cuts_; }
|
||||
/*! \return Mutable reference to the histogram cuts stored in cuts_. */
[[nodiscard]] common::HistogramCuts& Cuts() { return cuts_; }
|
||||
/*! \return Read-only reference to the histogram cuts stored in cuts_. */
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cuts_; }
|
||||
|
||||
/*! \return Estimation of memory cost of this page. */
|
||||
static size_t MemCostBytes(size_t num_rows, size_t row_stride, const common::HistogramCuts&cuts) ;
|
||||
@@ -256,4 +257,4 @@ inline size_t GetRowStride(DMatrix* dmat) {
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_DATA_ELLPACK_PAGE_H_
|
||||
#endif // XGBOOST_DATA_ELLPACK_PAGE_CUH_
|
||||
|
||||
59
src/data/ellpack_page.h
Normal file
59
src/data/ellpack_page.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_ELLPACK_PAGE_H_
|
||||
#define XGBOOST_DATA_ELLPACK_PAGE_H_
|
||||
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../common/hist_util.h" // for HistogramCuts
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for DMatrix, BatchParam
|
||||
|
||||
namespace xgboost {
|
||||
class EllpackPageImpl;
|
||||
/**
|
||||
* @brief A page stored in ELLPACK format.
|
||||
*
|
||||
* This class uses the PImpl idiom (https://en.cppreference.com/w/cpp/language/pimpl) to avoid
|
||||
* including CUDA-specific implementation details in the header.
|
||||
*/
|
||||
class EllpackPage {
|
||||
public:
|
||||
/**
|
||||
* @brief Default constructor.
|
||||
*
|
||||
* This is used in the external memory case. An empty ELLPACK page is constructed with its content
|
||||
* set later by the reader.
|
||||
*/
|
||||
EllpackPage();
|
||||
/**
|
||||
* @brief Constructor from an existing DMatrix.
|
||||
*
|
||||
* This is used in the in-memory case. The ELLPACK page is constructed from an existing DMatrix
|
||||
* in CSR format.
|
||||
*/
|
||||
explicit EllpackPage(Context const* ctx, DMatrix* dmat, const BatchParam& param);
|
||||
|
||||
/*! \brief Destructor. */
|
||||
~EllpackPage();
|
||||
|
||||
/*! \brief Move constructor (declared here, defined out of line). */
EllpackPage(EllpackPage&& that);
|
||||
|
||||
/*! \return Number of instances in the page. */
|
||||
[[nodiscard]] size_t Size() const;
|
||||
|
||||
/*! \brief Set the base row id for this page. */
|
||||
void SetBaseRowId(std::size_t row_id);
|
||||
|
||||
/*! \return Read-only raw pointer to the implementation object held by impl_. */
[[nodiscard]] const EllpackPageImpl* Impl() const { return impl_.get(); }
|
||||
/*! \return Mutable raw pointer to the implementation object held by impl_. */
EllpackPageImpl* Impl() { return impl_.get(); }
|
||||
|
||||
/*! \return Mutable reference to the histogram cuts (defined out of line). */
[[nodiscard]] common::HistogramCuts& Cuts();
|
||||
/*! \return Read-only reference to the histogram cuts (defined out of line). */
[[nodiscard]] common::HistogramCuts const& Cuts() const;
|
||||
|
||||
private:
|
||||
/*! \brief Owning pointer to the implementation; EllpackPageImpl is only forward-declared here. */
std::unique_ptr<EllpackPageImpl> impl_;
|
||||
};
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_DATA_ELLPACK_PAGE_H_
|
||||
@@ -5,10 +5,10 @@
|
||||
#include <utility>
|
||||
|
||||
#include "ellpack_page.cuh"
|
||||
#include "ellpack_page.h" // for EllpackPage
|
||||
#include "ellpack_page_source.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
namespace xgboost::data {
|
||||
void EllpackPageSource::Fetch() {
|
||||
dh::safe_cuda(cudaSetDevice(device_));
|
||||
if (!this->ReadCache()) {
|
||||
@@ -27,5 +27,4 @@ void EllpackPageSource::Fetch() {
|
||||
this->WriteCache();
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -6,17 +6,17 @@
|
||||
#define XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
|
||||
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "../common/hist_util.h"
|
||||
#include "ellpack_page.h" // for EllpackPage
|
||||
#include "sparse_page_source.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
namespace xgboost::data {
|
||||
class EllpackPageSource : public PageSourceIncMixIn<EllpackPage> {
|
||||
bool is_dense_;
|
||||
size_t row_stride_;
|
||||
@@ -53,7 +53,6 @@ inline void EllpackPageSource::Fetch() {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
#endif // XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
|
||||
|
||||
@@ -245,6 +245,9 @@ class GHistIndexMatrix {
|
||||
std::vector<float> const& values, std::vector<float> const& mins,
|
||||
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const;
|
||||
|
||||
/*! \return Mutable reference to the `cut` member (histogram cuts). */
[[nodiscard]] common::HistogramCuts& Cuts() { return cut; }
|
||||
/*! \return Read-only reference to the `cut` member (histogram cuts). */
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cut; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<common::ColumnMatrix> columns_;
|
||||
std::vector<size_t> hit_count_tloc_;
|
||||
|
||||
@@ -16,7 +16,8 @@
|
||||
#include "../common/threading_utils.h"
|
||||
#include "./simple_batch_iterator.h"
|
||||
#include "adapter.h"
|
||||
#include "batch_utils.h" // for CheckEmpty, RegenGHist
|
||||
#include "batch_utils.h" // for CheckEmpty, RegenGHist
|
||||
#include "ellpack_page.h" // for EllpackPage
|
||||
#include "gradient_index.h"
|
||||
#include "xgboost/c_api.h"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
@@ -165,7 +165,10 @@ BatchSet<SortedCSCPage> SparsePageDMatrix::GetSortedColumnBatches(Context const
|
||||
|
||||
BatchSet<GHistIndexMatrix> SparsePageDMatrix::GetGradientIndex(Context const *ctx,
|
||||
const BatchParam &param) {
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
if (param.Initialized()) {
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
}
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
auto id = MakeCache(this, ".gradient_index.page", cache_prefix_, &cache_info_);
|
||||
this->InitializeSparsePage(ctx);
|
||||
if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) {
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
/**
|
||||
* Copyright 2021-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <memory>
|
||||
|
||||
#include "../common/hist_util.cuh"
|
||||
#include "batch_utils.h" // for CheckEmpty, RegenGHist
|
||||
#include "ellpack_page.cuh"
|
||||
@@ -11,7 +13,9 @@ namespace xgboost::data {
|
||||
BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
const BatchParam& param) {
|
||||
CHECK(ctx->IsCUDA());
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
if (param.Initialized()) {
|
||||
CHECK_GE(param.max_bin, 2);
|
||||
}
|
||||
detail::CheckEmpty(batch_param_, param);
|
||||
auto id = MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
|
||||
size_t row_stride = 0;
|
||||
@@ -21,8 +25,8 @@ BatchSet<EllpackPage> SparsePageDMatrix::GetEllpackBatches(Context const* ctx,
|
||||
cache_info_.erase(id);
|
||||
MakeCache(this, ".ellpack.page", cache_prefix_, &cache_info_);
|
||||
std::unique_ptr<common::HistogramCuts> cuts;
|
||||
cuts.reset(
|
||||
new common::HistogramCuts{common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0)});
|
||||
cuts = std::make_unique<common::HistogramCuts>(
|
||||
common::DeviceSketch(ctx->gpu_id, this, param.max_bin, 0));
|
||||
this->InitializeSparsePage(ctx); // reset after use.
|
||||
|
||||
row_stride = GetRowStride(this);
|
||||
|
||||
Reference in New Issue
Block a user