sync upstream code
This commit is contained in:
commit
ff549ae933
2
.github/workflows/python_wheels.yml
vendored
2
.github/workflows/python_wheels.yml
vendored
@ -21,7 +21,7 @@ jobs:
|
|||||||
- os: macos-latest
|
- os: macos-latest
|
||||||
platform_id: macosx_arm64
|
platform_id: macosx_arm64
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
|
- uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # v3.0.0
|
||||||
with:
|
with:
|
||||||
submodules: 'true'
|
submodules: 'true'
|
||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
|
|||||||
8
.github/workflows/scorecards.yml
vendored
8
.github/workflows/scorecards.yml
vendored
@ -22,12 +22,12 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: "Checkout code"
|
- name: "Checkout code"
|
||||||
uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # tag=v3.0.0
|
uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # v3.0.0
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
|
||||||
- name: "Run analysis"
|
- name: "Run analysis"
|
||||||
uses: ossf/scorecard-action@08b4669551908b1024bb425080c797723083c031 # tag=v2.2.0
|
uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
|
||||||
with:
|
with:
|
||||||
results_file: results.sarif
|
results_file: results.sarif
|
||||||
results_format: sarif
|
results_format: sarif
|
||||||
@ -41,7 +41,7 @@ jobs:
|
|||||||
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
|
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
|
||||||
# format to the repository Actions tab.
|
# format to the repository Actions tab.
|
||||||
- name: "Upload artifact"
|
- name: "Upload artifact"
|
||||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3.1.2
|
uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
|
||||||
with:
|
with:
|
||||||
name: SARIF file
|
name: SARIF file
|
||||||
path: results.sarif
|
path: results.sarif
|
||||||
@ -49,6 +49,6 @@ jobs:
|
|||||||
|
|
||||||
# Upload the results to GitHub's code scanning dashboard.
|
# Upload the results to GitHub's code scanning dashboard.
|
||||||
- name: "Upload to code-scanning"
|
- name: "Upload to code-scanning"
|
||||||
uses: github/codeql-action/upload-sarif@7b6664fa89524ee6e3c3e9749402d5afd69b3cd8 # tag=v2.14.1
|
uses: github/codeql-action/upload-sarif@83a02f7883b12e0e4e1a146174f5e2292a01e601 # v2.16.4
|
||||||
with:
|
with:
|
||||||
sarif_file: results.sarif
|
sarif_file: results.sarif
|
||||||
|
|||||||
@ -454,7 +454,8 @@ depr_par_lut <- matrix(c(
|
|||||||
'plot.height', 'plot_height',
|
'plot.height', 'plot_height',
|
||||||
'plot.width', 'plot_width',
|
'plot.width', 'plot_width',
|
||||||
'n_first_tree', 'trees',
|
'n_first_tree', 'trees',
|
||||||
'dummy', 'DUMMY'
|
'dummy', 'DUMMY',
|
||||||
|
'watchlist', 'evals'
|
||||||
), ncol = 2, byrow = TRUE)
|
), ncol = 2, byrow = TRUE)
|
||||||
colnames(depr_par_lut) <- c('old', 'new')
|
colnames(depr_par_lut) <- c('old', 'new')
|
||||||
|
|
||||||
|
|||||||
@ -1,20 +1,18 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2015-2023 by XGBoost Contributors
|
* Copyright 2015-2024, XGBoost Contributors
|
||||||
* \file base.h
|
* \file base.h
|
||||||
* \brief Defines configuration macros and basic types for xgboost.
|
* \brief Defines configuration macros and basic types for xgboost.
|
||||||
*/
|
*/
|
||||||
#ifndef XGBOOST_BASE_H_
|
#ifndef XGBOOST_BASE_H_
|
||||||
#define XGBOOST_BASE_H_
|
#define XGBOOST_BASE_H_
|
||||||
|
|
||||||
#include <dmlc/base.h>
|
#include <dmlc/omp.h> // for omp_uint, omp_ulong
|
||||||
#include <dmlc/omp.h>
|
|
||||||
|
|
||||||
#include <cmath>
|
#include <cstdint> // for int32_t, uint64_t, int16_t
|
||||||
#include <cstdint>
|
#include <ostream> // for ostream
|
||||||
#include <iostream>
|
#include <string> // for string
|
||||||
#include <string>
|
#include <utility> // for pair
|
||||||
#include <utility>
|
#include <vector> // for vector
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief string flag for R library, to leave hooks when needed.
|
* \brief string flag for R library, to leave hooks when needed.
|
||||||
@ -86,34 +84,31 @@
|
|||||||
|
|
||||||
#endif // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined()
|
#endif // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined()
|
||||||
|
|
||||||
/*! \brief namespace of xgboost*/
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|
||||||
/*! \brief unsigned integer type used for feature index. */
|
/*! \brief unsigned integer type used for feature index. */
|
||||||
using bst_uint = uint32_t; // NOLINT
|
using bst_uint = std::uint32_t; // NOLINT
|
||||||
/*! \brief unsigned long integers */
|
/*! \brief unsigned long integers */
|
||||||
using bst_ulong = uint64_t; // NOLINT
|
using bst_ulong = std::uint64_t; // NOLINT
|
||||||
/*! \brief float type, used for storing statistics */
|
/*! \brief float type, used for storing statistics */
|
||||||
using bst_float = float; // NOLINT
|
using bst_float = float; // NOLINT
|
||||||
/*! \brief Categorical value type. */
|
/*! \brief Categorical value type. */
|
||||||
using bst_cat_t = int32_t; // NOLINT
|
using bst_cat_t = std::int32_t; // NOLINT
|
||||||
/*! \brief Type for data column (feature) index. */
|
/*! \brief Type for data column (feature) index. */
|
||||||
using bst_feature_t = uint32_t; // NOLINT
|
using bst_feature_t = std::uint32_t; // NOLINT
|
||||||
/*! \brief Type for histogram bin index. */
|
/**
|
||||||
using bst_bin_t = int32_t; // NOLINT
|
* @brief Type for histogram bin index. We sometimes use -1 to indicate invalid bin.
|
||||||
/*! \brief Type for data row index.
|
|
||||||
*
|
|
||||||
* Be careful `std::size_t' is implementation-defined. Meaning that the binary
|
|
||||||
* representation of DMatrix might not be portable across platform. Booster model should
|
|
||||||
* be portable as parameters are floating points.
|
|
||||||
*/
|
*/
|
||||||
using bst_row_t = std::size_t; // NOLINT
|
using bst_bin_t = std::int32_t; // NOLINT
|
||||||
|
/**
|
||||||
|
* @brief Type for data row index (sample).
|
||||||
|
*/
|
||||||
|
using bst_idx_t = std::uint64_t; // NOLINT
|
||||||
/*! \brief Type for tree node index. */
|
/*! \brief Type for tree node index. */
|
||||||
using bst_node_t = std::int32_t; // NOLINT
|
using bst_node_t = std::int32_t; // NOLINT
|
||||||
/*! \brief Type for ranking group index. */
|
/*! \brief Type for ranking group index. */
|
||||||
using bst_group_t = std::uint32_t; // NOLINT
|
using bst_group_t = std::uint32_t; // NOLINT
|
||||||
/**
|
/**
|
||||||
* \brief Type for indexing into output targets.
|
* @brief Type for indexing into output targets.
|
||||||
*/
|
*/
|
||||||
using bst_target_t = std::uint32_t; // NOLINT
|
using bst_target_t = std::uint32_t; // NOLINT
|
||||||
/**
|
/**
|
||||||
@ -306,8 +301,7 @@ class GradientPairInt64 {
|
|||||||
XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const {
|
XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const {
|
||||||
return grad_ == rhs.grad_ && hess_ == rhs.hess_;
|
return grad_ == rhs.grad_ && hess_ == rhs.hess_;
|
||||||
}
|
}
|
||||||
friend std::ostream &operator<<(std::ostream &os,
|
friend std::ostream &operator<<(std::ostream &os, const GradientPairInt64 &g) {
|
||||||
const GradientPairInt64 &g) {
|
|
||||||
os << g.GetQuantisedGrad() << "/" << g.GetQuantisedHess();
|
os << g.GetQuantisedGrad() << "/" << g.GetQuantisedHess();
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
@ -323,7 +317,7 @@ using omp_ulong = dmlc::omp_ulong; // NOLINT
|
|||||||
/*! \brief define unsigned int for openmp loop */
|
/*! \brief define unsigned int for openmp loop */
|
||||||
using bst_omp_uint = dmlc::omp_uint; // NOLINT
|
using bst_omp_uint = dmlc::omp_uint; // NOLINT
|
||||||
/*! \brief Type used for representing version number in binary form.*/
|
/*! \brief Type used for representing version number in binary form.*/
|
||||||
using XGBoostVersionT = int32_t;
|
using XGBoostVersionT = std::int32_t;
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|
||||||
#endif // XGBOOST_BASE_H_
|
#endif // XGBOOST_BASE_H_
|
||||||
|
|||||||
@ -436,28 +436,38 @@ class TCPSocket {
|
|||||||
* \brief Accept new connection, returns a new TCP socket for the new connection.
|
* \brief Accept new connection, returns a new TCP socket for the new connection.
|
||||||
*/
|
*/
|
||||||
TCPSocket Accept() {
|
TCPSocket Accept() {
|
||||||
HandleT newfd = accept(Handle(), nullptr, nullptr);
|
SockAddress addr;
|
||||||
|
TCPSocket newsock;
|
||||||
|
auto rc = this->Accept(&newsock, &addr);
|
||||||
|
SafeColl(rc);
|
||||||
|
return newsock;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] Result Accept(TCPSocket *out, SockAddress *addr) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
auto interrupt = WSAEINTR;
|
auto interrupt = WSAEINTR;
|
||||||
#else
|
#else
|
||||||
auto interrupt = EINTR;
|
auto interrupt = EINTR;
|
||||||
#endif
|
#endif
|
||||||
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
|
if (this->Domain() == SockDomain::kV4) {
|
||||||
system::ThrowAtError("accept");
|
struct sockaddr_in caddr;
|
||||||
|
socklen_t caddr_len = sizeof(caddr);
|
||||||
|
HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
|
||||||
|
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
|
||||||
|
return system::FailWithCode("Failed to accept.");
|
||||||
|
}
|
||||||
|
*addr = SockAddress{SockAddrV4{caddr}};
|
||||||
|
*out = TCPSocket{newfd};
|
||||||
|
} else {
|
||||||
|
struct sockaddr_in6 caddr;
|
||||||
|
socklen_t caddr_len = sizeof(caddr);
|
||||||
|
HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
|
||||||
|
if (newfd == InvalidSocket() && system::LastError() != interrupt) {
|
||||||
|
return system::FailWithCode("Failed to accept.");
|
||||||
|
}
|
||||||
|
*addr = SockAddress{SockAddrV6{caddr}};
|
||||||
|
*out = TCPSocket{newfd};
|
||||||
}
|
}
|
||||||
TCPSocket newsock{newfd};
|
|
||||||
return newsock;
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] Result Accept(TCPSocket *out, SockAddrV4 *addr) {
|
|
||||||
struct sockaddr_in caddr;
|
|
||||||
socklen_t caddr_len = sizeof(caddr);
|
|
||||||
HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
|
|
||||||
if (newfd == InvalidSocket()) {
|
|
||||||
return system::FailWithCode("Failed to accept.");
|
|
||||||
}
|
|
||||||
*addr = SockAddrV4{caddr};
|
|
||||||
*out = TCPSocket{newfd};
|
|
||||||
return Success();
|
return Success();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -315,7 +315,7 @@ struct BatchParam {
|
|||||||
struct HostSparsePageView {
|
struct HostSparsePageView {
|
||||||
using Inst = common::Span<Entry const>;
|
using Inst = common::Span<Entry const>;
|
||||||
|
|
||||||
common::Span<bst_row_t const> offset;
|
common::Span<bst_idx_t const> offset;
|
||||||
common::Span<Entry const> data;
|
common::Span<Entry const> data;
|
||||||
|
|
||||||
Inst operator[](size_t i) const {
|
Inst operator[](size_t i) const {
|
||||||
@ -333,7 +333,7 @@ struct HostSparsePageView {
|
|||||||
class SparsePage {
|
class SparsePage {
|
||||||
public:
|
public:
|
||||||
// Offset for each row.
|
// Offset for each row.
|
||||||
HostDeviceVector<bst_row_t> offset;
|
HostDeviceVector<bst_idx_t> offset;
|
||||||
/*! \brief the data of the segments */
|
/*! \brief the data of the segments */
|
||||||
HostDeviceVector<Entry> data;
|
HostDeviceVector<Entry> data;
|
||||||
|
|
||||||
|
|||||||
@ -60,9 +60,7 @@ class Value {
|
|||||||
virtual Json& operator[](int ind);
|
virtual Json& operator[](int ind);
|
||||||
|
|
||||||
virtual bool operator==(Value const& rhs) const = 0;
|
virtual bool operator==(Value const& rhs) const = 0;
|
||||||
#if !defined(__APPLE__)
|
|
||||||
virtual Value& operator=(Value const& rhs) = delete;
|
virtual Value& operator=(Value const& rhs) = delete;
|
||||||
#endif // !defined(__APPLE__)
|
|
||||||
|
|
||||||
std::string TypeStr() const;
|
std::string TypeStr() const;
|
||||||
|
|
||||||
@ -105,6 +103,7 @@ class JsonString : public Value {
|
|||||||
std::string& GetString() & { return str_; }
|
std::string& GetString() & { return str_; }
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
static bool IsClassOf(Value const* value) {
|
static bool IsClassOf(Value const* value) {
|
||||||
return value->Type() == ValueKind::kString;
|
return value->Type() == ValueKind::kString;
|
||||||
@ -134,6 +133,7 @@ class JsonArray : public Value {
|
|||||||
std::vector<Json>& GetArray() & { return vec_; }
|
std::vector<Json>& GetArray() & { return vec_; }
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
static bool IsClassOf(Value const* value) {
|
static bool IsClassOf(Value const* value) {
|
||||||
return value->Type() == ValueKind::kArray;
|
return value->Type() == ValueKind::kArray;
|
||||||
@ -158,6 +158,7 @@ class JsonTypedArray : public Value {
|
|||||||
JsonTypedArray(JsonTypedArray&& that) noexcept : Value{kind}, vec_{std::move(that.vec_)} {}
|
JsonTypedArray(JsonTypedArray&& that) noexcept : Value{kind}, vec_{std::move(that.vec_)} {}
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
void Set(size_t i, T v) { vec_[i] = v; }
|
void Set(size_t i, T v) { vec_[i] = v; }
|
||||||
size_t Size() const { return vec_.size(); }
|
size_t Size() const { return vec_.size(); }
|
||||||
@ -216,6 +217,7 @@ class JsonObject : public Value {
|
|||||||
Map& GetObject() & { return object_; }
|
Map& GetObject() & { return object_; }
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
static bool IsClassOf(Value const* value) { return value->Type() == ValueKind::kObject; }
|
static bool IsClassOf(Value const* value) { return value->Type() == ValueKind::kObject; }
|
||||||
~JsonObject() override = default;
|
~JsonObject() override = default;
|
||||||
@ -249,6 +251,7 @@ class JsonNumber : public Value {
|
|||||||
Float& GetNumber() & { return number_; }
|
Float& GetNumber() & { return number_; }
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
static bool IsClassOf(Value const* value) {
|
static bool IsClassOf(Value const* value) {
|
||||||
return value->Type() == ValueKind::kNumber;
|
return value->Type() == ValueKind::kNumber;
|
||||||
@ -287,6 +290,7 @@ class JsonInteger : public Value {
|
|||||||
: Value{ValueKind::kInteger}, integer_{that.integer_} {}
|
: Value{ValueKind::kInteger}, integer_{that.integer_} {}
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
Int const& GetInteger() && { return integer_; }
|
Int const& GetInteger() && { return integer_; }
|
||||||
Int const& GetInteger() const & { return integer_; }
|
Int const& GetInteger() const & { return integer_; }
|
||||||
@ -307,6 +311,7 @@ class JsonNull : public Value {
|
|||||||
void Save(JsonWriter* writer) const override;
|
void Save(JsonWriter* writer) const override;
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
static bool IsClassOf(Value const* value) {
|
static bool IsClassOf(Value const* value) {
|
||||||
return value->Type() == ValueKind::kNull;
|
return value->Type() == ValueKind::kNull;
|
||||||
@ -336,6 +341,7 @@ class JsonBoolean : public Value {
|
|||||||
bool& GetBoolean() & { return boolean_; }
|
bool& GetBoolean() & { return boolean_; }
|
||||||
|
|
||||||
bool operator==(Value const& rhs) const override;
|
bool operator==(Value const& rhs) const override;
|
||||||
|
Value& operator=(Value const& rhs) override = delete;
|
||||||
|
|
||||||
static bool IsClassOf(Value const* value) {
|
static bool IsClassOf(Value const* value) {
|
||||||
return value->Type() == ValueKind::kBoolean;
|
return value->Type() == ValueKind::kBoolean;
|
||||||
|
|||||||
@ -57,7 +57,7 @@ void GHistIndexMatrix::SetIndexData(::sycl::queue qu,
|
|||||||
uint32_t* offsets) {
|
uint32_t* offsets) {
|
||||||
if (nbins == 0) return;
|
if (nbins == 0) return;
|
||||||
const xgboost::Entry *data_ptr = dmat.data.DataConst();
|
const xgboost::Entry *data_ptr = dmat.data.DataConst();
|
||||||
const bst_row_t *offset_vec = dmat.row_ptr.DataConst();
|
const bst_idx_t *offset_vec = dmat.row_ptr.DataConst();
|
||||||
const size_t num_rows = dmat.row_ptr.Size() - 1;
|
const size_t num_rows = dmat.row_ptr.Size() - 1;
|
||||||
const bst_float* cut_values = cut_device.Values().DataConst();
|
const bst_float* cut_values = cut_device.Values().DataConst();
|
||||||
const uint32_t* cut_ptrs = cut_device.Ptrs().DataConst();
|
const uint32_t* cut_ptrs = cut_device.Ptrs().DataConst();
|
||||||
|
|||||||
55
plugin/sycl/tree/param.h
Normal file
55
plugin/sycl/tree/param.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
/*!
|
||||||
|
* Copyright 2014-2024 by Contributors
|
||||||
|
*/
|
||||||
|
#ifndef PLUGIN_SYCL_TREE_PARAM_H_
|
||||||
|
#define PLUGIN_SYCL_TREE_PARAM_H_
|
||||||
|
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
#include "xgboost/parameter.h"
|
||||||
|
#include "xgboost/data.h"
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
|
||||||
|
#include "../src/tree/param.h"
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
#include <CL/sycl.hpp>
|
||||||
|
|
||||||
|
namespace xgboost {
|
||||||
|
namespace sycl {
|
||||||
|
namespace tree {
|
||||||
|
|
||||||
|
|
||||||
|
/*! \brief Wrapper for necessary training parameters for regression tree to access on device */
|
||||||
|
/* The original structure xgboost::tree::TrainParam can't be used,
|
||||||
|
* since std::vector are not copyable on sycl-devices.
|
||||||
|
*/
|
||||||
|
struct TrainParam {
|
||||||
|
float min_child_weight;
|
||||||
|
float reg_lambda;
|
||||||
|
float reg_alpha;
|
||||||
|
float max_delta_step;
|
||||||
|
|
||||||
|
TrainParam() {}
|
||||||
|
|
||||||
|
explicit TrainParam(const xgboost::tree::TrainParam& param) {
|
||||||
|
reg_lambda = param.reg_lambda;
|
||||||
|
reg_alpha = param.reg_alpha;
|
||||||
|
min_child_weight = param.min_child_weight;
|
||||||
|
max_delta_step = param.max_delta_step;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename GradType>
|
||||||
|
using GradStats = xgboost::detail::GradientPairInternal<GradType>;
|
||||||
|
|
||||||
|
} // namespace tree
|
||||||
|
} // namespace sycl
|
||||||
|
} // namespace xgboost
|
||||||
|
#endif // PLUGIN_SYCL_TREE_PARAM_H_
|
||||||
208
plugin/sycl/tree/split_evaluator.h
Normal file
208
plugin/sycl/tree/split_evaluator.h
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
/*!
|
||||||
|
* Copyright 2018-2024 by Contributors
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef PLUGIN_SYCL_TREE_SPLIT_EVALUATOR_H_
|
||||||
|
#define PLUGIN_SYCL_TREE_SPLIT_EVALUATOR_H_
|
||||||
|
|
||||||
|
#include <dmlc/registry.h>
|
||||||
|
#include <xgboost/base.h>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "param.h"
|
||||||
|
#include "../data.h"
|
||||||
|
|
||||||
|
#include "xgboost/tree_model.h"
|
||||||
|
#include "xgboost/host_device_vector.h"
|
||||||
|
#include "xgboost/context.h"
|
||||||
|
#include "../../src/common/transform.h"
|
||||||
|
#include "../../src/common/math.h"
|
||||||
|
#include "../../src/tree/param.h"
|
||||||
|
|
||||||
|
#include <CL/sycl.hpp>
|
||||||
|
|
||||||
|
namespace xgboost {
|
||||||
|
namespace sycl {
|
||||||
|
namespace tree {
|
||||||
|
|
||||||
|
/*! \brief SYCL implementation of TreeEvaluator, with USM memory for temporary buffer to access on device.
|
||||||
|
* It also contains own implementation of SplitEvaluator for device compilation, because some of the
|
||||||
|
functions from the original SplitEvaluator are currently not supported
|
||||||
|
*/
|
||||||
|
|
||||||
|
template<typename GradType>
|
||||||
|
class TreeEvaluator {
|
||||||
|
// hist and exact use parent id to calculate constraints.
|
||||||
|
static constexpr bst_node_t kRootParentId =
|
||||||
|
(-1 & static_cast<bst_node_t>((1U << 31) - 1));
|
||||||
|
|
||||||
|
USMVector<GradType> lower_bounds_;
|
||||||
|
USMVector<GradType> upper_bounds_;
|
||||||
|
USMVector<int> monotone_;
|
||||||
|
TrainParam param_;
|
||||||
|
::sycl::queue qu_;
|
||||||
|
bool has_constraint_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
void Reset(::sycl::queue qu, xgboost::tree::TrainParam const& p, bst_feature_t n_features) {
|
||||||
|
qu_ = qu;
|
||||||
|
|
||||||
|
has_constraint_ = false;
|
||||||
|
for (const auto& constraint : p.monotone_constraints) {
|
||||||
|
if (constraint != 0) {
|
||||||
|
has_constraint_ = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_constraint_) {
|
||||||
|
monotone_.Resize(&qu_, n_features, 0);
|
||||||
|
qu_.memcpy(monotone_.Data(), p.monotone_constraints.data(),
|
||||||
|
sizeof(int) * p.monotone_constraints.size());
|
||||||
|
qu_.wait();
|
||||||
|
|
||||||
|
lower_bounds_.Resize(&qu_, p.MaxNodes(), std::numeric_limits<GradType>::lowest());
|
||||||
|
upper_bounds_.Resize(&qu_, p.MaxNodes(), std::numeric_limits<GradType>::max());
|
||||||
|
}
|
||||||
|
param_ = TrainParam(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasConstraint() const {
|
||||||
|
return has_constraint_;
|
||||||
|
}
|
||||||
|
|
||||||
|
TreeEvaluator(::sycl::queue qu, xgboost::tree::TrainParam const& p, bst_feature_t n_features) {
|
||||||
|
Reset(qu, p, n_features);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SplitEvaluator {
|
||||||
|
const int* constraints;
|
||||||
|
const GradType* lower;
|
||||||
|
const GradType* upper;
|
||||||
|
bool has_constraint;
|
||||||
|
TrainParam param;
|
||||||
|
|
||||||
|
GradType CalcSplitGain(bst_node_t nidx,
|
||||||
|
bst_feature_t fidx,
|
||||||
|
const GradStats<GradType>& left,
|
||||||
|
const GradStats<GradType>& right) const {
|
||||||
|
const GradType negative_infinity = -std::numeric_limits<GradType>::infinity();
|
||||||
|
GradType wleft = this->CalcWeight(nidx, left);
|
||||||
|
GradType wright = this->CalcWeight(nidx, right);
|
||||||
|
|
||||||
|
GradType gain = this->CalcGainGivenWeight(nidx, left, wleft) +
|
||||||
|
this->CalcGainGivenWeight(nidx, right, wright);
|
||||||
|
if (!has_constraint) {
|
||||||
|
return gain;
|
||||||
|
}
|
||||||
|
|
||||||
|
int constraint = constraints[fidx];
|
||||||
|
if (constraint == 0) {
|
||||||
|
return gain;
|
||||||
|
} else if (constraint > 0) {
|
||||||
|
return wleft <= wright ? gain : negative_infinity;
|
||||||
|
} else {
|
||||||
|
return wleft >= wright ? gain : negative_infinity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static GradType ThresholdL1(GradType w, float alpha) {
|
||||||
|
if (w > + alpha) {
|
||||||
|
return w - alpha;
|
||||||
|
}
|
||||||
|
if (w < - alpha) {
|
||||||
|
return w + alpha;
|
||||||
|
}
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline GradType CalcWeight(GradType sum_grad, GradType sum_hess) const {
|
||||||
|
if (sum_hess < param.min_child_weight || sum_hess <= 0.0) {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
GradType dw = -this->ThresholdL1(sum_grad, param.reg_alpha) / (sum_hess + param.reg_lambda);
|
||||||
|
if (param.max_delta_step != 0.0f && std::abs(dw) > param.max_delta_step) {
|
||||||
|
dw = ::sycl::copysign((GradType)param.max_delta_step, dw);
|
||||||
|
}
|
||||||
|
return dw;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline GradType CalcWeight(bst_node_t nodeid, const GradStats<GradType>& stats) const {
|
||||||
|
GradType w = this->CalcWeight(stats.GetGrad(), stats.GetHess());
|
||||||
|
if (!has_constraint) {
|
||||||
|
return w;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nodeid == kRootParentId) {
|
||||||
|
return w;
|
||||||
|
} else if (w < lower[nodeid]) {
|
||||||
|
return lower[nodeid];
|
||||||
|
} else if (w > upper[nodeid]) {
|
||||||
|
return upper[nodeid];
|
||||||
|
} else {
|
||||||
|
return w;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline GradType CalcGainGivenWeight(GradType sum_grad, GradType sum_hess, GradType w) const {
|
||||||
|
return -(2.0f * sum_grad * w + (sum_hess + param.reg_lambda) * xgboost::common::Sqr(w));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline GradType CalcGainGivenWeight(bst_node_t nid, const GradStats<GradType>& stats,
|
||||||
|
GradType w) const {
|
||||||
|
if (stats.GetHess() <= 0) {
|
||||||
|
return .0f;
|
||||||
|
}
|
||||||
|
// Avoiding tree::CalcGainGivenWeight can significantly reduce avg floating point error.
|
||||||
|
if (param.max_delta_step == 0.0f && has_constraint == false) {
|
||||||
|
return xgboost::common::Sqr(this->ThresholdL1(stats.GetGrad(), param.reg_alpha)) /
|
||||||
|
(stats.GetHess() + param.reg_lambda);
|
||||||
|
}
|
||||||
|
return this->CalcGainGivenWeight(stats.GetGrad(), stats.GetHess(), w);
|
||||||
|
}
|
||||||
|
|
||||||
|
GradType CalcGain(bst_node_t nid, const GradStats<GradType>& stats) const {
|
||||||
|
return this->CalcGainGivenWeight(nid, stats, this->CalcWeight(nid, stats));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
/* Get a view to the evaluator that can be passed down to device. */
|
||||||
|
auto GetEvaluator() const {
|
||||||
|
return SplitEvaluator{monotone_.DataConst(),
|
||||||
|
lower_bounds_.DataConst(),
|
||||||
|
upper_bounds_.DataConst(),
|
||||||
|
has_constraint_,
|
||||||
|
param_};
|
||||||
|
}
|
||||||
|
|
||||||
|
void AddSplit(bst_node_t nodeid, bst_node_t leftid, bst_node_t rightid,
|
||||||
|
bst_feature_t f, GradType left_weight, GradType right_weight) {
|
||||||
|
if (!has_constraint_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
lower_bounds_[leftid] = lower_bounds_[nodeid];
|
||||||
|
upper_bounds_[leftid] = upper_bounds_[nodeid];
|
||||||
|
|
||||||
|
lower_bounds_[rightid] = lower_bounds_[nodeid];
|
||||||
|
upper_bounds_[rightid] = upper_bounds_[nodeid];
|
||||||
|
int32_t c = monotone_[f];
|
||||||
|
GradType mid = (left_weight + right_weight) / 2;
|
||||||
|
|
||||||
|
if (c < 0) {
|
||||||
|
lower_bounds_[leftid] = mid;
|
||||||
|
upper_bounds_[rightid] = mid;
|
||||||
|
} else if (c > 0) {
|
||||||
|
upper_bounds_[leftid] = mid;
|
||||||
|
lower_bounds_[rightid] = mid;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace tree
|
||||||
|
} // namespace sycl
|
||||||
|
} // namespace xgboost
|
||||||
|
|
||||||
|
#endif // PLUGIN_SYCL_TREE_SPLIT_EVALUATOR_H_
|
||||||
@ -429,8 +429,8 @@ def make_categorical(
|
|||||||
categories = np.arange(0, n_categories)
|
categories = np.arange(0, n_categories)
|
||||||
for col in df.columns:
|
for col in df.columns:
|
||||||
if rng.binomial(1, cat_ratio, size=1)[0] == 1:
|
if rng.binomial(1, cat_ratio, size=1)[0] == 1:
|
||||||
df[col] = df[col].astype("category")
|
df.loc[:, col] = df[col].astype("category")
|
||||||
df[col] = df[col].cat.set_categories(categories)
|
df.loc[:, col] = df[col].cat.set_categories(categories)
|
||||||
|
|
||||||
if sparsity > 0.0:
|
if sparsity > 0.0:
|
||||||
for i in range(n_features):
|
for i in range(n_features):
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
Subproject commit 2fea6734e83cf147c1bbe580ac4713cd50abcad5
|
Subproject commit 187e4be94513c71bea1e10a3eded6b9b2da0521f
|
||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include "coll.h"
|
#include "coll.h"
|
||||||
|
|
||||||
@ -7,6 +7,7 @@
|
|||||||
#include <cstddef> // for size_t
|
#include <cstddef> // for size_t
|
||||||
#include <cstdint> // for int8_t, int64_t
|
#include <cstdint> // for int8_t, int64_t
|
||||||
#include <functional> // for bit_and, bit_or, bit_xor, plus
|
#include <functional> // for bit_and, bit_or, bit_xor, plus
|
||||||
|
#include <string> // for string
|
||||||
#include <type_traits> // for is_floating_point_v, is_same_v
|
#include <type_traits> // for is_floating_point_v, is_same_v
|
||||||
#include <utility> // for move
|
#include <utility> // for move
|
||||||
|
|
||||||
@ -60,6 +61,8 @@ bool constexpr IsFloatingPointV() {
|
|||||||
return cpu_impl::RingAllreduce(comm, data, erased_fn, type);
|
return cpu_impl::RingAllreduce(comm, data, erased_fn, type);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
std::string msg{"Floating point is not supported for bit wise collective operations."};
|
||||||
|
|
||||||
auto rc = DispatchDType(type, [&](auto t) {
|
auto rc = DispatchDType(type, [&](auto t) {
|
||||||
using T = decltype(t);
|
using T = decltype(t);
|
||||||
switch (op) {
|
switch (op) {
|
||||||
@ -74,21 +77,21 @@ bool constexpr IsFloatingPointV() {
|
|||||||
}
|
}
|
||||||
case Op::kBitwiseAND: {
|
case Op::kBitwiseAND: {
|
||||||
if constexpr (IsFloatingPointV<T>()) {
|
if constexpr (IsFloatingPointV<T>()) {
|
||||||
return Fail("Invalid type.");
|
return Fail(msg);
|
||||||
} else {
|
} else {
|
||||||
return fn(std::bit_and<>{}, t);
|
return fn(std::bit_and<>{}, t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case Op::kBitwiseOR: {
|
case Op::kBitwiseOR: {
|
||||||
if constexpr (IsFloatingPointV<T>()) {
|
if constexpr (IsFloatingPointV<T>()) {
|
||||||
return Fail("Invalid type.");
|
return Fail(msg);
|
||||||
} else {
|
} else {
|
||||||
return fn(std::bit_or<>{}, t);
|
return fn(std::bit_or<>{}, t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case Op::kBitwiseXOR: {
|
case Op::kBitwiseXOR: {
|
||||||
if constexpr (IsFloatingPointV<T>()) {
|
if constexpr (IsFloatingPointV<T>()) {
|
||||||
return Fail("Invalid type.");
|
return Fail(msg);
|
||||||
} else {
|
} else {
|
||||||
return fn(std::bit_xor<>{}, t);
|
return fn(std::bit_xor<>{}, t);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -75,9 +75,11 @@ Result ConnectTrackerImpl(proto::PeerInfo info, std::chrono::seconds timeout, st
|
|||||||
} << [&] {
|
} << [&] {
|
||||||
return next->NonBlocking(true);
|
return next->NonBlocking(true);
|
||||||
} << [&] {
|
} << [&] {
|
||||||
SockAddrV4 addr;
|
SockAddress addr;
|
||||||
return listener->Accept(prev.get(), &addr);
|
return listener->Accept(prev.get(), &addr);
|
||||||
} << [&] { return prev->NonBlocking(true); };
|
} << [&] {
|
||||||
|
return prev->NonBlocking(true);
|
||||||
|
};
|
||||||
if (!rc.OK()) {
|
if (!rc.OK()) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -157,10 +159,13 @@ Result ConnectTrackerImpl(proto::PeerInfo info, std::chrono::seconds timeout, st
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (std::int32_t r = 0; r < comm.Rank(); ++r) {
|
for (std::int32_t r = 0; r < comm.Rank(); ++r) {
|
||||||
SockAddrV4 addr;
|
|
||||||
auto peer = std::shared_ptr<TCPSocket>(TCPSocket::CreatePtr(comm.Domain()));
|
auto peer = std::shared_ptr<TCPSocket>(TCPSocket::CreatePtr(comm.Domain()));
|
||||||
rc = std::move(rc) << [&] { return listener->Accept(peer.get(), &addr); }
|
rc = std::move(rc) << [&] {
|
||||||
<< [&] { return peer->RecvTimeout(timeout); };
|
SockAddress addr;
|
||||||
|
return listener->Accept(peer.get(), &addr);
|
||||||
|
} << [&] {
|
||||||
|
return peer->RecvTimeout(timeout);
|
||||||
|
};
|
||||||
if (!rc.OK()) {
|
if (!rc.OK()) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -187,7 +192,9 @@ RabitComm::RabitComm(std::string const& host, std::int32_t port, std::chrono::se
|
|||||||
: HostComm{std::move(host), port, timeout, retry, std::move(task_id)},
|
: HostComm{std::move(host), port, timeout, retry, std::move(task_id)},
|
||||||
nccl_path_{std::move(nccl_path)} {
|
nccl_path_{std::move(nccl_path)} {
|
||||||
auto rc = this->Bootstrap(timeout_, retry_, task_id_);
|
auto rc = this->Bootstrap(timeout_, retry_, task_id_);
|
||||||
CHECK(rc.OK()) << rc.Report();
|
if (!rc.OK()) {
|
||||||
|
SafeColl(Fail("Failed to bootstrap the communication group.", std::move(rc)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(XGBOOST_USE_NCCL) && !defined(XGBOOST_USE_RCCL)
|
#if !defined(XGBOOST_USE_NCCL) && !defined(XGBOOST_USE_RCCL)
|
||||||
@ -247,10 +254,12 @@ Comm* RabitComm::MakeCUDAVar(Context const*, std::shared_ptr<Coll>) const {
|
|||||||
// get ring neighbors
|
// get ring neighbors
|
||||||
std::string snext;
|
std::string snext;
|
||||||
tracker.Recv(&snext);
|
tracker.Recv(&snext);
|
||||||
|
if (!rc.OK()) {
|
||||||
|
return Fail("Failed to receive the rank for the next worker.", std::move(rc));
|
||||||
|
}
|
||||||
auto jnext = Json::Load(StringView{snext});
|
auto jnext = Json::Load(StringView{snext});
|
||||||
|
|
||||||
proto::PeerInfo ninfo{jnext};
|
proto::PeerInfo ninfo{jnext};
|
||||||
|
|
||||||
// get the rank of this worker
|
// get the rank of this worker
|
||||||
this->rank_ = BootstrapPrev(ninfo.rank, world);
|
this->rank_ = BootstrapPrev(ninfo.rank, world);
|
||||||
this->tracker_.rank = rank_;
|
this->tracker_.rank = rank_;
|
||||||
@ -258,7 +267,7 @@ Comm* RabitComm::MakeCUDAVar(Context const*, std::shared_ptr<Coll>) const {
|
|||||||
std::vector<std::shared_ptr<TCPSocket>> workers;
|
std::vector<std::shared_ptr<TCPSocket>> workers;
|
||||||
rc = ConnectWorkers(*this, &listener, lport, ninfo, timeout, retry, &workers);
|
rc = ConnectWorkers(*this, &listener, lport, ninfo, timeout, retry, &workers);
|
||||||
if (!rc.OK()) {
|
if (!rc.OK()) {
|
||||||
return rc;
|
return Fail("Failed to connect to other workers.", std::move(rc));
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECK(this->channels_.empty());
|
CHECK(this->channels_.empty());
|
||||||
|
|||||||
@ -3,6 +3,8 @@
|
|||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <numeric> // for accumulate
|
||||||
|
|
||||||
#include "communicator.h"
|
#include "communicator.h"
|
||||||
#include "device_communicator.cuh"
|
#include "device_communicator.cuh"
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#if defined(__unix__) || defined(__APPLE__)
|
#if defined(__unix__) || defined(__APPLE__)
|
||||||
#include <netdb.h> // gethostbyname
|
#include <netdb.h> // gethostbyname
|
||||||
@ -27,12 +27,14 @@
|
|||||||
#include "tracker.h"
|
#include "tracker.h"
|
||||||
#include "xgboost/collective/result.h" // for Result, Fail, Success
|
#include "xgboost/collective/result.h" // for Result, Fail, Success
|
||||||
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
|
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
|
||||||
#include "xgboost/json.h"
|
#include "xgboost/json.h" // for Json
|
||||||
|
|
||||||
namespace xgboost::collective {
|
namespace xgboost::collective {
|
||||||
Tracker::Tracker(Json const& config)
|
Tracker::Tracker(Json const& config)
|
||||||
: n_workers_{static_cast<std::int32_t>(
|
: sortby_{static_cast<SortBy>(
|
||||||
RequiredArg<Integer const>(config, "n_workers", __func__))},
|
OptionalArg<Integer const>(config, "sortby", static_cast<Integer::Int>(SortBy::kHost)))},
|
||||||
|
n_workers_{
|
||||||
|
static_cast<std::int32_t>(RequiredArg<Integer const>(config, "n_workers", __func__))},
|
||||||
port_{static_cast<std::int32_t>(OptionalArg<Integer const>(config, "port", Integer::Int{0}))},
|
port_{static_cast<std::int32_t>(OptionalArg<Integer const>(config, "port", Integer::Int{0}))},
|
||||||
timeout_{std::chrono::seconds{OptionalArg<Integer const>(
|
timeout_{std::chrono::seconds{OptionalArg<Integer const>(
|
||||||
config, "timeout", static_cast<std::int64_t>(collective::DefaultTimeoutSec()))}} {}
|
config, "timeout", static_cast<std::int64_t>(collective::DefaultTimeoutSec()))}} {}
|
||||||
@ -56,13 +58,15 @@ Result Tracker::WaitUntilReady() const {
|
|||||||
return Success();
|
return Success();
|
||||||
}
|
}
|
||||||
|
|
||||||
RabitTracker::WorkerProxy::WorkerProxy(std::int32_t world, TCPSocket sock, SockAddrV4 addr)
|
RabitTracker::WorkerProxy::WorkerProxy(std::int32_t world, TCPSocket sock, SockAddress addr)
|
||||||
: sock_{std::move(sock)} {
|
: sock_{std::move(sock)} {
|
||||||
std::int32_t rank{0};
|
std::int32_t rank{0};
|
||||||
Json jcmd;
|
Json jcmd;
|
||||||
std::int32_t port{0};
|
std::int32_t port{0};
|
||||||
|
|
||||||
rc_ = Success() << [&] { return proto::Magic{}.Verify(&sock_); } << [&] {
|
rc_ = Success() << [&] {
|
||||||
|
return proto::Magic{}.Verify(&sock_);
|
||||||
|
} << [&] {
|
||||||
return proto::Connect{}.TrackerRecv(&sock_, &world_, &rank, &task_id_);
|
return proto::Connect{}.TrackerRecv(&sock_, &world_, &rank, &task_id_);
|
||||||
} << [&] {
|
} << [&] {
|
||||||
std::string cmd;
|
std::string cmd;
|
||||||
@ -83,8 +87,13 @@ RabitTracker::WorkerProxy::WorkerProxy(std::int32_t world, TCPSocket sock, SockA
|
|||||||
}
|
}
|
||||||
return Success();
|
return Success();
|
||||||
} << [&] {
|
} << [&] {
|
||||||
auto host = addr.Addr();
|
if (addr.IsV4()) {
|
||||||
info_ = proto::PeerInfo{host, port, rank};
|
auto host = addr.V4().Addr();
|
||||||
|
info_ = proto::PeerInfo{host, port, rank};
|
||||||
|
} else {
|
||||||
|
auto host = addr.V6().Addr();
|
||||||
|
info_ = proto::PeerInfo{host, port, rank};
|
||||||
|
}
|
||||||
return Success();
|
return Success();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -92,19 +101,19 @@ RabitTracker::WorkerProxy::WorkerProxy(std::int32_t world, TCPSocket sock, SockA
|
|||||||
RabitTracker::RabitTracker(Json const& config) : Tracker{config} {
|
RabitTracker::RabitTracker(Json const& config) : Tracker{config} {
|
||||||
std::string self;
|
std::string self;
|
||||||
auto rc = collective::GetHostAddress(&self);
|
auto rc = collective::GetHostAddress(&self);
|
||||||
auto host = OptionalArg<String>(config, "host", self);
|
host_ = OptionalArg<String>(config, "host", self);
|
||||||
|
|
||||||
host_ = host;
|
auto addr = MakeSockAddress(xgboost::StringView{host_}, 0);
|
||||||
listener_ = TCPSocket::Create(SockDomain::kV4);
|
listener_ = TCPSocket::Create(addr.IsV4() ? SockDomain::kV4 : SockDomain::kV6);
|
||||||
rc = listener_.Bind(host, &this->port_);
|
rc = listener_.Bind(host_, &this->port_);
|
||||||
CHECK(rc.OK()) << rc.Report();
|
SafeColl(rc);
|
||||||
listener_.Listen();
|
listener_.Listen();
|
||||||
}
|
}
|
||||||
|
|
||||||
Result RabitTracker::Bootstrap(std::vector<WorkerProxy>* p_workers) {
|
Result RabitTracker::Bootstrap(std::vector<WorkerProxy>* p_workers) {
|
||||||
auto& workers = *p_workers;
|
auto& workers = *p_workers;
|
||||||
|
|
||||||
std::sort(workers.begin(), workers.end(), WorkerCmp{});
|
std::sort(workers.begin(), workers.end(), WorkerCmp{this->sortby_});
|
||||||
|
|
||||||
std::vector<std::thread> bootstrap_threads;
|
std::vector<std::thread> bootstrap_threads;
|
||||||
for (std::int32_t r = 0; r < n_workers_; ++r) {
|
for (std::int32_t r = 0; r < n_workers_; ++r) {
|
||||||
@ -224,7 +233,7 @@ Result RabitTracker::Bootstrap(std::vector<WorkerProxy>* p_workers) {
|
|||||||
|
|
||||||
while (state.ShouldContinue()) {
|
while (state.ShouldContinue()) {
|
||||||
TCPSocket sock;
|
TCPSocket sock;
|
||||||
SockAddrV4 addr;
|
SockAddress addr;
|
||||||
this->ready_ = true;
|
this->ready_ = true;
|
||||||
auto rc = listener_.Accept(&sock, &addr);
|
auto rc = listener_.Accept(&sock, &addr);
|
||||||
if (!rc.OK()) {
|
if (!rc.OK()) {
|
||||||
@ -291,7 +300,7 @@ Result RabitTracker::Bootstrap(std::vector<WorkerProxy>* p_workers) {
|
|||||||
|
|
||||||
[[nodiscard]] Json RabitTracker::WorkerArgs() const {
|
[[nodiscard]] Json RabitTracker::WorkerArgs() const {
|
||||||
auto rc = this->WaitUntilReady();
|
auto rc = this->WaitUntilReady();
|
||||||
CHECK(rc.OK()) << rc.Report();
|
SafeColl(rc);
|
||||||
|
|
||||||
Json args{Object{}};
|
Json args{Object{}};
|
||||||
args["DMLC_TRACKER_URI"] = String{host_};
|
args["DMLC_TRACKER_URI"] = String{host_};
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2023, XGBoost Contributors
|
* Copyright 2023-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <chrono> // for seconds
|
#include <chrono> // for seconds
|
||||||
@ -36,6 +36,16 @@ namespace xgboost::collective {
|
|||||||
* signal an error to the tracker and the tracker will notify other workers.
|
* signal an error to the tracker and the tracker will notify other workers.
|
||||||
*/
|
*/
|
||||||
class Tracker {
|
class Tracker {
|
||||||
|
protected:
|
||||||
|
// How to sort the workers, either by host name or by task ID. When using a multi-GPU
|
||||||
|
// setting, multiple workers can occupy the same host, in which case one should sort
|
||||||
|
// workers by task. For compatibility reasons, the task ID is not always available, so
|
||||||
|
// we use host as the default.
|
||||||
|
enum class SortBy : std::int8_t {
|
||||||
|
kHost = 0,
|
||||||
|
kTask = 1,
|
||||||
|
} sortby_;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::int32_t n_workers_{0};
|
std::int32_t n_workers_{0};
|
||||||
std::int32_t port_{-1};
|
std::int32_t port_{-1};
|
||||||
@ -76,7 +86,7 @@ class RabitTracker : public Tracker {
|
|||||||
Result rc_;
|
Result rc_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit WorkerProxy(std::int32_t world, TCPSocket sock, SockAddrV4 addr);
|
explicit WorkerProxy(std::int32_t world, TCPSocket sock, SockAddress addr);
|
||||||
WorkerProxy(WorkerProxy const& that) = delete;
|
WorkerProxy(WorkerProxy const& that) = delete;
|
||||||
WorkerProxy(WorkerProxy&& that) = default;
|
WorkerProxy(WorkerProxy&& that) = default;
|
||||||
WorkerProxy& operator=(WorkerProxy const&) = delete;
|
WorkerProxy& operator=(WorkerProxy const&) = delete;
|
||||||
@ -96,11 +106,14 @@ class RabitTracker : public Tracker {
|
|||||||
|
|
||||||
void Send(StringView value) { this->sock_.Send(value); }
|
void Send(StringView value) { this->sock_.Send(value); }
|
||||||
};
|
};
|
||||||
// provide an ordering for workers, this helps us get deterministic topology.
|
// Provide an ordering for workers, this helps us get deterministic topology.
|
||||||
struct WorkerCmp {
|
struct WorkerCmp {
|
||||||
|
SortBy sortby;
|
||||||
|
explicit WorkerCmp(SortBy sortby) : sortby{sortby} {}
|
||||||
|
|
||||||
[[nodiscard]] bool operator()(WorkerProxy const& lhs, WorkerProxy const& rhs) {
|
[[nodiscard]] bool operator()(WorkerProxy const& lhs, WorkerProxy const& rhs) {
|
||||||
auto const& lh = lhs.Host();
|
auto const& lh = sortby == Tracker::SortBy::kHost ? lhs.Host() : lhs.TaskID();
|
||||||
auto const& rh = rhs.Host();
|
auto const& rh = sortby == Tracker::SortBy::kHost ? rhs.Host() : rhs.TaskID();
|
||||||
|
|
||||||
if (lh != rh) {
|
if (lh != rh) {
|
||||||
return lh < rh;
|
return lh < rh;
|
||||||
|
|||||||
@ -72,7 +72,7 @@ class SparseColumnIter : public Column<BinIdxT> {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
SparseColumnIter(common::Span<const BinIdxT> index, bst_bin_t least_bin_idx,
|
SparseColumnIter(common::Span<const BinIdxT> index, bst_bin_t least_bin_idx,
|
||||||
common::Span<const size_t> row_ind, bst_row_t first_row_idx)
|
common::Span<const size_t> row_ind, bst_idx_t first_row_idx)
|
||||||
: Base{index, least_bin_idx}, row_ind_(row_ind) {
|
: Base{index, least_bin_idx}, row_ind_(row_ind) {
|
||||||
// first_row_idx is the first row in the leaf partition
|
// first_row_idx is the first row in the leaf partition
|
||||||
const size_t* row_data = RowIndices();
|
const size_t* row_data = RowIndices();
|
||||||
@ -301,7 +301,7 @@ class ColumnMatrix {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename BinIdxType>
|
template <typename BinIdxType>
|
||||||
auto SparseColumn(bst_feature_t fidx, bst_row_t first_row_idx) const {
|
auto SparseColumn(bst_feature_t fidx, bst_idx_t first_row_idx) const {
|
||||||
const size_t feature_offset = feature_offsets_[fidx]; // to get right place for certain feature
|
const size_t feature_offset = feature_offsets_[fidx]; // to get right place for certain feature
|
||||||
const size_t column_size = feature_offsets_[fidx + 1] - feature_offset;
|
const size_t column_size = feature_offsets_[fidx + 1] - feature_offset;
|
||||||
common::Span<const BinIdxType> bin_index = {
|
common::Span<const BinIdxType> bin_index = {
|
||||||
@ -325,7 +325,7 @@ class ColumnMatrix {
|
|||||||
// all columns are dense columns and have no missing values
|
// all columns are dense columns and have no missing values
|
||||||
// FIXME(jiamingy): We don't need a column matrix if there's no missing value.
|
// FIXME(jiamingy): We don't need a column matrix if there's no missing value.
|
||||||
template <typename RowBinIdxT>
|
template <typename RowBinIdxT>
|
||||||
void SetIndexNoMissing(bst_row_t base_rowid, RowBinIdxT const* row_index, const size_t n_samples,
|
void SetIndexNoMissing(bst_idx_t base_rowid, RowBinIdxT const* row_index, const size_t n_samples,
|
||||||
const size_t n_features, int32_t n_threads) {
|
const size_t n_features, int32_t n_threads) {
|
||||||
missing_.GrowTo(feature_offsets_[n_features], false);
|
missing_.GrowTo(feature_offsets_[n_features], false);
|
||||||
|
|
||||||
|
|||||||
@ -21,11 +21,9 @@
|
|||||||
#include <thrust/unique.h>
|
#include <thrust/unique.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <chrono>
|
|
||||||
#include <cstddef> // for size_t
|
#include <cstddef> // for size_t
|
||||||
#include <cub/cub.cuh>
|
#include <cub/cub.cuh>
|
||||||
#include <cub/util_allocator.cuh>
|
#include <cub/util_allocator.cuh>
|
||||||
#include <numeric>
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
@ -33,7 +31,6 @@
|
|||||||
|
|
||||||
#include "../collective/communicator-inl.h"
|
#include "../collective/communicator-inl.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "xgboost/global_config.h"
|
|
||||||
#include "xgboost/host_device_vector.h"
|
#include "xgboost/host_device_vector.h"
|
||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h"
|
||||||
#include "xgboost/span.h"
|
#include "xgboost/span.h"
|
||||||
|
|||||||
@ -34,7 +34,7 @@ HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins
|
|||||||
HistogramCuts out;
|
HistogramCuts out;
|
||||||
auto const &info = m->Info();
|
auto const &info = m->Info();
|
||||||
auto n_threads = ctx->Threads();
|
auto n_threads = ctx->Threads();
|
||||||
std::vector<bst_row_t> reduced(info.num_col_, 0);
|
std::vector<bst_idx_t> reduced(info.num_col_, 0);
|
||||||
for (auto const &page : m->GetBatches<SparsePage>()) {
|
for (auto const &page : m->GetBatches<SparsePage>()) {
|
||||||
auto const &entries_per_column =
|
auto const &entries_per_column =
|
||||||
CalcColumnSize(data::SparsePageAdapterBatch{page.GetView()}, info.num_col_, n_threads,
|
CalcColumnSize(data::SparsePageAdapterBatch{page.GetView()}, info.num_col_, n_threads,
|
||||||
@ -209,10 +209,10 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
|
|||||||
CHECK(offsets);
|
CHECK(offsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto get_row_ptr = [&](bst_row_t ridx) {
|
auto get_row_ptr = [&](bst_idx_t ridx) {
|
||||||
return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
|
return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
|
||||||
};
|
};
|
||||||
auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
|
auto get_rid = [&](bst_idx_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
|
||||||
|
|
||||||
const size_t n_features =
|
const size_t n_features =
|
||||||
get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
|
get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
|
||||||
@ -275,10 +275,10 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
|
|||||||
auto const &row_ptr = gmat.row_ptr.data();
|
auto const &row_ptr = gmat.row_ptr.data();
|
||||||
auto base_rowid = gmat.base_rowid;
|
auto base_rowid = gmat.base_rowid;
|
||||||
const uint32_t *offsets = gmat.index.Offset();
|
const uint32_t *offsets = gmat.index.Offset();
|
||||||
auto get_row_ptr = [&](bst_row_t ridx) {
|
auto get_row_ptr = [&](bst_idx_t ridx) {
|
||||||
return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
|
return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
|
||||||
};
|
};
|
||||||
auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
|
auto get_rid = [&](bst_idx_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
|
||||||
|
|
||||||
const size_t n_features = gmat.cut.Ptrs().size() - 1;
|
const size_t n_features = gmat.cut.Ptrs().size() - 1;
|
||||||
const size_t n_columns = n_features;
|
const size_t n_columns = n_features;
|
||||||
|
|||||||
@ -13,8 +13,6 @@
|
|||||||
#include <xgboost/logging.h>
|
#include <xgboost/logging.h>
|
||||||
|
|
||||||
#include <cstddef> // for size_t
|
#include <cstddef> // for size_t
|
||||||
#include <memory>
|
|
||||||
#include <mutex>
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -39,7 +37,7 @@ size_t RequiredSampleCutsPerColumn(int max_bins, size_t num_rows) {
|
|||||||
return std::min(num_cuts, num_rows);
|
return std::min(num_cuts, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t RequiredSampleCuts(bst_row_t num_rows, bst_feature_t num_columns,
|
size_t RequiredSampleCuts(bst_idx_t num_rows, bst_feature_t num_columns,
|
||||||
size_t max_bins, size_t nnz) {
|
size_t max_bins, size_t nnz) {
|
||||||
auto per_column = RequiredSampleCutsPerColumn(max_bins, num_rows);
|
auto per_column = RequiredSampleCutsPerColumn(max_bins, num_rows);
|
||||||
auto if_dense = num_columns * per_column;
|
auto if_dense = num_columns * per_column;
|
||||||
@ -47,7 +45,7 @@ size_t RequiredSampleCuts(bst_row_t num_rows, bst_feature_t num_columns,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,
|
size_t RequiredMemory(bst_idx_t num_rows, bst_feature_t num_columns, size_t nnz,
|
||||||
size_t num_bins, bool with_weights) {
|
size_t num_bins, bool with_weights) {
|
||||||
size_t peak = 0;
|
size_t peak = 0;
|
||||||
// 0. Allocate cut pointer in quantile container by increasing: n_columns + 1
|
// 0. Allocate cut pointer in quantile container by increasing: n_columns + 1
|
||||||
@ -85,7 +83,7 @@ size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,
|
|||||||
return peak;
|
return peak;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t SketchBatchNumElements(size_t sketch_batch_num_elements, bst_row_t num_rows,
|
size_t SketchBatchNumElements(size_t sketch_batch_num_elements, bst_idx_t num_rows,
|
||||||
bst_feature_t columns, size_t nnz, int device, size_t num_cuts,
|
bst_feature_t columns, size_t nnz, int device, size_t num_cuts,
|
||||||
bool has_weight) {
|
bool has_weight) {
|
||||||
auto constexpr kIntMax = static_cast<std::size_t>(std::numeric_limits<std::int32_t>::max());
|
auto constexpr kIntMax = static_cast<std::size_t>(std::numeric_limits<std::int32_t>::max());
|
||||||
@ -123,7 +121,7 @@ void SortByWeight(dh::device_vector<float>* weights, dh::device_vector<Entry>* s
|
|||||||
[=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });
|
[=] __device__(const Entry& a, const Entry& b) { return a.index == b.index; });
|
||||||
}
|
}
|
||||||
|
|
||||||
void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
|
void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
|
||||||
dh::device_vector<Entry>* p_sorted_entries,
|
dh::device_vector<Entry>* p_sorted_entries,
|
||||||
dh::device_vector<float>* p_sorted_weights,
|
dh::device_vector<float>* p_sorted_weights,
|
||||||
dh::caching_device_vector<size_t>* p_column_sizes_scan) {
|
dh::caching_device_vector<size_t>* p_column_sizes_scan) {
|
||||||
@ -210,7 +208,7 @@ void ProcessWeightedBatch(Context const* ctx, const SparsePage& page, MetaInfo c
|
|||||||
sorted_entries = dh::device_vector<Entry>(h_data.begin() + begin, h_data.begin() + end);
|
sorted_entries = dh::device_vector<Entry>(h_data.begin() + begin, h_data.begin() + end);
|
||||||
}
|
}
|
||||||
|
|
||||||
bst_row_t base_rowid = page.base_rowid;
|
bst_idx_t base_rowid = page.base_rowid;
|
||||||
|
|
||||||
dh::device_vector<float> entry_weight;
|
dh::device_vector<float> entry_weight;
|
||||||
auto cuctx = ctx->CUDACtx();
|
auto cuctx = ctx->CUDACtx();
|
||||||
|
|||||||
@ -187,7 +187,7 @@ inline size_t constexpr BytesPerElement(bool has_weight) {
|
|||||||
* directly if it's not 0.
|
* directly if it's not 0.
|
||||||
*/
|
*/
|
||||||
size_t SketchBatchNumElements(size_t sketch_batch_num_elements,
|
size_t SketchBatchNumElements(size_t sketch_batch_num_elements,
|
||||||
bst_row_t num_rows, bst_feature_t columns,
|
bst_idx_t num_rows, bst_feature_t columns,
|
||||||
size_t nnz, int device,
|
size_t nnz, int device,
|
||||||
size_t num_cuts, bool has_weight);
|
size_t num_cuts, bool has_weight);
|
||||||
|
|
||||||
@ -210,7 +210,7 @@ size_t RequiredSampleCutsPerColumn(int max_bins, size_t num_rows);
|
|||||||
*
|
*
|
||||||
* \return The estimated bytes
|
* \return The estimated bytes
|
||||||
*/
|
*/
|
||||||
size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,
|
size_t RequiredMemory(bst_idx_t num_rows, bst_feature_t num_columns, size_t nnz,
|
||||||
size_t num_bins, bool with_weights);
|
size_t num_bins, bool with_weights);
|
||||||
|
|
||||||
// Count the valid entries in each column and copy them out.
|
// Count the valid entries in each column and copy them out.
|
||||||
@ -241,7 +241,7 @@ void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, Ran
|
|||||||
void SortByWeight(dh::device_vector<float>* weights,
|
void SortByWeight(dh::device_vector<float>* weights,
|
||||||
dh::device_vector<Entry>* sorted_entries);
|
dh::device_vector<Entry>* sorted_entries);
|
||||||
|
|
||||||
void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_row_t> d_cuts_ptr,
|
void RemoveDuplicatedCategories(DeviceOrd device, MetaInfo const& info, Span<bst_idx_t> d_cuts_ptr,
|
||||||
dh::device_vector<Entry>* p_sorted_entries,
|
dh::device_vector<Entry>* p_sorted_entries,
|
||||||
dh::device_vector<float>* p_sorted_weights,
|
dh::device_vector<float>* p_sorted_weights,
|
||||||
dh::caching_device_vector<size_t>* p_column_sizes_scan);
|
dh::caching_device_vector<size_t>* p_column_sizes_scan);
|
||||||
|
|||||||
@ -178,7 +178,7 @@ template class HostDeviceVector<uint8_t>;
|
|||||||
template class HostDeviceVector<int8_t>;
|
template class HostDeviceVector<int8_t>;
|
||||||
template class HostDeviceVector<FeatureType>;
|
template class HostDeviceVector<FeatureType>;
|
||||||
template class HostDeviceVector<Entry>;
|
template class HostDeviceVector<Entry>;
|
||||||
template class HostDeviceVector<uint64_t>; // bst_row_t
|
template class HostDeviceVector<bst_idx_t>;
|
||||||
template class HostDeviceVector<uint32_t>; // bst_feature_t
|
template class HostDeviceVector<uint32_t>; // bst_feature_t
|
||||||
|
|
||||||
#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
|
#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
|
||||||
|
|||||||
@ -416,7 +416,7 @@ template class HostDeviceVector<uint8_t>;
|
|||||||
template class HostDeviceVector<int8_t>;
|
template class HostDeviceVector<int8_t>;
|
||||||
template class HostDeviceVector<FeatureType>;
|
template class HostDeviceVector<FeatureType>;
|
||||||
template class HostDeviceVector<Entry>;
|
template class HostDeviceVector<Entry>;
|
||||||
template class HostDeviceVector<uint64_t>; // bst_row_t
|
template class HostDeviceVector<bst_idx_t>;
|
||||||
template class HostDeviceVector<uint32_t>; // bst_feature_t
|
template class HostDeviceVector<uint32_t>; // bst_feature_t
|
||||||
template class HostDeviceVector<RegTree::Node>;
|
template class HostDeviceVector<RegTree::Node>;
|
||||||
template class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;
|
template class HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>;
|
||||||
|
|||||||
@ -14,7 +14,7 @@
|
|||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
template <typename WQSketch>
|
template <typename WQSketch>
|
||||||
SketchContainerImpl<WQSketch>::SketchContainerImpl(Context const *ctx,
|
SketchContainerImpl<WQSketch>::SketchContainerImpl(Context const *ctx,
|
||||||
std::vector<bst_row_t> columns_size,
|
std::vector<bst_idx_t> columns_size,
|
||||||
int32_t max_bins,
|
int32_t max_bins,
|
||||||
Span<FeatureType const> feature_types,
|
Span<FeatureType const> feature_types,
|
||||||
bool use_group)
|
bool use_group)
|
||||||
@ -120,8 +120,8 @@ namespace {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
struct QuantileAllreduce {
|
struct QuantileAllreduce {
|
||||||
common::Span<T> global_values;
|
common::Span<T> global_values;
|
||||||
common::Span<size_t> worker_indptr;
|
common::Span<bst_idx_t> worker_indptr;
|
||||||
common::Span<size_t> feature_indptr;
|
common::Span<bst_idx_t> feature_indptr;
|
||||||
size_t n_features{0};
|
size_t n_features{0};
|
||||||
/**
|
/**
|
||||||
* \brief Get sketch values of a feature from a worker.
|
* \brief Get sketch values of a feature from a worker.
|
||||||
@ -147,7 +147,7 @@ template <typename WQSketch>
|
|||||||
void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
||||||
Context const *ctx, MetaInfo const &info,
|
Context const *ctx, MetaInfo const &info,
|
||||||
std::vector<typename WQSketch::SummaryContainer> const &reduced,
|
std::vector<typename WQSketch::SummaryContainer> const &reduced,
|
||||||
std::vector<size_t> *p_worker_segments, std::vector<bst_row_t> *p_sketches_scan,
|
std::vector<bst_idx_t> *p_worker_segments, std::vector<bst_idx_t> *p_sketches_scan,
|
||||||
std::vector<typename WQSketch::Entry> *p_global_sketches) {
|
std::vector<typename WQSketch::Entry> *p_global_sketches) {
|
||||||
auto &worker_segments = *p_worker_segments;
|
auto &worker_segments = *p_worker_segments;
|
||||||
worker_segments.resize(1, 0);
|
worker_segments.resize(1, 0);
|
||||||
@ -156,7 +156,7 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
|||||||
auto n_columns = sketches_.size();
|
auto n_columns = sketches_.size();
|
||||||
|
|
||||||
// get the size of each feature.
|
// get the size of each feature.
|
||||||
std::vector<bst_row_t> sketch_size;
|
std::vector<bst_idx_t> sketch_size;
|
||||||
for (size_t i = 0; i < reduced.size(); ++i) {
|
for (size_t i = 0; i < reduced.size(); ++i) {
|
||||||
if (IsCat(feature_types_, i)) {
|
if (IsCat(feature_types_, i)) {
|
||||||
sketch_size.push_back(0);
|
sketch_size.push_back(0);
|
||||||
@ -165,7 +165,7 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// turn the size into CSC indptr
|
// turn the size into CSC indptr
|
||||||
std::vector<bst_row_t> &sketches_scan = *p_sketches_scan;
|
std::vector<bst_idx_t> &sketches_scan = *p_sketches_scan;
|
||||||
sketches_scan.resize((n_columns + 1) * world, 0);
|
sketches_scan.resize((n_columns + 1) * world, 0);
|
||||||
size_t beg_scan = rank * (n_columns + 1); // starting storage for current worker.
|
size_t beg_scan = rank * (n_columns + 1); // starting storage for current worker.
|
||||||
std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1);
|
std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1);
|
||||||
@ -226,7 +226,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const* ctx, Meta
|
|||||||
CHECK_EQ(feature_ptr.front(), 0);
|
CHECK_EQ(feature_ptr.front(), 0);
|
||||||
|
|
||||||
// gather all feature ptrs from workers
|
// gather all feature ptrs from workers
|
||||||
std::vector<size_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
|
std::vector<bst_idx_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
|
||||||
size_t feat_begin = rank * feature_ptr.size(); // pointer to current worker
|
size_t feat_begin = rank * feature_ptr.size(); // pointer to current worker
|
||||||
std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin);
|
std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin);
|
||||||
auto rc = collective::GlobalSum(
|
auto rc = collective::GlobalSum(
|
||||||
@ -241,7 +241,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(Context const* ctx, Meta
|
|||||||
}
|
}
|
||||||
|
|
||||||
// indptr for indexing workers
|
// indptr for indexing workers
|
||||||
std::vector<size_t> global_worker_ptr(world_size + 1, 0);
|
std::vector<bst_idx_t> global_worker_ptr(world_size + 1, 0);
|
||||||
global_worker_ptr[rank + 1] = total; // shift 1 to right for constructing the indptr
|
global_worker_ptr[rank + 1] = total; // shift 1 to right for constructing the indptr
|
||||||
rc = collective::GlobalSum(ctx, info,
|
rc = collective::GlobalSum(ctx, info,
|
||||||
linalg::MakeVec(global_worker_ptr.data(), global_worker_ptr.size()));
|
linalg::MakeVec(global_worker_ptr.data(), global_worker_ptr.size()));
|
||||||
@ -298,14 +298,14 @@ void SketchContainerImpl<WQSketch>::AllReduce(
|
|||||||
reduced.resize(sketches_.size());
|
reduced.resize(sketches_.size());
|
||||||
|
|
||||||
// Prune the intermediate num cuts for synchronization.
|
// Prune the intermediate num cuts for synchronization.
|
||||||
std::vector<bst_row_t> global_column_size(columns_size_);
|
std::vector<bst_idx_t> global_column_size(columns_size_);
|
||||||
auto rc = collective::GlobalSum(
|
auto rc = collective::GlobalSum(
|
||||||
ctx, info, linalg::MakeVec(global_column_size.data(), global_column_size.size()));
|
ctx, info, linalg::MakeVec(global_column_size.data(), global_column_size.size()));
|
||||||
collective::SafeColl(rc);
|
collective::SafeColl(rc);
|
||||||
|
|
||||||
ParallelFor(sketches_.size(), n_threads_, [&](size_t i) {
|
ParallelFor(sketches_.size(), n_threads_, [&](size_t i) {
|
||||||
int32_t intermediate_num_cuts = static_cast<int32_t>(
|
int32_t intermediate_num_cuts = static_cast<int32_t>(
|
||||||
std::min(global_column_size[i], static_cast<size_t>(max_bins_ * WQSketch::kFactor)));
|
std::min(global_column_size[i], static_cast<bst_idx_t>(max_bins_ * WQSketch::kFactor)));
|
||||||
if (global_column_size[i] == 0) {
|
if (global_column_size[i] == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -327,8 +327,8 @@ void SketchContainerImpl<WQSketch>::AllReduce(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<size_t> worker_segments(1, 0); // CSC pointer to sketches.
|
std::vector<bst_idx_t> worker_segments(1, 0); // CSC pointer to sketches.
|
||||||
std::vector<bst_row_t> sketches_scan((n_columns + 1) * world, 0);
|
std::vector<bst_idx_t> sketches_scan((n_columns + 1) * world, 0);
|
||||||
|
|
||||||
std::vector<typename WQSketch::Entry> global_sketches;
|
std::vector<typename WQSketch::Entry> global_sketches;
|
||||||
this->GatherSketchInfo(ctx, info, reduced, &worker_segments, &sketches_scan, &global_sketches);
|
this->GatherSketchInfo(ctx, info, reduced, &worker_segments, &sketches_scan, &global_sketches);
|
||||||
@ -452,11 +452,11 @@ template class SketchContainerImpl<WXQuantileSketch<float, float>>;
|
|||||||
|
|
||||||
HostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bins,
|
HostSketchContainer::HostSketchContainer(Context const *ctx, bst_bin_t max_bins,
|
||||||
common::Span<FeatureType const> ft,
|
common::Span<FeatureType const> ft,
|
||||||
std::vector<size_t> columns_size, bool use_group)
|
std::vector<bst_idx_t> columns_size, bool use_group)
|
||||||
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
|
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
|
||||||
monitor_.Init(__func__);
|
monitor_.Init(__func__);
|
||||||
ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {
|
ParallelFor(sketches_.size(), n_threads_, Sched::Auto(), [&](auto i) {
|
||||||
auto n_bins = std::min(static_cast<size_t>(max_bins_), columns_size_[i]);
|
auto n_bins = std::min(static_cast<bst_idx_t>(max_bins_), columns_size_[i]);
|
||||||
n_bins = std::max(n_bins, static_cast<decltype(n_bins)>(1));
|
n_bins = std::max(n_bins, static_cast<decltype(n_bins)>(1));
|
||||||
auto eps = 1.0 / (static_cast<float>(n_bins) * WQSketch::kFactor);
|
auto eps = 1.0 / (static_cast<float>(n_bins) * WQSketch::kFactor);
|
||||||
if (!IsCat(this->feature_types_, i)) {
|
if (!IsCat(this->feature_types_, i)) {
|
||||||
|
|||||||
@ -115,16 +115,16 @@ void CopyTo(Span<T> out, Span<U> src) {
|
|||||||
|
|
||||||
// Compute the merge path.
|
// Compute the merge path.
|
||||||
common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
|
common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
|
||||||
Span<SketchEntry const> const &d_x, Span<bst_row_t const> const &x_ptr,
|
Span<SketchEntry const> const &d_x, Span<bst_idx_t const> const &x_ptr,
|
||||||
Span<SketchEntry const> const &d_y, Span<bst_row_t const> const &y_ptr,
|
Span<SketchEntry const> const &d_y, Span<bst_idx_t const> const &y_ptr,
|
||||||
Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
|
Span<SketchEntry> out, Span<bst_idx_t> out_ptr) {
|
||||||
auto x_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
|
auto x_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
|
||||||
dh::MakeTransformIterator<bst_row_t>(
|
dh::MakeTransformIterator<bst_idx_t>(
|
||||||
thrust::make_counting_iterator(0ul),
|
thrust::make_counting_iterator(0ul),
|
||||||
[=] __device__(size_t idx) { return dh::SegmentId(x_ptr, idx); }),
|
[=] __device__(size_t idx) { return dh::SegmentId(x_ptr, idx); }),
|
||||||
d_x.data()));
|
d_x.data()));
|
||||||
auto y_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
|
auto y_merge_key_it = thrust::make_zip_iterator(thrust::make_tuple(
|
||||||
dh::MakeTransformIterator<bst_row_t>(
|
dh::MakeTransformIterator<bst_idx_t>(
|
||||||
thrust::make_counting_iterator(0ul),
|
thrust::make_counting_iterator(0ul),
|
||||||
[=] __device__(size_t idx) { return dh::SegmentId(y_ptr, idx); }),
|
[=] __device__(size_t idx) { return dh::SegmentId(y_ptr, idx); }),
|
||||||
d_y.data()));
|
d_y.data()));
|
||||||
@ -175,13 +175,13 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
|
|||||||
|
|
||||||
auto scan_key_it = dh::MakeTransformIterator<size_t>(
|
auto scan_key_it = dh::MakeTransformIterator<size_t>(
|
||||||
thrust::make_counting_iterator(0ul),
|
thrust::make_counting_iterator(0ul),
|
||||||
[=] __device__(size_t idx) { return dh::SegmentId(out_ptr, idx); });
|
[=] XGBOOST_DEVICE(size_t idx) { return dh::SegmentId(out_ptr, idx); });
|
||||||
|
|
||||||
auto scan_val_it = dh::MakeTransformIterator<Tuple>(
|
auto scan_val_it = dh::MakeTransformIterator<Tuple>(
|
||||||
merge_path.data(), [=] __device__(Tuple const &t) -> Tuple {
|
merge_path.data(), [=] XGBOOST_DEVICE(Tuple const &t) -> Tuple {
|
||||||
auto ind = get_ind(t); // == 0 if element is from x
|
auto ind = get_ind(t); // == 0 if element is from x
|
||||||
// x_counter, y_counter
|
// x_counter, y_counter
|
||||||
return thrust::make_tuple<uint64_t, uint64_t>(!ind, ind);
|
return thrust::tuple<std::uint64_t, std::uint64_t>{!ind, ind};
|
||||||
});
|
});
|
||||||
|
|
||||||
// Compute the index for both x and y (which of the element in a and b are used in each
|
// Compute the index for both x and y (which of the element in a and b are used in each
|
||||||
@ -208,8 +208,8 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
|
|||||||
// run it in 2 passes to obtain the merge path and then customize the standard merge
|
// run it in 2 passes to obtain the merge path and then customize the standard merge
|
||||||
// algorithm.
|
// algorithm.
|
||||||
void MergeImpl(DeviceOrd device, Span<SketchEntry const> const &d_x,
|
void MergeImpl(DeviceOrd device, Span<SketchEntry const> const &d_x,
|
||||||
Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
|
Span<bst_idx_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
|
||||||
Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
|
Span<bst_idx_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_idx_t> out_ptr) {
|
||||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||||
CHECK_EQ(d_x.size() + d_y.size(), out.size());
|
CHECK_EQ(d_x.size() + d_y.size(), out.size());
|
||||||
CHECK_EQ(x_ptr.size(), out_ptr.size());
|
CHECK_EQ(x_ptr.size(), out_ptr.size());
|
||||||
|
|||||||
@ -32,13 +32,13 @@ struct SketchUnique {
|
|||||||
class SketchContainer {
|
class SketchContainer {
|
||||||
public:
|
public:
|
||||||
static constexpr float kFactor = WQSketch::kFactor;
|
static constexpr float kFactor = WQSketch::kFactor;
|
||||||
using OffsetT = bst_row_t;
|
using OffsetT = bst_idx_t;
|
||||||
static_assert(sizeof(OffsetT) == sizeof(size_t), "Wrong type for sketch element offset.");
|
static_assert(sizeof(OffsetT) == sizeof(size_t), "Wrong type for sketch element offset.");
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Monitor timer_;
|
Monitor timer_;
|
||||||
HostDeviceVector<FeatureType> feature_types_;
|
HostDeviceVector<FeatureType> feature_types_;
|
||||||
bst_row_t num_rows_;
|
bst_idx_t num_rows_;
|
||||||
bst_feature_t num_columns_;
|
bst_feature_t num_columns_;
|
||||||
int32_t num_bins_;
|
int32_t num_bins_;
|
||||||
DeviceOrd device_;
|
DeviceOrd device_;
|
||||||
@ -94,7 +94,7 @@ class SketchContainer {
|
|||||||
* \param device GPU ID.
|
* \param device GPU ID.
|
||||||
*/
|
*/
|
||||||
SketchContainer(HostDeviceVector<FeatureType> const& feature_types, int32_t max_bin,
|
SketchContainer(HostDeviceVector<FeatureType> const& feature_types, int32_t max_bin,
|
||||||
bst_feature_t num_columns, bst_row_t num_rows, DeviceOrd device)
|
bst_feature_t num_columns, bst_idx_t num_rows, DeviceOrd device)
|
||||||
: num_rows_{num_rows}, num_columns_{num_columns}, num_bins_{max_bin}, device_{device} {
|
: num_rows_{num_rows}, num_columns_{num_columns}, num_bins_{max_bin}, device_{device} {
|
||||||
CHECK(device.IsCUDA());
|
CHECK(device.IsCUDA());
|
||||||
// Initialize Sketches for this dmatrix
|
// Initialize Sketches for this dmatrix
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2014-2023 by XGBoost Contributors
|
* Copyright 2014-2024, XGBoost Contributors
|
||||||
* \file quantile.h
|
* \file quantile.h
|
||||||
* \brief util to compute quantiles
|
* \brief util to compute quantiles
|
||||||
* \author Tianqi Chen
|
* \author Tianqi Chen
|
||||||
@ -701,12 +701,12 @@ inline std::vector<float> UnrollGroupWeights(MetaInfo const &info) {
|
|||||||
auto n_groups = group_ptr.size() - 1;
|
auto n_groups = group_ptr.size() - 1;
|
||||||
CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
|
CHECK_EQ(info.weights_.Size(), n_groups) << error::GroupWeight();
|
||||||
|
|
||||||
bst_row_t n_samples = info.num_row_;
|
bst_idx_t n_samples = info.num_row_;
|
||||||
std::vector<float> results(n_samples);
|
std::vector<float> results(n_samples);
|
||||||
CHECK_EQ(group_ptr.back(), n_samples)
|
CHECK_EQ(group_ptr.back(), n_samples)
|
||||||
<< error::GroupSize() << " the number of rows from the data.";
|
<< error::GroupSize() << " the number of rows from the data.";
|
||||||
size_t cur_group = 0;
|
size_t cur_group = 0;
|
||||||
for (bst_row_t i = 0; i < n_samples; ++i) {
|
for (bst_idx_t i = 0; i < n_samples; ++i) {
|
||||||
results[i] = group_weights[cur_group];
|
results[i] = group_weights[cur_group];
|
||||||
if (i == group_ptr[cur_group + 1]) {
|
if (i == group_ptr[cur_group + 1]) {
|
||||||
cur_group++;
|
cur_group++;
|
||||||
@ -719,9 +719,9 @@ inline std::vector<float> UnrollGroupWeights(MetaInfo const &info) {
|
|||||||
class HistogramCuts;
|
class HistogramCuts;
|
||||||
|
|
||||||
template <typename Batch, typename IsValid>
|
template <typename Batch, typename IsValid>
|
||||||
std::vector<bst_row_t> CalcColumnSize(Batch const &batch, bst_feature_t const n_columns,
|
std::vector<bst_idx_t> CalcColumnSize(Batch const &batch, bst_feature_t const n_columns,
|
||||||
size_t const n_threads, IsValid &&is_valid) {
|
size_t const n_threads, IsValid &&is_valid) {
|
||||||
std::vector<std::vector<bst_row_t>> column_sizes_tloc(n_threads);
|
std::vector<std::vector<bst_idx_t>> column_sizes_tloc(n_threads);
|
||||||
for (auto &column : column_sizes_tloc) {
|
for (auto &column : column_sizes_tloc) {
|
||||||
column.resize(n_columns, 0);
|
column.resize(n_columns, 0);
|
||||||
}
|
}
|
||||||
@ -759,7 +759,7 @@ std::vector<bst_feature_t> LoadBalance(Batch const &batch, size_t nnz, bst_featu
|
|||||||
size_t const entries_per_thread = DivRoundUp(total_entries, nthreads);
|
size_t const entries_per_thread = DivRoundUp(total_entries, nthreads);
|
||||||
|
|
||||||
// Need to calculate the size for each batch.
|
// Need to calculate the size for each batch.
|
||||||
std::vector<bst_row_t> entries_per_columns = CalcColumnSize(batch, n_columns, nthreads, is_valid);
|
std::vector<bst_idx_t> entries_per_columns = CalcColumnSize(batch, n_columns, nthreads, is_valid);
|
||||||
std::vector<bst_feature_t> cols_ptr(nthreads + 1, 0);
|
std::vector<bst_feature_t> cols_ptr(nthreads + 1, 0);
|
||||||
size_t count{0};
|
size_t count{0};
|
||||||
size_t current_thread{1};
|
size_t current_thread{1};
|
||||||
@ -791,8 +791,8 @@ class SketchContainerImpl {
|
|||||||
std::vector<std::set<float>> categories_;
|
std::vector<std::set<float>> categories_;
|
||||||
std::vector<FeatureType> const feature_types_;
|
std::vector<FeatureType> const feature_types_;
|
||||||
|
|
||||||
std::vector<bst_row_t> columns_size_;
|
std::vector<bst_idx_t> columns_size_;
|
||||||
int32_t max_bins_;
|
bst_bin_t max_bins_;
|
||||||
bool use_group_ind_{false};
|
bool use_group_ind_{false};
|
||||||
int32_t n_threads_;
|
int32_t n_threads_;
|
||||||
bool has_categorical_{false};
|
bool has_categorical_{false};
|
||||||
@ -805,7 +805,7 @@ class SketchContainerImpl {
|
|||||||
* \param max_bins maximum number of bins for each feature.
|
* \param max_bins maximum number of bins for each feature.
|
||||||
* \param use_group whether is assigned to group to data instance.
|
* \param use_group whether is assigned to group to data instance.
|
||||||
*/
|
*/
|
||||||
SketchContainerImpl(Context const *ctx, std::vector<bst_row_t> columns_size, int32_t max_bins,
|
SketchContainerImpl(Context const *ctx, std::vector<bst_idx_t> columns_size, bst_bin_t max_bins,
|
||||||
common::Span<FeatureType const> feature_types, bool use_group);
|
common::Span<FeatureType const> feature_types, bool use_group);
|
||||||
|
|
||||||
static bool UseGroup(MetaInfo const &info) {
|
static bool UseGroup(MetaInfo const &info) {
|
||||||
@ -829,8 +829,8 @@ class SketchContainerImpl {
|
|||||||
// Gather sketches from all workers.
|
// Gather sketches from all workers.
|
||||||
void GatherSketchInfo(Context const *ctx, MetaInfo const &info,
|
void GatherSketchInfo(Context const *ctx, MetaInfo const &info,
|
||||||
std::vector<typename WQSketch::SummaryContainer> const &reduced,
|
std::vector<typename WQSketch::SummaryContainer> const &reduced,
|
||||||
std::vector<bst_row_t> *p_worker_segments,
|
std::vector<bst_idx_t> *p_worker_segments,
|
||||||
std::vector<bst_row_t> *p_sketches_scan,
|
std::vector<bst_idx_t> *p_sketches_scan,
|
||||||
std::vector<typename WQSketch::Entry> *p_global_sketches);
|
std::vector<typename WQSketch::Entry> *p_global_sketches);
|
||||||
// Merge sketches from all workers.
|
// Merge sketches from all workers.
|
||||||
void AllReduce(Context const *ctx, MetaInfo const &info,
|
void AllReduce(Context const *ctx, MetaInfo const &info,
|
||||||
@ -901,7 +901,7 @@ class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, fl
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
HostSketchContainer(Context const *ctx, bst_bin_t max_bins, common::Span<FeatureType const> ft,
|
HostSketchContainer(Context const *ctx, bst_bin_t max_bins, common::Span<FeatureType const> ft,
|
||||||
std::vector<size_t> columns_size, bool use_group);
|
std::vector<bst_idx_t> columns_size, bool use_group);
|
||||||
|
|
||||||
template <typename Batch>
|
template <typename Batch>
|
||||||
void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);
|
void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);
|
||||||
@ -998,7 +998,7 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float,
|
|||||||
public:
|
public:
|
||||||
explicit SortedSketchContainer(Context const *ctx, int32_t max_bins,
|
explicit SortedSketchContainer(Context const *ctx, int32_t max_bins,
|
||||||
common::Span<FeatureType const> ft,
|
common::Span<FeatureType const> ft,
|
||||||
std::vector<size_t> columns_size, bool use_group)
|
std::vector<bst_idx_t> columns_size, bool use_group)
|
||||||
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
|
: SketchContainerImpl{ctx, columns_size, max_bins, ft, use_group} {
|
||||||
monitor_.Init(__func__);
|
monitor_.Init(__func__);
|
||||||
sketches_.resize(columns_size.size());
|
sketches_.resize(columns_size.size());
|
||||||
|
|||||||
@ -73,11 +73,11 @@ constexpr size_t kAdapterUnknownSize = std::numeric_limits<size_t >::max();
|
|||||||
|
|
||||||
struct COOTuple {
|
struct COOTuple {
|
||||||
COOTuple() = default;
|
COOTuple() = default;
|
||||||
XGBOOST_DEVICE COOTuple(size_t row_idx, size_t column_idx, float value)
|
XGBOOST_DEVICE COOTuple(bst_idx_t row_idx, bst_idx_t column_idx, float value)
|
||||||
: row_idx(row_idx), column_idx(column_idx), value(value) {}
|
: row_idx(row_idx), column_idx(column_idx), value(value) {}
|
||||||
|
|
||||||
size_t row_idx{0};
|
bst_idx_t row_idx{0};
|
||||||
size_t column_idx{0};
|
bst_idx_t column_idx{0};
|
||||||
float value{0};
|
float value{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -136,12 +136,8 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
|
|||||||
public:
|
public:
|
||||||
class Line {
|
class Line {
|
||||||
public:
|
public:
|
||||||
Line(size_t row_idx, size_t size, const unsigned* feature_idx,
|
Line(bst_idx_t row_idx, bst_idx_t size, const unsigned* feature_idx, const float* values)
|
||||||
const float* values)
|
: row_idx_(row_idx), size_(size), feature_idx_(feature_idx), values_(values) {}
|
||||||
: row_idx_(row_idx),
|
|
||||||
size_(size),
|
|
||||||
feature_idx_(feature_idx),
|
|
||||||
values_(values) {}
|
|
||||||
|
|
||||||
size_t Size() const { return size_; }
|
size_t Size() const { return size_; }
|
||||||
COOTuple GetElement(size_t idx) const {
|
COOTuple GetElement(size_t idx) const {
|
||||||
@ -149,8 +145,8 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t row_idx_;
|
bst_idx_t row_idx_;
|
||||||
size_t size_;
|
bst_idx_t size_;
|
||||||
const unsigned* feature_idx_;
|
const unsigned* feature_idx_;
|
||||||
const float* values_;
|
const float* values_;
|
||||||
};
|
};
|
||||||
@ -178,29 +174,25 @@ class CSRAdapterBatch : public detail::NoMetaInfo {
|
|||||||
|
|
||||||
class CSRAdapter : public detail::SingleBatchDataIter<CSRAdapterBatch> {
|
class CSRAdapter : public detail::SingleBatchDataIter<CSRAdapterBatch> {
|
||||||
public:
|
public:
|
||||||
CSRAdapter(const size_t* row_ptr, const unsigned* feature_idx,
|
CSRAdapter(const size_t* row_ptr, const unsigned* feature_idx, const float* values,
|
||||||
const float* values, size_t num_rows, size_t num_elements,
|
bst_idx_t num_rows, bst_idx_t num_elements, size_t num_features)
|
||||||
size_t num_features)
|
: batch_(row_ptr, feature_idx, values, num_rows, num_elements, num_features),
|
||||||
: batch_(row_ptr, feature_idx, values, num_rows, num_elements,
|
|
||||||
num_features),
|
|
||||||
num_rows_(num_rows),
|
num_rows_(num_rows),
|
||||||
num_columns_(num_features) {}
|
num_columns_(num_features) {}
|
||||||
const CSRAdapterBatch& Value() const override { return batch_; }
|
const CSRAdapterBatch& Value() const override { return batch_; }
|
||||||
size_t NumRows() const { return num_rows_; }
|
bst_idx_t NumRows() const { return num_rows_; }
|
||||||
size_t NumColumns() const { return num_columns_; }
|
bst_idx_t NumColumns() const { return num_columns_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
CSRAdapterBatch batch_;
|
CSRAdapterBatch batch_;
|
||||||
size_t num_rows_;
|
bst_idx_t num_rows_;
|
||||||
size_t num_columns_;
|
bst_idx_t num_columns_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class DenseAdapterBatch : public detail::NoMetaInfo {
|
class DenseAdapterBatch : public detail::NoMetaInfo {
|
||||||
public:
|
public:
|
||||||
DenseAdapterBatch(const float* values, size_t num_rows, size_t num_features)
|
DenseAdapterBatch(const float* values, bst_idx_t num_rows, bst_idx_t num_features)
|
||||||
: values_(values),
|
: values_(values), num_rows_(num_rows), num_features_(num_features) {}
|
||||||
num_rows_(num_rows),
|
|
||||||
num_features_(num_features) {}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Line {
|
class Line {
|
||||||
@ -910,7 +902,7 @@ class SparsePageAdapterBatch {
|
|||||||
struct Line {
|
struct Line {
|
||||||
Entry const* inst;
|
Entry const* inst;
|
||||||
size_t n;
|
size_t n;
|
||||||
bst_row_t ridx;
|
bst_idx_t ridx;
|
||||||
COOTuple GetElement(size_t idx) const { return {ridx, inst[idx].index, inst[idx].fvalue}; }
|
COOTuple GetElement(size_t idx) const { return {ridx, inst[idx].index, inst[idx].fvalue}; }
|
||||||
size_t Size() const { return n; }
|
size_t Size() const { return n; }
|
||||||
};
|
};
|
||||||
|
|||||||
@ -47,7 +47,7 @@
|
|||||||
#include "simple_dmatrix.h" // for SimpleDMatrix
|
#include "simple_dmatrix.h" // for SimpleDMatrix
|
||||||
#include "sparse_page_writer.h" // for SparsePageFormatReg
|
#include "sparse_page_writer.h" // for SparsePageFormatReg
|
||||||
#include "validation.h" // for LabelsCheck, WeightsCheck, ValidateQueryGroup
|
#include "validation.h" // for LabelsCheck, WeightsCheck, ValidateQueryGroup
|
||||||
#include "xgboost/base.h" // for bst_group_t, bst_row_t, bst_float, bst_ulong
|
#include "xgboost/base.h" // for bst_group_t, bst_idx_t, bst_float, bst_ulong
|
||||||
#include "xgboost/context.h" // for Context
|
#include "xgboost/context.h" // for Context
|
||||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||||
#include "xgboost/learner.h" // for HostDeviceVector
|
#include "xgboost/learner.h" // for HostDeviceVector
|
||||||
@ -996,7 +996,7 @@ template DMatrix* DMatrix::Create(
|
|||||||
|
|
||||||
SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {
|
SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const {
|
||||||
SparsePage transpose;
|
SparsePage transpose;
|
||||||
common::ParallelGroupBuilder<Entry, bst_row_t> builder(&transpose.offset.HostVector(),
|
common::ParallelGroupBuilder<Entry, bst_idx_t> builder(&transpose.offset.HostVector(),
|
||||||
&transpose.data.HostVector());
|
&transpose.data.HostVector());
|
||||||
builder.InitBudget(num_columns, n_threads);
|
builder.InitBudget(num_columns, n_threads);
|
||||||
long batch_size = static_cast<long>(this->Size()); // NOLINT(*)
|
long batch_size = static_cast<long>(this->Size()); // NOLINT(*)
|
||||||
@ -1192,7 +1192,7 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
|
|||||||
|
|
||||||
void SparsePage::PushCSC(const SparsePage &batch) {
|
void SparsePage::PushCSC(const SparsePage &batch) {
|
||||||
std::vector<xgboost::Entry>& self_data = data.HostVector();
|
std::vector<xgboost::Entry>& self_data = data.HostVector();
|
||||||
std::vector<bst_row_t>& self_offset = offset.HostVector();
|
std::vector<bst_idx_t>& self_offset = offset.HostVector();
|
||||||
|
|
||||||
auto const& other_data = batch.data.ConstHostVector();
|
auto const& other_data = batch.data.ConstHostVector();
|
||||||
auto const& other_offset = batch.offset.ConstHostVector();
|
auto const& other_offset = batch.offset.ConstHostVector();
|
||||||
@ -1211,7 +1211,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<bst_row_t> offset(other_offset.size());
|
std::vector<bst_idx_t> offset(other_offset.size());
|
||||||
offset[0] = 0;
|
offset[0] = 0;
|
||||||
|
|
||||||
std::vector<xgboost::Entry> data(self_data.size() + other_data.size());
|
std::vector<xgboost::Entry> data(self_data.size() + other_data.size());
|
||||||
|
|||||||
@ -39,7 +39,7 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
|
|||||||
return {row_idx, column_idx, value};
|
return {row_idx, column_idx, value};
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
|
[[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {
|
||||||
auto const& column = columns_[fidx];
|
auto const& column = columns_[fidx];
|
||||||
float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
|
float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
|
||||||
? column(ridx)
|
? column(ridx)
|
||||||
@ -47,8 +47,8 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; }
|
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return num_rows_; }
|
||||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); }
|
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return columns_.size(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
common::Span<ArrayInterface<1>> columns_;
|
common::Span<ArrayInterface<1>> columns_;
|
||||||
@ -168,13 +168,13 @@ class CupyAdapterBatch : public detail::NoMetaInfo {
|
|||||||
float value = array_interface_(row_idx, column_idx);
|
float value = array_interface_(row_idx, column_idx);
|
||||||
return {row_idx, column_idx, value};
|
return {row_idx, column_idx, value};
|
||||||
}
|
}
|
||||||
[[nodiscard]] __device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
|
[[nodiscard]] __device__ float GetElement(bst_idx_t ridx, bst_feature_t fidx) const {
|
||||||
float value = array_interface_(ridx, fidx);
|
float value = array_interface_(ridx, fidx);
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.Shape(0); }
|
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return array_interface_.Shape(0); }
|
||||||
[[nodiscard]] XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.Shape(1); }
|
[[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return array_interface_.Shape(1); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ArrayInterface<2> array_interface_;
|
ArrayInterface<2> array_interface_;
|
||||||
@ -208,8 +208,8 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
|
|||||||
|
|
||||||
// Returns maximum row length
|
// Returns maximum row length
|
||||||
template <typename AdapterBatchT>
|
template <typename AdapterBatchT>
|
||||||
std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, DeviceOrd device,
|
bst_idx_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_idx_t> offset, DeviceOrd device,
|
||||||
float missing) {
|
float missing) {
|
||||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||||
IsValidFunctor is_valid(missing);
|
IsValidFunctor is_valid(missing);
|
||||||
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
|
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
|
||||||
@ -231,7 +231,7 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
|
|||||||
|
|
||||||
// Count elements per row
|
// Count elements per row
|
||||||
dh::LaunchN(n_samples * stride, [=] __device__(std::size_t idx) {
|
dh::LaunchN(n_samples * stride, [=] __device__(std::size_t idx) {
|
||||||
bst_row_t cnt{0};
|
bst_idx_t cnt{0};
|
||||||
auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride);
|
auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride);
|
||||||
SPAN_CHECK(ridx < n_samples);
|
SPAN_CHECK(ridx < n_samples);
|
||||||
for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) {
|
for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) {
|
||||||
@ -246,10 +246,10 @@ std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offs
|
|||||||
});
|
});
|
||||||
|
|
||||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||||
bst_row_t row_stride =
|
bst_idx_t row_stride =
|
||||||
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
|
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
|
||||||
thrust::device_pointer_cast(offset.data()) + offset.size(),
|
thrust::device_pointer_cast(offset.data()) + offset.size(),
|
||||||
static_cast<bst_row_t>(0), thrust::maximum<bst_row_t>());
|
static_cast<bst_idx_t>(0), thrust::maximum<bst_idx_t>());
|
||||||
return row_stride;
|
return row_stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -175,11 +175,10 @@ struct WriteCompressedEllpackFunctor {
|
|||||||
|
|
||||||
using Tuple = thrust::tuple<size_t, size_t, size_t>;
|
using Tuple = thrust::tuple<size_t, size_t, size_t>;
|
||||||
__device__ size_t operator()(Tuple out) {
|
__device__ size_t operator()(Tuple out) {
|
||||||
auto e = batch.GetElement(out.get<2>());
|
auto e = batch.GetElement(thrust::get<2>(out));
|
||||||
if (is_valid(e)) {
|
if (is_valid(e)) {
|
||||||
// -1 because the scan is inclusive
|
// -1 because the scan is inclusive
|
||||||
size_t output_position =
|
size_t output_position = accessor.row_stride * e.row_idx + thrust::get<1>(out) - 1;
|
||||||
accessor.row_stride * e.row_idx + out.get<1>() - 1;
|
|
||||||
uint32_t bin_idx = 0;
|
uint32_t bin_idx = 0;
|
||||||
if (common::IsCat(feature_types, e.column_idx)) {
|
if (common::IsCat(feature_types, e.column_idx)) {
|
||||||
bin_idx = accessor.SearchBin<true>(e.value, e.column_idx);
|
bin_idx = accessor.SearchBin<true>(e.value, e.column_idx);
|
||||||
@ -196,8 +195,8 @@ template <typename Tuple>
|
|||||||
struct TupleScanOp {
|
struct TupleScanOp {
|
||||||
__device__ Tuple operator()(Tuple a, Tuple b) {
|
__device__ Tuple operator()(Tuple a, Tuple b) {
|
||||||
// Key equal
|
// Key equal
|
||||||
if (a.template get<0>() == b.template get<0>()) {
|
if (thrust::get<0>(a) == thrust::get<0>(b)) {
|
||||||
b.template get<1>() += a.template get<1>();
|
thrust::get<1>(b) += thrust::get<1>(a);
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
// Not equal
|
// Not equal
|
||||||
|
|||||||
@ -193,7 +193,7 @@ float GHistIndexMatrix::GetFvalue(size_t ridx, size_t fidx, bool is_cat) const {
|
|||||||
|
|
||||||
float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
|
float GHistIndexMatrix::GetFvalue(std::vector<std::uint32_t> const &ptrs,
|
||||||
std::vector<float> const &values, std::vector<float> const &mins,
|
std::vector<float> const &values, std::vector<float> const &mins,
|
||||||
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const {
|
bst_idx_t ridx, bst_feature_t fidx, bool is_cat) const {
|
||||||
if (is_cat) {
|
if (is_cat) {
|
||||||
auto gidx = GetGindex(ridx, fidx);
|
auto gidx = GetGindex(ridx, fidx);
|
||||||
if (gidx == -1) {
|
if (gidx == -1) {
|
||||||
|
|||||||
@ -149,7 +149,7 @@ class GHistIndexMatrix {
|
|||||||
/** @brief max_bin for each feature. */
|
/** @brief max_bin for each feature. */
|
||||||
bst_bin_t max_numeric_bins_per_feat;
|
bst_bin_t max_numeric_bins_per_feat;
|
||||||
/** @brief base row index for current page (used by external memory) */
|
/** @brief base row index for current page (used by external memory) */
|
||||||
bst_row_t base_rowid{0};
|
bst_idx_t base_rowid{0};
|
||||||
|
|
||||||
[[nodiscard]] bst_bin_t MaxNumBinPerFeat() const {
|
[[nodiscard]] bst_bin_t MaxNumBinPerFeat() const {
|
||||||
return std::max(static_cast<bst_bin_t>(cut.MaxCategory() + 1), max_numeric_bins_per_feat);
|
return std::max(static_cast<bst_bin_t>(cut.MaxCategory() + 1), max_numeric_bins_per_feat);
|
||||||
@ -230,7 +230,7 @@ class GHistIndexMatrix {
|
|||||||
*/
|
*/
|
||||||
[[nodiscard]] std::size_t RowIdx(size_t ridx) const { return row_ptr[ridx - base_rowid]; }
|
[[nodiscard]] std::size_t RowIdx(size_t ridx) const { return row_ptr[ridx - base_rowid]; }
|
||||||
|
|
||||||
[[nodiscard]] bst_row_t Size() const { return row_ptr.empty() ? 0 : row_ptr.size() - 1; }
|
[[nodiscard]] bst_idx_t Size() const { return row_ptr.empty() ? 0 : row_ptr.size() - 1; }
|
||||||
[[nodiscard]] bst_feature_t Features() const { return cut.Ptrs().size() - 1; }
|
[[nodiscard]] bst_feature_t Features() const { return cut.Ptrs().size() - 1; }
|
||||||
|
|
||||||
[[nodiscard]] bool ReadColumnPage(common::AlignedResourceReadStream* fi);
|
[[nodiscard]] bool ReadColumnPage(common::AlignedResourceReadStream* fi);
|
||||||
@ -243,7 +243,7 @@ class GHistIndexMatrix {
|
|||||||
[[nodiscard]] float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
[[nodiscard]] float GetFvalue(size_t ridx, size_t fidx, bool is_cat) const;
|
||||||
[[nodiscard]] float GetFvalue(std::vector<std::uint32_t> const& ptrs,
|
[[nodiscard]] float GetFvalue(std::vector<std::uint32_t> const& ptrs,
|
||||||
std::vector<float> const& values, std::vector<float> const& mins,
|
std::vector<float> const& values, std::vector<float> const& mins,
|
||||||
bst_row_t ridx, bst_feature_t fidx, bool is_cat) const;
|
bst_idx_t ridx, bst_feature_t fidx, bool is_cat) const;
|
||||||
|
|
||||||
[[nodiscard]] common::HistogramCuts& Cuts() { return cut; }
|
[[nodiscard]] common::HistogramCuts& Cuts() { return cut; }
|
||||||
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cut; }
|
[[nodiscard]] common::HistogramCuts const& Cuts() const { return cut; }
|
||||||
|
|||||||
@ -132,7 +132,7 @@ void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p,
|
|||||||
return HostAdapterDispatch(proxy, [](auto const& value) { return value.NumCols(); });
|
return HostAdapterDispatch(proxy, [](auto const& value) { return value.NumCols(); });
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<std::size_t> column_sizes;
|
std::vector<bst_idx_t> column_sizes;
|
||||||
auto const is_valid = data::IsValidFunctor{missing};
|
auto const is_valid = data::IsValidFunctor{missing};
|
||||||
auto nnz_cnt = [&]() {
|
auto nnz_cnt = [&]() {
|
||||||
return HostAdapterDispatch(proxy, [&](auto const& value) {
|
return HostAdapterDispatch(proxy, [&](auto const& value) {
|
||||||
|
|||||||
@ -59,7 +59,7 @@ DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) {
|
|||||||
auto& h_data = out_page.data.HostVector();
|
auto& h_data = out_page.data.HostVector();
|
||||||
auto& h_offset = out_page.offset.HostVector();
|
auto& h_offset = out_page.offset.HostVector();
|
||||||
size_t rptr{0};
|
size_t rptr{0};
|
||||||
for (bst_row_t i = 0; i < this->Info().num_row_; i++) {
|
for (bst_idx_t i = 0; i < this->Info().num_row_; i++) {
|
||||||
auto inst = batch[i];
|
auto inst = batch[i];
|
||||||
auto prev_size = h_data.size();
|
auto prev_size = h_data.size();
|
||||||
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),
|
std::copy_if(inst.begin(), inst.end(), std::back_inserter(h_data),
|
||||||
|
|||||||
@ -54,7 +54,7 @@ void CopyDataToDMatrix(AdapterBatchT batch, common::Span<Entry> data,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename AdapterBatchT>
|
template <typename AdapterBatchT>
|
||||||
void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset, DeviceOrd device,
|
void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_idx_t> offset, DeviceOrd device,
|
||||||
float missing) {
|
float missing) {
|
||||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||||
IsValidFunctor is_valid(missing);
|
IsValidFunctor is_valid(missing);
|
||||||
|
|||||||
@ -18,7 +18,6 @@
|
|||||||
#include <cstdint> // for int32_t, uint32_t, int64_t, uint64_t
|
#include <cstdint> // for int32_t, uint32_t, int64_t, uint64_t
|
||||||
#include <cstdlib> // for atoi
|
#include <cstdlib> // for atoi
|
||||||
#include <cstring> // for memcpy, size_t, memset
|
#include <cstring> // for memcpy, size_t, memset
|
||||||
#include <functional> // for less
|
|
||||||
#include <iomanip> // for operator<<, setiosflags
|
#include <iomanip> // for operator<<, setiosflags
|
||||||
#include <iterator> // for back_insert_iterator, distance, back_inserter
|
#include <iterator> // for back_insert_iterator, distance, back_inserter
|
||||||
#include <limits> // for numeric_limits
|
#include <limits> // for numeric_limits
|
||||||
|
|||||||
@ -184,7 +184,7 @@ void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
|
|||||||
static std::size_t constexpr kUnroll = 8;
|
static std::size_t constexpr kUnroll = 8;
|
||||||
|
|
||||||
struct SparsePageView {
|
struct SparsePageView {
|
||||||
bst_row_t base_rowid;
|
bst_idx_t base_rowid;
|
||||||
HostSparsePageView view;
|
HostSparsePageView view;
|
||||||
|
|
||||||
explicit SparsePageView(SparsePage const *p) : base_rowid{p->base_rowid} { view = p->GetView(); }
|
explicit SparsePageView(SparsePage const *p) : base_rowid{p->base_rowid} { view = p->GetView(); }
|
||||||
@ -193,7 +193,7 @@ struct SparsePageView {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct SingleInstanceView {
|
struct SingleInstanceView {
|
||||||
bst_row_t base_rowid{};
|
bst_idx_t base_rowid{};
|
||||||
SparsePage::Inst const &inst;
|
SparsePage::Inst const &inst;
|
||||||
|
|
||||||
explicit SingleInstanceView(SparsePage::Inst const &instance) : inst{instance} {}
|
explicit SingleInstanceView(SparsePage::Inst const &instance) : inst{instance} {}
|
||||||
@ -214,7 +214,7 @@ struct GHistIndexMatrixView {
|
|||||||
std::vector<float> const& values_;
|
std::vector<float> const& values_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
size_t base_rowid;
|
bst_idx_t base_rowid;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GHistIndexMatrixView(GHistIndexMatrix const &_page, uint64_t n_feat,
|
GHistIndexMatrixView(GHistIndexMatrix const &_page, uint64_t n_feat,
|
||||||
@ -292,7 +292,7 @@ class AdapterView {
|
|||||||
|
|
||||||
[[nodiscard]] size_t Size() const { return adapter_->NumRows(); }
|
[[nodiscard]] size_t Size() const { return adapter_->NumRows(); }
|
||||||
|
|
||||||
bst_row_t const static base_rowid = 0; // NOLINT
|
bst_idx_t const static base_rowid = 0; // NOLINT
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename DataView, size_t block_of_rows_size>
|
template <typename DataView, size_t block_of_rows_size>
|
||||||
|
|||||||
@ -67,12 +67,12 @@ struct TreeView {
|
|||||||
|
|
||||||
struct SparsePageView {
|
struct SparsePageView {
|
||||||
common::Span<const Entry> d_data;
|
common::Span<const Entry> d_data;
|
||||||
common::Span<const bst_row_t> d_row_ptr;
|
common::Span<const bst_idx_t> d_row_ptr;
|
||||||
bst_feature_t num_features;
|
bst_feature_t num_features;
|
||||||
|
|
||||||
SparsePageView() = default;
|
SparsePageView() = default;
|
||||||
XGBOOST_DEVICE SparsePageView(common::Span<const Entry> data,
|
XGBOOST_DEVICE SparsePageView(common::Span<const Entry> data,
|
||||||
common::Span<const bst_row_t> row_ptr,
|
common::Span<const bst_idx_t> row_ptr,
|
||||||
bst_feature_t num_features)
|
bst_feature_t num_features)
|
||||||
: d_data{data}, d_row_ptr{row_ptr}, num_features(num_features) {}
|
: d_data{data}, d_row_ptr{row_ptr}, num_features(num_features) {}
|
||||||
[[nodiscard]] __device__ float GetElement(size_t ridx, size_t fidx) const {
|
[[nodiscard]] __device__ float GetElement(size_t ridx, size_t fidx) const {
|
||||||
@ -113,7 +113,7 @@ struct SparsePageLoader {
|
|||||||
float* smem;
|
float* smem;
|
||||||
|
|
||||||
__device__ SparsePageLoader(SparsePageView data, bool use_shared, bst_feature_t num_features,
|
__device__ SparsePageLoader(SparsePageView data, bool use_shared, bst_feature_t num_features,
|
||||||
bst_row_t num_rows, size_t entry_start, float)
|
bst_idx_t num_rows, size_t entry_start, float)
|
||||||
: use_shared(use_shared),
|
: use_shared(use_shared),
|
||||||
data(data) {
|
data(data) {
|
||||||
extern __shared__ float _smem[];
|
extern __shared__ float _smem[];
|
||||||
@ -146,7 +146,7 @@ struct SparsePageLoader {
|
|||||||
|
|
||||||
struct EllpackLoader {
|
struct EllpackLoader {
|
||||||
EllpackDeviceAccessor const& matrix;
|
EllpackDeviceAccessor const& matrix;
|
||||||
XGBOOST_DEVICE EllpackLoader(EllpackDeviceAccessor const& m, bool, bst_feature_t, bst_row_t,
|
XGBOOST_DEVICE EllpackLoader(EllpackDeviceAccessor const& m, bool, bst_feature_t, bst_idx_t,
|
||||||
size_t, float)
|
size_t, float)
|
||||||
: matrix{m} {}
|
: matrix{m} {}
|
||||||
[[nodiscard]] __device__ __forceinline__ float GetElement(size_t ridx, size_t fidx) const {
|
[[nodiscard]] __device__ __forceinline__ float GetElement(size_t ridx, size_t fidx) const {
|
||||||
@ -177,7 +177,7 @@ struct DeviceAdapterLoader {
|
|||||||
using BatchT = Batch;
|
using BatchT = Batch;
|
||||||
|
|
||||||
XGBOOST_DEV_INLINE DeviceAdapterLoader(Batch const batch, bool use_shared,
|
XGBOOST_DEV_INLINE DeviceAdapterLoader(Batch const batch, bool use_shared,
|
||||||
bst_feature_t num_features, bst_row_t num_rows,
|
bst_feature_t num_features, bst_idx_t num_rows,
|
||||||
size_t entry_start, float missing)
|
size_t entry_start, float missing)
|
||||||
: batch{batch}, columns{num_features}, use_shared{use_shared}, is_valid{missing} {
|
: batch{batch}, columns{num_features}, use_shared{use_shared}, is_valid{missing} {
|
||||||
extern __shared__ float _smem[];
|
extern __shared__ float _smem[];
|
||||||
@ -215,7 +215,7 @@ struct DeviceAdapterLoader {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <bool has_missing, bool has_categorical, typename Loader>
|
template <bool has_missing, bool has_categorical, typename Loader>
|
||||||
__device__ bst_node_t GetLeafIndex(bst_row_t ridx, TreeView const &tree,
|
__device__ bst_node_t GetLeafIndex(bst_idx_t ridx, TreeView const &tree,
|
||||||
Loader *loader) {
|
Loader *loader) {
|
||||||
bst_node_t nidx = 0;
|
bst_node_t nidx = 0;
|
||||||
RegTree::Node n = tree.d_tree[nidx];
|
RegTree::Node n = tree.d_tree[nidx];
|
||||||
@ -230,7 +230,7 @@ __device__ bst_node_t GetLeafIndex(bst_row_t ridx, TreeView const &tree,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <bool has_missing, typename Loader>
|
template <bool has_missing, typename Loader>
|
||||||
__device__ float GetLeafWeight(bst_row_t ridx, TreeView const &tree,
|
__device__ float GetLeafWeight(bst_idx_t ridx, TreeView const &tree,
|
||||||
Loader *loader) {
|
Loader *loader) {
|
||||||
bst_node_t nidx = -1;
|
bst_node_t nidx = -1;
|
||||||
if (tree.HasCategoricalSplit()) {
|
if (tree.HasCategoricalSplit()) {
|
||||||
@ -255,7 +255,7 @@ PredictLeafKernel(Data data, common::Span<const RegTree::Node> d_nodes,
|
|||||||
size_t tree_begin, size_t tree_end, size_t num_features,
|
size_t tree_begin, size_t tree_end, size_t num_features,
|
||||||
size_t num_rows, size_t entry_start, bool use_shared,
|
size_t num_rows, size_t entry_start, bool use_shared,
|
||||||
float missing) {
|
float missing) {
|
||||||
bst_row_t ridx = blockDim.x * blockIdx.x + threadIdx.x;
|
bst_idx_t ridx = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
if (ridx >= num_rows) {
|
if (ridx >= num_rows) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -670,7 +670,7 @@ __global__ void MaskBitVectorKernel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tree,
|
__device__ bst_node_t GetLeafIndexByBitVector(bst_idx_t ridx, TreeView const& tree,
|
||||||
BitVector const& decision_bits,
|
BitVector const& decision_bits,
|
||||||
BitVector const& missing_bits, std::size_t num_nodes,
|
BitVector const& missing_bits, std::size_t num_nodes,
|
||||||
std::size_t tree_offset) {
|
std::size_t tree_offset) {
|
||||||
@ -688,7 +688,7 @@ __device__ bst_node_t GetLeafIndexByBitVector(bst_row_t ridx, TreeView const& tr
|
|||||||
return nidx;
|
return nidx;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ float GetLeafWeightByBitVector(bst_row_t ridx, TreeView const& tree,
|
__device__ float GetLeafWeightByBitVector(bst_idx_t ridx, TreeView const& tree,
|
||||||
BitVector const& decision_bits,
|
BitVector const& decision_bits,
|
||||||
BitVector const& missing_bits, std::size_t num_nodes,
|
BitVector const& missing_bits, std::size_t num_nodes,
|
||||||
std::size_t tree_offset) {
|
std::size_t tree_offset) {
|
||||||
@ -1177,7 +1177,7 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
auto max_shared_memory_bytes = ConfigureDevice(ctx_->Device());
|
auto max_shared_memory_bytes = ConfigureDevice(ctx_->Device());
|
||||||
|
|
||||||
const MetaInfo& info = p_fmat->Info();
|
const MetaInfo& info = p_fmat->Info();
|
||||||
bst_row_t num_rows = info.num_row_;
|
bst_idx_t num_rows = info.num_row_;
|
||||||
if (tree_end == 0 || tree_end > model.trees.size()) {
|
if (tree_end == 0 || tree_end > model.trees.size()) {
|
||||||
tree_end = static_cast<uint32_t>(model.trees.size());
|
tree_end = static_cast<uint32_t>(model.trees.size());
|
||||||
}
|
}
|
||||||
@ -1202,7 +1202,7 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
|
for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
|
||||||
batch.data.SetDevice(ctx_->Device());
|
batch.data.SetDevice(ctx_->Device());
|
||||||
batch.offset.SetDevice(ctx_->Device());
|
batch.offset.SetDevice(ctx_->Device());
|
||||||
bst_row_t batch_offset = 0;
|
bst_idx_t batch_offset = 0;
|
||||||
SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
|
SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
|
||||||
model.learner_model_param->num_feature};
|
model.learner_model_param->num_feature};
|
||||||
size_t num_rows = batch.Size();
|
size_t num_rows = batch.Size();
|
||||||
@ -1225,7 +1225,7 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (auto const& batch : p_fmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
|
for (auto const& batch : p_fmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
|
||||||
bst_row_t batch_offset = 0;
|
bst_idx_t batch_offset = 0;
|
||||||
EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->Device())};
|
EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->Device())};
|
||||||
size_t num_rows = batch.Size();
|
size_t num_rows = batch.Size();
|
||||||
auto grid =
|
auto grid =
|
||||||
|
|||||||
@ -9,7 +9,7 @@
|
|||||||
#include <string> // for string, to_string
|
#include <string> // for string, to_string
|
||||||
|
|
||||||
#include "../gbm/gbtree_model.h" // for GBTreeModel
|
#include "../gbm/gbtree_model.h" // for GBTreeModel
|
||||||
#include "xgboost/base.h" // for bst_float, Args, bst_group_t, bst_row_t
|
#include "xgboost/base.h" // for bst_float, Args, bst_group_t, bst_idx_t
|
||||||
#include "xgboost/context.h" // for Context
|
#include "xgboost/context.h" // for Context
|
||||||
#include "xgboost/data.h" // for MetaInfo
|
#include "xgboost/data.h" // for MetaInfo
|
||||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||||
@ -34,7 +34,7 @@ Predictor* Predictor::Create(std::string const& name, Context const* ctx) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int32_t D>
|
template <int32_t D>
|
||||||
void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_row_t n_samples,
|
void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_idx_t n_samples,
|
||||||
bst_group_t n_groups) {
|
bst_group_t n_groups) {
|
||||||
// FIXME: Bindings other than Python doesn't have shape.
|
// FIXME: Bindings other than Python doesn't have shape.
|
||||||
std::string expected{"Invalid shape of base_margin. Expected: (" + std::to_string(n_samples) +
|
std::string expected{"Invalid shape of base_margin. Expected: (" + std::to_string(n_samples) +
|
||||||
|
|||||||
@ -28,7 +28,7 @@ class ColumnSplitHelper {
|
|||||||
public:
|
public:
|
||||||
ColumnSplitHelper() = default;
|
ColumnSplitHelper() = default;
|
||||||
|
|
||||||
ColumnSplitHelper(bst_row_t num_row,
|
ColumnSplitHelper(bst_idx_t num_row,
|
||||||
common::PartitionBuilder<kPartitionBlockSize>* partition_builder,
|
common::PartitionBuilder<kPartitionBlockSize>* partition_builder,
|
||||||
common::RowSetCollection* row_set_collection)
|
common::RowSetCollection* row_set_collection)
|
||||||
: partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
|
: partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
|
||||||
@ -85,10 +85,10 @@ class ColumnSplitHelper {
|
|||||||
|
|
||||||
class CommonRowPartitioner {
|
class CommonRowPartitioner {
|
||||||
public:
|
public:
|
||||||
bst_row_t base_rowid = 0;
|
bst_idx_t base_rowid = 0;
|
||||||
|
|
||||||
CommonRowPartitioner() = default;
|
CommonRowPartitioner() = default;
|
||||||
CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid,
|
CommonRowPartitioner(Context const* ctx, bst_idx_t num_row, bst_idx_t _base_rowid,
|
||||||
bool is_col_split)
|
bool is_col_split)
|
||||||
: base_rowid{_base_rowid}, is_col_split_{is_col_split} {
|
: base_rowid{_base_rowid}, is_col_split_{is_col_split} {
|
||||||
row_set_collection_.Clear();
|
row_set_collection_.Clear();
|
||||||
|
|||||||
@ -277,7 +277,7 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
|
|||||||
common::Span<GradientPair> gpair,
|
common::Span<GradientPair> gpair,
|
||||||
DMatrix* dmat) {
|
DMatrix* dmat) {
|
||||||
auto cuctx = ctx->CUDACtx();
|
auto cuctx = ctx->CUDACtx();
|
||||||
bst_row_t n_rows = dmat->Info().num_row_;
|
bst_idx_t n_rows = dmat->Info().num_row_;
|
||||||
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
|
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
|
||||||
gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_);
|
gpair, dh::ToSpan(threshold_), dh::ToSpan(grad_sum_), n_rows * subsample_);
|
||||||
|
|
||||||
|
|||||||
@ -54,7 +54,7 @@ inline void SampleGradient(Context const* ctx, TrainParam param,
|
|||||||
if (param.subsample >= 1.0) {
|
if (param.subsample >= 1.0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
bst_row_t n_samples = out.Shape(0);
|
bst_idx_t n_samples = out.Shape(0);
|
||||||
auto& rnd = common::GlobalRandom();
|
auto& rnd = common::GlobalRandom();
|
||||||
|
|
||||||
#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
|
#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
|
||||||
|
|||||||
@ -192,7 +192,7 @@ struct GPUHistMakerDevice {
|
|||||||
std::unique_ptr<FeatureGroups> feature_groups;
|
std::unique_ptr<FeatureGroups> feature_groups;
|
||||||
|
|
||||||
GPUHistMakerDevice(Context const* ctx, bool is_external_memory,
|
GPUHistMakerDevice(Context const* ctx, bool is_external_memory,
|
||||||
common::Span<FeatureType const> _feature_types, bst_row_t _n_rows,
|
common::Span<FeatureType const> _feature_types, bst_idx_t _n_rows,
|
||||||
TrainParam _param, std::shared_ptr<common::ColumnSampler> column_sampler,
|
TrainParam _param, std::shared_ptr<common::ColumnSampler> column_sampler,
|
||||||
uint32_t n_features, BatchParam batch_param, MetaInfo const& info)
|
uint32_t n_features, BatchParam batch_param, MetaInfo const& info)
|
||||||
: evaluator_{_param, n_features, ctx->Device()},
|
: evaluator_{_param, n_features, ctx->Device()},
|
||||||
|
|||||||
@ -25,7 +25,7 @@ RUN \
|
|||||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||||
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||||
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
|
nccl>=$(cut -d "-" -f 1 << $NCCL_VERSION_ARG) \
|
||||||
dask \
|
dask=2024.1.1 \
|
||||||
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||||
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||||
pyspark>=3.4.0 cloudpickle cuda-python && \
|
pyspark>=3.4.0 cloudpickle cuda-python && \
|
||||||
|
|||||||
@ -439,7 +439,7 @@ void MakeLabelForTest(std::shared_ptr<DMatrix> Xy, DMatrixHandle cxy) {
|
|||||||
XGDMatrixSetInfoFromInterface(cxy, "label", s_y_int.c_str());
|
XGDMatrixSetInfoFromInterface(cxy, "label", s_y_int.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto MakeSimpleDMatrixForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
|
auto MakeSimpleDMatrixForTest(bst_idx_t n_samples, bst_feature_t n_features, Json dconfig) {
|
||||||
HostDeviceVector<float> storage;
|
HostDeviceVector<float> storage;
|
||||||
auto arr_int = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateArrayInterface(&storage);
|
auto arr_int = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateArrayInterface(&storage);
|
||||||
|
|
||||||
@ -456,7 +456,7 @@ auto MakeSimpleDMatrixForTest(bst_row_t n_samples, bst_feature_t n_features, Jso
|
|||||||
return std::pair{p_fmat, Xy};
|
return std::pair{p_fmat, Xy};
|
||||||
}
|
}
|
||||||
|
|
||||||
auto MakeQDMForTest(Context const *ctx, bst_row_t n_samples, bst_feature_t n_features,
|
auto MakeQDMForTest(Context const *ctx, bst_idx_t n_samples, bst_feature_t n_features,
|
||||||
Json dconfig) {
|
Json dconfig) {
|
||||||
bst_bin_t n_bins{16};
|
bst_bin_t n_bins{16};
|
||||||
dconfig["max_bin"] = Integer{n_bins};
|
dconfig["max_bin"] = Integer{n_bins};
|
||||||
@ -488,7 +488,7 @@ auto MakeQDMForTest(Context const *ctx, bst_row_t n_samples, bst_feature_t n_fea
|
|||||||
return std::pair{p_fmat, Xy};
|
return std::pair{p_fmat, Xy};
|
||||||
}
|
}
|
||||||
|
|
||||||
auto MakeExtMemForTest(bst_row_t n_samples, bst_feature_t n_features, Json dconfig) {
|
auto MakeExtMemForTest(bst_idx_t n_samples, bst_feature_t n_features, Json dconfig) {
|
||||||
std::size_t n_batches{4};
|
std::size_t n_batches{4};
|
||||||
NumpyArrayIterForTest iter_0{0.0f, n_samples, n_features, n_batches};
|
NumpyArrayIterForTest iter_0{0.0f, n_samples, n_features, n_batches};
|
||||||
std::string s_dconfig;
|
std::string s_dconfig;
|
||||||
@ -530,7 +530,7 @@ void CheckResult(Context const *ctx, bst_feature_t n_features, std::shared_ptr<D
|
|||||||
}
|
}
|
||||||
|
|
||||||
void TestXGDMatrixGetQuantileCut(Context const *ctx) {
|
void TestXGDMatrixGetQuantileCut(Context const *ctx) {
|
||||||
bst_row_t n_samples{1024};
|
bst_idx_t n_samples{1024};
|
||||||
bst_feature_t n_features{16};
|
bst_feature_t n_features{16};
|
||||||
|
|
||||||
Json dconfig{Object{}};
|
Json dconfig{Object{}};
|
||||||
|
|||||||
@ -181,7 +181,7 @@ void TestMixedSketch() {
|
|||||||
TEST(HistUtil, DeviceSketchMixedFeatures) { TestMixedSketch(); }
|
TEST(HistUtil, DeviceSketchMixedFeatures) { TestMixedSketch(); }
|
||||||
|
|
||||||
TEST(HistUtil, RemoveDuplicatedCategories) {
|
TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||||
bst_row_t n_samples = 512;
|
bst_idx_t n_samples = 512;
|
||||||
bst_feature_t n_features = 3;
|
bst_feature_t n_features = 3;
|
||||||
bst_cat_t n_categories = 5;
|
bst_cat_t n_categories = 5;
|
||||||
|
|
||||||
@ -210,13 +210,13 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
|||||||
FeatureType::kNumerical, FeatureType::kCategorical, FeatureType::kNumerical};
|
FeatureType::kNumerical, FeatureType::kCategorical, FeatureType::kNumerical};
|
||||||
ASSERT_EQ(info.feature_types.Size(), n_features);
|
ASSERT_EQ(info.feature_types.Size(), n_features);
|
||||||
|
|
||||||
HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
|
HostDeviceVector<bst_idx_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
|
||||||
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
|
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
|
||||||
|
|
||||||
dh::device_vector<float> weight(n_samples * n_features, 0);
|
dh::device_vector<float> weight(n_samples * n_features, 0);
|
||||||
dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());
|
dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());
|
||||||
|
|
||||||
dh::caching_device_vector<bst_row_t> columns_ptr(4);
|
dh::caching_device_vector<bst_idx_t> columns_ptr(4);
|
||||||
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
|
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
|
||||||
columns_ptr[i] = i * n_samples;
|
columns_ptr[i] = i * n_samples;
|
||||||
}
|
}
|
||||||
@ -641,7 +641,7 @@ void TestGetColumnSize(std::size_t n_samples) {
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
TEST(HistUtil, GetColumnSize) {
|
TEST(HistUtil, GetColumnSize) {
|
||||||
bst_row_t n_samples = 4096;
|
bst_idx_t n_samples = 4096;
|
||||||
TestGetColumnSize(n_samples);
|
TestGetColumnSize(n_samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -797,11 +797,11 @@ TEST(HistUtil, AdapterSketchFromWeights) {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
class DeviceSketchWithHessianTest
|
class DeviceSketchWithHessianTest
|
||||||
: public ::testing::TestWithParam<std::tuple<bool, bst_row_t, bst_bin_t>> {
|
: public ::testing::TestWithParam<std::tuple<bool, bst_idx_t, bst_bin_t>> {
|
||||||
bst_feature_t n_features_ = 5;
|
bst_feature_t n_features_ = 5;
|
||||||
bst_group_t n_groups_{3};
|
bst_group_t n_groups_{3};
|
||||||
|
|
||||||
auto GenerateHessian(Context const* ctx, bst_row_t n_samples) const {
|
auto GenerateHessian(Context const* ctx, bst_idx_t n_samples) const {
|
||||||
HostDeviceVector<float> hessian;
|
HostDeviceVector<float> hessian;
|
||||||
auto& h_hess = hessian.HostVector();
|
auto& h_hess = hessian.HostVector();
|
||||||
h_hess = GenerateRandomWeights(n_samples);
|
h_hess = GenerateRandomWeights(n_samples);
|
||||||
@ -846,7 +846,7 @@ class DeviceSketchWithHessianTest
|
|||||||
protected:
|
protected:
|
||||||
Context ctx_ = MakeCUDACtx(0);
|
Context ctx_ = MakeCUDACtx(0);
|
||||||
|
|
||||||
void TestLTR(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
|
void TestLTR(Context const* ctx, bst_idx_t n_samples, bst_bin_t n_bins,
|
||||||
std::size_t n_elements) const {
|
std::size_t n_elements) const {
|
||||||
auto x = GenerateRandom(n_samples, n_features_);
|
auto x = GenerateRandom(n_samples, n_features_);
|
||||||
|
|
||||||
@ -899,7 +899,7 @@ class DeviceSketchWithHessianTest
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TestRegression(Context const* ctx, bst_row_t n_samples, bst_bin_t n_bins,
|
void TestRegression(Context const* ctx, bst_idx_t n_samples, bst_bin_t n_bins,
|
||||||
std::size_t n_elements) const {
|
std::size_t n_elements) const {
|
||||||
auto x = GenerateRandom(n_samples, n_features_);
|
auto x = GenerateRandom(n_samples, n_features_);
|
||||||
auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
|
auto p_fmat = GetDMatrixFromData(x, n_samples, n_features_);
|
||||||
@ -912,9 +912,9 @@ class DeviceSketchWithHessianTest
|
|||||||
};
|
};
|
||||||
|
|
||||||
auto MakeParamsForTest() {
|
auto MakeParamsForTest() {
|
||||||
std::vector<bst_row_t> sizes = {1, 2, 256, 512, 1000, 1500};
|
std::vector<bst_idx_t> sizes = {1, 2, 256, 512, 1000, 1500};
|
||||||
std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};
|
std::vector<bst_bin_t> bin_sizes = {2, 16, 256, 512};
|
||||||
std::vector<std::tuple<bool, bst_row_t, bst_bin_t>> configs;
|
std::vector<std::tuple<bool, bst_idx_t, bst_bin_t>> configs;
|
||||||
for (auto n_samples : sizes) {
|
for (auto n_samples : sizes) {
|
||||||
for (auto n_bins : bin_sizes) {
|
for (auto n_bins : bin_sizes) {
|
||||||
configs.emplace_back(true, n_samples, n_bins);
|
configs.emplace_back(true, n_samples, n_bins);
|
||||||
|
|||||||
@ -50,7 +50,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
|||||||
SimpleLCG lcg;
|
SimpleLCG lcg;
|
||||||
SimpleRealUniformDistribution<float> dist(3, 1000);
|
SimpleRealUniformDistribution<float> dist(3, 1000);
|
||||||
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
|
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
|
||||||
std::vector<bst_row_t> column_size(cols, rows);
|
std::vector<bst_idx_t> column_size(cols, rows);
|
||||||
bst_bin_t n_bins = 64;
|
bst_bin_t n_bins = 64;
|
||||||
|
|
||||||
// Generate cuts for distributed environment.
|
// Generate cuts for distributed environment.
|
||||||
@ -192,7 +192,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
|||||||
return dmat->SliceCol(world, rank);
|
return dmat->SliceCol(world, rank);
|
||||||
}()};
|
}()};
|
||||||
|
|
||||||
std::vector<bst_row_t> column_size(cols, 0);
|
std::vector<bst_idx_t> column_size(cols, 0);
|
||||||
auto const slice_size = cols / world;
|
auto const slice_size = cols / world;
|
||||||
auto const slice_start = slice_size * rank;
|
auto const slice_start = slice_size * rank;
|
||||||
auto const slice_end = (rank == world - 1) ? cols : slice_start + slice_size;
|
auto const slice_end = (rank == world - 1) ? cols : slice_start + slice_size;
|
||||||
|
|||||||
@ -27,7 +27,7 @@ TEST(GPUQuantile, Basic) {
|
|||||||
HostDeviceVector<FeatureType> ft;
|
HostDeviceVector<FeatureType> ft;
|
||||||
SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
|
SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
|
||||||
dh::caching_device_vector<Entry> entries;
|
dh::caching_device_vector<Entry> entries;
|
||||||
dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
|
dh::device_vector<bst_idx_t> cuts_ptr(kCols+1);
|
||||||
thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
|
thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
|
||||||
// Push empty
|
// Push empty
|
||||||
sketch.Push(dh::ToSpan(entries), dh::ToSpan(cuts_ptr), dh::ToSpan(cuts_ptr), 0);
|
sketch.Push(dh::ToSpan(entries), dh::ToSpan(cuts_ptr), dh::ToSpan(cuts_ptr), 0);
|
||||||
@ -87,11 +87,11 @@ TEST(GPUQuantile, Unique) {
|
|||||||
|
|
||||||
// if with_error is true, the test tolerates floating point error
|
// if with_error is true, the test tolerates floating point error
|
||||||
void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
|
void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
|
||||||
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
|
Span<bst_idx_t const> d_columns_ptr, bool with_error = false) {
|
||||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||||
std::vector<SketchEntry> h_in(in.size());
|
std::vector<SketchEntry> h_in(in.size());
|
||||||
dh::CopyDeviceSpanToVector(&h_in, in);
|
dh::CopyDeviceSpanToVector(&h_in, in);
|
||||||
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
|
std::vector<bst_idx_t> h_columns_ptr(d_columns_ptr.size());
|
||||||
dh::CopyDeviceSpanToVector(&h_columns_ptr, d_columns_ptr);
|
dh::CopyDeviceSpanToVector(&h_columns_ptr, d_columns_ptr);
|
||||||
|
|
||||||
for (size_t i = 1; i < d_columns_ptr.size(); ++i) {
|
for (size_t i = 1; i < d_columns_ptr.size(); ++i) {
|
||||||
@ -164,7 +164,7 @@ TEST(GPUQuantile, MergeEmpty) {
|
|||||||
|
|
||||||
std::vector<SketchEntry> entries_before(sketch_0.Data().size());
|
std::vector<SketchEntry> entries_before(sketch_0.Data().size());
|
||||||
dh::CopyDeviceSpanToVector(&entries_before, sketch_0.Data());
|
dh::CopyDeviceSpanToVector(&entries_before, sketch_0.Data());
|
||||||
std::vector<bst_row_t> ptrs_before(sketch_0.ColumnsPtr().size());
|
std::vector<bst_idx_t> ptrs_before(sketch_0.ColumnsPtr().size());
|
||||||
dh::CopyDeviceSpanToVector(&ptrs_before, sketch_0.ColumnsPtr());
|
dh::CopyDeviceSpanToVector(&ptrs_before, sketch_0.ColumnsPtr());
|
||||||
thrust::device_vector<size_t> columns_ptr(kCols + 1);
|
thrust::device_vector<size_t> columns_ptr(kCols + 1);
|
||||||
// Merge an empty sketch
|
// Merge an empty sketch
|
||||||
@ -172,7 +172,7 @@ TEST(GPUQuantile, MergeEmpty) {
|
|||||||
|
|
||||||
std::vector<SketchEntry> entries_after(sketch_0.Data().size());
|
std::vector<SketchEntry> entries_after(sketch_0.Data().size());
|
||||||
dh::CopyDeviceSpanToVector(&entries_after, sketch_0.Data());
|
dh::CopyDeviceSpanToVector(&entries_after, sketch_0.Data());
|
||||||
std::vector<bst_row_t> ptrs_after(sketch_0.ColumnsPtr().size());
|
std::vector<bst_idx_t> ptrs_after(sketch_0.ColumnsPtr().size());
|
||||||
dh::CopyDeviceSpanToVector(&ptrs_after, sketch_0.ColumnsPtr());
|
dh::CopyDeviceSpanToVector(&ptrs_after, sketch_0.ColumnsPtr());
|
||||||
|
|
||||||
CHECK_EQ(entries_before.size(), entries_after.size());
|
CHECK_EQ(entries_before.size(), entries_after.size());
|
||||||
@ -222,7 +222,7 @@ TEST(GPUQuantile, MergeBasic) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||||
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
|
std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
|
||||||
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
|
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
|
||||||
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
|
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
|
||||||
|
|
||||||
@ -278,7 +278,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
|||||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||||
|
|
||||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||||
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
|
std::vector<bst_idx_t> h_columns_ptr(columns_ptr.size());
|
||||||
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
|
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
|
||||||
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
|
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
|
||||||
|
|
||||||
|
|||||||
@ -1,13 +1,15 @@
|
|||||||
/*!
|
/**
|
||||||
* Copyright 2018 XGBoost contributors
|
* Copyright 2018-2024, XGBoost contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include <thrust/host_vector.h>
|
|
||||||
#include <thrust/device_vector.h>
|
#include <thrust/device_vector.h>
|
||||||
#include <thrust/execution_policy.h>
|
#include <thrust/execution_policy.h>
|
||||||
#include "../../../src/common/device_helpers.cuh"
|
#include <thrust/host_vector.h>
|
||||||
#include <xgboost/span.h>
|
#include <xgboost/span.h>
|
||||||
|
|
||||||
|
#include <numeric> // for iota
|
||||||
|
|
||||||
|
#include "../../../src/common/device_helpers.cuh"
|
||||||
#include "test_span.h"
|
#include "test_span.h"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
|
|||||||
@ -36,7 +36,7 @@ TEST(Adapter, CSRAdapter) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(Adapter, CSRArrayAdapter) {
|
TEST(Adapter, CSRArrayAdapter) {
|
||||||
HostDeviceVector<bst_row_t> indptr;
|
HostDeviceVector<std::size_t> indptr;
|
||||||
HostDeviceVector<float> values;
|
HostDeviceVector<float> values;
|
||||||
HostDeviceVector<bst_feature_t> indices;
|
HostDeviceVector<bst_feature_t> indices;
|
||||||
size_t n_features = 100, n_samples = 10;
|
size_t n_features = 100, n_samples = 10;
|
||||||
@ -155,7 +155,7 @@ TEST(Adapter, IteratorAdapter) {
|
|||||||
ASSERT_EQ(data->Info().num_row_, kRows);
|
ASSERT_EQ(data->Info().num_row_, kRows);
|
||||||
int num_batch = 0;
|
int num_batch = 0;
|
||||||
for (auto const& batch : data->GetBatches<SparsePage>()) {
|
for (auto const& batch : data->GetBatches<SparsePage>()) {
|
||||||
ASSERT_EQ(batch.offset.HostVector(), std::vector<bst_row_t>({0, 2, 4, 5, 5, 7, 9, 10, 10}));
|
ASSERT_EQ(batch.offset.HostVector(), std::vector<bst_idx_t>({0, 2, 4, 5, 5, 7, 9, 10, 10}));
|
||||||
++num_batch;
|
++num_batch;
|
||||||
}
|
}
|
||||||
ASSERT_EQ(num_batch, 1);
|
ASSERT_EQ(num_batch, 1);
|
||||||
|
|||||||
@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
TEST(SparsePage, PushCSC) {
|
TEST(SparsePage, PushCSC) {
|
||||||
std::vector<bst_row_t> offset {0};
|
std::vector<bst_idx_t> offset {0};
|
||||||
std::vector<Entry> data;
|
std::vector<Entry> data;
|
||||||
SparsePage batch;
|
SparsePage batch;
|
||||||
batch.offset.HostVector() = offset;
|
batch.offset.HostVector() = offset;
|
||||||
|
|||||||
@ -64,7 +64,7 @@ TEST(DeviceAdapter, GetRowCounts) {
|
|||||||
.Device(ctx.Device())
|
.Device(ctx.Device())
|
||||||
.GenerateArrayInterface(&storage);
|
.GenerateArrayInterface(&storage);
|
||||||
auto adapter = CupyAdapter{str_arr};
|
auto adapter = CupyAdapter{str_arr};
|
||||||
HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
|
HostDeviceVector<bst_idx_t> offset(adapter.NumRows() + 1, 0);
|
||||||
offset.SetDevice(ctx.Device());
|
offset.SetDevice(ctx.Device());
|
||||||
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
|
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
|
||||||
std::numeric_limits<float>::quiet_NaN());
|
std::numeric_limits<float>::quiet_NaN());
|
||||||
|
|||||||
@ -231,7 +231,7 @@ TEST(MetaInfo, LoadQid) {
|
|||||||
const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};
|
const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};
|
||||||
CHECK(info.group_ptr_ == expected_group_ptr);
|
CHECK(info.group_ptr_ == expected_group_ptr);
|
||||||
|
|
||||||
const std::vector<xgboost::bst_row_t> expected_offset{
|
const std::vector<xgboost::bst_idx_t> expected_offset{
|
||||||
0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60
|
0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60
|
||||||
};
|
};
|
||||||
const std::vector<xgboost::Entry> expected_data{
|
const std::vector<xgboost::Entry> expected_data{
|
||||||
|
|||||||
@ -223,7 +223,7 @@ TEST(SimpleDMatrix, FromFile) {
|
|||||||
auto batch = page.GetView();
|
auto batch = page.GetView();
|
||||||
EXPECT_EQ(batch.Size(), kExpectedNumRow);
|
EXPECT_EQ(batch.Size(), kExpectedNumRow);
|
||||||
EXPECT_EQ(page.offset.HostVector(),
|
EXPECT_EQ(page.offset.HostVector(),
|
||||||
std::vector<bst_row_t>({0, 3, 6, 9, 12, 15, 15}));
|
std::vector<bst_idx_t>({0, 3, 6, 9, 12, 15, 15}));
|
||||||
EXPECT_EQ(page.base_rowid, 0);
|
EXPECT_EQ(page.base_rowid, 0);
|
||||||
|
|
||||||
for (auto i = 0ull; i < batch.Size() - 1; i++) {
|
for (auto i = 0ull; i < batch.Size() - 1; i++) {
|
||||||
|
|||||||
@ -171,7 +171,7 @@ TEST(GBTree, ChoosePredictor) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(GBTree, ChooseTreeMethod) {
|
TEST(GBTree, ChooseTreeMethod) {
|
||||||
bst_row_t n_samples{128};
|
bst_idx_t n_samples{128};
|
||||||
bst_feature_t n_features{64};
|
bst_feature_t n_features{64};
|
||||||
auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);
|
auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);
|
||||||
|
|
||||||
|
|||||||
@ -18,7 +18,7 @@
|
|||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
void TestInplaceFallback(Context const* ctx) {
|
void TestInplaceFallback(Context const* ctx) {
|
||||||
// prepare data
|
// prepare data
|
||||||
bst_row_t n_samples{1024};
|
bst_idx_t n_samples{1024};
|
||||||
bst_feature_t n_features{32};
|
bst_feature_t n_features{32};
|
||||||
HostDeviceVector<float> X_storage;
|
HostDeviceVector<float> X_storage;
|
||||||
// use a different device than the learner
|
// use a different device than the learner
|
||||||
|
|||||||
@ -216,7 +216,7 @@ SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
|
|||||||
static_assert(SimpleLCG::max() - SimpleLCG::min());
|
static_assert(SimpleLCG::max() - SimpleLCG::min());
|
||||||
|
|
||||||
void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const {
|
void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const {
|
||||||
RandomDataGenerator{static_cast<bst_row_t>(p_fmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
|
RandomDataGenerator{static_cast<bst_idx_t>(p_fmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
|
||||||
p_fmat->Info().labels.Data());
|
p_fmat->Info().labels.Data());
|
||||||
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
||||||
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||||
@ -334,7 +334,7 @@ std::string RandomDataGenerator::GenerateColumnarArrayInterface(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void RandomDataGenerator::GenerateCSR(
|
void RandomDataGenerator::GenerateCSR(
|
||||||
HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
|
HostDeviceVector<float>* value, HostDeviceVector<std::size_t>* row_ptr,
|
||||||
HostDeviceVector<bst_feature_t>* columns) const {
|
HostDeviceVector<bst_feature_t>* columns) const {
|
||||||
auto& h_value = value->HostVector();
|
auto& h_value = value->HostVector();
|
||||||
auto& h_rptr = row_ptr->HostVector();
|
auto& h_rptr = row_ptr->HostVector();
|
||||||
@ -381,7 +381,7 @@ void RandomDataGenerator::GenerateCSR(
|
|||||||
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(
|
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(
|
||||||
bool with_label, bool float_label, size_t classes, DataSplitMode data_split_mode) const {
|
bool with_label, bool float_label, size_t classes, DataSplitMode data_split_mode) const {
|
||||||
HostDeviceVector<float> data;
|
HostDeviceVector<float> data;
|
||||||
HostDeviceVector<bst_row_t> rptrs;
|
HostDeviceVector<std::size_t> rptrs;
|
||||||
HostDeviceVector<bst_feature_t> columns;
|
HostDeviceVector<bst_feature_t> columns;
|
||||||
this->GenerateCSR(&data, &rptrs, &columns);
|
this->GenerateCSR(&data, &rptrs, &columns);
|
||||||
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), rows_,
|
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), rows_,
|
||||||
@ -447,7 +447,7 @@ void RandomDataGenerator::GenerateCSR(
|
|||||||
|
|
||||||
// Loop over the batches and count the number of pages
|
// Loop over the batches and count the number of pages
|
||||||
std::size_t batch_count = 0;
|
std::size_t batch_count = 0;
|
||||||
bst_row_t row_count = 0;
|
bst_idx_t row_count = 0;
|
||||||
for (const auto& batch : dmat->GetBatches<xgboost::SparsePage>()) {
|
for (const auto& batch : dmat->GetBatches<xgboost::SparsePage>()) {
|
||||||
batch_count++;
|
batch_count++;
|
||||||
row_count += batch.Size();
|
row_count += batch.Size();
|
||||||
@ -458,7 +458,7 @@ void RandomDataGenerator::GenerateCSR(
|
|||||||
EXPECT_EQ(row_count, dmat->Info().num_row_);
|
EXPECT_EQ(row_count, dmat->Info().num_row_);
|
||||||
|
|
||||||
if (with_label) {
|
if (with_label) {
|
||||||
RandomDataGenerator{static_cast<bst_row_t>(dmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
|
RandomDataGenerator{static_cast<bst_idx_t>(dmat->Info().num_row_), this->n_targets_, 0.0f}.GenerateDense(
|
||||||
dmat->Info().labels.Data());
|
dmat->Info().labels.Data());
|
||||||
CHECK_EQ(dmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
CHECK_EQ(dmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
||||||
dmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
dmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||||
@ -488,7 +488,7 @@ int CudaArrayIterForTest::Next() {
|
|||||||
}
|
}
|
||||||
#endif // !defined(XGBOOST_USE_CUDA)
|
#endif // !defined(XGBOOST_USE_CUDA)
|
||||||
|
|
||||||
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
|
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols,
|
||||||
size_t batches)
|
size_t batches)
|
||||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||||
rng_->Device(DeviceOrd::CPU());
|
rng_->Device(DeviceOrd::CPU());
|
||||||
@ -515,7 +515,7 @@ std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::si
|
|||||||
return p_fmat;
|
return p_fmat;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
|
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_idx_t n_samples, bst_feature_t n_features,
|
||||||
size_t n_batches, std::string prefix) {
|
size_t n_batches, std::string prefix) {
|
||||||
CHECK_GE(n_samples, n_batches);
|
CHECK_GE(n_samples, n_batches);
|
||||||
NumpyArrayIterForTest iter(0, n_samples, n_features, n_batches);
|
NumpyArrayIterForTest iter(0, n_samples, n_features, n_batches);
|
||||||
@ -662,7 +662,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
|
|||||||
return gbm;
|
return gbm;
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches)
|
ArrayIterForTest::ArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols, size_t batches)
|
||||||
: rows_{rows}, cols_{cols}, n_batches_{batches} {
|
: rows_{rows}, cols_{cols}, n_batches_{batches} {
|
||||||
XGProxyDMatrixCreate(&proxy_);
|
XGProxyDMatrixCreate(&proxy_);
|
||||||
rng_ = std::make_unique<RandomDataGenerator>(rows_, cols_, sparsity);
|
rng_ = std::make_unique<RandomDataGenerator>(rows_, cols_, sparsity);
|
||||||
|
|||||||
@ -223,7 +223,7 @@ Json GetArrayInterface(HostDeviceVector<T> const* storage, size_t rows, size_t c
|
|||||||
|
|
||||||
// Generate in-memory random data without using DMatrix.
|
// Generate in-memory random data without using DMatrix.
|
||||||
class RandomDataGenerator {
|
class RandomDataGenerator {
|
||||||
bst_row_t rows_;
|
bst_idx_t rows_;
|
||||||
size_t cols_;
|
size_t cols_;
|
||||||
float sparsity_;
|
float sparsity_;
|
||||||
|
|
||||||
@ -246,7 +246,7 @@ class RandomDataGenerator {
|
|||||||
void GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const;
|
void GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RandomDataGenerator(bst_row_t rows, size_t cols, float sparsity)
|
RandomDataGenerator(bst_idx_t rows, size_t cols, float sparsity)
|
||||||
: rows_{rows}, cols_{cols}, sparsity_{sparsity}, lcg_{seed_} {}
|
: rows_{rows}, cols_{cols}, sparsity_{sparsity}, lcg_{seed_} {}
|
||||||
|
|
||||||
RandomDataGenerator& Lower(float v) {
|
RandomDataGenerator& Lower(float v) {
|
||||||
@ -308,7 +308,7 @@ class RandomDataGenerator {
|
|||||||
|
|
||||||
std::string GenerateColumnarArrayInterface(std::vector<HostDeviceVector<float>>* data) const;
|
std::string GenerateColumnarArrayInterface(std::vector<HostDeviceVector<float>>* data) const;
|
||||||
|
|
||||||
void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
|
void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<std::size_t>* row_ptr,
|
||||||
HostDeviceVector<bst_feature_t>* columns) const;
|
HostDeviceVector<bst_feature_t>* columns) const;
|
||||||
|
|
||||||
[[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(
|
[[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(
|
||||||
@ -354,7 +354,7 @@ std::shared_ptr<DMatrix> GetDMatrixFromData(const std::vector<float>& x, std::si
|
|||||||
*
|
*
|
||||||
* \return A Sparse DMatrix with n_batches.
|
* \return A Sparse DMatrix with n_batches.
|
||||||
*/
|
*/
|
||||||
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
|
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_idx_t n_samples, bst_feature_t n_features,
|
||||||
size_t n_batches, std::string prefix = "cache");
|
size_t n_batches, std::string prefix = "cache");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -413,12 +413,12 @@ inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_row
|
|||||||
return gpair;
|
return gpair;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline linalg::Matrix<GradientPair> GenerateRandomGradients(Context const* ctx, bst_row_t n_rows,
|
inline linalg::Matrix<GradientPair> GenerateRandomGradients(Context const* ctx, bst_idx_t n_rows,
|
||||||
bst_target_t n_targets,
|
bst_target_t n_targets,
|
||||||
float lower = 0.0f,
|
float lower = 0.0f,
|
||||||
float upper = 1.0f) {
|
float upper = 1.0f) {
|
||||||
auto g = GenerateRandomGradients(n_rows * n_targets, lower, upper);
|
auto g = GenerateRandomGradients(n_rows * n_targets, lower, upper);
|
||||||
linalg::Matrix<GradientPair> gpair({n_rows, static_cast<bst_row_t>(n_targets)}, ctx->Device());
|
linalg::Matrix<GradientPair> gpair({n_rows, static_cast<bst_idx_t>(n_targets)}, ctx->Device());
|
||||||
gpair.Data()->Copy(g);
|
gpair.Data()->Copy(g);
|
||||||
return gpair;
|
return gpair;
|
||||||
}
|
}
|
||||||
@ -434,12 +434,12 @@ class ArrayIterForTest {
|
|||||||
|
|
||||||
std::vector<std::string> batches_;
|
std::vector<std::string> batches_;
|
||||||
std::string interface_;
|
std::string interface_;
|
||||||
size_t rows_;
|
bst_idx_t rows_;
|
||||||
size_t cols_;
|
size_t cols_;
|
||||||
size_t n_batches_;
|
size_t n_batches_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
size_t static constexpr Rows() { return 1024; }
|
bst_idx_t static constexpr Rows() { return 1024; }
|
||||||
size_t static constexpr Batches() { return 100; }
|
size_t static constexpr Batches() { return 100; }
|
||||||
size_t static constexpr Cols() { return 13; }
|
size_t static constexpr Cols() { return 13; }
|
||||||
|
|
||||||
@ -451,7 +451,7 @@ class ArrayIterForTest {
|
|||||||
[[nodiscard]] std::size_t Iter() const { return iter_; }
|
[[nodiscard]] std::size_t Iter() const { return iter_; }
|
||||||
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
auto Proxy() -> decltype(proxy_) { return proxy_; }
|
||||||
|
|
||||||
explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
|
explicit ArrayIterForTest(float sparsity, bst_idx_t rows, size_t cols, size_t batches);
|
||||||
/**
|
/**
|
||||||
* \brief Create iterator with user provided data.
|
* \brief Create iterator with user provided data.
|
||||||
*/
|
*/
|
||||||
@ -470,7 +470,7 @@ class CudaArrayIterForTest : public ArrayIterForTest {
|
|||||||
|
|
||||||
class NumpyArrayIterForTest : public ArrayIterForTest {
|
class NumpyArrayIterForTest : public ArrayIterForTest {
|
||||||
public:
|
public:
|
||||||
explicit NumpyArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
|
explicit NumpyArrayIterForTest(float sparsity, bst_idx_t rows = Rows(), size_t cols = Cols(),
|
||||||
size_t batches = Batches());
|
size_t batches = Batches());
|
||||||
explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
|
explicit NumpyArrayIterForTest(Context const* ctx, HostDeviceVector<float> const& data,
|
||||||
std::size_t n_samples, bst_feature_t n_features,
|
std::size_t n_samples, bst_feature_t n_features,
|
||||||
|
|||||||
@ -47,7 +47,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
|||||||
0.26f, 0.71f, 1.83f});
|
0.26f, 0.71f, 1.83f});
|
||||||
cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
|
cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f});
|
||||||
|
|
||||||
bst_row_t row_stride = 0;
|
bst_idx_t row_stride = 0;
|
||||||
const auto &offset_vec = batch.offset.ConstHostVector();
|
const auto &offset_vec = batch.offset.ConstHostVector();
|
||||||
for (size_t i = 1; i < offset_vec.size(); ++i) {
|
for (size_t i = 1; i < offset_vec.size(); ++i) {
|
||||||
row_stride = std::max(row_stride, offset_vec[i] - offset_vec[i-1]);
|
row_stride = std::max(row_stride, offset_vec[i] - offset_vec[i-1]);
|
||||||
|
|||||||
@ -43,7 +43,7 @@ TEST(SyclPredictor, ExternalMemory) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(SyclPredictor, InplacePredict) {
|
TEST(SyclPredictor, InplacePredict) {
|
||||||
bst_row_t constexpr kRows{128};
|
bst_idx_t constexpr kRows{128};
|
||||||
bst_feature_t constexpr kCols{64};
|
bst_feature_t constexpr kCols{64};
|
||||||
Context ctx;
|
Context ctx;
|
||||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
|
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
|
||||||
@ -106,4 +106,4 @@ TEST(SyclPredictor, Multi) {
|
|||||||
TestVectorLeafPrediction(&ctx);
|
TestVectorLeafPrediction(&ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
134
tests/cpp/plugin/test_sycl_split_evaluator.cc
Normal file
134
tests/cpp/plugin/test_sycl_split_evaluator.cc
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020-2024 by XGBoost contributors
|
||||||
|
*/
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
|
||||||
|
#pragma GCC diagnostic ignored "-W#pragma-messages"
|
||||||
|
#include "../../../plugin/sycl/tree/split_evaluator.h"
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
#include "../../../plugin/sycl/device_manager.h"
|
||||||
|
#include "../helpers.h"
|
||||||
|
|
||||||
|
namespace xgboost::sycl::tree {
|
||||||
|
|
||||||
|
template<typename GradientSumT>
|
||||||
|
void BasicTestSplitEvaluator(const std::string& monotone_constraints, bool has_constrains) {
|
||||||
|
const size_t n_columns = 2;
|
||||||
|
|
||||||
|
xgboost::tree::TrainParam param;
|
||||||
|
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"},
|
||||||
|
{"reg_lambda", "0"},
|
||||||
|
{"monotone_constraints", monotone_constraints}});
|
||||||
|
|
||||||
|
DeviceManager device_manager;
|
||||||
|
auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());
|
||||||
|
|
||||||
|
TreeEvaluator<GradientSumT> tree_evaluator(qu, param, n_columns);
|
||||||
|
{
|
||||||
|
// Check correctness of has_constrains flag
|
||||||
|
ASSERT_EQ(tree_evaluator.HasConstraint(), has_constrains);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto split_evaluator = tree_evaluator.GetEvaluator();
|
||||||
|
{
|
||||||
|
// Check if params were initialised correctly
|
||||||
|
ASSERT_EQ(split_evaluator.param.min_child_weight, param.min_child_weight);
|
||||||
|
ASSERT_EQ(split_evaluator.param.reg_lambda, param.reg_lambda);
|
||||||
|
ASSERT_EQ(split_evaluator.param.reg_alpha, param.reg_alpha);
|
||||||
|
ASSERT_EQ(split_evaluator.param.max_delta_step, param.max_delta_step);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename GradientSumT>
|
||||||
|
void TestSplitEvaluator(const std::string& monotone_constraints) {
|
||||||
|
const size_t n_columns = 2;
|
||||||
|
|
||||||
|
xgboost::tree::TrainParam param;
|
||||||
|
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"},
|
||||||
|
{"reg_lambda", "0"},
|
||||||
|
{"monotone_constraints", monotone_constraints}});
|
||||||
|
|
||||||
|
DeviceManager device_manager;
|
||||||
|
auto qu = device_manager.GetQueue(DeviceOrd::SyclDefault());
|
||||||
|
|
||||||
|
TreeEvaluator<GradientSumT> tree_evaluator(qu, param, n_columns);
|
||||||
|
auto split_evaluator = tree_evaluator.GetEvaluator();
|
||||||
|
{
|
||||||
|
// Test ThresholdL1
|
||||||
|
const GradientSumT alpha = 0.5;
|
||||||
|
{
|
||||||
|
const GradientSumT val = 0.0;
|
||||||
|
const auto trh = split_evaluator.ThresholdL1(val, alpha);
|
||||||
|
ASSERT_EQ(trh, 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const GradientSumT val = 1.0;
|
||||||
|
const auto trh = split_evaluator.ThresholdL1(val, alpha);
|
||||||
|
ASSERT_EQ(trh, val - alpha);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const GradientSumT val = -1.0;
|
||||||
|
const auto trh = split_evaluator.ThresholdL1(val, alpha);
|
||||||
|
ASSERT_EQ(trh, val + alpha);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
constexpr float eps = 1e-8;
|
||||||
|
tree_evaluator.AddSplit(0, 1, 2, 0, 0.3, 0.7);
|
||||||
|
|
||||||
|
GradStats<GradientSumT> left(0.1, 0.2);
|
||||||
|
GradStats<GradientSumT> right(0.3, 0.4);
|
||||||
|
bst_node_t nidx = 0;
|
||||||
|
bst_feature_t fidx = 0;
|
||||||
|
|
||||||
|
GradientSumT wleft = split_evaluator.CalcWeight(nidx, left);
|
||||||
|
// wleft = -grad/hess = -0.1/0.2
|
||||||
|
EXPECT_NEAR(wleft, -0.5, eps);
|
||||||
|
GradientSumT wright = split_evaluator.CalcWeight(nidx, right);
|
||||||
|
// wright = -grad/hess = -0.3/0.4
|
||||||
|
EXPECT_NEAR(wright, -0.75, eps);
|
||||||
|
|
||||||
|
GradientSumT gweight_left = split_evaluator.CalcGainGivenWeight(nidx, left, wleft);
|
||||||
|
// gweight_left = left.grad**2 / left.hess = 0.1*0.1/0.2 = 0.05
|
||||||
|
EXPECT_NEAR(gweight_left, 0.05, eps);
|
||||||
|
// gweight_right = right.grad**2 / right.hess = 0.3*0.3/0.4 = 0.225
|
||||||
|
GradientSumT gweight_right = split_evaluator.CalcGainGivenWeight(nidx, right, wright);
|
||||||
|
EXPECT_NEAR(gweight_right, 0.225, eps);
|
||||||
|
|
||||||
|
GradientSumT split_gain = split_evaluator.CalcSplitGain(nidx, fidx, left, right);
|
||||||
|
if (!tree_evaluator.HasConstraint()) {
|
||||||
|
EXPECT_NEAR(split_gain, gweight_left + gweight_right, eps);
|
||||||
|
} else {
|
||||||
|
// Parameters are chosen to have -inf here
|
||||||
|
ASSERT_EQ(split_gain, -std::numeric_limits<GradientSumT>::infinity());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(SyclSplitEvaluator, BasicTest) {
|
||||||
|
BasicTestSplitEvaluator<float>("( 0, 0)", false);
|
||||||
|
BasicTestSplitEvaluator<float>("( 1, 0)", true);
|
||||||
|
BasicTestSplitEvaluator<float>("( 0, 1)", true);
|
||||||
|
BasicTestSplitEvaluator<float>("(-1, 0)", true);
|
||||||
|
BasicTestSplitEvaluator<float>("( 0, -1)", true);
|
||||||
|
BasicTestSplitEvaluator<float>("( 1, 1)", true);
|
||||||
|
BasicTestSplitEvaluator<float>("(-1, -1)", true);
|
||||||
|
BasicTestSplitEvaluator<float>("( 1, -1)", true);
|
||||||
|
BasicTestSplitEvaluator<float>("(-1, 1)", true);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(SyclSplitEvaluator, TestMath) {
|
||||||
|
// Without constraints
|
||||||
|
TestSplitEvaluator<float>("( 0, 0)");
|
||||||
|
// With constraints
|
||||||
|
TestSplitEvaluator<float>("( 1, 0)");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace xgboost::sycl::tree
|
||||||
@ -65,7 +65,7 @@ TEST(CpuPredictor, ExternalMemory) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(CpuPredictor, InplacePredict) {
|
TEST(CpuPredictor, InplacePredict) {
|
||||||
bst_row_t constexpr kRows{128};
|
bst_idx_t constexpr kRows{128};
|
||||||
bst_feature_t constexpr kCols{64};
|
bst_feature_t constexpr kCols{64};
|
||||||
Context ctx;
|
Context ctx;
|
||||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
|
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
|
||||||
@ -83,7 +83,7 @@ TEST(CpuPredictor, InplacePredict) {
|
|||||||
|
|
||||||
{
|
{
|
||||||
HostDeviceVector<float> data;
|
HostDeviceVector<float> data;
|
||||||
HostDeviceVector<bst_row_t> rptrs;
|
HostDeviceVector<std::size_t> rptrs;
|
||||||
HostDeviceVector<bst_feature_t> columns;
|
HostDeviceVector<bst_feature_t> columns;
|
||||||
gen.GenerateCSR(&data, &rptrs, &columns);
|
gen.GenerateCSR(&data, &rptrs, &columns);
|
||||||
auto data_interface = GetArrayInterface(&data, kRows * kCols, 1);
|
auto data_interface = GetArrayInterface(&data, kRows * kCols, 1);
|
||||||
|
|||||||
@ -186,7 +186,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
|
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,
|
||||||
bst_feature_t cols) {
|
bst_feature_t cols) {
|
||||||
std::size_t constexpr kClasses { 4 };
|
std::size_t constexpr kClasses { 4 };
|
||||||
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
|
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
|
||||||
@ -255,7 +255,7 @@ std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMat
|
|||||||
return learner;
|
return learner;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VerifyPredictionWithLesserFeatures(Learner *learner, bst_row_t kRows,
|
void VerifyPredictionWithLesserFeatures(Learner *learner, bst_idx_t kRows,
|
||||||
std::shared_ptr<DMatrix> m_test,
|
std::shared_ptr<DMatrix> m_test,
|
||||||
std::shared_ptr<DMatrix> m_invalid) {
|
std::shared_ptr<DMatrix> m_invalid) {
|
||||||
HostDeviceVector<float> prediction;
|
HostDeviceVector<float> prediction;
|
||||||
|
|||||||
@ -92,7 +92,7 @@ void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
|
|||||||
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist,
|
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist,
|
||||||
bool check_contribs = false);
|
bool check_contribs = false);
|
||||||
|
|
||||||
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
|
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_idx_t rows,
|
||||||
bst_feature_t cols);
|
bst_feature_t cols);
|
||||||
|
|
||||||
void TestPredictionWithLesserFeatures(Context const* ctx);
|
void TestPredictionWithLesserFeatures(Context const* ctx);
|
||||||
|
|||||||
@ -11,7 +11,7 @@ TEST(RandomDataGenerator, DMatrix) {
|
|||||||
auto p_dmatrix = RandomDataGenerator{kRows, kCols, kSparsity}.GenerateDMatrix();
|
auto p_dmatrix = RandomDataGenerator{kRows, kCols, kSparsity}.GenerateDMatrix();
|
||||||
|
|
||||||
HostDeviceVector<float> csr_value;
|
HostDeviceVector<float> csr_value;
|
||||||
HostDeviceVector<bst_row_t> csr_rptr;
|
HostDeviceVector<std::size_t> csr_rptr;
|
||||||
HostDeviceVector<bst_feature_t> csr_cidx;
|
HostDeviceVector<bst_feature_t> csr_cidx;
|
||||||
RandomDataGenerator{kRows, kCols, kSparsity}.GenerateCSR(&csr_value, &csr_rptr, &csr_cidx);
|
RandomDataGenerator{kRows, kCols, kSparsity}.GenerateCSR(&csr_value, &csr_rptr, &csr_cidx);
|
||||||
|
|
||||||
|
|||||||
@ -217,7 +217,7 @@ TEST(Learner, JsonModelIO) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(Learner, ConfigIO) {
|
TEST(Learner, ConfigIO) {
|
||||||
bst_row_t n_samples = 128;
|
bst_idx_t n_samples = 128;
|
||||||
bst_feature_t n_features = 12;
|
bst_feature_t n_features = 12;
|
||||||
std::shared_ptr<DMatrix> p_fmat{
|
std::shared_ptr<DMatrix> p_fmat{
|
||||||
RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true, false, 2)};
|
RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true, false, 2)};
|
||||||
|
|||||||
@ -363,7 +363,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
|||||||
GPUTrainingParam param{tparam};
|
GPUTrainingParam param{tparam};
|
||||||
|
|
||||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
||||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2};
|
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2};
|
||||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
|
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
|
||||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
|
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
|
||||||
auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
|
auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
|
||||||
@ -412,7 +412,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
|||||||
GPUTrainingParam param{tparam};
|
GPUTrainingParam param{tparam};
|
||||||
|
|
||||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{1};
|
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{1};
|
||||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
|
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
|
||||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
||||||
auto feature_histogram =
|
auto feature_histogram =
|
||||||
@ -446,7 +446,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
|||||||
GPUTrainingParam param{tparam};
|
GPUTrainingParam param{tparam};
|
||||||
|
|
||||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
|
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
|
||||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
||||||
auto feature_histogram =
|
auto feature_histogram =
|
||||||
@ -478,7 +478,7 @@ TEST(GpuHist, EvaluateSplits) {
|
|||||||
GPUTrainingParam param{tparam};
|
GPUTrainingParam param{tparam};
|
||||||
|
|
||||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
|
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_idx_t>{0, 2, 4};
|
||||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
|
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
|
||||||
auto feature_histogram_left =
|
auto feature_histogram_left =
|
||||||
|
|||||||
@ -409,9 +409,9 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
|||||||
batch_param.hess = hess;
|
batch_param.hess = hess;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::size_t> partition_size(1, 0);
|
std::vector<bst_idx_t> partition_size(1, 0);
|
||||||
bst_bin_t total_bins{0};
|
bst_bin_t total_bins{0};
|
||||||
bst_row_t n_samples{0};
|
bst_idx_t n_samples{0};
|
||||||
|
|
||||||
auto gpair = GenerateRandomGradients(m->Info().num_row_, 0.0, 1.0);
|
auto gpair = GenerateRandomGradients(m->Info().num_row_, 0.0, 1.0);
|
||||||
auto const &h_gpair = gpair.HostVector();
|
auto const &h_gpair = gpair.HostVector();
|
||||||
|
|||||||
@ -441,7 +441,7 @@ RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
|
|||||||
return tree;
|
return tree;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VerifyHistColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
void VerifyHistColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||||
Context ctx(MakeCUDACtx(GPUIDX));
|
Context ctx(MakeCUDACtx(GPUIDX));
|
||||||
|
|
||||||
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
||||||
@ -491,7 +491,7 @@ RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
|
|||||||
return tree;
|
return tree;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VerifyApproxColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
void VerifyApproxColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||||
Context ctx(MakeCUDACtx(GPUIDX));
|
Context ctx(MakeCUDACtx(GPUIDX));
|
||||||
|
|
||||||
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
||||||
|
|||||||
@ -201,7 +201,7 @@ TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner<CPUExpandEn
|
|||||||
TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }
|
TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
void VerifyColumnSplit(Context const* ctx, bst_row_t rows, bst_feature_t cols, bst_target_t n_targets,
|
void VerifyColumnSplit(Context const* ctx, bst_idx_t rows, bst_feature_t cols, bst_target_t n_targets,
|
||||||
RegTree const& expected_tree) {
|
RegTree const& expected_tree) {
|
||||||
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
||||||
linalg::Matrix<GradientPair> gpair = GenerateRandomGradients(ctx, rows, n_targets);
|
linalg::Matrix<GradientPair> gpair = GenerateRandomGradients(ctx, rows, n_targets);
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
namespace xgboost::tree {
|
namespace xgboost::tree {
|
||||||
TEST(Updater, Refresh) {
|
TEST(Updater, Refresh) {
|
||||||
bst_row_t constexpr kRows = 8;
|
bst_idx_t constexpr kRows = 8;
|
||||||
bst_feature_t constexpr kCols = 16;
|
bst_feature_t constexpr kCols = 16;
|
||||||
Context ctx;
|
Context ctx;
|
||||||
|
|
||||||
|
|||||||
@ -252,7 +252,7 @@ class TestDistributedGPU:
|
|||||||
|
|
||||||
X_onehot, _ = make_categorical(local_cuda_client, 10000, 30, 13, True)
|
X_onehot, _ = make_categorical(local_cuda_client, 10000, 30, 13, True)
|
||||||
X_onehot = dask_cudf.from_dask_dataframe(X_onehot)
|
X_onehot = dask_cudf.from_dask_dataframe(X_onehot)
|
||||||
run_categorical(local_cuda_client, "gpu_hist", X, X_onehot, y)
|
run_categorical(local_cuda_client, "hist", "cuda", X, X_onehot, y)
|
||||||
|
|
||||||
@given(
|
@given(
|
||||||
params=hist_parameter_strategy,
|
params=hist_parameter_strategy,
|
||||||
|
|||||||
@ -315,8 +315,15 @@ def test_dask_sparse(client: "Client") -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
|
def run_categorical(
|
||||||
parameters = {"tree_method": tree_method, "max_cat_to_onehot": 9999} # force onehot
|
client: "Client", tree_method: str, device: str, X, X_onehot, y
|
||||||
|
) -> None:
|
||||||
|
# Force onehot
|
||||||
|
parameters = {
|
||||||
|
"tree_method": tree_method,
|
||||||
|
"device": device,
|
||||||
|
"max_cat_to_onehot": 9999,
|
||||||
|
}
|
||||||
rounds = 10
|
rounds = 10
|
||||||
m = xgb.dask.DaskDMatrix(client, X_onehot, y, enable_categorical=True)
|
m = xgb.dask.DaskDMatrix(client, X_onehot, y, enable_categorical=True)
|
||||||
by_etl_results = xgb.dask.train(
|
by_etl_results = xgb.dask.train(
|
||||||
@ -364,6 +371,7 @@ def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
|
|||||||
enable_categorical=True,
|
enable_categorical=True,
|
||||||
n_estimators=10,
|
n_estimators=10,
|
||||||
tree_method=tree_method,
|
tree_method=tree_method,
|
||||||
|
device=device,
|
||||||
# force onehot
|
# force onehot
|
||||||
max_cat_to_onehot=9999,
|
max_cat_to_onehot=9999,
|
||||||
)
|
)
|
||||||
@ -378,7 +386,10 @@ def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
|
|||||||
reg.fit(X, y)
|
reg.fit(X, y)
|
||||||
# check partition based
|
# check partition based
|
||||||
reg = xgb.dask.DaskXGBRegressor(
|
reg = xgb.dask.DaskXGBRegressor(
|
||||||
enable_categorical=True, n_estimators=10, tree_method=tree_method
|
enable_categorical=True,
|
||||||
|
n_estimators=10,
|
||||||
|
tree_method=tree_method,
|
||||||
|
device=device,
|
||||||
)
|
)
|
||||||
reg.fit(X, y, eval_set=[(X, y)])
|
reg.fit(X, y, eval_set=[(X, y)])
|
||||||
assert tm.non_increasing(reg.evals_result()["validation_0"]["rmse"])
|
assert tm.non_increasing(reg.evals_result()["validation_0"]["rmse"])
|
||||||
@ -398,8 +409,8 @@ def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
|
|||||||
def test_categorical(client: "Client") -> None:
|
def test_categorical(client: "Client") -> None:
|
||||||
X, y = make_categorical(client, 10000, 30, 13)
|
X, y = make_categorical(client, 10000, 30, 13)
|
||||||
X_onehot, _ = make_categorical(client, 10000, 30, 13, True)
|
X_onehot, _ = make_categorical(client, 10000, 30, 13, True)
|
||||||
run_categorical(client, "approx", X, X_onehot, y)
|
run_categorical(client, "approx", "cpu", X, X_onehot, y)
|
||||||
run_categorical(client, "hist", X, X_onehot, y)
|
run_categorical(client, "hist", "cpu", X, X_onehot, y)
|
||||||
|
|
||||||
ft = ["c"] * X.shape[1]
|
ft = ["c"] * X.shape[1]
|
||||||
reg = xgb.dask.DaskXGBRegressor(
|
reg = xgb.dask.DaskXGBRegressor(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user