[SYCL] Implement UpdatePredictionCache and connect updater with learner. (#10701)

---------

Co-authored-by: Dmitry Razdoburdin <>
This commit is contained in:
Dmitry Razdoburdin
2024-08-21 20:07:44 +02:00
committed by GitHub
parent 9b88495840
commit 24d225c1ab
11 changed files with 502 additions and 126 deletions

View File

@@ -21,10 +21,8 @@ class TestHistUpdater : public HistUpdater<GradientSumT> {
TestHistUpdater(const Context* ctx,
::sycl::queue qu,
const xgboost::tree::TrainParam& param,
std::unique_ptr<TreeUpdater> pruner,
FeatureInteractionConstraintHost int_constraints_,
DMatrix const* fmat) : HistUpdater<GradientSumT>(ctx, qu, param,
std::move(pruner),
int_constraints_, fmat) {}
void TestInitSampling(const USMVector<GradientPair, MemoryType::on_device> &gpair,
@@ -110,14 +108,12 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
ObjInfo task{ObjInfo::kRegression};
auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0}.GenerateDMatrix();
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
USMVector<size_t, MemoryType::on_device> row_indices_0(&qu, num_rows);
USMVector<size_t, MemoryType::on_device> row_indices_1(&qu, num_rows);
@@ -165,14 +161,12 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
ObjInfo task{ObjInfo::kRegression};
auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0}.GenerateDMatrix();
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, has_neg_hess);
@@ -221,14 +215,12 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
ObjInfo task{ObjInfo::kRegression};
auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
@@ -285,14 +277,12 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
ObjInfo task{ObjInfo::kRegression};
auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
@@ -345,14 +335,12 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
ObjInfo task{ObjInfo::kRegression};
auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0f}.GenerateDMatrix();
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
@@ -423,8 +411,6 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
DeviceManager device_manager;
auto qu = device_manager.GetQueue(ctx.Device());
ObjInfo task{ObjInfo::kRegression};
auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();
sycl::DeviceMatrix dmat;
dmat.Init(qu, p_fmat.get());
@@ -439,8 +425,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
nodes.emplace_back(tree::ExpandEntry(0, tree.GetDepth(0)));
FeatureInteractionConstraintHost int_constraints;
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
GenerateRandomGPairs(&qu, gpair.Data(), num_rows, false);
@@ -455,8 +440,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
std::vector<size_t> row_indices_desired_host(num_rows);
size_t n_left, n_right;
{
std::unique_ptr<TreeUpdater> pruner4verification{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater4verification(&ctx, qu, param, std::move(pruner4verification), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater4verification(&ctx, qu, param, int_constraints, p_fmat.get());
auto* row_set_collection4verification = updater4verification.TestInitData(gmat, gpair, *p_fmat, tree);
size_t n_nodes = nodes.size();
@@ -526,9 +510,7 @@ void TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param)
RegTree tree;
FeatureInteractionConstraintHost int_constraints;
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
@@ -576,9 +558,7 @@ void TestHistUpdaterExpandWithDepthWise(const xgboost::tree::TrainParam& param)
RegTree tree;
FeatureInteractionConstraintHost int_constraints;
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);

View File

@@ -0,0 +1,23 @@
/**
* Copyright 2020-2024 by XGBoost contributors
*/
#include <gtest/gtest.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#pragma GCC diagnostic ignored "-W#pragma-messages"
#include "../tree/test_prediction_cache.h"
#pragma GCC diagnostic pop
namespace xgboost::sycl::tree {
class SyclPredictionCache : public xgboost::TestPredictionCache {};
TEST_F(SyclPredictionCache, Hist) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
this->RunTest(&ctx, "grow_quantile_histmaker_sycl", "one_output_per_tree");
}
} // namespace xgboost::sycl::tree

View File

@@ -2,97 +2,10 @@
* Copyright 2021-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <memory>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "xgboost/task.h" // for ObjInfo
#include "test_prediction_cache.h"
namespace xgboost {
/**
 * Test fixture that trains on a generated DMatrix and compares the predictions
 * of the trained learner with those of a learner reloaded from the saved model.
 */
class TestPredictionCache : public ::testing::Test {
// Data set shared by every check in the fixture; created in SetUp().
std::shared_ptr<DMatrix> Xy_;
// Number of rows requested from the data generator.
std::size_t n_samples_{2048};
protected:
// Generate the data set: n_samples_ rows, 13 features, 3 targets.
void SetUp() override {
std::size_t n_features = 13;
bst_target_t n_targets = 3;
Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
}
// Train a Learner for 8 iterations with the given updater and parameters, then
// check that its predictions on Xy_ match (within kRtEps) the predictions of a
// fresh Learner that loaded the saved model.
void RunLearnerTest(Context const* ctx, std::string updater_name, float subsample,
std::string const& grow_policy, std::string const& strategy) {
std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
learner->SetParam("device", ctx->DeviceName());
learner->SetParam("updater", updater_name);
learner->SetParam("multi_strategy", strategy);
learner->SetParam("grow_policy", grow_policy);
learner->SetParam("subsample", std::to_string(subsample));
learner->SetParam("nthread", "0");
learner->Configure();
for (size_t i = 0; i < 8; ++i) {
learner->UpdateOneIter(i, Xy_);
}
// Prediction from the learner that was just trained on Xy_ (presumably served
// from its prediction cache, as the variable name suggests -- TODO confirm).
HostDeviceVector<float> out_prediction_cached;
learner->Predict(Xy_, false, &out_prediction_cached, 0, 0);
Json model{Object()};
learner->SaveModel(&model);
// Prediction from a second learner that only loaded the saved model.
HostDeviceVector<float> out_prediction;
{
std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
learner->LoadModel(model);
learner->Predict(Xy_, false, &out_prediction, 0, 0);
}
auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
auto const h_predt = out_prediction.ConstHostSpan();
ASSERT_EQ(h_predt.size(), h_predt_cached.size());
for (size_t i = 0; i < h_predt.size(); ++i) {
ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
}
}
// First call the named updater directly and require UpdatePredictionCache() to
// return true, then run RunLearnerTest() for every grow policy / subsample pair.
void RunTest(Context* ctx, std::string const& updater_name, std::string const& strategy) {
{
ctx->InitAllowUnknown(Args{{"nthread", "8"}});
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, ctx, &task)};
RegTree tree;
std::vector<RegTree*> trees{&tree};
auto gpair = GenerateRandomGradients(ctx, n_samples_, 1);
tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
updater->Configure(Args{});
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&param, &gpair, Xy_.get(), position, trees);
// Buffer of n_samples_ entries, viewed as an (n_samples_ x 1) tensor below.
HostDeviceVector<float> out_prediction_cached;
out_prediction_cached.SetDevice(ctx->Device());
out_prediction_cached.Resize(n_samples_);
auto cache =
linalg::MakeTensorView(ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
}
for (auto policy : {"depthwise", "lossguide"}) {
for (auto subsample : {1.0f, 0.4f}) {
// NOTE(review): the identical configuration is run twice in a row -- confirm
// whether the repetition is intentional.
this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
}
}
}
};
TEST_F(TestPredictionCache, Approx) {
Context ctx;
this->RunTest(&ctx, "grow_histmaker", "one_output_per_tree");
@@ -119,4 +32,4 @@ TEST_F(TestPredictionCache, GpuApprox) {
this->RunTest(&ctx, "grow_gpu_approx", "one_output_per_tree");
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost
} // namespace xgboost

View File

@@ -0,0 +1,97 @@
/**
* Copyright 2021-2024 by XGBoost contributors.
*/
#pragma once
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/tree_updater.h>
#include <memory>
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "xgboost/task.h" // for ObjInfo
namespace xgboost {
/**
 * Test fixture that trains on a generated DMatrix and compares the predictions
 * of the trained learner with those of a learner reloaded from the saved model.
 *
 * Derived fixtures call RunTest() with their own Context and updater name.
 */
class TestPredictionCache : public ::testing::Test {
  // Data set shared by every check in the fixture; created in SetUp().
  std::shared_ptr<DMatrix> Xy_;
  // Number of rows requested from the data generator.
  std::size_t n_samples_{2048};

 protected:
  /** Generate the data set: n_samples_ rows, 13 features, 3 targets. */
  void SetUp() override {
    std::size_t n_features = 13;
    bst_target_t n_targets = 3;
    Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
  }

  /**
   * Train a Learner for 8 iterations and check that its predictions on Xy_ match
   * (within kRtEps) those of a fresh Learner that loaded the saved model.
   *
   * @param ctx          Context whose device name is forwarded to the learner.
   * @param updater_name Value for the "updater" parameter.
   * @param subsample    Value for the "subsample" parameter.
   * @param grow_policy  Value for the "grow_policy" parameter.
   * @param strategy     Value for the "multi_strategy" parameter.
   */
  void RunLearnerTest(Context const* ctx, std::string const& updater_name, float subsample,
                      std::string const& grow_policy, std::string const& strategy) {
    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
    learner->SetParam("device", ctx->DeviceName());
    learner->SetParam("updater", updater_name);
    learner->SetParam("multi_strategy", strategy);
    learner->SetParam("grow_policy", grow_policy);
    learner->SetParam("subsample", std::to_string(subsample));
    learner->SetParam("nthread", "0");
    learner->Configure();

    for (std::size_t i = 0; i < 8; ++i) {
      learner->UpdateOneIter(i, Xy_);
    }

    // Prediction from the learner that was just trained on Xy_ (presumably served
    // from its prediction cache, as the variable name suggests -- TODO confirm).
    HostDeviceVector<float> out_prediction_cached;
    learner->Predict(Xy_, false, &out_prediction_cached, 0, 0);

    Json model{Object()};
    learner->SaveModel(&model);

    // Prediction from a second learner that only loaded the saved model.
    HostDeviceVector<float> out_prediction;
    {
      std::unique_ptr<Learner> reloaded{Learner::Create({Xy_})};
      reloaded->LoadModel(model);
      reloaded->Predict(Xy_, false, &out_prediction, 0, 0);
    }

    auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
    auto const h_predt = out_prediction.ConstHostSpan();
    ASSERT_EQ(h_predt.size(), h_predt_cached.size());
    for (std::size_t i = 0; i < h_predt.size(); ++i) {
      ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
    }
  }

  /**
   * Call the named updater directly and require UpdatePredictionCache() to return
   * true, then run RunLearnerTest() for every grow policy / subsample pair.
   *
   * @param ctx          Context to use; "nthread" is set to 8 on it here.
   * @param updater_name Name passed to TreeUpdater::Create() and to the learner.
   * @param strategy     Value for the learner's "multi_strategy" parameter.
   */
  void RunTest(Context* ctx, std::string const& updater_name, std::string const& strategy) {
    {
      ctx->InitAllowUnknown(Args{{"nthread", "8"}});

      ObjInfo task{ObjInfo::kRegression};
      std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, ctx, &task)};
      RegTree tree;
      std::vector<RegTree*> trees{&tree};
      auto gpair = GenerateRandomGradients(ctx, n_samples_, 1);
      tree::TrainParam param;
      param.UpdateAllowUnknown(Args{{"max_bin", "64"}});

      updater->Configure(Args{});
      std::vector<HostDeviceVector<bst_node_t>> position(1);
      updater->Update(&param, &gpair, Xy_.get(), position, trees);

      // Buffer of n_samples_ entries, viewed as an (n_samples_ x 1) tensor below.
      HostDeviceVector<float> out_prediction_cached;
      out_prediction_cached.SetDevice(ctx->Device());
      out_prediction_cached.Resize(n_samples_);
      auto cache =
          linalg::MakeTensorView(ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
      ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
    }

    for (auto policy : {"depthwise", "lossguide"}) {
      for (auto subsample : {1.0f, 0.4f}) {
        // NOTE(review): the identical configuration is run twice in a row; kept as
        // found -- confirm whether the repetition is intentional.
        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
      }
    }
  }
};
} // namespace xgboost

View File

@@ -0,0 +1,59 @@
import numpy as np
import xgboost as xgb
import json
rng = np.random.RandomState(1994)
class TestSYCLTrainingContinuation:
def run_training_continuation(self, use_json):
kRows = 64
kCols = 32
X = np.random.randn(kRows, kCols)
y = np.random.randn(kRows)
dtrain = xgb.DMatrix(X, y)
params = {
"device": "sycl",
"max_depth": "2",
"gamma": "0.1",
"alpha": "0.01",
"enable_experimental_json_serialization": use_json,
}
bst_0 = xgb.train(params, dtrain, num_boost_round=64)
dump_0 = bst_0.get_dump(dump_format="json")
bst_1 = xgb.train(params, dtrain, num_boost_round=32)
bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
dump_1 = bst_1.get_dump(dump_format="json")
def recursive_compare(obj_0, obj_1):
if isinstance(obj_0, float):
assert np.isclose(obj_0, obj_1, atol=1e-6)
elif isinstance(obj_0, str):
assert obj_0 == obj_1
elif isinstance(obj_0, int):
assert obj_0 == obj_1
elif isinstance(obj_0, dict):
keys_0 = list(obj_0.keys())
keys_1 = list(obj_1.keys())
values_0 = list(obj_0.values())
values_1 = list(obj_1.values())
for i in range(len(obj_0.items())):
assert keys_0[i] == keys_1[i]
if list(obj_0.keys())[i] != "missing":
recursive_compare(values_0[i], values_1[i])
else:
for i in range(len(obj_0)):
recursive_compare(obj_0[i], obj_1[i])
assert len(dump_0) == len(dump_1)
for i in range(len(dump_0)):
obj_0 = json.loads(dump_0[i])
obj_1 = json.loads(dump_1[i])
recursive_compare(obj_0, obj_1)
def test_sycl_training_continuation_binary(self):
self.run_training_continuation(False)
def test_sycl_training_continuation_json(self):
self.run_training_continuation(True)

View File

@@ -0,0 +1,80 @@
import numpy as np
import gc
import pytest
import xgboost as xgb
from hypothesis import given, strategies, assume, settings, note
import sys
import os
# sys.path.append("tests/python")
# import testing as tm
from xgboost import testing as tm
# Hypothesis strategy producing training-parameter dictionaries for the tests below.
# The filter discards draws where max_depth and max_leaves are both 0, and draws
# where max_depth is 0 while grow_policy is not "lossguide".
parameter_strategy = strategies.fixed_dictionaries(
{
"max_depth": strategies.integers(0, 11),
"max_leaves": strategies.integers(0, 256),
"max_bin": strategies.integers(2, 1024),
"grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
"single_precision_histogram": strategies.booleans(),
"min_child_weight": strategies.floats(0.5, 2.0),
"seed": strategies.integers(0, 10),
# We cannot enable subsampling as the training loss can increase
# 'subsample': strategies.floats(0.5, 1.0),
"colsample_bytree": strategies.floats(0.5, 1.0),
"colsample_bylevel": strategies.floats(0.5, 1.0),
}
).filter(
lambda x: (x["max_depth"] > 0 or x["max_leaves"] > 0)
and (x["max_depth"] > 0 or x["grow_policy"] == "lossguide")
)
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` for ``num_rounds`` rounds and return the evaluation history.

    ``dmat`` is also the only evaluation set, registered under the name "train".
    """
    history = {}
    watchlist = [(dmat, "train")]
    xgb.train(
        param,
        dmat,
        num_rounds,
        watchlist,
        verbose_eval=False,
        evals_result=history,
    )
    return history
class TestSYCLUpdaters:
    """Property-based training tests for SYCL devices."""

    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
    @settings(deadline=None)
    def test_sycl_hist(self, param, num_rounds, dataset):
        """The training metric must not increase when training with hist on SYCL."""
        param["tree_method"] = "hist"
        param["device"] = "sycl"
        param["verbosity"] = 0
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)
        note(result)
        assert tm.non_increasing(result["train"][dataset.metric])

    @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
    @settings(deadline=None)
    def test_specified_device_id_sycl_update(self, dataset, device_id):
        """Train on the ``device_id``-th GPU listed by ``sycl-ls``, if there is one."""
        # Read the list of SYCL devices.
        sycl_ls = os.popen("sycl-ls").read()
        devices = sycl_ls.split("\n")

        # The test should launch only on a GPU: find the GPUs in the list of
        # devices and use the position in that list instead of device_id.
        target_device_type = "opencl:gpu"
        found_devices = 0
        for idx, line in enumerate(devices):
            # The device type starts at the second character of the line.
            if not line.startswith(target_device_type, 1):
                continue
            if found_devices == device_id:
                param = {"device": f"sycl:gpu:{idx}"}
                param = dataset.set_params(param)
                result = train_result(param, dataset.get_dmat(), 10)
                assert tm.non_increasing(result["train"][dataset.metric])
                # Stop here: without this, the counter stays equal to device_id
                # and every later GPU in the list would be trained on as well.
                break
            found_devices += 1