merge latest changes

This commit is contained in:
Hui Liu
2023-12-13 21:06:28 -08:00
194 changed files with 4859 additions and 2838 deletions

View File

@@ -18,9 +18,9 @@ if (USE_HIP)
list(APPEND TEST_SOURCES ${HIP_TEST_SOURCES})
endif (USE_HIP)
file(GLOB_RECURSE ONEAPI_TEST_SOURCES "plugin/*_oneapi.cc")
if(NOT PLUGIN_UPDATER_ONEAPI)
list(REMOVE_ITEM TEST_SOURCES ${ONEAPI_TEST_SOURCES})
file(GLOB_RECURSE SYCL_TEST_SOURCES "plugin/test_sycl_*.cc")
if(NOT PLUGIN_SYCL)
list(REMOVE_ITEM TEST_SOURCES ${SYCL_TEST_SOURCES})
endif()
if(PLUGIN_FEDERATED)

View File

@@ -47,7 +47,7 @@ class Worker : public WorkerForTest {
std::size_t n = 8192; // n_bytes = 8192 * sizeof(int)
std::vector<std::int32_t> data(comm_.World() * n, 0);
auto s_data = common::Span{data.data(), data.size()};
auto s_data = common::Span<std::int32_t>{data};
auto seg = s_data.subspan(comm_.Rank() * n, n);
std::iota(seg.begin(), seg.end(), comm_.Rank());

View File

@@ -90,10 +90,10 @@ class Worker : public NCCLWorkerForTest {
}
};
class AllgatherTestGPU : public SocketTest {};
class MGPUAllgatherTest : public SocketTest {};
} // namespace
TEST_F(AllgatherTestGPU, MGPUTestVRing) {
TEST_F(MGPUAllgatherTest, MGPUTestVRing) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
@@ -104,7 +104,7 @@ TEST_F(AllgatherTestGPU, MGPUTestVRing) {
});
}
TEST_F(AllgatherTestGPU, MGPUTestVBcast) {
TEST_F(MGPUAllgatherTest, MGPUTestVBcast) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {

View File

@@ -18,31 +18,34 @@ class AllreduceWorker : public WorkerForTest {
void Basic() {
{
std::vector<double> data(13, 0.0);
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
for (std::size_t i = 0; i < rhs.size(); ++i) {
rhs[i] += lhs[i];
}
});
ASSERT_TRUE(rc.OK());
ASSERT_EQ(std::accumulate(data.cbegin(), data.cend(), 0.0), 0.0);
}
{
std::vector<double> data(1, 1.0);
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
for (std::size_t i = 0; i < rhs.size(); ++i) {
rhs[i] += lhs[i];
}
});
ASSERT_TRUE(rc.OK());
ASSERT_EQ(data[0], static_cast<double>(comm_.World()));
}
}
void Acc() {
std::vector<double> data(314, 1.5);
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
auto rc = Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
for (std::size_t i = 0; i < rhs.size(); ++i) {
rhs[i] += lhs[i];
}
});
ASSERT_TRUE(rc.OK());
for (std::size_t i = 0; i < data.size(); ++i) {
auto v = data[i];
ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;

View File

@@ -5,17 +5,15 @@
#include <gtest/gtest.h>
#include <thrust/host_vector.h> // for host_vector
#include "../../../src/collective/coll.h" // for Coll
#include "../../../src/common/common.h"
#include "../../../src/common/device_helpers.cuh" // for ToSpan, device_vector
#include "../../../src/common/type.h" // for EraseType
#include "../helpers.h" // for MakeCUDACtx
#include "test_worker.cuh" // for NCCLWorkerForTest
#include "test_worker.h" // for WorkerForTest, TestDistributed
namespace xgboost::collective {
namespace {
class AllreduceTestGPU : public SocketTest {};
class MGPUAllreduceTest : public SocketTest {};
class Worker : public NCCLWorkerForTest {
public:
@@ -47,7 +45,7 @@ class Worker : public NCCLWorkerForTest {
};
} // namespace
TEST_F(AllreduceTestGPU, BitOr) {
TEST_F(MGPUAllreduceTest, BitOr) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {
@@ -57,7 +55,7 @@ TEST_F(AllreduceTestGPU, BitOr) {
});
}
TEST_F(AllreduceTestGPU, Sum) {
TEST_F(MGPUAllreduceTest, Sum) {
auto n_workers = common::AllVisibleGPUs();
TestDistributed(n_workers, [=](std::string host, std::int32_t port, std::chrono::seconds timeout,
std::int32_t r) {

View File

@@ -0,0 +1,63 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/c_api.h>
#include <chrono> // for ""s
#include <thread> // for thread
#include "../../../src/collective/tracker.h"
#include "test_worker.h" // for SocketTest
#include "xgboost/json.h" // for Json
namespace xgboost::collective {
namespace {
// Fixture for the tracker C API tests; inherits its behaviour from SocketTest (test_worker.h).
class TrackerAPITest : public SocketTest {};
} // namespace
// Exercises the tracker C API in sequence: create -> run -> wait (in a background thread) ->
// query worker arguments -> connect two workers -> free.
//
// NOTE(review): every fatal ASSERT_* issued after `bg_wait` (or a worker thread) has been
// started returns from the test body while that std::thread is still joinable, and destroying
// a joinable std::thread calls std::terminate. Consider a join guard if this test is touched.
TEST_F(TrackerAPITest, CAPI) {
TrackerHandle handle;
// Tracker configuration, handed to the C API as a serialized JSON document.
Json config{Object{}};
config["dmlc_communicator"] = String{"rabit"};
config["n_workers"] = 2;
config["timeout"] = 1;
auto config_str = Json::Dump(config);
auto rc = XGTrackerCreate(config_str.c_str(), &handle);
ASSERT_EQ(rc, 0);
rc = XGTrackerRun(handle);
ASSERT_EQ(rc, 0);
// Call XGTrackerWait from a separate thread so this thread can go on to start the workers.
// The locals below intentionally shadow the outer `config`, `config_str` and `rc`; the wait
// call receives an empty JSON object as its configuration.
std::thread bg_wait{[&] {
Json config{Object{}};
auto config_str = Json::Dump(config);
auto rc = XGTrackerWait(handle, config_str.c_str());
ASSERT_EQ(rc, 0);
}};
// The worker arguments come back as a JSON string.
char const* cargs;
rc = XGTrackerWorkerArgs(handle, &cargs);
ASSERT_EQ(rc, 0);
auto args = Json::Load(StringView{cargs});
// The advertised tracker URI must match the address reported by GetHostAddress, and a
// non-zero port must have been assigned.
std::string host;
ASSERT_TRUE(GetHostAddress(&host).OK());
ASSERT_EQ(host, get<String const>(args["DMLC_TRACKER_URI"]));
auto port = get<Integer const>(args["DMLC_TRACKER_PORT"]);
ASSERT_NE(port, 0);
std::vector<std::thread> workers;
using namespace std::chrono_literals; // NOLINT
// One thread per worker; each only constructs (and then destroys) a WorkerForTest using the
// advertised host and port. The literal 2 matches "n_workers" above and `r` is the loop index
// (presumably world size and rank -- confirm against the WorkerForTest constructor).
for (std::int32_t r = 0; r < 2; ++r) {
workers.emplace_back([=] { WorkerForTest w{host, static_cast<std::int32_t>(port), 1s, 2, r}; });
}
for (auto& w : workers) {
w.join();
}
// NOTE(review): the handle is freed before `bg_wait` is joined; confirm that XGTrackerWait
// cannot still be using the handle at this point.
rc = XGTrackerFree(handle);
ASSERT_EQ(rc, 0);
bg_wait.join();
}
} // namespace xgboost::collective

View File

@@ -25,15 +25,18 @@ TEST_F(CommTest, Channel) {
WorkerForTest worker{host, port, timeout, n_workers, i};
if (i % 2 == 0) {
auto p_chan = worker.Comm().Chan(i + 1);
p_chan->SendAll(
EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
auto rc = p_chan->Block();
auto rc = Success() << [&] {
return p_chan->SendAll(
EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
} << [&] { return p_chan->Block(); };
ASSERT_TRUE(rc.OK()) << rc.Report();
} else {
auto p_chan = worker.Comm().Chan(i - 1);
std::int32_t r{-1};
p_chan->RecvAll(EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));
auto rc = p_chan->Block();
auto rc = Success() << [&] {
return p_chan->RecvAll(
EraseType(common::Span<std::int32_t>{&r, static_cast<std::size_t>(1)}));
} << [&] { return p_chan->Block(); };
ASSERT_TRUE(rc.OK()) << rc.Report();
ASSERT_EQ(r, i - 1);
}

View File

@@ -0,0 +1,63 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/json.h> // for Json
#include <algorithm> // for min, max
#include <chrono> // for seconds
#include <cstdint> // for int32_t
#include <string> // for string
#include <thread> // for thread
#include "../../../src/collective/comm.h"
#include "../../../src/collective/comm_group.h"
#include "../../../src/common/common.h" // for AllVisibleGPUs
#include "../helpers.h" // for MakeCUDACtx
#include "test_worker.h" // for TestDistributed
namespace xgboost::collective {
namespace {
auto MakeConfig(std::string host, std::int32_t port, std::chrono::seconds timeout, std::int32_t r) {
Json config{Object{}};
config["dmlc_communicator"] = std::string{"rabit"};
config["DMLC_TRACKER_URI"] = host;
config["DMLC_TRACKER_PORT"] = port;
config["dmlc_timeout_sec"] = static_cast<std::int64_t>(timeout.count());
config["DMLC_TASK_ID"] = std::to_string(r);
config["dmlc_retry"] = 2;
return config;
}
// Fixture for the CommGroup tests; inherits its behaviour from SocketTest.
class CommGroupTest : public SocketTest {};
} // namespace
// Every worker creates a CommGroup from a rabit configuration and verifies that the group is
// distributed, that its world size equals the number of workers, and that the CPU communicator
// reports the task id and retry count that MakeConfig put into the configuration.
TEST_F(CommGroupTest, Basic) {
  // std::thread::hardware_concurrency() is permitted to return 0 when the value cannot be
  // determined. Use at least one worker so the assertions below always run, and at most five.
  auto const n_threads = std::max(1u, std::min(std::thread::hardware_concurrency(), 5u));
  auto const n_workers = static_cast<std::int32_t>(n_threads);
  TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
                                 std::int32_t r) {
    Context ctx;
    auto config = MakeConfig(host, port, timeout, r);
    std::unique_ptr<CommGroup> ptr{CommGroup::Create(config)};
    ASSERT_TRUE(ptr->IsDistributed());
    ASSERT_EQ(ptr->World(), n_workers);
    auto const& comm = ptr->Ctx(&ctx, DeviceOrd::CPU());
    // Both expected values mirror what MakeConfig wrote: DMLC_TASK_ID and dmlc_retry.
    ASSERT_EQ(comm.TaskID(), std::to_string(r));
    ASSERT_EQ(comm.Retry(), 2);
  });
}
#if defined(XGBOOST_USE_NCCL)
// GPU counterpart of the basic test: one worker per visible GPU. Each worker creates a
// CommGroup and checks the task id and retry count reported by the CUDA communicator.
TEST_F(CommGroupTest, BasicGPU) {
  std::int32_t n_workers = common::AllVisibleGPUs();
  TestDistributed(n_workers, [&](std::string host, std::int32_t port, std::chrono::seconds timeout,
                                 std::int32_t r) {
    // NOTE(review): the context is created with `r`, while the communicator below is requested
    // for DeviceOrd::CUDA(0) -- confirm the two are meant to differ.
    auto cuda_ctx = MakeCUDACtx(r);
    auto worker_config = MakeConfig(host, port, timeout, r);
    std::unique_ptr<CommGroup> group{CommGroup::Create(worker_config)};
    auto const& cuda_comm = group->Ctx(&cuda_ctx, DeviceOrd::CUDA(0));
    ASSERT_EQ(cuda_comm.TaskID(), std::to_string(r));
    ASSERT_EQ(cuda_comm.Retry(), 2);
  });
}
#endif // for defined(XGBOOST_USE_NCCL)
} // namespace xgboost::collective

View File

@@ -8,6 +8,7 @@
#include <bitset>
#include <string> // for string
#include "../../../src/collective/comm.cuh"
#include "../../../src/collective/communicator-inl.cuh"
#include "../../../src/collective/nccl_device_communicator.cuh"
#include "../helpers.h"
@@ -16,17 +17,15 @@ namespace xgboost {
namespace collective {
TEST(NcclDeviceCommunicatorSimpleTest, ThrowOnInvalidDeviceOrdinal) {
auto construct = []() { NcclDeviceCommunicator comm{-1, false}; };
auto construct = []() { NcclDeviceCommunicator comm{-1, false, DefaultNcclName()}; };
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(NcclDeviceCommunicatorSimpleTest, SystemError) {
try {
dh::safe_nccl(ncclSystemError);
} catch (dmlc::Error const& e) {
auto str = std::string{e.what()};
ASSERT_TRUE(str.find("environment variables") != std::string::npos);
}
auto stub = std::make_shared<NcclStub>(DefaultNcclName());
auto rc = stub->GetNcclResult(ncclSystemError);
auto msg = rc.Report();
ASSERT_TRUE(msg.find("environment variables") != std::string::npos);
}
namespace {

View File

@@ -33,7 +33,7 @@ class WorkerForTest {
tracker_port_{port},
world_size_{world},
task_id_{"t:" + std::to_string(rank)},
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_} {
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_, DefaultNcclName()} {
CHECK_EQ(world_size_, comm_.World());
}
virtual ~WorkerForTest() = default;
@@ -92,10 +92,12 @@ class TrackerTest : public SocketTest {
template <typename WorkerFn>
void TestDistributed(std::int32_t n_workers, WorkerFn worker_fn) {
std::chrono::seconds timeout{1};
std::chrono::seconds timeout{2};
std::string host;
ASSERT_TRUE(GetHostAddress(&host).OK());
auto rc = GetHostAddress(&host);
ASSERT_TRUE(rc.OK()) << rc.Report();
LOG(INFO) << "Using " << n_workers << " workers for test.";
RabitTracker tracker{StringView{host}, n_workers, 0, timeout};
auto fut = tracker.Run();

View File

@@ -57,13 +57,13 @@ TEST(Algorithm, GpuArgSort) {
auto ctx = MakeCUDACtx(0);
dh::device_vector<float> values(20);
dh::Iota(dh::ToSpan(values)); // accending
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream()); // accending
dh::device_vector<size_t> sorted_idx(20);
dh::ArgSort<false>(dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
ASSERT_TRUE(thrust::is_sorted(thrust::device, sorted_idx.begin(), sorted_idx.end(),
ArgSort<false>(&ctx, dh::ToSpan(values), dh::ToSpan(sorted_idx)); // sort to descending
ASSERT_TRUE(thrust::is_sorted(ctx.CUDACtx()->CTP(), sorted_idx.begin(), sorted_idx.end(),
thrust::greater<size_t>{}));
dh::Iota(dh::ToSpan(values));
dh::Iota(dh::ToSpan(values), ctx.CUDACtx()->Stream());
dh::device_vector<size_t> groups(3);
groups[0] = 0;
groups[1] = 10;

View File

@@ -16,6 +16,7 @@
#include <vector> // for vector
#include "../../../include/xgboost/logging.h"
#include "../../../src/common/cuda_context.cuh"
#include "../../../src/common/device_helpers.cuh"
#include "../../../src/common/hist_util.cuh"
#include "../../../src/common/hist_util.h"
@@ -213,7 +214,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
dh::device_vector<float> weight(n_samples * n_features, 0);
dh::Iota(dh::ToSpan(weight));
dh::Iota(dh::ToSpan(weight), ctx.CUDACtx()->Stream());
dh::caching_device_vector<bst_row_t> columns_ptr(4);
for (std::size_t i = 0; i < columns_ptr.size(); ++i) {
@@ -362,25 +363,27 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
}
template <typename Adapter>
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
auto MakeUnweightedCutsForTest(Context const* ctx, Adapter adapter, int32_t num_bins, float missing,
size_t batch_size = 0) {
common::HistogramCuts batched_cuts;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
DeviceOrd::CUDA(0));
MetaInfo info;
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
sketch_container.MakeCuts(ctx, &batched_cuts, info.IsColumnSplit());
return batched_cuts;
}
template <typename Adapter>
void ValidateBatchedCuts(Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
void ValidateBatchedCuts(Context const* ctx, Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
common::HistogramCuts batched_cuts = MakeUnweightedCutsForTest(
adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
ValidateCuts(batched_cuts, dmat, num_bins);
}
TEST(HistUtil, AdapterDeviceSketch) {
auto ctx = MakeCUDACtx(0);
int rows = 5;
int cols = 1;
int num_bins = 4;
@@ -393,8 +396,8 @@ TEST(HistUtil, AdapterDeviceSketch) {
data::CupyAdapter adapter(str);
auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing);
Context ctx;
auto device_cuts = MakeUnweightedCutsForTest(&ctx, adapter, num_bins, missing);
ctx = ctx.MakeCPU();
auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
@@ -403,6 +406,7 @@ TEST(HistUtil, AdapterDeviceSketch) {
}
TEST(HistUtil, AdapterDeviceSketchMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -412,7 +416,8 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
dh::GlobalMemoryLogger().Clear();
ConsoleLogger::Configure({{"verbosity", "3"}});
auto cuts = MakeUnweightedCutsForTest(adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
auto cuts =
MakeUnweightedCutsForTest(&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, false);
@@ -421,6 +426,7 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
}
TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -437,7 +443,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
HistogramCuts cuts;
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, false);
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);
@@ -446,6 +452,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
}
TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
auto ctx = MakeCUDACtx(0);
int num_columns = 100;
int num_rows = 1000;
int num_bins = 256;
@@ -467,7 +474,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
&sketch_container);
HistogramCuts cuts;
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
ConsoleLogger::Configure({{"verbosity", "0"}});
size_t bytes_required = detail::RequiredMemory(
num_rows, num_columns, num_rows * num_columns, num_bins, true);
@@ -477,6 +484,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
int32_t num_bins, bool weighted) {
auto ctx = MakeCUDACtx(0);
auto h_x = GenerateRandomCategoricalSingleColumn(n, num_categories);
thrust::device_vector<float> x(h_x);
auto adapter = AdapterFromData(x, n, 1);
@@ -500,7 +508,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
AdapterDeviceSketch(adapter.Value(), num_bins, info,
std::numeric_limits<float>::quiet_NaN(), &container);
HistogramCuts cuts;
container.MakeCuts(&cuts, info.IsColumnSplit());
container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
thrust::sort(x.begin(), x.end());
auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();
@@ -524,6 +532,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
TEST(HistUtil, AdapterDeviceSketchCategorical) {
auto categorical_sizes = {2, 6, 8, 12};
int num_bins = 256;
auto ctx = MakeCUDACtx(0);
auto sizes = {25, 100, 1000};
for (auto n : sizes) {
for (auto num_categories : categorical_sizes) {
@@ -531,7 +540,7 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
auto dmat = GetDMatrixFromData(x, n, 1);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, n, 1);
ValidateBatchedCuts(adapter, num_bins, dmat.get());
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
TestCategoricalSketchAdapter(n, num_categories, num_bins, true);
TestCategoricalSketchAdapter(n, num_categories, num_bins, false);
}
@@ -542,13 +551,14 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
auto bin_sizes = {2, 16, 256, 512};
auto sizes = {100, 1000, 1500};
int num_columns = 5;
auto ctx = MakeCUDACtx(0);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto x_device = thrust::device_vector<float>(x);
for (auto num_bins : bin_sizes) {
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
ValidateBatchedCuts(adapter, num_bins, dmat.get());
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
}
}
}
@@ -558,12 +568,13 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
int num_rows = 5000;
auto batch_sizes = {0, 100, 1500, 6000};
int num_columns = 5;
auto ctx = MakeCUDACtx(0);
for (auto batch_size : batch_sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
ValidateBatchedCuts(adapter, num_bins, dmat.get(), batch_size);
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get(), batch_size);
}
}
@@ -649,12 +660,12 @@ TEST(HistUtil, SketchingEquivalent) {
auto x_device = thrust::device_vector<float>(x);
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(
adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
EXPECT_EQ(dmat_cuts.Values(), adapter_cuts.Values());
EXPECT_EQ(dmat_cuts.Ptrs(), adapter_cuts.Ptrs());
EXPECT_EQ(dmat_cuts.MinValues(), adapter_cuts.MinValues());
ValidateBatchedCuts(adapter, num_bins, dmat.get());
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
}
}
}
@@ -704,7 +715,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
.Device(DeviceOrd::CUDA(0))
.GenerateArrayInterface(&storage);
MetaInfo info;
Context ctx;
auto ctx = MakeCUDACtx(0);
auto& h_weights = info.weights_.HostVector();
if (with_group) {
h_weights.resize(kGroups);
@@ -733,7 +744,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
&sketch_container);
common::HistogramCuts cuts;
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
if (with_group) {
@@ -746,10 +757,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
ValidateCuts(cuts, dmat.get(), kBins);
auto cuda_ctx = MakeCUDACtx(0);
if (with_group) {
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
HistogramCuts non_weighted = DeviceSketch(&cuda_ctx, dmat.get(), kBins, 0);
HistogramCuts non_weighted = DeviceSketch(&ctx, dmat.get(), kBins, 0);
for (size_t i = 0; i < cuts.Values().size(); ++i) {
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
}
@@ -775,7 +785,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
sketch_container.MakeCuts(&ctx, &weighted, info.IsColumnSplit());
ValidateCuts(weighted, dmat.get(), kBins);
}
}

View File

@@ -15,6 +15,15 @@
namespace xgboost::linalg {
namespace {
// Shorthand for DeviceOrd::CPU().
DeviceOrd CPU() { return DeviceOrd::CPU(); }
// Compile test: callers pass the same non-const view for both parameters, so instantiating
// this template requires a view of T to be accepted where a view of `T const` is expected.
// At run time it only checks that slicing with linalg::All() keeps the size of each view.
template <typename T>
void ConstView(linalg::VectorView<T> v1, linalg::VectorView<std::add_const_t<T>> v2) {
  auto mutable_slice = v1.Slice(linalg::All());
  ASSERT_EQ(mutable_slice.Size(), v1.Size());

  auto const_slice = v2.Slice(linalg::All());
  ASSERT_EQ(const_slice.Size(), v2.Size());
}
} // namespace
auto MakeMatrixFromTest(HostDeviceVector<float> *storage, std::size_t n_rows, std::size_t n_cols) {
@@ -206,6 +215,11 @@ TEST(Linalg, TensorView) {
ASSERT_TRUE(t.FContiguous());
ASSERT_FALSE(t.CContiguous());
}
{
// const
TensorView<double, 1> t{data, {data.size()}, CPU()};
ConstView(t, t);
}
}
TEST(Linalg, Tensor) {

View File

@@ -86,7 +86,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
}
HistogramCuts distributed_cuts;
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
// Generate cuts for single node environment
collective::Finalize();
@@ -117,7 +117,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
}
HistogramCuts single_node_cuts;
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
auto const& sptrs = single_node_cuts.Ptrs();
auto const& dptrs = distributed_cuts.Ptrs();
@@ -220,7 +220,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
}
}
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
}
// Generate cuts for single node environment
@@ -243,7 +243,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
}
}
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
}
auto const& sptrs = single_node_cuts.Ptrs();

View File

@@ -370,6 +370,7 @@ void TestAllReduceBasic() {
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
auto const device = DeviceOrd::CUDA(GPUIDX);
auto ctx = MakeCUDACtx(device.ordinal);
// Set up single node version;
HostDeviceVector<FeatureType> ft({}, device);
@@ -413,7 +414,7 @@ void TestAllReduceBasic() {
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_distributed);
sketch_distributed.AllReduce(false);
sketch_distributed.AllReduce(&ctx, false);
sketch_distributed.Unique();
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(),
@@ -517,6 +518,7 @@ void TestSameOnAllWorkers() {
MetaInfo const &info) {
auto const rank = collective::GetRank();
auto const device = DeviceOrd::CUDA(GPUIDX);
Context ctx = MakeCUDACtx(device.ordinal);
HostDeviceVector<FeatureType> ft({}, device);
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
HostDeviceVector<float> storage({}, device);
@@ -528,7 +530,7 @@ void TestSameOnAllWorkers() {
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_distributed);
sketch_distributed.AllReduce(false);
sketch_distributed.AllReduce(&ctx, false);
sketch_distributed.Unique();
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);

View File

@@ -1,19 +1,20 @@
#include <valarray>
/**
* Copyright 2018-2023, XGBoost Contributors
*/
#include "../../../src/common/random.h"
#include "../helpers.h"
#include "gtest/gtest.h"
#include "xgboost/context.h" // Context
#include "xgboost/context.h" // for Context
namespace xgboost {
namespace common {
TEST(ColumnSampler, Test) {
Context ctx;
namespace xgboost::common {
namespace {
void TestBasic(Context const* ctx) {
int n = 128;
ColumnSampler cs;
ColumnSampler cs{1u};
std::vector<float> feature_weights;
// No node sampling
cs.Init(&ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
cs.Init(ctx, n, feature_weights, 1.0f, 0.5f, 0.5f);
auto set0 = cs.GetFeatureSet(0);
ASSERT_EQ(set0->Size(), 32);
@@ -26,7 +27,7 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set2->Size(), 32);
// Node sampling
cs.Init(&ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
cs.Init(ctx, n, feature_weights, 0.5f, 1.0f, 0.5f);
auto set3 = cs.GetFeatureSet(0);
ASSERT_EQ(set3->Size(), 32);
@@ -36,21 +37,33 @@ TEST(ColumnSampler, Test) {
ASSERT_EQ(set4->Size(), 32);
// No level or node sampling, should be the same at different depth
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(),
cs.GetFeatureSet(1)->HostVector());
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 0.5f);
ASSERT_EQ(cs.GetFeatureSet(0)->HostVector(), cs.GetFeatureSet(1)->HostVector());
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set5 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->Size(), n);
cs.Init(&ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
cs.Init(ctx, n, feature_weights, 1.0f, 1.0f, 1.0f);
auto set6 = cs.GetFeatureSet(0);
ASSERT_EQ(set5->HostVector(), set6->HostVector());
// Should always be a minimum of one feature
cs.Init(&ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
cs.Init(ctx, n, feature_weights, 1e-16f, 1e-16f, 1e-16f);
ASSERT_EQ(cs.GetFeatureSet(0)->Size(), 1);
}
} // namespace
TEST(ColumnSampler, Test) {
Context ctx;
TestBasic(&ctx);
}
#if defined(XGBOOST_USE_CUDA)
TEST(ColumnSampler, GPUTest) {
auto ctx = MakeCUDACtx(0);
TestBasic(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
// Test if different threads using the same seed produce the same result
TEST(ColumnSampler, ThreadSynchronisation) {
@@ -81,16 +94,16 @@ TEST(ColumnSampler, ThreadSynchronisation) {
ASSERT_TRUE(success);
}
TEST(ColumnSampler, WeightedSampling) {
auto test_basic = [](int first) {
Context ctx;
namespace {
void TestWeightedSampling(Context const* ctx) {
auto test_basic = [ctx](int first) {
std::vector<float> feature_weights(2);
feature_weights[0] = std::abs(first - 1.0f);
feature_weights[1] = first - 0.0f;
ColumnSampler cs{0};
cs.Init(&ctx, 2, feature_weights, 1.0, 1.0, 0.5);
cs.Init(ctx, 2, feature_weights, 1.0, 1.0, 0.5);
auto feature_sets = cs.GetFeatureSet(0);
auto const &h_feat_set = feature_sets->HostVector();
auto const& h_feat_set = feature_sets->HostVector();
ASSERT_EQ(h_feat_set.size(), 1);
ASSERT_EQ(h_feat_set[0], first - 0);
};
@@ -104,8 +117,7 @@ TEST(ColumnSampler, WeightedSampling) {
SimpleRealUniformDistribution<float> dist(.0f, 12.0f);
std::generate(feature_weights.begin(), feature_weights.end(), [&]() { return dist(&rng); });
ColumnSampler cs{0};
Context ctx;
cs.Init(&ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
cs.Init(ctx, kCols, feature_weights, 0.5f, 1.0f, 1.0f);
std::vector<bst_feature_t> features(kCols);
std::iota(features.begin(), features.end(), 0);
std::vector<float> freq(kCols, 0);
@@ -131,8 +143,22 @@ TEST(ColumnSampler, WeightedSampling) {
EXPECT_NEAR(freq[i], feature_weights[i], 1e-2);
}
}
} // namespace
TEST(ColumnSampler, WeightedMultiSampling) {
TEST(ColumnSampler, WeightedSampling) {
Context ctx;
TestWeightedSampling(&ctx);
}
#if defined(XGBOOST_USE_CUDA)
TEST(ColumnSampler, GPUWeightedSampling) {
auto ctx = MakeCUDACtx(0);
TestWeightedSampling(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
namespace {
void TestWeightedMultiSampling(Context const* ctx) {
size_t constexpr kCols = 32;
std::vector<float> feature_weights(kCols, 0);
for (size_t i = 0; i < feature_weights.size(); ++i) {
@@ -140,13 +166,24 @@ TEST(ColumnSampler, WeightedMultiSampling) {
}
ColumnSampler cs{0};
float bytree{0.5}, bylevel{0.5}, bynode{0.5};
Context ctx;
cs.Init(&ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
cs.Init(ctx, feature_weights.size(), feature_weights, bytree, bylevel, bynode);
auto feature_set = cs.GetFeatureSet(0);
size_t n_sampled = kCols * bytree * bylevel * bynode;
ASSERT_EQ(feature_set->Size(), n_sampled);
feature_set = cs.GetFeatureSet(1);
ASSERT_EQ(feature_set->Size(), n_sampled);
}
} // namespace common
} // namespace xgboost
} // namespace
TEST(ColumnSampler, WeightedMultiSampling) {
Context ctx;
TestWeightedMultiSampling(&ctx);
}
#if defined(XGBOOST_USE_CUDA)
TEST(ColumnSampler, GPUWeightedMultiSampling) {
auto ctx = MakeCUDACtx(0);
TestWeightedMultiSampling(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost::common

View File

@@ -124,6 +124,9 @@ TEST_F(FederatedCollTestGPU, Allgather) {
TEST_F(FederatedCollTestGPU, AllgatherV) {
std::int32_t n_workers = 2;
if (common::AllVisibleGPUs() < n_workers) {
GTEST_SKIP_("At least 2 GPUs are required for the test.");
}
TestFederated(n_workers, [=](std::shared_ptr<FederatedComm> comm, std::int32_t rank) {
TestAllgatherV(comm, rank);
});

View File

@@ -1,6 +1,7 @@
/**
* Copyright 2022-2023, XGBoost contributors
*/
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <string> // for string
@@ -19,12 +20,14 @@ class FederatedCommTest : public SocketTest {};
TEST_F(FederatedCommTest, ThrowOnWorldSizeTooSmall) {
auto construct = [] { FederatedComm comm{"localhost", 0, 0, 0}; };
ExpectThrow<dmlc::Error>("Invalid world size.", construct);
ASSERT_THAT(construct,
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid world size")));
}
TEST_F(FederatedCommTest, ThrowOnRankTooSmall) {
auto construct = [] { FederatedComm comm{"localhost", 0, 1, -1}; };
ExpectThrow<dmlc::Error>("Invalid worker rank.", construct);
ASSERT_THAT(construct,
::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr("Invalid worker rank.")));
}
TEST_F(FederatedCommTest, ThrowOnRankTooBig) {
@@ -38,7 +41,7 @@ TEST_F(FederatedCommTest, ThrowOnWorldSizeNotInteger) {
config["federated_server_address"] = std::string("localhost:0");
config["federated_world_size"] = std::string("1");
config["federated_rank"] = Integer(0);
FederatedComm comm(config);
FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config};
};
ExpectThrow<dmlc::Error>("got: `String`", construct);
}
@@ -49,7 +52,7 @@ TEST_F(FederatedCommTest, ThrowOnRankNotInteger) {
config["federated_server_address"] = std::string("localhost:0");
config["federated_world_size"] = 1;
config["federated_rank"] = std::string("0");
FederatedComm comm(config);
FederatedComm comm(DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config);
};
ExpectThrow<dmlc::Error>("got: `String`", construct);
}
@@ -59,7 +62,7 @@ TEST_F(FederatedCommTest, GetWorldSizeAndRank) {
config["federated_world_size"] = 6;
config["federated_rank"] = 3;
config["federated_server_address"] = String{"localhost:0"};
FederatedComm comm{config};
FederatedComm comm{DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, "", config};
EXPECT_EQ(comm.World(), 6);
EXPECT_EQ(comm.Rank(), 3);
}

View File

@@ -0,0 +1,22 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/json.h> // for Json
#include "../../../../src/collective/comm_group.h"
#include "../../helpers.h"
#include "test_worker.h"
namespace xgboost::collective {
// Each worker of a federated group must see its own rank, and the CPU communicator obtained
// from the group must report the matching task id and a retry count of 2.
TEST(CommGroup, Federated) {
  std::int32_t const world = common::AllVisibleGPUs();
  TestFederatedGroup(world, [&](std::shared_ptr<CommGroup> group, std::int32_t rank) {
    Context cpu_ctx;
    ASSERT_EQ(group->Rank(), rank);

    auto const& cpu_comm = group->Ctx(&cpu_ctx, DeviceOrd::CPU());
    ASSERT_EQ(cpu_comm.TaskID(), std::to_string(rank));
    ASSERT_EQ(cpu_comm.Retry(), 2);
  });
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,22 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/json.h> // for Json
#include "../../../../src/collective/comm_group.h"
#include "../../helpers.h"
#include "test_worker.h"
namespace xgboost::collective {
// Same checks as the CPU variant, but the communicator is requested for CUDA device 0.
TEST(CommGroup, FederatedGPU) {
  std::int32_t const world = common::AllVisibleGPUs();
  TestFederatedGroup(world, [&](std::shared_ptr<CommGroup> group, std::int32_t rank) {
    auto cuda_ctx = MakeCUDACtx(0);
    // The communicator is fetched before the rank assertion, as in the CPU test's GPU sibling.
    auto const& cuda_comm = group->Ctx(&cuda_ctx, DeviceOrd::CUDA(0));

    ASSERT_EQ(group->Rank(), rank);
    ASSERT_EQ(cuda_comm.TaskID(), std::to_string(rank));
    ASSERT_EQ(cuda_comm.Retry(), 2);
  });
}
} // namespace xgboost::collective

View File

@@ -5,10 +5,12 @@
#include <gtest/gtest.h>
#include <chrono> // for ms
#include <chrono> // for ms, seconds
#include <memory> // for shared_ptr
#include <thread> // for thread
#include "../../../../plugin/federated/federated_tracker.h"
#include "../../../../src/collective/comm_group.h"
#include "federated_comm.h" // for FederatedComm
#include "xgboost/json.h" // for Json
@@ -23,9 +25,8 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
std::vector<std::thread> workers;
using namespace std::chrono_literals;
while (tracker.Port() == 0) {
std::this_thread::sleep_for(100ms);
}
auto rc = tracker.WaitUntilReady();
ASSERT_TRUE(rc.OK()) << rc.Report();
std::int32_t port = tracker.Port();
for (std::int32_t i = 0; i < n_workers; ++i) {
@@ -34,7 +35,8 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
config["federated_world_size"] = n_workers;
config["federated_rank"] = i;
config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
auto comm = std::make_shared<FederatedComm>(config);
auto comm = std::make_shared<FederatedComm>(
DefaultRetry(), std::chrono::seconds{DefaultTimeoutSec()}, std::to_string(i), config);
fn(comm, i);
});
@@ -44,7 +46,43 @@ void TestFederated(std::int32_t n_workers, WorkerFn&& fn) {
t.join();
}
auto rc = tracker.Shutdown();
rc = tracker.Shutdown();
ASSERT_TRUE(rc.OK()) << rc.Report();
ASSERT_TRUE(fut.get().OK());
}
/**
 * Start a FederatedTracker for `n_workers` workers, run `fn(comm_group, rank)` on one thread
 * per worker, then shut the tracker down and check its result.
 *
 * Every worker thread builds its own CommGroup from a JSON configuration that selects the
 * "federated" communicator and points at the tracker's port. The helper uses gtest ASSERT_*
 * macros, so a failed assertion makes it return early.
 *
 * \param n_workers Number of worker threads; also used as the tracker's "n_workers" and as each
 *                  worker's "federated_world_size".
 * \param fn        Callable invoked as fn(std::shared_ptr<CommGroup>, std::int32_t rank) on
 *                  each worker thread.
 */
template <typename WorkerFn>
void TestFederatedGroup(std::int32_t n_workers, WorkerFn&& fn) {
// Tracker-side configuration.
Json config{Object()};
config["federated_secure"] = Boolean{false};
config["n_workers"] = Integer{n_workers};
FederatedTracker tracker{config};
auto fut = tracker.Run();
std::vector<std::thread> workers;
// The port is read only after the tracker reports that it is ready.
auto rc = tracker.WaitUntilReady();
ASSERT_TRUE(rc.OK()) << rc.Report();
std::int32_t port = tracker.Port();
for (std::int32_t i = 0; i < n_workers; ++i) {
// Everything is captured by copy so each thread works on its own values.
workers.emplace_back([=] {
// Worker-side configuration; this `config` shadows the tracker's one above.
Json config{Object{}};
config["dmlc_communicator"] = std::string{"federated"};
config["dmlc_task_id"] = std::to_string(i);
config["dmlc_retry"] = 2;
config["federated_world_size"] = n_workers;
config["federated_rank"] = i;
config["federated_server_address"] = "0.0.0.0:" + std::to_string(port);
std::shared_ptr<CommGroup> comm_group{CommGroup::Create(config)};
fn(comm_group, i);
});
}
// Wait for every worker before shutting the tracker down.
for (auto& t : workers) {
t.join();
}
rc = tracker.Shutdown();
ASSERT_TRUE(rc.OK()) << rc.Report();
// Result of the asynchronous tracker run started by tracker.Run().
ASSERT_TRUE(fut.get().OK());
}

View File

@@ -73,6 +73,7 @@ void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_
auto run = [&](auto rank) {
Json config{JsonObject()};
config["xgboost_communicator"] = String("federated");
config["federated_secure"] = false;
config["federated_server_address"] = String(server_address);
config["federated_world_size"] = world_size;
config["federated_rank"] = rank;

View File

@@ -1,168 +0,0 @@
/*!
* Copyright 2017-2020 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/predictor.h>
#include "../../../src/data/adapter.h"
#include "../../../src/gbm/gbtree_model.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "../predictor/test_predictor.h"
namespace xgboost {
// Exercises batch, per-instance, leaf and contribution prediction of "oneapi_predictor" on a
// small dense matrix, using the model returned by CreateTestModel.
// NOTE(review): the context comes from MakeCUDACtx(0) although the predictor under test is the
// oneAPI one -- confirm this is intended.
TEST(Plugin, OneAPIPredictorBasic) {
  auto lparam = MakeCUDACtx(0);
  std::unique_ptr<Predictor> oneapi_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));

  int kRows = 5;
  int kCols = 5;

  LearnerModelParam param;
  param.num_feature = kCols;
  param.base_score = 0.0;
  param.num_output_group = 1;

  gbm::GBTreeModel model = CreateTestModel(&param);

  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();

  // Test predict batch
  PredictionCacheEntry out_predictions;
  oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
  ASSERT_EQ(model.trees.size(), out_predictions.version);
  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
    ASSERT_EQ(out_predictions_h[i], 1.5);
  }

  // Test predict instance
  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
  for (size_t i = 0; i < batch.Size(); i++) {
    std::vector<float> instance_out_predictions;
    oneapi_predictor->PredictInstance(batch[i], &instance_out_predictions, model);
    ASSERT_EQ(instance_out_predictions[0], 1.5);
  }

  // Test predict leaf
  std::vector<float> leaf_out_predictions;
  oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
  for (auto v : leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }

  // Test predict contribution
  std::vector<float> out_contribution;
  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with
    // LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      // Check this row's own bias entry; asserting on back() only ever tested the last row.
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }

  // Test predict contribution (approximate method)
  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model, 0, nullptr, true);
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with
    // LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }
}
// Runs batch, leaf and contribution prediction of "oneapi_predictor" on a sparse-page DMatrix
// created by CreateSparsePageDMatrix inside a temporary directory.
// NOTE(review): the context comes from MakeCUDACtx(0) although the predictor under test is the
// oneAPI one -- confirm this is intended.
TEST(Plugin, OneAPIPredictorExternalMemory) {
  dmlc::TemporaryDirectory tmpdir;
  std::string filename = tmpdir.path + "/big.libsvm";
  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(12, 64, filename);
  auto lparam = MakeCUDACtx(0);

  std::unique_ptr<Predictor> oneapi_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", &lparam));

  LearnerModelParam param;
  param.base_score = 0;
  param.num_feature = dmat->Info().num_col_;
  param.num_output_group = 1;

  gbm::GBTreeModel model = CreateTestModel(&param);

  // Test predict batch
  PredictionCacheEntry out_predictions;
  oneapi_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  ASSERT_EQ(out_predictions.predictions.Size(), dmat->Info().num_row_);
  for (const auto& v : out_predictions_h) {
    ASSERT_EQ(v, 1.5);
  }

  // Test predict leaf
  std::vector<float> leaf_out_predictions;
  oneapi_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
  ASSERT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
  for (const auto& v : leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }

  // Test predict contribution
  std::vector<float> out_contribution;
  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution, model);
  ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with
    // LeafValue().
    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
      // Check this row's own bias entry; asserting on back() only ever tested the last row.
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }

  // Test predict contribution (approximate method)
  std::vector<float> out_contribution_approximate;
  oneapi_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0,
                                        nullptr, true);
  ASSERT_EQ(out_contribution_approximate.size(),
            dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
  // Walk the approximate result itself; the earlier loop re-read the exact result and left the
  // approximate values unchecked.
  for (size_t i = 0; i < out_contribution_approximate.size(); ++i) {
    auto const& contri = out_contribution_approximate[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with
    // LeafValue().
    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }
}
// In-place prediction with "oneapi_predictor", once from a dense adapter and once from a CSR
// adapter, both filled by the same random generator.
TEST(Plugin, OneAPIPredictorInplacePredict) {
  bst_row_t constexpr kRows{128};
  bst_feature_t constexpr kCols{64};
  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(-1);

  // Dense input.
  {
    HostDeviceVector<float> dense_values;
    gen.GenerateDense(&dense_values);
    ASSERT_EQ(dense_values.Size(), kRows * kCols);
    std::shared_ptr<data::DenseAdapter> x =
        std::make_shared<data::DenseAdapter>(dense_values.HostPointer(), kRows, kCols);
    TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
  }

  // CSR input.
  {
    HostDeviceVector<float> csr_values;
    HostDeviceVector<bst_row_t> row_ptrs;
    HostDeviceVector<bst_feature_t> col_indices;
    gen.GenerateCSR(&csr_values, &row_ptrs, &col_indices);
    std::shared_ptr<data::CSRAdapter> x = std::make_shared<data::CSRAdapter>(
        row_ptrs.HostPointer(), col_indices.HostPointer(), csr_values.HostPointer(), kRows,
        csr_values.Size(), kCols);
    TestInplacePrediction(x, "oneapi_predictor", kRows, kCols, -1);
  }
}
} // namespace xgboost

View File

@@ -1,176 +0,0 @@
/*!
* Copyright 2017-2019 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/objective.h>
#include <xgboost/context.h>
#include <xgboost/json.h>
#include "../helpers.h"
namespace xgboost {
// Gradient/hessian check for "reg:squarederror_oneapi": once with unit weights and once with
// an empty weight vector, expecting the same gradients and hessians both times.
TEST(Plugin, LinearRegressionGPairOneAPI) {
  Context tparam = MakeCUDACtx(0);
  std::vector<std::pair<std::string, std::string>> args;

  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squarederror_oneapi", &tparam)};
  obj->Configure(args);

  std::vector<bst_float> const preds{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1};
  std::vector<bst_float> const labels{0, 0, 0, 0, 1, 1, 1, 1};
  std::vector<bst_float> const unit_weights{1, 1, 1, 1, 1, 1, 1, 1};
  std::vector<bst_float> const no_weights{};
  std::vector<bst_float> const expected_grad{0, 0.1f, 0.9f, 1.0f, -1.0f, -0.9f, -0.1f, 0};
  std::vector<bst_float> const expected_hess{1, 1, 1, 1, 1, 1, 1, 1};

  CheckObjFunction(obj, preds, labels, unit_weights, expected_grad, expected_hess);
  CheckObjFunction(obj, preds, labels, no_weights, expected_grad, expected_hess);

  ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
// Config round-trip plus gradient/hessian check for "reg:squaredlogerror_oneapi", with unit
// weights and with an empty weight vector; the default metric must be "rmsle".
TEST(Plugin, SquaredLogOneAPI) {
  Context tparam = MakeCUDACtx(0);
  std::vector<std::pair<std::string, std::string>> args;

  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:squaredlogerror_oneapi", &tparam)};
  obj->Configure(args);
  CheckConfigReload(obj, "reg:squaredlogerror_oneapi");

  std::vector<bst_float> const preds{0.1f, 0.2f, 0.4f, 0.8f, 1.6f};
  std::vector<bst_float> const labels{1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  std::vector<bst_float> const unit_weights{1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  std::vector<bst_float> const no_weights{};
  std::vector<bst_float> const expected_grad{-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f};
  std::vector<bst_float> const expected_hess{1.3205f, 1.0492f, 0.69215f, 0.34115f, 0.1091f};

  CheckObjFunction(obj, preds, labels, unit_weights, expected_grad, expected_hess);
  CheckObjFunction(obj, preds, labels, no_weights, expected_grad, expected_hess);

  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"rmsle"});
}
// Config round-trip plus gradient/hessian check for "reg:logistic_oneapi" with unit weights.
TEST(Plugin, LogisticRegressionGPairOneAPI) {
  Context tparam = MakeCUDACtx(0);
  std::vector<std::pair<std::string, std::string>> args;

  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:logistic_oneapi", &tparam)};
  obj->Configure(args);
  CheckConfigReload(obj, "reg:logistic_oneapi");

  std::vector<bst_float> const preds{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1};
  std::vector<bst_float> const labels{0, 0, 0, 0, 1, 1, 1, 1};
  std::vector<bst_float> const weights{1, 1, 1, 1, 1, 1, 1, 1};
  std::vector<bst_float> const out_grad{0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f};
  std::vector<bst_float> const out_hess{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f};

  CheckObjFunction(obj, preds, labels, weights, out_grad, out_hess);
}
// Covers label validation, ProbToMargin and PredTransform of "reg:logistic_oneapi".
TEST(Plugin, LogisticRegressionBasicOneAPI) {
  Context lparam = MakeCUDACtx(0);
  std::vector<std::pair<std::string, std::string>> args;

  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:logistic_oneapi", &lparam)};
  obj->Configure(args);
  CheckConfigReload(obj, "reg:logistic_oneapi");

  // Label validation: a label of 10 must be rejected.
  EXPECT_ANY_THROW(CheckObjFunction(obj, {0}, {10}, {1}, {0}, {0}))
      << "Expected error when label not in range [0,1f] for LogisticRegression";

  // ProbToMargin at three probabilities, then with an out-of-range input.
  EXPECT_NEAR(obj->ProbToMargin(0.1f), -2.197f, 0.01f);
  EXPECT_NEAR(obj->ProbToMargin(0.5f), 0, 0.01f);
  EXPECT_NEAR(obj->ProbToMargin(0.9f), 2.197f, 0.01f);
  EXPECT_ANY_THROW(obj->ProbToMargin(10))
      << "Expected error when base_score not in range [0,1f] for LogisticRegression";

  // PredTransform rewrites the predictions in place; compare against the expected values.
  HostDeviceVector<bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
  std::vector<bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
  obj->PredTransform(&io_preds);
  auto& transformed = io_preds.HostVector();
  int const n_preds = static_cast<int>(io_preds.Size());
  for (int idx = 0; idx < n_preds; ++idx) {
    EXPECT_NEAR(transformed[idx], out_preds[idx], 0.01f);
  }
}
// Gradient/hessian check for "binary:logitraw_oneapi" with unit weights.
TEST(Plugin, LogisticRawGPairOneAPI) {
  Context lparam = MakeCUDACtx(0);
  std::vector<std::pair<std::string, std::string>> args;

  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("binary:logitraw_oneapi", &lparam)};
  obj->Configure(args);

  std::vector<bst_float> const preds{0, 0.1f, 0.9f, 1, 0, 0.1f, 0.9f, 1};
  std::vector<bst_float> const labels{0, 0, 0, 0, 1, 1, 1, 1};
  std::vector<bst_float> const weights{1, 1, 1, 1, 1, 1, 1, 1};
  std::vector<bst_float> const out_grad{0.5f, 0.52f, 0.71f, 0.73f, -0.5f, -0.47f, -0.28f, -0.26f};
  std::vector<bst_float> const out_hess{0.25f, 0.24f, 0.20f, 0.19f, 0.25f, 0.24f, 0.20f, 0.19f};

  CheckObjFunction(obj, preds, labels, weights, out_grad, out_hess);
}
// Computes gradients for the same predictions and labels with "reg:squarederror" and with
// "reg:squarederror_oneapi", and requires the summed squared differences of gradients and
// hessians to be within kRtEps of zero.
TEST(Plugin, CPUvsOneAPI) {
  Context ctx = MakeCUDACtx(0);

  // Owned through unique_ptr so the objectives are released even when an ASSERT_* below
  // returns from the test early.
  std::unique_ptr<ObjFunction> obj_cpu{ObjFunction::Create("reg:squarederror", &ctx)};
  std::unique_ptr<ObjFunction> obj_oneapi{ObjFunction::Create("reg:squarederror_oneapi", &ctx)};
  HostDeviceVector<GradientPair> cpu_out_preds;
  HostDeviceVector<GradientPair> oneapi_out_preds;

  constexpr size_t kRows = 400;
  constexpr size_t kCols = 100;
  auto pdmat = RandomDataGenerator(kRows, kCols, 0).Seed(0).GenerateDMatrix();
  HostDeviceVector<float> preds;
  preds.Resize(kRows);
  auto& h_preds = preds.HostVector();
  for (size_t i = 0; i < h_preds.size(); ++i) {
    h_preds[i] = static_cast<float>(i);
  }
  auto& info = pdmat->Info();

  info.labels.Reshape(kRows, 1);
  auto& h_labels = info.labels.Data()->HostVector();
  for (size_t i = 0; i < h_labels.size(); ++i) {
    h_labels[i] = 1 / static_cast<float>(i + 1);
  }

  {
    // CPU
    // Both objectives were created with a pointer to `ctx`, so the context is switched in
    // place before each call.
    ctx = ctx.MakeCPU();
    obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
  }
  {
    // oneapi
    ctx.gpu_id = 0;
    obj_oneapi->GetGradient(preds, info, 0, &oneapi_out_preds);
  }

  auto& h_cpu_out = cpu_out_preds.HostVector();
  auto& h_oneapi_out = oneapi_out_preds.HostVector();

  float sgrad = 0;
  float shess = 0;
  for (size_t i = 0; i < kRows; ++i) {
    // Square in double, matching the precision std::pow(float, int) used to provide.
    double const grad_diff = h_cpu_out[i].GetGrad() - h_oneapi_out[i].GetGrad();
    double const hess_diff = h_cpu_out[i].GetHess() - h_oneapi_out[i].GetHess();
    sgrad += grad_diff * grad_diff;
    shess += hess_diff * hess_diff;
  }
  ASSERT_NEAR(sgrad, 0.0f, kRtEps);
  ASSERT_NEAR(shess, 0.0f, kRtEps);
}
} // namespace xgboost

View File

@@ -0,0 +1,101 @@
/*!
* Copyright 2017-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/predictor.h>
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/gbm/gbtree.h"
#include "../../../src/gbm/gbtree_model.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "../predictor/test_predictor.h"
namespace xgboost {

// Basic prediction checks on a small dense matrix with the device set to "sycl".
TEST(SyclPredictor, Basic) {
  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  size_t constexpr kRows = 5;
  size_t constexpr kCols = 5;
  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
  TestBasic(p_fmat.get(), &sycl_ctx);
}

// Same basic checks, but on a sparse-page DMatrix from CreateSparsePageDMatrix.
TEST(SyclPredictor, ExternalMemory) {
  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  size_t constexpr kPageSize = 64;
  size_t constexpr kEntriesPerCol = 3;
  size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
  std::unique_ptr<DMatrix> p_fmat = CreateSparsePageDMatrix(kEntries);
  TestBasic(p_fmat.get(), &sycl_ctx);
}

// In-place prediction through a DMatrixProxy that wraps a dense array interface.
TEST(SyclPredictor, InplacePredict) {
  bst_row_t constexpr kRows{128};
  bst_feature_t constexpr kCols{64};
  // The generator is placed on the device of a default-constructed context; the predictor
  // context below is the one configured for sycl.
  Context default_ctx;
  auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(default_ctx.Device());

  HostDeviceVector<float> dense_values;
  gen.GenerateDense(&dense_values);
  ASSERT_EQ(dense_values.Size(), kRows * kCols);

  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  std::shared_ptr<data::DMatrixProxy> proxy{new data::DMatrixProxy{}};
  auto array_interface = GetArrayInterface(&dense_values, kRows, kCols);
  std::string interface_str;
  Json::Dump(array_interface, &interface_str);
  proxy->SetArrayData(interface_str.data());
  TestInplacePrediction(&sycl_ctx, proxy, kRows, kCols);
}

// Iteration-range prediction with the device set to "sycl".
TEST(SyclPredictor, IterationRange) {
  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
  TestIterationRange(&sycl_ctx);
}

// Training prediction using a binned DMatrix and a DMatrix built from an array adapter.
TEST(SyclPredictor, GHistIndexTraining) {
  size_t constexpr kRows{128};
  size_t constexpr kCols{16};
  size_t constexpr kBins{64};

  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});

  auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateDMatrix(false);

  HostDeviceVector<float> storage(kRows * kCols);
  auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
  auto adapter = data::ArrayAdapter(columnar.c_str());
  std::shared_ptr<DMatrix> p_full{
      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};

  TestTrainingPrediction(&sycl_ctx, kRows, kBins, p_full, p_hist);
}

// Leaf prediction on a categorical split, without column split.
TEST(SyclPredictor, CategoricalPredictLeaf) {
  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
  TestCategoricalPredictLeaf(&sycl_ctx, false);
}

// Prediction on input with fewer features than the model.
TEST(SyclPredictor, LesserFeatures) {
  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
  TestPredictionWithLesserFeatures(&sycl_ctx);
}

// Sparse prediction at two sparsity levels.
TEST(SyclPredictor, Sparse) {
  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
  TestSparsePrediction(&sycl_ctx, 0.2);
  TestSparsePrediction(&sycl_ctx, 0.8);
}

// Vector-leaf prediction with the device set to "sycl".
TEST(SyclPredictor, Multi) {
  Context sycl_ctx;
  sycl_ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
  TestVectorLeafPrediction(&sycl_ctx);
}

}  // namespace xgboost

View File

@@ -18,92 +18,17 @@
namespace xgboost {
namespace {
// Runs the basic prediction checks with "cpu_predictor" on `dmat`: batch, per-instance and leaf
// prediction, followed by exact and approximate contributions unless the data is column-split.
// The row and column counts of `dmat` size the expected contribution output.
void TestBasic(DMatrix* dmat) {
  Context ctx;
  std::unique_ptr<Predictor> cpu_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &ctx));

  size_t const kRows = dmat->Info().num_row_;
  size_t const kCols = dmat->Info().num_col_;

  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};

  ctx.UpdateAllowUnknown(Args{});
  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

  // Test predict batch
  PredictionCacheEntry out_predictions;
  cpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
  cpu_predictor->PredictBatch(dmat, &out_predictions, model, 0);

  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
    ASSERT_EQ(out_predictions_h[i], 1.5);
  }

  // Test predict instance
  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
  auto page = batch.GetView();
  for (size_t i = 0; i < batch.Size(); i++) {
    std::vector<float> instance_out_predictions;
    cpu_predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
                                   dmat->Info().IsColumnSplit());
    ASSERT_EQ(instance_out_predictions[0], 1.5);
  }

  // Test predict leaf
  HostDeviceVector<float> leaf_out_predictions;
  cpu_predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
  for (auto v : h_leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }

  if (dmat->Info().IsColumnSplit()) {
    // Predict contribution is not supported for column split.
    return;
  }

  // Test predict contribution
  HostDeviceVector<float> out_contribution_hdv;
  auto& out_contribution = out_contribution_hdv.HostVector();
  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model);
  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is
    // filled with LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      // Check this row's own bias entry; asserting on back() only ever tested the last row.
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }

  // Test predict contribution (approximate method)
  cpu_predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
  for (size_t i = 0; i < out_contribution.size(); ++i) {
    auto const& contri = out_contribution[i];
    // shift 1 for bias, as test tree is a decision stump, only global bias is
    // filled with LeafValue().
    if ((i + 1) % (kCols + 1) == 0) {
      ASSERT_EQ(contri, 1.5f);
    } else {
      ASSERT_EQ(contri, 0);
    }
  }
}
} // anonymous namespace
TEST(CpuPredictor, Basic) {
Context ctx;
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
namespace {
void TestColumnSplit() {
Context ctx;
size_t constexpr kRows = 5;
size_t constexpr kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
@@ -112,7 +37,7 @@ void TestColumnSplit() {
auto const rank = collective::GetRank();
dmat = std::unique_ptr<DMatrix>{dmat->SliceCol(world_size, rank)};
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
} // anonymous namespace
@@ -132,10 +57,11 @@ TEST(CpuPredictor, IterationRangeColmnSplit) {
}
TEST(CpuPredictor, ExternalMemory) {
Context ctx;
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries);
TestBasic(dmat.get());
TestBasic(dmat.get(), &ctx);
}
TEST(CpuPredictor, InplacePredict) {
@@ -235,12 +161,14 @@ TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
}
TEST(CPUPredictor, CategoricalPredictLeaf) {
TestCategoricalPredictLeaf(false, false);
Context ctx;
TestCategoricalPredictLeaf(&ctx, false);
}
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, false, true);
Context ctx;
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, &ctx, true);
}
TEST(CpuPredictor, UpdatePredictionCache) {

View File

@@ -289,11 +289,13 @@ TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
}
TEST(GPUPredictor, CategoricalPredictLeaf) {
TestCategoricalPredictLeaf(true, false);
auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
TestCategoricalPredictLeaf(&ctx, false);
}
TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, true, true);
auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, &ctx, true);
}
TEST(GPUPredictor, PredictLeafBasic) {

View File

@@ -26,6 +26,79 @@
#include "xgboost/tree_model.h" // for RegTree
namespace xgboost {
/**
 * \brief Run the basic prediction checks against the predictor selected by `ctx`.
 *
 * Uses the model from CreateTestModel and verifies batch, per-instance and leaf prediction,
 * followed by exact and approximate contributions unless the data is column-split.
 *
 * \param dmat Input data; its row and column counts size the expected contribution output.
 * \param ctx  Context handed to CreatePredictorForTest and CreateTestModel.
 */
void TestBasic(DMatrix* dmat, Context const *ctx) {
  auto predictor = std::unique_ptr<Predictor>(CreatePredictorForTest(ctx));

  size_t const kRows = dmat->Info().num_row_;
  size_t const kCols = dmat->Info().num_col_;

  LearnerModelParam mparam{MakeMP(kCols, .0, 1)};

  gbm::GBTreeModel model = CreateTestModel(&mparam, ctx);

  // Test predict batch
  PredictionCacheEntry out_predictions;
  predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
  predictor->PredictBatch(dmat, &out_predictions, model, 0);

  std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
  for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
    ASSERT_EQ(out_predictions_h[i], 1.5);
  }

  // Test predict instance
  auto const& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
  auto page = batch.GetView();
  for (size_t i = 0; i < batch.Size(); i++) {
    std::vector<float> instance_out_predictions;
    predictor->PredictInstance(page[i], &instance_out_predictions, model, 0,
                               dmat->Info().IsColumnSplit());
    ASSERT_EQ(instance_out_predictions[0], 1.5);
  }

  // Test predict leaf
  HostDeviceVector<float> leaf_out_predictions;
  predictor->PredictLeaf(dmat, &leaf_out_predictions, model);
  auto const& h_leaf_out_predictions = leaf_out_predictions.ConstHostVector();
  for (auto v : h_leaf_out_predictions) {
    ASSERT_EQ(v, 0);
  }

  if (dmat->Info().IsColumnSplit()) {
    // Predict contribution is not supported for column split.
    return;
  }

  // Test predict contribution
  HostDeviceVector<float> out_contribution_hdv;
  predictor->PredictContribution(dmat, &out_contribution_hdv, model);
  {
    // The host view is requested after the predictor has written its output, so it does not
    // depend on where the predictor stored the result.
    auto const& out_contribution = out_contribution_hdv.ConstHostVector();
    ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
    for (size_t i = 0; i < out_contribution.size(); ++i) {
      auto const& contri = out_contribution[i];
      // shift 1 for bias, as test tree is a decision stump, only global bias is
      // filled with LeafValue().
      if ((i + 1) % (kCols + 1) == 0) {
        // Check this row's own bias entry; asserting on back() only ever tested the last row.
        ASSERT_EQ(contri, 1.5f);
      } else {
        ASSERT_EQ(contri, 0);
      }
    }
  }

  // Test predict contribution (approximate method)
  predictor->PredictContribution(dmat, &out_contribution_hdv, model, 0, nullptr, true);
  {
    auto const& out_contribution = out_contribution_hdv.ConstHostVector();
    for (size_t i = 0; i < out_contribution.size(); ++i) {
      auto const& contri = out_contribution[i];
      // shift 1 for bias, as test tree is a decision stump, only global bias is
      // filled with LeafValue().
      if ((i + 1) % (kCols + 1) == 0) {
        ASSERT_EQ(contri, 1.5f);
      } else {
        ASSERT_EQ(contri, 0);
      }
    }
  }
}
TEST(Predictor, PredictionCache) {
size_t constexpr kRows = 16, kCols = 4;
@@ -64,7 +137,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
{"num_feature", std::to_string(kCols)},
{"num_class", std::to_string(kClasses)},
{"max_bin", std::to_string(bins)},
{"device", ctx->DeviceName()}});
{"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
learner->Configure();
for (size_t i = 0; i < kIters; ++i) {
@@ -151,7 +224,7 @@ std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMat
size_t iters, size_t forest = 1) {
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
learner->SetParams(
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->IsSycl() ? "cpu" : ctx->DeviceName()}});
for (size_t i = 0; i < iters; ++i) {
learner->UpdateOneIter(i, dmat);
}
@@ -305,11 +378,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split) {
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
}
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
}
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
@@ -320,10 +389,10 @@ void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
float left_weight = 1.3f;
float right_weight = 1.7f;
gbm::GBTreeModel model(&mparam, &ctx);
gbm::GBTreeModel model(&mparam, ctx);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
std::vector<float> row(kCols);
row[split_ind] = split_cat;
@@ -363,7 +432,6 @@ void TestIterationRange(Context const* ctx) {
HostDeviceVector<float> out_predt_sliced;
HostDeviceVector<float> out_predt_ranged;
// margin
{
sliced->Predict(dmat, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
learner->Predict(dmat, true, &out_predt_ranged, 0, lend, false, false, false, false, false);
@@ -519,6 +587,8 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
learner.reset(Learner::Create({Xy}));
learner->LoadModel(model);
learner->SetParam("device", ctx->DeviceName());
learner->Configure();
if (ctx->IsCUDA()) {
learner->SetParam("tree_method", "gpu_hist");

View File

@@ -34,6 +34,8 @@ inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context
inline auto CreatePredictorForTest(Context const* ctx) {
if (ctx->IsCPU()) {
return Predictor::Create("cpu_predictor", ctx);
} else if (ctx->IsSycl()) {
return Predictor::Create("sycl_predictor", ctx);
} else {
return Predictor::Create("gpu_predictor", ctx);
}
@@ -83,6 +85,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
}
}
void TestBasic(DMatrix* dmat, Context const * ctx);
// p_full and p_hist should come from the same data set.
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
@@ -98,7 +102,7 @@ void TestCategoricalPrediction(bool use_gpu, bool is_column_split);
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split);
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split);
void TestIterationRange(Context const* ctx);

View File

@@ -2,6 +2,7 @@
* Copyright (c) 2017-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <xgboost/learner.h> // for Learner
#include <xgboost/logging.h> // for LogCheck_NE, CHECK_NE, LogCheck_EQ
#include <xgboost/objective.h> // for ObjFunction
@@ -81,7 +82,9 @@ TEST(Learner, ParameterValidation) {
// whitespace
learner->SetParam("tree method", "exact");
EXPECT_THROW(learner->Configure(), dmlc::Error);
EXPECT_THAT([&] { learner->Configure(); },
::testing::ThrowsMessage<dmlc::Error>(
::testing::HasSubstr(R"("tree method" contains whitespace)")));
}
TEST(Learner, CheckGroup) {

View File

@@ -19,14 +19,15 @@ auto ZeroParam() {
}
} // anonymous namespace
inline GradientQuantiser DummyRoundingFactor() {
inline GradientQuantiser DummyRoundingFactor(Context const* ctx) {
thrust::device_vector<GradientPair> gpair(1);
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
return {dh::ToSpan(gpair), MetaInfo()};
return {ctx, dh::ToSpan(gpair), MetaInfo()};
}
thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPairPrecise> x) {
auto r = DummyRoundingFactor();
thrust::device_vector<GradientPairInt64> ConvertToInteger(Context const* ctx,
std::vector<GradientPairPrecise> x) {
auto r = DummyRoundingFactor(ctx);
std::vector<GradientPairInt64> y(x.size());
for (std::size_t i = 0; i < x.size(); i++) {
y[i] = r.ToFixedPoint(GradientPair(x[i]));
@@ -41,11 +42,12 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
cuts_.cut_ptrs_.SetDevice(ctx.Device());
cuts_.cut_values_.SetDevice(ctx.Device());
cuts_.min_vals_.SetDevice(ctx.Device());
thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};
thrust::device_vector<GradientPairInt64> feature_histogram{
ConvertToInteger(&ctx, feature_histogram_)};
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
auto d_feature_types = dh::ToSpan(feature_types);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitInputs input{1, 0, quantiser.ToFixedPoint(parent_sum_), dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -60,7 +62,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
ASSERT_EQ(result.thresh, 1);
this->CheckResult(result.loss_chg, result.findex, result.fvalue, result.is_cat,
@@ -90,7 +92,7 @@ TEST(GpuHist, PartitionBasic) {
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
cuts.SetCategorical(true, max_cat);
d_feature_types = dh::ToSpan(feature_types);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitSharedInputs shared_inputs{
param,
quantiser,
@@ -108,10 +110,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
@@ -122,10 +124,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-7.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
@@ -134,10 +136,10 @@ TEST(GpuHist, PartitionBasic) {
{
// All -1.0, gain from splitting should be 0.0
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-3.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
EvaluateSplitInputs input{2, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_FLOAT_EQ(result.loss_chg, 0.0f);
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
@@ -147,10 +149,10 @@ TEST(GpuHist, PartitionBasic) {
// value
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 6.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
EvaluateSplitInputs input{3, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
EXPECT_EQ(result.dir, kLeftDir);
@@ -160,10 +162,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
EvaluateSplitInputs input{4, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.dir, kLeftDir);
EXPECT_EQ(cats, std::bitset<32>("10100000000000000000000000000000"));
@@ -173,10 +175,10 @@ TEST(GpuHist, PartitionBasic) {
// -1.0s go right
// -3.0s go left
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
auto feature_histogram = ConvertToInteger({{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
auto feature_histogram = ConvertToInteger(&ctx, {{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
EvaluateSplitInputs input{5, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(cats, std::bitset<32>("01000000000000000000000000000000"));
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
@@ -205,7 +207,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
cuts.SetCategorical(true, max_cat);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitSharedInputs shared_inputs{param,
quantiser,
d_feature_types,
@@ -220,10 +222,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
auto feature_histogram = ConvertToInteger(
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
@@ -233,10 +235,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
auto feature_histogram = ConvertToInteger(
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
@@ -266,7 +268,7 @@ TEST(GpuHist, PartitionTwoNodes) {
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
cuts.SetCategorical(true, max_cat);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
EvaluateSplitSharedInputs shared_inputs{param,
quantiser,
d_feature_types,
@@ -283,15 +285,16 @@ TEST(GpuHist, PartitionTwoNodes) {
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
auto feature_histogram_a = ConvertToInteger(
{{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
&ctx, {{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
thrust::device_vector<EvaluateSplitInputs> inputs(2);
inputs[0] = EvaluateSplitInputs{0, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram_a)};
auto feature_histogram_b = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
auto feature_histogram_b = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
inputs[1] = EvaluateSplitInputs{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram_b)};
thrust::device_vector<GPUExpandEntry> results(2);
evaluator.EvaluateSplits({0, 1}, 1, dh::ToSpan(inputs), shared_inputs, dh::ToSpan(results));
evaluator.EvaluateSplits(&ctx, {0, 1}, 1, dh::ToSpan(inputs), shared_inputs,
dh::ToSpan(results));
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),
std::bitset<32>("10000000000000000000000000000000"));
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),
@@ -301,7 +304,7 @@ TEST(GpuHist, PartitionTwoNodes) {
void TestEvaluateSingleSplit(bool is_categorical) {
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
@@ -311,7 +314,8 @@ void TestEvaluateSingleSplit(bool is_categorical) {
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
auto feature_histogram =
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
common::Span<FeatureType> d_feature_types;
@@ -336,7 +340,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 1);
if (is_categorical) {
@@ -352,7 +356,8 @@ TEST(GpuHist, EvaluateSingleSplit) { TestEvaluateSingleSplit(false); }
TEST(GpuHist, EvaluateSingleCategoricalSplit) { TestEvaluateSingleSplit(true); }
TEST(GpuHist, EvaluateSingleSplitMissing) {
auto quantiser = DummyRoundingFactor();
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{1.0, 1.5});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
@@ -361,7 +366,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}});
auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -373,7 +378,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
false};
GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 0);
EXPECT_EQ(result.fvalue, 1.0);
@@ -383,14 +388,15 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
}
TEST(GpuHist, EvaluateSingleSplitEmpty) {
auto ctx = MakeCUDACtx(0);
TrainParam tparam = ZeroParam();
GPUHistEvaluator evaluator(tparam, 1, FstCU());
DeviceSplitCandidate result =
evaluator
.EvaluateSingleSplit(
EvaluateSplitInputs{},
&ctx, EvaluateSplitInputs{},
EvaluateSplitSharedInputs{
GPUTrainingParam(tparam), DummyRoundingFactor(), {}, {}, {}, {}, false})
GPUTrainingParam(tparam), DummyRoundingFactor(&ctx), {}, {}, {}, {}, false})
.split;
EXPECT_EQ(result.findex, -1);
EXPECT_LT(result.loss_chg, 0.0f);
@@ -398,7 +404,8 @@ TEST(GpuHist, EvaluateSingleSplitEmpty) {
// Feature 0 has a better split, but the algorithm must select feature 1
TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
auto quantiser = DummyRoundingFactor();
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
tparam.UpdateAllowUnknown(Args{});
@@ -408,7 +415,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
auto feature_histogram = ConvertToInteger({{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
auto feature_histogram =
ConvertToInteger(&ctx, {{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -420,7 +428,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
false};
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 1);
EXPECT_EQ(result.fvalue, 11.0);
@@ -430,7 +438,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
// Features 0 and 1 have identical gain, the algorithm must select 0
TEST(GpuHist, EvaluateSingleSplitBreakTies) {
auto quantiser = DummyRoundingFactor();
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
tparam.UpdateAllowUnknown(Args{});
@@ -440,7 +449,8 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
auto feature_histogram =
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram)};
EvaluateSplitSharedInputs shared_inputs{param,
@@ -452,15 +462,16 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
false};
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 0);
EXPECT_EQ(result.fvalue, 1.0);
}
TEST(GpuHist, EvaluateSplits) {
auto ctx = MakeCUDACtx(0);
thrust::device_vector<DeviceSplitCandidate> out_splits(2);
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
tparam.UpdateAllowUnknown(Args{});
@@ -471,9 +482,9 @@ TEST(GpuHist, EvaluateSplits) {
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
auto feature_histogram_left =
ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
auto feature_histogram_right =
ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
EvaluateSplitInputs input_left{1, 0, parent_sum, dh::ToSpan(feature_set),
dh::ToSpan(feature_histogram_left)};
EvaluateSplitInputs input_right{2, 0, parent_sum, dh::ToSpan(feature_set),
@@ -514,7 +525,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());
// Convert the sample histogram to fixed point
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
thrust::host_vector<GradientPairInt64> h_hist;
for (auto e : hist_[0]) {
h_hist.push_back(quantiser.ToFixedPoint(e));
@@ -531,7 +542,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
cuts_.cut_values_.ConstDeviceSpan(),
cuts_.min_vals_.ConstDeviceSpan(),
false};
auto split = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
auto split = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
ASSERT_NEAR(split.loss_chg, best_score_, 1e-2);
}
@@ -541,7 +552,7 @@ namespace {
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
auto ctx = MakeCUDACtx(GPUIDX);
auto rank = collective::GetRank();
auto quantiser = DummyRoundingFactor();
auto quantiser = DummyRoundingFactor(&ctx);
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
@@ -552,8 +563,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
auto feature_histogram = rank == 0 ? ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}})
: ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}});
auto feature_histogram = rank == 0 ? ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}})
: ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}});
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
common::Span<FeatureType> d_feature_types;
@@ -576,7 +587,7 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
if (is_categorical) {

View File

@@ -37,7 +37,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
sizeof(GradientPairInt64));
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
d_histogram, quantiser);
@@ -51,7 +51,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
auto d_new_histogram = dh::ToSpan(new_histogram);
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
d_new_histogram, quantiser);
@@ -129,7 +129,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
auto gpair = GenerateRandomGradients(kRows, 0, 2);
gpair.SetDevice(DeviceOrd::CUDA(0));
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
/**
* Generate hist with cat data.
*/

View File

@@ -28,7 +28,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
Context ctx;
ctx.nthread = 4;
int static constexpr kRows = 8, kCols = 16;
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
TrainParam param;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
@@ -102,7 +102,7 @@ TEST(HistMultiEvaluator, Evaluate) {
TrainParam param;
param.Init(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
std::size_t n_samples = 3;
bst_feature_t n_features = 2;
@@ -166,7 +166,7 @@ TEST(HistEvaluator, Apply) {
TrainParam param;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
auto evaluator_ = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0};
@@ -194,7 +194,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
Context ctx;
// check the evaluator is returning the optimal split
std::vector<FeatureType> ft{FeatureType::kCategorical};
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
HistEvaluator evaluator{&ctx, &param_, info_, sampler};
evaluator.InitRoot(GradStats{total_gpair_});
RegTree tree;
@@ -224,7 +224,7 @@ auto CompareOneHotAndPartition(bool onehot) {
auto dmat =
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
HistMakerTrainParam hist_param;
@@ -271,7 +271,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
ASSERT_EQ(node_hist.size(), feature_histogram_.size());
std::copy(feature_histogram_.cbegin(), feature_histogram_.cend(), node_hist.begin());
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
MetaInfo info;
info.num_col_ = 1;
info.feature_types = {FeatureType::kCategorical};

View File

@@ -181,7 +181,7 @@ void TestSyncHist(bool is_distributed) {
histogram.Buffer().Reset(1, n_nodes, space, target_hists);
// sync hist
histogram.SyncHistogram(&tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
histogram.SyncHistogram(&ctx, &tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
using GHistRowT = common::GHistRow;
auto check_hist = [](const GHistRowT parent, const GHistRowT left, const GHistRowT right,
@@ -266,7 +266,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
histogram.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,
linalg::MakeTensorView(&ctx, gpair, gpair.size()), force_read_by_column);
}
histogram.SyncHistogram(&tree, nodes_to_build, {});
histogram.SyncHistogram(&ctx, &tree, nodes_to_build, {});
// Check if number of histogram bins is correct
ASSERT_EQ(histogram.Histogram()[nid].size(), gmat.cut.Ptrs().back());
@@ -366,7 +366,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
force_read_by_column);
}
cat_hist.SyncHistogram(&tree, nodes_to_build, {});
cat_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
/**
* Generate hist with one hot encoded data.
@@ -382,7 +382,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
force_read_by_column);
}
onehot_hist.SyncHistogram(&tree, nodes_to_build, {});
onehot_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
auto cat = cat_hist.Histogram()[0];
auto onehot = onehot_hist.Histogram()[0];
@@ -451,7 +451,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
force_read_by_column);
++page_idx;
}
multi_build.SyncHistogram(&tree, nodes, {});
multi_build.SyncHistogram(ctx, &tree, nodes, {});
multi_page = multi_build.Histogram()[RegTree::kRoot];
}
@@ -480,7 +480,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
single_build.BuildHist(0, space, gmat, row_set_collection, nodes,
linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),
force_read_by_column);
single_build.SyncHistogram(&tree, nodes, {});
single_build.SyncHistogram(ctx, &tree, nodes, {});
single_page = single_build.Histogram()[RegTree::kRoot];
}
@@ -570,7 +570,7 @@ class OverflowTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {
CHECK_NE(partitioners.front()[tree.RightChild(best.nid)].Size(), 0);
hist_builder.BuildHistLeftRight(
Xy.get(), &tree, partitioners, valid_candidates,
&ctx, Xy.get(), &tree, partitioners, valid_candidates,
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size(), 1), batch);
if (limit) {

View File

@@ -1,3 +1,6 @@
/**
* Copyright 2019-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/logging.h>
@@ -9,9 +12,7 @@
#include "../../../src/tree/hist/evaluate_splits.h"
#include "../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
TEST(CPUFeatureInteractionConstraint, Empty) {
TrainParam param;
param.UpdateAllowUnknown(Args{});
@@ -77,7 +78,7 @@ TEST(CPUMonoConstraint, Basic) {
param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
auto sampler = std::make_shared<common::ColumnSampler>();
auto sampler = std::make_shared<common::ColumnSampler>(1u);
HistEvaluator evalutor{&ctx, &param, Xy->Info(), sampler};
evalutor.InitRoot(GradStats{2.0, 2.0});
@@ -90,5 +91,4 @@ TEST(CPUMonoConstraint, Basic) {
ASSERT_TRUE(evalutor.Evaluator().has_constraint);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -111,7 +111,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
maker.hist.AllocateHistograms({0});
maker.gpair = gpair.DeviceSpan();
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair, MetaInfo());
maker.quantiser = std::make_unique<GradientQuantiser>(&ctx, maker.gpair, MetaInfo());
maker.page = page.get();
maker.InitFeatureGroupsOnce();
@@ -163,12 +163,6 @@ HistogramCutsWrapper GetHostCutMatrix () {
return cmat;
}
inline GradientQuantiser DummyRoundingFactor() {
thrust::device_vector<GradientPair> gpair(1);
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
return {dh::ToSpan(gpair), MetaInfo()};
}
void TestHistogramIndexImpl() {
// Test if the compressed histogram index matches when using a sparse
// dmatrix with and without using external memory