enable ROCm on latest XGBoost

This commit is contained in:
Hui Liu
2023-10-23 11:07:08 -07:00
328 changed files with 8028 additions and 3642 deletions

View File

@@ -1,17 +1,17 @@
if (USE_DMLC_GTEST)
if (NOT TARGET gtest)
if(USE_DMLC_GTEST)
if(NOT TARGET gtest)
message(FATAL_ERROR "USE_DMLC_GTEST=ON but dmlc-core didn't bundle gtest")
endif (NOT TARGET gtest)
endif()
set(GTEST_LIBRARIES gtest)
else (USE_DMLC_GTEST)
else()
find_package(GTest REQUIRED)
endif (USE_DMLC_GTEST)
endif()
file(GLOB_RECURSE TEST_SOURCES "*.cc")
if (USE_CUDA)
if(USE_CUDA)
file(GLOB_RECURSE CUDA_TEST_SOURCES "*.cu")
list(APPEND TEST_SOURCES ${CUDA_TEST_SOURCES})
endif (USE_CUDA)
endif()
if (USE_HIP)
file(GLOB_RECURSE HIP_TEST_SOURCES "*.hip")
@@ -19,24 +19,24 @@ if (USE_HIP)
endif (USE_HIP)
file(GLOB_RECURSE ONEAPI_TEST_SOURCES "plugin/*_oneapi.cc")
if (NOT PLUGIN_UPDATER_ONEAPI)
if(NOT PLUGIN_UPDATER_ONEAPI)
list(REMOVE_ITEM TEST_SOURCES ${ONEAPI_TEST_SOURCES})
endif (NOT PLUGIN_UPDATER_ONEAPI)
endif()
if (PLUGIN_FEDERATED)
if(PLUGIN_FEDERATED)
target_include_directories(testxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/federated)
target_link_libraries(testxgboost PRIVATE federated_client)
else (PLUGIN_FEDERATED)
else()
file(GLOB_RECURSE FEDERATED_TEST_SOURCES "plugin/*_federated_*.*")
list(REMOVE_ITEM TEST_SOURCES ${FEDERATED_TEST_SOURCES})
endif (PLUGIN_FEDERATED)
endif()
target_sources(testxgboost PRIVATE ${TEST_SOURCES} ${xgboost_SOURCE_DIR}/plugin/example/custom_obj.cc)
if (USE_CUDA AND PLUGIN_RMM)
if(USE_CUDA AND PLUGIN_RMM)
find_package(CUDA)
target_include_directories(testxgboost PRIVATE ${CUDA_INCLUDE_DIRS})
endif (USE_CUDA AND PLUGIN_RMM)
endif()
if (USE_HIP AND PLUGIN_RMM)
find_package(HIP)

View File

@@ -108,6 +108,7 @@ TEST(CAPI, XGDMatrixCreateFromCSR) {
Json::Dump(data_arr, &sdata);
Json config{Object{}};
config["missing"] = Number{std::numeric_limits<float>::quiet_NaN()};
config["data_split_mode"] = Integer{static_cast<int64_t>(DataSplitMode::kCol)};
Json::Dump(config, &sconfig);
DMatrixHandle handle;
@@ -120,6 +121,8 @@ TEST(CAPI, XGDMatrixCreateFromCSR) {
ASSERT_EQ(n, 3);
ASSERT_EQ(XGDMatrixNumNonMissing(handle, &n), 0);
ASSERT_EQ(n, 3);
ASSERT_EQ(XGDMatrixDataSplitMode(handle, &n), 0);
ASSERT_EQ(n, static_cast<int64_t>(DataSplitMode::kCol));
std::shared_ptr<xgboost::DMatrix> *pp_fmat =
static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);

View File

@@ -0,0 +1,41 @@
/**
* Copyright 2022-2023, XGBoost Contributors
*/
#pragma once
#include <gtest/gtest.h>
#include <xgboost/collective/socket.h>
#include <fstream> // ifstream
#include "../helpers.h" // for FileExists
namespace xgboost::collective {
/**
 * \brief Test fixture that calls system::SocketStartup() before and
 *        system::SocketFinalize() after every test, and provides a helper for deciding
 *        whether IPv6 tests should be skipped.
 */
class SocketTest : public ::testing::Test {
 protected:
  /** \brief Message passed to GTEST_SKIP_ by tests that skip their IPv6 part. */
  std::string skip_msg_{"Skipping IPv6 test"};

  /**
   * \brief Decide whether IPv6 tests should be skipped.
   *
   * Reads `/sys/module/ipv6/parameters/disable`.
   *
   * \return true when the file is missing, cannot be opened, does not start with an
   *         integer, or holds a non-zero value; false when it holds 0.
   */
  bool SkipTest() {
    std::string path{"/sys/module/ipv6/parameters/disable"};
    if (!FileExists(path)) {
      return true;
    }
    std::ifstream fin(path);
    if (!fin) {
      return true;
    }
    // Parse straight from the stream. Unlike std::stoi this does not throw on an empty or
    // non-numeric file; the extraction simply fails and the test is skipped.
    int value{0};
    if (!(fin >> value)) {
      return true;
    }
    return value != 0;
  }

 protected:
  void SetUp() override { system::SocketStartup(); }
  void TearDown() override { system::SocketFinalize(); }
};
} // namespace xgboost::collective

View File

@@ -0,0 +1,117 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h> // for ASSERT_EQ
#include <xgboost/span.h> // for Span, oper...
#include <algorithm> // for min
#include <chrono> // for seconds
#include <cstddef> // for size_t
#include <cstdint> // for int32_t
#include <numeric> // for iota
#include <string> // for string
#include <thread> // for thread
#include <vector> // for vector
#include "../../../src/collective/allgather.h" // for RingAllgather
#include "../../../src/collective/comm.h" // for RabitComm
#include "gtest/gtest.h" // for AssertionR...
#include "test_worker.h" // for TestDistri...
#include "xgboost/collective/result.h" // for Result
namespace xgboost::collective {
namespace {
// Fixture for the allgather tests; adds nothing on top of TrackerTest.
class AllgatherTest : public TrackerTest {};
/**
 * \brief Test worker that runs RingAllgather / RingAllgatherV on its communicator and
 *        checks the gathered values.
 */
class Worker : public WorkerForTest {
 public:
  using WorkerForTest::WorkerForTest;

  /**
   * \brief Fixed-size allgather: first one element per rank, then 8192 elements per rank
   *        after limiting the socket buffer size.
   */
  void Run() {
    {
      // basic test
      std::vector<std::int32_t> data(comm_.World(), 0);
      data[comm_.Rank()] = comm_.Rank();

      auto rc = RingAllgather(this->comm_, common::Span{data.data(), data.size()}, 1);
      ASSERT_TRUE(rc.OK()) << rc.Report();

      for (std::int32_t r = 0; r < comm_.World(); ++r) {
        ASSERT_EQ(data[r], r);
      }
    }
    {
      // test for limited socket buffer
      this->LimitSockBuf(4096);

      std::size_t n = 8192;  // n_bytes = 8192 * sizeof(int)
      std::vector<std::int32_t> data(comm_.World() * n, 0);
      auto s_data = common::Span{data.data(), data.size()};
      // Each rank fills its own segment with rank, rank + 1, ...
      auto seg = s_data.subspan(comm_.Rank() * n, n);
      std::iota(seg.begin(), seg.end(), comm_.Rank());

      auto rc = RingAllgather(comm_, common::Span{data.data(), data.size()}, n);
      ASSERT_TRUE(rc.OK()) << rc.Report();

      for (std::int32_t r = 0; r < comm_.World(); ++r) {
        auto peer_seg = s_data.subspan(r * n, n);
        for (std::int32_t i = 0; i < static_cast<std::int32_t>(peer_seg.size()); ++i) {
          auto v = peer_seg[i];
          ASSERT_EQ(v, r + i);
        }
      }
    }
  }

  /**
   * \brief Variable-size allgather: first one element per rank, then rank + 1 elements per
   *        rank.
   */
  void TestV() {
    {
      // basic test
      std::int32_t n{comm_.Rank()};
      std::vector<std::int32_t> result;
      auto rc = RingAllgatherV(comm_, common::Span{&n, 1}, &result);
      ASSERT_TRUE(rc.OK()) << rc.Report();
      // Check the size first so a short result fails the test instead of being read out of
      // bounds by the loop below.
      ASSERT_EQ(result.size(), static_cast<std::size_t>(comm_.World()));
      for (std::int32_t i = 0; i < comm_.World(); ++i) {
        ASSERT_EQ(result[i], i);
      }
    }
    {
      // V test
      std::vector<std::int32_t> data(comm_.Rank() + 1, comm_.Rank());
      std::vector<std::int32_t> result;
      auto rc = RingAllgatherV(comm_, common::Span{data.data(), data.size()}, &result);
      ASSERT_TRUE(rc.OK()) << rc.Report();
      // Rank r contributes r + 1 elements, so the total is 1 + 2 + ... + world.
      ASSERT_EQ(result.size(),
                static_cast<std::size_t>((1 + comm_.World()) * comm_.World() / 2));

      auto s_result = common::Span{result.data(), result.size()};
      std::size_t k{0};
      for (std::int32_t r = 0; r < comm_.World(); ++r) {
        auto seg = s_result.subspan(k, static_cast<std::size_t>(r + 1));
        // NOTE(review): contents are verified on rank 0 only, as in the original test;
        // confirm whether the other ranks should be verified as well.
        if (comm_.Rank() == 0) {
          for (auto v : seg) {
            ASSERT_EQ(v, r);
          }
        }
        // Advance the offset on every rank so `seg` always refers to rank r's segment.
        k += seg.size();
      }
    }
  }
};
} // namespace
TEST_F(AllgatherTest, Basic) {
  // hardware_concurrency() returns 0 when the value cannot be determined; clamp to at
  // least one worker so the distributed run is never started with zero workers.
  std::int32_t n_workers = std::max(1u, std::min(7u, std::thread::hardware_concurrency()));
  TestDistributed(n_workers, [=](std::string host, std::int32_t port,
                                 std::chrono::seconds timeout, std::int32_t r) {
    Worker worker{host, port, timeout, n_workers, r};
    worker.Run();
  });
}
TEST_F(AllgatherTest, V) {
  // hardware_concurrency() returns 0 when the value cannot be determined; clamp to at
  // least one worker so the distributed run is never started with zero workers.
  std::int32_t n_workers = std::max(1u, std::min(7u, std::thread::hardware_concurrency()));
  TestDistributed(n_workers, [=](std::string host, std::int32_t port,
                                 std::chrono::seconds timeout, std::int32_t r) {
    Worker worker{host, port, timeout, n_workers, r};
    worker.TestV();
  });
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,72 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include "../../../src/collective/allreduce.h"
#include "../../../src/collective/tracker.h"
#include "test_worker.h" // for WorkerForTest, TestDistributed
namespace xgboost::collective {
namespace {
/**
 * \brief Test worker that runs element-wise sum reductions through Allreduce.
 *
 * NOTE(review): whatever Allreduce returns is not inspected in these tests; confirm
 * whether it reports a status that should be asserted on.
 */
class AllreduceWorker : public WorkerForTest {
 public:
  using WorkerForTest::WorkerForTest;

  /** \brief Sum-reduce a vector of zeros, then a single 1.0 contributed by each worker. */
  void Basic() {
    {
      // Summing zeros must leave every element at zero.
      std::vector<double> zeros(13, 0.0);
      Allreduce(comm_, common::Span{zeros.data(), zeros.size()}, [](auto in, auto out) {
        for (std::size_t k = 0; k < out.size(); ++k) {
          out[k] += in[k];
        }
      });
      ASSERT_EQ(std::accumulate(zeros.cbegin(), zeros.cend(), 0.0), 0.0);
    }
    {
      // Every worker contributes 1.0, so the expected sum is the world size.
      std::vector<double> ones(1, 1.0);
      Allreduce(comm_, common::Span{ones.data(), ones.size()}, [](auto in, auto out) {
        for (std::size_t k = 0; k < out.size(); ++k) {
          out[k] += in[k];
        }
      });
      ASSERT_EQ(ones[0], static_cast<double>(comm_.World()));
    }
  }

  /** \brief Sum-reduce 314 elements of 1.5 and expect 1.5 * world size in each slot. */
  void Acc() {
    std::vector<double> values(314, 1.5);
    Allreduce(comm_, common::Span{values.data(), values.size()}, [](auto in, auto out) {
      for (std::size_t k = 0; k < out.size(); ++k) {
        out[k] += in[k];
      }
    });
    for (std::size_t idx = 0; idx < values.size(); ++idx) {
      auto got = values[idx];
      ASSERT_EQ(got, 1.5 * static_cast<double>(comm_.World())) << idx;
    }
  }
};
// Fixture for the allreduce tests; adds nothing on top of SocketTest.
class AllreduceTest : public SocketTest {};
} // namespace
TEST_F(AllreduceTest, Basic) {
  // hardware_concurrency() returns 0 when the value cannot be determined; clamp to at
  // least one worker so the distributed run is never started with zero workers.
  std::int32_t n_workers = std::max(1u, std::min(7u, std::thread::hardware_concurrency()));
  TestDistributed(n_workers, [=](std::string host, std::int32_t port,
                                 std::chrono::seconds timeout, std::int32_t r) {
    AllreduceWorker worker{host, port, timeout, n_workers, r};
    worker.Basic();
  });
}
TEST_F(AllreduceTest, Sum) {
  // hardware_concurrency() returns 0 when the value cannot be determined; clamp to at
  // least one worker so the distributed run is never started with zero workers.
  std::int32_t n_workers = std::max(1u, std::min(7u, std::thread::hardware_concurrency()));
  TestDistributed(n_workers, [=](std::string host, std::int32_t port,
                                 std::chrono::seconds timeout, std::int32_t r) {
    AllreduceWorker worker{host, port, timeout, n_workers, r};
    worker.Acc();
  });
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,51 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/collective/socket.h>
#include <cstdint> // for int32_t
#include <string> // for string
#include <thread> // for thread
#include <vector> // for vector
#include "../../../src/collective/broadcast.h" // for Broadcast
#include "../../../src/collective/tracker.h" // for GetHostAddress
#include "test_worker.h" // for WorkerForTest, TestDistributed
namespace xgboost::collective {
namespace {
/**
 * \brief Test worker that broadcasts from every rank in turn and checks the received data.
 */
class Worker : public WorkerForTest {
 public:
  using WorkerForTest::WorkerForTest;

  /**
   * \brief Broadcast a buffer filled with the local rank from each root `r`; afterwards the
   *        buffer must hold `r`. Runs once with a single element and once with 1 << 16.
   */
  void Run() {
    for (std::int32_t r = 0; r < comm_.World(); ++r) {
      // basic test
      std::vector<std::int32_t> data(1, comm_.Rank());
      auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);
      ASSERT_TRUE(rc.OK()) << rc.Report();
      ASSERT_EQ(data[0], r);
    }

    for (std::int32_t r = 0; r < comm_.World(); ++r) {
      std::vector<std::int32_t> data(1 << 16, comm_.Rank());
      auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);
      ASSERT_TRUE(rc.OK()) << rc.Report();
      // Check the whole buffer, not just the first element, so that a partially
      // transferred payload is detected.
      for (auto v : data) {
        ASSERT_EQ(v, r);
      }
    }
  }
};
// Fixture for the broadcast tests; adds nothing on top of SocketTest.
class BroadcastTest : public SocketTest {};
} // namespace
TEST_F(BroadcastTest, Basic) {
  // hardware_concurrency() returns 0 when the value cannot be determined; clamp to at
  // least one worker so the distributed run is never started with zero workers.
  std::int32_t n_workers = std::max(1u, std::min(7u, std::thread::hardware_concurrency()));
  TestDistributed(n_workers, [=](std::string host, std::int32_t port,
                                 std::chrono::seconds timeout, std::int32_t r) {
    Worker worker{host, port, timeout, n_workers, r};
    worker.Run();
  });
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,47 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include "../../../src/collective/comm.h"
#include "test_worker.h"
namespace xgboost::collective {
namespace {
// Fixture for the communicator tests; adds nothing on top of TrackerTest.
class CommTest : public TrackerTest {};
} // namespace
// Pairs up workers (0 <-> 1, 2 <-> 3): each even rank sends its rank over the channel to
// the next rank, and each odd rank receives it from the previous rank and checks the value.
TEST_F(CommTest, Channel) {
  // Local worker count used for this test (hides the fixture member of the same name).
  auto n_workers = 4;
  RabitTracker tracker{host, n_workers, 0, timeout};
  auto fut = tracker.Run();

  std::vector<std::thread> workers;
  std::int32_t port = tracker.Port();
  for (std::int32_t rank = 0; rank < n_workers; ++rank) {
    workers.emplace_back([=] {
      WorkerForTest worker{host, port, timeout, n_workers, rank};
      bool const is_sender = (rank % 2 == 0);
      if (is_sender) {
        // Even rank: send own rank to the right-hand neighbour.
        auto p_chan = worker.Comm().Chan(rank + 1);
        p_chan->SendAll(
            EraseType(common::Span<std::int32_t const>{&rank, static_cast<std::size_t>(1)}));
        auto rc = p_chan->Block();
        ASSERT_TRUE(rc.OK()) << rc.Report();
      } else {
        // Odd rank: receive from the left-hand neighbour and verify the payload.
        auto p_chan = worker.Comm().Chan(rank - 1);
        std::int32_t received{-1};
        p_chan->RecvAll(
            EraseType(common::Span<std::int32_t>{&received, static_cast<std::size_t>(1)}));
        auto rc = p_chan->Block();
        ASSERT_TRUE(rc.OK()) << rc.Report();
        ASSERT_EQ(received, rank - 1);
      }
    });
  }

  for (auto &th : workers) {
    th.join();
  }
  ASSERT_TRUE(fut.get().OK());
}
} // namespace xgboost::collective

View File

@@ -29,6 +29,11 @@ class InMemoryCommunicatorTest : public ::testing::Test {
VerifyAllgather(comm, rank);
}
static void AllgatherV(int rank) {
InMemoryCommunicator comm{kWorldSize, rank};
VerifyAllgatherV(comm, rank);
}
static void AllreduceMax(int rank) {
InMemoryCommunicator comm{kWorldSize, rank};
VerifyAllreduceMax(comm, rank);
@@ -80,14 +85,19 @@ class InMemoryCommunicatorTest : public ::testing::Test {
protected:
static void VerifyAllgather(InMemoryCommunicator &comm, int rank) {
char buffer[kWorldSize] = {'a', 'b', 'c'};
buffer[rank] = '0' + rank;
comm.AllGather(buffer, kWorldSize);
std::string input{static_cast<char>('0' + rank)};
auto output = comm.AllGather(input);
for (auto i = 0; i < kWorldSize; i++) {
EXPECT_EQ(buffer[i], '0' + i);
EXPECT_EQ(output[i], static_cast<char>('0' + i));
}
}
static void VerifyAllgatherV(InMemoryCommunicator &comm, int rank) {
std::vector<std::string_view> inputs{"a", "bb", "ccc"};
auto output = comm.AllGatherV(inputs[rank]);
EXPECT_EQ(output, "abbccc");
}
static void VerifyAllreduceMax(InMemoryCommunicator &comm, int rank) {
int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank};
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMax);
@@ -205,6 +215,8 @@ TEST(InMemoryCommunicatorSimpleTest, IsDistributed) {
TEST_F(InMemoryCommunicatorTest, Allgather) { Verify(&Allgather); }
TEST_F(InMemoryCommunicatorTest, AllgatherV) { Verify(&AllgatherV); }
TEST_F(InMemoryCommunicatorTest, AllreduceMax) { Verify(&AllreduceMax); }
TEST_F(InMemoryCommunicatorTest, AllreduceMin) { Verify(&AllreduceMin); }

View File

@@ -0,0 +1,81 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h> // for ASSERT_TRUE, ASSERT_EQ
#include <xgboost/collective/socket.h> // for TCPSocket, Connect, SocketFinalize, SocketStartup
#include <xgboost/string_view.h> // for StringView
#include <chrono> // for seconds
#include <cstdint> // for int8_t
#include <memory> // for make_shared, shared_ptr
#include <system_error> // for make_error_code, errc
#include <utility> // for pair
#include <vector> // for vector
#include "../../../src/collective/loop.h" // for Loop
namespace xgboost::collective {
namespace {
/**
 * \brief Fixture that sets up a connected pair of non-blocking IPv4 TCP sockets on the
 *        loopback address and a Loop with a one second timeout.
 */
class LoopTest : public ::testing::Test {
 protected:
  // first: listening socket, replaced by the accepted connection in SetUp.
  // second: the socket passed to Connect.
  std::pair<TCPSocket, TCPSocket> pair_;
  std::shared_ptr<Loop> loop_;

 protected:
  void SetUp() override {
    system::SocketStartup();
    std::chrono::seconds timeout{1};

    auto domain = SockDomain::kV4;
    pair_.first = TCPSocket::Create(domain);
    auto port = pair_.first.BindHost();
    pair_.first.Listen();

    // Hold the address by value. Binding a reference to the result of a call on the
    // temporary returned by Loopback() would dangle if Addr() returns a reference.
    auto const addr = SockAddrV4::Loopback().Addr();
    auto rc = Connect(StringView{addr}, port, 1, timeout, &pair_.second);
    ASSERT_TRUE(rc.OK());
    rc = pair_.second.NonBlocking(true);
    ASSERT_TRUE(rc.OK());

    // Replace the listening socket with the accepted connection.
    pair_.first = pair_.first.Accept();
    rc = pair_.first.NonBlocking(true);
    ASSERT_TRUE(rc.OK());

    loop_ = std::make_shared<Loop>(timeout);
  }

  void TearDown() override {
    // Reset both sockets before finalizing the socket system.
    pair_ = decltype(pair_){};
    system::SocketFinalize();
  }
};
} // namespace
// Submits a read with no matching write and expects Block() to fail with timed_out.
TEST_F(LoopTest, Timeout) {
  std::vector<std::int8_t> buf(1);
  Loop::Op read_op{Loop::Op::kRead, 0, buf.data(), buf.size(), &pair_.second, 0};
  loop_->Submit(read_op);

  auto status = loop_->Block();
  ASSERT_FALSE(status.OK());
  ASSERT_EQ(status.Code(), std::make_error_code(std::errc::timed_out)) << status.Report();
}
// Submits one write on the first socket and one read on the second, then checks that the
// byte read equals the byte written.
TEST_F(LoopTest, Op) {
  TCPSocket& sender = pair_.first;
  TCPSocket& receiver = pair_.second;

  std::vector<std::int8_t> out_buf(1, 1);
  std::vector<std::int8_t> in_buf(1, 0);

  Loop::Op write_op{Loop::Op::kWrite, 0, out_buf.data(), out_buf.size(), &sender, 0};
  Loop::Op read_op{Loop::Op::kRead, 0, in_buf.data(), in_buf.size(), &receiver, 0};
  loop_->Submit(write_op);
  loop_->Submit(read_op);

  auto status = loop_->Block();
  ASSERT_TRUE(status.OK()) << status.Report();
  ASSERT_EQ(in_buf[0], out_buf[0]);
}
} // namespace xgboost::collective

View File

@@ -38,7 +38,7 @@ void VerifyAllReduceBitwiseAND() {
auto const rank = collective::GetRank();
std::bitset<64> original{};
original[rank] = true;
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
collective::AllReduce<collective::Operation::kBitwiseAND>(rank, buffer.DevicePointer(), 1);
collective::Synchronize(rank);
EXPECT_EQ(buffer.HostVector()[0], 0ULL);
@@ -60,7 +60,7 @@ void VerifyAllReduceBitwiseOR() {
auto const rank = collective::GetRank();
std::bitset<64> original{};
original[rank] = true;
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
collective::AllReduce<collective::Operation::kBitwiseOR>(rank, buffer.DevicePointer(), 1);
collective::Synchronize(rank);
EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
@@ -82,7 +82,7 @@ void VerifyAllReduceBitwiseXOR() {
auto const rank = collective::GetRank();
std::bitset<64> original{~0ULL};
original[rank] = false;
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
collective::AllReduce<collective::Operation::kBitwiseXOR>(rank, buffer.DevicePointer(), 1);
collective::Synchronize(rank);
EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);

View File

@@ -1,19 +1,16 @@
/**
* Copyright 2022-2023 by XGBoost Contributors
* Copyright 2022-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/collective/socket.h>
#include <cerrno> // EADDRNOTAVAIL
#include <fstream> // ifstream
#include <system_error> // std::error_code, std::system_category
#include "../helpers.h"
#include "test_worker.h" // for SocketTest
namespace xgboost::collective {
TEST(Socket, Basic) {
system::SocketStartup();
TEST_F(SocketTest, Basic) {
SockAddress addr{SockAddrV6::Loopback()};
ASSERT_TRUE(addr.IsV6());
addr = SockAddress{SockAddrV4::Loopback()};
@@ -54,23 +51,27 @@ TEST(Socket, Basic) {
run_test(SockDomain::kV4);
std::string path{"/sys/module/ipv6/parameters/disable"};
if (FileExists(path)) {
std::ifstream fin(path);
if (!fin) {
GTEST_SKIP_(msg.c_str());
}
std::string s_value;
fin >> s_value;
auto value = std::stoi(s_value);
if (value != 0) {
GTEST_SKIP_(msg.c_str());
}
} else {
GTEST_SKIP_(msg.c_str());
if (SkipTest()) {
GTEST_SKIP_(skip_msg_.c_str());
}
run_test(SockDomain::kV6);
}
system::SocketFinalize();
TEST_F(SocketTest, Bind) {
auto run = [](SockDomain domain) {
auto any =
domain == SockDomain::kV4 ? SockAddrV4::InaddrAny().Addr() : SockAddrV6::InaddrAny().Addr();
auto sock = TCPSocket::Create(domain);
std::int32_t port{0};
auto rc = sock.Bind(any, &port);
ASSERT_TRUE(rc.OK());
ASSERT_NE(port, 0);
};
run(SockDomain::kV4);
if (SkipTest()) {
GTEST_SKIP_(skip_msg_.c_str());
}
run(SockDomain::kV6);
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,67 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <chrono> // for seconds
#include <cstdint> // for int32_t
#include <string> // for string
#include <thread> // for thread
#include <vector> // for vector
#include "../../../src/collective/comm.h"
#include "test_worker.h"
namespace xgboost::collective {
namespace {
/**
 * \brief Test worker that sends an "ack:<rank>" message through LogTracker.
 */
class PrintWorker : public WorkerForTest {
 public:
  using WorkerForTest::WorkerForTest;

  /** \brief Send "ack:<rank>" via the communicator's LogTracker and assert success. */
  void Print() {
    auto rank = this->comm_.Rank();
    auto status = comm_.LogTracker("ack:" + std::to_string(rank));
    ASSERT_TRUE(status.OK()) << status.Report();
  }
};
} // namespace
// Starts a tracker, constructs one WorkerForTest per thread, and expects the tracker's
// future to report success once all threads have joined.
TEST_F(TrackerTest, Bootstrap) {
  RabitTracker tracker{host, n_workers, 0, timeout};
  auto fut = tracker.Run();

  std::vector<std::thread> workers;
  std::int32_t port = tracker.Port();
  for (std::int32_t rank = 0; rank < n_workers; ++rank) {
    // The worker is constructed and destroyed inside the thread.
    workers.emplace_back([=] {
      WorkerForTest worker{host, port, timeout, n_workers, rank};
    });
  }

  for (auto &th : workers) {
    th.join();
  }
  ASSERT_TRUE(fut.get().OK());
}
// Starts a tracker and lets each worker thread call PrintWorker::Print(); the tracker's
// future must report success once all threads have joined.
TEST_F(TrackerTest, Print) {
  RabitTracker tracker{host, n_workers, 0, timeout};
  auto fut = tracker.Run();

  std::vector<std::thread> workers;
  std::int32_t port = tracker.Port();
  for (std::int32_t rank = 0; rank < n_workers; ++rank) {
    workers.emplace_back([=] {
      PrintWorker worker{host, port, timeout, n_workers, rank};
      worker.Print();
    });
  }

  for (auto &th : workers) {
    th.join();
  }
  ASSERT_TRUE(fut.get().OK());
}
// The host address obtained in SetUp must not begin with "127.".
TEST_F(TrackerTest, GetHostAddress) {
  // rfind(prefix, 0) only matches at position 0. A plain substring search would wrongly
  // reject addresses such as "10.0.127.5" that merely contain "127." further in.
  ASSERT_TRUE(host.rfind("127.", 0) != 0) << host;
}
} // namespace xgboost::collective

View File

@@ -0,0 +1,114 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <chrono> // for seconds
#include <cstdint> // for int32_t
#include <fstream> // for ifstream
#include <string> // for string
#include <thread> // for thread
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/collective/comm.h"
#include "../../../src/collective/tracker.h" // for GetHostAddress
#include "../helpers.h" // for FileExists
namespace xgboost::collective {
/**
 * \brief Base class for test workers: owns a RabitComm constructed from the tracker
 *        address and a task id of the form "t:<rank>".
 */
class WorkerForTest {
  // Declared before comm_ because comm_ is constructed from these members.
  std::string tracker_host_;
  std::int32_t tracker_port_;
  std::int32_t world_size_;

 protected:
  std::int32_t retry_{1};
  std::string task_id_;
  RabitComm comm_;

 public:
  /**
   * \param host    Tracker host.
   * \param port    Tracker port.
   * \param timeout Timeout forwarded to the communicator.
   * \param world   Expected world size; checked against the communicator after construction.
   * \param rank    Used to build the task id.
   */
  WorkerForTest(std::string host, std::int32_t port, std::chrono::seconds timeout,
                std::int32_t world, std::int32_t rank)
      : tracker_host_{std::move(host)},
        tracker_port_{port},
        world_size_{world},
        task_id_{"t:" + std::to_string(rank)},
        comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_} {
    CHECK_EQ(world_size_, comm_.World());
  }
  virtual ~WorkerForTest() = default;

  auto& Comm() { return comm_; }

  /** \brief Set the buffer size of the socket of every channel to another rank. */
  void LimitSockBuf(std::int32_t n_bytes) {
    for (std::int32_t peer = 0; peer < comm_.World(); ++peer) {
      if (peer == comm_.Rank()) {
        continue;  // no channel to ourselves
      }
      ASSERT_TRUE(comm_.Chan(peer)->Socket()->NonBlocking());
      ASSERT_TRUE(comm_.Chan(peer)->Socket()->SetBufSize(n_bytes).OK());
    }
  }
};
/**
 * \brief Test fixture that calls system::SocketStartup() before and
 *        system::SocketFinalize() after every test, and provides a helper for deciding
 *        whether IPv6 tests should be skipped.
 */
class SocketTest : public ::testing::Test {
 protected:
  /** \brief Message passed to GTEST_SKIP_ by tests that skip their IPv6 part. */
  std::string skip_msg_{"Skipping IPv6 test"};

  /**
   * \brief Decide whether IPv6 tests should be skipped.
   *
   * Reads `/sys/module/ipv6/parameters/disable`.
   *
   * \return true when the file is missing, cannot be opened, does not start with an
   *         integer, or holds a non-zero value; false when it holds 0.
   */
  bool SkipTest() {
    std::string path{"/sys/module/ipv6/parameters/disable"};
    if (!FileExists(path)) {
      return true;
    }
    std::ifstream fin(path);
    if (!fin) {
      return true;
    }
    // Parse straight from the stream. Unlike std::stoi this does not throw on an empty or
    // non-numeric file; the extraction simply fails and the test is skipped.
    int value{0};
    if (!(fin >> value)) {
      return true;
    }
    return value != 0;
  }

 protected:
  void SetUp() override { system::SocketStartup(); }
  void TearDown() override { system::SocketFinalize(); }
};
/**
 * \brief Fixture for tests that need a tracker: on top of SocketTest it resolves the host
 *        address in SetUp and exposes default worker count and timeout.
 */
class TrackerTest : public SocketTest {
 public:
  std::int32_t n_workers{2};
  std::chrono::seconds timeout{1};
  std::string host;

  void SetUp() override {
    SocketTest::SetUp();
    // Fill `host`; abort the test with the reported message on failure.
    auto status = GetHostAddress(&host);
    ASSERT_TRUE(status.OK()) << status.Report();
  }
};
/**
 * \brief Start a RabitTracker and run `worker_fn` on `n_workers` threads.
 *
 * \param n_workers Number of worker threads to launch.
 * \param worker_fn Callable invoked as `worker_fn(host, port, timeout, rank)` on each
 *                  thread, with rank in [0, n_workers).
 *
 * All threads are joined before the tracker's result is asserted to be OK.
 */
template <typename WorkerFn>
void TestDistributed(std::int32_t n_workers, WorkerFn worker_fn) {
  std::chrono::seconds timeout{1};

  std::string host;
  // Surface the failure reason, matching how TrackerTest::SetUp reports it.
  auto rc = GetHostAddress(&host);
  ASSERT_TRUE(rc.OK()) << rc.Report();

  RabitTracker tracker{StringView{host}, n_workers, 0, timeout};
  auto fut = tracker.Run();

  std::vector<std::thread> workers;
  std::int32_t port = tracker.Port();
  for (std::int32_t i = 0; i < n_workers; ++i) {
    workers.emplace_back([=] { worker_fn(host, port, timeout, i); });
  }

  for (auto& t : workers) {
    t.join();
  }

  ASSERT_TRUE(fut.get().OK());
}
} // namespace xgboost::collective

View File

@@ -147,7 +147,7 @@ TEST(CutsBuilder, SearchGroupInd) {
EXPECT_ANY_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));
p_mat->Info().Validate(-1);
p_mat->Info().Validate(DeviceOrd::CPU());
EXPECT_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17),
dmlc::Error);
@@ -330,7 +330,7 @@ TEST(HistUtil, IndexBinData) {
void TestSketchFromWeights(bool with_group) {
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
size_t constexpr kGroups = 10;
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
Context ctx;
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);

View File

@@ -222,7 +222,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
ASSERT_EQ(info.feature_types.Size(), n_features);
HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
cuts_ptr.SetDevice(0);
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
dh::device_vector<float> weight(n_samples * n_features, 0);
dh::Iota(dh::ToSpan(weight));
@@ -235,7 +235,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
detail::EntryCompareOp());
detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
detail::RemoveDuplicatedCategories(ctx.Device(), info, cuts_ptr.DeviceSpan(), &sorted_entries,
&weight, &columns_ptr);
auto const& h_cptr = cuts_ptr.ConstHostVector();
@@ -377,7 +377,8 @@ template <typename Adapter>
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
common::HistogramCuts batched_cuts;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
DeviceOrd::CUDA(0));
MetaInfo info;
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
@@ -444,7 +445,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
ConsoleLogger::Configure({{"verbosity", "3"}});
common::HistogramCuts batched_cuts;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
HistogramCuts cuts;
@@ -472,7 +473,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
ConsoleLogger::Configure({{"verbosity", "3"}});
common::HistogramCuts batched_cuts;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
AdapterDeviceSketch(adapter.Value(), num_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
@@ -507,7 +508,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
}
ASSERT_EQ(info.feature_types.Size(), 1);
SketchContainer container(info.feature_types, num_bins, 1, n, 0);
SketchContainer container(info.feature_types, num_bins, 1, n, DeviceOrd::CUDA(0));
AdapterDeviceSketch(adapter.Value(), num_bins, info,
std::numeric_limits<float>::quiet_NaN(), &container);
HistogramCuts cuts;
@@ -580,11 +581,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
namespace {
auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(ctx->gpu_id));
#endif
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
auto n = n_samples * n_features;
std::vector<float> x;
x.resize(n);
@@ -624,21 +621,21 @@ void TestGetColumnSize(std::size_t n_samples) {
std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
ASSERT_EQ(h_column_size, h_column_size_1);
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
ASSERT_EQ(h_column_size, h_column_size_1);
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
ASSERT_EQ(h_column_size, h_column_size_1);
}
@@ -715,9 +712,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
size_t constexpr kGroups = 10;
HostDeviceVector<float> storage;
std::string m =
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
&storage);
std::string m = RandomDataGenerator{kRows, kCols, 0}
.Device(DeviceOrd::CUDA(0))
.GenerateArrayInterface(&storage);
MetaInfo info;
Context ctx;
auto& h_weights = info.weights_.HostVector();
@@ -736,14 +733,14 @@ void TestAdapterSketchFromWeights(bool with_group) {
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
}
info.weights_.SetDevice(0);
info.weights_.SetDevice(DeviceOrd::CUDA(0));
info.num_row_ = kRows;
info.num_col_ = kCols;
data::CupyAdapter adapter(m);
auto const& batch = adapter.Value();
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
SketchContainer sketch_container(ft, kBins, kCols, kRows, DeviceOrd::CUDA(0));
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
@@ -787,7 +784,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
// https://github.com/dmlc/xgboost/issues/7946
h_weights[i] = (i % 2 == 0 ? 1 : 2) / static_cast<float>(kGroups);
}
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_container);
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());

View File

@@ -1,7 +1,6 @@
/*!
* Copyright 2018 XGBoost contributors
/**
* Copyright 2018-2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/equal.h>
#include <thrust/iterator/counting_iterator.h>
@@ -13,21 +12,14 @@
#endif
#include <xgboost/host_device_vector.h>
namespace xgboost {
namespace common {
namespace xgboost::common {
namespace {
void SetDeviceForTest(int device) {
void SetDeviceForTest(DeviceOrd device) {
int n_devices;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaGetDeviceCount(&n_devices));
device %= n_devices;
dh::safe_cuda(cudaSetDevice(device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipGetDeviceCount(&n_devices));
device %= n_devices;
dh::safe_cuda(hipSetDevice(device));
#endif
device.ordinal %= n_devices;
dh::safe_cuda(cudaSetDevice(device.ordinal));
}
} // namespace
@@ -42,13 +34,13 @@ struct HostDeviceVectorSetDeviceHandler {
}
};
void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
void InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {
// create the vector
v->SetDevice(device);
v->Resize(n);
ASSERT_EQ(v->Size(), n);
ASSERT_EQ(v->DeviceIdx(), device);
ASSERT_EQ(v->Device(), device);
// ensure that the device have read-write access
ASSERT_TRUE(v->DeviceCanRead());
ASSERT_TRUE(v->DeviceCanWrite());
@@ -68,7 +60,7 @@ void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
}
void PlusOne(HostDeviceVector<int> *v) {
int device = v->DeviceIdx();
auto device = v->Device();
SetDeviceForTest(device);
thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v),
[=]__device__(unsigned int a){ return a + 1; });
@@ -80,7 +72,7 @@ void CheckDevice(HostDeviceVector<int>* v,
unsigned int first,
GPUAccess access) {
ASSERT_EQ(v->Size(), size);
SetDeviceForTest(v->DeviceIdx());
SetDeviceForTest(v->Device());
ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v),
thrust::make_counting_iterator(first)));
@@ -111,7 +103,7 @@ void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
ASSERT_FALSE(v->DeviceCanWrite());
}
void TestHostDeviceVector(size_t n, int device) {
void TestHostDeviceVector(size_t n, DeviceOrd device) {
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
HostDeviceVector<int> v;
InitHostDeviceVector(n, device, &v);
@@ -124,13 +116,13 @@ void TestHostDeviceVector(size_t n, int device) {
TEST(HostDeviceVector, Basic) {
size_t n = 1001;
int device = 0;
DeviceOrd device = DeviceOrd::CUDA(0);
TestHostDeviceVector(n, device);
}
TEST(HostDeviceVector, Copy) {
size_t n = 1001;
int device = 0;
auto device = DeviceOrd::CUDA(0);
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
HostDeviceVector<int> v;
@@ -154,15 +146,15 @@ TEST(HostDeviceVector, SetDevice) {
h_vec[i] = i;
}
HostDeviceVector<int> vec (h_vec);
auto device = 0;
auto device = DeviceOrd::CUDA(0);
vec.SetDevice(device);
ASSERT_EQ(vec.Size(), h_vec.size());
auto span = vec.DeviceSpan(); // sync to device
vec.SetDevice(-1); // pull back to cpu.
vec.SetDevice(DeviceOrd::CPU()); // pull back to cpu.
ASSERT_EQ(vec.Size(), h_vec.size());
ASSERT_EQ(vec.DeviceIdx(), -1);
ASSERT_EQ(vec.Device(), DeviceOrd::CPU());
auto h_vec_1 = vec.HostVector();
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
@@ -170,7 +162,7 @@ TEST(HostDeviceVector, SetDevice) {
TEST(HostDeviceVector, Span) {
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
vec.SetDevice(0);
vec.SetDevice(DeviceOrd::CUDA(0));
auto span = vec.DeviceSpan();
ASSERT_EQ(vec.Size(), span.size());
ASSERT_EQ(vec.DevicePointer(), span.data());
@@ -194,5 +186,4 @@ TEST(HostDeviceVector, Empty) {
ASSERT_FALSE(another.Empty());
ASSERT_TRUE(vec.Empty());
}
} // namespace common
} // namespace xgboost
} // namespace xgboost::common

View File

@@ -144,7 +144,8 @@ TEST(IO, Resource) {
fout << 1.0 << std::endl;
fout.close();
auto resource = std::make_shared<MmapResource>(path, 0, sizeof(double));
auto resource = std::shared_ptr<MmapResource>{
new MmapResource{path, 0, sizeof(double)}};
ASSERT_EQ(resource->Size(), sizeof(double));
ASSERT_EQ(resource->Type(), ResourceHandler::kMmap);
ASSERT_EQ(resource->DataAs<double>()[0], val);

View File

@@ -1,13 +1,15 @@
/**
* Copyright (c) 2019-2023, XGBoost Contributors
* Copyright 2019-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <fstream>
#include <iterator> // for back_inserter
#include <map>
#include "../../../src/common/charconv.h"
#include "../../../src/common/io.h"
#include "../../../src/common/json_utils.h"
#include "../../../src/common/threading_utils.h" // for ParallelFor
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
@@ -691,4 +693,16 @@ TEST(Json, TypeCheck) {
ASSERT_NE(err.find("foo"), std::string::npos);
}
}
TEST(Json, Dump) {
  // Parse the string returned by GetModelStr() and serialise the resulting
  // document twice: once into a std::string, once into a std::vector<char>.
  auto model_text = GetModelStr();
  auto document = Json::Load(model_text);
  std::string as_string = Json::Dump(document);
  std::vector<char> as_chars = Json::Dump<std::vector<char>>(document);

  // Both buffers must have the same length and the same character at every
  // position.
  ASSERT_EQ(as_string.size(), as_chars.size());
  std::size_t pos = 0;
  while (pos < as_string.size()) {
    ASSERT_EQ(as_string[pos], as_chars[pos]);
    ++pos;
  }
}
} // namespace xgboost

View File

@@ -16,7 +16,7 @@ namespace xgboost::linalg {
namespace {
void TestElementWiseKernel() {
auto device = DeviceOrd::CUDA(0);
Tensor<float, 3> l{{2, 3, 4}, 0};
Tensor<float, 3> l{{2, 3, 4}, device};
{
/**
* Non-contiguous

View File

@@ -9,9 +9,7 @@
#include "../../../src/data/adapter.h"
#include "xgboost/context.h"
namespace xgboost {
namespace common {
namespace xgboost::common {
TEST(Quantile, LoadBalance) {
size_t constexpr kRows = 1000, kCols = 100;
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
@@ -314,7 +312,7 @@ void TestSameOnAllWorkers() {
}
auto m = RandomDataGenerator{kRows, kCols, 0}
.Device(Context::kCpuId)
.Device(DeviceOrd::CPU())
.Type(ft)
.MaxCategory(17)
.Seed(rank + seed)
@@ -373,6 +371,4 @@ TEST(Quantile, SameOnAllWorkers) {
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestSameOnAllWorkers);
}
} // namespace common
} // namespace xgboost
} // namespace xgboost::common

View File

@@ -32,7 +32,7 @@ class MGPUQuantileTest : public BaseMGPUTest {};
TEST(GPUQuantile, Basic) {
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch(ft, kBins, kCols, kRows, 0);
SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
dh::caching_device_vector<Entry> entries;
dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
@@ -45,12 +45,12 @@ void TestSketchUnique(float sparsity) {
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [kRows, kCols, sparsity](int32_t seed, size_t n_bins, MetaInfo const& info) {
HostDeviceVector<FeatureType> ft;
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
HostDeviceVector<float> storage;
std::string interface_str = RandomDataGenerator{kRows, kCols, sparsity}
.Seed(seed)
.Device(0)
.Device(FstCU())
.GenerateArrayInterface(&storage);
data::CupyAdapter adapter(interface_str);
AdapterDeviceSketch(adapter.Value(), n_bins, info,
@@ -65,7 +65,7 @@ void TestSketchUnique(float sparsity) {
thrust::make_counting_iterator(0llu),
[=] __device__(size_t idx) { return batch.GetElement(idx); });
auto end = kCols * kRows;
detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
detail::GetColumnSizesScan(FstCU(), kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
&cut_sizes_scan, &column_sizes_scan);
auto const& cut_sizes = cut_sizes_scan.HostVector();
ASSERT_LE(sketch.Data().size(), cut_sizes.back());
@@ -93,13 +93,9 @@ TEST(GPUQuantile, Unique) {
}
// if with_error is true, the test tolerates floating point error
void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device));
#endif
dh::safe_cuda(cudaSetDevice(device.ordinal));
std::vector<SketchEntry> h_in(in.size());
dh::CopyDeviceSpanToVector(&h_in, in);
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
@@ -134,13 +130,12 @@ TEST(GPUQuantile, Prune) {
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
HostDeviceVector<FeatureType> ft;
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
HostDeviceVector<float> storage;
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
.Device(0)
.Seed(seed)
.GenerateArrayInterface(&storage);
std::string interface_str =
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
&storage);
data::CupyAdapter adapter(interface_str);
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(), &sketch);
@@ -156,7 +151,7 @@ TEST(GPUQuantile, Prune) {
ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch.Data().data(),
sketch.Data().data() + sketch.Data().size(),
detail::SketchUnique{}));
TestQuantileElemRank(0, sketch.Data(), sketch.ColumnsPtr());
TestQuantileElemRank(FstCU(), sketch.Data(), sketch.ColumnsPtr());
});
}
@@ -164,10 +159,10 @@ TEST(GPUQuantile, MergeEmpty) {
constexpr size_t kRows = 1000, kCols = 100;
size_t n_bins = 10;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
HostDeviceVector<float> storage_0;
std::string interface_str_0 =
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).GenerateArrayInterface(
&storage_0);
data::CupyAdapter adapter_0(interface_str_0);
MetaInfo info;
@@ -204,34 +199,33 @@ TEST(GPUQuantile, MergeBasic) {
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) {
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
HostDeviceVector<float> storage_0;
std::string interface_str_0 = RandomDataGenerator{kRows, kCols, 0}
.Device(0)
.Device(FstCU())
.Seed(seed)
.GenerateArrayInterface(&storage_0);
data::CupyAdapter adapter_0(interface_str_0);
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(), &sketch_0);
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, 0);
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, FstCU());
HostDeviceVector<float> storage_1;
std::string interface_str_1 = RandomDataGenerator{kRows, kCols, 0}
.Device(0)
.Seed(seed)
.GenerateArrayInterface(&storage_1);
std::string interface_str_1 =
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
&storage_1);
data::CupyAdapter adapter_1(interface_str_1);
AdapterDeviceSketch(adapter_1.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(), &sketch_1);
AdapterDeviceSketch(adapter_1.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
&sketch_1);
size_t size_before_merge = sketch_0.Data().size();
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
if (info.weights_.Size() != 0) {
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), true);
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), true);
sketch_0.FixError();
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), false);
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), false);
} else {
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
}
auto columns_ptr = sketch_0.ColumnsPtr();
@@ -251,24 +245,22 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
MetaInfo info;
int32_t seed = 0;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_0(ft, n_bins, cols, rows, 0);
SketchContainer sketch_0(ft, n_bins, cols, rows, FstCU());
HostDeviceVector<float> storage_0;
std::string interface_str_0 = RandomDataGenerator{rows, cols, 0}
.Device(0)
.Seed(seed)
.GenerateArrayInterface(&storage_0);
std::string interface_str_0 =
RandomDataGenerator{rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
&storage_0);
data::CupyAdapter adapter_0(interface_str_0);
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&sketch_0);
size_t f_rows = rows * frac;
SketchContainer sketch_1(ft, n_bins, cols, f_rows, 0);
SketchContainer sketch_1(ft, n_bins, cols, f_rows, FstCU());
HostDeviceVector<float> storage_1;
std::string interface_str_1 = RandomDataGenerator{f_rows, cols, 0}
.Device(0)
.Seed(seed)
.GenerateArrayInterface(&storage_1);
std::string interface_str_1 =
RandomDataGenerator{f_rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
&storage_1);
auto data_1 = storage_1.DeviceSpan();
auto tuple_it = thrust::make_tuple(
thrust::make_counting_iterator<size_t>(0ul), data_1.data());
@@ -290,7 +282,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
size_t size_before_merge = sketch_0.Data().size();
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
auto columns_ptr = sketch_0.ColumnsPtr();
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
@@ -321,11 +313,10 @@ TEST(GPUQuantile, MergeDuplicated) {
TEST(GPUQuantile, MultiMerge) {
constexpr size_t kRows = 20, kCols = 1;
int32_t world = 2;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
MetaInfo const &info) {
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
// Set up single node version
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, 0);
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, FstCU());
size_t intermediate_num_cuts = std::min(
kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));
@@ -333,12 +324,12 @@ TEST(GPUQuantile, MultiMerge) {
for (auto rank = 0; rank < world; ++rank) {
HostDeviceVector<float> storage;
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
.Device(0)
.Device(FstCU())
.Seed(rank + seed)
.GenerateArrayInterface(&storage);
data::CupyAdapter adapter(interface_str);
HostDeviceVector<FeatureType> ft;
containers.emplace_back(ft, n_bins, kCols, kRows, 0);
containers.emplace_back(ft, n_bins, kCols, kRows, FstCU());
AdapterDeviceSketch(adapter.Value(), n_bins, info,
std::numeric_limits<float>::quiet_NaN(),
&containers.back());
@@ -348,21 +339,44 @@ TEST(GPUQuantile, MultiMerge) {
sketch_on_single_node.Merge(sketch.ColumnsPtr(), sketch.Data());
sketch_on_single_node.FixError();
}
TestQuantileElemRank(0, sketch_on_single_node.Data(),
sketch_on_single_node.ColumnsPtr());
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
sketch_on_single_node.Unique();
TestQuantileElemRank(0, sketch_on_single_node.Data(),
sketch_on_single_node.ColumnsPtr());
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
});
}
TEST(GPUQuantile, MissingColumns) {
  // Generates a sparse matrix whose features alternate between numerical
  // (even index) and categorical (odd index), then returns the result of
  // SliceCol(2, 1) on it. The full matrix is released when the helper returns.
  auto make_sliced = [=]() {
    std::size_t constexpr kRows = 1000, kCols = 100;
    auto sparsity = 0.5f;
    std::vector<FeatureType> feature_types(kCols);
    for (size_t fidx = 0; fidx < feature_types.size(); ++fidx) {
      if (fidx % 2 == 0) {
        feature_types[fidx] = FeatureType::kNumerical;
      } else {
        feature_types[fidx] = FeatureType::kCategorical;
      }
    }
    auto p_full = RandomDataGenerator{kRows, kCols, sparsity}
                      .Seed(0)
                      .Lower(.0f)
                      .Upper(1.0f)
                      .Type(feature_types)
                      .MaxCategory(13)
                      .GenerateDMatrix();
    return p_full->SliceCol(2, 1);
  };
  std::unique_ptr<DMatrix> dmat{make_sliced()};
  // Mark the slice as row-split before sketching.
  dmat->Info().data_split_mode = DataSplitMode::kRow;

  std::size_t constexpr kBins = 64;
  auto ctx = MakeCUDACtx(0);
  HistogramCuts cuts = common::DeviceSketch(&ctx, dmat.get(), kBins);
  // The sketch of the slice is expected to report categorical features.
  ASSERT_TRUE(cuts.HasCategorical());
}
namespace {
void TestAllReduceBasic() {
auto const world = collective::GetWorldSize();
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
auto const device = GPUIDX;
auto const device = DeviceOrd::CUDA(GPUIDX);
// Set up single node version;
HostDeviceVector<FeatureType> ft({}, device);
@@ -440,18 +454,14 @@ TEST_F(MGPUQuantileTest, AllReduceBasic) {
}
namespace {
void TestColumnSplitBasic() {
void TestColumnSplit(DMatrix* dmat) {
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
std::size_t constexpr kRows = 1000, kCols = 100, kBins = 64;
auto m = std::unique_ptr<DMatrix>{[=]() {
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
return dmat->SliceCol(world, rank);
}()};
auto m = std::unique_ptr<DMatrix>{dmat->SliceCol(world, rank)};
// Generate cuts for distributed environment.
auto ctx = MakeCUDACtx(GPUIDX);
std::size_t constexpr kBins = 64;
HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
// Generate cuts for single node environment
@@ -484,7 +494,26 @@ void TestColumnSplitBasic() {
} // anonymous namespace
TEST_F(MGPUQuantileTest, ColumnSplitBasic) {
DoTest(TestColumnSplitBasic);
std::size_t constexpr kRows = 1000, kCols = 100;
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
DoTest(TestColumnSplit, dmat.get());
}
TEST_F(MGPUQuantileTest, ColumnSplitCategorical) {
  std::size_t constexpr kRows = 1000, kCols = 100;
  auto sparsity = 0.5f;

  // Even-indexed features are numerical, odd-indexed features are categorical.
  std::vector<FeatureType> feature_types(kCols);
  std::size_t fidx = 0;
  for (auto& type : feature_types) {
    type = (fidx % 2 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
    ++fidx;
  }

  auto p_fmat = RandomDataGenerator{kRows, kCols, sparsity}
                    .Seed(0)
                    .Lower(.0f)
                    .Upper(1.0f)
                    .Type(feature_types)
                    .MaxCategory(13)
                    .GenerateDMatrix();
  // Hand the generated matrix to TestColumnSplit through the fixture's DoTest.
  DoTest(TestColumnSplit, p_fmat.get());
}
namespace {
@@ -494,7 +523,7 @@ void TestSameOnAllWorkers() {
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
MetaInfo const &info) {
auto const rank = collective::GetRank();
auto const device = GPUIDX;
auto const device = DeviceOrd::CUDA(GPUIDX);
HostDeviceVector<FeatureType> ft({}, device);
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
HostDeviceVector<float> storage({}, device);
@@ -525,9 +554,9 @@ void TestSameOnAllWorkers() {
thrust::copy(thrust::device, local_data.data(),
local_data.data() + local_data.size(),
all_workers.begin() + local_data.size() * rank);
collective::AllReduce<collective::Operation::kSum>(device, all_workers.data().get(),
collective::AllReduce<collective::Operation::kSum>(device.ordinal, all_workers.data().get(),
all_workers.size());
collective::Synchronize(device);
collective::Synchronize(device.ordinal);
auto base_line = dh::ToSpan(all_workers).subspan(0, size_as_float);
std::vector<float> h_base_line(base_line.size());
@@ -573,7 +602,7 @@ TEST(GPUQuantile, Push) {
columns_ptr[1] = kRows;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
sketch.Push(dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows, {});
auto sketch_data = sketch.Data();
@@ -613,7 +642,7 @@ TEST(GPUQuantile, MultiColPush) {
int32_t n_bins = 16;
HostDeviceVector<FeatureType> ft;
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
dh::device_vector<Entry> d_entries {entries};
dh::device_vector<size_t> columns_ptr(kCols + 1, 0);

View File

@@ -95,7 +95,7 @@ void TestRankingCache(Context const* ctx) {
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
predt.SetDevice(ctx->Device());
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
@@ -129,7 +129,7 @@ void TestNDCGCache(Context const* ctx) {
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
// empty label
ASSERT_THROW(fail(), dmlc::Error);
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, DeviceOrd::CPU()};
// invalid label
ASSERT_THROW(fail(), dmlc::Error);
auto h_labels = info.labels.HostView();

View File

@@ -42,7 +42,7 @@ void TestCalcQueriesInvIDCG() {
auto d_scores = dh::ToSpan(scores);
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
linalg::Vector<double> inv_IDCG({n_groups}, ctx.Device());
ltr::LambdaRankParam p;
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
@@ -77,7 +77,7 @@ void TestRankingCache(Context const* ctx) {
HostDeviceVector<float> predt(info.num_row_, 0);
auto& h_predt = predt.HostVector();
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
predt.SetDevice(ctx->gpu_id);
predt.SetDevice(ctx->Device());
auto rank_idx =
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

View File

@@ -9,12 +9,11 @@
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
#include "../helpers.h"
namespace xgboost {
namespace common {
namespace xgboost::common {
TEST(Stats, Quantile) {
Context ctx;
{
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, DeviceOrd::CPU());
std::vector<size_t> index{0, 2, 3, 4, 6};
auto h_arr = arr.HostView();
auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
@@ -40,8 +39,8 @@ TEST(Stats, Quantile) {
TEST(Stats, WeightedQuantile) {
Context ctx;
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, DeviceOrd::CPU());
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, DeviceOrd::CPU());
auto h_arr = arr.HostView();
auto h_weight = weight.HostView();
@@ -64,7 +63,7 @@ TEST(Stats, Median) {
Context ctx;
{
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, DeviceOrd::CPU()};
HostDeviceVector<float> weights;
linalg::Tensor<float, 1> out;
Median(&ctx, values, weights, &out);
@@ -83,7 +82,7 @@ TEST(Stats, Median) {
{
ctx = ctx.MakeCPU();
// 4x2 matrix
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.Device()};
HostDeviceVector<float> weights;
linalg::Tensor<float, 1> out;
Median(&ctx, values, weights, &out);
@@ -102,14 +101,14 @@ TEST(Stats, Median) {
namespace {
void TestMean(Context const* ctx) {
std::size_t n{128};
linalg::Vector<float> data({n}, ctx->gpu_id);
linalg::Vector<float> data({n}, ctx->Device());
auto h_v = data.HostView().Values();
std::iota(h_v.begin(), h_v.end(), .0f);
auto nf = static_cast<float>(n);
float mean = nf * (nf - 1) / 2 / n;
linalg::Vector<float> res{{1}, ctx->gpu_id};
linalg::Vector<float> res{{1}, ctx->Device()};
Mean(ctx, data, &res);
auto h_res = res.HostView();
ASSERT_EQ(h_res.Size(), 1);
@@ -127,6 +126,5 @@ TEST(Stats, GPUMean) {
auto ctx = MakeCUDACtx(0);
TestMean(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
} // namespace common
} // namespace xgboost
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost::common

View File

@@ -25,8 +25,8 @@ namespace common {
namespace {
class StatsGPU : public ::testing::Test {
private:
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, 0};
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, 0};
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, FstCU()};
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, FstCU()};
HostDeviceVector<float> results_;
using TestSet = std::vector<std::pair<float, float>>;
Context ctx_;
@@ -51,7 +51,7 @@ class StatsGPU : public ::testing::Test {
data.insert(data.cend(), seg.begin(), seg.end());
data.insert(data.cend(), seg.begin(), seg.end());
data.insert(data.cend(), seg.begin(), seg.end());
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
auto d_arr = arr.View(DeviceOrd::CUDA(0));
auto key_it = dh::MakeTransformIterator<std::size_t>(
@@ -63,7 +63,7 @@ class StatsGPU : public ::testing::Test {
// one alpha for each segment
HostDeviceVector<float> alphas{0.0f, 0.5f, 1.0f};
alphas.SetDevice(0);
alphas.SetDevice(FstCU());
auto d_alphas = alphas.ConstDeviceSpan();
auto w_it = thrust::make_constant_iterator(0.1f);
SegmentedWeightedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
@@ -85,7 +85,7 @@ class StatsGPU : public ::testing::Test {
auto val_it =
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
linalg::Tensor<float, 1> weights{{10}, 0};
linalg::Tensor<float, 1> weights{{10}, FstCU()};
linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
[=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
auto w_it = weights.Data()->ConstDevicePointer();
@@ -106,7 +106,7 @@ class StatsGPU : public ::testing::Test {
data.insert(data.cend(), seg.begin(), seg.end());
data.insert(data.cend(), seg.begin(), seg.end());
data.insert(data.cend(), seg.begin(), seg.end());
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
auto d_arr = arr.View(DeviceOrd::CUDA(0));
auto key_it = dh::MakeTransformIterator<std::size_t>(
@@ -118,7 +118,7 @@ class StatsGPU : public ::testing::Test {
// one alpha for each segment
HostDeviceVector<float> alphas{0.1f, 0.2f, 0.4f};
alphas.SetDevice(0);
alphas.SetDevice(FstCU());
auto d_alphas = alphas.ConstDeviceSpan();
SegmentedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
val_it + d_arr.Size(), &results_);

View File

@@ -11,63 +11,59 @@
#include "../../../src/common/transform.h"
#include "../helpers.h"
namespace xgboost::common {
namespace {
constexpr DeviceOrd TransformDevice() {
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
#define TRANSFORM_GPU 0
return DeviceOrd::CUDA(0);
#else
#define TRANSFORM_GPU -1
return DeviceOrd::CPU();
#endif
namespace xgboost {
namespace common {
}
} // namespace
template <typename T>
struct TestTransformRange {
void XGBOOST_DEVICE operator()(size_t _idx,
Span<bst_float> _out, Span<const bst_float> _in) {
void XGBOOST_DEVICE operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {
_out[_idx] = _in[_idx];
}
};
TEST(Transform, DeclareUnifiedTest(Basic)) {
const size_t size {256};
std::vector<bst_float> h_in(size);
std::vector<bst_float> h_out(size);
const size_t size{256};
std::vector<float> h_in(size);
std::vector<float> h_out(size);
std::iota(h_in.begin(), h_in.end(), 0);
std::vector<bst_float> h_sol(size);
std::vector<float> h_sol(size);
std::iota(h_sol.begin(), h_sol.end(), 0);
const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU};
HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU};
auto device = TransformDevice();
HostDeviceVector<float> const in_vec{h_in, device};
HostDeviceVector<float> out_vec{h_out, device};
out_vec.Fill(0);
Transform<>::Init(TestTransformRange<bst_float>{},
Transform<>::Init(TestTransformRange<float>{},
Range{0, static_cast<Range::DifferenceType>(size)}, AllThreadsForTest(),
TRANSFORM_GPU)
TransformDevice())
.Eval(&out_vec, &in_vec);
std::vector<bst_float> res = out_vec.HostVector();
std::vector<float> res = out_vec.HostVector();
ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
}
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
TEST(TransformDeathTest, Exception) {
size_t const kSize {16};
std::vector<bst_float> h_in(kSize);
const HostDeviceVector<bst_float> in_vec{h_in, -1};
size_t const kSize{16};
std::vector<float> h_in(kSize);
const HostDeviceVector<float> in_vec{h_in, DeviceOrd::CPU()};
EXPECT_DEATH(
{
Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },
Range(0, static_cast<Range::DifferenceType>(kSize)), AllThreadsForTest(),
-1)
DeviceOrd::CPU())
.Eval(&in_vec);
},
"");
}
#endif
} // namespace common
} // namespace xgboost
} // namespace xgboost::common

View File

@@ -0,0 +1,5 @@
/**
* Copyright 2023 XGBoost contributors
*/
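// Dummy file to keep the CUDA tests: it only re-includes the C++ test source
// below, so the same tests are compiled as part of this translation unit too.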
#include "test_transform_range.cc"

View File

@@ -70,12 +70,12 @@ TEST(DeviceAdapter, GetRowCounts) {
for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {
HostDeviceVector<float> storage;
auto str_arr = RandomDataGenerator{8192, n_features, 0.0}
.Device(ctx.gpu_id)
.Device(ctx.Device())
.GenerateArrayInterface(&storage);
auto adapter = CupyAdapter{str_arr};
HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
offset.SetDevice(ctx.gpu_id);
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id,
offset.SetDevice(ctx.Device());
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
std::numeric_limits<float>::quiet_NaN());
ASSERT_EQ(rstride, n_features);
}

View File

@@ -98,7 +98,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
Context ctx{MakeCUDACtx(0)};
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
auto ellpack = EllpackPage(&ctx, m.get(), p);
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
ASSERT_EQ(kCats, accessor.NumBins());
auto x_copy = x;
@@ -156,13 +156,12 @@ TEST(EllpackPage, Copy) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
kRows);
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);
// Copy batch pages into the result page.
size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
size_t num_elements = result.Copy(0, batch.Impl(), offset);
size_t num_elements = result.Copy(FstCU(), batch.Impl(), offset);
offset += num_elements;
}
@@ -176,10 +175,12 @@ TEST(EllpackPage, Copy) {
EXPECT_EQ(impl->base_rowid, current_row);
for (size_t i = 0; i < impl->Size(); i++) {
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()), current_row,
row_d.data().get()));
thrust::copy(row_d.begin(), row_d.end(), row.begin());
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(0), current_row, row_result_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(FstCU()), current_row,
row_result_d.data().get()));
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
EXPECT_EQ(row, row_result);
@@ -203,8 +204,7 @@ TEST(EllpackPage, Compact) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
kCompactedRows);
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);
// Compact batch pages into the result page.
std::vector<size_t> row_indexes_h {
@@ -213,7 +213,7 @@ TEST(EllpackPage, Compact) {
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
result.Compact(0, batch.Impl(), row_indexes_span);
result.Compact(FstCU(), batch.Impl(), row_indexes_span);
}
size_t current_row = 0;
@@ -232,7 +232,7 @@ TEST(EllpackPage, Compact) {
continue;
}
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0),
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()),
current_row, row_d.data().get()));
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaDeviceSynchronize());
@@ -242,7 +242,7 @@ TEST(EllpackPage, Compact) {
thrust::copy(row_d.begin(), row_d.end(), row.begin());
dh::LaunchN(kCols,
ReadRowFunction(result.GetDeviceAccessor(0), compacted_row,
ReadRowFunction(result.GetDeviceAccessor(FstCU()), compacted_row,
row_result_d.data().get()));
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());

View File

@@ -30,7 +30,7 @@ namespace xgboost::data {
TEST(GradientIndex, ExternalMemoryBaseRowID) {
Context ctx;
auto p_fmat = RandomDataGenerator{4096, 256, 0.5}
.Device(ctx.gpu_id)
.Device(ctx.Device())
.Batches(8)
.GenerateSparsePageDMatrix("cache", true);

View File

@@ -16,9 +16,7 @@
#include "../helpers.h"
#include "test_iterative_dmatrix.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
void TestEquivalent(float sparsity) {
Context ctx{MakeCUDACtx(0)};
@@ -28,14 +26,14 @@ void TestEquivalent(float sparsity) {
std::size_t offset = 0;
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
std::unique_ptr<EllpackPageImpl> page_concatenated {
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
first->row_stride, 1000 * 100)};
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
auto page = batch.Impl();
size_t num_elements = page_concatenated->Copy(0, page, offset);
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
offset += num_elements;
}
auto from_iter = page_concatenated->GetDeviceAccessor(0);
auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());
@@ -45,7 +43,7 @@ void TestEquivalent(float sparsity) {
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
auto from_data = ellpack.Impl()->GetDeviceAccessor(ctx.Device());
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
std::vector<float> min_fvalues_iter(from_iter.min_fvalue.size());
@@ -157,10 +155,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator(
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
// null values get placed after valid values in a row
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
EXPECT_EQ(m.Info().num_col_, cols);
EXPECT_EQ(m.Info().num_row_, rows);
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
@@ -188,5 +186,4 @@ TEST(IterativeDeviceDMatrix, Ref) {
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -12,6 +12,7 @@
#include "../helpers.h"
#include "xgboost/base.h"
namespace xgboost {
TEST(MetaInfo, GetSet) {
xgboost::Context ctx;
xgboost::MetaInfo info;
@@ -73,6 +74,49 @@ TEST(MetaInfo, GetSetFeature) {
// Other conditions are tested in `SaveLoadBinary`.
}
namespace {
void VerifyGetSetFeatureColumnSplit() {
xgboost::MetaInfo info;
info.data_split_mode = DataSplitMode::kCol;
auto const world_size = collective::GetWorldSize();
auto constexpr kCols{2};
std::vector<std::string> types{u8"float", u8"c"};
std::vector<char const *> c_types(kCols);
std::transform(types.cbegin(), types.cend(), c_types.begin(),
[](auto const &str) { return str.c_str(); });
info.num_col_ = kCols;
EXPECT_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()), dmlc::Error);
info.num_col_ = kCols * world_size;
EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()));
std::vector<std::string> expected_type_names{u8"float", u8"c", u8"float",
u8"c", u8"float", u8"c"};
EXPECT_EQ(info.feature_type_names, expected_type_names);
std::vector<xgboost::FeatureType> expected_types{
xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,
xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,
xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical};
EXPECT_EQ(info.feature_types.HostVector(), expected_types);
std::vector<std::string> names{u8"feature0", u8"feature1"};
std::vector<char const *> c_names(kCols);
std::transform(names.cbegin(), names.cend(), c_names.begin(),
[](auto const &str) { return str.c_str(); });
info.num_col_ = kCols;
EXPECT_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()), dmlc::Error);
info.num_col_ = kCols * world_size;
EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()));
std::vector<std::string> expected_names{u8"0.feature0", u8"0.feature1", u8"1.feature0",
u8"1.feature1", u8"2.feature0", u8"2.feature1"};
EXPECT_EQ(info.feature_names, expected_names);
}
} // anonymous namespace
// Runs VerifyGetSetFeatureColumnSplit through the in-memory communicator with 3 workers.
TEST(MetaInfo, GetSetFeatureColumnSplit) {
  constexpr int kWorldSize = 3;
  RunWithInMemoryCommunicator(kWorldSize, VerifyGetSetFeatureColumnSplit);
}
TEST(MetaInfo, SaveLoadBinary) {
xgboost::MetaInfo info;
xgboost::Context ctx;
@@ -236,9 +280,9 @@ TEST(MetaInfo, Validate) {
info.num_nonzero_ = 12;
info.num_col_ = 3;
std::vector<xgboost::bst_group_t> groups (11);
xgboost::Context ctx;
Context ctx;
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
EXPECT_THROW(info.Validate(0), dmlc::Error);
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1);
EXPECT_THROW(
@@ -261,11 +305,11 @@ TEST(MetaInfo, Validate) {
info.group_ptr_.clear();
labels.resize(info.num_row_);
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.labels.SetDevice(0);
EXPECT_THROW(info.Validate(1), dmlc::Error);
info.labels.SetDevice(FstCU());
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
d_groups.SetDevice(0);
d_groups.SetDevice(FstCU());
d_groups.DevicePointer(); // pull to device
std::string arr_interface_str{ArrayInterfaceStr(xgboost::linalg::MakeVec(
d_groups.ConstDevicePointer(), d_groups.Size(), xgboost::DeviceOrd::CUDA(0)))};
@@ -306,6 +350,5 @@ TEST(MetaInfo, HostExtend) {
}
}
namespace xgboost {
// Strided-data MetaInfo checks, run with a CPU device ordinal.
TEST(MetaInfo, CPUStridedData) {
  TestMetaInfoStridedData(DeviceOrd::CPU());
}
} // namespace xgboost

View File

@@ -1,31 +1,27 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../helpers.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/data/adapter.h"
namespace xgboost {
namespace data {
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../helpers.h"
namespace xgboost::data {
TEST(ProxyDMatrix, HostData) {
DMatrixProxy proxy;
size_t constexpr kRows = 100, kCols = 10;
std::vector<HostDeviceVector<float>> label_storage(1);
HostDeviceVector<float> storage;
auto data = RandomDataGenerator(kRows, kCols, 0.5)
.Device(0)
.GenerateArrayInterface(&storage);
auto data =
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
proxy.SetArrayData(data.c_str());
auto n_samples = HostAdapterDispatch(
&proxy, [](auto const &value) { return value.Size(); });
auto n_samples = HostAdapterDispatch(&proxy, [](auto const &value) { return value.Size(); });
ASSERT_EQ(n_samples, kRows);
auto n_features = HostAdapterDispatch(
&proxy, [](auto const &value) { return value.NumCols(); });
auto n_features = HostAdapterDispatch(&proxy, [](auto const &value) { return value.NumCols(); });
ASSERT_EQ(n_features, kCols);
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -19,10 +19,12 @@ namespace xgboost::data {
TEST(ProxyDMatrix, DeviceData) {
constexpr size_t kRows{100}, kCols{100};
HostDeviceVector<float> storage;
auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
auto data =
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
std::vector<HostDeviceVector<float>> label_storage(1);
auto labels =
RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);
auto labels = RandomDataGenerator(kRows, 1, 0)
.Device(FstCU())
.GenerateColumnarArrayInterface(&label_storage);
DMatrixProxy proxy;
proxy.SetCUDAArray(data.c_str());
@@ -35,7 +37,7 @@ TEST(ProxyDMatrix, DeviceData) {
std::vector<HostDeviceVector<float>> columnar_storage(kCols);
data = RandomDataGenerator(kRows, kCols, 0)
.Device(0)
.Device(FstCU())
.GenerateColumnarArrayInterface(&columnar_storage);
proxy.SetCUDAArray(data.c_str());
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));

View File

@@ -268,7 +268,7 @@ TEST(SimpleDMatrix, Slice) {
std::iota(upper.begin(), upper.end(), 1.0f);
auto& margin = p_m->Info().base_margin_;
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
std::array<int32_t, 3> ridxs {1, 3, 5};
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
@@ -341,7 +341,7 @@ TEST(SimpleDMatrix, SliceCol) {
std::iota(upper.begin(), upper.end(), 1.0f);
auto& margin = p_m->Info().base_margin_;
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
auto constexpr kSlices {2};
auto constexpr kSliceSize {4};
@@ -428,3 +428,21 @@ TEST(SimpleDMatrix, Threads) {
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0, "")};
ASSERT_EQ(p_fmat->Ctx()->Threads(), AllThreadsForTest());
}
namespace {
void VerifyColumnSplit() {
size_t constexpr kRows {16};
size_t constexpr kCols {8};
auto dmat =
RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(false, false, 1, DataSplitMode::kCol);
ASSERT_EQ(dmat->Info().num_col_, kCols * collective::GetWorldSize());
ASSERT_EQ(dmat->Info().num_row_, kRows);
ASSERT_EQ(dmat->Info().data_split_mode, DataSplitMode::kCol);
}
} // anonymous namespace
// Runs VerifyColumnSplit through the in-memory communicator with 3 workers.
TEST(SimpleDMatrix, ColumnSplit) {
  constexpr int kWorldSize = 3;
  RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit);
}

View File

@@ -138,11 +138,11 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
size_t offset = 0;
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
if (!impl_ext) {
impl_ext.reset(new EllpackPageImpl(
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
batch.Impl()->is_dense, batch.Impl()->row_stride, kRows));
impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
batch.Impl()->Cuts(), batch.Impl()->is_dense,
batch.Impl()->row_stride, kRows);
}
auto n_elems = impl_ext->Copy(0, batch.Impl(), offset);
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
offset += n_elems;
}
EXPECT_EQ(impl_ext->base_rowid, 0);
@@ -202,10 +202,12 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
EXPECT_EQ(impl_ext->base_rowid, current_row);
for (size_t i = 0; i < impl_ext->Size(); i++) {
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
row_d.data().get()));
thrust::copy(row_d.begin(), row_d.end(), row.begin());
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(0), current_row, row_ext_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(ctx.Device()), current_row,
row_ext_d.data().get()));
thrust::copy(row_ext_d.begin(), row_ext_d.end(), row_ext.begin());
EXPECT_EQ(row, row_ext);

View File

@@ -65,7 +65,7 @@ TEST(GBTree, PredictionCache) {
gbtree.Configure({{"tree_method", "hist"}});
auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
PredictionCacheEntry out_predictions;
@@ -156,7 +156,7 @@ TEST(GBTree, ChoosePredictor) {
// pull data into device.
data.HostVector();
data.SetDevice(0);
data.SetDevice(DeviceOrd::CUDA(0));
data.DeviceSpan();
ASSERT_FALSE(data.HostCanWrite());
@@ -215,7 +215,7 @@ TEST(GBTree, ChooseTreeMethod) {
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, Context::kCpuId};
linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, DeviceOrd::CPU()};
gpair.Data()->Copy(GenerateRandomGradients(Xy->Info().num_row_));
learner->BoostOneIter(0, Xy, &gpair);
}
@@ -400,7 +400,7 @@ class Dart : public testing::TestWithParam<char const*> {
if (device == "GPU") {
ctx = MakeCUDACtx(0);
}
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.gpu_id);
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.Device());
auto array_str = rng.GenerateArrayInterface(&data);
auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);
@@ -710,7 +710,7 @@ TEST(GBTree, InplacePredictionError) {
auto test_qdm_err = [&](std::string booster, Context const* ctx) {
std::shared_ptr<DMatrix> p_fmat;
bst_bin_t max_bins = 16;
auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->gpu_id).Bins(max_bins);
auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->Device()).Bins(max_bins);
if (ctx->IsCPU()) {
p_fmat = rng.GenerateQuantileDMatrix(true);
} else {

View File

@@ -22,7 +22,7 @@ void TestInplaceFallback(Context const* ctx) {
bst_feature_t n_features{32};
HostDeviceVector<float> X_storage;
// use a different device than the learner
std::int32_t data_ordinal = ctx->IsCPU() ? 0 : -1;
auto data_ordinal = ctx->IsCPU() ? DeviceOrd::CUDA(0) : DeviceOrd::CPU();
auto X = RandomDataGenerator{n_samples, n_features, 0.0}
.Device(data_ordinal)
.GenerateArrayInterface(&X_storage);
@@ -30,7 +30,7 @@ void TestInplaceFallback(Context const* ctx) {
auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);
std::shared_ptr<DMatrix> Xy;
if (data_ordinal == Context::kCpuId) {
if (data_ordinal.IsCPU()) {
auto X_adapter = data::ArrayAdapter{StringView{X}};
Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
} else {
@@ -49,7 +49,7 @@ void TestInplaceFallback(Context const* ctx) {
std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
if (data_ordinal == Context::kCpuId) {
if (data_ordinal.IsCPU()) {
proxy->SetArrayData(StringView{X});
} else {
proxy->SetCUDAArray(X.c_str());
@@ -64,7 +64,7 @@ void TestInplaceFallback(Context const* ctx) {
// test when the contexts match
Context new_ctx = *proxy->Ctx();
ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
ASSERT_NE(new_ctx.Ordinal(), ctx->Ordinal());
learner->SetParam("device", new_ctx.DeviceName());
HostDeviceVector<float>* out_predt_1{nullptr};

View File

@@ -119,8 +119,10 @@ void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels = xgboost::linalg::Tensor<float, 2>{
labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
info.labels = xgboost::linalg::Tensor<float, 2>{labels.cbegin(),
labels.cend(),
{labels.size(), static_cast<std::size_t>(1)},
xgboost::DeviceOrd::CPU()};
info.weights_.HostVector() = weights;
CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);
@@ -155,8 +157,10 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info;
info.num_row_ = labels.size();
info.labels = xgboost::linalg::Matrix<float>{
labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
info.labels = xgboost::linalg::Matrix<float>{labels.cbegin(),
labels.cend(),
{labels.size(), static_cast<std::size_t>(1)},
xgboost::DeviceOrd::CPU()};
info.weights_.HostVector() = weights;
info.group_ptr_ = groups;
@@ -171,8 +175,9 @@ xgboost::bst_float GetMetricEval(xgboost::Metric* metric,
xgboost::DataSplitMode data_split_mode) {
return GetMultiMetricEval(
metric, preds,
xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1}, weights,
groups, data_split_mode);
xgboost::linalg::Tensor<float, 2>{
labels.begin(), labels.end(), {labels.size()}, xgboost::DeviceOrd::CPU()},
weights, groups, data_split_mode);
}
double GetMultiMetricEval(xgboost::Metric* metric,
@@ -215,7 +220,7 @@ void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const
p_fmat->Info().labels.Data());
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
if (device_ != Context::kCpuId) {
if (device_.IsCUDA()) {
p_fmat->Info().labels.SetDevice(device_);
}
}
@@ -236,7 +241,7 @@ void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
v = dist(&lcg);
}
}
if (device_ >= 0) {
if (device_.IsCUDA()) {
out->SetDevice(device_);
out->DeviceSpan();
}
@@ -258,7 +263,7 @@ std::string RandomDataGenerator::GenerateArrayInterface(
std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,
std::size_t batches, std::int32_t device) {
std::size_t batches, DeviceOrd device) {
std::vector<std::string> result(batches);
std::vector<Json> objects;
@@ -267,7 +272,7 @@ std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {
Json array_interface{Object()};
array_interface["data"] = std::vector<Json>(2);
if (device >= 0) {
if (device.IsCUDA()) {
array_interface["data"][0] =
Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
array_interface["stream"] = Null{};
@@ -359,7 +364,7 @@ void RandomDataGenerator::GenerateCSR(
h_rptr.emplace_back(rptr);
}
if (device_ >= 0) {
if (device_.IsCUDA()) {
value->SetDevice(device_);
value->DeviceSpan();
row_ptr->SetDevice(device_);
@@ -373,9 +378,8 @@ void RandomDataGenerator::GenerateCSR(
CHECK_EQ(columns->Size(), value->Size());
}
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(bool with_label,
bool float_label,
size_t classes) const {
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(
bool with_label, bool float_label, size_t classes, DataSplitMode data_split_mode) const {
HostDeviceVector<float> data;
HostDeviceVector<bst_row_t> rptrs;
HostDeviceVector<bst_feature_t> columns;
@@ -383,7 +387,7 @@ void RandomDataGenerator::GenerateCSR(
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), rows_,
data.Size(), cols_);
std::shared_ptr<DMatrix> out{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1, "", data_split_mode)};
if (with_label) {
RandomDataGenerator gen{rows_, n_targets_, 0.0f};
@@ -400,7 +404,7 @@ void RandomDataGenerator::GenerateCSR(
out->Info().labels.Reshape(this->rows_, this->n_targets_);
}
}
if (device_ >= 0) {
if (device_.IsCUDA()) {
out->Info().labels.SetDevice(device_);
out->Info().feature_types.SetDevice(device_);
for (auto const& page : out->GetBatches<SparsePage>()) {
@@ -423,7 +427,7 @@ void RandomDataGenerator::GenerateCSR(
CHECK_GE(this->n_batches_, 1)
<< "Must set the n_batches before generating an external memory DMatrix.";
std::unique_ptr<ArrayIterForTest> iter;
if (device_ == Context::kCpuId) {
if (device_.IsCPU()) {
iter = std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
} else {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
@@ -487,7 +491,7 @@ int CudaArrayIterForTest::Next() {
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
size_t batches)
: ArrayIterForTest{sparsity, rows, cols, batches} {
rng_->Device(Context::kCpuId);
rng_->Device(DeviceOrd::CPU());
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
this->Reset();
}
@@ -644,8 +648,8 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
labels[i] = i;
}
p_dmat->Info().labels =
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
linalg::Matrix<GradientPair> gpair({kRows}, ctx->Ordinal());
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, DeviceOrd::CPU()};
linalg::Matrix<GradientPair> gpair({kRows}, ctx->Device());
auto h_gpair = gpair.HostView();
for (size_t i = 0; i < kRows; ++i) {
h_gpair(i) = GradientPair{static_cast<float>(i), 1};
@@ -674,7 +678,7 @@ ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> c
CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);
this->data_.Copy(data);
std::tie(batches_, interface_) =
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id);
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->Device());
}
ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }

View File

@@ -13,7 +13,7 @@ namespace xgboost {
CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
size_t cols, size_t batches)
: ArrayIterForTest{sparsity, rows, cols, batches} {
rng_->Device(0);
rng_->Device(FstCU());
std::tie(batches_, interface_) =
rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
this->Reset();

View File

@@ -231,7 +231,7 @@ class RandomDataGenerator {
bst_target_t n_targets_{1};
std::int32_t device_{Context::kCpuId};
DeviceOrd device_{DeviceOrd::CPU()};
std::size_t n_batches_{0};
std::uint64_t seed_{0};
SimpleLCG lcg_;
@@ -256,7 +256,7 @@ class RandomDataGenerator {
upper_ = v;
return *this;
}
RandomDataGenerator& Device(int32_t d) {
RandomDataGenerator& Device(DeviceOrd d) {
device_ = d;
return *this;
}
@@ -310,9 +310,9 @@ class RandomDataGenerator {
void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
HostDeviceVector<bst_feature_t>* columns) const;
[[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(bool with_label = false,
bool float_label = true,
size_t classes = 1) const;
[[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(
bool with_label = false, bool float_label = true, size_t classes = 1,
DataSplitMode data_split_mode = DataSplitMode::kRow) const;
[[nodiscard]] std::shared_ptr<DMatrix> GenerateSparsePageDMatrix(std::string prefix,
bool with_label) const;
@@ -391,7 +391,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
* \brief Make a context that uses CUDA if device >= 0.
*/
inline Context MakeCUDACtx(std::int32_t device) {
if (device == Context::kCpuId) {
if (device == DeviceOrd::CPUOrdinal()) {
return Context{};
}
return Context{}.MakeCUDA(device);
@@ -501,7 +501,7 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
* \brief Make learner model param
*/
inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups,
int32_t device = Context::kCpuId) {
DeviceOrd device = DeviceOrd::CPU()) {
size_t shape[1]{1};
LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
n_groups, 1, MultiStrategy::kOneOutputPerTree);
@@ -571,4 +571,5 @@ class BaseMGPUTest : public ::testing::Test {
class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{};
/**
 * \brief Shorthand used by the tests for `DeviceOrd::CUDA(0)`.
 */
inline DeviceOrd FstCU() {
  return DeviceOrd::CUDA(0);
}
} // namespace xgboost

View File

@@ -1,3 +1,8 @@
/**
* Copyright 2020-2023, XGBoost contributors
*/
#pragma once
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
#include "../../src/data/ellpack_page.cuh"
#endif
@@ -24,8 +29,8 @@ class HistogramCutsWrapper : public common::HistogramCuts {
};
} // anonymous namespace
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
int n_rows, int n_cols, bst_float sparsity= 0) {
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
bst_float sparsity = 0) {
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
@@ -49,7 +54,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
}
auto page = std::unique_ptr<EllpackPageImpl>(
new EllpackPageImpl(0, cmat, batch, dmat->IsDense(), row_stride, {}));
new EllpackPageImpl(DeviceOrd::CUDA(0), cmat, batch, dmat->IsDense(), row_stride, {}));
return page;
}

View File

@@ -28,7 +28,7 @@ inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow)
// Invalid dataset
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, DeviceOrd::CPU()};
float auc = metric->Evaluate({1, 1}, p_fmat);
ASSERT_TRUE(std::isnan(auc));
*info.labels.Data() = HostDeviceVector<float>{};

View File

@@ -3,8 +3,7 @@
*/
#include "test_elementwise_metric.h"
namespace xgboost {
namespace metric {
namespace xgboost::metric {
TEST(Metric, DeclareUnifiedTest(RMSE)) { VerifyRMSE(); }
TEST(Metric, DeclareUnifiedTest(RMSLE)) { VerifyRMSLE(); }
@@ -104,5 +103,4 @@ TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) {
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) {
DoTest(VerifyQuantile, DataSplitMode::kCol);
}
} // namespace metric
} // namespace xgboost
} // namespace xgboost::metric

View File

@@ -11,9 +11,7 @@
#include "../../../src/common/linalg_op.h"
#include "../helpers.h"
namespace xgboost {
namespace metric {
namespace xgboost::metric {
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
auto ctx = MakeCUDACtx(device);
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
@@ -325,14 +323,14 @@ inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode
}
inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = MakeCUDACtx(GPUIDX);
size_t n_samples = 32, n_targets = 8;
linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
linalg::Tensor<float, 2> y{{n_samples, n_targets}, ctx.Device()};
auto &h_y = y.Data()->HostVector();
std::iota(h_y.begin(), h_y.end(), 0);
HostDeviceVector<float> predt(n_samples * n_targets, 0);
auto ctx = MakeCUDACtx(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
metric->Configure({});
@@ -381,5 +379,4 @@ inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow)
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
}
} // namespace metric
} // namespace xgboost
} // namespace xgboost::metric

View File

@@ -154,7 +154,7 @@ inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRo
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.Device()};
info.num_row_ = info.labels.Shape(0);
info.group_ptr_.resize(2);
info.group_ptr_[0] = 0;

View File

@@ -71,7 +71,7 @@ void TestNDCGGPair(Context const* ctx) {
HostDeviceVector<float> predts{0, 1, 0, 1};
MetaInfo info;
info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, GPUIDX};
info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, ctx->Device()};
info.group_ptr_ = {0, 2, 4};
info.num_row_ = 4;
linalg::Matrix<GradientPair> gpairs;
@@ -146,7 +146,7 @@ TEST(LambdaRank, UnbiasedNDCG) {
}
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
out_predt->SetDevice(ctx->gpu_id);
out_predt->SetDevice(ctx->Device());
MetaInfo& info = *out_info;
info.num_row_ = 128;
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
@@ -243,7 +243,7 @@ void TestMAPStat(Context const* ctx) {
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
predt.SetDevice(ctx->gpu_id);
predt.SetDevice(ctx->Device());
auto rank_idx =
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
@@ -280,7 +280,7 @@ void TestMAPStat(Context const* ctx) {
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
predt.SetDevice(ctx->gpu_id);
predt.SetDevice(ctx->Device());
auto rank_idx =
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());

View File

@@ -6,6 +6,7 @@
#include <xgboost/objective.h>
#include "../helpers.h"
#include "../objective_helpers.h"
TEST(Objective, UnknownFunction) {
xgboost::ObjFunction* obj = nullptr;
@@ -43,4 +44,61 @@ TEST(Objective, PredTransform) {
ASSERT_TRUE(predts.HostCanWrite());
}
}
/**
 * Fixture parameterized by objective name.
 *
 * For the given objective it trains and evaluates one iteration with a learner, checks that the
 * saved learner config contains an objective "name", and, when the objective reports a default
 * metric config, checks that a metric created from it saves back the same config.
 */
class TestDefaultObjConfig : public ::testing::TestWithParam<std::string> {
  Context ctx_;

 public:
  void Run(std::string objective) {
    auto const name_has = [&objective](char const* token) {
      return objective.find(token) != std::string::npos;
    };

    // Extra parameters passed to both the learner and the stand-alone objective.
    Args extra;
    if (name_has("multi")) {
      extra.emplace_back("num_class", "3");
    } else if (name_has("quantile")) {
      extra.emplace_back("quantile_alpha", "0.5");
    }

    auto p_fmat = MakeFmatForObjTest(objective);
    std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
    std::unique_ptr<ObjFunction> obj{ObjFunction::Create(objective, &ctx_)};

    learner->SetParam("objective", objective);
    for (auto const& [key, value] : extra) {
      learner->SetParam(key, value);
    }
    obj->Configure(extra);

    learner->Configure();
    learner->UpdateOneIter(0, p_fmat);
    learner->EvalOneIter(0, {p_fmat}, {"train"});

    Json config{Object{}};
    learner->SaveConfig(&config);
    auto const& jobj = get<Object const>(config["learner"]["objective"]);
    ASSERT_TRUE(jobj.find("name") != jobj.cend());

    // FIXME(jiamingy): We should have the following check, but some legacy parameters like
    // "pos_weight", "delta_step" in objectives are not in metrics.

    // if (jobj.size() > 1) {
    //   ASSERT_FALSE(IsA<Null>(obj->DefaultMetricConfig()));
    // }
    auto default_metric = obj->DefaultMetricConfig();
    if (IsA<Null>(default_metric)) {
      return;  // this objective has no default metric config to verify
    }

    // Make sure the metric can load the config and save back the same content.
    std::unique_ptr<Metric> metric{
        Metric::Create(get<String const>(default_metric["name"]), &ctx_)};
    metric->LoadConfig(default_metric);
    Json round_trip{Object{}};
    metric->SaveConfig(&round_trip);
    metric->Configure(Args{});
    ASSERT_EQ(default_metric, round_trip);
  }
};
// Runs the fixture body for the objective name supplied as the test parameter.
TEST_P(TestDefaultObjConfig, Objective) {
  this->Run(GetParam());
}
// One instance per name returned by MakeObjNamesForTest(); ObjTestNameGenerator supplies the
// test name for each parameter.
INSTANTIATE_TEST_SUITE_P(
    Objective, TestDefaultObjConfig, ::testing::ValuesIn(MakeObjNamesForTest()),
    [](::testing::TestParamInfo<TestDefaultObjConfig::ParamType> const& param_info) {
      return ObjTestNameGenerator(param_info);
    });
} // namespace xgboost

View File

@@ -45,7 +45,7 @@ TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
MetaInfo info;
info.num_row_ = 10;
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
data->SetDevice(ctx.gpu_id);
data->SetDevice(ctx.Device());
data->Resize(info.num_row_);
shape[0] = info.num_row_;
shape[1] = 1;

View File

@@ -0,0 +1,31 @@
/**
* Copyright (c) 2023, XGBoost contributors
*/
#include "objective_helpers.h"
#include "../../src/common/linalg_op.h" // for begin, end
#include "helpers.h" // for RandomDataGenerator
namespace xgboost {
/**
 * \brief Build a small DMatrix for exercising the objective named `obj`.
 *
 * The matrix has 10 rows and 10 columns with generated labels.  The lower and upper label bounds
 * are set to 1 and 10 for every row.  When `obj` contains "rank:", the labels are overwritten
 * with an alternating 1, 0, 1, 0, ... pattern.
 *
 * \param obj Name of the objective under test.
 * \return The generated DMatrix.
 */
std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj) {
  std::size_t constexpr kRows = 10;
  std::size_t constexpr kCols = 10;
  auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);

  auto& info = p_fmat->Info();
  info.labels_upper_bound_.HostVector().assign(kRows, 10.0f);
  info.labels_lower_bound_.HostVector().assign(kRows, 1.0f);

  if (obj.find("rank:") != std::string::npos) {
    auto h_label = info.labels.HostView();
    std::size_t k = 0;
    for (auto& v : h_label) {
      v = (k % 2 == 0) ? 1.0f : 0.0f;
      ++k;
    }
  }
  return p_fmat;
}
} // namespace xgboost

View File

@@ -1,6 +1,8 @@
/**
* Copyright (c) 2023, XGBoost contributors
*/
#pragma once
#include <dmlc/registry.h> // for Registry
#include <gtest/gtest.h>
#include <xgboost/objective.h> // for ObjFunctionReg
@@ -29,4 +31,6 @@ inline std::string ObjTestNameGenerator(const ::testing::TestParamInfo<ParamType
}
return name;
};
std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj);
} // namespace xgboost

View File

@@ -23,7 +23,7 @@ class ServerForTest {
std::unique_ptr<grpc::Server> server_;
public:
explicit ServerForTest(std::int32_t world_size) {
explicit ServerForTest(std::size_t world_size) {
server_thread_.reset(new std::thread([this, world_size] {
grpc::ServerBuilder builder;
xgboost::federated::FederatedService service{world_size};

View File

@@ -19,6 +19,11 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
CheckAllgather(comm, rank);
}
static void VerifyAllgatherV(int rank, const std::string &server_address) {
FederatedCommunicator comm{kWorldSize, rank, server_address};
CheckAllgatherV(comm, rank);
}
static void VerifyAllreduce(int rank, const std::string &server_address) {
FederatedCommunicator comm{kWorldSize, rank, server_address};
CheckAllreduce(comm);
@@ -31,14 +36,19 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
protected:
static void CheckAllgather(FederatedCommunicator &comm, int rank) {
int buffer[kWorldSize] = {0, 0};
buffer[rank] = rank;
comm.AllGather(buffer, sizeof(buffer));
std::string input{static_cast<char>('0' + rank)};
auto output = comm.AllGather(input);
for (auto i = 0; i < kWorldSize; i++) {
EXPECT_EQ(buffer[i], i);
EXPECT_EQ(output[i], static_cast<char>('0' + i));
}
}
// Each rank contributes its own, differently sized piece; the gathered result is expected to be
// the full sentence.
static void CheckAllgatherV(FederatedCommunicator &comm, int rank) {
  std::vector<std::string_view> const pieces{"Federated", " Learning!!!"};
  auto const gathered = comm.AllGatherV(pieces[rank]);
  EXPECT_EQ(gathered, "Federated Learning!!!");
}
static void CheckAllreduce(FederatedCommunicator &comm) {
int buffer[] = {1, 2, 3, 4, 5};
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum);
@@ -119,6 +129,16 @@ TEST_F(FederatedCommunicatorTest, Allgather) {
}
}
// Starts one thread per rank, each running VerifyAllgatherV against the fixture's server
// address, then joins every thread before the test returns.
TEST_F(FederatedCommunicatorTest, AllgatherV) {
  std::vector<std::thread> workers;
  for (auto rank = 0; rank < kWorldSize; ++rank) {
    workers.emplace_back(&FederatedCommunicatorTest::VerifyAllgatherV, rank, server_->Address());
  }
  for (auto &worker : workers) {
    worker.join();
  }
}
TEST_F(FederatedCommunicatorTest, Allreduce) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {

View File

@@ -120,6 +120,11 @@ TEST_P(VerticalFederatedLearnerTest, Hist) {
}
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
// Parameterised over the objective: runs the fixture with tree method "approx" on device
// "cuda:0".
TEST_P(VerticalFederatedLearnerTest, GPUApprox) {
  std::string objective_name = GetParam();
  this->Run("approx", "cuda:0", objective_name);
}
TEST_P(VerticalFederatedLearnerTest, GPUHist) {
std::string objective = GetParam();
this->Run("hist", "cuda:0", objective);

View File

@@ -18,6 +18,11 @@ class FederatedServerTest : public BaseFederatedTest {
CheckAllgather(client, rank);
}
static void VerifyAllgatherV(int rank, const std::string& server_address) {
federated::FederatedClient client{server_address, rank};
CheckAllgatherV(client, rank);
}
static void VerifyAllreduce(int rank, const std::string& server_address) {
federated::FederatedClient client{server_address, rank};
CheckAllreduce(client);
@@ -39,8 +44,7 @@ class FederatedServerTest : public BaseFederatedTest {
protected:
static void CheckAllgather(federated::FederatedClient& client, int rank) {
int data[kWorldSize] = {0, 0};
data[rank] = rank;
int data[] = {rank};
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
auto reply = client.Allgather(send_buffer);
auto const* result = reinterpret_cast<int const*>(reply.data());
@@ -49,6 +53,12 @@ class FederatedServerTest : public BaseFederatedTest {
}
}
// Variable-length all-gather check: the fragment selected by `rank` is sent through
// AllgatherV and the reply is expected to be both fragments joined in order.
static void CheckAllgatherV(federated::FederatedClient& client, int rank) {
  std::vector<std::string_view> fragments{"Hello,", " World!"};
  auto gathered = client.AllgatherV(fragments[rank]);
  EXPECT_EQ(gathered, "Hello, World!");
}
static void CheckAllreduce(federated::FederatedClient& client) {
int data[] = {1, 2, 3, 4, 5};
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
@@ -80,6 +90,16 @@ TEST_F(FederatedServerTest, Allgather) {
}
}
// Starts one thread per rank, each running VerifyAllgatherV against the fixture's server
// address, then joins every thread before the test returns.
TEST_F(FederatedServerTest, AllgatherV) {
  std::vector<std::thread> workers;
  for (auto rank = 0; rank < kWorldSize; ++rank) {
    workers.emplace_back(&FederatedServerTest::VerifyAllgatherV, rank, server_->Address());
  }
  for (auto& worker : workers) {
    worker.join();
  }
}
TEST_F(FederatedServerTest, Allreduce) {
std::vector<std::thread> threads;
for (auto rank = 0; rank < kWorldSize; rank++) {

View File

@@ -127,8 +127,8 @@ TEST(CpuPredictor, IterationRange) {
}
TEST(CpuPredictor, IterationRangeColmnSplit) {
Context ctx;
TestIterationRangeColumnSplit(&ctx);
auto constexpr kWorldSize = 2;
TestIterationRangeColumnSplit(kWorldSize, false);
}
TEST(CpuPredictor, ExternalMemory) {
@@ -142,7 +142,7 @@ TEST(CpuPredictor, InplacePredict) {
bst_row_t constexpr kRows{128};
bst_feature_t constexpr kCols{64};
Context ctx;
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.gpu_id);
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
{
HostDeviceVector<float> data;
gen.GenerateDense(&data);
@@ -226,23 +226,21 @@ TEST(CPUPredictor, GHistIndexTraining) {
}
TEST(CPUPredictor, CategoricalPrediction) {
Context ctx;
TestCategoricalPrediction(&ctx, false);
TestCategoricalPrediction(false, false);
}
TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
Context ctx;
TestCategoricalPredictionColumnSplit(&ctx);
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, false, true);
}
TEST(CPUPredictor, CategoricalPredictLeaf) {
Context ctx;
TestCategoricalPredictLeaf(&ctx, false);
TestCategoricalPredictLeaf(false, false);
}
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
Context ctx;
TestCategoricalPredictLeafColumnSplit(&ctx);
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, false, true);
}
TEST(CpuPredictor, UpdatePredictionCache) {
@@ -256,8 +254,8 @@ TEST(CpuPredictor, LesserFeatures) {
}
TEST(CpuPredictor, LesserFeaturesColumnSplit) {
Context ctx;
TestPredictionWithLesserFeaturesColumnSplit(&ctx);
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestPredictionWithLesserFeaturesColumnSplit, false);
}
TEST(CpuPredictor, Sparse) {
@@ -267,9 +265,9 @@ TEST(CpuPredictor, Sparse) {
}
TEST(CpuPredictor, SparseColumnSplit) {
Context ctx;
TestSparsePredictionColumnSplit(&ctx, 0.2);
TestSparsePredictionColumnSplit(&ctx, 0.8);
auto constexpr kWorldSize = 2;
TestSparsePredictionColumnSplit(kWorldSize, false, 0.2);
TestSparsePredictionColumnSplit(kWorldSize, false, 0.8);
}
TEST(CpuPredictor, Multi) {

View File

@@ -38,7 +38,7 @@ TEST(GPUPredictor, Basic) {
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
auto ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
@@ -74,7 +74,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
@@ -102,7 +102,7 @@ TEST_F(MGPUPredictorTest, BasicColumnSplit) {
size_t n_row = i, n_col = i;
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
@@ -123,8 +123,10 @@ TEST(GPUPredictor, EllpackBasic) {
auto ctx = MakeCUDACtx(0);
for (size_t bins = 2; bins < 258; bins += 16) {
size_t rows = bins * 16;
auto p_m =
RandomDataGenerator{rows, kCols, 0.0}.Bins(bins).Device(0).GenerateDeviceDMatrix(false);
auto p_m = RandomDataGenerator{rows, kCols, 0.0}
.Bins(bins)
.Device(DeviceOrd::CUDA(0))
.GenerateDeviceDMatrix(false);
ASSERT_FALSE(p_m->PageExists<SparsePage>());
TestPredictionFromGradientIndex<EllpackPage>(&ctx, rows, kCols, p_m);
TestPredictionFromGradientIndex<EllpackPage>(&ctx, bins, kCols, p_m);
@@ -136,11 +138,11 @@ TEST(GPUPredictor, EllpackTraining) {
size_t constexpr kRows{128}, kCols{16}, kBins{64};
auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
.Bins(kBins)
.Device(ctx.Ordinal())
.Device(ctx.Device())
.GenerateDeviceDMatrix(false);
HostDeviceVector<float> storage(kRows * kCols);
auto columnar =
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Device()).GenerateArrayInterface(&storage);
auto adapter = data::CupyAdapter(columnar);
std::shared_ptr<DMatrix> p_full{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
@@ -155,7 +157,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
const int n_classes = 3;
Context ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Device())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
std::vector<std::unique_ptr<DMatrix>> dmats;
@@ -166,7 +168,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
for (const auto& dmat: dmats) {
dmat->Info().base_margin_ = decltype(dmat->Info().base_margin_){
{dmat->Info().num_row_, static_cast<size_t>(n_classes)}, 0};
{dmat->Info().num_row_, static_cast<size_t>(n_classes)}, DeviceOrd::CUDA(0)};
dmat->Info().base_margin_.Data()->Fill(0.5);
PredictionCacheEntry out_predictions;
gpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
@@ -185,7 +187,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows{128}, kCols{64};
RandomDataGenerator gen(kRows, kCols, 0.5);
gen.Device(ctx.Ordinal());
gen.Device(ctx.Device());
HostDeviceVector<float> data;
std::string interface_str = gen.GenerateArrayInterface(&data);
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@@ -197,7 +199,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows{128}, kCols{64};
RandomDataGenerator gen(kRows, kCols, 0.5);
gen.Device(ctx.Ordinal());
gen.Device(ctx.Device());
std::vector<HostDeviceVector<float>> storage(kCols);
auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@@ -210,6 +212,10 @@ TEST(GpuPredictor, LesserFeatures) {
TestPredictionWithLesserFeatures(&ctx);
}
// GPU variant of the lesser-features column-split check, dispatched through
// RunWithInMemoryCommunicator with the fixture's world size.
TEST_F(MGPUPredictorTest, LesserFeaturesColumnSplit) {
  RunWithInMemoryCommunicator(world_size_, TestPredictionWithLesserFeaturesColumnSplit,
                              /*use_gpu=*/true);
}
// Very basic test of empty model
TEST(GPUPredictor, ShapStump) {
#if defined(XGBOOST_USE_CUDA)
@@ -219,7 +225,7 @@ TEST(GPUPredictor, ShapStump) {
#endif
auto ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
gbm::GBTreeModel model(&mparam, &ctx);
std::vector<std::unique_ptr<RegTree>> trees;
@@ -245,7 +251,7 @@ TEST(GPUPredictor, ShapStump) {
TEST(GPUPredictor, Shap) {
auto ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
gbm::GBTreeModel model(&mparam, &ctx);
std::vector<std::unique_ptr<RegTree>> trees;
@@ -278,19 +284,29 @@ TEST(GPUPredictor, IterationRange) {
TestIterationRange(&ctx);
}
// GPU variant of the iteration-range column-split check, using the fixture's world size.
TEST_F(MGPUPredictorTest, IterationRangeColumnSplit) {
  bool constexpr kUseGpu = true;
  TestIterationRangeColumnSplit(world_size_, kUseGpu);
}
TEST(GPUPredictor, CategoricalPrediction) {
auto ctx = MakeCUDACtx(0);
TestCategoricalPrediction(&ctx, false);
TestCategoricalPrediction(true, false);
}
// GPU + column-split variant of the categorical prediction check, dispatched through
// RunWithInMemoryCommunicator with the fixture's world size.
TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
  RunWithInMemoryCommunicator(world_size_, TestCategoricalPrediction, /*use_gpu=*/true,
                              /*is_column_split=*/true);
}
TEST(GPUPredictor, CategoricalPredictLeaf) {
auto ctx = MakeCUDACtx(0);
TestCategoricalPredictLeaf(&ctx, false);
TestCategoricalPredictLeaf(true, false);
}
// GPU + column-split variant of the categorical leaf-prediction check, dispatched through
// RunWithInMemoryCommunicator with the fixture's world size.
TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
  RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, /*use_gpu=*/true,
                              /*is_column_split=*/true);
}
TEST(GPUPredictor, PredictLeafBasic) {
size_t constexpr kRows = 5, kCols = 5;
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(0).GenerateDMatrix();
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
auto lparam = MakeCUDACtx(GPUIDX);
std::unique_ptr<Predictor> gpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
@@ -313,4 +329,9 @@ TEST(GPUPredictor, Sparse) {
TestSparsePrediction(&ctx, 0.2);
TestSparsePrediction(&ctx, 0.8);
}
// GPU variant of the sparse column-split prediction check, run at two sparsity levels
// (0.2, then 0.8) with the fixture's world size.
TEST_F(MGPUPredictorTest, SparseColumnSplit) {
  bool constexpr kUseGpu = true;
  TestSparsePredictionColumnSplit(world_size_, kUseGpu, 0.2);
  TestSparsePredictionColumnSplit(world_size_, kUseGpu, 0.8);
}
} // namespace xgboost::predictor

View File

@@ -34,7 +34,7 @@ TEST(Predictor, PredictionCache) {
// Add a cache that is immediately expired.
auto add_cache = [&]() {
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
container.Cache(p_dmat, Context::kCpuId);
container.Cache(p_dmat, DeviceOrd::CPU());
m = p_dmat.get();
};
@@ -93,7 +93,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
bst_feature_t cols) {
std::size_t constexpr kClasses { 4 };
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->gpu_id);
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true, false, kClasses);
std::unique_ptr<Learner> learner {
@@ -172,16 +172,6 @@ void VerifyPredictionWithLesserFeatures(Learner *learner, bst_row_t kRows,
ASSERT_THROW({ learner->Predict(m_invalid, false, &prediction, 0, 0); }, dmlc::Error);
}
// Column-split wrapper around VerifyPredictionWithLesserFeatures: takes the
// SliceCol(world_size, rank) slice of both matrices for the current communicator rank and
// forwards the slices to the plain verifier.
void VerifyPredictionWithLesserFeaturesColumnSplit(Learner *learner, size_t rows,
                                                   std::shared_ptr<DMatrix> m_test,
                                                   std::shared_ptr<DMatrix> m_invalid) {
  auto const n_workers = collective::GetWorldSize();
  auto const worker_rank = collective::GetRank();
  std::shared_ptr<DMatrix> test_slice{m_test->SliceCol(n_workers, worker_rank)};
  std::shared_ptr<DMatrix> invalid_slice{m_invalid->SliceCol(n_workers, worker_rank)};
  VerifyPredictionWithLesserFeatures(learner, rows, test_slice, invalid_slice);
}
} // anonymous namespace
void TestPredictionWithLesserFeatures(Context const *ctx) {
@@ -202,7 +192,7 @@ void TestPredictionDeviceAccess() {
HostDeviceVector<float> from_cpu;
{
ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
ASSERT_TRUE(from_cpu.Device().IsCPU());
Context cpu_ctx;
learner->SetParam("device", cpu_ctx.DeviceName());
learner->Predict(m_test, false, &from_cpu, 0, 0);
@@ -216,7 +206,7 @@ void TestPredictionDeviceAccess() {
Context cuda_ctx = MakeCUDACtx(0);
learner->SetParam("device", cuda_ctx.DeviceName());
learner->Predict(m_test, false, &from_cuda, 0, 0);
ASSERT_EQ(from_cuda.DeviceIdx(), 0);
ASSERT_EQ(from_cuda.Device(), DeviceOrd::CUDA(0));
ASSERT_TRUE(from_cuda.DeviceCanWrite());
ASSERT_FALSE(from_cuda.HostCanRead());
}
@@ -229,16 +219,24 @@ void TestPredictionDeviceAccess() {
#endif // defined(XGBOOST_USE_CUDA)
}
void TestPredictionWithLesserFeaturesColumnSplit(Context const *ctx) {
size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);
auto learner = LearnerForTest(ctx, m_train, kIters);
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu) {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
std::size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).Seed(rank).GenerateDMatrix(true);
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : rank);
}
auto learner = LearnerForTest(&ctx, m_train, kIters);
auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);
auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, VerifyPredictionWithLesserFeaturesColumnSplit,
learner.get(), kRows, m_test, m_invalid);
std::shared_ptr<DMatrix> sliced_test{m_test->SliceCol(world_size, rank)};
std::shared_ptr<DMatrix> sliced_invalid{m_invalid->SliceCol(world_size, rank)};
VerifyPredictionWithLesserFeatures(learner.get(), kRows, sliced_test, sliced_invalid);
}
void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
@@ -260,7 +258,11 @@ void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
model->CommitModelGroup(std::move(trees), 0);
}
void TestCategoricalPrediction(Context const* ctx, bool is_column_split) {
void TestCategoricalPrediction(bool use_gpu, bool is_column_split) {
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
}
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
@@ -270,10 +272,10 @@ void TestCategoricalPrediction(Context const* ctx, bool is_column_split) {
float left_weight = 1.3f;
float right_weight = 1.7f;
gbm::GBTreeModel model(&mparam, ctx);
gbm::GBTreeModel model(&mparam, &ctx);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
std::vector<float> row(kCols);
row[split_ind] = split_cat;
@@ -303,12 +305,11 @@ void TestCategoricalPrediction(Context const* ctx, bool is_column_split) {
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
}
// Runs TestCategoricalPrediction in column-split mode through RunWithInMemoryCommunicator
// with a fixed world size of 2, forwarding the caller's context.
void TestCategoricalPredictionColumnSplit(Context const *ctx) {
  auto constexpr kWorldSize = 2;
  RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, ctx,
                              /*is_column_split=*/true);
}
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
}
size_t constexpr kCols = 10;
PredictionCacheEntry out_predictions;
@@ -319,10 +320,10 @@ void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
float left_weight = 1.3f;
float right_weight = 1.7f;
gbm::GBTreeModel model(&mparam, ctx);
gbm::GBTreeModel model(&mparam, &ctx);
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
std::vector<float> row(kCols);
row[split_ind] = split_cat;
@@ -347,15 +348,10 @@ void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
ASSERT_EQ(out_predictions.predictions.HostVector()[0], 1);
}
// Runs TestCategoricalPredictLeaf in column-split mode through RunWithInMemoryCommunicator
// with a fixed world size of 2, forwarding the caller's context.
void TestCategoricalPredictLeafColumnSplit(Context const *ctx) {
  auto constexpr kWorldSize = 2;
  RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, ctx,
                              /*is_column_split=*/true);
}
void TestIterationRange(Context const* ctx) {
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
auto dmat = RandomDataGenerator(kRows, kCols, 0)
.Device(ctx->gpu_id)
.Device(ctx->Device())
.GenerateDMatrix(true, true, kClasses);
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
@@ -411,15 +407,30 @@ void TestIterationRange(Context const* ctx) {
}
namespace {
void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *sliced,
void VerifyIterationRangeColumnSplit(bool use_gpu, Json const &ranged_model,
Json const &sliced_model, std::size_t rows, std::size_t cols,
std::size_t classes,
std::vector<float> const &expected_margin_ranged,
std::vector<float> const &expected_margin_sliced,
std::vector<float> const &expected_leaf_ranged,
std::vector<float> const &expected_leaf_sliced) {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : rank);
}
auto dmat = RandomDataGenerator(rows, cols, 0).GenerateDMatrix(true, true, classes);
std::shared_ptr<DMatrix> Xy{dmat->SliceCol(world_size, rank)};
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
learner->SetParam("device", ctx.DeviceName());
learner->LoadModel(ranged_model);
std::unique_ptr<Learner> sliced{Learner::Create({Xy})};
sliced->SetParam("device", ctx.DeviceName());
sliced->LoadModel(sliced_model);
HostDeviceVector<float> out_predt_sliced;
HostDeviceVector<float> out_predt_ranged;
@@ -428,11 +439,15 @@ void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *s
sliced->Predict(Xy, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
learner->Predict(Xy, true, &out_predt_ranged, 0, 3, false, false, false, false, false);
auto const &h_sliced = out_predt_sliced.HostVector();
auto const &h_range = out_predt_ranged.HostVector();
ASSERT_EQ(h_sliced.size(), expected_margin_sliced.size());
ASSERT_EQ(h_sliced, expected_margin_sliced);
ASSERT_EQ(h_range.size(), expected_margin_ranged.size());
ASSERT_EQ(h_range, expected_margin_ranged);
auto const &h_ranged = out_predt_ranged.HostVector();
EXPECT_EQ(h_sliced.size(), expected_margin_sliced.size());
for (std::size_t i = 0; i < expected_margin_sliced.size(); ++i) {
ASSERT_FLOAT_EQ(h_sliced[i], expected_margin_sliced[i]) << "rank " << rank << ", i " << i;
}
EXPECT_EQ(h_ranged.size(), expected_margin_ranged.size());
for (std::size_t i = 0; i < expected_margin_ranged.size(); ++i) {
ASSERT_FLOAT_EQ(h_ranged[i], expected_margin_ranged[i]) << "rank " << rank << ", i " << i;
}
}
// Leaf
@@ -440,21 +455,27 @@ void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *s
sliced->Predict(Xy, false, &out_predt_sliced, 0, 0, false, true, false, false, false);
learner->Predict(Xy, false, &out_predt_ranged, 0, 3, false, true, false, false, false);
auto const &h_sliced = out_predt_sliced.HostVector();
auto const &h_range = out_predt_ranged.HostVector();
ASSERT_EQ(h_sliced.size(), expected_leaf_sliced.size());
ASSERT_EQ(h_sliced, expected_leaf_sliced);
ASSERT_EQ(h_range.size(), expected_leaf_ranged.size());
ASSERT_EQ(h_range, expected_leaf_ranged);
auto const &h_ranged = out_predt_ranged.HostVector();
EXPECT_EQ(h_sliced.size(), expected_leaf_sliced.size());
for (std::size_t i = 0; i < expected_leaf_sliced.size(); ++i) {
ASSERT_FLOAT_EQ(h_sliced[i], expected_leaf_sliced[i]) << "rank " << rank << ", i " << i;
}
EXPECT_EQ(h_ranged.size(), expected_leaf_ranged.size());
for (std::size_t i = 0; i < expected_leaf_ranged.size(); ++i) {
ASSERT_FLOAT_EQ(h_ranged[i], expected_leaf_ranged[i]) << "rank " << rank << ", i " << i;
}
}
}
} // anonymous namespace
void TestIterationRangeColumnSplit(Context const* ctx) {
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
void TestIterationRangeColumnSplit(int world_size, bool use_gpu) {
std::size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
learner->SetParam("device", ctx->DeviceName());
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(0);
}
auto learner = LearnerForTest(&ctx, dmat, kIters, kForest);
bool bound = false;
std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};
@@ -476,9 +497,13 @@ void TestIterationRangeColumnSplit(Context const* ctx) {
auto const &leaf_sliced = leaf_predt_sliced.HostVector();
auto const &leaf_ranged = leaf_predt_ranged.HostVector();
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, VerifyIterationRangeColumnSplit, dmat.get(),
learner.get(), sliced.get(), margin_ranged, margin_sliced,
Json ranged_model{Object{}};
learner->SaveModel(&ranged_model);
Json sliced_model{Object{}};
sliced->SaveModel(&sliced_model);
RunWithInMemoryCommunicator(world_size, VerifyIterationRangeColumnSplit, use_gpu, ranged_model,
sliced_model, kRows, kCols, kClasses, margin_ranged, margin_sliced,
leaf_ranged, leaf_sliced);
}
@@ -497,7 +522,7 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
if (ctx->IsCUDA()) {
learner->SetParam("tree_method", "gpu_hist");
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
learner->SetParam("device", ctx->Device().Name());
}
learner->Predict(Xy, false, &sparse_predt, 0, 0);
@@ -539,11 +564,20 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
}
namespace {
void VerifySparsePredictionColumnSplit(DMatrix *dmat, Learner *learner,
void VerifySparsePredictionColumnSplit(bool use_gpu, Json const &model, std::size_t rows,
std::size_t cols, float sparsity,
std::vector<float> const &expected_predt) {
std::shared_ptr<DMatrix> sliced{
dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
}
auto Xy = RandomDataGenerator(rows, cols, sparsity).GenerateDMatrix(true);
std::shared_ptr<DMatrix> sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
HostDeviceVector<float> sparse_predt;
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
learner->SetParam("device", ctx.DeviceName());
learner->LoadModel(model);
learner->Predict(sliced, false, &sparse_predt, 0, 0);
auto const &predt = sparse_predt.HostVector();
@@ -554,10 +588,14 @@ void VerifySparsePredictionColumnSplit(DMatrix *dmat, Learner *learner,
}
} // anonymous namespace
void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
void TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsity) {
Context ctx;
if (use_gpu) {
ctx = MakeCUDACtx(0);
}
size_t constexpr kRows = 512, kCols = 128, kIters = 4;
auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true);
auto learner = LearnerForTest(ctx, Xy, kIters);
auto learner = LearnerForTest(&ctx, Xy, kIters);
HostDeviceVector<float> sparse_predt;
@@ -567,12 +605,11 @@ void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
learner.reset(Learner::Create({Xy}));
learner->LoadModel(model);
learner->SetParam("device", ctx->DeviceName());
learner->SetParam("device", ctx.DeviceName());
learner->Predict(Xy, false, &sparse_predt, 0, 0);
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, VerifySparsePredictionColumnSplit, Xy.get(),
learner.get(), sparse_predt.HostVector());
RunWithInMemoryCommunicator(world_size, VerifySparsePredictionColumnSplit, use_gpu, model,
kRows, kCols, sparsity, sparse_predt.HostVector());
}
void TestVectorLeafPrediction(Context const *ctx) {
@@ -583,7 +620,7 @@ void TestVectorLeafPrediction(Context const *ctx) {
size_t constexpr kCols = 5;
LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),
linalg::Vector<float>{{0.5}, {1}, Context::kCpuId}, 1, 3,
linalg::Vector<float>{{0.5}, {1}, DeviceOrd::CPU()}, 1, 3,
MultiStrategy::kMultiOutputTree};
std::vector<std::unique_ptr<RegTree>> trees;

View File

@@ -94,23 +94,19 @@ void TestPredictionWithLesserFeatures(Context const* ctx);
void TestPredictionDeviceAccess();
void TestCategoricalPrediction(Context const* ctx, bool is_column_split);
void TestCategoricalPrediction(bool use_gpu, bool is_column_split);
void TestCategoricalPredictionColumnSplit(Context const* ctx);
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);
void TestPredictionWithLesserFeaturesColumnSplit(Context const* ctx);
void TestCategoricalPredictLeaf(Context const* ctx, bool is_column_split);
void TestCategoricalPredictLeafColumnSplit(Context const* ctx);
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split);
void TestIterationRange(Context const* ctx);
void TestIterationRangeColumnSplit(Context const* ctx);
void TestIterationRangeColumnSplit(int world_size, bool use_gpu);
void TestSparsePrediction(Context const* ctx, float sparsity);
void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity);
void TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsity);
void TestVectorLeafPrediction(Context const* ctx);
} // namespace xgboost

View File

@@ -5,11 +5,13 @@
#include <xgboost/base.h>
#include <xgboost/context.h>
#include <sstream>
namespace xgboost {
TEST(Context, CPU) {
Context ctx;
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);
ASSERT_EQ(ctx.Ordinal(), DeviceOrd::CPUOrdinal());
std::int32_t flag{0};
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
@@ -27,5 +29,20 @@ TEST(Context, CPU) {
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
std::stringstream ss;
ss << ctx.Device();
ASSERT_EQ(ss.str(), "cpu");
}
// Initialising a Context with the key "foo" must raise dmlc::Error, and the error text must
// mention that key.
TEST(Context, ErrorInit) {
  Context ctx;
  ASSERT_THROW({ ctx.Init({{"foo", "bar"}}); }, dmlc::Error);

  // Repeat the failing call so the exception message itself can be inspected.
  try {
    ctx.Init({{"foo", "bar"}});
  } catch (dmlc::Error const& err) {
    std::string const what{err.what()};
    auto const key_pos = what.find("foo");
    ASSERT_NE(key_pos, std::string::npos);
  }
}
} // namespace xgboost

View File

@@ -13,7 +13,6 @@
namespace xgboost {
namespace {
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
ASSERT_EQ(ctx.gpu_id, ord);
ASSERT_EQ(ctx.Device().ordinal, ord);
ASSERT_EQ(ctx.DeviceName(), "cuda:" + std::to_string(ord));
ASSERT_EQ(ctx.Ordinal(), ord);
@@ -25,7 +24,7 @@ void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
Context new_ctx;
FromJson(jctx, &new_ctx);
ASSERT_EQ(new_ctx.Device(), ctx.Device());
ASSERT_EQ(new_ctx.gpu_id, ctx.gpu_id);
ASSERT_EQ(new_ctx.Ordinal(), ctx.Ordinal());
}
} // namespace
@@ -53,7 +52,7 @@ TEST(Context, DeviceOrdinal) {
auto cpu_ctx = ctx.MakeCPU();
ASSERT_TRUE(cpu_ctx.IsCPU());
ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
ASSERT_EQ(cpu_ctx.Ordinal(), DeviceOrd::CPUOrdinal());
ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());
auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());

View File

@@ -655,33 +655,11 @@ TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); }
TEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); }
class TestColumnSplit : public ::testing::TestWithParam<std::string> {
// Builds a 10x10 random DMatrix (generated with labels) for the objective `obj`.
// Every row gets a lower label bound of 1 and an upper label bound of 10. When `obj`
// contains "rank:", the labels are overwritten with an alternating 1, 0, 1, 0, ... pattern.
static auto MakeFmat(std::string const& obj) {
  auto constexpr kRows = 10, kCols = 10;
  auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);

  auto& upper_bounds = p_fmat->Info().labels_upper_bound_.HostVector();
  auto& lower_bounds = p_fmat->Info().labels_lower_bound_.HostVector();
  lower_bounds.resize(kRows);
  upper_bounds.resize(kRows);
  // Both vectors now hold exactly kRows entries, so filling them covers every row.
  for (auto& lower : lower_bounds) {
    lower = 1;
  }
  for (auto& upper : upper_bounds) {
    upper = 10;
  }

  bool const is_ranking = obj.find("rank:") != std::string::npos;
  if (is_ranking) {
    auto h_label = p_fmat->Info().labels.HostView();
    bool even_position = true;
    for (auto& label : h_label) {
      label = even_position;
      even_position = !even_position;
    }
  }
  return p_fmat;
};
void TestBaseScore(std::string objective, float expected_base_score, Json expected_model) {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto p_fmat = MakeFmat(objective);
auto p_fmat = MakeFmatForObjTest(objective);
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
learner->SetParam("tree_method", "approx");
@@ -705,7 +683,7 @@ class TestColumnSplit : public ::testing::TestWithParam<std::string> {
public:
void Run(std::string objective) {
auto p_fmat = MakeFmat(objective);
auto p_fmat = MakeFmatForObjTest(objective);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("tree_method", "approx");
learner->SetParam("objective", objective);
@@ -740,4 +718,112 @@ INSTANTIATE_TEST_SUITE_P(ColumnSplitObjective, TestColumnSplit,
[](const ::testing::TestParamInfo<TestColumnSplit::ParamType>& info) {
return ObjTestNameGenerator(info);
});
namespace {
// Creates a learner on `dmat`, sets tree_method, device and a fixed "reg:logistic"
// objective, applies `args` on top, runs one update iteration (iteration 0) and returns the
// saved model as JSON.
Json GetModelWithArgs(std::shared_ptr<DMatrix> dmat, std::string const& tree_method,
                      std::string const& device, Args const& args) {
  std::unique_ptr<Learner> booster{Learner::Create({dmat})};
  booster->SetParam("tree_method", tree_method);
  booster->SetParam("device", device);
  booster->SetParam("objective", "reg:logistic");
  booster->SetParams(args);
  booster->UpdateOneIter(0, dmat);

  Json serialized{Object{}};
  booster->SaveModel(&serialized);
  return serialized;
}
// Worker body for the column-split comparison: trains on this rank's SliceCol slice of the
// test matrix and asserts the resulting model equals `expected_model`.
// With use_gpu the device is "cuda:<id>", where <id> is 0 when exactly one GPU is visible
// and the communicator rank otherwise; without it the device is "cpu".
void VerifyColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Args const& args,
                               Json const& expected_model) {
  auto const world_size = collective::GetWorldSize();
  auto const rank = collective::GetRank();
  auto p_fmat = MakeFmatForObjTest("");
  std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};

  // The GPU count is only queried when a GPU run was requested.
  std::string const device =
      use_gpu ? "cuda:" + std::to_string(common::AllVisibleGPUs() == 1 ? 0 : rank)
              : std::string{"cpu"};

  auto trained = GetModelWithArgs(sliced, tree_method, device, args);
  ASSERT_EQ(trained, expected_model);
}
// Trains a reference model on the full matrix (device "cuda:0" or "cpu"), then has every
// worker of an in-memory communicator train on its column slice and compare against it.
// World size is 3 on CPU. On GPU it is the number of visible GPUs, except that a single
// visible GPU still uses 3 workers.
void TestColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Args const& args) {
  auto p_fmat = MakeFmatForObjTest("");
  std::string device = use_gpu ? "cuda:0" : "cpu";
  auto reference_model = GetModelWithArgs(p_fmat, tree_method, device, args);

  int world_size = 3;
  if (use_gpu) {
    auto const n_gpus = common::AllVisibleGPUs();
    // With exactly one GPU, keep the default of 3 workers to simulate multi-GPU on it.
    if (n_gpus != 1) {
      world_size = n_gpus;
    }
  }
  RunWithInMemoryCommunicator(world_size, VerifyColumnSplitWithArgs, tree_method, use_gpu, args,
                              reference_model);
}
void TestColumnSplitColumnSampler(std::string const& tree_method, bool use_gpu) {
Args args{{"colsample_bytree", "0.5"}, {"colsample_bylevel", "0.6"}, {"colsample_bynode", "0.7"}};
TestColumnSplitWithArgs(tree_method, use_gpu, args);
}
void TestColumnSplitInteractionConstraints(std::string const& tree_method, bool use_gpu) {
Args args{{"interaction_constraints", "[[0, 5, 7], [2, 8, 9], [1, 3, 6]]"}};
TestColumnSplitWithArgs(tree_method, use_gpu, args);
}
void TestColumnSplitMonotoneConstraints(std::string const& tree_method, bool use_gpu) {
Args args{{"monotone_constraints", "(1,-1,0,1,1,-1,-1,0,0,1)"}};
TestColumnSplitWithArgs(tree_method, use_gpu, args);
}
} // anonymous namespace
// Column sampling under column split, CPU: one test per tree method.
TEST(ColumnSplitColumnSampler, Approx) { TestColumnSplitColumnSampler("approx", false); }
TEST(ColumnSplitColumnSampler, Hist) { TestColumnSplitColumnSampler("hist", false); }
#if defined(XGBOOST_USE_CUDA)
// Same checks with use_gpu = true.
// NOTE(review): guarded by XGBOOST_USE_CUDA only; confirm whether ROCm/HIP builds should
// compile these as well.
TEST(MGPUColumnSplitColumnSampler, GPUApprox) { TestColumnSplitColumnSampler("approx", true); }
TEST(MGPUColumnSplitColumnSampler, GPUHist) { TestColumnSplitColumnSampler("hist", true); }
#endif // defined(XGBOOST_USE_CUDA)
// Interaction constraints under column split, CPU: one test per tree method.
TEST(ColumnSplitInteractionConstraints, Approx) {
TestColumnSplitInteractionConstraints("approx", false);
}
TEST(ColumnSplitInteractionConstraints, Hist) {
TestColumnSplitInteractionConstraints("hist", false);
}
#if defined(XGBOOST_USE_CUDA)
// Same checks with use_gpu = true.
// NOTE(review): guarded by XGBOOST_USE_CUDA only; confirm whether ROCm/HIP builds should
// compile these as well.
TEST(MGPUColumnSplitInteractionConstraints, GPUApprox) {
TestColumnSplitInteractionConstraints("approx", true);
}
TEST(MGPUColumnSplitInteractionConstraints, GPUHist) {
TestColumnSplitInteractionConstraints("hist", true);
}
#endif // defined(XGBOOST_USE_CUDA)
// Monotone constraints under column split, CPU: one test per tree method.
TEST(ColumnSplitMonotoneConstraints, Approx) {
TestColumnSplitMonotoneConstraints("approx", false);
}
TEST(ColumnSplitMonotoneConstraints, Hist) {
TestColumnSplitMonotoneConstraints("hist", false);
}
#if defined(XGBOOST_USE_CUDA)
// Same checks with use_gpu = true.
// NOTE(review): guarded by XGBOOST_USE_CUDA only; confirm whether ROCm/HIP builds should
// compile these as well.
TEST(MGPUColumnSplitMonotoneConstraints, GPUApprox) {
TestColumnSplitMonotoneConstraints("approx", true);
}
TEST(MGPUColumnSplitMonotoneConstraints, GPUHist) {
TestColumnSplitMonotoneConstraints("hist", true);
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost

View File

@@ -210,9 +210,9 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
}
// Pull data to device
for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(0);
batch.data.SetDevice(DeviceOrd::CUDA(0));
batch.data.DeviceSpan();
batch.offset.SetDevice(0);
batch.offset.SetDevice(DeviceOrd::CUDA(0));
batch.offset.DeviceSpan();
}

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2020-2022 by XGBoost contributors
/**
* Copyright 2020-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/host_vector.h>
@@ -13,9 +13,7 @@
#include "../../histogram_helpers.h"
#include "../test_evaluate_splits.h" // TestPartitionBasedSplit
namespace xgboost {
namespace tree {
namespace xgboost::tree {
namespace {
auto ZeroParam() {
auto args = Args{{"min_child_weight", "0"}, {"lambda", "0"}};
@@ -41,11 +39,12 @@ thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPa
}
TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
auto ctx = MakeCUDACtx(0);
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
GPUTrainingParam param{param_};
cuts_.cut_ptrs_.SetDevice(0);
cuts_.cut_values_.SetDevice(0);
cuts_.min_vals_.SetDevice(0);
cuts_.cut_ptrs_.SetDevice(ctx.Device());
cuts_.cut_values_.SetDevice(ctx.Device());
cuts_.min_vals_.SetDevice(ctx.Device());
thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
@@ -61,9 +60,10 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
cuts_.min_vals_.ConstDeviceSpan(),
false};
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), 0};
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false, 0);
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
ASSERT_EQ(result.thresh, 1);
@@ -73,6 +73,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
}
TEST(GpuHist, PartitionBasic) {
auto ctx = MakeCUDACtx(0);
TrainParam tparam = ZeroParam();
tparam.max_cat_to_onehot = 0;
GPUTrainingParam param{tparam};
@@ -81,9 +82,9 @@ TEST(GpuHist, PartitionBasic) {
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
cuts.min_vals_.HostVector() = std::vector<float>{0.0};
cuts.cut_ptrs_.SetDevice(0);
cuts.cut_values_.SetDevice(0);
cuts.min_vals_.SetDevice(0);
cuts.cut_ptrs_.SetDevice(ctx.Device());
cuts.cut_values_.SetDevice(ctx.Device());
cuts.min_vals_.SetDevice(ctx.Device());
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
@@ -104,8 +105,8 @@ TEST(GpuHist, PartitionBasic) {
false,
};
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());
{
// -1.0s go right
@@ -187,6 +188,7 @@ TEST(GpuHist, PartitionBasic) {
}
TEST(GpuHist, PartitionTwoFeatures) {
auto ctx = MakeCUDACtx(0);
TrainParam tparam = ZeroParam();
tparam.max_cat_to_onehot = 0;
GPUTrainingParam param{tparam};
@@ -195,9 +197,9 @@ TEST(GpuHist, PartitionTwoFeatures) {
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0, 0.0, 1.0, 2.0};
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3, 6};
cuts.min_vals_.HostVector() = std::vector<float>{0.0, 0.0};
cuts.cut_ptrs_.SetDevice(0);
cuts.cut_values_.SetDevice(0);
cuts.min_vals_.SetDevice(0);
cuts.cut_ptrs_.SetDevice(ctx.Device());
cuts.cut_values_.SetDevice(ctx.Device());
cuts.min_vals_.SetDevice(ctx.Device());
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
@@ -216,8 +218,8 @@ TEST(GpuHist, PartitionTwoFeatures) {
cuts.min_vals_.ConstDeviceSpan(),
false};
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
@@ -247,6 +249,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
}
TEST(GpuHist, PartitionTwoNodes) {
auto ctx = MakeCUDACtx(0);
TrainParam tparam = ZeroParam();
tparam.max_cat_to_onehot = 0;
GPUTrainingParam param{tparam};
@@ -255,9 +258,9 @@ TEST(GpuHist, PartitionTwoNodes) {
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
cuts.min_vals_.HostVector() = std::vector<float>{0.0};
cuts.cut_ptrs_.SetDevice(0);
cuts.cut_values_.SetDevice(0);
cuts.min_vals_.SetDevice(0);
cuts.cut_ptrs_.SetDevice(ctx.Device());
cuts.cut_values_.SetDevice(ctx.Device());
cuts.min_vals_.SetDevice(ctx.Device());
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
@@ -276,8 +279,10 @@ TEST(GpuHist, PartitionTwoNodes) {
cuts.min_vals_.ConstDeviceSpan(),
false};
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
ctx.Device());
{
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
@@ -299,12 +304,14 @@ TEST(GpuHist, PartitionTwoNodes) {
}
void TestEvaluateSingleSplit(bool is_categorical) {
auto ctx = MakeCUDACtx(0);
auto quantiser = DummyRoundingFactor();
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
common::HistogramCuts cuts{MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, 0)};
common::HistogramCuts cuts{
MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, ctx.Device())};
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
@@ -329,8 +336,10 @@ void TestEvaluateSingleSplit(bool is_categorical) {
cuts.min_vals_.ConstDeviceSpan(),
false};
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
EXPECT_EQ(result.findex, 1);
@@ -367,7 +376,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
dh::ToSpan(feature_min_values),
false};
GPUHistEvaluator evaluator(tparam, feature_set.size(), 0);
GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
EXPECT_EQ(result.findex, 0);
@@ -379,7 +388,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
TEST(GpuHist, EvaluateSingleSplitEmpty) {
TrainParam tparam = ZeroParam();
GPUHistEvaluator evaluator(tparam, 1, 0);
GPUHistEvaluator evaluator(tparam, 1, FstCU());
DeviceSplitCandidate result =
evaluator
.EvaluateSingleSplit(
@@ -414,7 +423,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
dh::ToSpan(feature_min_values),
false};
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
EXPECT_EQ(result.findex, 1);
@@ -446,7 +455,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
dh::ToSpan(feature_min_values),
false};
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
EXPECT_EQ(result.findex, 0);
@@ -481,7 +490,8 @@ TEST(GpuHist, EvaluateSplits) {
dh::ToSpan(feature_min_values),
false};
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()), 0};
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()),
FstCU()};
dh::device_vector<EvaluateSplitInputs> inputs =
std::vector<EvaluateSplitInputs>{input_left, input_right};
evaluator.LaunchEvaluateSplits(input_left.feature_set.size(), dh::ToSpan(inputs), shared_inputs,
@@ -497,14 +507,15 @@ TEST(GpuHist, EvaluateSplits) {
}
TEST_F(TestPartitionBasedSplit, GpuHist) {
auto ctx = MakeCUDACtx(0);
dh::device_vector<FeatureType> ft{std::vector<FeatureType>{FeatureType::kCategorical}};
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), 0};
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), ctx.Device()};
cuts_.cut_ptrs_.SetDevice(0);
cuts_.cut_values_.SetDevice(0);
cuts_.min_vals_.SetDevice(0);
cuts_.cut_ptrs_.SetDevice(ctx.Device());
cuts_.cut_values_.SetDevice(ctx.Device());
cuts_.min_vals_.SetDevice(ctx.Device());
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, 0);
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());
// Convert the sample histogram to fixed point
auto quantiser = DummyRoundingFactor();
@@ -532,15 +543,16 @@ class MGPUHistTest : public BaseMGPUTest {};
namespace {
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
auto ctx = MakeCUDACtx(GPUIDX);
auto rank = collective::GetRank();
auto quantiser = DummyRoundingFactor();
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
TrainParam tparam = ZeroParam();
GPUTrainingParam param{tparam};
common::HistogramCuts cuts{rank == 0
? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, GPUIDX)
: MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, GPUIDX)};
common::HistogramCuts cuts{
rank == 0 ? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, ctx.Device())
: MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, ctx.Device())};
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
// Setup gradients so that second feature gets higher gain
@@ -566,8 +578,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
cuts.min_vals_.ConstDeviceSpan(),
false};
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), GPUIDX};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, GPUIDX);
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
@@ -587,5 +599,4 @@ TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleSplit) {
TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleCategoricalSplit) {
DoTest(VerifyColumnSplitEvaluateSingleSplit, true);
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -34,9 +34,9 @@ void VerifySampling(size_t page_size,
for (const auto& gp : gpair.ConstHostVector()) {
sum_gpair += gp;
}
gpair.SetDevice(0);
Context ctx{MakeCUDACtx(0)};
gpair.SetDevice(ctx.Device());
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
if (page_size != 0) {
@@ -91,9 +91,9 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
std::unique_ptr<DMatrix> dmat(
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
auto gpair = GenerateRandomGradients(kRows);
gpair.SetDevice(0);
Context ctx{MakeCUDACtx(0)};
gpair.SetDevice(ctx.Device());
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_NE(page->n_rows, kRows);

View File

@@ -17,9 +17,7 @@
#include "../../categorical_helpers.h"
#include "../../helpers.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
void TestDeterministicHistogram(bool is_dense, int shm_size) {
Context ctx = MakeCUDACtx(0);
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
@@ -32,22 +30,22 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
tree::RowPartitioner row_partitioner(0, kRows);
tree::RowPartitioner row_partitioner(FstCU(), kRows);
auto ridx = row_partitioner.GetRows(0);
int num_bins = kBins * kCols;
dh::device_vector<GradientPairInt64> histogram(num_bins);
auto d_histogram = dh::ToSpan(histogram);
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
gpair.SetDevice(0);
gpair.SetDevice(FstCU());
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
sizeof(GradientPairInt64));
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx, d_histogram,
quantiser);
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
d_histogram, quantiser);
std::vector<GradientPairInt64> histogram_h(num_bins);
#if defined(XGBOOST_USE_CUDA)
@@ -65,8 +63,8 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
auto d_new_histogram = dh::ToSpan(new_histogram);
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
d_new_histogram, quantiser);
std::vector<GradientPairInt64> new_histogram_h(num_bins);
@@ -87,14 +85,14 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
{
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
gpair.SetDevice(0);
gpair.SetDevice(FstCU());
// Use a single feature group to compute the baseline.
FeatureGroups single_group(page->Cuts());
dh::device_vector<GradientPairInt64> baseline(num_bins);
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
single_group.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
dh::ToSpan(baseline), quantiser);
std::vector<GradientPairInt64> baseline_h(num_bins);
@@ -149,11 +147,11 @@ void TestGPUHistogramCategorical(size_t num_categories) {
auto cat_m = GetDMatrixFromData(x, kRows, 1);
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
tree::RowPartitioner row_partitioner(0, kRows);
tree::RowPartitioner row_partitioner(ctx.Device(), kRows);
auto ridx = row_partitioner.GetRows(0);
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
auto gpair = GenerateRandomGradients(kRows, 0, 2);
gpair.SetDevice(0);
gpair.SetDevice(DeviceOrd::CUDA(0));
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
/**
* Generate hist with cat data.
@@ -161,8 +159,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
FeatureGroups single_group(page->Cuts());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
dh::ToSpan(cat_hist), quantiser);
}
@@ -175,8 +173,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
FeatureGroups single_group(page->Cuts());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
dh::ToSpan(encode_hist), quantiser);
}
@@ -264,5 +262,4 @@ void TestAtomicAdd() {
TEST(Histogram, AtomicAddInt64) {
TestAtomicAdd();
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -20,12 +20,10 @@
#include "xgboost/task.h"
#include "xgboost/tree_model.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
void TestUpdatePositionBatch() {
const int kNumRows = 10;
RowPartitioner rp(0, kNumRows);
RowPartitioner rp(FstCU(), kNumRows);
auto rows = rp.GetRowsHost(0);
EXPECT_EQ(rows.size(), kNumRows);
for (auto i = 0ull; i < kNumRows; i++) {
@@ -100,12 +98,11 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
}
}
TEST(GpuHist, SortPositionBatch) {
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
TEST(GpuHist, SortPositionBatch) {
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 6}});
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{3, 6}, {0, 2}});
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -115,7 +115,7 @@ TEST(HistMultiEvaluator, Evaluate) {
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};
HistMakerTrainParam hist_param;
std::vector<BoundedHistCollection> histogram(n_targets);
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
linalg::Vector<GradientPairPrecise> root_sum({2}, DeviceOrd::CPU());
for (bst_target_t t{0}; t < n_targets; ++t) {
auto &hist = histogram[t];
hist.Reset(n_bins * n_features, hist_param.max_cached_hist_node);

View File

@@ -76,7 +76,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
GradientPairPrecise parent_sum) {
int32_t best_thresh = -1;
float best_score{-std::numeric_limits<float>::infinity()};
TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), -1};
TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), DeviceOrd::CPU()};
auto tree_evaluator = evaluator.GetEvaluator<TrainParam>();
GradientPairPrecise left_sum;
auto parent_gain = tree_evaluator.CalcGain(0, param_, GradStats{total_gpair_});
@@ -111,13 +111,13 @@ class TestPartitionBasedSplit : public ::testing::Test {
};
inline auto MakeCutsForTest(std::vector<float> values, std::vector<uint32_t> ptrs,
std::vector<float> min_values, int32_t device) {
std::vector<float> min_values, DeviceOrd device) {
common::HistogramCuts cuts;
cuts.cut_values_.HostVector() = values;
cuts.cut_ptrs_.HostVector() = ptrs;
cuts.min_vals_.HostVector() = min_values;
if (device >= 0) {
if (device.IsCUDA()) {
cuts.cut_ptrs_.SetDevice(device);
cuts.cut_values_.SetDevice(device);
cuts.min_vals_.SetDevice(device);
@@ -136,7 +136,7 @@ class TestCategoricalSplitWithMissing : public testing::Test {
TrainParam param_;
void SetUp() override {
cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, -1);
cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, DeviceOrd::CPU());
auto max_cat = *std::max_element(cuts_.cut_values_.HostVector().begin(),
cuts_.cut_values_.HostVector().end());
cuts_.SetCategorical(true, max_cat);

View File

@@ -40,7 +40,7 @@ TEST(GpuHist, DeviceHistogram) {
constexpr int kNNodes = 4;
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
DeviceHistogramStorage<kStopGrowing> histogram;
histogram.Init(0, kNBins);
histogram.Init(FstCU(), kNBins);
for (int i = 0; i < kNNodes; ++i) {
histogram.AllocateHistograms({i});
}
@@ -113,12 +113,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
bst_float hess = dist(&gen);
gp = GradientPair(grad, hess);
}
gpair.SetDevice(0);
gpair.SetDevice(DeviceOrd::CUDA(0));
thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
maker.row_partitioner = std::make_unique<RowPartitioner>(0, kNRows);
maker.row_partitioner = std::make_unique<RowPartitioner>(FstCU(), kNRows);
maker.hist.Init(0, page->Cuts().TotalBins());
maker.hist.Init(FstCU(), page->Cuts().TotalBins());
maker.hist.AllocateHistograms({0});
maker.gpair = gpair.DeviceSpan();
@@ -127,8 +127,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
maker.InitFeatureGroupsOnce();
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(),
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(DeviceOrd::CUDA(0)),
maker.feature_groups->DeviceAccessor(DeviceOrd::CUDA(0)), gpair.DeviceSpan(),
maker.row_partitioner->GetRows(0), maker.hist.GetNodeHistogram(0),
*maker.quantiser, !use_shared_memory_histograms);
@@ -215,7 +215,7 @@ void TestHistogramIndexImpl() {
// histogram index
const auto &maker = hist_maker.maker;
auto grad = GenerateRandomGradients(kNRows);
grad.SetDevice(0);
grad.SetDevice(DeviceOrd::CUDA(0));
maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
std::vector<common::CompressedByteT> h_gidx_buffer(maker->page->gidx_buffer.HostVector());
@@ -281,17 +281,17 @@ TEST(GpuHist, UniformSampling) {
// Create an in-memory DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Ordinal());
linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
Context ctx(MakeCUDACtx(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
kRows);
@@ -312,18 +312,18 @@ TEST(GpuHist, GradientBasedSampling) {
// Create an in-memory DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Ordinal());
linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
Context ctx(MakeCUDACtx(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
"gradient_based", kRows);
@@ -350,16 +350,16 @@ TEST(GpuHist, ExternalMemory) {
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrix(kRows, kCols, 1, tmpdir.path + "/cache"));
Context ctx(MakeCUDACtx(0));
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using multiple ELLPACK pages.
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
// Make sure the predictions are the same.
@@ -388,20 +388,20 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
Context ctx(MakeCUDACtx(0));
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
// Build a tree using the in-memory DMatrix.
auto rng = common::GlobalRandom();
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
// Build another tree using multiple ELLPACK pages.
common::GlobalRandom() = rng;
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
kSamplingMethod, kRows);
@@ -445,7 +445,7 @@ TEST(GpuHist, MaxDepth) {
}
namespace {
RegTree GetUpdatedTree(Context const* ctx, DMatrix* dmat) {
RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
ObjInfo task{ObjInfo::kRegression};
GPUHistMaker hist_maker{ctx, &task};
hist_maker.Configure(Args{});
@@ -453,7 +453,7 @@ RegTree GetUpdatedTree(Context const* ctx, DMatrix* dmat) {
TrainParam param;
param.UpdateAllowUnknown(Args{});
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Ordinal());
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Device());
gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));
std::vector<HostDeviceVector<bst_node_t>> position(1);
@@ -463,7 +463,7 @@ RegTree GetUpdatedTree(Context const* ctx, DMatrix* dmat) {
return tree;
}
void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
void VerifyHistColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
Context ctx(MakeCUDACtx(GPUIDX));
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
@@ -471,7 +471,7 @@ void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expect
auto const rank = collective::GetRank();
std::unique_ptr<DMatrix> sliced{Xy->SliceCol(world_size, rank)};
RegTree tree = GetUpdatedTree(&ctx, sliced.get());
RegTree tree = GetHistTree(&ctx, sliced.get());
Json json{Object{}};
tree.SaveModel(&json);
@@ -489,8 +489,58 @@ TEST_F(MGPUHistTest, GPUHistColumnSplit) {
Context ctx(MakeCUDACtx(0));
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
RegTree expected_tree = GetUpdatedTree(&ctx, dmat.get());
RegTree expected_tree = GetHistTree(&ctx, dmat.get());
DoTest(VerifyColumnSplit, kRows, kCols, expected_tree);
DoTest(VerifyHistColumnSplit, kRows, kCols, expected_tree);
}
namespace {
/**
 * @brief Grow a single tree with GPUGlobalApproxMaker using default parameters and
 *        randomly generated gradients.
 *
 * @param ctx  Context handed to the updater; its device is also used for the gradient
 *             matrix.
 * @param dmat Training data; its row count determines the number of gradient pairs.
 *
 * @return The tree filled in by the updater.
 */
RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
  ObjInfo task{ObjInfo::kRegression};
  GPUGlobalApproxMaker updater{ctx, &task};
  updater.Configure(Args{});

  // No arguments are supplied, so the training parameters keep their defaults.
  TrainParam tparam;
  tparam.UpdateAllowUnknown(Args{});

  // One gradient pair per row of the input.
  auto const n_samples = dmat->Info().num_row_;
  linalg::Matrix<GradientPair> gpair({n_samples}, ctx->Device());
  gpair.Data()->Copy(GenerateRandomGradients(n_samples));

  std::vector<HostDeviceVector<bst_node_t>> position(1);
  RegTree out;
  updater.Update(&tparam, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
                 {&out});
  return out;
}
/**
 * @brief Per-worker check: grow an approx tree on this worker's column slice and compare
 *        its JSON form with that of the expected tree.
 *
 * @param rows          Number of rows of the generated data.
 * @param cols          Number of columns of the generated data, before slicing.
 * @param expected_tree Tree the result must serialize identically to.
 */
void VerifyApproxColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
  Context ctx(MakeCUDACtx(GPUIDX));

  auto p_full = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
  auto const n_workers = collective::GetWorldSize();
  auto const worker_id = collective::GetRank();

  // Each worker only keeps its own share of the columns.
  std::unique_ptr<DMatrix> p_slice{p_full->SliceCol(n_workers, worker_id)};
  RegTree grown = GetApproxTree(&ctx, p_slice.get());

  // Trees are compared through their saved JSON representation.
  Json actual{Object{}};
  grown.SaveModel(&actual);
  Json expected{Object{}};
  expected_tree.SaveModel(&expected);
  ASSERT_EQ(actual, expected);
}
} // anonymous namespace
// Fixture for the GPU approx column-split test; adds nothing on top of BaseMGPUTest.
class MGPUApproxTest : public BaseMGPUTest {};
// Grows a reference approx tree on the full data with device 0, then has every worker
// verify that its column-split tree matches it.
TEST_F(MGPUApproxTest, GPUApproxColumnSplit) {
  constexpr auto kRows = 32;
  constexpr auto kCols = 16;

  auto ctx = MakeCUDACtx(0);
  auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
  RegTree reference = GetApproxTree(&ctx, p_fmat.get());

  DoTest(VerifyApproxColumnSplit, kRows, kCols, reference);
}
} // namespace xgboost::tree

View File

@@ -28,7 +28,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
auto p_dmat = GenerateDMatrix(kRows, kCols);
Context ctx;
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
ObjInfo task{ObjInfo::kRegression};
@@ -74,7 +74,7 @@ void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
RegTree const& expected_tree) {
Context ctx;
auto p_dmat = GenerateDMatrix(rows, cols, categorical);
linalg::Matrix<GradientPair> gpair({rows}, ctx.Ordinal());
linalg::Matrix<GradientPair> gpair({rows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(rows));
@@ -107,7 +107,7 @@ void TestColumnSplit(bool categorical) {
{
Context ctx;
auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);

View File

@@ -12,9 +12,9 @@ TEST(MultiTargetTree, JsonIO) {
bst_feature_t n_features{4};
RegTree tree{n_targets, n_features};
ASSERT_TRUE(tree.IsMultiTarget());
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, Context::kCpuId};
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, Context::kCpuId};
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, DeviceOrd::CPU()};
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, DeviceOrd::CPU()};
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, DeviceOrd::CPU()};
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
left_weight.HostView(), right_weight.HostView());
ASSERT_EQ(tree.NumNodes(), 3);

View File

@@ -33,7 +33,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
ObjInfo task{ObjInfo::kRegression};
param.Init(Args{});
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
up->Configure(Args{});
RegTree tree{1u, kCols};
@@ -78,7 +78,7 @@ class UpdaterEtaTest : public ::testing::Test {
void RunTest(std::string updater) {
ObjInfo task{ObjInfo::kClassification};
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
float eta = 0.4;
auto up_0 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};