enable ROCm on latest XGBoost
This commit is contained in:
@@ -1,17 +1,17 @@
|
||||
if (USE_DMLC_GTEST)
|
||||
if (NOT TARGET gtest)
|
||||
if(USE_DMLC_GTEST)
|
||||
if(NOT TARGET gtest)
|
||||
message(FATAL_ERROR "USE_DMLC_GTEST=ON but dmlc-core didn't bundle gtest")
|
||||
endif (NOT TARGET gtest)
|
||||
endif()
|
||||
set(GTEST_LIBRARIES gtest)
|
||||
else (USE_DMLC_GTEST)
|
||||
else()
|
||||
find_package(GTest REQUIRED)
|
||||
endif (USE_DMLC_GTEST)
|
||||
endif()
|
||||
file(GLOB_RECURSE TEST_SOURCES "*.cc")
|
||||
|
||||
if (USE_CUDA)
|
||||
if(USE_CUDA)
|
||||
file(GLOB_RECURSE CUDA_TEST_SOURCES "*.cu")
|
||||
list(APPEND TEST_SOURCES ${CUDA_TEST_SOURCES})
|
||||
endif (USE_CUDA)
|
||||
endif()
|
||||
|
||||
if (USE_HIP)
|
||||
file(GLOB_RECURSE HIP_TEST_SOURCES "*.hip")
|
||||
@@ -19,24 +19,24 @@ if (USE_HIP)
|
||||
endif (USE_HIP)
|
||||
|
||||
file(GLOB_RECURSE ONEAPI_TEST_SOURCES "plugin/*_oneapi.cc")
|
||||
if (NOT PLUGIN_UPDATER_ONEAPI)
|
||||
if(NOT PLUGIN_UPDATER_ONEAPI)
|
||||
list(REMOVE_ITEM TEST_SOURCES ${ONEAPI_TEST_SOURCES})
|
||||
endif (NOT PLUGIN_UPDATER_ONEAPI)
|
||||
endif()
|
||||
|
||||
if (PLUGIN_FEDERATED)
|
||||
if(PLUGIN_FEDERATED)
|
||||
target_include_directories(testxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/federated)
|
||||
target_link_libraries(testxgboost PRIVATE federated_client)
|
||||
else (PLUGIN_FEDERATED)
|
||||
else()
|
||||
file(GLOB_RECURSE FEDERATED_TEST_SOURCES "plugin/*_federated_*.*")
|
||||
list(REMOVE_ITEM TEST_SOURCES ${FEDERATED_TEST_SOURCES})
|
||||
endif (PLUGIN_FEDERATED)
|
||||
endif()
|
||||
|
||||
target_sources(testxgboost PRIVATE ${TEST_SOURCES} ${xgboost_SOURCE_DIR}/plugin/example/custom_obj.cc)
|
||||
|
||||
if (USE_CUDA AND PLUGIN_RMM)
|
||||
if(USE_CUDA AND PLUGIN_RMM)
|
||||
find_package(CUDA)
|
||||
target_include_directories(testxgboost PRIVATE ${CUDA_INCLUDE_DIRS})
|
||||
endif (USE_CUDA AND PLUGIN_RMM)
|
||||
endif()
|
||||
|
||||
if (USE_HIP AND PLUGIN_RMM)
|
||||
find_package(HIP)
|
||||
|
||||
@@ -108,6 +108,7 @@ TEST(CAPI, XGDMatrixCreateFromCSR) {
|
||||
Json::Dump(data_arr, &sdata);
|
||||
Json config{Object{}};
|
||||
config["missing"] = Number{std::numeric_limits<float>::quiet_NaN()};
|
||||
config["data_split_mode"] = Integer{static_cast<int64_t>(DataSplitMode::kCol)};
|
||||
Json::Dump(config, &sconfig);
|
||||
|
||||
DMatrixHandle handle;
|
||||
@@ -120,6 +121,8 @@ TEST(CAPI, XGDMatrixCreateFromCSR) {
|
||||
ASSERT_EQ(n, 3);
|
||||
ASSERT_EQ(XGDMatrixNumNonMissing(handle, &n), 0);
|
||||
ASSERT_EQ(n, 3);
|
||||
ASSERT_EQ(XGDMatrixDataSplitMode(handle, &n), 0);
|
||||
ASSERT_EQ(n, static_cast<int64_t>(DataSplitMode::kCol));
|
||||
|
||||
std::shared_ptr<xgboost::DMatrix> *pp_fmat =
|
||||
static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
|
||||
|
||||
41
tests/cpp/collective/net_test.h
Normal file
41
tests/cpp/collective/net_test.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Copyright 2022-2023, XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/collective/socket.h>
|
||||
|
||||
#include <fstream> // ifstream
|
||||
|
||||
#include "../helpers.h" // for FileExists
|
||||
|
||||
namespace xgboost::collective {
|
||||
class SocketTest : public ::testing::Test {
|
||||
protected:
|
||||
std::string skip_msg_{"Skipping IPv6 test"};
|
||||
|
||||
bool SkipTest() {
|
||||
std::string path{"/sys/module/ipv6/parameters/disable"};
|
||||
if (FileExists(path)) {
|
||||
std::ifstream fin(path);
|
||||
if (!fin) {
|
||||
return true;
|
||||
}
|
||||
std::string s_value;
|
||||
fin >> s_value;
|
||||
auto value = std::stoi(s_value);
|
||||
if (value != 0) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override { system::SocketStartup(); }
|
||||
void TearDown() override { system::SocketFinalize(); }
|
||||
};
|
||||
} // namespace xgboost::collective
|
||||
117
tests/cpp/collective/test_allgather.cc
Normal file
117
tests/cpp/collective/test_allgather.cc
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h> // for ASSERT_EQ
|
||||
#include <xgboost/span.h> // for Span, oper...
|
||||
|
||||
#include <algorithm> // for min
|
||||
#include <chrono> // for seconds
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t
|
||||
#include <numeric> // for iota
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/collective/allgather.h" // for RingAllgather
|
||||
#include "../../../src/collective/comm.h" // for RabitComm
|
||||
#include "gtest/gtest.h" // for AssertionR...
|
||||
#include "test_worker.h" // for TestDistri...
|
||||
#include "xgboost/collective/result.h" // for Result
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
// Gives the allgather cases their own suite name while reusing the TrackerTest fixture unchanged.
class AllgatherTest : public TrackerTest {};
|
||||
|
||||
class Worker : public WorkerForTest {
|
||||
public:
|
||||
using WorkerForTest::WorkerForTest;
|
||||
|
||||
void Run() {
|
||||
{
|
||||
// basic test
|
||||
std::vector<std::int32_t> data(comm_.World(), 0);
|
||||
data[comm_.Rank()] = comm_.Rank();
|
||||
|
||||
auto rc = RingAllgather(this->comm_, common::Span{data.data(), data.size()}, 1);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
|
||||
for (std::int32_t r = 0; r < comm_.World(); ++r) {
|
||||
ASSERT_EQ(data[r], r);
|
||||
}
|
||||
}
|
||||
{
|
||||
// test for limited socket buffer
|
||||
this->LimitSockBuf(4096);
|
||||
|
||||
std::size_t n = 8192; // n_bytes = 8192 * sizeof(int)
|
||||
std::vector<std::int32_t> data(comm_.World() * n, 0);
|
||||
auto s_data = common::Span{data.data(), data.size()};
|
||||
auto seg = s_data.subspan(comm_.Rank() * n, n);
|
||||
std::iota(seg.begin(), seg.end(), comm_.Rank());
|
||||
|
||||
auto rc = RingAllgather(comm_, common::Span{data.data(), data.size()}, n);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
|
||||
for (std::int32_t r = 0; r < comm_.World(); ++r) {
|
||||
auto seg = s_data.subspan(r * n, n);
|
||||
for (std::int32_t i = 0; i < static_cast<std::int32_t>(seg.size()); ++i) {
|
||||
auto v = seg[i];
|
||||
ASSERT_EQ(v, r + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TestV() {
|
||||
{
|
||||
// basic test
|
||||
std::int32_t n{comm_.Rank()};
|
||||
std::vector<std::int32_t> result;
|
||||
auto rc = RingAllgatherV(comm_, common::Span{&n, 1}, &result);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
for (std::int32_t i = 0; i < comm_.World(); ++i) {
|
||||
ASSERT_EQ(result[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// V test
|
||||
std::vector<std::int32_t> data(comm_.Rank() + 1, comm_.Rank());
|
||||
std::vector<std::int32_t> result;
|
||||
auto rc = RingAllgatherV(comm_, common::Span{data.data(), data.size()}, &result);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
ASSERT_EQ(result.size(), (1 + comm_.World()) * comm_.World() / 2);
|
||||
std::int32_t k{0};
|
||||
for (std::int32_t r = 0; r < comm_.World(); ++r) {
|
||||
auto seg = common::Span{result.data(), result.size()}.subspan(k, (r + 1));
|
||||
if (comm_.Rank() == 0) {
|
||||
for (auto v : seg) {
|
||||
ASSERT_EQ(v, r);
|
||||
}
|
||||
k += seg.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Runs Worker::Run() on up to 7 workers (fewer if hardware_concurrency() reports less).
TEST_F(AllgatherTest, Basic) {
  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
  auto run_worker = [n_workers](std::string host, std::int32_t port,
                                std::chrono::seconds timeout, std::int32_t rank) {
    Worker worker{host, port, timeout, n_workers, rank};
    worker.Run();
  };
  TestDistributed(n_workers, run_worker);
}
|
||||
|
||||
// Runs Worker::TestV() on up to 7 workers (fewer if hardware_concurrency() reports less).
TEST_F(AllgatherTest, V) {
  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
  auto run_worker = [n_workers](std::string host, std::int32_t port,
                                std::chrono::seconds timeout, std::int32_t rank) {
    Worker worker{host, port, timeout, n_workers, rank};
    worker.TestV();
  };
  TestDistributed(n_workers, run_worker);
}
|
||||
} // namespace xgboost::collective
|
||||
72
tests/cpp/collective/test_allreduce.cc
Normal file
72
tests/cpp/collective/test_allreduce.cc
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/collective/allreduce.h"
|
||||
#include "../../../src/collective/tracker.h"
|
||||
#include "test_worker.h" // for WorkerForTest, TestDistributed
|
||||
|
||||
namespace xgboost::collective {
|
||||
|
||||
namespace {
|
||||
class AllreduceWorker : public WorkerForTest {
|
||||
public:
|
||||
using WorkerForTest::WorkerForTest;
|
||||
|
||||
void Basic() {
|
||||
{
|
||||
std::vector<double> data(13, 0.0);
|
||||
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
for (std::size_t i = 0; i < rhs.size(); ++i) {
|
||||
rhs[i] += lhs[i];
|
||||
}
|
||||
});
|
||||
ASSERT_EQ(std::accumulate(data.cbegin(), data.cend(), 0.0), 0.0);
|
||||
}
|
||||
{
|
||||
std::vector<double> data(1, 1.0);
|
||||
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
for (std::size_t i = 0; i < rhs.size(); ++i) {
|
||||
rhs[i] += lhs[i];
|
||||
}
|
||||
});
|
||||
ASSERT_EQ(data[0], static_cast<double>(comm_.World()));
|
||||
}
|
||||
}
|
||||
|
||||
void Acc() {
|
||||
std::vector<double> data(314, 1.5);
|
||||
Allreduce(comm_, common::Span{data.data(), data.size()}, [](auto lhs, auto rhs) {
|
||||
for (std::size_t i = 0; i < rhs.size(); ++i) {
|
||||
rhs[i] += lhs[i];
|
||||
}
|
||||
});
|
||||
for (std::size_t i = 0; i < data.size(); ++i) {
|
||||
auto v = data[i];
|
||||
ASSERT_EQ(v, 1.5 * static_cast<double>(comm_.World())) << i;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Gives the allreduce cases their own suite name while reusing the SocketTest fixture unchanged.
class AllreduceTest : public SocketTest {};
|
||||
} // namespace
|
||||
|
||||
// Runs AllreduceWorker::Basic() on up to 7 workers (fewer if hardware_concurrency() reports less).
TEST_F(AllreduceTest, Basic) {
  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
  auto run_worker = [n_workers](std::string host, std::int32_t port,
                                std::chrono::seconds timeout, std::int32_t rank) {
    AllreduceWorker worker{host, port, timeout, n_workers, rank};
    worker.Basic();
  };
  TestDistributed(n_workers, run_worker);
}
|
||||
|
||||
// Runs AllreduceWorker::Acc() on up to 7 workers (fewer if hardware_concurrency() reports less).
TEST_F(AllreduceTest, Sum) {
  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
  auto run_worker = [n_workers](std::string host, std::int32_t port,
                                std::chrono::seconds timeout, std::int32_t rank) {
    AllreduceWorker worker{host, port, timeout, n_workers, rank};
    worker.Acc();
  };
  TestDistributed(n_workers, run_worker);
}
|
||||
} // namespace xgboost::collective
|
||||
51
tests/cpp/collective/test_broadcast.cc
Normal file
51
tests/cpp/collective/test_broadcast.cc
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/collective/socket.h>
|
||||
|
||||
#include <cstdint> // for int32_t
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/collective/broadcast.h" // for Broadcast
|
||||
#include "../../../src/collective/tracker.h" // for GetHostAddress
|
||||
#include "test_worker.h" // for WorkerForTest, TestDistributed
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
class Worker : public WorkerForTest {
|
||||
public:
|
||||
using WorkerForTest::WorkerForTest;
|
||||
|
||||
void Run() {
|
||||
for (std::int32_t r = 0; r < comm_.World(); ++r) {
|
||||
// basic test
|
||||
std::vector<std::int32_t> data(1, comm_.Rank());
|
||||
auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
ASSERT_EQ(data[0], r);
|
||||
}
|
||||
|
||||
for (std::int32_t r = 0; r < comm_.World(); ++r) {
|
||||
std::vector<std::int32_t> data(1 << 16, comm_.Rank());
|
||||
auto rc = Broadcast(this->comm_, common::Span{data.data(), data.size()}, r);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
ASSERT_EQ(data[0], r);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Gives the broadcast cases their own suite name while reusing the SocketTest fixture unchanged.
class BroadcastTest : public SocketTest {};
|
||||
} // namespace
|
||||
|
||||
// Runs Worker::Run() on up to 7 workers (fewer if hardware_concurrency() reports less).
TEST_F(BroadcastTest, Basic) {
  std::int32_t n_workers = std::min(7u, std::thread::hardware_concurrency());
  auto run_worker = [n_workers](std::string host, std::int32_t port,
                                std::chrono::seconds timeout, std::int32_t rank) {
    Worker worker{host, port, timeout, n_workers, rank};
    worker.Run();
  };
  TestDistributed(n_workers, run_worker);
}
|
||||
} // namespace xgboost::collective
|
||||
47
tests/cpp/collective/test_comm.cc
Normal file
47
tests/cpp/collective/test_comm.cc
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/collective/comm.h"
|
||||
#include "test_worker.h"
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
// Gives the communicator cases their own suite name while reusing the TrackerTest fixture unchanged.
class CommTest : public TrackerTest {};
|
||||
} // namespace
|
||||
|
||||
// Pairs up four workers: each even rank sends its rank to rank + 1 over a channel,
// and each odd rank receives it and checks the value.
TEST_F(CommTest, Channel) {
  auto n_workers = 4;
  RabitTracker tracker{host, n_workers, 0, timeout};
  auto fut = tracker.Run();

  std::int32_t port = tracker.Port();
  std::vector<std::thread> workers;

  for (std::int32_t i = 0; i < n_workers; ++i) {
    workers.emplace_back([=] {
      WorkerForTest worker{host, port, timeout, n_workers, i};
      bool const is_receiver = (i % 2 != 0);
      if (is_receiver) {
        // Odd rank: read one int32 from the previous rank.
        auto p_chan = worker.Comm().Chan(i - 1);
        std::int32_t received{-1};
        p_chan->RecvAll(
            EraseType(common::Span<std::int32_t>{&received, static_cast<std::size_t>(1)}));
        auto rc = p_chan->Block();
        ASSERT_TRUE(rc.OK()) << rc.Report();
        ASSERT_EQ(received, i - 1);
        return;
      }
      // Even rank: send our own rank to the next rank.
      auto p_chan = worker.Comm().Chan(i + 1);
      p_chan->SendAll(
          EraseType(common::Span<std::int32_t const>{&i, static_cast<std::size_t>(1)}));
      auto rc = p_chan->Block();
      ASSERT_TRUE(rc.OK()) << rc.Report();
    });
  }

  for (auto &thread : workers) {
    thread.join();
  }

  ASSERT_TRUE(fut.get().OK());
}
|
||||
} // namespace xgboost::collective
|
||||
@@ -29,6 +29,11 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
VerifyAllgather(comm, rank);
|
||||
}
|
||||
|
||||
static void AllgatherV(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllgatherV(comm, rank);
|
||||
}
|
||||
|
||||
static void AllreduceMax(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllreduceMax(comm, rank);
|
||||
@@ -80,14 +85,19 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
|
||||
protected:
|
||||
static void VerifyAllgather(InMemoryCommunicator &comm, int rank) {
|
||||
char buffer[kWorldSize] = {'a', 'b', 'c'};
|
||||
buffer[rank] = '0' + rank;
|
||||
comm.AllGather(buffer, kWorldSize);
|
||||
std::string input{static_cast<char>('0' + rank)};
|
||||
auto output = comm.AllGather(input);
|
||||
for (auto i = 0; i < kWorldSize; i++) {
|
||||
EXPECT_EQ(buffer[i], '0' + i);
|
||||
EXPECT_EQ(output[i], static_cast<char>('0' + i));
|
||||
}
|
||||
}
|
||||
|
||||
static void VerifyAllgatherV(InMemoryCommunicator &comm, int rank) {
|
||||
std::vector<std::string_view> inputs{"a", "bb", "ccc"};
|
||||
auto output = comm.AllGatherV(inputs[rank]);
|
||||
EXPECT_EQ(output, "abbccc");
|
||||
}
|
||||
|
||||
static void VerifyAllreduceMax(InMemoryCommunicator &comm, int rank) {
|
||||
int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank};
|
||||
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMax);
|
||||
@@ -205,6 +215,8 @@ TEST(InMemoryCommunicatorSimpleTest, IsDistributed) {
|
||||
|
||||
TEST_F(InMemoryCommunicatorTest, Allgather) { Verify(&Allgather); }
|
||||
|
||||
TEST_F(InMemoryCommunicatorTest, AllgatherV) { Verify(&AllgatherV); }
|
||||
|
||||
TEST_F(InMemoryCommunicatorTest, AllreduceMax) { Verify(&AllreduceMax); }
|
||||
|
||||
TEST_F(InMemoryCommunicatorTest, AllreduceMin) { Verify(&AllreduceMin); }
|
||||
|
||||
81
tests/cpp/collective/test_loop.cc
Normal file
81
tests/cpp/collective/test_loop.cc
Normal file
@@ -0,0 +1,81 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h> // for ASSERT_TRUE, ASSERT_EQ
|
||||
#include <xgboost/collective/socket.h> // for TCPSocket, Connect, SocketFinalize, SocketStartup
|
||||
#include <xgboost/string_view.h> // for StringView
|
||||
|
||||
#include <chrono> // for seconds
|
||||
#include <cstdint> // for int8_t
|
||||
#include <memory> // for make_shared, shared_ptr
|
||||
#include <system_error> // for make_error_code, errc
|
||||
#include <utility> // for pair
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/collective/loop.h" // for Loop
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
class LoopTest : public ::testing::Test {
|
||||
protected:
|
||||
std::pair<TCPSocket, TCPSocket> pair_;
|
||||
std::shared_ptr<Loop> loop_;
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
system::SocketStartup();
|
||||
std::chrono::seconds timeout{1};
|
||||
|
||||
auto domain = SockDomain::kV4;
|
||||
pair_.first = TCPSocket::Create(domain);
|
||||
auto port = pair_.first.BindHost();
|
||||
pair_.first.Listen();
|
||||
|
||||
auto const& addr = SockAddrV4::Loopback().Addr();
|
||||
auto rc = Connect(StringView{addr}, port, 1, timeout, &pair_.second);
|
||||
ASSERT_TRUE(rc.OK());
|
||||
rc = pair_.second.NonBlocking(true);
|
||||
ASSERT_TRUE(rc.OK());
|
||||
|
||||
pair_.first = pair_.first.Accept();
|
||||
rc = pair_.first.NonBlocking(true);
|
||||
ASSERT_TRUE(rc.OK());
|
||||
|
||||
loop_ = std::make_shared<Loop>(timeout);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
pair_ = decltype(pair_){};
|
||||
system::SocketFinalize();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// A read is submitted with no matching write, so Block() must report timed_out.
TEST_F(LoopTest, Timeout) {
  std::vector<std::int8_t> buf(1);
  Loop::Op read_op{Loop::Op::kRead, 0, buf.data(), buf.size(), &pair_.second, 0};
  loop_->Submit(read_op);

  auto rc = loop_->Block();
  ASSERT_FALSE(rc.OK());
  ASSERT_EQ(rc.Code(), std::make_error_code(std::errc::timed_out)) << rc.Report();
}
|
||||
|
||||
// Writes one byte on the first socket and reads it back on the second through the loop.
TEST_F(LoopTest, Op) {
  std::vector<std::int8_t> out(1, 1);
  std::vector<std::int8_t> in(1, 0);

  Loop::Op write_op{Loop::Op::kWrite, 0, out.data(), out.size(), &pair_.first, 0};
  Loop::Op read_op{Loop::Op::kRead, 0, in.data(), in.size(), &pair_.second, 0};

  // Submission order is write first, then read.
  loop_->Submit(write_op);
  loop_->Submit(read_op);

  auto rc = loop_->Block();
  ASSERT_TRUE(rc.OK()) << rc.Report();

  ASSERT_EQ(in[0], out[0]);
}
|
||||
} // namespace xgboost::collective
|
||||
@@ -38,7 +38,7 @@ void VerifyAllReduceBitwiseAND() {
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{};
|
||||
original[rank] = true;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
|
||||
collective::AllReduce<collective::Operation::kBitwiseAND>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], 0ULL);
|
||||
@@ -60,7 +60,7 @@ void VerifyAllReduceBitwiseOR() {
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{};
|
||||
original[rank] = true;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
|
||||
collective::AllReduce<collective::Operation::kBitwiseOR>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
|
||||
@@ -82,7 +82,7 @@ void VerifyAllReduceBitwiseXOR() {
|
||||
auto const rank = collective::GetRank();
|
||||
std::bitset<64> original{~0ULL};
|
||||
original[rank] = false;
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, rank);
|
||||
HostDeviceVector<uint64_t> buffer({original.to_ullong()}, DeviceOrd::CUDA(rank));
|
||||
collective::AllReduce<collective::Operation::kBitwiseXOR>(rank, buffer.DevicePointer(), 1);
|
||||
collective::Synchronize(rank);
|
||||
EXPECT_EQ(buffer.HostVector()[0], (1ULL << world_size) - 1);
|
||||
|
||||
@@ -1,19 +1,16 @@
|
||||
/**
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
* Copyright 2022-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/collective/socket.h>
|
||||
|
||||
#include <cerrno> // EADDRNOTAVAIL
|
||||
#include <fstream> // ifstream
|
||||
#include <system_error> // std::error_code, std::system_category
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "test_worker.h" // for SocketTest
|
||||
|
||||
namespace xgboost::collective {
|
||||
TEST(Socket, Basic) {
|
||||
system::SocketStartup();
|
||||
|
||||
TEST_F(SocketTest, Basic) {
|
||||
SockAddress addr{SockAddrV6::Loopback()};
|
||||
ASSERT_TRUE(addr.IsV6());
|
||||
addr = SockAddress{SockAddrV4::Loopback()};
|
||||
@@ -54,23 +51,27 @@ TEST(Socket, Basic) {
|
||||
|
||||
run_test(SockDomain::kV4);
|
||||
|
||||
std::string path{"/sys/module/ipv6/parameters/disable"};
|
||||
if (FileExists(path)) {
|
||||
std::ifstream fin(path);
|
||||
if (!fin) {
|
||||
GTEST_SKIP_(msg.c_str());
|
||||
}
|
||||
std::string s_value;
|
||||
fin >> s_value;
|
||||
auto value = std::stoi(s_value);
|
||||
if (value != 0) {
|
||||
GTEST_SKIP_(msg.c_str());
|
||||
}
|
||||
} else {
|
||||
GTEST_SKIP_(msg.c_str());
|
||||
if (SkipTest()) {
|
||||
GTEST_SKIP_(skip_msg_.c_str());
|
||||
}
|
||||
run_test(SockDomain::kV6);
|
||||
}
|
||||
|
||||
system::SocketFinalize();
|
||||
TEST_F(SocketTest, Bind) {
|
||||
auto run = [](SockDomain domain) {
|
||||
auto any =
|
||||
domain == SockDomain::kV4 ? SockAddrV4::InaddrAny().Addr() : SockAddrV6::InaddrAny().Addr();
|
||||
auto sock = TCPSocket::Create(domain);
|
||||
std::int32_t port{0};
|
||||
auto rc = sock.Bind(any, &port);
|
||||
ASSERT_TRUE(rc.OK());
|
||||
ASSERT_NE(port, 0);
|
||||
};
|
||||
|
||||
run(SockDomain::kV4);
|
||||
if (SkipTest()) {
|
||||
GTEST_SKIP_(skip_msg_.c_str());
|
||||
}
|
||||
run(SockDomain::kV6);
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
|
||||
67
tests/cpp/collective/test_tracker.cc
Normal file
67
tests/cpp/collective/test_tracker.cc
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <chrono> // for seconds
|
||||
#include <cstdint> // for int32_t
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/collective/comm.h"
|
||||
#include "test_worker.h"
|
||||
|
||||
namespace xgboost::collective {
|
||||
namespace {
|
||||
class PrintWorker : public WorkerForTest {
|
||||
public:
|
||||
using WorkerForTest::WorkerForTest;
|
||||
|
||||
void Print() {
|
||||
auto rc = comm_.LogTracker("ack:" + std::to_string(this->comm_.Rank()));
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Each worker only constructs (and then destroys) a WorkerForTest against the tracker;
// the tracker's future must then resolve to an OK result.
TEST_F(TrackerTest, Bootstrap) {
  RabitTracker tracker{host, n_workers, 0, timeout};
  auto fut = tracker.Run();

  std::int32_t port = tracker.Port();
  std::vector<std::thread> workers;

  for (std::int32_t i = 0; i < n_workers; ++i) {
    auto connect_only = [=] { WorkerForTest worker{host, port, timeout, n_workers, i}; };
    workers.emplace_back(connect_only);
  }
  for (auto &thread : workers) {
    thread.join();
  }

  ASSERT_TRUE(fut.get().OK());
}
|
||||
|
||||
// Every worker connects and calls PrintWorker::Print(); the tracker's future must
// then resolve to an OK result.
TEST_F(TrackerTest, Print) {
  RabitTracker tracker{host, n_workers, 0, timeout};
  auto fut = tracker.Run();

  std::int32_t port = tracker.Port();
  std::vector<std::thread> workers;

  for (std::int32_t i = 0; i < n_workers; ++i) {
    auto print_once = [=] {
      PrintWorker worker{host, port, timeout, n_workers, i};
      worker.Print();
    };
    workers.emplace_back(print_once);
  }

  for (auto &thread : workers) {
    thread.join();
  }

  ASSERT_TRUE(fut.get().OK());
}
|
||||
|
||||
// The resolved host must not be an IPv4 loopback address. Only the leading octet is
// inspected: rfind(..., 0) matches solely at position 0, so an address that merely
// contains "127." further in (e.g. 10.0.127.5) is accepted.
TEST_F(TrackerTest, GetHostAddress) {
  ASSERT_TRUE(host.rfind("127.", 0) != 0) << host;
}
|
||||
} // namespace xgboost::collective
|
||||
114
tests/cpp/collective/test_worker.h
Normal file
114
tests/cpp/collective/test_worker.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <chrono> // for seconds
|
||||
#include <cstdint> // for int32_t
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../../src/collective/comm.h"
|
||||
#include "../../../src/collective/tracker.h" // for GetHostAddress
|
||||
#include "../helpers.h" // for FileExists
|
||||
|
||||
namespace xgboost::collective {
|
||||
class WorkerForTest {
|
||||
std::string tracker_host_;
|
||||
std::int32_t tracker_port_;
|
||||
std::int32_t world_size_;
|
||||
|
||||
protected:
|
||||
std::int32_t retry_{1};
|
||||
std::string task_id_;
|
||||
RabitComm comm_;
|
||||
|
||||
public:
|
||||
WorkerForTest(std::string host, std::int32_t port, std::chrono::seconds timeout,
|
||||
std::int32_t world, std::int32_t rank)
|
||||
: tracker_host_{std::move(host)},
|
||||
tracker_port_{port},
|
||||
world_size_{world},
|
||||
task_id_{"t:" + std::to_string(rank)},
|
||||
comm_{tracker_host_, tracker_port_, timeout, retry_, task_id_} {
|
||||
CHECK_EQ(world_size_, comm_.World());
|
||||
}
|
||||
virtual ~WorkerForTest() = default;
|
||||
auto& Comm() { return comm_; }
|
||||
|
||||
void LimitSockBuf(std::int32_t n_bytes) {
|
||||
for (std::int32_t i = 0; i < comm_.World(); ++i) {
|
||||
if (i != comm_.Rank()) {
|
||||
ASSERT_TRUE(comm_.Chan(i)->Socket()->NonBlocking());
|
||||
ASSERT_TRUE(comm_.Chan(i)->Socket()->SetBufSize(n_bytes).OK());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class SocketTest : public ::testing::Test {
|
||||
protected:
|
||||
std::string skip_msg_{"Skipping IPv6 test"};
|
||||
|
||||
bool SkipTest() {
|
||||
std::string path{"/sys/module/ipv6/parameters/disable"};
|
||||
if (FileExists(path)) {
|
||||
std::ifstream fin(path);
|
||||
if (!fin) {
|
||||
return true;
|
||||
}
|
||||
std::string s_value;
|
||||
fin >> s_value;
|
||||
auto value = std::stoi(s_value);
|
||||
if (value != 0) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override { system::SocketStartup(); }
|
||||
void TearDown() override { system::SocketFinalize(); }
|
||||
};
|
||||
|
||||
class TrackerTest : public SocketTest {
|
||||
public:
|
||||
std::int32_t n_workers{2};
|
||||
std::chrono::seconds timeout{1};
|
||||
std::string host;
|
||||
|
||||
void SetUp() override {
|
||||
SocketTest::SetUp();
|
||||
auto rc = GetHostAddress(&host);
|
||||
ASSERT_TRUE(rc.OK()) << rc.Report();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename WorkerFn>
|
||||
void TestDistributed(std::int32_t n_workers, WorkerFn worker_fn) {
|
||||
std::chrono::seconds timeout{1};
|
||||
|
||||
std::string host;
|
||||
ASSERT_TRUE(GetHostAddress(&host).OK());
|
||||
RabitTracker tracker{StringView{host}, n_workers, 0, timeout};
|
||||
auto fut = tracker.Run();
|
||||
|
||||
std::vector<std::thread> workers;
|
||||
std::int32_t port = tracker.Port();
|
||||
|
||||
for (std::int32_t i = 0; i < n_workers; ++i) {
|
||||
workers.emplace_back([=] { worker_fn(host, port, timeout, i); });
|
||||
}
|
||||
|
||||
for (auto& t : workers) {
|
||||
t.join();
|
||||
}
|
||||
|
||||
ASSERT_TRUE(fut.get().OK());
|
||||
}
|
||||
} // namespace xgboost::collective
|
||||
@@ -147,7 +147,7 @@ TEST(CutsBuilder, SearchGroupInd) {
|
||||
|
||||
EXPECT_ANY_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));
|
||||
|
||||
p_mat->Info().Validate(-1);
|
||||
p_mat->Info().Validate(DeviceOrd::CPU());
|
||||
EXPECT_THROW(HostSketchContainer::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17),
|
||||
dmlc::Error);
|
||||
|
||||
@@ -330,7 +330,7 @@ TEST(HistUtil, IndexBinData) {
|
||||
void TestSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
|
||||
Context ctx;
|
||||
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);
|
||||
|
||||
|
||||
@@ -222,7 +222,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
ASSERT_EQ(info.feature_types.Size(), n_features);
|
||||
|
||||
HostDeviceVector<bst_row_t> cuts_ptr{0, n_samples, n_samples * 2, n_samples * 3};
|
||||
cuts_ptr.SetDevice(0);
|
||||
cuts_ptr.SetDevice(DeviceOrd::CUDA(0));
|
||||
|
||||
dh::device_vector<float> weight(n_samples * n_features, 0);
|
||||
dh::Iota(dh::ToSpan(weight));
|
||||
@@ -235,7 +235,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
thrust::sort_by_key(sorted_entries.begin(), sorted_entries.end(), weight.begin(),
|
||||
detail::EntryCompareOp());
|
||||
|
||||
detail::RemoveDuplicatedCategories(ctx.gpu_id, info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
detail::RemoveDuplicatedCategories(ctx.Device(), info, cuts_ptr.DeviceSpan(), &sorted_entries,
|
||||
&weight, &columns_ptr);
|
||||
|
||||
auto const& h_cptr = cuts_ptr.ConstHostVector();
|
||||
@@ -377,7 +377,8 @@ template <typename Adapter>
|
||||
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(), 0);
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
|
||||
DeviceOrd::CUDA(0));
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
|
||||
@@ -444,7 +445,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
HistogramCuts cuts;
|
||||
@@ -472,7 +473,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, 0);
|
||||
SketchContainer sketch_container(ft, num_bins, num_columns, num_rows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
@@ -507,7 +508,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
}
|
||||
|
||||
ASSERT_EQ(info.feature_types.Size(), 1);
|
||||
SketchContainer container(info.feature_types, num_bins, 1, n, 0);
|
||||
SketchContainer container(info.feature_types, num_bins, 1, n, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
@@ -580,11 +581,7 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
|
||||
namespace {
|
||||
auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(ctx->gpu_id));
|
||||
#endif
|
||||
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
|
||||
auto n = n_samples * n_features;
|
||||
std::vector<float> x;
|
||||
x.resize(n);
|
||||
@@ -624,21 +621,21 @@ void TestGetColumnSize(std::size_t n_samples) {
|
||||
std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
|
||||
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
|
||||
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
ctx.Device(), IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
|
||||
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
|
||||
ASSERT_EQ(h_column_size, h_column_size_1);
|
||||
}
|
||||
@@ -715,9 +712,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
|
||||
size_t constexpr kGroups = 10;
|
||||
HostDeviceVector<float> storage;
|
||||
std::string m =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
|
||||
&storage);
|
||||
std::string m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(DeviceOrd::CUDA(0))
|
||||
.GenerateArrayInterface(&storage);
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
@@ -736,14 +733,14 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
info.SetInfo(ctx, "group", groups.data(), DataType::kUInt32, kGroups);
|
||||
}
|
||||
|
||||
info.weights_.SetDevice(0);
|
||||
info.weights_.SetDevice(DeviceOrd::CUDA(0));
|
||||
info.num_row_ = kRows;
|
||||
info.num_col_ = kCols;
|
||||
|
||||
data::CupyAdapter adapter(m);
|
||||
auto const& batch = adapter.Value();
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, DeviceOrd::CUDA(0));
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
|
||||
@@ -787,7 +784,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
// https://github.com/dmlc/xgboost/issues/7946
|
||||
h_weights[i] = (i % 2 == 0 ? 1 : 2) / static_cast<float>(kGroups);
|
||||
}
|
||||
SketchContainer sketch_container(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*!
|
||||
* Copyright 2018 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2018-2023 XGBoost contributors
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/equal.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
@@ -13,21 +12,14 @@
|
||||
#endif
|
||||
#include <xgboost/host_device_vector.h>
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
void SetDeviceForTest(int device) {
|
||||
void SetDeviceForTest(DeviceOrd device) {
|
||||
int n_devices;
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaGetDeviceCount(&n_devices));
|
||||
device %= n_devices;
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipGetDeviceCount(&n_devices));
|
||||
device %= n_devices;
|
||||
dh::safe_cuda(hipSetDevice(device));
|
||||
#endif
|
||||
device.ordinal %= n_devices;
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -42,13 +34,13 @@ struct HostDeviceVectorSetDeviceHandler {
|
||||
}
|
||||
};
|
||||
|
||||
void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
|
||||
void InitHostDeviceVector(size_t n, DeviceOrd device, HostDeviceVector<int> *v) {
|
||||
// create the vector
|
||||
v->SetDevice(device);
|
||||
v->Resize(n);
|
||||
|
||||
ASSERT_EQ(v->Size(), n);
|
||||
ASSERT_EQ(v->DeviceIdx(), device);
|
||||
ASSERT_EQ(v->Device(), device);
|
||||
// ensure that the device have read-write access
|
||||
ASSERT_TRUE(v->DeviceCanRead());
|
||||
ASSERT_TRUE(v->DeviceCanWrite());
|
||||
@@ -68,7 +60,7 @@ void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {
|
||||
}
|
||||
|
||||
void PlusOne(HostDeviceVector<int> *v) {
|
||||
int device = v->DeviceIdx();
|
||||
auto device = v->Device();
|
||||
SetDeviceForTest(device);
|
||||
thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v),
|
||||
[=]__device__(unsigned int a){ return a + 1; });
|
||||
@@ -80,7 +72,7 @@ void CheckDevice(HostDeviceVector<int>* v,
|
||||
unsigned int first,
|
||||
GPUAccess access) {
|
||||
ASSERT_EQ(v->Size(), size);
|
||||
SetDeviceForTest(v->DeviceIdx());
|
||||
SetDeviceForTest(v->Device());
|
||||
|
||||
ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v),
|
||||
thrust::make_counting_iterator(first)));
|
||||
@@ -111,7 +103,7 @@ void CheckHost(HostDeviceVector<int> *v, GPUAccess access) {
|
||||
ASSERT_FALSE(v->DeviceCanWrite());
|
||||
}
|
||||
|
||||
void TestHostDeviceVector(size_t n, int device) {
|
||||
void TestHostDeviceVector(size_t n, DeviceOrd device) {
|
||||
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
|
||||
HostDeviceVector<int> v;
|
||||
InitHostDeviceVector(n, device, &v);
|
||||
@@ -124,13 +116,13 @@ void TestHostDeviceVector(size_t n, int device) {
|
||||
|
||||
TEST(HostDeviceVector, Basic) {
|
||||
size_t n = 1001;
|
||||
int device = 0;
|
||||
DeviceOrd device = DeviceOrd::CUDA(0);
|
||||
TestHostDeviceVector(n, device);
|
||||
}
|
||||
|
||||
TEST(HostDeviceVector, Copy) {
|
||||
size_t n = 1001;
|
||||
int device = 0;
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
HostDeviceVectorSetDeviceHandler hdvec_dev_hndlr(SetDevice);
|
||||
|
||||
HostDeviceVector<int> v;
|
||||
@@ -154,15 +146,15 @@ TEST(HostDeviceVector, SetDevice) {
|
||||
h_vec[i] = i;
|
||||
}
|
||||
HostDeviceVector<int> vec (h_vec);
|
||||
auto device = 0;
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
|
||||
vec.SetDevice(device);
|
||||
ASSERT_EQ(vec.Size(), h_vec.size());
|
||||
auto span = vec.DeviceSpan(); // sync to device
|
||||
|
||||
vec.SetDevice(-1); // pull back to cpu.
|
||||
vec.SetDevice(DeviceOrd::CPU()); // pull back to cpu.
|
||||
ASSERT_EQ(vec.Size(), h_vec.size());
|
||||
ASSERT_EQ(vec.DeviceIdx(), -1);
|
||||
ASSERT_EQ(vec.Device(), DeviceOrd::CPU());
|
||||
|
||||
auto h_vec_1 = vec.HostVector();
|
||||
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
|
||||
@@ -170,7 +162,7 @@ TEST(HostDeviceVector, SetDevice) {
|
||||
|
||||
TEST(HostDeviceVector, Span) {
|
||||
HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
|
||||
vec.SetDevice(0);
|
||||
vec.SetDevice(DeviceOrd::CUDA(0));
|
||||
auto span = vec.DeviceSpan();
|
||||
ASSERT_EQ(vec.Size(), span.size());
|
||||
ASSERT_EQ(vec.DevicePointer(), span.data());
|
||||
@@ -194,5 +186,4 @@ TEST(HostDeviceVector, Empty) {
|
||||
ASSERT_FALSE(another.Empty());
|
||||
ASSERT_TRUE(vec.Empty());
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -144,7 +144,8 @@ TEST(IO, Resource) {
|
||||
fout << 1.0 << std::endl;
|
||||
fout.close();
|
||||
|
||||
auto resource = std::make_shared<MmapResource>(path, 0, sizeof(double));
|
||||
auto resource = std::shared_ptr<MmapResource>{
|
||||
new MmapResource{path, 0, sizeof(double)}};
|
||||
ASSERT_EQ(resource->Size(), sizeof(double));
|
||||
ASSERT_EQ(resource->Type(), ResourceHandler::kMmap);
|
||||
ASSERT_EQ(resource->DataAs<double>()[0], val);
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
/**
|
||||
* Copyright (c) 2019-2023, XGBoost Contributors
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iterator> // for back_inserter
|
||||
#include <map>
|
||||
|
||||
#include "../../../src/common/charconv.h"
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../../../src/common/json_utils.h"
|
||||
#include "../../../src/common/threading_utils.h" // for ParallelFor
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
@@ -691,4 +693,16 @@ TEST(Json, TypeCheck) {
|
||||
ASSERT_NE(err.find("foo"), std::string::npos);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Json, Dump) {
|
||||
auto str = GetModelStr();
|
||||
auto jobj = Json::Load(str);
|
||||
std::string result_s = Json::Dump(jobj);
|
||||
|
||||
std::vector<char> result_v = Json::Dump<std::vector<char>>(jobj);
|
||||
ASSERT_EQ(result_s.size(), result_v.size());
|
||||
for (std::size_t i = 0; i < result_s.size(); ++i) {
|
||||
ASSERT_EQ(result_s[i], result_v[i]);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -16,7 +16,7 @@ namespace xgboost::linalg {
|
||||
namespace {
|
||||
void TestElementWiseKernel() {
|
||||
auto device = DeviceOrd::CUDA(0);
|
||||
Tensor<float, 3> l{{2, 3, 4}, 0};
|
||||
Tensor<float, 3> l{{2, 3, 4}, device};
|
||||
{
|
||||
/**
|
||||
* Non-contiguous
|
||||
|
||||
@@ -9,9 +9,7 @@
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "xgboost/context.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
namespace xgboost::common {
|
||||
TEST(Quantile, LoadBalance) {
|
||||
size_t constexpr kRows = 1000, kCols = 100;
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
@@ -314,7 +312,7 @@ void TestSameOnAllWorkers() {
|
||||
}
|
||||
|
||||
auto m = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(Context::kCpuId)
|
||||
.Device(DeviceOrd::CPU())
|
||||
.Type(ft)
|
||||
.MaxCategory(17)
|
||||
.Seed(rank + seed)
|
||||
@@ -373,6 +371,4 @@ TEST(Quantile, SameOnAllWorkers) {
|
||||
auto constexpr kWorkers = 4;
|
||||
RunWithInMemoryCommunicator(kWorkers, TestSameOnAllWorkers);
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -32,7 +32,7 @@ class MGPUQuantileTest : public BaseMGPUTest {};
|
||||
TEST(GPUQuantile, Basic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, kBins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, kBins, kCols, kRows, FstCU());
|
||||
dh::caching_device_vector<Entry> entries;
|
||||
dh::device_vector<bst_row_t> cuts_ptr(kCols+1);
|
||||
thrust::fill(cuts_ptr.begin(), cuts_ptr.end(), 0);
|
||||
@@ -45,12 +45,12 @@ void TestSketchUnique(float sparsity) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [kRows, kCols, sparsity](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, sparsity}
|
||||
.Seed(seed)
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
@@ -65,7 +65,7 @@ void TestSketchUnique(float sparsity) {
|
||||
thrust::make_counting_iterator(0llu),
|
||||
[=] __device__(size_t idx) { return batch.GetElement(idx); });
|
||||
auto end = kCols * kRows;
|
||||
detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
|
||||
detail::GetColumnSizesScan(FstCU(), kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
|
||||
&cut_sizes_scan, &column_sizes_scan);
|
||||
auto const& cut_sizes = cut_sizes_scan.HostVector();
|
||||
ASSERT_LE(sketch.Data().size(), cut_sizes.back());
|
||||
@@ -93,13 +93,9 @@ TEST(GPUQuantile, Unique) {
|
||||
}
|
||||
|
||||
// if with_error is true, the test tolerates floating point error
|
||||
void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
|
||||
void TestQuantileElemRank(DeviceOrd device, Span<SketchEntry const> in,
|
||||
Span<bst_row_t const> d_columns_ptr, bool with_error = false) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
dh::safe_cuda(hipSetDevice(device));
|
||||
#endif
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
std::vector<SketchEntry> h_in(in.size());
|
||||
dh::CopyDeviceSpanToVector(&h_in, in);
|
||||
std::vector<bst_row_t> h_columns_ptr(d_columns_ptr.size());
|
||||
@@ -134,13 +130,12 @@ TEST(GPUQuantile, Prune) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
std::string interface_str =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch);
|
||||
@@ -156,7 +151,7 @@ TEST(GPUQuantile, Prune) {
|
||||
ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch.Data().data(),
|
||||
sketch.Data().data() + sketch.Data().size(),
|
||||
detail::SketchUnique{}));
|
||||
TestQuantileElemRank(0, sketch.Data(), sketch.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch.Data(), sketch.ColumnsPtr());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -164,10 +159,10 @@ TEST(GPUQuantile, MergeEmpty) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
size_t n_bins = 10;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateArrayInterface(
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
MetaInfo info;
|
||||
@@ -204,34 +199,33 @@ TEST(GPUQuantile, MergeBasic) {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) {
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, kCols, kRows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_0);
|
||||
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, 0);
|
||||
SketchContainer sketch_1(ft, n_bins, kCols, kRows * kRows, FstCU());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{kRows, kCols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
data::CupyAdapter adapter_1(interface_str_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &sketch_1);
|
||||
AdapterDeviceSketch(adapter_1.Value(), n_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_1);
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
if (info.weights_.Size() != 0) {
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), true);
|
||||
sketch_0.FixError();
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr(), false);
|
||||
} else {
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
}
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
@@ -251,24 +245,22 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
MetaInfo info;
|
||||
int32_t seed = 0;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, 0);
|
||||
SketchContainer sketch_0(ft, n_bins, cols, rows, FstCU());
|
||||
HostDeviceVector<float> storage_0;
|
||||
std::string interface_str_0 = RandomDataGenerator{rows, cols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_0);
|
||||
std::string interface_str_0 =
|
||||
RandomDataGenerator{rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_0);
|
||||
data::CupyAdapter adapter_0(interface_str_0);
|
||||
AdapterDeviceSketch(adapter_0.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_0);
|
||||
|
||||
size_t f_rows = rows * frac;
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, 0);
|
||||
SketchContainer sketch_1(ft, n_bins, cols, f_rows, FstCU());
|
||||
HostDeviceVector<float> storage_1;
|
||||
std::string interface_str_1 = RandomDataGenerator{f_rows, cols, 0}
|
||||
.Device(0)
|
||||
.Seed(seed)
|
||||
.GenerateArrayInterface(&storage_1);
|
||||
std::string interface_str_1 =
|
||||
RandomDataGenerator{f_rows, cols, 0}.Device(FstCU()).Seed(seed).GenerateArrayInterface(
|
||||
&storage_1);
|
||||
auto data_1 = storage_1.DeviceSpan();
|
||||
auto tuple_it = thrust::make_tuple(
|
||||
thrust::make_counting_iterator<size_t>(0ul), data_1.data());
|
||||
@@ -290,7 +282,7 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
|
||||
|
||||
size_t size_before_merge = sketch_0.Data().size();
|
||||
sketch_0.Merge(sketch_1.ColumnsPtr(), sketch_1.Data());
|
||||
TestQuantileElemRank(0, sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_0.Data(), sketch_0.ColumnsPtr());
|
||||
|
||||
auto columns_ptr = sketch_0.ColumnsPtr();
|
||||
std::vector<bst_row_t> h_columns_ptr(columns_ptr.size());
|
||||
@@ -321,11 +313,10 @@ TEST(GPUQuantile, MergeDuplicated) {
|
||||
TEST(GPUQuantile, MultiMerge) {
|
||||
constexpr size_t kRows = 20, kCols = 1;
|
||||
int32_t world = 2;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
// Set up single node version
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch_on_single_node(ft, n_bins, kCols, kRows, FstCU());
|
||||
|
||||
size_t intermediate_num_cuts = std::min(
|
||||
kRows * world, static_cast<size_t>(n_bins * WQSketch::kFactor));
|
||||
@@ -333,12 +324,12 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
for (auto rank = 0; rank < world; ++rank) {
|
||||
HostDeviceVector<float> storage;
|
||||
std::string interface_str = RandomDataGenerator{kRows, kCols, 0}
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.Seed(rank + seed)
|
||||
.GenerateArrayInterface(&storage);
|
||||
data::CupyAdapter adapter(interface_str);
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, 0);
|
||||
containers.emplace_back(ft, n_bins, kCols, kRows, FstCU());
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&containers.back());
|
||||
@@ -348,21 +339,44 @@ TEST(GPUQuantile, MultiMerge) {
|
||||
sketch_on_single_node.Merge(sketch.ColumnsPtr(), sketch.Data());
|
||||
sketch_on_single_node.FixError();
|
||||
}
|
||||
TestQuantileElemRank(0, sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
|
||||
sketch_on_single_node.Unique();
|
||||
TestQuantileElemRank(0, sketch_on_single_node.Data(),
|
||||
sketch_on_single_node.ColumnsPtr());
|
||||
TestQuantileElemRank(FstCU(), sketch_on_single_node.Data(), sketch_on_single_node.ColumnsPtr());
|
||||
});
|
||||
}
|
||||
|
||||
TEST(GPUQuantile, MissingColumns) {
|
||||
auto dmat = std::unique_ptr<DMatrix>{[=]() {
|
||||
std::size_t constexpr kRows = 1000, kCols = 100;
|
||||
auto sparsity = 0.5f;
|
||||
std::vector<FeatureType> ft(kCols);
|
||||
for (size_t i = 0; i < ft.size(); ++i) {
|
||||
ft[i] = (i % 2 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
|
||||
}
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, sparsity}
|
||||
.Seed(0)
|
||||
.Lower(.0f)
|
||||
.Upper(1.0f)
|
||||
.Type(ft)
|
||||
.MaxCategory(13)
|
||||
.GenerateDMatrix();
|
||||
return dmat->SliceCol(2, 1);
|
||||
}()};
|
||||
dmat->Info().data_split_mode = DataSplitMode::kRow;
|
||||
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
std::size_t constexpr kBins = 64;
|
||||
HistogramCuts cuts = common::DeviceSketch(&ctx, dmat.get(), kBins);
|
||||
ASSERT_TRUE(cuts.HasCategorical());
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestAllReduceBasic() {
|
||||
auto const world = collective::GetWorldSize();
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
auto const device = GPUIDX;
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
|
||||
// Set up single node version;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
@@ -440,18 +454,14 @@ TEST_F(MGPUQuantileTest, AllReduceBasic) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TestColumnSplitBasic() {
|
||||
void TestColumnSplit(DMatrix* dmat) {
|
||||
auto const world = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::size_t constexpr kRows = 1000, kCols = 100, kBins = 64;
|
||||
|
||||
auto m = std::unique_ptr<DMatrix>{[=]() {
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
return dmat->SliceCol(world, rank);
|
||||
}()};
|
||||
auto m = std::unique_ptr<DMatrix>{dmat->SliceCol(world, rank)};
|
||||
|
||||
// Generate cuts for distributed environment.
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
std::size_t constexpr kBins = 64;
|
||||
HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins);
|
||||
|
||||
// Generate cuts for single node environment
|
||||
@@ -484,7 +494,26 @@ void TestColumnSplitBasic() {
|
||||
} // anonymous namespace
|
||||
|
||||
TEST_F(MGPUQuantileTest, ColumnSplitBasic) {
|
||||
DoTest(TestColumnSplitBasic);
|
||||
std::size_t constexpr kRows = 1000, kCols = 100;
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
DoTest(TestColumnSplit, dmat.get());
|
||||
}
|
||||
|
||||
TEST_F(MGPUQuantileTest, ColumnSplitCategorical) {
|
||||
std::size_t constexpr kRows = 1000, kCols = 100;
|
||||
auto sparsity = 0.5f;
|
||||
std::vector<FeatureType> ft(kCols);
|
||||
for (size_t i = 0; i < ft.size(); ++i) {
|
||||
ft[i] = (i % 2 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
|
||||
}
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, sparsity}
|
||||
.Seed(0)
|
||||
.Lower(.0f)
|
||||
.Upper(1.0f)
|
||||
.Type(ft)
|
||||
.MaxCategory(13)
|
||||
.GenerateDMatrix();
|
||||
DoTest(TestColumnSplit, dmat.get());
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -494,7 +523,7 @@ void TestSameOnAllWorkers() {
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins,
|
||||
MetaInfo const &info) {
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = GPUIDX;
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
|
||||
HostDeviceVector<float> storage({}, device);
|
||||
@@ -525,9 +554,9 @@ void TestSameOnAllWorkers() {
|
||||
thrust::copy(thrust::device, local_data.data(),
|
||||
local_data.data() + local_data.size(),
|
||||
all_workers.begin() + local_data.size() * rank);
|
||||
collective::AllReduce<collective::Operation::kSum>(device, all_workers.data().get(),
|
||||
collective::AllReduce<collective::Operation::kSum>(device.ordinal, all_workers.data().get(),
|
||||
all_workers.size());
|
||||
collective::Synchronize(device);
|
||||
collective::Synchronize(device.ordinal);
|
||||
|
||||
auto base_line = dh::ToSpan(all_workers).subspan(0, size_as_float);
|
||||
std::vector<float> h_base_line(base_line.size());
|
||||
@@ -573,7 +602,7 @@ TEST(GPUQuantile, Push) {
|
||||
columns_ptr[1] = kRows;
|
||||
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
sketch.Push(dh::ToSpan(d_entries), dh::ToSpan(columns_ptr), dh::ToSpan(columns_ptr), kRows, {});
|
||||
|
||||
auto sketch_data = sketch.Data();
|
||||
@@ -613,7 +642,7 @@ TEST(GPUQuantile, MultiColPush) {
|
||||
|
||||
int32_t n_bins = 16;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, 0);
|
||||
SketchContainer sketch(ft, n_bins, kCols, kRows, FstCU());
|
||||
dh::device_vector<Entry> d_entries {entries};
|
||||
|
||||
dh::device_vector<size_t> columns_ptr(kCols + 1, 0);
|
||||
|
||||
@@ -95,7 +95,7 @@ void TestRankingCache(Context const* ctx) {
|
||||
HostDeviceVector<float> predt(info.num_row_, 0);
|
||||
auto& h_predt = predt.HostVector();
|
||||
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
|
||||
auto rank_idx =
|
||||
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
@@ -129,7 +129,7 @@ void TestNDCGCache(Context const* ctx) {
|
||||
auto fail = [&]() { NDCGCache cache{ctx, info, param}; };
|
||||
// empty label
|
||||
ASSERT_THROW(fail(), dmlc::Error);
|
||||
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, Context::kCpuId};
|
||||
info.labels = linalg::Matrix<float>{{0.0f, 0.1f, 0.2f}, {3}, DeviceOrd::CPU()};
|
||||
// invalid label
|
||||
ASSERT_THROW(fail(), dmlc::Error);
|
||||
auto h_labels = info.labels.HostView();
|
||||
|
||||
@@ -42,7 +42,7 @@ void TestCalcQueriesInvIDCG() {
|
||||
auto d_scores = dh::ToSpan(scores);
|
||||
common::SegmentedSequence(&ctx, d_group_ptr, d_scores);
|
||||
|
||||
linalg::Vector<double> inv_IDCG({n_groups}, ctx.gpu_id);
|
||||
linalg::Vector<double> inv_IDCG({n_groups}, ctx.Device());
|
||||
|
||||
ltr::LambdaRankParam p;
|
||||
p.UpdateAllowUnknown(Args{{"ndcg_exp_gain", "false"}});
|
||||
@@ -77,7 +77,7 @@ void TestRankingCache(Context const* ctx) {
|
||||
HostDeviceVector<float> predt(info.num_row_, 0);
|
||||
auto& h_predt = predt.HostVector();
|
||||
std::iota(h_predt.begin(), h_predt.end(), 0.0f);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
|
||||
auto rank_idx =
|
||||
cache.SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
@@ -9,12 +9,11 @@
|
||||
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
namespace xgboost::common {
|
||||
TEST(Stats, Quantile) {
|
||||
Context ctx;
|
||||
{
|
||||
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, DeviceOrd::CPU());
|
||||
std::vector<size_t> index{0, 2, 3, 4, 6};
|
||||
auto h_arr = arr.HostView();
|
||||
auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
|
||||
@@ -40,8 +39,8 @@ TEST(Stats, Quantile) {
|
||||
|
||||
TEST(Stats, WeightedQuantile) {
|
||||
Context ctx;
|
||||
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
|
||||
linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, DeviceOrd::CPU());
|
||||
linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, DeviceOrd::CPU());
|
||||
|
||||
auto h_arr = arr.HostView();
|
||||
auto h_weight = weight.HostView();
|
||||
@@ -64,7 +63,7 @@ TEST(Stats, Median) {
|
||||
Context ctx;
|
||||
|
||||
{
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, Context::kCpuId};
|
||||
linalg::Tensor<float, 2> values{{.0f, .0f, 1.f, 2.f}, {4}, DeviceOrd::CPU()};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
@@ -83,7 +82,7 @@ TEST(Stats, Median) {
|
||||
{
|
||||
ctx = ctx.MakeCPU();
|
||||
// 4x2 matrix
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.Device()};
|
||||
HostDeviceVector<float> weights;
|
||||
linalg::Tensor<float, 1> out;
|
||||
Median(&ctx, values, weights, &out);
|
||||
@@ -102,14 +101,14 @@ TEST(Stats, Median) {
|
||||
namespace {
|
||||
void TestMean(Context const* ctx) {
|
||||
std::size_t n{128};
|
||||
linalg::Vector<float> data({n}, ctx->gpu_id);
|
||||
linalg::Vector<float> data({n}, ctx->Device());
|
||||
auto h_v = data.HostView().Values();
|
||||
std::iota(h_v.begin(), h_v.end(), .0f);
|
||||
|
||||
auto nf = static_cast<float>(n);
|
||||
float mean = nf * (nf - 1) / 2 / n;
|
||||
|
||||
linalg::Vector<float> res{{1}, ctx->gpu_id};
|
||||
linalg::Vector<float> res{{1}, ctx->Device()};
|
||||
Mean(ctx, data, &res);
|
||||
auto h_res = res.HostView();
|
||||
ASSERT_EQ(h_res.Size(), 1);
|
||||
@@ -127,6 +126,5 @@ TEST(Stats, GPUMean) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMean(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost::common
|
||||
|
||||
@@ -25,8 +25,8 @@ namespace common {
|
||||
namespace {
|
||||
class StatsGPU : public ::testing::Test {
|
||||
private:
|
||||
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, 0};
|
||||
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, 0};
|
||||
linalg::Tensor<float, 1> arr_{{1.f, 2.f, 3.f, 4.f, 5.f, 2.f, 4.f, 5.f, 3.f, 1.f}, {10}, FstCU()};
|
||||
linalg::Tensor<std::size_t, 1> indptr_{{0, 5, 10}, {3}, FstCU()};
|
||||
HostDeviceVector<float> results_;
|
||||
using TestSet = std::vector<std::pair<float, float>>;
|
||||
Context ctx_;
|
||||
@@ -51,7 +51,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
@@ -63,7 +63,7 @@ class StatsGPU : public ::testing::Test {
|
||||
|
||||
// one alpha for each segment
|
||||
HostDeviceVector<float> alphas{0.0f, 0.5f, 1.0f};
|
||||
alphas.SetDevice(0);
|
||||
alphas.SetDevice(FstCU());
|
||||
auto d_alphas = alphas.ConstDeviceSpan();
|
||||
auto w_it = thrust::make_constant_iterator(0.1f);
|
||||
SegmentedWeightedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
|
||||
@@ -85,7 +85,7 @@ class StatsGPU : public ::testing::Test {
|
||||
auto val_it =
|
||||
dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
|
||||
[=] XGBOOST_DEVICE(std::size_t i) { return d_arr(i); });
|
||||
linalg::Tensor<float, 1> weights{{10}, 0};
|
||||
linalg::Tensor<float, 1> weights{{10}, FstCU()};
|
||||
linalg::ElementWiseTransformDevice(weights.View(DeviceOrd::CUDA(0)),
|
||||
[=] XGBOOST_DEVICE(std::size_t, float) { return 1.0; });
|
||||
auto w_it = weights.Data()->ConstDevicePointer();
|
||||
@@ -106,7 +106,7 @@ class StatsGPU : public ::testing::Test {
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
data.insert(data.cend(), seg.begin(), seg.end());
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, 0};
|
||||
linalg::Tensor<float, 1> arr{data.cbegin(), data.cend(), {data.size()}, FstCU()};
|
||||
auto d_arr = arr.View(DeviceOrd::CUDA(0));
|
||||
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
@@ -118,7 +118,7 @@ class StatsGPU : public ::testing::Test {
|
||||
|
||||
// one alpha for each segment
|
||||
HostDeviceVector<float> alphas{0.1f, 0.2f, 0.4f};
|
||||
alphas.SetDevice(0);
|
||||
alphas.SetDevice(FstCU());
|
||||
auto d_alphas = alphas.ConstDeviceSpan();
|
||||
SegmentedQuantile(&ctx_, d_alphas.data(), key_it, key_it + d_alphas.size() + 1, val_it,
|
||||
val_it + d_arr.Size(), &results_);
|
||||
|
||||
@@ -11,63 +11,59 @@
|
||||
#include "../../../src/common/transform.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::common {
|
||||
namespace {
|
||||
constexpr DeviceOrd TransformDevice() {
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
|
||||
#define TRANSFORM_GPU 0
|
||||
|
||||
return DeviceOrd::CUDA(0);
|
||||
#else
|
||||
|
||||
#define TRANSFORM_GPU -1
|
||||
|
||||
return DeviceOrd::CPU();
|
||||
#endif
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
struct TestTransformRange {
|
||||
void XGBOOST_DEVICE operator()(size_t _idx,
|
||||
Span<bst_float> _out, Span<const bst_float> _in) {
|
||||
void XGBOOST_DEVICE operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {
|
||||
_out[_idx] = _in[_idx];
|
||||
}
|
||||
};
|
||||
|
||||
TEST(Transform, DeclareUnifiedTest(Basic)) {
|
||||
const size_t size {256};
|
||||
std::vector<bst_float> h_in(size);
|
||||
std::vector<bst_float> h_out(size);
|
||||
const size_t size{256};
|
||||
std::vector<float> h_in(size);
|
||||
std::vector<float> h_out(size);
|
||||
std::iota(h_in.begin(), h_in.end(), 0);
|
||||
std::vector<bst_float> h_sol(size);
|
||||
std::vector<float> h_sol(size);
|
||||
std::iota(h_sol.begin(), h_sol.end(), 0);
|
||||
|
||||
const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU};
|
||||
HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU};
|
||||
auto device = TransformDevice();
|
||||
HostDeviceVector<float> const in_vec{h_in, device};
|
||||
HostDeviceVector<float> out_vec{h_out, device};
|
||||
out_vec.Fill(0);
|
||||
|
||||
Transform<>::Init(TestTransformRange<bst_float>{},
|
||||
Transform<>::Init(TestTransformRange<float>{},
|
||||
Range{0, static_cast<Range::DifferenceType>(size)}, AllThreadsForTest(),
|
||||
TRANSFORM_GPU)
|
||||
TransformDevice())
|
||||
.Eval(&out_vec, &in_vec);
|
||||
std::vector<bst_float> res = out_vec.HostVector();
|
||||
std::vector<float> res = out_vec.HostVector();
|
||||
|
||||
ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
|
||||
}
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
|
||||
TEST(TransformDeathTest, Exception) {
|
||||
size_t const kSize {16};
|
||||
std::vector<bst_float> h_in(kSize);
|
||||
const HostDeviceVector<bst_float> in_vec{h_in, -1};
|
||||
size_t const kSize{16};
|
||||
std::vector<float> h_in(kSize);
|
||||
const HostDeviceVector<float> in_vec{h_in, DeviceOrd::CPU()};
|
||||
EXPECT_DEATH(
|
||||
{
|
||||
Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },
|
||||
Range(0, static_cast<Range::DifferenceType>(kSize)), AllThreadsForTest(),
|
||||
-1)
|
||||
DeviceOrd::CPU())
|
||||
.Eval(&in_vec);
|
||||
},
|
||||
"");
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::common
|
||||
|
||||
5
tests/cpp/common/test_transform_range.cu
Normal file
5
tests/cpp/common/test_transform_range.cu
Normal file
@@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
// Dummy file to keep the CUDA tests.
|
||||
#include "test_transform_range.cc"
|
||||
@@ -70,12 +70,12 @@ TEST(DeviceAdapter, GetRowCounts) {
|
||||
for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {
|
||||
HostDeviceVector<float> storage;
|
||||
auto str_arr = RandomDataGenerator{8192, n_features, 0.0}
|
||||
.Device(ctx.gpu_id)
|
||||
.Device(ctx.Device())
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto adapter = CupyAdapter{str_arr};
|
||||
HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
|
||||
offset.SetDevice(ctx.gpu_id);
|
||||
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id,
|
||||
offset.SetDevice(ctx.Device());
|
||||
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
|
||||
std::numeric_limits<float>::quiet_NaN());
|
||||
ASSERT_EQ(rstride, n_features);
|
||||
}
|
||||
|
||||
@@ -98,7 +98,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
|
||||
auto ellpack = EllpackPage(&ctx, m.get(), p);
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
|
||||
ASSERT_EQ(kCats, accessor.NumBins());
|
||||
|
||||
auto x_copy = x;
|
||||
@@ -156,13 +156,12 @@ TEST(EllpackPage, Copy) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
kRows);
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);
|
||||
|
||||
// Copy batch pages into the result page.
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
size_t num_elements = result.Copy(0, batch.Impl(), offset);
|
||||
size_t num_elements = result.Copy(FstCU(), batch.Impl(), offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
|
||||
@@ -176,10 +175,12 @@ TEST(EllpackPage, Copy) {
|
||||
EXPECT_EQ(impl->base_rowid, current_row);
|
||||
|
||||
for (size_t i = 0; i < impl->Size(); i++) {
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()), current_row,
|
||||
row_d.data().get()));
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(0), current_row, row_result_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(FstCU()), current_row,
|
||||
row_result_d.data().get()));
|
||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||
|
||||
EXPECT_EQ(row, row_result);
|
||||
@@ -203,8 +204,7 @@ TEST(EllpackPage, Compact) {
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
|
||||
// Create an empty result page.
|
||||
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
|
||||
kCompactedRows);
|
||||
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);
|
||||
|
||||
// Compact batch pages into the result page.
|
||||
std::vector<size_t> row_indexes_h {
|
||||
@@ -213,7 +213,7 @@ TEST(EllpackPage, Compact) {
|
||||
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
|
||||
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
result.Compact(0, batch.Impl(), row_indexes_span);
|
||||
result.Compact(FstCU(), batch.Impl(), row_indexes_span);
|
||||
}
|
||||
|
||||
size_t current_row = 0;
|
||||
@@ -232,7 +232,7 @@ TEST(EllpackPage, Compact) {
|
||||
continue;
|
||||
}
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0),
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()),
|
||||
current_row, row_d.data().get()));
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
@@ -242,7 +242,7 @@ TEST(EllpackPage, Compact) {
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols,
|
||||
ReadRowFunction(result.GetDeviceAccessor(0), compacted_row,
|
||||
ReadRowFunction(result.GetDeviceAccessor(FstCU()), compacted_row,
|
||||
row_result_d.data().get()));
|
||||
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ namespace xgboost::data {
|
||||
TEST(GradientIndex, ExternalMemoryBaseRowID) {
|
||||
Context ctx;
|
||||
auto p_fmat = RandomDataGenerator{4096, 256, 0.5}
|
||||
.Device(ctx.gpu_id)
|
||||
.Device(ctx.Device())
|
||||
.Batches(8)
|
||||
.GenerateSparsePageDMatrix("cache", true);
|
||||
|
||||
|
||||
@@ -16,9 +16,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
namespace xgboost::data {
|
||||
void TestEquivalent(float sparsity) {
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
|
||||
@@ -28,14 +26,14 @@ void TestEquivalent(float sparsity) {
|
||||
std::size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
|
||||
new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
|
||||
first->row_stride, 1000 * 100)};
|
||||
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_concatenated->Copy(0, page, offset);
|
||||
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
auto from_iter = page_concatenated->GetDeviceAccessor(0);
|
||||
auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
|
||||
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());
|
||||
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());
|
||||
|
||||
@@ -45,7 +43,7 @@ void TestEquivalent(float sparsity) {
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
|
||||
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
|
||||
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
|
||||
auto from_data = ellpack.Impl()->GetDeviceAccessor(ctx.Device());
|
||||
|
||||
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
|
||||
std::vector<float> min_fvalues_iter(from_iter.min_fvalue.size());
|
||||
@@ -157,10 +155,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
|
||||
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
// null values get placed after valid values in a row
|
||||
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
|
||||
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
|
||||
EXPECT_EQ(m.Info().num_col_, cols);
|
||||
EXPECT_EQ(m.Info().num_row_, rows);
|
||||
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
|
||||
@@ -188,5 +186,4 @@ TEST(IterativeDeviceDMatrix, Ref) {
|
||||
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
|
||||
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(MetaInfo, GetSet) {
|
||||
xgboost::Context ctx;
|
||||
xgboost::MetaInfo info;
|
||||
@@ -73,6 +74,49 @@ TEST(MetaInfo, GetSetFeature) {
|
||||
// Other conditions are tested in `SaveLoadBinary`.
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyGetSetFeatureColumnSplit() {
|
||||
xgboost::MetaInfo info;
|
||||
info.data_split_mode = DataSplitMode::kCol;
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
|
||||
auto constexpr kCols{2};
|
||||
std::vector<std::string> types{u8"float", u8"c"};
|
||||
std::vector<char const *> c_types(kCols);
|
||||
std::transform(types.cbegin(), types.cend(), c_types.begin(),
|
||||
[](auto const &str) { return str.c_str(); });
|
||||
info.num_col_ = kCols;
|
||||
EXPECT_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()), dmlc::Error);
|
||||
info.num_col_ = kCols * world_size;
|
||||
EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()));
|
||||
std::vector<std::string> expected_type_names{u8"float", u8"c", u8"float",
|
||||
u8"c", u8"float", u8"c"};
|
||||
EXPECT_EQ(info.feature_type_names, expected_type_names);
|
||||
std::vector<xgboost::FeatureType> expected_types{
|
||||
xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,
|
||||
xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,
|
||||
xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical};
|
||||
EXPECT_EQ(info.feature_types.HostVector(), expected_types);
|
||||
|
||||
std::vector<std::string> names{u8"feature0", u8"feature1"};
|
||||
std::vector<char const *> c_names(kCols);
|
||||
std::transform(names.cbegin(), names.cend(), c_names.begin(),
|
||||
[](auto const &str) { return str.c_str(); });
|
||||
info.num_col_ = kCols;
|
||||
EXPECT_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()), dmlc::Error);
|
||||
info.num_col_ = kCols * world_size;
|
||||
EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()));
|
||||
std::vector<std::string> expected_names{u8"0.feature0", u8"0.feature1", u8"1.feature0",
|
||||
u8"1.feature1", u8"2.feature0", u8"2.feature1"};
|
||||
EXPECT_EQ(info.feature_names, expected_names);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(MetaInfo, GetSetFeatureColumnSplit) {
|
||||
auto constexpr kWorldSize{3};
|
||||
RunWithInMemoryCommunicator(kWorldSize, VerifyGetSetFeatureColumnSplit);
|
||||
}
|
||||
|
||||
TEST(MetaInfo, SaveLoadBinary) {
|
||||
xgboost::MetaInfo info;
|
||||
xgboost::Context ctx;
|
||||
@@ -236,9 +280,9 @@ TEST(MetaInfo, Validate) {
|
||||
info.num_nonzero_ = 12;
|
||||
info.num_col_ = 3;
|
||||
std::vector<xgboost::bst_group_t> groups (11);
|
||||
xgboost::Context ctx;
|
||||
Context ctx;
|
||||
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
|
||||
EXPECT_THROW(info.Validate(0), dmlc::Error);
|
||||
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
|
||||
|
||||
std::vector<float> labels(info.num_row_ + 1);
|
||||
EXPECT_THROW(
|
||||
@@ -261,11 +305,11 @@ TEST(MetaInfo, Validate) {
|
||||
info.group_ptr_.clear();
|
||||
labels.resize(info.num_row_);
|
||||
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
|
||||
info.labels.SetDevice(0);
|
||||
EXPECT_THROW(info.Validate(1), dmlc::Error);
|
||||
info.labels.SetDevice(FstCU());
|
||||
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
|
||||
|
||||
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
|
||||
d_groups.SetDevice(0);
|
||||
d_groups.SetDevice(FstCU());
|
||||
d_groups.DevicePointer(); // pull to device
|
||||
std::string arr_interface_str{ArrayInterfaceStr(xgboost::linalg::MakeVec(
|
||||
d_groups.ConstDevicePointer(), d_groups.Size(), xgboost::DeviceOrd::CUDA(0)))};
|
||||
@@ -306,6 +350,5 @@ TEST(MetaInfo, HostExtend) {
|
||||
}
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
TEST(MetaInfo, CPUStridedData) { TestMetaInfoStridedData(DeviceOrd::CPU()); }
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,31 +1,27 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../../../src/data/adapter.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost::data {
|
||||
TEST(ProxyDMatrix, HostData) {
|
||||
DMatrixProxy proxy;
|
||||
size_t constexpr kRows = 100, kCols = 10;
|
||||
std::vector<HostDeviceVector<float>> label_storage(1);
|
||||
|
||||
HostDeviceVector<float> storage;
|
||||
auto data = RandomDataGenerator(kRows, kCols, 0.5)
|
||||
.Device(0)
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto data =
|
||||
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
|
||||
|
||||
proxy.SetArrayData(data.c_str());
|
||||
|
||||
auto n_samples = HostAdapterDispatch(
|
||||
&proxy, [](auto const &value) { return value.Size(); });
|
||||
auto n_samples = HostAdapterDispatch(&proxy, [](auto const &value) { return value.Size(); });
|
||||
ASSERT_EQ(n_samples, kRows);
|
||||
auto n_features = HostAdapterDispatch(
|
||||
&proxy, [](auto const &value) { return value.NumCols(); });
|
||||
auto n_features = HostAdapterDispatch(&proxy, [](auto const &value) { return value.NumCols(); });
|
||||
ASSERT_EQ(n_features, kCols);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -19,10 +19,12 @@ namespace xgboost::data {
|
||||
TEST(ProxyDMatrix, DeviceData) {
|
||||
constexpr size_t kRows{100}, kCols{100};
|
||||
HostDeviceVector<float> storage;
|
||||
auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
|
||||
auto data =
|
||||
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
|
||||
std::vector<HostDeviceVector<float>> label_storage(1);
|
||||
auto labels =
|
||||
RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);
|
||||
auto labels = RandomDataGenerator(kRows, 1, 0)
|
||||
.Device(FstCU())
|
||||
.GenerateColumnarArrayInterface(&label_storage);
|
||||
|
||||
DMatrixProxy proxy;
|
||||
proxy.SetCUDAArray(data.c_str());
|
||||
@@ -35,7 +37,7 @@ TEST(ProxyDMatrix, DeviceData) {
|
||||
|
||||
std::vector<HostDeviceVector<float>> columnar_storage(kCols);
|
||||
data = RandomDataGenerator(kRows, kCols, 0)
|
||||
.Device(0)
|
||||
.Device(FstCU())
|
||||
.GenerateColumnarArrayInterface(&columnar_storage);
|
||||
proxy.SetCUDAArray(data.c_str());
|
||||
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));
|
||||
|
||||
@@ -268,7 +268,7 @@ TEST(SimpleDMatrix, Slice) {
|
||||
std::iota(upper.begin(), upper.end(), 1.0f);
|
||||
|
||||
auto& margin = p_m->Info().base_margin_;
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
|
||||
|
||||
std::array<int32_t, 3> ridxs {1, 3, 5};
|
||||
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
|
||||
@@ -341,7 +341,7 @@ TEST(SimpleDMatrix, SliceCol) {
|
||||
std::iota(upper.begin(), upper.end(), 1.0f);
|
||||
|
||||
auto& margin = p_m->Info().base_margin_;
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
|
||||
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
|
||||
|
||||
auto constexpr kSlices {2};
|
||||
auto constexpr kSliceSize {4};
|
||||
@@ -428,3 +428,21 @@ TEST(SimpleDMatrix, Threads) {
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0, "")};
|
||||
ASSERT_EQ(p_fmat->Ctx()->Threads(), AllThreadsForTest());
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyColumnSplit() {
|
||||
size_t constexpr kRows {16};
|
||||
size_t constexpr kCols {8};
|
||||
auto dmat =
|
||||
RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(false, false, 1, DataSplitMode::kCol);
|
||||
|
||||
ASSERT_EQ(dmat->Info().num_col_, kCols * collective::GetWorldSize());
|
||||
ASSERT_EQ(dmat->Info().num_row_, kRows);
|
||||
ASSERT_EQ(dmat->Info().data_split_mode, DataSplitMode::kCol);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(SimpleDMatrix, ColumnSplit) {
|
||||
auto constexpr kWorldSize{3};
|
||||
RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit);
|
||||
}
|
||||
|
||||
@@ -138,11 +138,11 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!impl_ext) {
|
||||
impl_ext.reset(new EllpackPageImpl(
|
||||
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
|
||||
batch.Impl()->is_dense, batch.Impl()->row_stride, kRows));
|
||||
impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
|
||||
batch.Impl()->Cuts(), batch.Impl()->is_dense,
|
||||
batch.Impl()->row_stride, kRows);
|
||||
}
|
||||
auto n_elems = impl_ext->Copy(0, batch.Impl(), offset);
|
||||
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
|
||||
offset += n_elems;
|
||||
}
|
||||
EXPECT_EQ(impl_ext->base_rowid, 0);
|
||||
@@ -202,10 +202,12 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
|
||||
EXPECT_EQ(impl_ext->base_rowid, current_row);
|
||||
|
||||
for (size_t i = 0; i < impl_ext->Size(); i++) {
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
|
||||
row_d.data().get()));
|
||||
thrust::copy(row_d.begin(), row_d.end(), row.begin());
|
||||
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(0), current_row, row_ext_d.data().get()));
|
||||
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(ctx.Device()), current_row,
|
||||
row_ext_d.data().get()));
|
||||
thrust::copy(row_ext_d.begin(), row_ext_d.end(), row_ext.begin());
|
||||
|
||||
EXPECT_EQ(row, row_ext);
|
||||
|
||||
@@ -65,7 +65,7 @@ TEST(GBTree, PredictionCache) {
|
||||
|
||||
gbtree.Configure({{"tree_method", "hist"}});
|
||||
auto p_m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
PredictionCacheEntry out_predictions;
|
||||
@@ -156,7 +156,7 @@ TEST(GBTree, ChoosePredictor) {
|
||||
|
||||
// pull data into device.
|
||||
data.HostVector();
|
||||
data.SetDevice(0);
|
||||
data.SetDevice(DeviceOrd::CUDA(0));
|
||||
data.DeviceSpan();
|
||||
ASSERT_FALSE(data.HostCanWrite());
|
||||
|
||||
@@ -215,7 +215,7 @@ TEST(GBTree, ChooseTreeMethod) {
|
||||
}
|
||||
learner->Configure();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, Context::kCpuId};
|
||||
linalg::Matrix<GradientPair> gpair{{Xy->Info().num_row_}, DeviceOrd::CPU()};
|
||||
gpair.Data()->Copy(GenerateRandomGradients(Xy->Info().num_row_));
|
||||
learner->BoostOneIter(0, Xy, &gpair);
|
||||
}
|
||||
@@ -400,7 +400,7 @@ class Dart : public testing::TestWithParam<char const*> {
|
||||
if (device == "GPU") {
|
||||
ctx = MakeCUDACtx(0);
|
||||
}
|
||||
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.gpu_id);
|
||||
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.Device());
|
||||
auto array_str = rng.GenerateArrayInterface(&data);
|
||||
auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);
|
||||
|
||||
@@ -710,7 +710,7 @@ TEST(GBTree, InplacePredictionError) {
|
||||
auto test_qdm_err = [&](std::string booster, Context const* ctx) {
|
||||
std::shared_ptr<DMatrix> p_fmat;
|
||||
bst_bin_t max_bins = 16;
|
||||
auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->gpu_id).Bins(max_bins);
|
||||
auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->Device()).Bins(max_bins);
|
||||
if (ctx->IsCPU()) {
|
||||
p_fmat = rng.GenerateQuantileDMatrix(true);
|
||||
} else {
|
||||
|
||||
@@ -22,7 +22,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
bst_feature_t n_features{32};
|
||||
HostDeviceVector<float> X_storage;
|
||||
// use a different device than the learner
|
||||
std::int32_t data_ordinal = ctx->IsCPU() ? 0 : -1;
|
||||
auto data_ordinal = ctx->IsCPU() ? DeviceOrd::CUDA(0) : DeviceOrd::CPU();
|
||||
auto X = RandomDataGenerator{n_samples, n_features, 0.0}
|
||||
.Device(data_ordinal)
|
||||
.GenerateArrayInterface(&X_storage);
|
||||
@@ -30,7 +30,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);
|
||||
|
||||
std::shared_ptr<DMatrix> Xy;
|
||||
if (data_ordinal == Context::kCpuId) {
|
||||
if (data_ordinal.IsCPU()) {
|
||||
auto X_adapter = data::ArrayAdapter{StringView{X}};
|
||||
Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
|
||||
} else {
|
||||
@@ -49,7 +49,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
|
||||
std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
|
||||
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
|
||||
if (data_ordinal == Context::kCpuId) {
|
||||
if (data_ordinal.IsCPU()) {
|
||||
proxy->SetArrayData(StringView{X});
|
||||
} else {
|
||||
proxy->SetCUDAArray(X.c_str());
|
||||
@@ -64,7 +64,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
|
||||
// test when the contexts match
|
||||
Context new_ctx = *proxy->Ctx();
|
||||
ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
|
||||
ASSERT_NE(new_ctx.Ordinal(), ctx->Ordinal());
|
||||
|
||||
learner->SetParam("device", new_ctx.DeviceName());
|
||||
HostDeviceVector<float>* out_predt_1{nullptr};
|
||||
|
||||
@@ -119,8 +119,10 @@ void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
||||
std::vector<xgboost::bst_float> out_hess) {
|
||||
xgboost::MetaInfo info;
|
||||
info.num_row_ = labels.size();
|
||||
info.labels = xgboost::linalg::Tensor<float, 2>{
|
||||
labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
|
||||
info.labels = xgboost::linalg::Tensor<float, 2>{labels.cbegin(),
|
||||
labels.cend(),
|
||||
{labels.size(), static_cast<std::size_t>(1)},
|
||||
xgboost::DeviceOrd::CPU()};
|
||||
info.weights_.HostVector() = weights;
|
||||
|
||||
CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);
|
||||
@@ -155,8 +157,10 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
||||
std::vector<xgboost::bst_float> out_hess) {
|
||||
xgboost::MetaInfo info;
|
||||
info.num_row_ = labels.size();
|
||||
info.labels = xgboost::linalg::Matrix<float>{
|
||||
labels.cbegin(), labels.cend(), {labels.size(), static_cast<std::size_t>(1)}, -1};
|
||||
info.labels = xgboost::linalg::Matrix<float>{labels.cbegin(),
|
||||
labels.cend(),
|
||||
{labels.size(), static_cast<std::size_t>(1)},
|
||||
xgboost::DeviceOrd::CPU()};
|
||||
info.weights_.HostVector() = weights;
|
||||
info.group_ptr_ = groups;
|
||||
|
||||
@@ -171,8 +175,9 @@ xgboost::bst_float GetMetricEval(xgboost::Metric* metric,
|
||||
xgboost::DataSplitMode data_split_mode) {
|
||||
return GetMultiMetricEval(
|
||||
metric, preds,
|
||||
xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1}, weights,
|
||||
groups, data_split_mode);
|
||||
xgboost::linalg::Tensor<float, 2>{
|
||||
labels.begin(), labels.end(), {labels.size()}, xgboost::DeviceOrd::CPU()},
|
||||
weights, groups, data_split_mode);
|
||||
}
|
||||
|
||||
double GetMultiMetricEval(xgboost::Metric* metric,
|
||||
@@ -215,7 +220,7 @@ void RandomDataGenerator::GenerateLabels(std::shared_ptr<DMatrix> p_fmat) const
|
||||
p_fmat->Info().labels.Data());
|
||||
CHECK_EQ(p_fmat->Info().labels.Size(), this->rows_ * this->n_targets_);
|
||||
p_fmat->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
if (device_ != Context::kCpuId) {
|
||||
if (device_.IsCUDA()) {
|
||||
p_fmat->Info().labels.SetDevice(device_);
|
||||
}
|
||||
}
|
||||
@@ -236,7 +241,7 @@ void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
|
||||
v = dist(&lcg);
|
||||
}
|
||||
}
|
||||
if (device_ >= 0) {
|
||||
if (device_.IsCUDA()) {
|
||||
out->SetDevice(device_);
|
||||
out->DeviceSpan();
|
||||
}
|
||||
@@ -258,7 +263,7 @@ std::string RandomDataGenerator::GenerateArrayInterface(
|
||||
|
||||
std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
|
||||
HostDeviceVector<float> const* storage, std::size_t n_samples, bst_feature_t n_features,
|
||||
std::size_t batches, std::int32_t device) {
|
||||
std::size_t batches, DeviceOrd device) {
|
||||
std::vector<std::string> result(batches);
|
||||
std::vector<Json> objects;
|
||||
|
||||
@@ -267,7 +272,7 @@ std::pair<std::vector<std::string>, std::string> MakeArrayInterfaceBatch(
|
||||
auto make_interface = [storage, device, n_features](std::size_t offset, std::size_t rows) {
|
||||
Json array_interface{Object()};
|
||||
array_interface["data"] = std::vector<Json>(2);
|
||||
if (device >= 0) {
|
||||
if (device.IsCUDA()) {
|
||||
array_interface["data"][0] =
|
||||
Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
|
||||
array_interface["stream"] = Null{};
|
||||
@@ -359,7 +364,7 @@ void RandomDataGenerator::GenerateCSR(
|
||||
h_rptr.emplace_back(rptr);
|
||||
}
|
||||
|
||||
if (device_ >= 0) {
|
||||
if (device_.IsCUDA()) {
|
||||
value->SetDevice(device_);
|
||||
value->DeviceSpan();
|
||||
row_ptr->SetDevice(device_);
|
||||
@@ -373,9 +378,8 @@ void RandomDataGenerator::GenerateCSR(
|
||||
CHECK_EQ(columns->Size(), value->Size());
|
||||
}
|
||||
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(bool with_label,
|
||||
bool float_label,
|
||||
size_t classes) const {
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDMatrix(
|
||||
bool with_label, bool float_label, size_t classes, DataSplitMode data_split_mode) const {
|
||||
HostDeviceVector<float> data;
|
||||
HostDeviceVector<bst_row_t> rptrs;
|
||||
HostDeviceVector<bst_feature_t> columns;
|
||||
@@ -383,7 +387,7 @@ void RandomDataGenerator::GenerateCSR(
|
||||
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(), data.HostPointer(), rows_,
|
||||
data.Size(), cols_);
|
||||
std::shared_ptr<DMatrix> out{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1, "", data_split_mode)};
|
||||
|
||||
if (with_label) {
|
||||
RandomDataGenerator gen{rows_, n_targets_, 0.0f};
|
||||
@@ -400,7 +404,7 @@ void RandomDataGenerator::GenerateCSR(
|
||||
out->Info().labels.Reshape(this->rows_, this->n_targets_);
|
||||
}
|
||||
}
|
||||
if (device_ >= 0) {
|
||||
if (device_.IsCUDA()) {
|
||||
out->Info().labels.SetDevice(device_);
|
||||
out->Info().feature_types.SetDevice(device_);
|
||||
for (auto const& page : out->GetBatches<SparsePage>()) {
|
||||
@@ -423,7 +427,7 @@ void RandomDataGenerator::GenerateCSR(
|
||||
CHECK_GE(this->n_batches_, 1)
|
||||
<< "Must set the n_batches before generating an external memory DMatrix.";
|
||||
std::unique_ptr<ArrayIterForTest> iter;
|
||||
if (device_ == Context::kCpuId) {
|
||||
if (device_.IsCPU()) {
|
||||
iter = std::make_unique<NumpyArrayIterForTest>(this->sparsity_, rows_, cols_, n_batches_);
|
||||
} else {
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
@@ -487,7 +491,7 @@ int CudaArrayIterForTest::Next() {
|
||||
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
|
||||
size_t batches)
|
||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||
rng_->Device(Context::kCpuId);
|
||||
rng_->Device(DeviceOrd::CPU());
|
||||
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
this->Reset();
|
||||
}
|
||||
@@ -644,8 +648,8 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
|
||||
labels[i] = i;
|
||||
}
|
||||
p_dmat->Info().labels =
|
||||
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx->Ordinal());
|
||||
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, DeviceOrd::CPU()};
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx->Device());
|
||||
auto h_gpair = gpair.HostView();
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
h_gpair(i) = GradientPair{static_cast<float>(i), 1};
|
||||
@@ -674,7 +678,7 @@ ArrayIterForTest::ArrayIterForTest(Context const* ctx, HostDeviceVector<float> c
|
||||
CHECK_EQ(this->data_.Size(), rows_ * cols_ * n_batches);
|
||||
this->data_.Copy(data);
|
||||
std::tie(batches_, interface_) =
|
||||
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->gpu_id);
|
||||
MakeArrayInterfaceBatch(&data_, rows_, cols_, n_batches_, ctx->Device());
|
||||
}
|
||||
|
||||
ArrayIterForTest::~ArrayIterForTest() { XGDMatrixFree(proxy_); }
|
||||
|
||||
@@ -13,7 +13,7 @@ namespace xgboost {
|
||||
CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
|
||||
size_t cols, size_t batches)
|
||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||
rng_->Device(0);
|
||||
rng_->Device(FstCU());
|
||||
std::tie(batches_, interface_) =
|
||||
rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
this->Reset();
|
||||
|
||||
@@ -231,7 +231,7 @@ class RandomDataGenerator {
|
||||
|
||||
bst_target_t n_targets_{1};
|
||||
|
||||
std::int32_t device_{Context::kCpuId};
|
||||
DeviceOrd device_{DeviceOrd::CPU()};
|
||||
std::size_t n_batches_{0};
|
||||
std::uint64_t seed_{0};
|
||||
SimpleLCG lcg_;
|
||||
@@ -256,7 +256,7 @@ class RandomDataGenerator {
|
||||
upper_ = v;
|
||||
return *this;
|
||||
}
|
||||
RandomDataGenerator& Device(int32_t d) {
|
||||
RandomDataGenerator& Device(DeviceOrd d) {
|
||||
device_ = d;
|
||||
return *this;
|
||||
}
|
||||
@@ -310,9 +310,9 @@ class RandomDataGenerator {
|
||||
void GenerateCSR(HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
|
||||
HostDeviceVector<bst_feature_t>* columns) const;
|
||||
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(bool with_label = false,
|
||||
bool float_label = true,
|
||||
size_t classes = 1) const;
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(
|
||||
bool with_label = false, bool float_label = true, size_t classes = 1,
|
||||
DataSplitMode data_split_mode = DataSplitMode::kRow) const;
|
||||
|
||||
[[nodiscard]] std::shared_ptr<DMatrix> GenerateSparsePageDMatrix(std::string prefix,
|
||||
bool with_label) const;
|
||||
@@ -391,7 +391,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
|
||||
* \brief Make a context that uses CUDA if device >= 0.
|
||||
*/
|
||||
inline Context MakeCUDACtx(std::int32_t device) {
|
||||
if (device == Context::kCpuId) {
|
||||
if (device == DeviceOrd::CPUOrdinal()) {
|
||||
return Context{};
|
||||
}
|
||||
return Context{}.MakeCUDA(device);
|
||||
@@ -501,7 +501,7 @@ RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
|
||||
* \brief Make learner model param
|
||||
*/
|
||||
inline LearnerModelParam MakeMP(bst_feature_t n_features, float base_score, uint32_t n_groups,
|
||||
int32_t device = Context::kCpuId) {
|
||||
DeviceOrd device = DeviceOrd::CPU()) {
|
||||
size_t shape[1]{1};
|
||||
LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
|
||||
n_groups, 1, MultiStrategy::kOneOutputPerTree);
|
||||
@@ -571,4 +571,5 @@ class BaseMGPUTest : public ::testing::Test {
|
||||
|
||||
class DeclareUnifiedDistributedTest(MetricTest) : public BaseMGPUTest{};
|
||||
|
||||
inline DeviceOrd FstCU() { return DeviceOrd::CUDA(0); }
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#if defined(__CUDACC__) || defined(__HIP_PLATFORM_AMD__)
|
||||
#include "../../src/data/ellpack_page.cuh"
|
||||
#endif
|
||||
@@ -24,8 +29,8 @@ class HistogramCutsWrapper : public common::HistogramCuts {
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
|
||||
int n_rows, int n_cols, bst_float sparsity= 0) {
|
||||
inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(int n_rows, int n_cols,
|
||||
bst_float sparsity = 0) {
|
||||
auto dmat = RandomDataGenerator(n_rows, n_cols, sparsity).Seed(3).GenerateDMatrix();
|
||||
const SparsePage& batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
|
||||
@@ -49,7 +54,7 @@ inline std::unique_ptr<EllpackPageImpl> BuildEllpackPage(
|
||||
}
|
||||
|
||||
auto page = std::unique_ptr<EllpackPageImpl>(
|
||||
new EllpackPageImpl(0, cmat, batch, dmat->IsDense(), row_stride, {}));
|
||||
new EllpackPageImpl(DeviceOrd::CUDA(0), cmat, batch, dmat->IsDense(), row_stride, {}));
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow)
|
||||
// Invalid dataset
|
||||
auto p_fmat = EmptyDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
|
||||
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, DeviceOrd::CPU()};
|
||||
float auc = metric->Evaluate({1, 1}, p_fmat);
|
||||
ASSERT_TRUE(std::isnan(auc));
|
||||
*info.labels.Data() = HostDeviceVector<float>{};
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
*/
|
||||
#include "test_elementwise_metric.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
namespace xgboost::metric {
|
||||
TEST(Metric, DeclareUnifiedTest(RMSE)) { VerifyRMSE(); }
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(RMSLE)) { VerifyRMSLE(); }
|
||||
@@ -104,5 +103,4 @@ TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) {
|
||||
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) {
|
||||
DoTest(VerifyQuantile, DataSplitMode::kCol);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -11,9 +11,7 @@
|
||||
#include "../../../src/common/linalg_op.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
namespace xgboost::metric {
|
||||
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
|
||||
auto ctx = MakeCUDACtx(device);
|
||||
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
|
||||
@@ -325,14 +323,14 @@ inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode
|
||||
}
|
||||
|
||||
inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
size_t n_samples = 32, n_targets = 8;
|
||||
linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
|
||||
linalg::Tensor<float, 2> y{{n_samples, n_targets}, ctx.Device()};
|
||||
auto &h_y = y.Data()->HostVector();
|
||||
std::iota(h_y.begin(), h_y.end(), 0);
|
||||
|
||||
HostDeviceVector<float> predt(n_samples * n_targets, 0);
|
||||
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
|
||||
metric->Configure({});
|
||||
|
||||
@@ -381,5 +379,4 @@ inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow)
|
||||
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
|
||||
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::metric
|
||||
|
||||
@@ -154,7 +154,7 @@ inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRo
|
||||
|
||||
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
|
||||
MetaInfo& info = p_fmat->Info();
|
||||
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
|
||||
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.Device()};
|
||||
info.num_row_ = info.labels.Shape(0);
|
||||
info.group_ptr_.resize(2);
|
||||
info.group_ptr_[0] = 0;
|
||||
|
||||
@@ -71,7 +71,7 @@ void TestNDCGGPair(Context const* ctx) {
|
||||
|
||||
HostDeviceVector<float> predts{0, 1, 0, 1};
|
||||
MetaInfo info;
|
||||
info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, GPUIDX};
|
||||
info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, ctx->Device()};
|
||||
info.group_ptr_ = {0, 2, 4};
|
||||
info.num_row_ = 4;
|
||||
linalg::Matrix<GradientPair> gpairs;
|
||||
@@ -146,7 +146,7 @@ TEST(LambdaRank, UnbiasedNDCG) {
|
||||
}
|
||||
|
||||
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
|
||||
out_predt->SetDevice(ctx->gpu_id);
|
||||
out_predt->SetDevice(ctx->Device());
|
||||
MetaInfo& info = *out_info;
|
||||
info.num_row_ = 128;
|
||||
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
|
||||
@@ -243,7 +243,7 @@ void TestMAPStat(Context const* ctx) {
|
||||
|
||||
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
|
||||
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
auto rank_idx =
|
||||
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
@@ -280,7 +280,7 @@ void TestMAPStat(Context const* ctx) {
|
||||
|
||||
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
|
||||
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
auto rank_idx =
|
||||
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../objective_helpers.h"
|
||||
|
||||
TEST(Objective, UnknownFunction) {
|
||||
xgboost::ObjFunction* obj = nullptr;
|
||||
@@ -43,4 +44,61 @@ TEST(Objective, PredTransform) {
|
||||
ASSERT_TRUE(predts.HostCanWrite());
|
||||
}
|
||||
}
|
||||
|
||||
class TestDefaultObjConfig : public ::testing::TestWithParam<std::string> {
|
||||
Context ctx_;
|
||||
|
||||
public:
|
||||
void Run(std::string objective) {
|
||||
auto Xy = MakeFmatForObjTest(objective);
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
std::unique_ptr<ObjFunction> objfn{ObjFunction::Create(objective, &ctx_)};
|
||||
|
||||
learner->SetParam("objective", objective);
|
||||
if (objective.find("multi") != std::string::npos) {
|
||||
learner->SetParam("num_class", "3");
|
||||
objfn->Configure(Args{{"num_class", "3"}});
|
||||
} else if (objective.find("quantile") != std::string::npos) {
|
||||
learner->SetParam("quantile_alpha", "0.5");
|
||||
objfn->Configure(Args{{"quantile_alpha", "0.5"}});
|
||||
} else {
|
||||
objfn->Configure(Args{});
|
||||
}
|
||||
learner->Configure();
|
||||
learner->UpdateOneIter(0, Xy);
|
||||
learner->EvalOneIter(0, {Xy}, {"train"});
|
||||
Json config{Object{}};
|
||||
learner->SaveConfig(&config);
|
||||
auto jobj = get<Object const>(config["learner"]["objective"]);
|
||||
|
||||
ASSERT_TRUE(jobj.find("name") != jobj.cend());
|
||||
// FIXME(jiamingy): We should have the following check, but some legacy parameter like
|
||||
// "pos_weight", "delta_step" in objectives are not in metrics.
|
||||
|
||||
// if (jobj.size() > 1) {
|
||||
// ASSERT_FALSE(IsA<Null>(objfn->DefaultMetricConfig()));
|
||||
// }
|
||||
auto mconfig = objfn->DefaultMetricConfig();
|
||||
if (!IsA<Null>(mconfig)) {
|
||||
// make sure metric can handle it
|
||||
std::unique_ptr<Metric> metricfn{Metric::Create(get<String const>(mconfig["name"]), &ctx_)};
|
||||
metricfn->LoadConfig(mconfig);
|
||||
Json loaded(Object{});
|
||||
metricfn->SaveConfig(&loaded);
|
||||
metricfn->Configure(Args{});
|
||||
ASSERT_EQ(mconfig, loaded);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(TestDefaultObjConfig, Objective) {
|
||||
std::string objective = GetParam();
|
||||
this->Run(objective);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(Objective, TestDefaultObjConfig,
|
||||
::testing::ValuesIn(MakeObjNamesForTest()),
|
||||
[](const ::testing::TestParamInfo<TestDefaultObjConfig::ParamType>& info) {
|
||||
return ObjTestNameGenerator(info);
|
||||
});
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -45,7 +45,7 @@ TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
|
||||
MetaInfo info;
|
||||
info.num_row_ = 10;
|
||||
info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t> shape) {
|
||||
data->SetDevice(ctx.gpu_id);
|
||||
data->SetDevice(ctx.Device());
|
||||
data->Resize(info.num_row_);
|
||||
shape[0] = info.num_row_;
|
||||
shape[1] = 1;
|
||||
|
||||
31
tests/cpp/objective_helpers.cc
Normal file
31
tests/cpp/objective_helpers.cc
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Copyright (c) 2023, XGBoost contributors
|
||||
*/
|
||||
#include "objective_helpers.h"
|
||||
|
||||
#include "../../src/common/linalg_op.h" // for begin, end
|
||||
#include "helpers.h" // for RandomDataGenerator
|
||||
|
||||
namespace xgboost {
|
||||
std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj) {
|
||||
auto constexpr kRows = 10, kCols = 10;
|
||||
auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
|
||||
auto& h_upper = p_fmat->Info().labels_upper_bound_.HostVector();
|
||||
auto& h_lower = p_fmat->Info().labels_lower_bound_.HostVector();
|
||||
h_lower.resize(kRows);
|
||||
h_upper.resize(kRows);
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
h_lower[i] = 1;
|
||||
h_upper[i] = 10;
|
||||
}
|
||||
if (obj.find("rank:") != std::string::npos) {
|
||||
auto h_label = p_fmat->Info().labels.HostView();
|
||||
std::size_t k = 0;
|
||||
for (auto& v : h_label) {
|
||||
v = k % 2 == 0;
|
||||
++k;
|
||||
}
|
||||
}
|
||||
return p_fmat;
|
||||
};
|
||||
} // namespace xgboost
|
||||
@@ -1,6 +1,8 @@
|
||||
/**
|
||||
* Copyright (c) 2023, XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <dmlc/registry.h> // for Registry
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/objective.h> // for ObjFunctionReg
|
||||
@@ -29,4 +31,6 @@ inline std::string ObjTestNameGenerator(const ::testing::TestParamInfo<ParamType
|
||||
}
|
||||
return name;
|
||||
};
|
||||
|
||||
std::shared_ptr<DMatrix> MakeFmatForObjTest(std::string const& obj);
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -23,7 +23,7 @@ class ServerForTest {
|
||||
std::unique_ptr<grpc::Server> server_;
|
||||
|
||||
public:
|
||||
explicit ServerForTest(std::int32_t world_size) {
|
||||
explicit ServerForTest(std::size_t world_size) {
|
||||
server_thread_.reset(new std::thread([this, world_size] {
|
||||
grpc::ServerBuilder builder;
|
||||
xgboost::federated::FederatedService service{world_size};
|
||||
|
||||
@@ -19,6 +19,11 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
|
||||
CheckAllgather(comm, rank);
|
||||
}
|
||||
|
||||
static void VerifyAllgatherV(int rank, const std::string &server_address) {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
CheckAllgatherV(comm, rank);
|
||||
}
|
||||
|
||||
static void VerifyAllreduce(int rank, const std::string &server_address) {
|
||||
FederatedCommunicator comm{kWorldSize, rank, server_address};
|
||||
CheckAllreduce(comm);
|
||||
@@ -31,14 +36,19 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
|
||||
|
||||
protected:
|
||||
static void CheckAllgather(FederatedCommunicator &comm, int rank) {
|
||||
int buffer[kWorldSize] = {0, 0};
|
||||
buffer[rank] = rank;
|
||||
comm.AllGather(buffer, sizeof(buffer));
|
||||
std::string input{static_cast<char>('0' + rank)};
|
||||
auto output = comm.AllGather(input);
|
||||
for (auto i = 0; i < kWorldSize; i++) {
|
||||
EXPECT_EQ(buffer[i], i);
|
||||
EXPECT_EQ(output[i], static_cast<char>('0' + i));
|
||||
}
|
||||
}
|
||||
|
||||
static void CheckAllgatherV(FederatedCommunicator &comm, int rank) {
|
||||
std::vector<std::string_view> inputs{"Federated", " Learning!!!"};
|
||||
auto output = comm.AllGatherV(inputs[rank]);
|
||||
EXPECT_EQ(output, "Federated Learning!!!");
|
||||
}
|
||||
|
||||
static void CheckAllreduce(FederatedCommunicator &comm) {
|
||||
int buffer[] = {1, 2, 3, 4, 5};
|
||||
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum);
|
||||
@@ -119,6 +129,16 @@ TEST_F(FederatedCommunicatorTest, Allgather) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(FederatedCommunicatorTest, AllgatherV) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(&FederatedCommunicatorTest::VerifyAllgatherV, rank, server_->Address());
|
||||
}
|
||||
for (auto &thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(FederatedCommunicatorTest, Allreduce) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
|
||||
@@ -120,6 +120,11 @@ TEST_P(VerticalFederatedLearnerTest, Hist) {
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_P(VerticalFederatedLearnerTest, GPUApprox) {
|
||||
std::string objective = GetParam();
|
||||
this->Run("approx", "cuda:0", objective);
|
||||
}
|
||||
|
||||
TEST_P(VerticalFederatedLearnerTest, GPUHist) {
|
||||
std::string objective = GetParam();
|
||||
this->Run("hist", "cuda:0", objective);
|
||||
|
||||
@@ -18,6 +18,11 @@ class FederatedServerTest : public BaseFederatedTest {
|
||||
CheckAllgather(client, rank);
|
||||
}
|
||||
|
||||
static void VerifyAllgatherV(int rank, const std::string& server_address) {
|
||||
federated::FederatedClient client{server_address, rank};
|
||||
CheckAllgatherV(client, rank);
|
||||
}
|
||||
|
||||
static void VerifyAllreduce(int rank, const std::string& server_address) {
|
||||
federated::FederatedClient client{server_address, rank};
|
||||
CheckAllreduce(client);
|
||||
@@ -39,8 +44,7 @@ class FederatedServerTest : public BaseFederatedTest {
|
||||
|
||||
protected:
|
||||
static void CheckAllgather(federated::FederatedClient& client, int rank) {
|
||||
int data[kWorldSize] = {0, 0};
|
||||
data[rank] = rank;
|
||||
int data[] = {rank};
|
||||
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
|
||||
auto reply = client.Allgather(send_buffer);
|
||||
auto const* result = reinterpret_cast<int const*>(reply.data());
|
||||
@@ -49,6 +53,12 @@ class FederatedServerTest : public BaseFederatedTest {
|
||||
}
|
||||
}
|
||||
|
||||
static void CheckAllgatherV(federated::FederatedClient& client, int rank) {
|
||||
std::vector<std::string_view> inputs{"Hello,", " World!"};
|
||||
auto reply = client.AllgatherV(inputs[rank]);
|
||||
EXPECT_EQ(reply, "Hello, World!");
|
||||
}
|
||||
|
||||
static void CheckAllreduce(federated::FederatedClient& client) {
|
||||
int data[] = {1, 2, 3, 4, 5};
|
||||
std::string send_buffer(reinterpret_cast<char const*>(data), sizeof(data));
|
||||
@@ -80,6 +90,16 @@ TEST_F(FederatedServerTest, Allgather) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(FederatedServerTest, AllgatherV) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
threads.emplace_back(&FederatedServerTest::VerifyAllgatherV, rank, server_->Address());
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(FederatedServerTest, Allreduce) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < kWorldSize; rank++) {
|
||||
|
||||
@@ -127,8 +127,8 @@ TEST(CpuPredictor, IterationRange) {
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, IterationRangeColmnSplit) {
|
||||
Context ctx;
|
||||
TestIterationRangeColumnSplit(&ctx);
|
||||
auto constexpr kWorldSize = 2;
|
||||
TestIterationRangeColumnSplit(kWorldSize, false);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, ExternalMemory) {
|
||||
@@ -142,7 +142,7 @@ TEST(CpuPredictor, InplacePredict) {
|
||||
bst_row_t constexpr kRows{128};
|
||||
bst_feature_t constexpr kCols{64};
|
||||
Context ctx;
|
||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.gpu_id);
|
||||
auto gen = RandomDataGenerator{kRows, kCols, 0.5}.Device(ctx.Device());
|
||||
{
|
||||
HostDeviceVector<float> data;
|
||||
gen.GenerateDense(&data);
|
||||
@@ -226,23 +226,21 @@ TEST(CPUPredictor, GHistIndexTraining) {
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPrediction) {
|
||||
Context ctx;
|
||||
TestCategoricalPrediction(&ctx, false);
|
||||
TestCategoricalPrediction(false, false);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
|
||||
Context ctx;
|
||||
TestCategoricalPredictionColumnSplit(&ctx);
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, false, true);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictLeaf) {
|
||||
Context ctx;
|
||||
TestCategoricalPredictLeaf(&ctx, false);
|
||||
TestCategoricalPredictLeaf(false, false);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
|
||||
Context ctx;
|
||||
TestCategoricalPredictLeafColumnSplit(&ctx);
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, false, true);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, UpdatePredictionCache) {
|
||||
@@ -256,8 +254,8 @@ TEST(CpuPredictor, LesserFeatures) {
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, LesserFeaturesColumnSplit) {
|
||||
Context ctx;
|
||||
TestPredictionWithLesserFeaturesColumnSplit(&ctx);
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestPredictionWithLesserFeaturesColumnSplit, false);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, Sparse) {
|
||||
@@ -267,9 +265,9 @@ TEST(CpuPredictor, Sparse) {
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, SparseColumnSplit) {
|
||||
Context ctx;
|
||||
TestSparsePredictionColumnSplit(&ctx, 0.2);
|
||||
TestSparsePredictionColumnSplit(&ctx, 0.8);
|
||||
auto constexpr kWorldSize = 2;
|
||||
TestSparsePredictionColumnSplit(kWorldSize, false, 0.2);
|
||||
TestSparsePredictionColumnSplit(kWorldSize, false, 0.8);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, Multi) {
|
||||
|
||||
@@ -38,7 +38,7 @@ TEST(GPUPredictor, Basic) {
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -74,7 +74,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -102,7 +102,7 @@ TEST_F(MGPUPredictorTest, BasicColumnSplit) {
|
||||
size_t n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -123,8 +123,10 @@ TEST(GPUPredictor, EllpackBasic) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
for (size_t bins = 2; bins < 258; bins += 16) {
|
||||
size_t rows = bins * 16;
|
||||
auto p_m =
|
||||
RandomDataGenerator{rows, kCols, 0.0}.Bins(bins).Device(0).GenerateDeviceDMatrix(false);
|
||||
auto p_m = RandomDataGenerator{rows, kCols, 0.0}
|
||||
.Bins(bins)
|
||||
.Device(DeviceOrd::CUDA(0))
|
||||
.GenerateDeviceDMatrix(false);
|
||||
ASSERT_FALSE(p_m->PageExists<SparsePage>());
|
||||
TestPredictionFromGradientIndex<EllpackPage>(&ctx, rows, kCols, p_m);
|
||||
TestPredictionFromGradientIndex<EllpackPage>(&ctx, bins, kCols, p_m);
|
||||
@@ -136,11 +138,11 @@ TEST(GPUPredictor, EllpackTraining) {
|
||||
size_t constexpr kRows{128}, kCols{16}, kBins{64};
|
||||
auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
|
||||
.Bins(kBins)
|
||||
.Device(ctx.Ordinal())
|
||||
.Device(ctx.Device())
|
||||
.GenerateDeviceDMatrix(false);
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar =
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Device()).GenerateArrayInterface(&storage);
|
||||
auto adapter = data::CupyAdapter(columnar);
|
||||
std::shared_ptr<DMatrix> p_full{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
@@ -155,7 +157,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
|
||||
const int n_classes = 3;
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Device())};
|
||||
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
|
||||
std::vector<std::unique_ptr<DMatrix>> dmats;
|
||||
@@ -166,7 +168,7 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
|
||||
for (const auto& dmat: dmats) {
|
||||
dmat->Info().base_margin_ = decltype(dmat->Info().base_margin_){
|
||||
{dmat->Info().num_row_, static_cast<size_t>(n_classes)}, 0};
|
||||
{dmat->Info().num_row_, static_cast<size_t>(n_classes)}, DeviceOrd::CUDA(0)};
|
||||
dmat->Info().base_margin_.Data()->Fill(0.5);
|
||||
PredictionCacheEntry out_predictions;
|
||||
gpu_predictor->InitOutPredictions(dmat->Info(), &out_predictions.predictions, model);
|
||||
@@ -185,7 +187,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(ctx.Ordinal());
|
||||
gen.Device(ctx.Device());
|
||||
HostDeviceVector<float> data;
|
||||
std::string interface_str = gen.GenerateArrayInterface(&data);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
@@ -197,7 +199,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(ctx.Ordinal());
|
||||
gen.Device(ctx.Device());
|
||||
std::vector<HostDeviceVector<float>> storage(kCols);
|
||||
auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
@@ -210,6 +212,10 @@ TEST(GpuPredictor, LesserFeatures) {
|
||||
TestPredictionWithLesserFeatures(&ctx);
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, LesserFeaturesColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, TestPredictionWithLesserFeaturesColumnSplit, true);
|
||||
}
|
||||
|
||||
// Very basic test of empty model
|
||||
TEST(GPUPredictor, ShapStump) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
@@ -219,7 +225,7 @@ TEST(GPUPredictor, ShapStump) {
|
||||
#endif
|
||||
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
@@ -245,7 +251,7 @@ TEST(GPUPredictor, ShapStump) {
|
||||
|
||||
TEST(GPUPredictor, Shap) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Device())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
@@ -278,19 +284,29 @@ TEST(GPUPredictor, IterationRange) {
|
||||
TestIterationRange(&ctx);
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, IterationRangeColumnSplit) {
|
||||
TestIterationRangeColumnSplit(world_size_, true);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, CategoricalPrediction) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestCategoricalPrediction(&ctx, false);
|
||||
TestCategoricalPrediction(true, false);
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, TestCategoricalPrediction, true, true);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, CategoricalPredictLeaf) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestCategoricalPredictLeaf(&ctx, false);
|
||||
TestCategoricalPredictLeaf(true, false);
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, true, true);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, PredictLeafBasic) {
|
||||
size_t constexpr kRows = 5, kCols = 5;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(0).GenerateDMatrix();
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Device(DeviceOrd::CUDA(0)).GenerateDMatrix();
|
||||
auto lparam = MakeCUDACtx(GPUIDX);
|
||||
std::unique_ptr<Predictor> gpu_predictor =
|
||||
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
|
||||
@@ -313,4 +329,9 @@ TEST(GPUPredictor, Sparse) {
|
||||
TestSparsePrediction(&ctx, 0.2);
|
||||
TestSparsePrediction(&ctx, 0.8);
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, SparseColumnSplit) {
|
||||
TestSparsePredictionColumnSplit(world_size_, true, 0.2);
|
||||
TestSparsePredictionColumnSplit(world_size_, true, 0.8);
|
||||
}
|
||||
} // namespace xgboost::predictor
|
||||
|
||||
@@ -34,7 +34,7 @@ TEST(Predictor, PredictionCache) {
|
||||
// Add a cache that is immediately expired.
|
||||
auto add_cache = [&]() {
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
|
||||
container.Cache(p_dmat, Context::kCpuId);
|
||||
container.Cache(p_dmat, DeviceOrd::CPU());
|
||||
m = p_dmat.get();
|
||||
};
|
||||
|
||||
@@ -93,7 +93,7 @@ void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
|
||||
void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
|
||||
bst_feature_t cols) {
|
||||
std::size_t constexpr kClasses { 4 };
|
||||
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->gpu_id);
|
||||
auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(ctx->Device());
|
||||
std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true, false, kClasses);
|
||||
|
||||
std::unique_ptr<Learner> learner {
|
||||
@@ -172,16 +172,6 @@ void VerifyPredictionWithLesserFeatures(Learner *learner, bst_row_t kRows,
|
||||
ASSERT_THROW({ learner->Predict(m_invalid, false, &prediction, 0, 0); }, dmlc::Error);
|
||||
}
|
||||
|
||||
void VerifyPredictionWithLesserFeaturesColumnSplit(Learner *learner, size_t rows,
|
||||
std::shared_ptr<DMatrix> m_test,
|
||||
std::shared_ptr<DMatrix> m_invalid) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::shared_ptr<DMatrix> sliced_test{m_test->SliceCol(world_size, rank)};
|
||||
std::shared_ptr<DMatrix> sliced_invalid{m_invalid->SliceCol(world_size, rank)};
|
||||
|
||||
VerifyPredictionWithLesserFeatures(learner, rows, sliced_test, sliced_invalid);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void TestPredictionWithLesserFeatures(Context const *ctx) {
|
||||
@@ -202,7 +192,7 @@ void TestPredictionDeviceAccess() {
|
||||
|
||||
HostDeviceVector<float> from_cpu;
|
||||
{
|
||||
ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
|
||||
ASSERT_TRUE(from_cpu.Device().IsCPU());
|
||||
Context cpu_ctx;
|
||||
learner->SetParam("device", cpu_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cpu, 0, 0);
|
||||
@@ -216,7 +206,7 @@ void TestPredictionDeviceAccess() {
|
||||
Context cuda_ctx = MakeCUDACtx(0);
|
||||
learner->SetParam("device", cuda_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cuda, 0, 0);
|
||||
ASSERT_EQ(from_cuda.DeviceIdx(), 0);
|
||||
ASSERT_EQ(from_cuda.Device(), DeviceOrd::CUDA(0));
|
||||
ASSERT_TRUE(from_cuda.DeviceCanWrite());
|
||||
ASSERT_FALSE(from_cuda.HostCanRead());
|
||||
}
|
||||
@@ -229,16 +219,24 @@ void TestPredictionDeviceAccess() {
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(Context const *ctx) {
|
||||
size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
|
||||
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).GenerateDMatrix(true);
|
||||
auto learner = LearnerForTest(ctx, m_train, kIters);
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
|
||||
std::size_t constexpr kRows = 256, kTrainCols = 256, kTestCols = 4, kIters = 4;
|
||||
auto m_train = RandomDataGenerator(kRows, kTrainCols, 0.5).Seed(rank).GenerateDMatrix(true);
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : rank);
|
||||
}
|
||||
auto learner = LearnerForTest(&ctx, m_train, kIters);
|
||||
auto m_test = RandomDataGenerator(kRows, kTestCols, 0.5).GenerateDMatrix(false);
|
||||
auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, VerifyPredictionWithLesserFeaturesColumnSplit,
|
||||
learner.get(), kRows, m_test, m_invalid);
|
||||
std::shared_ptr<DMatrix> sliced_test{m_test->SliceCol(world_size, rank)};
|
||||
std::shared_ptr<DMatrix> sliced_invalid{m_invalid->SliceCol(world_size, rank)};
|
||||
|
||||
VerifyPredictionWithLesserFeatures(learner.get(), kRows, sliced_test, sliced_invalid);
|
||||
}
|
||||
|
||||
void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
|
||||
@@ -260,7 +258,11 @@ void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
|
||||
model->CommitModelGroup(std::move(trees), 0);
|
||||
}
|
||||
|
||||
void TestCategoricalPrediction(Context const* ctx, bool is_column_split) {
|
||||
void TestCategoricalPrediction(bool use_gpu, bool is_column_split) {
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
|
||||
}
|
||||
size_t constexpr kCols = 10;
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
@@ -270,10 +272,10 @@ void TestCategoricalPrediction(Context const* ctx, bool is_column_split) {
|
||||
float left_weight = 1.3f;
|
||||
float right_weight = 1.7f;
|
||||
|
||||
gbm::GBTreeModel model(&mparam, ctx);
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
|
||||
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
|
||||
|
||||
std::vector<float> row(kCols);
|
||||
row[split_ind] = split_cat;
|
||||
@@ -303,12 +305,11 @@ void TestCategoricalPrediction(Context const* ctx, bool is_column_split) {
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0], left_weight + score);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictionColumnSplit(Context const *ctx) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, ctx, true);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
|
||||
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split) {
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
|
||||
}
|
||||
size_t constexpr kCols = 10;
|
||||
PredictionCacheEntry out_predictions;
|
||||
|
||||
@@ -319,10 +320,10 @@ void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
|
||||
float left_weight = 1.3f;
|
||||
float right_weight = 1.7f;
|
||||
|
||||
gbm::GBTreeModel model(&mparam, ctx);
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
GBTreeModelForTest(&model, split_ind, split_cat, left_weight, right_weight);
|
||||
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(ctx)};
|
||||
std::unique_ptr<Predictor> predictor{CreatePredictorForTest(&ctx)};
|
||||
|
||||
std::vector<float> row(kCols);
|
||||
row[split_ind] = split_cat;
|
||||
@@ -347,15 +348,10 @@ void TestCategoricalPredictLeaf(Context const *ctx, bool is_column_split) {
|
||||
ASSERT_EQ(out_predictions.predictions.HostVector()[0], 1);
|
||||
}
|
||||
|
||||
void TestCategoricalPredictLeafColumnSplit(Context const *ctx) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, ctx, true);
|
||||
}
|
||||
|
||||
void TestIterationRange(Context const* ctx) {
|
||||
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0)
|
||||
.Device(ctx->gpu_id)
|
||||
.Device(ctx->Device())
|
||||
.GenerateDMatrix(true, true, kClasses);
|
||||
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
|
||||
|
||||
@@ -411,15 +407,30 @@ void TestIterationRange(Context const* ctx) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *sliced,
|
||||
void VerifyIterationRangeColumnSplit(bool use_gpu, Json const &ranged_model,
|
||||
Json const &sliced_model, std::size_t rows, std::size_t cols,
|
||||
std::size_t classes,
|
||||
std::vector<float> const &expected_margin_ranged,
|
||||
std::vector<float> const &expected_margin_sliced,
|
||||
std::vector<float> const &expected_leaf_ranged,
|
||||
std::vector<float> const &expected_leaf_sliced) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : rank);
|
||||
}
|
||||
auto dmat = RandomDataGenerator(rows, cols, 0).GenerateDMatrix(true, true, classes);
|
||||
std::shared_ptr<DMatrix> Xy{dmat->SliceCol(world_size, rank)};
|
||||
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
learner->SetParam("device", ctx.DeviceName());
|
||||
learner->LoadModel(ranged_model);
|
||||
|
||||
std::unique_ptr<Learner> sliced{Learner::Create({Xy})};
|
||||
sliced->SetParam("device", ctx.DeviceName());
|
||||
sliced->LoadModel(sliced_model);
|
||||
|
||||
HostDeviceVector<float> out_predt_sliced;
|
||||
HostDeviceVector<float> out_predt_ranged;
|
||||
|
||||
@@ -428,11 +439,15 @@ void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *s
|
||||
sliced->Predict(Xy, true, &out_predt_sliced, 0, 0, false, false, false, false, false);
|
||||
learner->Predict(Xy, true, &out_predt_ranged, 0, 3, false, false, false, false, false);
|
||||
auto const &h_sliced = out_predt_sliced.HostVector();
|
||||
auto const &h_range = out_predt_ranged.HostVector();
|
||||
ASSERT_EQ(h_sliced.size(), expected_margin_sliced.size());
|
||||
ASSERT_EQ(h_sliced, expected_margin_sliced);
|
||||
ASSERT_EQ(h_range.size(), expected_margin_ranged.size());
|
||||
ASSERT_EQ(h_range, expected_margin_ranged);
|
||||
auto const &h_ranged = out_predt_ranged.HostVector();
|
||||
EXPECT_EQ(h_sliced.size(), expected_margin_sliced.size());
|
||||
for (std::size_t i = 0; i < expected_margin_sliced.size(); ++i) {
|
||||
ASSERT_FLOAT_EQ(h_sliced[i], expected_margin_sliced[i]) << "rank " << rank << ", i " << i;
|
||||
}
|
||||
EXPECT_EQ(h_ranged.size(), expected_margin_ranged.size());
|
||||
for (std::size_t i = 0; i < expected_margin_ranged.size(); ++i) {
|
||||
ASSERT_FLOAT_EQ(h_ranged[i], expected_margin_ranged[i]) << "rank " << rank << ", i " << i;
|
||||
}
|
||||
}
|
||||
|
||||
// Leaf
|
||||
@@ -440,21 +455,27 @@ void VerifyIterationRangeColumnSplit(DMatrix *dmat, Learner *learner, Learner *s
|
||||
sliced->Predict(Xy, false, &out_predt_sliced, 0, 0, false, true, false, false, false);
|
||||
learner->Predict(Xy, false, &out_predt_ranged, 0, 3, false, true, false, false, false);
|
||||
auto const &h_sliced = out_predt_sliced.HostVector();
|
||||
auto const &h_range = out_predt_ranged.HostVector();
|
||||
ASSERT_EQ(h_sliced.size(), expected_leaf_sliced.size());
|
||||
ASSERT_EQ(h_sliced, expected_leaf_sliced);
|
||||
ASSERT_EQ(h_range.size(), expected_leaf_ranged.size());
|
||||
ASSERT_EQ(h_range, expected_leaf_ranged);
|
||||
auto const &h_ranged = out_predt_ranged.HostVector();
|
||||
EXPECT_EQ(h_sliced.size(), expected_leaf_sliced.size());
|
||||
for (std::size_t i = 0; i < expected_leaf_sliced.size(); ++i) {
|
||||
ASSERT_FLOAT_EQ(h_sliced[i], expected_leaf_sliced[i]) << "rank " << rank << ", i " << i;
|
||||
}
|
||||
EXPECT_EQ(h_ranged.size(), expected_leaf_ranged.size());
|
||||
for (std::size_t i = 0; i < expected_leaf_ranged.size(); ++i) {
|
||||
ASSERT_FLOAT_EQ(h_ranged[i], expected_leaf_ranged[i]) << "rank " << rank << ", i " << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void TestIterationRangeColumnSplit(Context const* ctx) {
|
||||
size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
|
||||
void TestIterationRangeColumnSplit(int world_size, bool use_gpu) {
|
||||
std::size_t constexpr kRows = 1000, kCols = 20, kClasses = 4, kForest = 3, kIters = 10;
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
|
||||
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
|
||||
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(0);
|
||||
}
|
||||
auto learner = LearnerForTest(&ctx, dmat, kIters, kForest);
|
||||
|
||||
bool bound = false;
|
||||
std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};
|
||||
@@ -476,9 +497,13 @@ void TestIterationRangeColumnSplit(Context const* ctx) {
|
||||
auto const &leaf_sliced = leaf_predt_sliced.HostVector();
|
||||
auto const &leaf_ranged = leaf_predt_ranged.HostVector();
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, VerifyIterationRangeColumnSplit, dmat.get(),
|
||||
learner.get(), sliced.get(), margin_ranged, margin_sliced,
|
||||
Json ranged_model{Object{}};
|
||||
learner->SaveModel(&ranged_model);
|
||||
Json sliced_model{Object{}};
|
||||
sliced->SaveModel(&sliced_model);
|
||||
|
||||
RunWithInMemoryCommunicator(world_size, VerifyIterationRangeColumnSplit, use_gpu, ranged_model,
|
||||
sliced_model, kRows, kCols, kClasses, margin_ranged, margin_sliced,
|
||||
leaf_ranged, leaf_sliced);
|
||||
}
|
||||
|
||||
@@ -497,7 +522,7 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
|
||||
|
||||
if (ctx->IsCUDA()) {
|
||||
learner->SetParam("tree_method", "gpu_hist");
|
||||
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
|
||||
learner->SetParam("device", ctx->Device().Name());
|
||||
}
|
||||
learner->Predict(Xy, false, &sparse_predt, 0, 0);
|
||||
|
||||
@@ -539,11 +564,20 @@ void TestSparsePrediction(Context const *ctx, float sparsity) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
void VerifySparsePredictionColumnSplit(DMatrix *dmat, Learner *learner,
|
||||
void VerifySparsePredictionColumnSplit(bool use_gpu, Json const &model, std::size_t rows,
|
||||
std::size_t cols, float sparsity,
|
||||
std::vector<float> const &expected_predt) {
|
||||
std::shared_ptr<DMatrix> sliced{
|
||||
dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
|
||||
}
|
||||
auto Xy = RandomDataGenerator(rows, cols, sparsity).GenerateDMatrix(true);
|
||||
std::shared_ptr<DMatrix> sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||
HostDeviceVector<float> sparse_predt;
|
||||
|
||||
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
||||
learner->SetParam("device", ctx.DeviceName());
|
||||
learner->LoadModel(model);
|
||||
learner->Predict(sliced, false, &sparse_predt, 0, 0);
|
||||
|
||||
auto const &predt = sparse_predt.HostVector();
|
||||
@@ -554,10 +588,14 @@ void VerifySparsePredictionColumnSplit(DMatrix *dmat, Learner *learner,
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
|
||||
void TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsity) {
|
||||
Context ctx;
|
||||
if (use_gpu) {
|
||||
ctx = MakeCUDACtx(0);
|
||||
}
|
||||
size_t constexpr kRows = 512, kCols = 128, kIters = 4;
|
||||
auto Xy = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix(true);
|
||||
auto learner = LearnerForTest(ctx, Xy, kIters);
|
||||
auto learner = LearnerForTest(&ctx, Xy, kIters);
|
||||
|
||||
HostDeviceVector<float> sparse_predt;
|
||||
|
||||
@@ -567,12 +605,11 @@ void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
|
||||
learner.reset(Learner::Create({Xy}));
|
||||
learner->LoadModel(model);
|
||||
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->SetParam("device", ctx.DeviceName());
|
||||
learner->Predict(Xy, false, &sparse_predt, 0, 0);
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, VerifySparsePredictionColumnSplit, Xy.get(),
|
||||
learner.get(), sparse_predt.HostVector());
|
||||
RunWithInMemoryCommunicator(world_size, VerifySparsePredictionColumnSplit, use_gpu, model,
|
||||
kRows, kCols, sparsity, sparse_predt.HostVector());
|
||||
}
|
||||
|
||||
void TestVectorLeafPrediction(Context const *ctx) {
|
||||
@@ -583,7 +620,7 @@ void TestVectorLeafPrediction(Context const *ctx) {
|
||||
size_t constexpr kCols = 5;
|
||||
|
||||
LearnerModelParam mparam{static_cast<bst_feature_t>(kCols),
|
||||
linalg::Vector<float>{{0.5}, {1}, Context::kCpuId}, 1, 3,
|
||||
linalg::Vector<float>{{0.5}, {1}, DeviceOrd::CPU()}, 1, 3,
|
||||
MultiStrategy::kMultiOutputTree};
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
|
||||
@@ -94,23 +94,19 @@ void TestPredictionWithLesserFeatures(Context const* ctx);
|
||||
|
||||
void TestPredictionDeviceAccess();
|
||||
|
||||
void TestCategoricalPrediction(Context const* ctx, bool is_column_split);
|
||||
void TestCategoricalPrediction(bool use_gpu, bool is_column_split);
|
||||
|
||||
void TestCategoricalPredictionColumnSplit(Context const* ctx);
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(bool use_gpu);
|
||||
|
||||
void TestPredictionWithLesserFeaturesColumnSplit(Context const* ctx);
|
||||
|
||||
void TestCategoricalPredictLeaf(Context const* ctx, bool is_column_split);
|
||||
|
||||
void TestCategoricalPredictLeafColumnSplit(Context const* ctx);
|
||||
void TestCategoricalPredictLeaf(bool use_gpu, bool is_column_split);
|
||||
|
||||
void TestIterationRange(Context const* ctx);
|
||||
|
||||
void TestIterationRangeColumnSplit(Context const* ctx);
|
||||
void TestIterationRangeColumnSplit(int world_size, bool use_gpu);
|
||||
|
||||
void TestSparsePrediction(Context const* ctx, float sparsity);
|
||||
|
||||
void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity);
|
||||
void TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsity);
|
||||
|
||||
void TestVectorLeafPrediction(Context const* ctx);
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -5,11 +5,13 @@
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/context.h>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Context, CPU) {
|
||||
Context ctx;
|
||||
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
|
||||
ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);
|
||||
ASSERT_EQ(ctx.Ordinal(), DeviceOrd::CPUOrdinal());
|
||||
|
||||
std::int32_t flag{0};
|
||||
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
|
||||
@@ -27,5 +29,20 @@ TEST(Context, CPU) {
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << ctx.Device();
|
||||
ASSERT_EQ(ss.str(), "cpu");
|
||||
}
|
||||
|
||||
TEST(Context, ErrorInit) {
|
||||
Context ctx;
|
||||
ASSERT_THROW({ ctx.Init({{"foo", "bar"}}); }, dmlc::Error);
|
||||
try {
|
||||
ctx.Init({{"foo", "bar"}});
|
||||
} catch (dmlc::Error const& e) {
|
||||
auto msg = std::string{e.what()};
|
||||
ASSERT_NE(msg.find("foo"), std::string::npos);
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
|
||||
ASSERT_EQ(ctx.gpu_id, ord);
|
||||
ASSERT_EQ(ctx.Device().ordinal, ord);
|
||||
ASSERT_EQ(ctx.DeviceName(), "cuda:" + std::to_string(ord));
|
||||
ASSERT_EQ(ctx.Ordinal(), ord);
|
||||
@@ -25,7 +24,7 @@ void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
|
||||
Context new_ctx;
|
||||
FromJson(jctx, &new_ctx);
|
||||
ASSERT_EQ(new_ctx.Device(), ctx.Device());
|
||||
ASSERT_EQ(new_ctx.gpu_id, ctx.gpu_id);
|
||||
ASSERT_EQ(new_ctx.Ordinal(), ctx.Ordinal());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -53,7 +52,7 @@ TEST(Context, DeviceOrdinal) {
|
||||
|
||||
auto cpu_ctx = ctx.MakeCPU();
|
||||
ASSERT_TRUE(cpu_ctx.IsCPU());
|
||||
ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
|
||||
ASSERT_EQ(cpu_ctx.Ordinal(), DeviceOrd::CPUOrdinal());
|
||||
ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());
|
||||
|
||||
auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
|
||||
|
||||
@@ -655,33 +655,11 @@ TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); }
|
||||
TEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); }
|
||||
|
||||
class TestColumnSplit : public ::testing::TestWithParam<std::string> {
|
||||
static auto MakeFmat(std::string const& obj) {
|
||||
auto constexpr kRows = 10, kCols = 10;
|
||||
auto p_fmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
|
||||
auto& h_upper = p_fmat->Info().labels_upper_bound_.HostVector();
|
||||
auto& h_lower = p_fmat->Info().labels_lower_bound_.HostVector();
|
||||
h_lower.resize(kRows);
|
||||
h_upper.resize(kRows);
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
h_lower[i] = 1;
|
||||
h_upper[i] = 10;
|
||||
}
|
||||
if (obj.find("rank:") != std::string::npos) {
|
||||
auto h_label = p_fmat->Info().labels.HostView();
|
||||
std::size_t k = 0;
|
||||
for (auto& v : h_label) {
|
||||
v = k % 2 == 0;
|
||||
++k;
|
||||
}
|
||||
}
|
||||
return p_fmat;
|
||||
};
|
||||
|
||||
void TestBaseScore(std::string objective, float expected_base_score, Json expected_model) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
|
||||
auto p_fmat = MakeFmat(objective);
|
||||
auto p_fmat = MakeFmatForObjTest(objective);
|
||||
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
||||
learner->SetParam("tree_method", "approx");
|
||||
@@ -705,7 +683,7 @@ class TestColumnSplit : public ::testing::TestWithParam<std::string> {
|
||||
|
||||
public:
|
||||
void Run(std::string objective) {
|
||||
auto p_fmat = MakeFmat(objective);
|
||||
auto p_fmat = MakeFmatForObjTest(objective);
|
||||
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
|
||||
learner->SetParam("tree_method", "approx");
|
||||
learner->SetParam("objective", objective);
|
||||
@@ -740,4 +718,112 @@ INSTANTIATE_TEST_SUITE_P(ColumnSplitObjective, TestColumnSplit,
|
||||
[](const ::testing::TestParamInfo<TestColumnSplit::ParamType>& info) {
|
||||
return ObjTestNameGenerator(info);
|
||||
});
|
||||
|
||||
namespace {
|
||||
Json GetModelWithArgs(std::shared_ptr<DMatrix> dmat, std::string const& tree_method,
|
||||
std::string const& device, Args const& args) {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
|
||||
learner->SetParam("tree_method", tree_method);
|
||||
learner->SetParam("device", device);
|
||||
learner->SetParam("objective", "reg:logistic");
|
||||
learner->SetParams(args);
|
||||
learner->UpdateOneIter(0, dmat);
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
return model;
|
||||
}
|
||||
|
||||
void VerifyColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Args const& args,
|
||||
Json const& expected_model) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
auto p_fmat = MakeFmatForObjTest("");
|
||||
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
|
||||
std::string device = "cpu";
|
||||
if (use_gpu) {
|
||||
auto gpu_id = common::AllVisibleGPUs() == 1 ? 0 : rank;
|
||||
device = "cuda:" + std::to_string(gpu_id);
|
||||
}
|
||||
auto model = GetModelWithArgs(sliced, tree_method, device, args);
|
||||
ASSERT_EQ(model, expected_model);
|
||||
}
|
||||
|
||||
void TestColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Args const& args) {
|
||||
auto p_fmat = MakeFmatForObjTest("");
|
||||
std::string device = use_gpu ? "cuda:0" : "cpu";
|
||||
auto model = GetModelWithArgs(p_fmat, tree_method, device, args);
|
||||
|
||||
auto world_size{3};
|
||||
if (use_gpu) {
|
||||
world_size = common::AllVisibleGPUs();
|
||||
// Simulate MPU on a single GPU.
|
||||
if (world_size == 1) {
|
||||
world_size = 3;
|
||||
}
|
||||
}
|
||||
RunWithInMemoryCommunicator(world_size, VerifyColumnSplitWithArgs, tree_method, use_gpu, args,
|
||||
model);
|
||||
}
|
||||
|
||||
void TestColumnSplitColumnSampler(std::string const& tree_method, bool use_gpu) {
|
||||
Args args{{"colsample_bytree", "0.5"}, {"colsample_bylevel", "0.6"}, {"colsample_bynode", "0.7"}};
|
||||
TestColumnSplitWithArgs(tree_method, use_gpu, args);
|
||||
}
|
||||
|
||||
void TestColumnSplitInteractionConstraints(std::string const& tree_method, bool use_gpu) {
|
||||
Args args{{"interaction_constraints", "[[0, 5, 7], [2, 8, 9], [1, 3, 6]]"}};
|
||||
TestColumnSplitWithArgs(tree_method, use_gpu, args);
|
||||
}
|
||||
|
||||
void TestColumnSplitMonotoneConstraints(std::string const& tree_method, bool use_gpu) {
|
||||
Args args{{"monotone_constraints", "(1,-1,0,1,1,-1,-1,0,0,1)"}};
|
||||
TestColumnSplitWithArgs(tree_method, use_gpu, args);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(ColumnSplitColumnSampler, Approx) { TestColumnSplitColumnSampler("approx", false); }
|
||||
|
||||
TEST(ColumnSplitColumnSampler, Hist) { TestColumnSplitColumnSampler("hist", false); }
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(MGPUColumnSplitColumnSampler, GPUApprox) { TestColumnSplitColumnSampler("approx", true); }
|
||||
|
||||
TEST(MGPUColumnSplitColumnSampler, GPUHist) { TestColumnSplitColumnSampler("hist", true); }
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
TEST(ColumnSplitInteractionConstraints, Approx) {
|
||||
TestColumnSplitInteractionConstraints("approx", false);
|
||||
}
|
||||
|
||||
TEST(ColumnSplitInteractionConstraints, Hist) {
|
||||
TestColumnSplitInteractionConstraints("hist", false);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(MGPUColumnSplitInteractionConstraints, GPUApprox) {
|
||||
TestColumnSplitInteractionConstraints("approx", true);
|
||||
}
|
||||
|
||||
TEST(MGPUColumnSplitInteractionConstraints, GPUHist) {
|
||||
TestColumnSplitInteractionConstraints("hist", true);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
TEST(ColumnSplitMonotoneConstraints, Approx) {
|
||||
TestColumnSplitMonotoneConstraints("approx", false);
|
||||
}
|
||||
|
||||
TEST(ColumnSplitMonotoneConstraints, Hist) {
|
||||
TestColumnSplitMonotoneConstraints("hist", false);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(MGPUColumnSplitMonotoneConstraints, GPUApprox) {
|
||||
TestColumnSplitMonotoneConstraints("approx", true);
|
||||
}
|
||||
|
||||
TEST(MGPUColumnSplitMonotoneConstraints, GPUHist) {
|
||||
TestColumnSplitMonotoneConstraints("hist", true);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -210,9 +210,9 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
|
||||
}
|
||||
// Pull data to device
|
||||
for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
|
||||
batch.data.SetDevice(0);
|
||||
batch.data.SetDevice(DeviceOrd::CUDA(0));
|
||||
batch.data.DeviceSpan();
|
||||
batch.offset.SetDevice(0);
|
||||
batch.offset.SetDevice(DeviceOrd::CUDA(0));
|
||||
batch.offset.DeviceSpan();
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020-2022 by XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <thrust/host_vector.h>
|
||||
@@ -13,9 +13,7 @@
|
||||
#include "../../histogram_helpers.h"
|
||||
#include "../test_evaluate_splits.h" // TestPartitionBasedSplit
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
namespace {
|
||||
auto ZeroParam() {
|
||||
auto args = Args{{"min_child_weight", "0"}, {"lambda", "0"}};
|
||||
@@ -41,11 +39,12 @@ thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPa
|
||||
}
|
||||
|
||||
TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
||||
GPUTrainingParam param{param_};
|
||||
cuts_.cut_ptrs_.SetDevice(0);
|
||||
cuts_.cut_values_.SetDevice(0);
|
||||
cuts_.min_vals_.SetDevice(0);
|
||||
cuts_.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts_.cut_values_.SetDevice(ctx.Device());
|
||||
cuts_.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
@@ -61,9 +60,10 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
cuts_.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
|
||||
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false, 0);
|
||||
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
ASSERT_EQ(result.thresh, 1);
|
||||
@@ -73,6 +73,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, PartitionBasic) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.max_cat_to_onehot = 0;
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -81,9 +82,9 @@ TEST(GpuHist, PartitionBasic) {
|
||||
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
|
||||
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
|
||||
cuts.min_vals_.HostVector() = std::vector<float>{0.0};
|
||||
cuts.cut_ptrs_.SetDevice(0);
|
||||
cuts.cut_values_.SetDevice(0);
|
||||
cuts.min_vals_.SetDevice(0);
|
||||
cuts.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts.cut_values_.SetDevice(ctx.Device());
|
||||
cuts.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
||||
|
||||
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
|
||||
@@ -104,8 +105,8 @@ TEST(GpuHist, PartitionBasic) {
|
||||
false,
|
||||
};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());
|
||||
|
||||
{
|
||||
// -1.0s go right
|
||||
@@ -187,6 +188,7 @@ TEST(GpuHist, PartitionBasic) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, PartitionTwoFeatures) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.max_cat_to_onehot = 0;
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -195,9 +197,9 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0, 0.0, 1.0, 2.0};
|
||||
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3, 6};
|
||||
cuts.min_vals_.HostVector() = std::vector<float>{0.0, 0.0};
|
||||
cuts.cut_ptrs_.SetDevice(0);
|
||||
cuts.cut_values_.SetDevice(0);
|
||||
cuts.min_vals_.SetDevice(0);
|
||||
cuts.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts.cut_values_.SetDevice(ctx.Device());
|
||||
cuts.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
|
||||
@@ -216,8 +218,8 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, ctx.Device());
|
||||
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
@@ -247,6 +249,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, PartitionTwoNodes) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.max_cat_to_onehot = 0;
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -255,9 +258,9 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
cuts.cut_values_.HostVector() = std::vector<float>{0.0, 1.0, 2.0};
|
||||
cuts.cut_ptrs_.HostVector() = std::vector<uint32_t>{0, 3};
|
||||
cuts.min_vals_.HostVector() = std::vector<float>{0.0};
|
||||
cuts.cut_ptrs_.SetDevice(0);
|
||||
cuts.cut_values_.SetDevice(0);
|
||||
cuts.min_vals_.SetDevice(0);
|
||||
cuts.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts.cut_values_.SetDevice(ctx.Device());
|
||||
cuts.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0};
|
||||
|
||||
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
|
||||
@@ -276,8 +279,10 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
|
||||
ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
|
||||
ctx.Device());
|
||||
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
@@ -299,12 +304,14 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
}
|
||||
|
||||
void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
|
||||
common::HistogramCuts cuts{MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, 0)};
|
||||
common::HistogramCuts cuts{
|
||||
MakeCutsForTest({1.0, 2.0, 11.0, 12.0}, {0, 2, 4}, {0.0, 0.0}, ctx.Device())};
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
@@ -329,8 +336,10 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), 0};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false, 0);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()),
|
||||
ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
@@ -367,7 +376,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_set.size(), 0);
|
||||
GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
@@ -379,7 +388,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplitEmpty) {
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUHistEvaluator evaluator(tparam, 1, 0);
|
||||
GPUHistEvaluator evaluator(tparam, 1, FstCU());
|
||||
DeviceSplitCandidate result =
|
||||
evaluator
|
||||
.EvaluateSingleSplit(
|
||||
@@ -414,7 +423,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
@@ -446,7 +455,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), 0);
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
@@ -481,7 +490,8 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
dh::ToSpan(feature_min_values),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()), 0};
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_min_values.size()),
|
||||
FstCU()};
|
||||
dh::device_vector<EvaluateSplitInputs> inputs =
|
||||
std::vector<EvaluateSplitInputs>{input_left, input_right};
|
||||
evaluator.LaunchEvaluateSplits(input_left.feature_set.size(), dh::ToSpan(inputs), shared_inputs,
|
||||
@@ -497,14 +507,15 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
}
|
||||
|
||||
TEST_F(TestPartitionBasedSplit, GpuHist) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
dh::device_vector<FeatureType> ft{std::vector<FeatureType>{FeatureType::kCategorical}};
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), 0};
|
||||
GPUHistEvaluator evaluator{param_, static_cast<bst_feature_t>(info_.num_col_), ctx.Device()};
|
||||
|
||||
cuts_.cut_ptrs_.SetDevice(0);
|
||||
cuts_.cut_values_.SetDevice(0);
|
||||
cuts_.min_vals_.SetDevice(0);
|
||||
cuts_.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts_.cut_values_.SetDevice(ctx.Device());
|
||||
cuts_.min_vals_.SetDevice(ctx.Device());
|
||||
|
||||
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, 0);
|
||||
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());
|
||||
|
||||
// Convert the sample histogram to fixed point
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
@@ -532,15 +543,16 @@ class MGPUHistTest : public BaseMGPUTest {};
|
||||
|
||||
namespace {
|
||||
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
auto rank = collective::GetRank();
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
|
||||
common::HistogramCuts cuts{rank == 0
|
||||
? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, GPUIDX)
|
||||
: MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, GPUIDX)};
|
||||
common::HistogramCuts cuts{
|
||||
rank == 0 ? MakeCutsForTest({1.0, 2.0}, {0, 2, 2}, {0.0, 0.0}, ctx.Device())
|
||||
: MakeCutsForTest({11.0, 12.0}, {0, 0, 2}, {0.0, 0.0}, ctx.Device())};
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
@@ -566,8 +578,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
cuts.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), GPUIDX};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, GPUIDX);
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
|
||||
@@ -587,5 +599,4 @@ TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleSplit) {
|
||||
TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleCategoricalSplit) {
|
||||
DoTest(VerifyColumnSplitEvaluateSingleSplit, true);
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -34,9 +34,9 @@ void VerifySampling(size_t page_size,
|
||||
for (const auto& gp : gpair.ConstHostVector()) {
|
||||
sum_gpair += gp;
|
||||
}
|
||||
gpair.SetDevice(0);
|
||||
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
if (page_size != 0) {
|
||||
@@ -91,9 +91,9 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
std::unique_ptr<DMatrix> dmat(
|
||||
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
|
||||
auto gpair = GenerateRandomGradients(kRows);
|
||||
gpair.SetDevice(0);
|
||||
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
gpair.SetDevice(ctx.Device());
|
||||
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
|
||||
EXPECT_NE(page->n_rows, kRows);
|
||||
|
||||
@@ -17,9 +17,7 @@
|
||||
#include "../../categorical_helpers.h"
|
||||
#include "../../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
|
||||
@@ -32,22 +30,22 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
tree::RowPartitioner row_partitioner(FstCU(), kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
|
||||
int num_bins = kBins * kCols;
|
||||
dh::device_vector<GradientPairInt64> histogram(num_bins);
|
||||
auto d_histogram = dh::ToSpan(histogram);
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(FstCU());
|
||||
|
||||
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
|
||||
sizeof(GradientPairInt64));
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx, d_histogram,
|
||||
quantiser);
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_histogram, quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> histogram_h(num_bins);
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
@@ -65,8 +63,8 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
auto d_new_histogram = dh::ToSpan(new_histogram);
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
feature_groups.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_new_histogram, quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> new_histogram_h(num_bins);
|
||||
@@ -87,14 +85,14 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
|
||||
{
|
||||
auto gpair = GenerateRandomGradients(kRows, kLower, kUpper);
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(FstCU());
|
||||
|
||||
// Use a single feature group to compute the baseline.
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
|
||||
dh::device_vector<GradientPairInt64> baseline(num_bins);
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
single_group.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(baseline), quantiser);
|
||||
|
||||
std::vector<GradientPairInt64> baseline_h(num_bins);
|
||||
@@ -149,11 +147,11 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
auto cat_m = GetDMatrixFromData(x, kRows, 1);
|
||||
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
|
||||
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
|
||||
tree::RowPartitioner row_partitioner(0, kRows);
|
||||
tree::RowPartitioner row_partitioner(ctx.Device(), kRows);
|
||||
auto ridx = row_partitioner.GetRows(0);
|
||||
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
|
||||
auto gpair = GenerateRandomGradients(kRows, 0, 2);
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(DeviceOrd::CUDA(0));
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
@@ -161,8 +159,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(cat_hist), quantiser);
|
||||
}
|
||||
|
||||
@@ -175,8 +173,8 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
single_group.DeviceAccessor(0), gpair.DeviceSpan(), ridx,
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(ctx.Device()),
|
||||
single_group.DeviceAccessor(ctx.Device()), gpair.DeviceSpan(), ridx,
|
||||
dh::ToSpan(encode_hist), quantiser);
|
||||
}
|
||||
|
||||
@@ -264,5 +262,4 @@ void TestAtomicAdd() {
|
||||
TEST(Histogram, AtomicAddInt64) {
|
||||
TestAtomicAdd();
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -20,12 +20,10 @@
|
||||
#include "xgboost/task.h"
|
||||
#include "xgboost/tree_model.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
void TestUpdatePositionBatch() {
|
||||
const int kNumRows = 10;
|
||||
RowPartitioner rp(0, kNumRows);
|
||||
RowPartitioner rp(FstCU(), kNumRows);
|
||||
auto rows = rp.GetRowsHost(0);
|
||||
EXPECT_EQ(rows.size(), kNumRows);
|
||||
for (auto i = 0ull; i < kNumRows; i++) {
|
||||
@@ -100,12 +98,11 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GpuHist, SortPositionBatch) {
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
|
||||
TEST(GpuHist, SortPositionBatch) {
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 3}, {3, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 1}, {3, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{0, 6}});
|
||||
TestSortPositionBatch({0, 1, 2, 3, 4, 5}, {{3, 6}, {0, 2}});
|
||||
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -115,7 +115,7 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), &param, sampler};
|
||||
HistMakerTrainParam hist_param;
|
||||
std::vector<BoundedHistCollection> histogram(n_targets);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, DeviceOrd::CPU());
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto &hist = histogram[t];
|
||||
hist.Reset(n_bins * n_features, hist_param.max_cached_hist_node);
|
||||
|
||||
@@ -76,7 +76,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
|
||||
GradientPairPrecise parent_sum) {
|
||||
int32_t best_thresh = -1;
|
||||
float best_score{-std::numeric_limits<float>::infinity()};
|
||||
TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), -1};
|
||||
TreeEvaluator evaluator{param_, static_cast<bst_feature_t>(n_feat), DeviceOrd::CPU()};
|
||||
auto tree_evaluator = evaluator.GetEvaluator<TrainParam>();
|
||||
GradientPairPrecise left_sum;
|
||||
auto parent_gain = tree_evaluator.CalcGain(0, param_, GradStats{total_gpair_});
|
||||
@@ -111,13 +111,13 @@ class TestPartitionBasedSplit : public ::testing::Test {
|
||||
};
|
||||
|
||||
inline auto MakeCutsForTest(std::vector<float> values, std::vector<uint32_t> ptrs,
|
||||
std::vector<float> min_values, int32_t device) {
|
||||
std::vector<float> min_values, DeviceOrd device) {
|
||||
common::HistogramCuts cuts;
|
||||
cuts.cut_values_.HostVector() = values;
|
||||
cuts.cut_ptrs_.HostVector() = ptrs;
|
||||
cuts.min_vals_.HostVector() = min_values;
|
||||
|
||||
if (device >= 0) {
|
||||
if (device.IsCUDA()) {
|
||||
cuts.cut_ptrs_.SetDevice(device);
|
||||
cuts.cut_values_.SetDevice(device);
|
||||
cuts.min_vals_.SetDevice(device);
|
||||
@@ -136,7 +136,7 @@ class TestCategoricalSplitWithMissing : public testing::Test {
|
||||
TrainParam param_;
|
||||
|
||||
void SetUp() override {
|
||||
cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, -1);
|
||||
cuts_ = MakeCutsForTest({0.0, 1.0, 2.0, 3.0}, {0, 4}, {0.0}, DeviceOrd::CPU());
|
||||
auto max_cat = *std::max_element(cuts_.cut_values_.HostVector().begin(),
|
||||
cuts_.cut_values_.HostVector().end());
|
||||
cuts_.SetCategorical(true, max_cat);
|
||||
|
||||
@@ -40,7 +40,7 @@ TEST(GpuHist, DeviceHistogram) {
|
||||
constexpr int kNNodes = 4;
|
||||
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
|
||||
DeviceHistogramStorage<kStopGrowing> histogram;
|
||||
histogram.Init(0, kNBins);
|
||||
histogram.Init(FstCU(), kNBins);
|
||||
for (int i = 0; i < kNNodes; ++i) {
|
||||
histogram.AllocateHistograms({i});
|
||||
}
|
||||
@@ -113,12 +113,12 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
bst_float hess = dist(&gen);
|
||||
gp = GradientPair(grad, hess);
|
||||
}
|
||||
gpair.SetDevice(0);
|
||||
gpair.SetDevice(DeviceOrd::CUDA(0));
|
||||
|
||||
thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
|
||||
maker.row_partitioner = std::make_unique<RowPartitioner>(0, kNRows);
|
||||
maker.row_partitioner = std::make_unique<RowPartitioner>(FstCU(), kNRows);
|
||||
|
||||
maker.hist.Init(0, page->Cuts().TotalBins());
|
||||
maker.hist.Init(FstCU(), page->Cuts().TotalBins());
|
||||
maker.hist.AllocateHistograms({0});
|
||||
|
||||
maker.gpair = gpair.DeviceSpan();
|
||||
@@ -127,8 +127,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
|
||||
maker.InitFeatureGroupsOnce();
|
||||
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
|
||||
maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(),
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(DeviceOrd::CUDA(0)),
|
||||
maker.feature_groups->DeviceAccessor(DeviceOrd::CUDA(0)), gpair.DeviceSpan(),
|
||||
maker.row_partitioner->GetRows(0), maker.hist.GetNodeHistogram(0),
|
||||
*maker.quantiser, !use_shared_memory_histograms);
|
||||
|
||||
@@ -215,7 +215,7 @@ void TestHistogramIndexImpl() {
|
||||
// histogram index
|
||||
const auto &maker = hist_maker.maker;
|
||||
auto grad = GenerateRandomGradients(kNRows);
|
||||
grad.SetDevice(0);
|
||||
grad.SetDevice(DeviceOrd::CUDA(0));
|
||||
maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
|
||||
std::vector<common::CompressedByteT> h_gidx_buffer(maker->page->gidx_buffer.HostVector());
|
||||
|
||||
@@ -281,17 +281,17 @@ TEST(GpuHist, UniformSampling) {
|
||||
// Create an in-memory DMatrix.
|
||||
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, Context{}.MakeCUDA().Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
|
||||
kRows);
|
||||
|
||||
@@ -312,18 +312,18 @@ TEST(GpuHist, GradientBasedSampling) {
|
||||
// Create an in-memory DMatrix.
|
||||
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, MakeCUDACtx(0).Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
|
||||
"gradient_based", kRows);
|
||||
|
||||
@@ -350,16 +350,16 @@ TEST(GpuHist, ExternalMemory) {
|
||||
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrix(kRows, kCols, 1, tmpdir.path + "/cache"));
|
||||
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
@@ -388,20 +388,20 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
|
||||
CreateSparsePageDMatrix(kRows, kCols, kRows / kPageSize, tmpdir.path + "/cache"));
|
||||
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
auto rng = common::GlobalRandom();
|
||||
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
|
||||
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
common::GlobalRandom() = rng;
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, DeviceOrd::CUDA(0));
|
||||
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
|
||||
kSamplingMethod, kRows);
|
||||
|
||||
@@ -445,7 +445,7 @@ TEST(GpuHist, MaxDepth) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
RegTree GetUpdatedTree(Context const* ctx, DMatrix* dmat) {
|
||||
RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
GPUHistMaker hist_maker{ctx, &task};
|
||||
hist_maker.Configure(Args{});
|
||||
@@ -453,7 +453,7 @@ RegTree GetUpdatedTree(Context const* ctx, DMatrix* dmat) {
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
@@ -463,7 +463,7 @@ RegTree GetUpdatedTree(Context const* ctx, DMatrix* dmat) {
|
||||
return tree;
|
||||
}
|
||||
|
||||
void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||
void VerifyHistColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||
Context ctx(MakeCUDACtx(GPUIDX));
|
||||
|
||||
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
||||
@@ -471,7 +471,7 @@ void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expect
|
||||
auto const rank = collective::GetRank();
|
||||
std::unique_ptr<DMatrix> sliced{Xy->SliceCol(world_size, rank)};
|
||||
|
||||
RegTree tree = GetUpdatedTree(&ctx, sliced.get());
|
||||
RegTree tree = GetHistTree(&ctx, sliced.get());
|
||||
|
||||
Json json{Object{}};
|
||||
tree.SaveModel(&json);
|
||||
@@ -489,8 +489,58 @@ TEST_F(MGPUHistTest, GPUHistColumnSplit) {
|
||||
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
|
||||
RegTree expected_tree = GetUpdatedTree(&ctx, dmat.get());
|
||||
RegTree expected_tree = GetHistTree(&ctx, dmat.get());
|
||||
|
||||
DoTest(VerifyColumnSplit, kRows, kCols, expected_tree);
|
||||
DoTest(VerifyHistColumnSplit, kRows, kCols, expected_tree);
|
||||
}
|
||||
|
||||
namespace {
|
||||
RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
GPUGlobalApproxMaker approx_maker{ctx, &task};
|
||||
approx_maker.Configure(Args{});
|
||||
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{});
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({dmat->Info().num_row_}, ctx->Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_));
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
RegTree tree;
|
||||
approx_maker.Update(&param, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{&tree});
|
||||
return tree;
|
||||
}
|
||||
|
||||
void VerifyApproxColumnSplit(bst_row_t rows, bst_feature_t cols, RegTree const& expected_tree) {
|
||||
Context ctx(MakeCUDACtx(GPUIDX));
|
||||
|
||||
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::unique_ptr<DMatrix> sliced{Xy->SliceCol(world_size, rank)};
|
||||
|
||||
RegTree tree = GetApproxTree(&ctx, sliced.get());
|
||||
|
||||
Json json{Object{}};
|
||||
tree.SaveModel(&json);
|
||||
Json expected_json{Object{}};
|
||||
expected_tree.SaveModel(&expected_json);
|
||||
ASSERT_EQ(json, expected_json);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
class MGPUApproxTest : public BaseMGPUTest {};
|
||||
|
||||
TEST_F(MGPUApproxTest, GPUApproxColumnSplit) {
|
||||
auto constexpr kRows = 32;
|
||||
auto constexpr kCols = 16;
|
||||
|
||||
Context ctx(MakeCUDACtx(0));
|
||||
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
|
||||
RegTree expected_tree = GetApproxTree(&ctx, dmat.get());
|
||||
|
||||
DoTest(VerifyApproxColumnSplit, kRows, kCols, expected_tree);
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -28,7 +28,7 @@ TEST(GrowHistMaker, InteractionConstraint) {
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols);
|
||||
Context ctx;
|
||||
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
@@ -74,7 +74,7 @@ void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
|
||||
RegTree const& expected_tree) {
|
||||
Context ctx;
|
||||
auto p_dmat = GenerateDMatrix(rows, cols, categorical);
|
||||
linalg::Matrix<GradientPair> gpair({rows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({rows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(rows));
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ void TestColumnSplit(bool categorical) {
|
||||
{
|
||||
Context ctx;
|
||||
auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Ordinal());
|
||||
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
|
||||
gpair.Data()->Copy(GenerateRandomGradients(kRows));
|
||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
|
||||
@@ -12,9 +12,9 @@ TEST(MultiTargetTree, JsonIO) {
|
||||
bst_feature_t n_features{4};
|
||||
RegTree tree{n_targets, n_features};
|
||||
ASSERT_TRUE(tree.IsMultiTarget());
|
||||
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, Context::kCpuId};
|
||||
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, Context::kCpuId};
|
||||
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, Context::kCpuId};
|
||||
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, DeviceOrd::CPU()};
|
||||
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, DeviceOrd::CPU()};
|
||||
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, DeviceOrd::CPU()};
|
||||
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
|
||||
left_weight.HostView(), right_weight.HostView());
|
||||
ASSERT_EQ(tree.NumNodes(), 3);
|
||||
|
||||
@@ -33,7 +33,7 @@ class UpdaterTreeStatTest : public ::testing::Test {
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
param.Init(Args{});
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
|
||||
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
up->Configure(Args{});
|
||||
RegTree tree{1u, kCols};
|
||||
@@ -78,7 +78,7 @@ class UpdaterEtaTest : public ::testing::Test {
|
||||
void RunTest(std::string updater) {
|
||||
ObjInfo task{ObjInfo::kClassification};
|
||||
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(Context::kCpuId));
|
||||
Context ctx(updater == "grow_gpu_hist" ? MakeCUDACtx(0) : MakeCUDACtx(DeviceOrd::CPUOrdinal()));
|
||||
|
||||
float eta = 0.4;
|
||||
auto up_0 = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
|
||||
|
||||
Reference in New Issue
Block a user