Implement transform to reduce CPU/GPU code duplication. (#3643)
* Implement Transform class.
* Add tests for softmax.
* Use Transform in regression, softmax and hinge objectives, except for Cox.
* Mark old gpu objective functions deprecated.
* static_assert for softmax.
* Split up multi-gpu tests.
This commit is contained in:
committed by
Rory Mitchell
parent
87aca8c244
commit
d594b11f35
@@ -14,7 +14,7 @@ struct WriteSymbolFunction {
|
||||
WriteSymbolFunction(CompressedBufferWriter cbw, unsigned char* buffer_data_d,
|
||||
int* input_data_d)
|
||||
: cbw(cbw), buffer_data_d(buffer_data_d), input_data_d(input_data_d) {}
|
||||
|
||||
|
||||
__device__ void operator()(size_t i) {
|
||||
cbw.AtomicWriteSymbol(buffer_data_d, input_data_d[i], i);
|
||||
}
|
||||
@@ -28,7 +28,7 @@ struct ReadSymbolFunction {
|
||||
|
||||
__device__ void operator()(size_t i) {
|
||||
output_data_d[i] = ci[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST(CompressedIterator, TestGPU) {
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
TEST(gpu_hist_util, TestDeviceSketch) {
|
||||
void TestDeviceSketch(const GPUSet& devices) {
|
||||
// create the data
|
||||
int nrows = 10001;
|
||||
std::vector<float> test_data(nrows);
|
||||
@@ -28,7 +28,7 @@ TEST(gpu_hist_util, TestDeviceSketch) {
|
||||
tree::TrainParam p;
|
||||
p.max_bin = 20;
|
||||
p.gpu_id = 0;
|
||||
p.n_gpus = GPUSet::AllVisible().Size();
|
||||
p.n_gpus = devices.Size();
|
||||
// ensure that the exact quantiles are found
|
||||
p.gpu_batch_nrows = nrows * 10;
|
||||
|
||||
@@ -54,5 +54,17 @@ TEST(gpu_hist_util, TestDeviceSketch) {
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
// Runs the shared device-sketch test body on the GPUSet::Range(0, 1) device set.
TEST(gpu_hist_util, DeviceSketch) {
  const auto single_gpu = GPUSet::Range(0, 1);
  TestDeviceSketch(single_gpu);
}
|
||||
|
||||
#if defined(XGBOOST_USE_NCCL)
|
||||
// Multi-GPU variant: runs the shared device-sketch test body on every visible
// device. CHECK_GT requires that more than one device is visible.
TEST(gpu_hist_util, MGPU_DeviceSketch) {
  const auto all_gpus = GPUSet::AllVisible();
  CHECK_GT(all_gpus.Size(), 1);
  TestDeviceSketch(all_gpus);
}
|
||||
#endif
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -178,18 +178,57 @@ TEST(HostDeviceVector, TestCopy) {
|
||||
SetCudaSetDeviceHandler(nullptr);
|
||||
}
|
||||
|
||||
// The test is not really useful if n_gpus < 2
|
||||
// Moves a host vector onto the GPUSet::Range(0, 1) device set and back to an
// empty device set, checking that sizes and contents are preserved.
TEST(HostDeviceVector, Reshard) {
  // Reference data: 0, 1, 2, ..., 2344.
  std::vector<int> h_vec(2345);
  int next_value = 0;
  for (int& element : h_vec) {
    element = next_value++;
  }
  HostDeviceVector<int> vec(h_vec);
  const auto single_gpu = GPUSet::Range(0, 1);

  // Host -> device: the whole vector is expected on device 0.
  vec.Reshard(single_gpu);
  ASSERT_EQ(vec.DeviceSize(0), h_vec.size());
  ASSERT_EQ(vec.Size(), h_vec.size());
  auto device_view = vec.DeviceSpan(0);  // sync to device

  // Device -> host: resharding to the empty set pulls the data back to the CPU.
  vec.Reshard(GPUSet::Empty());
  ASSERT_EQ(vec.Size(), h_vec.size());
  ASSERT_TRUE(vec.Devices().IsEmpty());

  // Contents must match the reference after the round trip.
  auto round_tripped = vec.HostVector();
  ASSERT_TRUE(std::equal(round_tripped.cbegin(), round_tripped.cend(),
                         h_vec.cbegin()));
}
|
||||
|
||||
// Checks that both the mutable and the const device span of device 0 report
// the shard size and the device pointer exposed by HostDeviceVector.
TEST(HostDeviceVector, Span) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
  vec.Reshard(GPUSet{0, 1});

  // Mutable view.
  auto span = vec.DeviceSpan(0);
  ASSERT_EQ(vec.DeviceSize(0), span.size());
  ASSERT_EQ(vec.DevicePointer(0), span.data());

  // Const view. Assert on const_span itself: these two checks used to re-test
  // `span`, which left the result of ConstDeviceSpan() unverified and unused.
  auto const_span = vec.ConstDeviceSpan(0);
  ASSERT_EQ(vec.DeviceSize(0), const_span.size());
  ASSERT_EQ(vec.ConstDevicePointer(0), const_span.data());
}
|
||||
|
||||
// Multi-GPUs' test
|
||||
#if defined(XGBOOST_USE_NCCL)
|
||||
TEST(HostDeviceVector, MGPU_Reshard) {
|
||||
auto devices = GPUSet::AllVisible();
|
||||
if (devices.Size() < 2) {
|
||||
LOG(WARNING) << "Not testing in multi-gpu environment.";
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<int> h_vec (2345);
|
||||
for (size_t i = 0; i < h_vec.size(); ++i) {
|
||||
h_vec[i] = i;
|
||||
}
|
||||
HostDeviceVector<int> vec (h_vec);
|
||||
|
||||
// Data size for each device.
|
||||
std::vector<size_t> devices_size (devices.Size());
|
||||
|
||||
// From CPU to GPUs.
|
||||
// Assuming we have > 1 devices.
|
||||
vec.Reshard(devices);
|
||||
size_t total_size = 0;
|
||||
for (size_t i = 0; i < devices.Size(); ++i) {
|
||||
@@ -198,42 +237,26 @@ TEST(HostDeviceVector, Reshard) {
|
||||
}
|
||||
ASSERT_EQ(total_size, h_vec.size());
|
||||
ASSERT_EQ(total_size, vec.Size());
|
||||
auto h_vec_1 = vec.HostVector();
|
||||
|
||||
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
|
||||
vec.Reshard(GPUSet::Empty()); // clear out devices memory
|
||||
// Reshard from devices to devices with different distribution.
|
||||
EXPECT_ANY_THROW(
|
||||
vec.Reshard(GPUDistribution::Granular(devices, 12)));
|
||||
|
||||
// Shrink down the number of devices.
|
||||
vec.Reshard(GPUSet::Range(0, 1));
|
||||
// All data is drawn back to CPU
|
||||
vec.Reshard(GPUSet::Empty());
|
||||
ASSERT_TRUE(vec.Devices().IsEmpty());
|
||||
ASSERT_EQ(vec.Size(), h_vec.size());
|
||||
ASSERT_EQ(vec.DeviceSize(0), h_vec.size());
|
||||
h_vec_1 = vec.HostVector();
|
||||
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
|
||||
vec.Reshard(GPUSet::Empty()); // clear out devices memory
|
||||
|
||||
// Grow the number of devices.
|
||||
vec.Reshard(devices);
|
||||
vec.Reshard(GPUDistribution::Granular(devices, 12));
|
||||
total_size = 0;
|
||||
for (size_t i = 0; i < devices.Size(); ++i) {
|
||||
total_size += vec.DeviceSize(i);
|
||||
ASSERT_EQ(devices_size[i], vec.DeviceSize(i));
|
||||
devices_size[i] = vec.DeviceSize(i);
|
||||
}
|
||||
ASSERT_EQ(total_size, h_vec.size());
|
||||
ASSERT_EQ(total_size, vec.Size());
|
||||
h_vec_1 = vec.HostVector();
|
||||
ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
|
||||
}
|
||||
|
||||
// Checks that both the mutable and the const device span of device 0 report
// the vector size and the device pointer exposed by HostDeviceVector.
TEST(HostDeviceVector, Span) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
  vec.Reshard(GPUSet{0, 1});

  // Mutable view.
  auto span = vec.DeviceSpan(0);
  ASSERT_EQ(vec.Size(), span.size());
  ASSERT_EQ(vec.DevicePointer(0), span.data());

  // Const view. Assert on const_span itself: these two checks used to re-test
  // `span`, which left the result of ConstDeviceSpan() unverified and unused.
  auto const_span = vec.ConstDeviceSpan(0);
  ASSERT_EQ(vec.Size(), const_span.size());
  ASSERT_EQ(vec.ConstDevicePointer(0), const_span.data());
}
|
||||
#endif
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -7,6 +7,14 @@
|
||||
#include "../../include/xgboost/base.h"
|
||||
#include "../../../src/common/span.h"
|
||||
|
||||
template <typename Iter>
|
||||
XGBOOST_DEVICE void InitializeRange(Iter _begin, Iter _end) {
|
||||
float j = 0;
|
||||
for (Iter i = _begin; i != _end; ++i, ++j) {
|
||||
*i = j;
|
||||
}
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
@@ -20,14 +28,6 @@ namespace common {
|
||||
*(status) = -1; \
|
||||
}
|
||||
|
||||
template <typename Iter>
|
||||
XGBOOST_DEVICE void InitializeRange(Iter _begin, Iter _end) {
|
||||
float j = 0;
|
||||
for (Iter i = _begin; i != _end; ++i, ++j) {
|
||||
*i = j;
|
||||
}
|
||||
}
|
||||
|
||||
struct TestTestStatus {
|
||||
int * status_;
|
||||
|
||||
|
||||
61
tests/cpp/common/test_transform_range.cc
Normal file
61
tests/cpp/common/test_transform_range.cc
Normal file
@@ -0,0 +1,61 @@
|
||||
#include <xgboost/base.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
|
||||
#include "../../../src/common/host_device_vector.h"
|
||||
#include "../../../src/common/transform.h"
|
||||
#include "../../../src/common/span.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
// Device selection used by the tests in this file.
// When __CUDACC__ is defined, the tests use the GPUSet::Range(0, 1) device
// set; otherwise they use an empty device set. TRANSFORM_GPU_DIST is the
// block distribution over that same set.
#if defined(__CUDACC__)
|
||||
|
||||
#define TRANSFORM_GPU_RANGE GPUSet::Range(0, 1)
|
||||
#define TRANSFORM_GPU_DIST GPUDistribution::Block(GPUSet::Range(0, 1))
|
||||
|
||||
#else
|
||||
|
||||
#define TRANSFORM_GPU_RANGE GPUSet::Empty()
|
||||
#define TRANSFORM_GPU_DIST GPUDistribution::Block(GPUSet::Empty())
|
||||
|
||||
#endif
|
||||
|
||||
// Writes 0, 1, 2, ... (counted in a float) to successive elements of
// [_begin, _end). An empty range is left untouched.
template <typename Iter>
void InitializeRange(Iter _begin, Iter _end) {
  float value = 0;
  Iter cursor = _begin;
  while (cursor != _end) {
    *cursor = value;
    ++cursor;
    ++value;
  }
}
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
template <typename T>
|
||||
struct TestTransformRange {
|
||||
void XGBOOST_DEVICE operator()(size_t _idx,
|
||||
Span<bst_float> _out, Span<const bst_float> _in) {
|
||||
_out[_idx] = _in[_idx];
|
||||
}
|
||||
};
|
||||
|
||||
// Copies `in_vec` into `out_vec` through Transform with TestTransformRange and
// compares the result with an identically initialised reference vector.
TEST(Transform, DeclareUnifiedTest(Basic)) {
  const size_t n_elements {256};

  // Expected output: 0, 1, ..., n_elements - 1.
  std::vector<bst_float> expected(n_elements);
  InitializeRange(expected.begin(), expected.end());

  // Input carries the same sequence; the output buffer is filled with 0 below.
  std::vector<bst_float> host_input(n_elements);
  InitializeRange(host_input.begin(), host_input.end());
  std::vector<bst_float> host_output(n_elements);

  const HostDeviceVector<bst_float> in_vec{host_input, TRANSFORM_GPU_DIST};
  HostDeviceVector<bst_float> out_vec{host_output, TRANSFORM_GPU_DIST};
  out_vec.Fill(0);

  Transform<>::Init(TestTransformRange<bst_float>{},
                    Range{0, n_elements},
                    TRANSFORM_GPU_RANGE)
      .Eval(&out_vec, &in_vec);

  std::vector<bst_float> actual = out_vec.HostVector();
  ASSERT_TRUE(std::equal(expected.begin(), expected.end(), actual.begin()));
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
43
tests/cpp/common/test_transform_range.cu
Normal file
43
tests/cpp/common/test_transform_range.cu
Normal file
@@ -0,0 +1,43 @@
|
||||
// This converts all tests from CPU to GPU.
|
||||
#include "test_transform_range.cc"
|
||||
|
||||
#if defined(XGBOOST_USE_NCCL)
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
|
||||
// Test here is multi gpu specific
|
||||
// Multi-GPU Transform test: the input and output vectors are sharded with
// different distributions, so the default Init() call is expected to throw,
// while the call with `false` as fourth argument must copy the data correctly.
TEST(Transform, MGPU_Basic) {
  const auto all_gpus = GPUSet::AllVisible();
  CHECK_GT(all_gpus.Size(), 1);
  const size_t n_elements {256};

  // Expected output: 0, 1, ..., n_elements - 1.
  std::vector<bst_float> expected(n_elements);
  InitializeRange(expected.begin(), expected.end());

  // Input carries the same sequence; the output buffer is filled with 0 below.
  std::vector<bst_float> host_input(n_elements);
  InitializeRange(host_input.begin(), host_input.end());
  std::vector<bst_float> host_output(n_elements);

  // Both vectors start with an empty device set.
  const HostDeviceVector<bst_float> in_vec {
      host_input, GPUDistribution::Block(GPUSet::Empty())};
  HostDeviceVector<bst_float> out_vec {
      host_output, GPUDistribution::Block(GPUSet::Empty())};
  out_vec.Fill(0);

  // Deliberately shard input and output differently.
  in_vec.Reshard(GPUDistribution::Granular(all_gpus, 8));
  out_vec.Reshard(GPUDistribution::Block(all_gpus));

  // Granularity is different, resharding will throw.
  EXPECT_ANY_THROW(
      Transform<>::Init(TestTransformRange<bst_float>{},
                        Range{0, n_elements},
                        all_gpus)
          .Eval(&out_vec, &in_vec));

  // NOTE(review): the trailing `false` presumably disables resharding inside
  // Transform -- confirm against transform.h.
  Transform<>::Init(TestTransformRange<bst_float>{},
                    Range{0, n_elements},
                    all_gpus,
                    false)
      .Eval(&out_vec, &in_vec);

  std::vector<bst_float> actual = out_vec.HostVector();
  ASSERT_TRUE(std::equal(expected.begin(), expected.end(), actual.begin()));
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif
|
||||
Reference in New Issue
Block a user