Support host data in proxy DMatrix. (#7087)

This commit is contained in:
Jiaming Yuan
2021-07-08 11:35:48 +08:00
committed by GitHub
parent 5d7cdf2e36
commit 84d359efb8
9 changed files with 188 additions and 30 deletions

View File

@@ -1,4 +1,4 @@
// Copyright (c) 2019 by Contributors
// Copyright (c) 2019-2021 by XGBoost Contributors
#include <gtest/gtest.h>
#include <type_traits>
#include <utility>
@@ -35,6 +35,27 @@ TEST(Adapter, CSRAdapter) {
EXPECT_EQ(line2.GetElement(0).column_idx, 1);
}
// Generates CSR data on the host, describes each of the three buffers with an
// array-interface string, and verifies that both CSRArrayAdapter and the batch
// it returns report the expected number of rows and columns.
TEST(Adapter, CSRArrayAdapter) {
  constexpr size_t kFeatures = 100;
  constexpr size_t kSamples = 10;

  HostDeviceVector<bst_row_t> csr_indptr;
  HostDeviceVector<float> csr_values;
  HostDeviceVector<bst_feature_t> csr_indices;
  RandomDataGenerator{kSamples, kFeatures, 0.5}.GenerateCSR(
      &csr_values, &csr_indptr, &csr_indices);

  // Array-interface descriptions of the host buffers; the adapter only sees
  // these strings, so the vectors above must outlive it.
  auto indptr_json = MakeArrayInterface(csr_indptr.HostPointer(), csr_indptr.Size());
  auto values_json = MakeArrayInterface(csr_values.HostPointer(), csr_values.Size());
  auto indices_json = MakeArrayInterface(csr_indices.HostPointer(), csr_indices.Size());

  // Wraps an interface string in a non-owning view without copying it.
  auto as_view = [](auto const &json) {
    return StringView{json.c_str(), json.size()};
  };

  // NOTE(review): arguments are passed as (indptr, values, indices, n_features);
  // confirm this matches the parameter order of CSRArrayAdapter's constructor.
  data::CSRArrayAdapter adapter(as_view(indptr_json), as_view(values_json),
                                as_view(indices_json), kFeatures);

  auto batch = adapter.Value();
  ASSERT_EQ(batch.NumRows(), kSamples);
  ASSERT_EQ(batch.NumCols(), kFeatures);

  ASSERT_EQ(adapter.NumRows(), kSamples);
  ASSERT_EQ(adapter.NumColumns(), kFeatures);
}
TEST(Adapter, CSCAdapterColsMoreThanRows) {
std::vector<float> data = {1, 2, 3, 4, 5, 6, 7, 8};
std::vector<unsigned> row_idx = {0, 1, 0, 1, 0, 1, 0, 1};

View File

@@ -0,0 +1,31 @@
/*!
* Copyright 2021 XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../helpers.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/data/adapter.h"
namespace xgboost {
namespace data {
// Feeds an array-interface description of generated dense data to a
// DMatrixProxy and checks that HostAdapterDispatch forwards to an adapter
// value that reports the expected number of rows and columns.
TEST(ProxyDMatrix, HostData) {
  DMatrixProxy proxy;
  size_t constexpr kRows = 100, kCols = 10;

  // `storage` backs the memory referenced by the interface string, so it has
  // to stay alive for as long as the proxy is used.
  HostDeviceVector<float> storage;
  // NOTE(review): the generator is configured with Device(0) although this
  // test exercises the host dispatch path -- confirm this is intended.
  auto data = RandomDataGenerator(kRows, kCols, 0.5)
                  .Device(0)
                  .GenerateArrayInterface(&storage);

  proxy.SetArrayData(data.c_str());

  // The generic lambdas are invoked with whichever adapter value the proxy
  // currently holds.
  auto n_samples = HostAdapterDispatch(
      &proxy, [](auto const &value) { return value.Size(); });
  ASSERT_EQ(n_samples, kRows);
  auto n_features = HostAdapterDispatch(
      &proxy, [](auto const &value) { return value.NumCols(); });
  ASSERT_EQ(n_features, kCols);
}
}  // namespace data
}  // namespace xgboost

View File

@@ -7,7 +7,7 @@
namespace xgboost {
namespace data {
TEST(ProxyDMatrix, Basic) {
TEST(ProxyDMatrix, DeviceData) {
constexpr size_t kRows{100}, kCols{100};
HostDeviceVector<float> storage;
auto data = RandomDataGenerator(kRows, kCols, 0.5)