@@ -1,149 +0,0 @@
|
||||
|
||||
// Copyright (c) 2019 by Contributors
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include "../../../src/data/adapter.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/device_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
#include <thrust/device_vector.h>
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
#include "../common/test_hist_util.h"
|
||||
#include "../../../src/common/compressed_iterator.h"
|
||||
#include "../../../src/common/math.h"
|
||||
#include "test_array_interface.h"
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
// Builds a DeviceDMatrix from randomly generated values copied to the device
// and checks that each stored bin index equals the bin obtained by searching
// the page's cuts for the corresponding input value.
TEST(DeviceDMatrix, RowMajor) {
  const int kRows = 1000;
  const int kCols = 50;
  auto h_values = common::GenerateRandom(kRows, kCols);
  thrust::device_vector<float> d_values(h_values);
  auto adapter = common::AdapterFromData(d_values, kRows, kCols);

  data::DeviceDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),
                           1, 256);

  auto &page = *dmat.GetBatches<EllpackPage>({0, 256, 0}).begin();
  auto ellpack = page.Impl();
  common::CompressedIterator<uint32_t> bins(
      ellpack->gidx_buffer.HostVector().data(), ellpack->NumSymbols());

  // Element `idx` of the flat input belongs to feature `idx % kCols`.
  for (size_t idx = 0; idx < h_values.size(); ++idx) {
    int feature = idx % kCols;
    EXPECT_EQ(ellpack->Cuts().SearchBin(h_values[idx], feature), bins[idx]);
  }

  // Meta info must reflect the full input: no value is missing here.
  EXPECT_EQ(dmat.Info().num_col_, kCols);
  EXPECT_EQ(dmat.Info().num_row_, kRows);
  EXPECT_EQ(dmat.Info().num_nonzero_, kRows * kCols);
}
|
||||
|
||||
// Same construction as the dense case, but three input values are replaced by
// NaN (the missing marker). Checks that the null symbol shows up at the
// expected positions and that the non-zero count excludes the missing values.
TEST(DeviceDMatrix, RowMajorMissing) {
  const float kMissing = std::numeric_limits<float>::quiet_NaN();
  const int kRows = 10;
  const int kCols = 2;

  auto h_values = common::GenerateRandom(kRows, kCols);
  h_values[1] = kMissing;
  h_values[5] = kMissing;
  h_values[6] = kMissing;
  thrust::device_vector<float> d_values(h_values);
  auto adapter = common::AdapterFromData(d_values, kRows, kCols);

  data::DeviceDMatrix dmat(&adapter, kMissing, 1, 256);

  auto &page = *dmat.GetBatches<EllpackPage>({0, 256, 0}).begin();
  auto ellpack = page.Impl();
  common::CompressedIterator<uint32_t> bins(
      ellpack->gidx_buffer.HostVector().data(), ellpack->NumSymbols());

  // Fetches the symbol that marks an absent entry in the compressed buffer.
  auto null_symbol = [&ellpack]() {
    return ellpack->GetDeviceAccessor(0).NullValue();
  };

  EXPECT_EQ(bins[1], null_symbol());
  EXPECT_EQ(bins[5], null_symbol());
  // null values get placed after valid values in a row, so the missing input
  // at index 6 is observed at index 7.
  EXPECT_EQ(bins[7], null_symbol());

  EXPECT_EQ(dmat.Info().num_col_, kCols);
  EXPECT_EQ(dmat.Info().num_row_, kRows);
  EXPECT_EQ(dmat.Info().num_nonzero_, kRows * kCols - 3);
}
|
||||
|
||||
// Feeds two device columns (one "<f8", one "<u4") through a CudfAdapter and
// checks that the bin stored for every (row, column) pair matches the bin
// found by searching the cuts with the original column value.
TEST(DeviceDMatrix, ColumnMajor) {
  constexpr size_t kRows{100};
  thrust::device_vector<double> d_col_f64(kRows);
  thrust::device_vector<uint32_t> d_col_u32(kRows);

  std::vector<Json> columns;
  columns.emplace_back(GenerateDenseColumn<double>("<f8", kRows, &d_col_f64));
  columns.emplace_back(GenerateDenseColumn<uint32_t>("<u4", kRows, &d_col_u32));

  // Serialise the column descriptions to the JSON string the adapter expects.
  Json column_arr{columns};
  std::string interface_str;
  Json::Dump(column_arr, &interface_str);

  data::CudfAdapter adapter(interface_str);
  data::DeviceDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),
                           -1, 256);
  auto &page = *dmat.GetBatches<EllpackPage>({0, 256, 0}).begin();
  auto ellpack = page.Impl();
  common::CompressedIterator<uint32_t> bins(
      ellpack->gidx_buffer.HostVector().data(), ellpack->NumSymbols());

  const size_t n_columns = columns.size();
  for (size_t row = 0; row < kRows; ++row) {
    for (size_t col = 0; col < n_columns; ++col) {
      // The buffer is indexed with a row stride of 2 (one slot per column).
      if (col != 0) {
        EXPECT_EQ(bins[row * 2 + col],
                  ellpack->Cuts().SearchBin(d_col_u32[row], col));
      } else {
        EXPECT_EQ(bins[row * 2 + col],
                  ellpack->Cuts().SearchBin(d_col_f64[row], col));
      }
    }
  }

  EXPECT_EQ(dmat.Info().num_col_, 2);
  EXPECT_EQ(dmat.Info().num_row_, kRows);
  EXPECT_EQ(dmat.Info().num_nonzero_, kRows * 2);
}
|
||||
|
||||
// Test equivalence with simple DMatrix
|
||||
TEST(DeviceDMatrix, Equivalent) {
|
||||
int bin_sizes[] = {2, 16, 256, 512};
|
||||
int sizes[] = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = common::GenerateRandom(num_rows, num_columns);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto dmat = common::GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = common::AdapterFromData(x_device, num_rows, num_columns);
|
||||
data::DeviceDMatrix device_dmat(
|
||||
&adapter, std::numeric_limits<float>::quiet_NaN(), 1, num_bins);
|
||||
|
||||
const auto &batch = *dmat->GetBatches<EllpackPage>({0, num_bins}).begin();
|
||||
const auto &device_dmat_batch =
|
||||
*device_dmat.GetBatches<EllpackPage>({0, num_bins}).begin();
|
||||
|
||||
ASSERT_EQ(batch.Impl()->Cuts().Values(), device_dmat_batch.Impl()->Cuts().Values());
|
||||
ASSERT_EQ(batch.Impl()->gidx_buffer.HostVector(),
|
||||
device_dmat_batch.Impl()->gidx_buffer.HostVector());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks DeviceDMatrix::IsDense(): it must report true for data generated
// with sparsity 0 and false for data generated with a non-zero sparsity.
TEST(DeviceDMatrix, IsDense) {
  int num_bins = 16;

  auto check_density = [num_bins](float sparsity) {
    HostDeviceVector<float> storage;
    std::string array_interface = RandomDataGenerator{10, 10, sparsity}
                                      .Device(0)
                                      .GenerateArrayInterface(&storage);
    data::CupyAdapter adapter{array_interface};
    data::DeviceDMatrix device_dmat(
        &adapter, std::numeric_limits<float>::quiet_NaN(), 1, num_bins);

    if (sparsity != 0.0) {
      ASSERT_FALSE(device_dmat.IsDense());
    } else {
      ASSERT_TRUE(device_dmat.IsDense()) << sparsity;
    }
  };

  check_density(0.0);
  check_density(0.1);
}
|
||||
@@ -54,6 +54,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
|
||||
learner->SetParam("objective", "multi:softprob");
|
||||
learner->SetParam("num_feature", std::to_string(kCols));
|
||||
learner->SetParam("num_class", std::to_string(kClasses));
|
||||
learner->SetParam("max_bin", std::to_string(bins));
|
||||
learner->Configure();
|
||||
|
||||
for (size_t i = 0; i < kIters; ++i) {
|
||||
|
||||
@@ -170,3 +170,83 @@ Arrow specification.'''
|
||||
@pytest.mark.skipif(**tm.no_cudf())
|
||||
def test_cudf_metainfo_device_dmatrix(self):
|
||||
_test_cudf_metainfo(xgb.DeviceQuantileDMatrix)
|
||||
|
||||
|
||||
class IterForDMatrixTest(xgb.core.DataIter):
    '''A data iterator for XGBoost DMatrix.

    `reset` and `next` are required for any data iterator; the other methods
    here are utilities for demonstration purposes.

    '''
    ROWS_PER_BATCH = 100  # data is split by rows
    BATCHES = 16

    def __init__(self):
        '''Generate some random data for demonstration.

        Actual data can be anything that is currently supported by XGBoost.
        Note that one batch is generated and then repeated `BATCHES` times.

        '''
        import cudf
        self.rows = self.ROWS_PER_BATCH
        rng = np.random.RandomState(1994)

        # Column 'a' is drawn before column 'b', and labels are drawn last.
        batch = cudf.DataFrame({
            'a': rng.randn(self.ROWS_PER_BATCH),
            'b': rng.randn(self.ROWS_PER_BATCH),
        })
        self._data = [batch] * self.BATCHES
        batch_labels = rng.randn(self.rows)
        self._labels = [batch_labels] * self.BATCHES

        self.it = 0             # index of the batch handed out next
        super().__init__()

    def as_array(self):
        '''Return all batches concatenated into a single cudf object.'''
        import cudf
        return cudf.concat(self._data)

    def as_array_labels(self):
        '''Return the labels of all batches concatenated into one array.'''
        return np.concatenate(self._labels)

    def data(self):
        '''Utility function for obtaining the current batch of data.'''
        current = self.it
        return self._data[current]

    def labels(self):
        '''Utility function for obtaining the current batch of labels.'''
        current = self.it
        return self._labels[current]

    def reset(self):
        '''Reset the iterator to the first batch.'''
        self.it = 0

    def next(self, input_data):
        '''Pass the next batch to `input_data`.

        Returns 1 after handing over a batch and 0 when there's no more
        batch.

        '''
        if self.it != len(self._data):
            input_data(data=self.data(), label=self.labels())
            self.it += 1
            return 1
        return 0
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
def test_from_cudf_iter():
    '''Train on a DeviceQuantileDMatrix built from a batch iterator and on a
    DMatrix built from the concatenated batches, then check that both have
    the same shape and give close predictions.
    '''
    n_rounds = 100
    batches = IterForDMatrixTest()

    # Model trained on data supplied through the iterator.
    dm_from_iter = xgb.DeviceQuantileDMatrix(batches)
    booster_from_iter = xgb.train(
        {'tree_method': 'gpu_hist'}, dm_from_iter, num_boost_round=n_rounds)
    preds_from_iter = booster_from_iter.predict(dm_from_iter)

    # Reference model trained on the same data without the iterator.
    dm_full = xgb.DMatrix(batches.as_array(), batches.as_array_labels())

    assert dm_from_iter.num_col() == dm_full.num_col()
    assert dm_from_iter.num_row() == dm_full.num_row()

    booster_full = xgb.train(
        {'tree_method': 'gpu_hist'}, dm_full, num_boost_round=n_rounds)
    preds_full = booster_full.predict(dm_full)

    np.testing.assert_allclose(preds_from_iter, preds_full)
|
||||
|
||||
11
tests/python-gpu/test_gpu_demos.py
Normal file
11
tests/python-gpu/test_gpu_demos.py
Normal file
@@ -0,0 +1,11 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
sys.path.append("tests/python")
|
||||
import test_demos as td # noqa
|
||||
|
||||
|
||||
def test_data_iterator():
    '''Run the `data_iterator.py` demo script as a subprocess.

    The test fails (via `subprocess.CalledProcessError`) when the script
    exits with a non-zero status.
    '''
    script = os.path.join(td.PYTHON_DEMO_DIR, 'data_iterator.py')
    # Launch the demo with the interpreter that is running the tests instead
    # of whichever `python` happens to be first on PATH, which may be missing
    # or belong to a different environment.
    cmd = [sys.executable, script]
    subprocess.check_call(cmd)
|
||||
Reference in New Issue
Block a user