Support CPU input for device QuantileDMatrix. (#8136)

- Copy `GHistIndexMatrix` to `Ellpack` when needed.
This commit is contained in:
Jiaming Yuan
2022-08-11 21:21:26 +08:00
committed by GitHub
parent 36e7c5364d
commit 16bca5d4a1
11 changed files with 220 additions and 19 deletions

View File

@@ -121,7 +121,6 @@ if __name__ == "__main__":
"python-package/xgboost/sklearn.py",
"python-package/xgboost/spark",
"python-package/xgboost/federated.py",
"python-package/xgboost/spark",
# tests
"tests/python/test_config.py",
"tests/python/test_spark/",

View File

@@ -236,4 +236,45 @@ TEST(EllpackPage, Compact) {
}
}
}
namespace {
/**
 * \brief Value-parameterized fixture comparing an Ellpack page constructed from a
 *        `GHistIndexMatrix` against the page returned by `GetBatches<EllpackPage>`.
 *
 * The test parameter is forwarded to `Run` as the sparsity of the generated data.
 */
class EllpackPageTest : public testing::TestWithParam<float> {
 protected:
  /**
   * \brief Build both pages from the same random DMatrix and check they agree.
   *
   * \param sparsity Sparsity handed to `RandomDataGenerator`.
   */
  void Run(float sparsity) {
    // The sample size is kept small on purpose: cuts computed on host and on device
    // might differ otherwise.
    size_t n_samples{128};
    size_t n_features{13};
    Context ctx;
    ctx.gpu_id = 0;
    auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);
    std::unique_ptr<EllpackPageImpl> converted;
    ASSERT_TRUE(Xy->SingleColBlock());

    // Page converted from the histogram index.
    for (auto const& gidx_page : Xy->GetBatches<GHistIndexMatrix>(BatchParam{17, 0.6})) {
      converted.reset(new EllpackPageImpl{&ctx, gidx_page, {}});
    }

    // Page obtained through the regular Ellpack batch path, used as the reference.
    for (auto const& ellpack_page : Xy->GetBatches<EllpackPage>(BatchParam{0, 17})) {
      auto expected = ellpack_page.Impl();

      // Page level metadata must agree before the compressed buffers are compared.
      ASSERT_EQ(expected->is_dense, converted->is_dense);
      ASSERT_EQ(expected->base_rowid, 0);
      ASSERT_EQ(expected->base_rowid, converted->base_rowid);
      ASSERT_EQ(expected->n_rows, converted->n_rows);
      ASSERT_EQ(expected->gidx_buffer.Size(), converted->gidx_buffer.Size());

      auto const& h_expected = expected->gidx_buffer.HostVector();
      auto const& h_converted = converted->gidx_buffer.HostVector();
      ASSERT_EQ(expected->NumSymbols(), converted->NumSymbols());

      common::CompressedIterator<uint32_t> converted_it(h_converted.data(),
                                                        converted->NumSymbols());
      common::CompressedIterator<uint32_t> expected_it(h_expected.data(),
                                                       expected->NumSymbols());

      // Element-wise comparison over n_rows * row_stride entries.
      auto const n_entries = converted->n_rows * converted->row_stride;
      for (size_t idx = 0; idx < n_entries; ++idx) {
        EXPECT_EQ(converted_it[idx], expected_it[idx]);
      }
    }
  }
};
}  // namespace
// Runs the GHistIndexMatrix -> Ellpack comparison once per instantiated parameter.
TEST_P(EllpackPageTest, FromGHistIndex) {
  auto const sparsity = GetParam();
  this->Run(sparsity);
}

// Parameter values passed to the test above as the data sparsity.
INSTANTIATE_TEST_SUITE_P(EllpackPage, EllpackPageTest,
                         testing::Values(.0f, .2f, .4f, .8f));
} // namespace xgboost

View File

@@ -31,6 +31,34 @@ class TestDeviceQuantileDMatrix:
data = cp.random.randn(5, 5)
xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64))
@pytest.mark.skipif(**tm.no_cupy())
def test_from_host(self) -> None:
    """Train with `gpu_hist` on a QuantileDMatrix built from host data and from
    cupy data, and check that both boosters produce matching predictions.

    Also checks the errors raised when the cupy-backed matrix is used with the
    `hist` and `approx` tree methods.
    """
    import cupy as cp

    n_samples = 64
    n_features = 3
    X, y, w = tm.make_batches(
        n_samples, n_features=n_features, n_batches=1, use_cupy=False
    )
    X_host, y_host, w_host = X[0], y[0], w[0]

    # Booster trained from host inputs.
    Xy_host = xgb.QuantileDMatrix(X_host, y_host, weight=w_host)
    booster_0 = xgb.train({"tree_method": "gpu_hist"}, Xy_host, num_boost_round=4)

    # Booster trained from the same data converted to cupy arrays.
    X_dev = cp.array(X_host)
    y_dev = cp.array(y_host)
    w_dev = cp.array(w_host)
    Xy = xgb.QuantileDMatrix(X_dev, y_dev, weight=w_dev)
    booster_1 = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=4)

    predt_0 = booster_0.inplace_predict(X_dev)
    predt_1 = booster_1.inplace_predict(X_dev)
    cp.testing.assert_allclose(predt_0, predt_1)

    # Training on CPU with GPU data is not supported.
    with pytest.raises(ValueError, match="not initialized with CPU"):
        xgb.train({"tree_method": "hist"}, Xy, num_boost_round=4)

    with pytest.raises(ValueError, match=r"Only.*hist.*"):
        xgb.train({"tree_method": "approx"}, Xy, num_boost_round=4)
@pytest.mark.skipif(**tm.no_cupy())
def test_metainfo(self) -> None:
import cupy as cp