Support CPU input for device QuantileDMatrix. (#8136)
- Copy `GHistIndexMatrix` to `Ellpack` when needed.
This commit is contained in:
@@ -121,7 +121,6 @@ if __name__ == "__main__":
|
||||
"python-package/xgboost/sklearn.py",
|
||||
"python-package/xgboost/spark",
|
||||
"python-package/xgboost/federated.py",
|
||||
"python-package/xgboost/spark",
|
||||
# tests
|
||||
"tests/python/test_config.py",
|
||||
"tests/python/test_spark/",
|
||||
|
||||
@@ -236,4 +236,45 @@ TEST(EllpackPage, Compact) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Value-parameterized fixture (the parameter is the sparsity passed to the data generator).
// It checks that an EllpackPageImpl constructed from a GHistIndexMatrix page carries the same
// metadata and the same compressed bin indices as the page yielded by
// GetBatches<EllpackPage>().
class EllpackPageTest : public testing::TestWithParam<float> {
 protected:
  // Generate a small random DMatrix with the given sparsity, build one Ellpack page from its
  // GHistIndexMatrix page and compare it field by field against the page obtained through
  // GetBatches<EllpackPage>().
  void Run(float sparsity) {
    // Only testing with small sample size as the cuts might be different between host and
    // device.
    size_t n_samples{128}, n_features{13};
    Context ctx;
    ctx.gpu_id = 0;
    auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);
    std::unique_ptr<EllpackPageImpl> from_ghist;
    ASSERT_TRUE(Xy->SingleColBlock());
    // NOTE(review): 17 looks like the bin count in both BatchParam calls below -- confirm
    // against the BatchParam constructors.
    for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(BatchParam{17, 0.6})) {
      // The empty braced third argument cannot be forwarded through std::make_unique, hence
      // the explicit reset(new ...).
      from_ghist.reset(new EllpackPageImpl{&ctx, page, {}});
    }
    // Everything below dereferences from_ghist; fail the test cleanly instead of crashing if
    // the loop above produced no page.
    ASSERT_TRUE(from_ghist != nullptr) << "No GHistIndexMatrix page was produced.";

    for (auto const& page : Xy->GetBatches<EllpackPage>(BatchParam{0, 17})) {
      auto from_sparse_page = page.Impl();

      // Page-level metadata must agree before the buffers are compared.
      ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense);
      ASSERT_EQ(from_sparse_page->base_rowid, 0);
      ASSERT_EQ(from_sparse_page->base_rowid, from_ghist->base_rowid);
      ASSERT_EQ(from_sparse_page->n_rows, from_ghist->n_rows);
      ASSERT_EQ(from_sparse_page->gidx_buffer.Size(), from_ghist->gidx_buffer.Size());

      auto const& h_gidx_from_sparse = from_sparse_page->gidx_buffer.HostVector();
      auto const& h_gidx_from_ghist = from_ghist->gidx_buffer.HostVector();
      ASSERT_EQ(from_sparse_page->NumSymbols(), from_ghist->NumSymbols());

      common::CompressedIterator<uint32_t> from_ghist_it(h_gidx_from_ghist.data(),
                                                         from_ghist->NumSymbols());
      common::CompressedIterator<uint32_t> from_sparse_it(h_gidx_from_sparse.data(),
                                                          from_sparse_page->NumSymbols());

      // Compare every entry; EXPECT (not ASSERT) so that all mismatches get reported.
      auto const n_entries = from_ghist->n_rows * from_ghist->row_stride;
      for (size_t i = 0; i < n_entries; ++i) {
        EXPECT_EQ(from_ghist_it[i], from_sparse_it[i]);
      }
    }
  }
};
|
||||
} // namespace
|
||||
|
||||
// Runs EllpackPageTest::Run with the sparsity value supplied as the test parameter.
TEST_P(EllpackPageTest, FromGHistIndex) { this->Run(GetParam()); }
|
||||
// Instantiates the EllpackPageTest suite for four sparsity values: 0, 0.2, 0.4 and 0.8.
INSTANTIATE_TEST_SUITE_P(EllpackPage, EllpackPageTest, testing::Values(.0f, .2f, .4f, .8f));
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -31,6 +31,34 @@ class TestDeviceQuantileDMatrix:
|
||||
data = cp.random.randn(5, 5)
|
||||
xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64))
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
def test_from_host(self) -> None:
    """Compare a QuantileDMatrix built from host data with one built from cupy data.

    A booster is trained with ``gpu_hist`` on each of the two matrices and their
    in-place predictions on the cupy input must be close.  Afterwards, training
    on the cupy-backed matrix with ``hist`` or ``approx`` must raise
    ``ValueError``.
    """
    import cupy as cp

    n_samples = 64
    n_features = 3
    X_batches, y_batches, w_batches = tm.make_batches(
        n_samples, n_features=n_features, n_batches=1, use_cupy=False
    )
    X_host, y_host, w_host = X_batches[0], y_batches[0], w_batches[0]

    # First booster: matrix constructed from the host (use_cupy=False) batch.
    dmat_host = xgb.QuantileDMatrix(X_host, y_host, weight=w_host)
    booster_host = xgb.train(
        {"tree_method": "gpu_hist"}, dmat_host, num_boost_round=4
    )

    # Second booster: same data, copied into cupy arrays first.
    X_dev = cp.array(X_host)
    y_dev = cp.array(y_host)
    w_dev = cp.array(w_host)
    dmat_dev = xgb.QuantileDMatrix(X_dev, y_dev, weight=w_dev)
    booster_dev = xgb.train(
        {"tree_method": "gpu_hist"}, dmat_dev, num_boost_round=4
    )

    predt_host = booster_host.inplace_predict(X_dev)
    predt_dev = booster_dev.inplace_predict(X_dev)
    cp.testing.assert_allclose(predt_host, predt_dev)

    with pytest.raises(ValueError, match="not initialized with CPU"):
        # Training on CPU with GPU data is not supported.
        xgb.train({"tree_method": "hist"}, dmat_dev, num_boost_round=4)

    with pytest.raises(ValueError, match=r"Only.*hist.*"):
        xgb.train({"tree_method": "approx"}, dmat_dev, num_boost_round=4)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_metainfo(self) -> None:
|
||||
import cupy as cp
|
||||
|
||||
Reference in New Issue
Block a user