[EM] Make page concatenation optional. (#10826)
This PR introduces a new parameter, `extmem_concat_pages`, that makes page concatenation optional for GPU hist. In addition, the documentation is updated for the new GPU-based external memory.
This commit is contained in:
@@ -496,7 +496,7 @@ auto MakeExtMemForTest(bst_idx_t n_samples, bst_feature_t n_features, Json dconf
|
||||
|
||||
NumpyArrayIterForTest iter_1{0.0f, n_samples, n_features, n_batches};
|
||||
auto Xy = std::make_shared<data::SparsePageDMatrix>(
|
||||
&iter_1, iter_1.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, "");
|
||||
&iter_1, iter_1.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, "", false);
|
||||
MakeLabelForTest(Xy, p_fmat);
|
||||
return std::pair{p_fmat, Xy};
|
||||
}
|
||||
|
||||
@@ -37,7 +37,8 @@ void TestSparseDMatrixLoadFile(Context const* ctx) {
|
||||
data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
n_threads,
|
||||
tmpdir.path + "cache"};
|
||||
tmpdir.path + "cache",
|
||||
false};
|
||||
ASSERT_EQ(AllThreadsForTest(), m.Ctx()->Threads());
|
||||
ASSERT_EQ(m.Info().num_col_, 5);
|
||||
ASSERT_EQ(m.Info().num_row_, 64);
|
||||
@@ -364,9 +365,9 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) {
|
||||
CreateBigTestData(filename, 1 << 16);
|
||||
|
||||
data::FileIterator iter(filename + "?format=libsvm", 0, 1);
|
||||
std::unique_ptr<DMatrix> sparse{
|
||||
new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), threads, filename}};
|
||||
std::unique_ptr<DMatrix> sparse{new data::SparsePageDMatrix{
|
||||
&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), threads, filename, false}};
|
||||
CHECK(sparse->Ctx()->Threads() == threads || sparse->Ctx()->Threads() == AllThreadsForTest());
|
||||
|
||||
DMatrixToCSR(sparse.get(), &sparse_data, &sparse_rptr, &sparse_cids);
|
||||
|
||||
@@ -81,10 +81,11 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
|
||||
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
|
||||
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
|
||||
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
|
||||
auto p_fmat = sample.p_fmat;
|
||||
ASSERT_EQ(p_fmat, dmat.get());
|
||||
ASSERT_THAT(
|
||||
[&] {
|
||||
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
|
||||
},
|
||||
GMockThrow("extmem_concat_pages"));
|
||||
}
|
||||
|
||||
TEST(GradientBasedSampler, UniformSampling) {
|
||||
@@ -120,4 +121,4 @@ TEST(GradientBasedSampler, GradientBasedSamplingExternalMemory) {
|
||||
constexpr bool kFixedSizeSampling = false;
|
||||
VerifySampling(kPageSize, kSubsample, kSamplingMethod, kFixedSizeSampling);
|
||||
}
|
||||
}; // namespace xgboost::tree
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -23,7 +23,7 @@ namespace xgboost::tree {
|
||||
namespace {
|
||||
void UpdateTree(Context const* ctx, linalg::Matrix<GradientPair>* gpair, DMatrix* dmat,
|
||||
RegTree* tree, HostDeviceVector<bst_float>* preds, float subsample,
|
||||
const std::string& sampling_method, bst_bin_t max_bin) {
|
||||
const std::string& sampling_method, bst_bin_t max_bin, bool concat_pages) {
|
||||
Args args{
|
||||
{"max_depth", "2"},
|
||||
{"max_bin", std::to_string(max_bin)},
|
||||
@@ -38,13 +38,17 @@ void UpdateTree(Context const* ctx, linalg::Matrix<GradientPair>* gpair, DMatrix
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
std::unique_ptr<TreeUpdater> hist_maker{TreeUpdater::Create("grow_gpu_hist", ctx, &task)};
|
||||
hist_maker->Configure(Args{});
|
||||
if (subsample < 1.0) {
|
||||
hist_maker->Configure(Args{{"extmem_concat_pages", std::to_string(concat_pages)}});
|
||||
} else {
|
||||
hist_maker->Configure(Args{});
|
||||
}
|
||||
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker->Update(¶m, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{tree});
|
||||
auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);
|
||||
if (subsample < 1.0 && !dmat->SingleColBlock()) {
|
||||
if (subsample < 1.0 && !dmat->SingleColBlock() && concat_pages) {
|
||||
ASSERT_FALSE(hist_maker->UpdatePredictionCache(dmat, cache));
|
||||
} else {
|
||||
ASSERT_TRUE(hist_maker->UpdatePredictionCache(dmat, cache));
|
||||
@@ -69,12 +73,12 @@ TEST(GpuHist, UniformSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows, false);
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, kSubsample, "uniform",
|
||||
kRows);
|
||||
kRows, false);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
@@ -100,13 +104,13 @@ TEST(GpuHist, GradientBasedSampling) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows, false);
|
||||
|
||||
// Build another tree using sampling.
|
||||
RegTree tree_sampling;
|
||||
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, kSubsample,
|
||||
"gradient_based", kRows);
|
||||
"gradient_based", kRows, false);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
@@ -137,11 +141,11 @@ TEST(GpuHist, ExternalMemory) {
|
||||
// Build a tree using the in-memory DMatrix.
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows, true);
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, 1.0, "uniform", kRows);
|
||||
UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, 1.0, "uniform", kRows, true);
|
||||
|
||||
// Make sure the predictions are the same.
|
||||
auto preds_h = preds.ConstHostVector();
|
||||
@@ -181,14 +185,14 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
|
||||
|
||||
RegTree tree;
|
||||
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, kSubsample, kSamplingMethod, kRows);
|
||||
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, kSubsample, kSamplingMethod, kRows, true);
|
||||
|
||||
// Build another tree using multiple ELLPACK pages.
|
||||
common::GlobalRandom() = rng;
|
||||
RegTree tree_ext;
|
||||
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, ctx.Device());
|
||||
UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, kSubsample, kSamplingMethod,
|
||||
kRows);
|
||||
kRows, true);
|
||||
|
||||
Json jtree{Object{}};
|
||||
Json jtree_ext{Object{}};
|
||||
@@ -228,6 +232,42 @@ TEST(GpuHist, MaxDepth) {
|
||||
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
|
||||
}
|
||||
|
||||
TEST(GpuHist, PageConcatConfig) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
bst_idx_t n_samples = 64, n_features = 32;
|
||||
auto p_fmat = RandomDataGenerator{n_samples, n_features, 0}.Batches(2).GenerateSparsePageDMatrix(
|
||||
"temp", true);
|
||||
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create({p_fmat}));
|
||||
learner->SetParam("device", ctx.DeviceName());
|
||||
learner->SetParam("extmem_concat_pages", "true");
|
||||
learner->SetParam("subsample", "0.8");
|
||||
learner->Configure();
|
||||
|
||||
learner->UpdateOneIter(0, p_fmat);
|
||||
learner->SetParam("extmem_concat_pages", "false");
|
||||
learner->Configure();
|
||||
// GPU Hist rebuilds the updater after configuration. Training continues
|
||||
learner->UpdateOneIter(1, p_fmat);
|
||||
|
||||
learner->SetParam("extmem_concat_pages", "true");
|
||||
learner->SetParam("subsample", "1.0");
|
||||
ASSERT_THAT([&] { learner->UpdateOneIter(2, p_fmat); }, GMockThrow("extmem_concat_pages"));
|
||||
|
||||
// Throws error on CPU.
|
||||
{
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create({p_fmat}));
|
||||
learner->SetParam("extmem_concat_pages", "true");
|
||||
ASSERT_THAT([&] { learner->UpdateOneIter(0, p_fmat); }, GMockThrow("extmem_concat_pages"));
|
||||
}
|
||||
{
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create({p_fmat}));
|
||||
learner->SetParam("extmem_concat_pages", "true");
|
||||
learner->SetParam("tree_method", "approx");
|
||||
ASSERT_THAT([&] { learner->UpdateOneIter(0, p_fmat); }, GMockThrow("extmem_concat_pages"));
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
|
||||
@@ -3,6 +3,8 @@ import sys
|
||||
import pytest
|
||||
from hypothesis import given, settings, strategies
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing import no_cupy
|
||||
from xgboost.testing.updater import check_extmem_qdm, check_quantile_loss_extmem
|
||||
|
||||
@@ -72,6 +74,22 @@ def test_extmem_qdm(
|
||||
check_extmem_qdm(n_samples_per_batch, n_features, n_batches, "cuda", on_host)
|
||||
|
||||
|
||||
def test_concat_pages() -> None:
|
||||
it = tm.IteratorForTest(*tm.make_batches(64, 16, 4, use_cupy=True), cache=None)
|
||||
Xy = xgb.ExtMemQuantileDMatrix(it)
|
||||
with pytest.raises(ValueError, match="can not be used with concatenated pages"):
|
||||
booster = xgb.train(
|
||||
{
|
||||
"device": "cuda",
|
||||
"subsample": 0.5,
|
||||
"sampling_method": "gradient_based",
|
||||
"extmem_concat_pages": True,
|
||||
"objective": "reg:absoluteerror",
|
||||
},
|
||||
Xy,
|
||||
)
|
||||
|
||||
|
||||
@given(
|
||||
strategies.integers(1, 64),
|
||||
strategies.integers(1, 8),
|
||||
|
||||
@@ -6,24 +6,32 @@ import pytest
|
||||
|
||||
from xgboost import testing as tm
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import test_demos as td # noqa
|
||||
DEMO_DIR = tm.demo_dir(__file__)
|
||||
PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, "guide-python")
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_data_iterator():
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, "quantile_data_iterator.py")
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "quantile_data_iterator.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_update_process_demo():
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, "update_process.py")
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "update_process.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_categorical_demo():
|
||||
script = os.path.join(td.PYTHON_DEMO_DIR, "categorical.py")
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "categorical.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_rmm())
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_external_memory_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "external_memory.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
Reference in New Issue
Block a user