Consistent use of context to specify number of threads. (#8733)

- Use context in all tests.
- Use context in R.
- Use context in C API DMatrix initialization (0 threads is used as the default).
This commit is contained in:
Jiaming Yuan
2023-01-30 15:25:31 +08:00
committed by GitHub
parent 21a28f2cc5
commit 3760cede0f
24 changed files with 212 additions and 152 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019-2022 by XGBoost Contributors
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
@@ -70,14 +70,14 @@ TEST(SparsePage, PushCSCAfterTranspose) {
SparsePage page; // Consolidated sparse page
for (const auto &batch : dmat->GetBatches<xgboost::SparsePage>()) {
// Transpose each batch and push
SparsePage tmp = batch.GetTranspose(ncols, common::OmpGetNumThreads(0));
SparsePage tmp = batch.GetTranspose(ncols, AllThreadsForTest());
page.PushCSC(tmp);
}
// Make sure that the final sparse page has the right number of entries
ASSERT_EQ(kEntries, page.data.Size());
page.SortRows(common::OmpGetNumThreads(0));
page.SortRows(AllThreadsForTest());
auto v = page.GetView();
for (size_t i = 0; i < v.Size(); ++i) {
auto column = v[i];
@@ -89,7 +89,7 @@ TEST(SparsePage, PushCSCAfterTranspose) {
TEST(SparsePage, SortIndices) {
auto p_fmat = RandomDataGenerator{100, 10, 0.6}.GenerateDMatrix();
auto n_threads = common::OmpGetNumThreads(0);
auto n_threads = AllThreadsForTest();
SparsePage copy;
for (auto const& page : p_fmat->GetBatches<SparsePage>()) {
ASSERT_TRUE(page.IsIndicesSorted(n_threads));

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022 XGBoost contributors
/**
* Copyright 2021-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
@@ -46,7 +46,7 @@ TEST(GradientIndex, FromCategoricalBasic) {
h_ft.resize(kCols, FeatureType::kCategorical);
BatchParam p(max_bins, 0.8);
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, common::OmpGetNumThreads(0), {});
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
auto x_copy = x;
std::sort(x_copy.begin(), x_copy.end());
@@ -75,7 +75,7 @@ TEST(GradientIndex, PushBatch) {
auto test = [&](float sparisty) {
auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true);
auto cuts = common::SketchOnDMatrix(m.get(), max_bins, common::OmpGetNumThreads(0), false, {});
auto cuts = common::SketchOnDMatrix(m.get(), max_bins, AllThreadsForTest(), false, {});
common::HistogramCuts copy_cuts = cuts;
ASSERT_EQ(m->Info().num_row_, kRows);

View File

@@ -1,13 +1,16 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2023 by XGBoost contributors
*/
#include "test_iterative_dmatrix.h"
#include <gtest/gtest.h>
#include <limits>
#include <memory>
#include "../../../src/data/gradient_index.h"
#include "../../../src/data/iterative_dmatrix.h"
#include "../helpers.h"
#include "xgboost/data.h" // DMatrix
namespace xgboost {
namespace data {
@@ -20,8 +23,10 @@ TEST(IterativeDMatrix, IsDense) {
int n_bins = 16;
auto test = [n_bins](float sparsity) {
NumpyArrayIterForTest iter(sparsity);
auto n_threads = 0;
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
std::numeric_limits<float>::quiet_NaN(), n_threads, n_bins);
ASSERT_EQ(m.Ctx()->Threads(), AllThreadsForTest());
if (sparsity == 0.0) {
ASSERT_TRUE(m.IsDense());
} else {

View File

@@ -411,3 +411,14 @@ TEST(SimpleDMatrix, SaveLoadBinary) {
delete dmat;
delete dmat_read;
}
// Checks that a DMatrix built from an array-interface adapter reports, through
// its context, the thread count returned by AllThreadsForTest().
TEST(SimpleDMatrix, Threads) {
size_t constexpr kRows{16};
size_t constexpr kCols{8};
// Backing storage handed to the generator; it must outlive the adapter below.
// NOTE(review): presumably the array-interface string points into this buffer -- confirm.
HostDeviceVector<float> data;
// Third generator argument is the sparsity; 0.0 requests data with no missing entries.
auto arr_str = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&data);
auto adapter = data::ArrayAdapter{StringView{arr_str}};
// NOTE(review): the literal 0 is presumably the thread-count argument and the
// empty string a cache prefix -- confirm against the DMatrix::Create signature.
std::unique_ptr<DMatrix> p_fmat{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0, "")};
// The context owned by the DMatrix must report the same thread count the test helper does.
ASSERT_EQ(p_fmat->Ctx()->Threads(), AllThreadsForTest());
}

View File

@@ -1,4 +1,6 @@
// Copyright by Contributors
/**
* Copyright 2016-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
@@ -22,13 +24,15 @@ void TestSparseDMatrixLoadFile() {
CreateBigTestData(opath, 3 * 64, false);
opath += "?indexing_mode=1";
data::FileIterator iter{opath, 0, 1, "libsvm"};
auto n_threads = 0;
data::SparsePageDMatrix m{&iter,
iter.Proxy(),
data::fileiter::Reset,
data::fileiter::Next,
std::numeric_limits<float>::quiet_NaN(),
1,
n_threads,
tmpdir.path + "cache"};
ASSERT_EQ(AllThreadsForTest(), m.Ctx()->Threads());
ASSERT_EQ(m.Info().num_col_, 5);
ASSERT_EQ(m.Info().num_row_, 64);
@@ -213,16 +217,13 @@ TEST(SparsePageDMatrix, ColAccessBatches) {
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
// Create multiple sparse pages
std::unique_ptr<xgboost::DMatrix> dmat{xgboost::CreateSparsePageDMatrix(kEntries)};
auto n_threads = omp_get_max_threads();
omp_set_num_threads(16);
ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest());
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>()) {
ASSERT_EQ(dmat->Info().num_col_, page.Size());
}
omp_set_num_threads(n_threads);
}
auto TestSparsePageDMatrixDeterminism(int32_t threads) {
omp_set_num_threads(threads);
std::vector<float> sparse_data;
std::vector<size_t> sparse_rptr;
std::vector<bst_feature_t> sparse_cids;
@@ -231,16 +232,15 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) {
CreateBigTestData(filename, 1 << 16);
data::FileIterator iter(filename, 0, 1, "auto");
std::unique_ptr<DMatrix> sparse{new data::SparsePageDMatrix{
&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
std::numeric_limits<float>::quiet_NaN(), 1, filename}};
std::unique_ptr<DMatrix> sparse{
new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
std::numeric_limits<float>::quiet_NaN(), threads, filename}};
CHECK(sparse->Ctx()->Threads() == threads || sparse->Ctx()->Threads() == AllThreadsForTest());
DMatrixToCSR(sparse.get(), &sparse_data, &sparse_rptr, &sparse_cids);
auto cache_name =
data::MakeId(filename,
dynamic_cast<data::SparsePageDMatrix *>(sparse.get())) +
".row.page";
data::MakeId(filename, dynamic_cast<data::SparsePageDMatrix *>(sparse.get())) + ".row.page";
std::string cache = common::LoadSequentialFile(cache_name);
return cache;
}