merge 23Mar01

This commit is contained in:
amdsc21
2023-05-02 00:05:58 +02:00
258 changed files with 7471 additions and 5379 deletions

View File

@@ -18,7 +18,7 @@ $command_wrapper bash -c "cd build && ctest --extra-verbose"
echo "--- Build binary wheel"
$command_wrapper bash -c \
"cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
$command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}

View File

@@ -27,7 +27,7 @@ $command_wrapper tests/ci_build/build_via_cmake.sh -DCMAKE_PREFIX_PATH=/opt/grpc
-DNCCL_LIBRARY=/workspace/libnccl_static.a ${arch_flag}
echo "--- Build binary wheel"
$command_wrapper bash -c \
"cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
"cd python-package && rm -rf dist/* && pip wheel --no-deps -v . --wheel-dir dist/"
$command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG}

View File

@@ -24,21 +24,17 @@ if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
Write-Host "--- Build binary wheel"
cd ../python-package
conda activate
& python setup.py bdist_wheel --universal
& pip install --user -v "pip>=23"
& pip --version
& pip wheel --no-deps -v . --wheel-dir dist/
Get-ChildItem . -Filter dist/*.whl |
Foreach-Object {
& python ../tests/ci_build/rename_whl.py $_.FullName $Env:BUILDKITE_COMMIT win_amd64
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
}
Write-Host "--- Insert vcomp140.dll (OpenMP runtime) into the wheel"
cd dist
Copy-Item -Path ../../tests/ci_build/insert_vcomp140.py -Destination .
& python insert_vcomp140.py *.whl
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
Write-Host "--- Upload Python wheel"
cd ../..
cd ..
Get-ChildItem . -Filter python-package/dist/*.whl |
Foreach-Object {
& buildkite-agent artifact upload python-package/dist/$_

View File

@@ -26,7 +26,7 @@ if [[ "$platform_id" == macosx_* ]]; then
# cibuildwheel will take care of cross-compilation.
wheel_tag=macosx_12_0_arm64
cpython_ver=38
setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to setup.py
setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to xgboost.packager backend
export PYTHON_CROSSENV=1
export MACOSX_DEPLOYMENT_TARGET=12.0
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2"

View File

@@ -40,14 +40,24 @@ def pypkg(
major: int, minor: int, patch: int, rc: int, is_rc: bool, is_dev: bool
) -> None:
version = f"{major}.{minor}.{patch}"
pyver_path = os.path.join("xgboost", "VERSION")
pyver = version
if is_rc:
pyver = pyver + f"rc{rc}"
if is_dev:
pyver = pyver + "-dev"
pyver_path = os.path.join("xgboost", "VERSION")
with open(pyver_path, "w") as fd:
fd.write(pyver)
fd.write(pyver + "\n")
pyprj_path = os.path.join("pyproject.toml")
with open(pyprj_path, "r") as fd:
pyprj = fd.read()
matched = re.search('version = "' + r"([0-9]+\.[0-9]+\.[0-9]+.*)" + '"', pyprj)
assert matched, "Couldn't find version string in pyproject.toml."
pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :]
with open(pyprj_path, "w") as fd:
fd.write(pyprj)
@cd(R_PACKAGE)

View File

@@ -18,6 +18,7 @@ dependencies:
- cloudpickle
- pytest
- hypothesis
- hatchling
- pip:
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
- https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz

View File

@@ -8,5 +8,6 @@ dependencies:
- wheel
- cmake
- ninja
- python-build
- c-compiler
- cxx-compiler

View File

@@ -1,102 +0,0 @@
import argparse
import base64
import glob
import hashlib
import os
import pathlib
import re
import shutil
import tempfile
VCOMP140_PATH = "C:\\Windows\\System32\\vcomp140.dll"
def get_sha256sum(path):
return (
base64.urlsafe_b64encode(hashlib.sha256(open(path, "rb").read()).digest())
.decode("latin1")
.rstrip("=")
)
def update_record(*, wheel_content_dir, xgboost_version):
vcomp140_size = os.path.getsize(VCOMP140_PATH)
vcomp140_hash = get_sha256sum(VCOMP140_PATH)
record_path = wheel_content_dir / pathlib.Path(
f"xgboost-{xgboost_version}.dist-info/RECORD"
)
with open(record_path, "r") as f:
record_content = f.read()
record_content += f"xgboost-{xgboost_version}.data/data/xgboost/vcomp140.dll,"
record_content += f"sha256={vcomp140_hash},{vcomp140_size}\n"
with open(record_path, "w") as f:
f.write(record_content)
def main(args):
candidates = list(sorted(glob.glob(args.wheel_path)))
for wheel_path in candidates:
print(f"Processing wheel {wheel_path}")
m = re.search(r"xgboost-(.*)\+.*-py3", wheel_path)
if not m:
raise ValueError(f"Wheel {wheel_path} has unexpected name")
version = m.group(1)
print(f" Detected version for {wheel_path}: {version}")
print(f" Inserting vcomp140.dll into {wheel_path}...")
with tempfile.TemporaryDirectory() as tempdir:
wheel_content_dir = pathlib.Path(tempdir) / "wheel_content"
print(f" Extract {wheel_path} into {wheel_content_dir}")
shutil.unpack_archive(
wheel_path, extract_dir=wheel_content_dir, format="zip"
)
data_dir = wheel_content_dir / pathlib.Path(
f"xgboost-{version}.data/data/xgboost"
)
data_dir.mkdir(parents=True, exist_ok=True)
print(f" Copy {VCOMP140_PATH} -> {data_dir}")
shutil.copy(VCOMP140_PATH, data_dir)
print(f" Update RECORD")
update_record(wheel_content_dir=wheel_content_dir, xgboost_version=version)
print(f" Content of {wheel_content_dir}:")
for e in sorted(wheel_content_dir.rglob("*")):
if e.is_file():
r = e.relative_to(wheel_content_dir)
print(f" {r}")
print(f" Create new wheel...")
new_wheel_tmp_path = pathlib.Path(tempdir) / "new_wheel"
shutil.make_archive(
str(new_wheel_tmp_path.resolve()),
format="zip",
root_dir=wheel_content_dir,
)
new_wheel_tmp_path = new_wheel_tmp_path.resolve().with_suffix(".zip")
new_wheel_tmp_path = new_wheel_tmp_path.rename(
new_wheel_tmp_path.with_suffix(".whl")
)
print(f" Created new wheel {new_wheel_tmp_path}")
# Rename the old wheel with suffix .bak
# The new wheel takes the name of the old wheel
wheel_path_obj = pathlib.Path(wheel_path).resolve()
backup_path = wheel_path_obj.with_suffix(".whl.bak")
print(f" Rename {wheel_path_obj} -> {backup_path}")
wheel_path_obj.replace(backup_path)
print(f" Rename {new_wheel_tmp_path} -> {wheel_path_obj}")
new_wheel_tmp_path.replace(wheel_path_obj)
shutil.rmtree(wheel_content_dir)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"wheel_path", type=str, help="Path to wheel (wildcard permitted)"
)
args = parser.parse_args()
main(args)

View File

@@ -198,7 +198,7 @@ def main(args: argparse.Namespace) -> None:
run_mypy(path)
for path in [
# core
"python-package/xgboost/",
"python-package/",
# demo
"demo/json-model/json_parser.py",
"demo/guide-python/external_memory.py",

View File

@@ -28,7 +28,7 @@ function install_xgboost {
then
pushd .
cd python-package
python setup.py install --user
pip install --user -v .
popd
fi
}

View File

@@ -14,11 +14,12 @@ TEST(DenseColumn, Test) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
BinTypeSize last{kUint8BinsTypeSize};
for (int32_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix();
auto sparse_thresh = 0.2;
GHistIndexMatrix gmat{dmat.get(), max_num_bin, sparse_thresh, false, AllThreadsForTest()};
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, sparse_thresh, false};
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, sparse_thresh, AllThreadsForTest());
@@ -62,9 +63,10 @@ TEST(SparseColumn, Test) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (int32_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix();
GHistIndexMatrix gmat{dmat.get(), max_num_bin, 0.5f, false, AllThreadsForTest()};
GHistIndexMatrix gmat{&ctx, dmat.get(), max_num_bin, 0.5f, false};
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, 1.0, AllThreadsForTest());
@@ -90,9 +92,10 @@ TEST(DenseColumnWithMissing, Test) {
int32_t max_num_bins[] = {static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 1,
static_cast<int32_t>(std::numeric_limits<uint16_t>::max()) + 2};
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (int32_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix();
GHistIndexMatrix gmat(dmat.get(), max_num_bin, 0.2, false, AllThreadsForTest());
GHistIndexMatrix gmat(&ctx, dmat.get(), max_num_bin, 0.2, false);
ColumnMatrix column_matrix;
for (auto const& page : dmat->GetBatches<SparsePage>()) {
column_matrix.InitFromSparse(page, gmat, 0.2, AllThreadsForTest());

View File

@@ -156,6 +156,7 @@ TEST(CutsBuilder, SearchGroupInd) {
}
TEST(HistUtil, DenseCutsCategorical) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
int categorical_sizes[] = {2, 6, 8, 12};
int num_bins = 256;
int sizes[] = {25, 100, 1000};
@@ -165,7 +166,7 @@ TEST(HistUtil, DenseCutsCategorical) {
std::vector<float> x_sorted(x);
std::sort(x_sorted.begin(), x_sorted.end());
auto dmat = GetDMatrixFromData(x, n, 1);
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
auto cuts_from_sketch = cuts.Values();
EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
@@ -176,6 +177,7 @@ TEST(HistUtil, DenseCutsCategorical) {
}
TEST(HistUtil, DenseCutsAccuracyTest) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100};
int num_columns = 5;
@@ -183,7 +185,7 @@ TEST(HistUtil, DenseCutsAccuracyTest) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
for (auto num_bins : bin_sizes) {
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -193,6 +195,7 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
int num_columns = 5;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
@@ -200,11 +203,11 @@ TEST(HistUtil, DenseCutsAccuracyTestWeights) {
dmat->Info().weights_.HostVector() = w;
for (auto num_bins : bin_sizes) {
{
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), true);
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, true);
ValidateCuts(cuts, dmat.get(), num_bins);
}
{
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), false);
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins, false);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -215,6 +218,7 @@ void TestQuantileWithHessian(bool use_sorted) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {1000, 1500};
int num_columns = 5;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
@@ -225,15 +229,13 @@ void TestQuantileWithHessian(bool use_sorted) {
dmat->Info().weights_.HostVector() = w;
for (auto num_bins : bin_sizes) {
HistogramCuts cuts_hess =
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted, hessian);
HistogramCuts cuts_hess = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted, hessian);
for (size_t i = 0; i < w.size(); ++i) {
dmat->Info().weights_.HostVector()[i] = w[i] * hessian[i];
}
ValidateCuts(cuts_hess, dmat.get(), num_bins);
HistogramCuts cuts_wh =
SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest(), use_sorted);
HistogramCuts cuts_wh = SketchOnDMatrix(&ctx, dmat.get(), num_bins, use_sorted);
ValidateCuts(cuts_wh, dmat.get(), num_bins);
ASSERT_EQ(cuts_hess.Values().size(), cuts_wh.Values().size());
@@ -255,12 +257,13 @@ TEST(HistUtil, DenseCutsExternalMemory) {
int bin_sizes[] = {2, 16, 256, 512};
int sizes[] = {100, 1000, 1500};
int num_columns = 5;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto num_rows : sizes) {
auto x = GenerateRandom(num_rows, num_columns);
dmlc::TemporaryDirectory tmpdir;
auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, tmpdir);
for (auto num_bins : bin_sizes) {
HistogramCuts cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
ValidateCuts(cuts, dmat.get(), num_bins);
}
}
@@ -275,12 +278,12 @@ TEST(HistUtil, IndexBinBound) {
kUint32BinsTypeSize};
size_t constexpr kRows = 100;
size_t constexpr kCols = 10;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
size_t bin_id = 0;
for (auto max_bin : bin_sizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize());
}
@@ -300,10 +303,11 @@ TEST(HistUtil, IndexBinData) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
size_t constexpr kRows = 100;
size_t constexpr kCols = 10;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto max_bin : kBinSizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
GHistIndexMatrix hmat(p_fmat.get(), max_bin, 0.5, false, AllThreadsForTest());
GHistIndexMatrix hmat(&ctx, p_fmat.get(), max_bin, 0.5, false);
uint32_t const* offsets = hmat.index.Offset();
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
switch (max_bin) {
@@ -327,10 +331,10 @@ void TestSketchFromWeights(bool with_group) {
size_t constexpr kRows = 300, kCols = 20, kBins = 256;
size_t constexpr kGroups = 10;
auto m = RandomDataGenerator{kRows, kCols, 0}.Device(0).GenerateDMatrix();
common::HistogramCuts cuts = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
common::HistogramCuts cuts = SketchOnDMatrix(&ctx, m.get(), kBins);
MetaInfo info;
Context ctx;
auto& h_weights = info.weights_.HostVector();
if (with_group) {
h_weights.resize(kGroups);
@@ -363,7 +367,7 @@ void TestSketchFromWeights(bool with_group) {
if (with_group) {
m->Info().weights_ = decltype(m->Info().weights_)(); // remove weight
HistogramCuts non_weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
HistogramCuts non_weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
for (size_t i = 0; i < cuts.Values().size(); ++i) {
EXPECT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
}
@@ -382,7 +386,7 @@ void TestSketchFromWeights(bool with_group) {
for (size_t i = 0; i < h_weights.size(); ++i) {
h_weights[i] = static_cast<float>(i + 1) / static_cast<float>(kGroups);
}
HistogramCuts weighted = SketchOnDMatrix(m.get(), kBins, AllThreadsForTest());
HistogramCuts weighted = SketchOnDMatrix(&ctx, m.get(), kBins);
ValidateCuts(weighted, m.get(), kBins);
}
}
@@ -393,11 +397,12 @@ TEST(HistUtil, SketchFromWeights) {
}
TEST(HistUtil, SketchCategoricalFeatures) {
TestCategoricalSketch(1000, 256, 32, false, [](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
TestCategoricalSketch(1000, 256, 32, false, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
});
TestCategoricalSketch(1000, 256, 32, true, [](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(p_fmat, num_bins, AllThreadsForTest());
TestCategoricalSketch(1000, 256, 32, true, [&ctx](DMatrix* p_fmat, int32_t num_bins) {
return SketchOnDMatrix(&ctx, p_fmat, num_bins);
});
}
} // namespace common

View File

@@ -25,9 +25,9 @@ namespace xgboost {
namespace common {
template <typename AdapterT>
HistogramCuts GetHostCuts(AdapterT *adapter, int num_bins, float missing) {
HistogramCuts GetHostCuts(Context const* ctx, AdapterT* adapter, int num_bins, float missing) {
data::SimpleDMatrix dmat(adapter, missing, 1);
HistogramCuts cuts = SketchOnDMatrix(&dmat, num_bins, AllThreadsForTest());
HistogramCuts cuts = SketchOnDMatrix(ctx, &dmat, num_bins);
return cuts;
}
@@ -39,7 +39,9 @@ TEST(HistUtil, DeviceSketch) {
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
auto device_cuts = DeviceSketch(0, dmat.get(), num_bins);
HistogramCuts host_cuts = SketchOnDMatrix(dmat.get(), num_bins, AllThreadsForTest());
Context ctx;
HistogramCuts host_cuts = SketchOnDMatrix(&ctx, dmat.get(), num_bins);
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());
@@ -314,7 +316,8 @@ TEST(HistUtil, AdapterDeviceSketch) {
data::CupyAdapter adapter(str);
auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing);
auto host_cuts = GetHostCuts(&adapter, num_bins, missing);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
EXPECT_EQ(device_cuts.Ptrs(), host_cuts.Ptrs());

View File

@@ -88,7 +88,8 @@ inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
fo << row_data.str() << "\n";
}
fo.close();
return std::shared_ptr<DMatrix>(DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"));
return std::shared_ptr<DMatrix>(
DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache"));
}
// Test that elements are approximately equally distributed among bins

View File

@@ -16,7 +16,8 @@ TEST(Quantile, LoadBalance) {
size_t constexpr kRows = 1000, kCols = 100;
auto m = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
std::vector<bst_feature_t> cols_ptr;
for (auto const& page : m->GetBatches<SparsePage>()) {
Context ctx;
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
data::SparsePageAdapterBatch adapter{page.GetView()};
cols_ptr = LoadBalance(adapter, page.data.Size(), kCols, 13, [](auto) { return true; });
}
@@ -43,6 +44,7 @@ void PushPage(HostSketchContainer* container, SparsePage const& page, MetaInfo c
template <bool use_column>
void DoTestDistributedQuantile(size_t rows, size_t cols) {
Context ctx;
auto const world = collective::GetWorldSize();
std::vector<MetaInfo> infos(2);
auto& h_weights = infos.front().weights_.HostVector();
@@ -51,7 +53,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
SimpleRealUniformDistribution<float> dist(3, 1000);
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
std::vector<bst_row_t> column_size(cols, rows);
size_t n_bins = 64;
bst_bin_t n_bins = 64;
// Generate cuts for distributed environment.
auto sparsity = 0.5f;
@@ -72,29 +74,29 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
std::vector<float> hessian(rows, 1.0);
auto hess = Span<float const>{hessian};
ContainerType<use_column> sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, false, AllThreadsForTest());
ContainerType<use_column> sketch_distributed(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
} else {
for (auto const& page : m->GetBatches<SparsePage>()) {
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
}
HistogramCuts distributed_cuts;
sketch_distributed.MakeCuts(&distributed_cuts);
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
// Generate cuts for single node environment
collective::Finalize();
CHECK_EQ(collective::GetWorldSize(), 1);
std::for_each(column_size.begin(), column_size.end(), [=](auto& size) { size *= world; });
m->Info().num_row_ = world * rows;
ContainerType<use_column> sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, false, AllThreadsForTest());
ContainerType<use_column> sketch_on_single_node(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
m->Info().num_row_ = rows;
for (auto rank = 0; rank < world; ++rank) {
@@ -106,7 +108,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
.Upper(1.0f)
.GenerateDMatrix();
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_on_single_node, page, m->Info(), hess);
}
} else {
@@ -117,7 +119,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
}
HistogramCuts single_node_cuts;
sketch_on_single_node.MakeCuts(&single_node_cuts);
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
auto const& sptrs = single_node_cuts.Ptrs();
auto const& dptrs = distributed_cuts.Ptrs();
@@ -172,6 +174,7 @@ TEST(Quantile, SortedDistributed) {
namespace {
template <bool use_column>
void DoTestColSplitQuantile(size_t rows, size_t cols) {
Context ctx;
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
@@ -204,22 +207,22 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
// Generate cuts for distributed environment.
HistogramCuts distributed_cuts;
{
ContainerType<use_column> sketch_distributed(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, true, AllThreadsForTest());
ContainerType<use_column> sketch_distributed(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
std::vector<float> hessian(rows, 1.0);
auto hess = Span<float const>{hessian};
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
} else {
for (auto const& page : m->GetBatches<SparsePage>()) {
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
PushPage(&sketch_distributed, page, m->Info(), hess);
}
}
sketch_distributed.MakeCuts(&distributed_cuts);
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
}
// Generate cuts for single node environment
@@ -227,22 +230,22 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
CHECK_EQ(collective::GetWorldSize(), 1);
HistogramCuts single_node_cuts;
{
ContainerType<use_column> sketch_on_single_node(n_bins, m->Info().feature_types.ConstHostSpan(),
column_size, false, false, AllThreadsForTest());
ContainerType<use_column> sketch_on_single_node(
&ctx, n_bins, m->Info().feature_types.ConstHostSpan(), column_size, false);
std::vector<float> hessian(rows, 1.0);
auto hess = Span<float const>{hessian};
if (use_column) {
for (auto const& page : m->GetBatches<SortedCSCPage>()) {
for (auto const& page : m->GetBatches<SortedCSCPage>(&ctx)) {
PushPage(&sketch_on_single_node, page, m->Info(), hess);
}
} else {
for (auto const& page : m->GetBatches<SparsePage>()) {
for (auto const& page : m->GetBatches<SparsePage>(&ctx)) {
PushPage(&sketch_on_single_node, page, m->Info(), hess);
}
}
sketch_on_single_node.MakeCuts(&single_node_cuts);
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
}
auto const& sptrs = single_node_cuts.Ptrs();
@@ -299,8 +302,10 @@ namespace {
void TestSameOnAllWorkers() {
auto const world = collective::GetWorldSize();
constexpr size_t kRows = 1000, kCols = 100;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
RunWithSeedsAndBins(
kRows, [=](int32_t seed, size_t n_bins, MetaInfo const&) {
kRows, [=, &ctx](int32_t seed, size_t n_bins, MetaInfo const&) {
auto rank = collective::GetRank();
HostDeviceVector<float> storage;
std::vector<FeatureType> ft(kCols);
@@ -314,7 +319,7 @@ void TestSameOnAllWorkers() {
.MaxCategory(17)
.Seed(rank + seed)
.GenerateDMatrix();
auto cuts = SketchOnDMatrix(m.get(), n_bins, AllThreadsForTest());
auto cuts = SketchOnDMatrix(&ctx, m.get(), n_bins);
std::vector<float> cut_values(cuts.Values().size() * world, 0);
std::vector<
typename std::remove_reference_t<decltype(cuts.Ptrs())>::value_type>

View File

@@ -1,17 +1,17 @@
/*!
* Copyright 2019-2020 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <xgboost/base.h>
#include <utility>
#include "../helpers.h"
#include "../histogram_helpers.h"
#include "gtest/gtest.h"
#include "../../../src/common/categorical.h"
#include "../../../src/common/hist_util.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/tree/param.h" // TrainParam
#include "../helpers.h"
#include "../histogram_helpers.h"
#include "gtest/gtest.h"
namespace xgboost {
@@ -19,7 +19,10 @@ TEST(EllpackPage, EmptyDMatrix) {
constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
constexpr float kSparsity = 0;
auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix();
auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
Context ctx{MakeCUDACtx(0)};
auto& page = *dmat->GetBatches<EllpackPage>(
&ctx, BatchParam{kMaxBin, tree::TrainParam::DftSparseThreshold()})
.begin();
auto impl = page.Impl();
ASSERT_EQ(impl->row_stride, 0);
ASSERT_EQ(impl->Cuts().TotalBins(), 0);
@@ -87,8 +90,9 @@ TEST(EllpackPage, FromCategoricalBasic) {
auto& h_ft = m->Info().feature_types.HostVector();
h_ft.resize(kCols, FeatureType::kCategorical);
BatchParam p{0, max_bins};
auto ellpack = EllpackPage(m.get(), p);
Context ctx{MakeCUDACtx(0)};
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
auto ellpack = EllpackPage(&ctx, m.get(), p);
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
ASSERT_EQ(kCats, accessor.NumBins());
@@ -142,8 +146,9 @@ TEST(EllpackPage, Copy) {
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
@@ -151,7 +156,7 @@ TEST(EllpackPage, Copy) {
// Copy batch pages into the result page.
size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
size_t num_elements = result.Copy(0, batch.Impl(), offset);
offset += num_elements;
}
@@ -161,7 +166,7 @@ TEST(EllpackPage, Copy) {
thrust::device_vector<bst_float> row_result_d(kCols);
std::vector<bst_float> row(kCols);
std::vector<bst_float> row_result(kCols);
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto impl = page.Impl();
EXPECT_EQ(impl->base_rowid, current_row);
@@ -186,10 +191,11 @@ TEST(EllpackPage, Compact) {
// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
std::unique_ptr<DMatrix> dmat(
CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
@@ -201,7 +207,7 @@ TEST(EllpackPage, Compact) {
SIZE_MAX};
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
result.Compact(0, batch.Impl(), row_indexes_span);
}
@@ -210,7 +216,7 @@ TEST(EllpackPage, Compact) {
thrust::device_vector<bst_float> row_result_d(kCols);
std::vector<bst_float> row(kCols);
std::vector<bst_float> row_result(kCols);
for (auto& page : dmat->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto impl = page.Impl();
ASSERT_EQ(impl->base_rowid, current_row);
@@ -249,15 +255,17 @@ class EllpackPageTest : public testing::TestWithParam<float> {
// device.
size_t n_samples{128}, n_features{13};
Context ctx;
ctx.gpu_id = 0;
Context gpu_ctx{MakeCUDACtx(0)};
auto Xy = RandomDataGenerator{n_samples, n_features, sparsity}.GenerateDMatrix(true);
std::unique_ptr<EllpackPageImpl> from_ghist;
ASSERT_TRUE(Xy->SingleColBlock());
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(BatchParam{17, 0.6})) {
from_ghist.reset(new EllpackPageImpl{&ctx, page, {}});
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{17, 0.6})) {
from_ghist.reset(new EllpackPageImpl{&gpu_ctx, page, {}});
}
for (auto const& page : Xy->GetBatches<EllpackPage>(BatchParam{0, 17})) {
for (auto const& page : Xy->GetBatches<EllpackPage>(
&gpu_ctx, BatchParam{17, tree::TrainParam::DftSparseThreshold()})) {
auto from_sparse_page = page.Impl();
ASSERT_EQ(from_sparse_page->is_dense, from_ghist->is_dense);
ASSERT_EQ(from_sparse_page->base_rowid, 0);

View File

@@ -1,17 +1,21 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_source.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../../../src/tree/param.h" // TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
namespace data {
TEST(EllpackPageRawFormat, IO) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
std::unique_ptr<SparsePageFormat<EllpackPage>> format{CreatePageFormat<EllpackPage>("raw")};
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
@@ -20,7 +24,7 @@ TEST(EllpackPageRawFormat, IO) {
{
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
format->Write(ellpack, fo.get());
}
}
@@ -29,7 +33,7 @@ TEST(EllpackPageRawFormat, IO) {
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
format->Read(&page, fi.get());
for (auto const &ellpack : m->GetBatches<EllpackPage>({0, 256})) {
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
auto loaded = page.Impl();
auto orig = ellpack.Impl();
ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs());

View File

@@ -29,16 +29,16 @@ TEST(FileIterator, Basic) {
{
auto zpath = tmpdir.path + "/0-based.svm";
CreateBigTestData(zpath, 3 * 64, true);
zpath += "?indexing_mode=0";
FileIterator iter{zpath, 0, 1, "libsvm"};
zpath += "?indexing_mode=0&format=libsvm";
FileIterator iter{zpath, 0, 1};
check_n_features(&iter);
}
{
auto opath = tmpdir.path + "/1-based.svm";
CreateBigTestData(opath, 3 * 64, false);
opath += "?indexing_mode=1";
FileIterator iter{opath, 0, 1, "libsvm"};
opath += "?indexing_mode=1&format=libsvm";
FileIterator iter{opath, 0, 1};
check_n_features(&iter);
}
}

View File

@@ -2,20 +2,38 @@
* Copyright 2021-2023 by XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/data.h> // for BatchIterator, BatchSet, DMatrix, BatchParam
#include "../../../src/common/column_matrix.h"
#include "../../../src/common/io.h" // MemoryBufferStream
#include "../../../src/data/gradient_index.h"
#include "../helpers.h"
#include <algorithm> // for sort, unique
#include <cmath> // for isnan
#include <cstddef> // for size_t
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr, __shared_ptr_access, unique_ptr
#include <string> // for string
#include <tuple> // for make_tuple, tie, tuple
#include <utility> // for move
#include <vector> // for vector
#include "../../../src/common/categorical.h" // for AsCat
#include "../../../src/common/column_matrix.h" // for ColumnMatrix
#include "../../../src/common/hist_util.h" // for Index, HistogramCuts, SketchOnDMatrix
#include "../../../src/common/io.h" // for MemoryBufferStream
#include "../../../src/data/adapter.h" // for SparsePageAdapterBatch
#include "../../../src/data/gradient_index.h" // for GHistIndexMatrix
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h" // for CreateEmptyGenericParam, GenerateRandomCa...
#include "xgboost/base.h" // for bst_bin_t
#include "xgboost/context.h" // for Context
#include "xgboost/host_device_vector.h" // for HostDeviceVector
namespace xgboost {
namespace data {
TEST(GradientIndex, ExternalMemory) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(10000);
std::vector<size_t> base_rowids;
std::vector<float> hessian(dmat->Info().num_row_, 1);
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, true})) {
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, true})) {
base_rowids.push_back(page.base_rowid);
}
size_t i = 0;
@@ -24,9 +42,8 @@ TEST(GradientIndex, ExternalMemory) {
++i;
}
base_rowids.clear();
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({64, hessian, false})) {
for (auto const &page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, hessian, false})) {
base_rowids.push_back(page.base_rowid);
}
i = 0;
@@ -41,12 +58,13 @@ TEST(GradientIndex, FromCategoricalBasic) {
size_t max_bins = 8;
auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats);
auto m = GetDMatrixFromData(x, kRows, 1);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto &h_ft = m->Info().feature_types.HostVector();
h_ft.resize(kCols, FeatureType::kCategorical);
BatchParam p(max_bins, 0.8);
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
GHistIndexMatrix gidx(&ctx, m.get(), max_bins, p.sparse_thresh, false, {});
auto x_copy = x;
std::sort(x_copy.begin(), x_copy.end());
@@ -80,11 +98,11 @@ TEST(GradientIndex, FromCategoricalLarge) {
BatchParam p{max_bins, 0.8};
{
GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, AllThreadsForTest(), {});
GHistIndexMatrix gidx{&ctx, m.get(), max_bins, p.sparse_thresh, false, {}};
ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize);
}
{
for (auto const &page : m->GetBatches<GHistIndexMatrix>(p)) {
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, p)) {
common::HistogramCuts cut = page.cut;
GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins};
ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats);
@@ -96,10 +114,11 @@ TEST(GradientIndex, PushBatch) {
size_t constexpr kRows = 64, kCols = 4;
bst_bin_t max_bins = 64;
float st = 0.5;
Context ctx;
auto test = [&](float sparisty) {
auto m = RandomDataGenerator{kRows, kCols, sparisty}.GenerateDMatrix(true);
auto cuts = common::SketchOnDMatrix(m.get(), max_bins, AllThreadsForTest(), false, {});
auto cuts = common::SketchOnDMatrix(&ctx, m.get(), max_bins, false, {});
common::HistogramCuts copy_cuts = cuts;
ASSERT_EQ(m->Info().num_row_, kRows);
@@ -112,7 +131,7 @@ TEST(GradientIndex, PushBatch) {
m->Info().num_row_);
gmat.PushAdapterBatchColumns(m->Ctx(), batch, std::numeric_limits<float>::quiet_NaN(), 0);
}
for (auto const &page : m->GetBatches<GHistIndexMatrix>(BatchParam{max_bins, st})) {
for (auto const &page : m->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{max_bins, st})) {
for (size_t i = 0; i < kRows; ++i) {
for (size_t j = 0; j < kCols; ++j) {
auto v0 = gmat.GetFvalue(i, j, false);
@@ -143,17 +162,19 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
// device.
size_t n_samples{128}, n_features{13};
Context ctx;
ctx.gpu_id = 0;
auto Xy = RandomDataGenerator{n_samples, n_features, 1 - density}.GenerateDMatrix(true);
std::unique_ptr<GHistIndexMatrix> from_ellpack;
ASSERT_TRUE(Xy->SingleColBlock());
bst_bin_t constexpr kBins{17};
auto p = BatchParam{kBins, threshold};
for (auto const &page : Xy->GetBatches<EllpackPage>(BatchParam{0, kBins})) {
Context gpu_ctx;
gpu_ctx.gpu_id = 0;
for (auto const &page : Xy->GetBatches<EllpackPage>(
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
}
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(p)) {
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(&ctx, p)) {
ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense());
ASSERT_EQ(from_sparse_page.base_rowid, 0);
ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid);

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
@@ -11,6 +11,8 @@
namespace xgboost {
namespace data {
TEST(GHistIndexPageRawFormat, IO) {
Context ctx;
std::unique_ptr<SparsePageFormat<GHistIndexMatrix>> format{
CreatePageFormat<GHistIndexMatrix>("raw")};
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
@@ -20,7 +22,7 @@ TEST(GHistIndexPageRawFormat, IO) {
{
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
for (auto const &index : m->GetBatches<GHistIndexMatrix>(batch)) {
for (auto const &index : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
format->Write(index, fo.get());
}
}
@@ -29,7 +31,7 @@ TEST(GHistIndexPageRawFormat, IO) {
std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
format->Read(&page, fi.get());
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(batch)) {
for (auto const &gidx : m->GetBatches<GHistIndexMatrix>(&ctx, batch)) {
auto const &loaded = gidx;
ASSERT_EQ(loaded.cut.Ptrs(), page.cut.Ptrs());
ASSERT_EQ(loaded.cut.MinValues(), page.cut.MinValues());
@@ -43,5 +45,5 @@ TEST(GHistIndexPageRawFormat, IO) {
ASSERT_EQ(loaded.Transpose().GetTypeSize(), loaded.Transpose().GetTypeSize());
}
}
} // namespace data
} // namespace xgboost
} // namespace data
} // namespace xgboost

View File

@@ -15,8 +15,9 @@
namespace xgboost {
namespace data {
TEST(IterativeDMatrix, Ref) {
Context ctx;
TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
[&](GHistIndexMatrix const& page) { return page.cut; });
&ctx, [&](GHistIndexMatrix const& page) { return page.cut; });
}
TEST(IterativeDMatrix, IsDense) {

View File

@@ -1,11 +1,12 @@
/*!
* Copyright 2020-2022 XGBoost contributors
/**
* Copyright 2020-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../../../src/data/device_adapter.cuh"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/iterative_dmatrix.h"
#include "../../../src/tree/param.h" // TrainParam
#include "../helpers.h"
#include "test_iterative_dmatrix.h"
@@ -13,15 +14,17 @@ namespace xgboost {
namespace data {
void TestEquivalent(float sparsity) {
Context ctx{MakeCUDACtx(0)};
CudaArrayIterForTest iter{sparsity};
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, 256);
size_t offset = 0;
auto first = (*m.GetEllpackBatches({}).begin()).Impl();
std::size_t offset = 0;
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
std::unique_ptr<EllpackPageImpl> page_concatenated {
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
first->row_stride, 1000 * 100)};
for (auto& batch : m.GetBatches<EllpackPage>({})) {
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
auto page = batch.Impl();
size_t num_elements = page_concatenated->Copy(0, page, offset);
offset += num_elements;
@@ -34,8 +37,8 @@ void TestEquivalent(float sparsity) {
auto adapter = CupyAdapter(interface_str);
std::unique_ptr<DMatrix> dm{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
BatchParam bp {0, 256};
for (auto& ellpack : dm->GetBatches<EllpackPage>(bp)) {
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
@@ -92,7 +95,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
std::numeric_limits<float>::quiet_NaN(), 0, 256);
size_t n_batches = 0;
std::string interface_str = iter.AsArray();
for (auto& ellpack : m.GetBatches<EllpackPage>({})) {
Context ctx{MakeCUDACtx(0)};
for (auto& ellpack : m.GetBatches<EllpackPage>(&ctx, {})) {
n_batches ++;
auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator(
@@ -140,7 +144,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, 256);
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin();
auto ctx = MakeCUDACtx(0);
auto& ellpack =
*m.GetBatches<EllpackPage>(&ctx, BatchParam{256, tree::TrainParam::DftSparseThreshold()})
.begin();
auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator(
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
@@ -171,8 +178,9 @@ TEST(IterativeDeviceDMatrix, IsDense) {
}
TEST(IterativeDeviceDMatrix, Ref) {
Context ctx{MakeCUDACtx(0)};
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
[](EllpackPage const& page) { return page.Impl()->Cuts(); });
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
}
} // namespace data
} // namespace xgboost

View File

@@ -1,8 +1,11 @@
/*!
* Copyright 2022 XGBoost contributors
/**
* Copyright 2022-2023, XGBoost contributors
*/
#pragma once
#include <memory> // std::make_shared
#include <xgboost/context.h> // for Context
#include <limits> // for numeric_limits
#include <memory> // for make_shared
#include "../../../src/data/iterative_dmatrix.h"
#include "../helpers.h"
@@ -10,7 +13,7 @@
namespace xgboost {
namespace data {
template <typename Page, typename Iter, typename Cuts>
void TestRefDMatrix(Cuts&& get_cuts) {
void TestRefDMatrix(Context const* ctx, Cuts&& get_cuts) {
int n_bins = 256;
Iter iter(0.3, 2048);
auto m = std::make_shared<IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
@@ -20,8 +23,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
auto m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), m, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
for (auto const& page_0 : m->template GetBatches<Page>({})) {
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
auto const& cuts_0 = get_cuts(page_0);
auto const& cuts_1 = get_cuts(page_1);
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
@@ -32,8 +35,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), nullptr, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
for (auto const& page_0 : m->template GetBatches<Page>({})) {
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
for (auto const& page_0 : m->template GetBatches<Page>(ctx, {})) {
for (auto const& page_1 : m_1->template GetBatches<Page>(ctx, {})) {
auto const& cuts_0 = get_cuts(page_0);
auto const& cuts_1 = get_cuts(page_1);
ASSERT_NE(cuts_0.Values(), cuts_1.Values());
@@ -45,8 +48,8 @@ void TestRefDMatrix(Cuts&& get_cuts) {
auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);
auto dqm = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), dm, Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
for (auto const& page_0 : dm->template GetBatches<Page>({})) {
for (auto const& page_1 : dqm->template GetBatches<Page>({})) {
for (auto const& page_0 : dm->template GetBatches<Page>(ctx, {})) {
for (auto const& page_1 : dqm->template GetBatches<Page>(ctx, {})) {
auto const& cuts_0 = get_cuts(page_0);
auto const& cuts_1 = get_cuts(page_1);
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());

View File

@@ -157,8 +157,7 @@ TEST(MetaInfo, LoadQid) {
dmlc::TemporaryDirectory tempdir;
std::string tmp_file = tempdir.path + "/qid_test.libsvm";
{
std::unique_ptr<dmlc::Stream> fs(
dmlc::Stream::Create(tmp_file.c_str(), "w"));
std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(tmp_file.c_str(), "w"));
dmlc::ostream os(fs.get());
os << R"qid(3 qid:1 1:1 2:1 3:0 4:0.2 5:0
2 qid:1 1:0 2:0 3:1 4:0.1 5:1
@@ -175,7 +174,7 @@ TEST(MetaInfo, LoadQid) {
os.set_stream(nullptr);
}
std::unique_ptr<xgboost::DMatrix> dmat(
xgboost::DMatrix::Load(tmp_file, true, xgboost::DataSplitMode::kRow, "libsvm"));
xgboost::DMatrix::Load(tmp_file + "?format=libsvm", true, xgboost::DataSplitMode::kRow));
const xgboost::MetaInfo& info = dmat->Info();
const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};

View File

@@ -17,11 +17,15 @@
using namespace xgboost; // NOLINT
namespace {
std::string UriSVM(std::string name) { return name + "?format=libsvm"; }
} // namespace
TEST(SimpleDMatrix, MetaInfo) {
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
// Test the metadata that was parsed
EXPECT_EQ(dmat->Info().num_row_, 2);
@@ -37,7 +41,7 @@ TEST(SimpleDMatrix, RowAccess) {
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file, false);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file), false);
// Loop over the batches and count the records
int64_t row_count = 0;
@@ -57,16 +61,17 @@ TEST(SimpleDMatrix, RowAccess) {
}
TEST(SimpleDMatrix, ColAccessWithoutBatches) {
Context ctx;
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
ASSERT_TRUE(dmat->SingleColBlock());
// Loop over the batches and assert the data is as expected
int64_t num_col_batch = 0;
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
num_col_batch += 1;
EXPECT_EQ(batch.Size(), dmat->Info().num_col_)
<< "Expected batch size = number of cells as #batches is 1.";
@@ -387,7 +392,7 @@ TEST(SimpleDMatrix, SaveLoadBinary) {
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
data::SimpleDMatrix *simple_dmat = dynamic_cast<data::SimpleDMatrix*>(dmat);
const std::string tmp_binfile = tempdir.path + "/csr_source.binary";

View File

@@ -16,14 +16,19 @@
#include "../helpers.h"
using namespace xgboost; // NOLINT
namespace {
std::string UriSVM(std::string name, std::string cache) {
return name + "?format=libsvm" + "#" + cache + ".cache";
}
} // namespace
template <typename Page>
void TestSparseDMatrixLoadFile() {
void TestSparseDMatrixLoadFile(Context const* ctx) {
dmlc::TemporaryDirectory tmpdir;
auto opath = tmpdir.path + "/1-based.svm";
CreateBigTestData(opath, 3 * 64, false);
opath += "?indexing_mode=1";
data::FileIterator iter{opath, 0, 1, "libsvm"};
opath += "?indexing_mode=1&format=libsvm";
data::FileIterator iter{opath, 0, 1};
auto n_threads = 0;
data::SparsePageDMatrix m{&iter,
iter.Proxy(),
@@ -43,7 +48,7 @@ void TestSparseDMatrixLoadFile() {
data::SimpleDMatrix simple{&adapter, std::numeric_limits<float>::quiet_NaN(),
1};
Page out;
for (auto const& page : m.GetBatches<Page>()) {
for (auto const &page : m.GetBatches<Page>(ctx)) {
if (std::is_same<Page, SparsePage>::value) {
out.Push(page);
} else {
@@ -53,7 +58,7 @@ void TestSparseDMatrixLoadFile() {
ASSERT_EQ(m.Info().num_col_, simple.Info().num_col_);
ASSERT_EQ(m.Info().num_row_, simple.Info().num_row_);
for (auto const& page : simple.GetBatches<Page>()) {
for (auto const& page : simple.GetBatches<Page>(ctx)) {
ASSERT_EQ(page.offset.HostVector(), out.offset.HostVector());
for (size_t i = 0; i < page.data.Size(); ++i) {
ASSERT_EQ(page.data.HostVector()[i].fvalue, out.data.HostVector()[i].fvalue);
@@ -62,16 +67,18 @@ void TestSparseDMatrixLoadFile() {
}
TEST(SparsePageDMatrix, LoadFile) {
TestSparseDMatrixLoadFile<SparsePage>();
TestSparseDMatrixLoadFile<CSCPage>();
TestSparseDMatrixLoadFile<SortedCSCPage>();
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
TestSparseDMatrixLoadFile<SparsePage>(&ctx);
TestSparseDMatrixLoadFile<CSCPage>(&ctx);
TestSparseDMatrixLoadFile<SortedCSCPage>(&ctx);
}
// allow caller to retain pages so they can process multiple pages at the same time.
template <typename Page>
void TestRetainPage() {
auto m = CreateSparsePageDMatrix(10000);
auto batches = m->GetBatches<Page>();
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto batches = m->GetBatches<Page>(&ctx);
auto begin = batches.begin();
auto end = batches.end();
@@ -95,7 +102,7 @@ void TestRetainPage() {
}
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<Page>()) {
for (auto &page : m->GetBatches<Page>({&ctx})) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
}
@@ -112,15 +119,13 @@ TEST(SparsePageDMatrix, MetaInfo) {
size_t constexpr kEntries = 24;
CreateBigTestData(tmp_file, kEntries);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache", false);
std::unique_ptr<DMatrix> dmat{xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file), false)};
// Test the metadata that was parsed
EXPECT_EQ(dmat->Info().num_row_, 8ul);
EXPECT_EQ(dmat->Info().num_col_, 5ul);
EXPECT_EQ(dmat->Info().num_nonzero_, kEntries);
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
delete dmat;
}
TEST(SparsePageDMatrix, RowAccess) {
@@ -139,11 +144,12 @@ TEST(SparsePageDMatrix, ColAccess) {
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file));
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
// Loop over the batches and assert the data is as expected
size_t iter = 0;
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>(&ctx)) {
auto col_page = col_batch.GetView();
ASSERT_EQ(col_page.Size(), dmat->Info().num_col_);
if (iter == 1) {
@@ -161,7 +167,7 @@ TEST(SparsePageDMatrix, ColAccess) {
// Loop over the batches and assert the data is as expected
iter = 0;
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>()) {
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
auto col_page = col_batch.GetView();
EXPECT_EQ(col_page.Size(), dmat->Info().num_col_);
if (iter == 0) {
@@ -179,9 +185,9 @@ TEST(SparsePageDMatrix, ColAccess) {
TEST(SparsePageDMatrix, ThreadSafetyException) {
size_t constexpr kEntriesPerCol = 3;
size_t constexpr kEntries = 64 * kEntriesPerCol * 2;
Context ctx;
std::unique_ptr<xgboost::DMatrix> dmat =
xgboost::CreateSparsePageDMatrix(kEntries);
std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix(kEntries);
int threads = 1000;
@@ -218,7 +224,8 @@ TEST(SparsePageDMatrix, ColAccessBatches) {
// Create multiple sparse pages
std::unique_ptr<xgboost::DMatrix> dmat{xgboost::CreateSparsePageDMatrix(kEntries)};
ASSERT_EQ(dmat->Ctx()->Threads(), AllThreadsForTest());
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>()) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
for (auto const &page : dmat->GetBatches<xgboost::CSCPage>(&ctx)) {
ASSERT_EQ(dmat->Info().num_col_, page.Size());
}
}
@@ -231,7 +238,7 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) {
std::string filename = tempdir.path + "/simple.libsvm";
CreateBigTestData(filename, 1 << 16);
data::FileIterator iter(filename, 0, 1, "auto");
data::FileIterator iter(filename + "?format=libsvm", 0, 1);
std::unique_ptr<DMatrix> sparse{
new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
std::numeric_limits<float>::quiet_NaN(), threads, filename}};

View File

@@ -1,23 +1,28 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include <xgboost/data.h> // for DMatrix
#include "../../../src/common/compressed_iterator.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_dmatrix.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../../../src/tree/param.h" // TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
TEST(SparsePageDMatrix, EllpackPage) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
DMatrix* dmat = DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
DMatrix* dmat = DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache");
// Loop over the batches and assert the data is as expected
size_t n = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
n += batch.Size();
}
EXPECT_EQ(n, dmat->Info().num_row_);
@@ -37,6 +42,8 @@ TEST(SparsePageDMatrix, EllpackPage) {
}
TEST(SparsePageDMatrix, MultipleEllpackPages) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
dmlc::TemporaryDirectory tmpdir;
std::string filename = tmpdir.path + "/big.libsvm";
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
@@ -46,7 +53,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
batch_count++;
row_count += batch.Size();
@@ -61,8 +68,11 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
}
TEST(SparsePageDMatrix, RetainEllpackPage) {
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};
auto m = CreateSparsePageDMatrix(10000);
auto batches = m->GetBatches<EllpackPage>({0, 32});
auto batches = m->GetBatches<EllpackPage>(&ctx, param);
auto begin = batches.begin();
auto end = batches.end();
@@ -87,7 +97,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
}
// make sure it's const and the caller can not modify the content of page.
for (auto& page : m->GetBatches<EllpackPage>({0, 32})) {
for (auto& page : m->GetBatches<EllpackPage>(&ctx, param)) {
static_assert(std::is_const<std::remove_reference_t<decltype(page)>>::value);
}
@@ -98,6 +108,7 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
}
TEST(SparsePageDMatrix, EllpackPageContent) {
auto ctx = CreateEmptyGenericParam(0);
constexpr size_t kRows = 6;
constexpr size_t kCols = 2;
constexpr size_t kPageSize = 1;
@@ -110,8 +121,8 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, 2};
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_EQ(impl->base_rowid, 0);
EXPECT_EQ(impl->n_rows, kRows);
EXPECT_FALSE(impl->is_dense);
@@ -120,7 +131,7 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
std::unique_ptr<EllpackPageImpl> impl_ext;
size_t offset = 0;
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
if (!impl_ext) {
impl_ext.reset(new EllpackPageImpl(
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
@@ -170,8 +181,9 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, kMaxBins};
auto impl = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_EQ(impl->base_rowid, 0);
EXPECT_EQ(impl->n_rows, kRows);
@@ -180,7 +192,7 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
thrust::device_vector<bst_float> row_ext_d(kCols);
std::vector<bst_float> row(kCols);
std::vector<bst_float> row_ext(kCols);
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
auto impl_ext = page.Impl();
EXPECT_EQ(impl_ext->base_rowid, current_row);
@@ -211,10 +223,11 @@ TEST(SparsePageDMatrix, EllpackPageMultipleLoops) {
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
BatchParam param{0, kMaxBins};
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{kMaxBins, tree::TrainParam::DftSparseThreshold()};
size_t current_row = 0;
for (auto& page : dmat_ext->GetBatches<EllpackPage>(param)) {
for (auto& page : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
auto impl_ext = page.Impl();
EXPECT_EQ(impl_ext->base_rowid, current_row);
current_row += impl_ext->n_rows;

View File

@@ -1,17 +1,24 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
#include "../../../src/data/sparse_page_source.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include <memory> // for allocator, unique_ptr, __shared_ptr_ac...
#include <string> // for char_traits, operator+, basic_string
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
#include "../helpers.h" // for RandomDataGenerator
#include "dmlc/filesystem.h" // for TemporaryDirectory
#include "dmlc/io.h" // for SeekStream, Stream
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
#include "xgboost/context.h" // for Context
namespace xgboost {
namespace data {
template <typename S> void TestSparsePageRawFormat() {
std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
Context ctx;
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
ASSERT_TRUE(m->SingleColBlock());
@@ -21,7 +28,7 @@ template <typename S> void TestSparsePageRawFormat() {
{
// block code to flush the stream
std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
for (auto const &page : m->GetBatches<S>()) {
for (auto const &page : m->GetBatches<S>(&ctx)) {
orig.Push(page);
format->Write(page, fo.get());
}

View File

@@ -167,18 +167,20 @@ xgboost::bst_float GetMetricEval(xgboost::Metric* metric,
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
std::vector<xgboost::bst_float> labels,
std::vector<xgboost::bst_float> weights,
std::vector<xgboost::bst_uint> groups) {
std::vector<xgboost::bst_uint> groups,
xgboost::DataSplitMode data_split_mode) {
return GetMultiMetricEval(
metric, preds,
xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1}, weights,
groups);
groups, data_split_mode);
}
double GetMultiMetricEval(xgboost::Metric* metric,
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
xgboost::linalg::Tensor<float, 2> const& labels,
std::vector<xgboost::bst_float> weights,
std::vector<xgboost::bst_uint> groups) {
std::vector<xgboost::bst_uint> groups,
xgboost::DataSplitMode data_split_mode) {
std::shared_ptr<xgboost::DMatrix> p_fmat{xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix()};
auto& info = p_fmat->Info();
info.num_row_ = labels.Shape(0);
@@ -186,7 +188,10 @@ double GetMultiMetricEval(xgboost::Metric* metric,
info.labels.Data()->Copy(*labels.Data());
info.weights_.HostVector() = weights;
info.group_ptr_ = groups;
info.data_split_mode = data_split_mode;
if (info.IsVerticalFederated() && xgboost::collective::GetRank() != 0) {
info.labels.Reshape(0);
}
return metric->Evaluate(preds, p_fmat);
}
@@ -543,7 +548,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
}
fo.close();
std::string uri = tmp_file;
std::string uri = tmp_file + "?format=libsvm";
if (page_size > 0) {
uri += "#" + tmp_file + ".cache";
}

View File

@@ -39,6 +39,18 @@
#define GPUIDX -1
#endif
#if defined(__CUDACC__)
#define DeclareUnifiedDistributedTest(name) MGPU ## name
#else
#define DeclareUnifiedDistributedTest(name) name
#endif
#if defined(__CUDACC__)
#define WORLD_SIZE_FOR_TEST (xgboost::common::AllVisibleGPUs())
#else
#define WORLD_SIZE_FOR_TEST (3)
#endif
namespace xgboost {
class ObjFunction;
class Metric;
@@ -92,13 +104,15 @@ xgboost::bst_float GetMetricEval(
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
std::vector<xgboost::bst_float> labels,
std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>());
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>(),
xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow);
double GetMultiMetricEval(xgboost::Metric* metric,
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
xgboost::linalg::Tensor<float, 2> const& labels,
std::vector<xgboost::bst_float> weights = {},
std::vector<xgboost::bst_uint> groups = {});
std::vector<xgboost::bst_uint> groups = {},
xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow);
namespace xgboost {
@@ -374,6 +388,11 @@ inline Context CreateEmptyGenericParam(int gpu_id) {
return tparam;
}
/**
* \brief Make a context that uses CUDA.
*/
inline Context MakeCUDACtx(std::int32_t device) { return Context{}.MakeCUDA(device); }
inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_rows,
float lower= 0.0f, float upper = 1.0f) {
xgboost::SimpleLCG gen;
@@ -496,4 +515,17 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&
thread.join();
}
}
class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
protected:
int world_size_;
void SetUp() override {
world_size_ = WORLD_SIZE_FOR_TEST;
if (world_size_ <= 1) {
GTEST_SKIP() << "Skipping MGPU test with # GPUs = " << world_size_;
}
}
};
} // namespace xgboost

View File

@@ -1,261 +1,68 @@
#include "test_auc.h"
#include <xgboost/metric.h>
#include "../helpers.h"
namespace xgboost {
namespace metric {
TEST(Metric, DeclareUnifiedTest(BinaryAUC)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> uni_ptr {Metric::Create("auc", &ctx)};
Metric * metric = uni_ptr.get();
ASSERT_STREQ(metric->Name(), "auc");
TEST(Metric, DeclareUnifiedTest(BinaryAUC)) { VerifyBinaryAUC(); }
// Binary
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {1, 0}), 0.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {0, 1}), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {0, 1}), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {1, 0}), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {1, 0}), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {1, 0, 0}, {0, 0, 1}), 0.25f, 1e-10);
TEST(Metric, DeclareUnifiedTest(MultiClassAUC)) { VerifyMultiClassAUC(); }
// Invalid dataset
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
float auc = metric->Evaluate({1, 1}, p_fmat);
ASSERT_TRUE(std::isnan(auc));
*info.labels.Data() = HostDeviceVector<float>{};
auc = metric->Evaluate(HostDeviceVector<float>{}, p_fmat);
ASSERT_TRUE(std::isnan(auc));
TEST(Metric, DeclareUnifiedTest(RankingAUC)) { VerifyRankingAUC(); }
EXPECT_NEAR(GetMetricEval(metric, {0, 1, 0, 1}, {0, 1, 0, 1}), 1.0f, 1e-10);
TEST(Metric, DeclareUnifiedTest(PRAUC)) { VerifyPRAUC(); }
// AUC with instance weights
EXPECT_NEAR(GetMetricEval(metric,
{0.9f, 0.1f, 0.4f, 0.3f},
{0, 0, 1, 1},
{1.0f, 3.0f, 2.0f, 4.0f}),
0.75f, 0.001f);
TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) { VerifyMultiClassPRAUC(); }
// regression test case
ASSERT_NEAR(GetMetricEval(
metric,
{0.79523796, 0.5201713, 0.79523796, 0.24273258, 0.53452194,
0.53452194, 0.24273258, 0.5201713, 0.79523796, 0.53452194,
0.24273258, 0.53452194, 0.79523796, 0.5201713, 0.24273258,
0.5201713, 0.5201713, 0.53452194, 0.5201713, 0.53452194},
{0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0}),
0.5, 1e-10);
TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) { VerifyRankingPRAUC(); }
TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(MultiClassAUC)) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> uni_ptr{
Metric::Create("auc", &ctx)};
auto metric = uni_ptr.get();
// MultiClass
// 3x3
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{0, 1, 2}),
1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{0, 1, 2},
{1.0f, 1.0f, 1.0f}),
1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{2, 1, 0}),
0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{2, 0, 1}),
0.25f, 1e-10);
// invalid dataset
float auc = GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{0, 1, 1}); // no class 2.
EXPECT_TRUE(std::isnan(auc)) << auc;
HostDeviceVector<float> predts{
0.0f, 1.0f, 0.0f,
1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f,
0.0f, 0.0f, 1.0f,
};
std::vector<float> labels {1.0f, 0.0f, 2.0f, 1.0f};
auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
ASSERT_GT(auc, 0.714);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), BinaryAUCColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyBinaryAUC, DataSplitMode::kCol);
}
TEST(Metric, DeclareUnifiedTest(RankingAUC)) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("auc", &ctx)};
// single group
EXPECT_NEAR(GetMetricEval(metric.get(), {0.7f, 0.2f, 0.3f, 0.6f},
{1.0f, 0.8f, 0.4f, 0.2f}, /*weights=*/{},
{0, 4}),
0.5f, 1e-10);
// multi group
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2},
{0, 1, 2, 0, 1, 2}, /*weights=*/{}, {0, 3, 6}),
1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2},
{0, 1, 2, 0, 1, 2}, /*weights=*/{1.0f, 2.0f},
{0, 3, 6}),
1.0f, 1e-10);
// AUC metric for grouped datasets - exception scenarios
ASSERT_TRUE(std::isnan(
GetMetricEval(metric.get(), {0, 1, 2}, {0, 0, 0}, {}, {0, 2, 3})));
// regression case
HostDeviceVector<float> predt{0.33935383, 0.5149714, 0.32138085, 1.4547751,
1.2010975, 0.42651367, 0.23104341, 0.83610827,
0.8494239, 0.07136688, 0.5623144, 0.8086237,
1.5066161, -4.094787, 0.76887935, -2.4082742};
std::vector<bst_group_t> groups{0, 7, 16};
std::vector<float> labels{1., 0., 0., 1., 2., 1., 0., 0.,
0., 0., 0., 0., 1., 0., 1., 0.};
EXPECT_NEAR(GetMetricEval(metric.get(), std::move(predt), labels,
/*weights=*/{}, groups),
0.769841f, 1e-6);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(PRAUC)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric *metric = xgboost::Metric::Create("aucpr", &ctx);
ASSERT_STREQ(metric->Name(), "aucpr");
EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}),
0.5f, 0.001f);
EXPECT_NEAR(GetMetricEval(
metric,
{0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f},
{0, 0, 0, 0, 0, 1, 0, 0, 1, 1}),
0.2908445f, 0.001f);
EXPECT_NEAR(GetMetricEval(
metric, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
{0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}),
0.2769199f, 0.001f);
auto auc = GetMetricEval(metric, {0, 1}, {});
ASSERT_TRUE(std::isnan(auc));
// AUCPR with instance weights
EXPECT_NEAR(GetMetricEval(metric,
{0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f,
0.17f, 0.47f, 0.13f, 0.43f, 0.59f, 0.87f, 0.007f},
{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0},
{1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f,
4.5f}), // weights
0.694435f, 0.001f);
// Both groups contain only pos or neg samples.
auc = GetMetricEval(metric,
{0, 0.1f, 0.3f, 0.5f, 0.7f},
{1, 1, 0, 0, 0},
{},
{0, 2, 5});
ASSERT_TRUE(std::isnan(auc));
delete metric;
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassAUCColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassAUC, DataSplitMode::kCol);
}
TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};
float auc = 0;
std::vector<float> labels {1.0f, 0.0f, 2.0f};
HostDeviceVector<float> predts{
0.0f, 1.0f, 0.0f,
1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f,
};
auc = GetMetricEval(metric.get(), predts, labels, {});
EXPECT_EQ(auc, 1.0f);
auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 1.0f, 1.0f});
EXPECT_EQ(auc, 1.0f);
predts.HostVector() = {
0.0f, 1.0f, 0.0f,
1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f,
0.0f, 0.0f, 1.0f,
};
labels = {1.0f, 0.0f, 2.0f, 1.0f};
auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
ASSERT_GT(auc, 0.699);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingAUCColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRankingAUC, DataSplitMode::kCol);
}
std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kRow);
}
std::vector<float> labels {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f};
std::vector<uint32_t> groups {0, 2, 6};
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PRAUCColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyPRAUC, DataSplitMode::kCol);
}
float auc = 0;
auc = GetMetricEval(metric.get(), {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}, labels, {}, groups);
EXPECT_EQ(auc, 1.0f);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kRow);
}
auc = GetMetricEval(metric.get(), {1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f}, labels, {}, groups);
EXPECT_EQ(auc, 1.0f);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassPRAUCColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassPRAUC, DataSplitMode::kCol);
}
auc = GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
{1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {}, groups);
ASSERT_TRUE(std::isnan(auc));
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kRow);
}
// Incorrect label
ASSERT_THROW(GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
{1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, {}, groups),
dmlc::Error);
// AUCPR with groups and no weights
EXPECT_NEAR(GetMetricEval(
metric.get(), {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
{0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1},
{}, // weights
{0, 2, 5, 9, 14, 20}), // group info
0.556021f, 0.001f);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RankingPRAUCColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRankingPRAUC, DataSplitMode::kCol);
}
} // namespace metric
} // namespace xgboost

249
tests/cpp/metric/test_auc.h Normal file
View File

@@ -0,0 +1,249 @@
/*!
* Copyright (c) 2023 by XGBoost Contributors
*/
#pragma once
#include <xgboost/metric.h>
#include "../helpers.h"
namespace xgboost {
namespace metric {
inline void VerifyBinaryAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> uni_ptr{Metric::Create("auc", &ctx)};
Metric* metric = uni_ptr.get();
ASSERT_STREQ(metric->Name(), "auc");
// Binary
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {1, 0}, {}, {}, data_split_mode), 0.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 0}, {1, 0}, {}, {}, data_split_mode), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {1, 1}, {1, 0}, {}, {}, data_split_mode), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {1, 0, 0}, {0, 0, 1}, {}, {}, data_split_mode), 0.25f, 1e-10);
// Invalid dataset
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
float auc = metric->Evaluate({1, 1}, p_fmat);
ASSERT_TRUE(std::isnan(auc));
*info.labels.Data() = HostDeviceVector<float>{};
auc = metric->Evaluate(HostDeviceVector<float>{}, p_fmat);
ASSERT_TRUE(std::isnan(auc));
EXPECT_NEAR(GetMetricEval(metric, {0, 1, 0, 1}, {0, 1, 0, 1}, {}, {}, data_split_mode), 1.0f,
1e-10);
// AUC with instance weights
EXPECT_NEAR(GetMetricEval(metric, {0.9f, 0.1f, 0.4f, 0.3f}, {0, 0, 1, 1},
{1.0f, 3.0f, 2.0f, 4.0f}, {}, data_split_mode),
0.75f, 0.001f);
// regression test case
ASSERT_NEAR(GetMetricEval(metric, {0.79523796, 0.5201713, 0.79523796, 0.24273258, 0.53452194,
0.53452194, 0.24273258, 0.5201713, 0.79523796, 0.53452194,
0.24273258, 0.53452194, 0.79523796, 0.5201713, 0.24273258,
0.5201713, 0.5201713, 0.53452194, 0.5201713, 0.53452194},
{0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0}, {}, {},
data_split_mode),
0.5, 1e-10);
}
inline void VerifyMultiClassAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> uni_ptr{Metric::Create("auc", &ctx)};
auto metric = uni_ptr.get();
// MultiClass
// 3x3
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{0, 1, 2}, {}, {}, data_split_mode),
1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{0, 1, 2}, {1.0f, 1.0f, 1.0f}, {}, data_split_mode),
1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{2, 1, 0}, {}, {}, data_split_mode),
0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{2, 0, 1}, {}, {}, data_split_mode),
0.25f, 1e-10);
// invalid dataset
float auc = GetMetricEval(metric,
{
1.0f, 0.0f, 0.0f, // p_0
0.0f, 1.0f, 0.0f, // p_1
0.0f, 0.0f, 1.0f // p_2
},
{0, 1, 1}, {}, {}, data_split_mode); // no class 2.
EXPECT_TRUE(std::isnan(auc)) << auc;
HostDeviceVector<float> predts{
0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f,
};
std::vector<float> labels{1.0f, 0.0f, 2.0f, 1.0f};
auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}, {}, data_split_mode);
ASSERT_GT(auc, 0.714);
}
inline void VerifyRankingAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("auc", &ctx)};
// single group
EXPECT_NEAR(GetMetricEval(metric.get(), {0.7f, 0.2f, 0.3f, 0.6f}, {1.0f, 0.8f, 0.4f, 0.2f},
/*weights=*/{}, {0, 4}, data_split_mode),
0.5f, 1e-10);
// multi group
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2}, /*weights=*/{},
{0, 3, 6}, data_split_mode),
1.0f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1, 2, 0, 1, 2}, {0, 1, 2, 0, 1, 2},
/*weights=*/{1.0f, 2.0f}, {0, 3, 6}, data_split_mode),
1.0f, 1e-10);
// AUC metric for grouped datasets - exception scenarios
ASSERT_TRUE(std::isnan(
GetMetricEval(metric.get(), {0, 1, 2}, {0, 0, 0}, {}, {0, 2, 3}, data_split_mode)));
// regression case
HostDeviceVector<float> predt{
0.33935383, 0.5149714, 0.32138085, 1.4547751, 1.2010975, 0.42651367, 0.23104341, 0.83610827,
0.8494239, 0.07136688, 0.5623144, 0.8086237, 1.5066161, -4.094787, 0.76887935, -2.4082742};
std::vector<bst_group_t> groups{0, 7, 16};
std::vector<float> labels{1., 0., 0., 1., 2., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0.};
EXPECT_NEAR(GetMetricEval(metric.get(), std::move(predt), labels,
/*weights=*/{}, groups, data_split_mode),
0.769841f, 1e-6);
}
inline void VerifyPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric* metric = xgboost::Metric::Create("aucpr", &ctx);
ASSERT_STREQ(metric->Name(), "aucpr");
EXPECT_NEAR(GetMetricEval(metric, {0, 0, 1, 1}, {0, 0, 1, 1}, {}, {}, data_split_mode), 1, 1e-10);
EXPECT_NEAR(
GetMetricEval(metric, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode), 0.5f,
0.001f);
EXPECT_NEAR(GetMetricEval(metric, {0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f},
{0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, {}, {}, data_split_mode),
0.2908445f, 0.001f);
EXPECT_NEAR(
GetMetricEval(metric, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f,
0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
{0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, {}, {},
data_split_mode),
0.2769199f, 0.001f);
auto auc = GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode);
ASSERT_TRUE(std::isnan(auc));
// AUCPR with instance weights
EXPECT_NEAR(GetMetricEval(metric,
{0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f, 0.17f, 0.47f, 0.13f,
0.43f, 0.59f, 0.87f, 0.007f},
{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0},
{1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f, 4.5f}, // weights
{}, data_split_mode),
0.694435f, 0.001f);
// Both groups contain only pos or neg samples.
auc = GetMetricEval(metric, {0, 0.1f, 0.3f, 0.5f, 0.7f}, {1, 1, 0, 0, 0}, {}, {0, 2, 5},
data_split_mode);
ASSERT_TRUE(std::isnan(auc));
delete metric;
}
inline void VerifyMultiClassPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};
float auc = 0;
std::vector<float> labels{1.0f, 0.0f, 2.0f};
HostDeviceVector<float> predts{
0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
};
auc = GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode);
EXPECT_EQ(auc, 1.0f);
auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 1.0f, 1.0f}, {}, data_split_mode);
EXPECT_EQ(auc, 1.0f);
predts.HostVector() = {
0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f,
};
labels = {1.0f, 0.0f, 2.0f, 1.0f};
auc = GetMetricEval(metric.get(), predts, labels, {1.0f, 2.0f, 3.0f, 4.0f}, {}, data_split_mode);
ASSERT_GT(auc, 0.699);
}
inline void VerifyRankingPRAUC(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("aucpr", &ctx)};
std::vector<float> labels{1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f};
std::vector<uint32_t> groups{0, 2, 6};
float auc = 0;
auc = GetMetricEval(metric.get(), {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f}, labels, {}, groups,
data_split_mode);
EXPECT_EQ(auc, 1.0f);
auc = GetMetricEval(metric.get(), {1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f}, labels, {}, groups,
data_split_mode);
EXPECT_EQ(auc, 1.0f);
auc = GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
{1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {}, groups, data_split_mode);
ASSERT_TRUE(std::isnan(auc));
// Incorrect label
ASSERT_THROW(GetMetricEval(metric.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
{1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, {}, groups, data_split_mode),
dmlc::Error);
// AUCPR with groups and no weights
EXPECT_NEAR(
GetMetricEval(metric.get(),
{0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f, 0.09f, 0.10f, 0.97f,
0.76f, 0.69f, 0.15f, 0.20f, 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
{0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}, {}, // weights
{0, 2, 5, 9, 14, 20}, // group info
data_split_mode),
0.556021f, 0.001f);
}
} // namespace metric
} // namespace xgboost

View File

@@ -1,347 +1,108 @@
/**
* Copyright 2018-2023 by XGBoost contributors
*/
#include <xgboost/json.h>
#include <xgboost/metric.h>
#include <map>
#include <memory>
#include "../../../src/common/linalg_op.h"
#include "../helpers.h"
namespace xgboost {
namespace {
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
auto ctx = CreateEmptyGenericParam(device);
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
HostDeviceVector<float> predts;
size_t n_samples = 2048;
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels.Reshape(n_samples, 1);
info.num_row_ = n_samples;
auto &h_labels = info.labels.Data()->HostVector();
auto &h_predts = predts.HostVector();
SimpleLCG lcg;
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
h_labels.resize(n_samples);
h_predts.resize(n_samples);
for (size_t i = 0; i < n_samples; ++i) {
h_predts[i] = dist(&lcg);
h_labels[i] = dist(&lcg);
}
auto result = metric->Evaluate(predts, p_fmat);
for (size_t i = 0; i < 8; ++i) {
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
}
}
} // anonymous namespace
} // namespace xgboost
#include "test_elementwise_metric.h"
namespace xgboost {
namespace metric {
TEST(Metric, DeclareUnifiedTest(RMSE)) { VerifyRMSE(); }
TEST(Metric, DeclareUnifiedTest(RMSE)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("rmse", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "rmse");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.6403f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}),
2.8284f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
0.6708f, 0.001f);
delete metric;
TEST(Metric, DeclareUnifiedTest(RMSLE)) { VerifyRMSLE(); }
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"rmse"}, GPUIDX);
TEST(Metric, DeclareUnifiedTest(MAE)) { VerifyMAE(); }
TEST(Metric, DeclareUnifiedTest(MAPE)) { VerifyMAPE(); }
TEST(Metric, DeclareUnifiedTest(MPHE)) { VerifyMPHE(); }
TEST(Metric, DeclareUnifiedTest(LogLoss)) { VerifyLogLoss(); }
TEST(Metric, DeclareUnifiedTest(Error)) { VerifyError(); }
TEST(Metric, DeclareUnifiedTest(PoissonNegLogLik)) { VerifyPoissonNegLogLik(); }
TEST(Metric, DeclareUnifiedTest(MultiRMSE)) { VerifyMultiRMSE(); }
TEST(Metric, DeclareUnifiedTest(Quantile)) { VerifyQuantile(); }
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSERowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(RMSLE)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("rmsle", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "rmsle");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}),
0.4063f, 1e-4);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{ 0, -1, 1, -9, 9}),
0.6212f, 1e-4);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{ 0, 1, 2, 9, 8}),
0.2415f, 1e-4);
delete metric;
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"rmsle"}, GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSEColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRMSE, DataSplitMode::kCol);
}
TEST(Metric, DeclareUnifiedTest(MAE)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("mae", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mae");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.5f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}),
8.0f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
0.54f, 0.001f);
delete metric;
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mae"}, GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLERowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(MAPE)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("mape", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mape");
EXPECT_NEAR(GetMetricEval(metric, {150, 300}, {100, 200}), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{50, 400, 500, 4000},
{100, 200, 500, 1000}),
1.125f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{50, 400, 500, 4000},
{100, 200, 500, 1000},
{ -1, 1, 9, -9}),
-26.5f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{50, 400, 500, 4000},
{100, 200, 500, 1000},
{ 1, 2, 9, 8}),
1.3250f, 0.001f);
delete metric;
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mape"}, GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), RMSLEColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyRMSLE, DataSplitMode::kCol);
}
TEST(Metric, DeclareUnifiedTest(MPHE)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("mphe", &ctx)};
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mphe");
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.1751f, 1e-4);
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}),
3.4037f, 1e-4);
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
0.1922f, 1e-4);
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"mphe"}, GPUIDX);
metric->Configure({{"huber_slope", "0.1"}});
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
0.0461686f, 1e-4);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAERowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(LogLoss)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("logloss", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "logloss");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
{ 0, 0, 1, 1}),
0.1996f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
1.2039f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}),
21.9722f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
1.3138f, 0.001f);
delete metric;
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"logloss"}, GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAEColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMAE, DataSplitMode::kCol);
}
TEST(Metric, DeclareUnifiedTest(Error)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("error", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "error");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.5f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}),
10.0f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
0.55f, 0.001f);
EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc", &ctx));
delete metric;
metric = xgboost::Metric::Create("error@0.5f", &ctx);
metric->Configure({});
EXPECT_STREQ(metric->Name(), "error");
delete metric;
metric = xgboost::Metric::Create("error@0.1", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "error@0.1");
EXPECT_STREQ(metric->Name(), "error@0.1");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{-0.1f, -0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.25f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{-0.1f, -0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}),
9.0f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{-0.1f, -0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
0.45f, 0.001f);
delete metric;
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"error@0.5"}, GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPERowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(PoissionNegLogLik)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "poisson-nloglik");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
{ 0, 0, 1, 1}),
0.6263f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
1.1019f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}),
13.3750f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}),
1.5783f, 0.001f);
delete metric;
xgboost::CheckDeterministicMetricElementWise(xgboost::StringView{"poisson-nloglik"}, GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPEColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMAPE, DataSplitMode::kCol);
}
TEST(Metric, DeclareUnifiedTest(MultiRMSE)) {
size_t n_samples = 32, n_targets = 8;
linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
auto &h_y = y.Data()->HostVector();
std::iota(h_y.begin(), h_y.end(), 0);
HostDeviceVector<float> predt(n_samples * n_targets, 0);
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
metric->Configure({});
auto loss = GetMultiMetricEval(metric.get(), predt, y);
std::vector<float> weights(n_samples, 1);
auto loss_w = GetMultiMetricEval(metric.get(), predt, y, weights);
std::transform(h_y.cbegin(), h_y.cend(), h_y.begin(), [](auto &v) { return v * v; });
auto ret = std::sqrt(std::accumulate(h_y.cbegin(), h_y.cend(), 1.0, std::plus<>{}) / h_y.size());
ASSERT_FLOAT_EQ(ret, loss);
ASSERT_FLOAT_EQ(ret, loss_w);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHERowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(Quantile)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("quantile", &ctx)};
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MPHEColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMPHE, DataSplitMode::kCol);
}
HostDeviceVector<float> predts{0.1f, 0.9f, 0.1f, 0.9f};
std::vector<float> labels{0.5f, 0.5f, 0.9f, 0.1f};
std::vector<float> weights{0.2f, 0.4f,0.6f, 0.8f};
TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kRow);
}
metric->Configure(Args{{"quantile_alpha", "[0.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.400f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.2]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.376f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.4]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.352f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.8]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.304f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights), 0.28f, 0.001f);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), LogLossColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyLogLoss, DataSplitMode::kCol);
}
metric->Configure(Args{{"quantile_alpha", "[0.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.2]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.4]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.8]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels), 0.3f, 0.001f);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kRow);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), ErrorColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyError, DataSplitMode::kCol);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kRow);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PoissonNegLogLikColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyPoissonNegLogLik, DataSplitMode::kCol);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSERowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kRow);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiRMSEColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiRMSE, DataSplitMode::kCol);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kRow);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), QuantileColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyQuantile, DataSplitMode::kCol);
}
} // namespace metric
} // namespace xgboost

View File

@@ -0,0 +1,385 @@
/**
* Copyright 2018-2023 by XGBoost contributors
*/
#pragma once
#include <xgboost/json.h>
#include <xgboost/metric.h>
#include <map>
#include <memory>
#include "../../../src/common/linalg_op.h"
#include "../helpers.h"
namespace xgboost {
namespace metric {
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
auto ctx = CreateEmptyGenericParam(device);
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
HostDeviceVector<float> predts;
size_t n_samples = 2048;
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels.Reshape(n_samples, 1);
info.num_row_ = n_samples;
auto &h_labels = info.labels.Data()->HostVector();
auto &h_predts = predts.HostVector();
SimpleLCG lcg;
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
h_labels.resize(n_samples);
h_predts.resize(n_samples);
for (size_t i = 0; i < n_samples; ++i) {
h_predts[i] = dist(&lcg);
h_labels[i] = dist(&lcg);
}
auto result = metric->Evaluate(predts, p_fmat);
for (size_t i = 0; i < 8; ++i) {
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
}
}
inline void VerifyRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("rmse", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "rmse");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.6403f, 0.001f);
auto expected = 2.8284f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected = sqrt(8.0f * collective::GetWorldSize());
}
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
0.6708f, 0.001f);
delete metric;
CheckDeterministicMetricElementWise(StringView{"rmse"}, GPUIDX);
}
inline void VerifyRMSLE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("rmsle", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "rmsle");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, {}, {}, data_split_mode),
0.4063f, 1e-4);
auto expected = 0.6212f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected = sqrt(0.3859f * collective::GetWorldSize());
}
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{ 0, -1, 1, -9, 9}, {}, data_split_mode),
expected, 1e-4);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{ 0, 1, 2, 9, 8}, {}, data_split_mode),
0.2415f, 1e-4);
delete metric;
CheckDeterministicMetricElementWise(StringView{"rmsle"}, GPUIDX);
}
inline void VerifyMAE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("mae", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mae");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.5f, 0.001f);
auto expected = 8.0f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected *= collective::GetWorldSize();
}
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
0.54f, 0.001f);
delete metric;
CheckDeterministicMetricElementWise(StringView{"mae"}, GPUIDX);
}
inline void VerifyMAPE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("mape", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mape");
EXPECT_NEAR(GetMetricEval(metric, {150, 300}, {100, 200}, {}, {}, data_split_mode), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{50, 400, 500, 4000},
{100, 200, 500, 1000}, {}, {}, data_split_mode),
1.125f, 0.001f);
auto expected = -26.5f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected *= collective::GetWorldSize();
}
EXPECT_NEAR(GetMetricEval(metric,
{50, 400, 500, 4000},
{100, 200, 500, 1000},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{50, 400, 500, 4000},
{100, 200, 500, 1000},
{ 1, 2, 9, 8}, {}, data_split_mode),
1.3250f, 0.001f);
delete metric;
CheckDeterministicMetricElementWise(StringView{"mape"}, GPUIDX);
}
inline void VerifyMPHE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("mphe", &ctx)};
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mphe");
EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.1751f, 1e-4);
auto expected = 3.40375f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected *= collective::GetWorldSize();
}
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 1e-4);
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
0.1922f, 1e-4);
CheckDeterministicMetricElementWise(StringView{"mphe"}, GPUIDX);
metric->Configure({{"huber_slope", "0.1"}});
EXPECT_NEAR(GetMetricEval(metric.get(),
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
0.0461686f, 1e-4);
}
inline void VerifyLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("logloss", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "logloss");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.1996f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
1.2039f, 0.001f);
auto expected = 21.9722f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected *= collective::GetWorldSize();
}
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
1.3138f, 0.001f);
delete metric;
CheckDeterministicMetricElementWise(StringView{"logloss"}, GPUIDX);
}
inline void VerifyError(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("error", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "error");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.5f, 0.001f);
auto expected = 10.0f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected *= collective::GetWorldSize();
}
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
0.55f, 0.001f);
EXPECT_ANY_THROW(xgboost::Metric::Create("error@abc", &ctx));
delete metric;
metric = xgboost::Metric::Create("error@0.5f", &ctx);
metric->Configure({});
EXPECT_STREQ(metric->Name(), "error");
delete metric;
metric = xgboost::Metric::Create("error@0.1", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "error@0.1");
EXPECT_STREQ(metric->Name(), "error@0.1");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{-0.1f, -0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.25f, 0.001f);
expected = 9.0f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected *= collective::GetWorldSize();
}
EXPECT_NEAR(GetMetricEval(metric,
{-0.1f, -0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{-0.1f, -0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
0.45f, 0.001f);
delete metric;
CheckDeterministicMetricElementWise(StringView{"error@0.5"}, GPUIDX);
}
inline void VerifyPoissonNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("poisson-nloglik", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "poisson-nloglik");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.5f, 1e-17f, 1.0f+1e-17f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.6263f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
1.1019f, 0.001f);
auto expected = 13.3750f;
if (collective::IsDistributed() && data_split_mode == DataSplitMode::kRow) {
expected *= collective::GetWorldSize();
}
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ -1, 1, 9, -9}, {}, data_split_mode),
expected, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1},
{ 1, 2, 9, 8}, {}, data_split_mode),
1.5783f, 0.001f);
delete metric;
CheckDeterministicMetricElementWise(StringView{"poisson-nloglik"}, GPUIDX);
}
inline void VerifyMultiRMSE(DataSplitMode data_split_mode = DataSplitMode::kRow) {
size_t n_samples = 32, n_targets = 8;
linalg::Tensor<float, 2> y{{n_samples, n_targets}, GPUIDX};
auto &h_y = y.Data()->HostVector();
std::iota(h_y.begin(), h_y.end(), 0);
HostDeviceVector<float> predt(n_samples * n_targets, 0);
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("rmse", &ctx)};
metric->Configure({});
auto loss = GetMultiMetricEval(metric.get(), predt, y, {}, {}, data_split_mode);
std::vector<float> weights(n_samples, 1);
auto loss_w = GetMultiMetricEval(metric.get(), predt, y, weights, {}, data_split_mode);
std::transform(h_y.cbegin(), h_y.cend(), h_y.begin(), [](auto &v) { return v * v; });
auto ret = std::sqrt(std::accumulate(h_y.cbegin(), h_y.cend(), 1.0, std::plus<>{}) / h_y.size());
ASSERT_FLOAT_EQ(ret, loss);
ASSERT_FLOAT_EQ(ret, loss_w);
}
inline void VerifyQuantile(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<Metric> metric{Metric::Create("quantile", &ctx)};
HostDeviceVector<float> predts{0.1f, 0.9f, 0.1f, 0.9f};
std::vector<float> labels{0.5f, 0.5f, 0.9f, 0.1f};
std::vector<float> weights{0.2f, 0.4f, 0.6f, 0.8f};
metric->Configure(Args{{"quantile_alpha", "[0.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.400f,
0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.2]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.376f,
0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.4]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.352f,
0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.8]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.304f,
0.001f);
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, weights, {}, data_split_mode), 0.28f,
0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.2]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.4]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[0.8]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
metric->Configure(Args{{"quantile_alpha", "[1.0]"}});
EXPECT_NEAR(GetMetricEval(metric.get(), predts, labels, {}, {}, data_split_mode), 0.3f, 0.001f);
}
} // namespace metric
} // namespace xgboost

View File

@@ -1,87 +1,29 @@
// Copyright by Contributors
#include <xgboost/metric.h>
#include "test_multiclass_metric.h"
#include <string>
#include "../helpers.h"
namespace xgboost {
inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device) {
auto ctx = CreateEmptyGenericParam(device);
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
namespace metric {
HostDeviceVector<float> predts;
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
auto &h_predts = predts.HostVector();
TEST(Metric, DeclareUnifiedTest(MultiClassError)) { VerifyMultiClassError(); }
SimpleLCG lcg;
TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) { VerifyMultiClassLogLoss(); }
size_t n_samples = 2048, n_classes = 4;
info.labels.Reshape(n_samples);
auto &h_labels = info.labels.Data()->HostVector();
h_predts.resize(n_samples * n_classes);
{
SimpleRealUniformDistribution<float> dist{0.0f, static_cast<float>(n_classes)};
for (size_t i = 0; i < n_samples; ++i) {
h_labels[i] = dist(&lcg);
}
}
{
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
for (size_t i = 0; i < n_samples * n_classes; ++i) {
h_predts[i] = dist(&lcg);
}
}
auto result = metric->Evaluate(predts, p_fmat);
for (size_t i = 0; i < 8; ++i) {
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kRow);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassErrorColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassError, DataSplitMode::kCol);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kRow);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MultiClassLogLossColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMultiClassLogLoss, DataSplitMode::kCol);
}
} // namespace metric
} // namespace xgboost
inline void TestMultiClassError(int device) {
auto ctx = xgboost::CreateEmptyGenericParam(device);
ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "merror");
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
EXPECT_NEAR(GetMetricEval(
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
{0, 1, 2}),
0.666f, 0.001f);
delete metric;
}
TEST(Metric, DeclareUnifiedTest(MultiClassError)) {
TestMultiClassError(GPUIDX);
xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"merror"}, GPUIDX);
}
inline void TestMultiClassLogLoss(int device) {
auto ctx = xgboost::CreateEmptyGenericParam(device);
ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mlogloss");
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
EXPECT_NEAR(GetMetricEval(
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
{0, 1, 2}),
2.302f, 0.001f);
delete metric;
}
TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) {
TestMultiClassLogLoss(GPUIDX);
xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"mlogloss"}, GPUIDX);
}

View File

@@ -0,0 +1,91 @@
// Copyright by Contributors
#include <xgboost/metric.h>
#include <string>
#include "../helpers.h"
namespace xgboost {
namespace metric {
inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device) {
auto ctx = CreateEmptyGenericParam(device);
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
HostDeviceVector<float> predts;
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
auto &h_predts = predts.HostVector();
SimpleLCG lcg;
size_t n_samples = 2048, n_classes = 4;
info.labels.Reshape(n_samples);
auto &h_labels = info.labels.Data()->HostVector();
h_predts.resize(n_samples * n_classes);
{
SimpleRealUniformDistribution<float> dist{0.0f, static_cast<float>(n_classes)};
for (size_t i = 0; i < n_samples; ++i) {
h_labels[i] = dist(&lcg);
}
}
{
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
for (size_t i = 0; i < n_samples * n_classes; ++i) {
h_predts[i] = dist(&lcg);
}
}
auto result = metric->Evaluate(predts, p_fmat);
for (size_t i = 0; i < 8; ++i) {
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
}
}
inline void TestMultiClassError(int device, DataSplitMode data_split_mode) {
auto ctx = xgboost::CreateEmptyGenericParam(device);
ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "merror");
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode));
EXPECT_NEAR(GetMetricEval(
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
{0, 1, 2}, {}, {}, data_split_mode),
0.666f, 0.001f);
delete metric;
}
inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode::kRow) {
TestMultiClassError(GPUIDX, data_split_mode);
CheckDeterministicMetricMultiClass(StringView{"merror"}, GPUIDX);
}
inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) {
auto ctx = xgboost::CreateEmptyGenericParam(device);
ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mlogloss");
EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}, {}, {}, data_split_mode));
EXPECT_NEAR(GetMetricEval(
metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
{0, 1, 2}, {}, {}, data_split_mode),
2.302f, 0.001f);
delete metric;
}
inline void VerifyMultiClassLogLoss(DataSplitMode data_split_mode = DataSplitMode::kRow) {
TestMultiClassLogLoss(GPUIDX, data_split_mode);
CheckDeterministicMetricMultiClass(StringView{"mlogloss"}, GPUIDX);
}
} // namespace metric
} // namespace xgboost

View File

@@ -11,16 +11,20 @@
#include <memory> // for unique_ptr
#include <vector> // for vector
#include "test_rank_metric.h"
#include "../helpers.h" // for GetMetricEval, CreateEmptyGe...
#include "xgboost/base.h" // for bst_float, kRtEps
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, String, Object
namespace xgboost {
namespace metric {
#if !defined(__CUDACC__) && !defined(__HIP_PLATFORM_AMD__)
TEST(Metric, AMS) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
EXPECT_ANY_THROW(xgboost::Metric::Create("ams", &ctx));
xgboost::Metric* metric = xgboost::Metric::Create("ams@0.5f", &ctx);
auto ctx = CreateEmptyGenericParam(GPUIDX);
EXPECT_ANY_THROW(Metric::Create("ams", &ctx));
Metric* metric = Metric::Create("ams@0.5f", &ctx);
ASSERT_STREQ(metric->Name(), "ams@0.5");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
@@ -29,7 +33,7 @@ TEST(Metric, AMS) {
0.29710f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ams@0", &ctx);
metric = Metric::Create("ams@0", &ctx);
ASSERT_STREQ(metric->Name(), "ams@0");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.311f, 0.001f);
@@ -37,172 +41,44 @@ TEST(Metric, AMS) {
}
#endif
TEST(Metric, DeclareUnifiedTest(Precision)) {
// When the limit for precision is not given, it takes the limit at
// std::numeric_limits<unsigned>::max(); hence all values are very small
// NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx);
ASSERT_STREQ(metric->Name(), "pre");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-7);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0, 1e-7);
TEST(Metric, DeclareUnifiedTest(Precision)) { VerifyPrecision(); }
delete metric;
metric = xgboost::Metric::Create("pre@2", &ctx);
ASSERT_STREQ(metric->Name(), "pre@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0.5f, 1e-7);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.5f, 0.001f);
TEST(Metric, DeclareUnifiedTest(NDCG)) { VerifyNDCG(); }
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
TEST(Metric, DeclareUnifiedTest(MAP)) { VerifyMAP(); }
delete metric;
TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) { VerifyNDCGExpGain(); }
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kRow);
}
namespace xgboost {
namespace metric {
TEST(Metric, DeclareUnifiedTest(NDCG)) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg");
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}));
ASSERT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 1, 1e-10);
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@2", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.3868f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@-", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 0, 1e-10);
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg-", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@2-", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg@2-");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
1.f - 0.3868f, 1.f - 0.001f);
delete metric;
TEST_F(DeclareUnifiedDistributedTest(MetricTest), PrecisionColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyPrecision, DataSplitMode::kCol);
}
TEST(Metric, DeclareUnifiedTest(MAP)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
Metric * metric = xgboost::Metric::Create("map", &ctx);
ASSERT_STREQ(metric->Name(), "map");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, kRtEps);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.5f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
std::vector<xgboost::bst_float>{}), 1, 1e-10);
// Rank metric with group info
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
{1, 1, 1, 0, 1, 0}, // Labels
{}, // Weights
{0, 2, 5, 6}), // Group info
0.8611f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("map@-", &ctx);
ASSERT_STREQ(metric->Name(), "map-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 0, 1e-10);
delete metric;
metric = xgboost::Metric::Create("map-", &ctx);
ASSERT_STREQ(metric->Name(), "map-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}), 0, 1e-10);
delete metric;
metric = xgboost::Metric::Create("map@2", &ctx);
ASSERT_STREQ(metric->Name(), "map@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}),
0.25f, 0.001f);
delete metric;
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(NDCGExpGain)) {
Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyNDCG, DataSplitMode::kCol);
}
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
info.num_row_ = info.labels.Shape(0);
info.group_ptr_.resize(2);
info.group_ptr_[0] = 0;
info.group_ptr_[1] = info.num_row_;
HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kRow);
}
std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)};
Json config{Object{}};
config["name"] = String{"ndcg"};
config["lambdarank_param"] = Object{};
config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"};
config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"};
metric->LoadConfig(config);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), MAPColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyMAP, DataSplitMode::kCol);
}
auto ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 0.409738f, kRtEps);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kRow);
}
config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"};
metric->LoadConfig(config);
ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 0.695694f, kRtEps);
predt.HostVector() = info.labels.Data()->HostVector();
ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 1.0, kRtEps);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), NDCGExpGainColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyNDCGExpGain, DataSplitMode::kCol);
}
} // namespace metric
} // namespace xgboost

View File

@@ -0,0 +1,191 @@
/**
* Copyright 2016-2023 by XGBoost Contributors
*/
#pragma once
#include <gtest/gtest.h> // for Test, EXPECT_NEAR, ASSERT_STREQ
#include <xgboost/context.h> // for Context
#include <xgboost/data.h> // for MetaInfo, DMatrix
#include <xgboost/linalg.h> // for Matrix
#include <xgboost/metric.h> // for Metric
#include <algorithm> // for max
#include <memory> // for unique_ptr
#include <vector> // for vector
#include "../helpers.h" // for GetMetricEval, CreateEmptyGe...
#include "xgboost/base.h" // for bst_float, kRtEps
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, String, Object
namespace xgboost {
namespace metric {
inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) {
// When the limit for precision is not given, it takes the limit at
// std::numeric_limits<unsigned>::max(); hence all values are very small
// NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx);
ASSERT_STREQ(metric->Name(), "pre");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-7);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0, 1e-7);
delete metric;
metric = xgboost::Metric::Create("pre@2", &ctx);
ASSERT_STREQ(metric->Name(), "pre@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.5f, 0.001f);
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode));
delete metric;
}
inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = CreateEmptyGenericParam(GPUIDX);
Metric * metric = xgboost::Metric::Create("ndcg", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg");
EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode));
ASSERT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}, {}, {}, data_split_mode), 1, 1e-10);
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@2", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.3868f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@-", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}, {}, {}, data_split_mode), 0, 1e-10);
ASSERT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg-", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}, {}, {}, data_split_mode), 0, 1e-10);
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.6509f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("ndcg@2-", &ctx);
ASSERT_STREQ(metric->Name(), "ndcg@2-");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1.f, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
1.f - 0.3868f, 1.f - 0.001f);
delete metric;
}
inline void VerifyMAP(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
Metric * metric = xgboost::Metric::Create("map", &ctx);
ASSERT_STREQ(metric->Name(), "map");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, kRtEps);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.5f, 0.001f);
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
std::vector<xgboost::bst_float>{}, {}, {}, data_split_mode), 1, 1e-10);
// Rank metric with group info
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
{1, 1, 1, 0, 1, 0}, // Labels
{}, // Weights
{0, 2, 5, 6}, // Group info
data_split_mode),
0.8611f, 0.001f);
delete metric;
metric = xgboost::Metric::Create("map@-", &ctx);
ASSERT_STREQ(metric->Name(), "map-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}, {}, {}, data_split_mode), 0, 1e-10);
delete metric;
metric = xgboost::Metric::Create("map-", &ctx);
ASSERT_STREQ(metric->Name(), "map-");
EXPECT_NEAR(GetMetricEval(metric,
xgboost::HostDeviceVector<xgboost::bst_float>{},
{}, {}, {}, data_split_mode), 0, 1e-10);
delete metric;
metric = xgboost::Metric::Create("map@2", &ctx);
ASSERT_STREQ(metric->Name(), "map@2");
EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 1, 1e-10);
EXPECT_NEAR(GetMetricEval(metric,
{0.1f, 0.9f, 0.1f, 0.9f},
{ 0, 0, 1, 1}, {}, {}, data_split_mode),
0.25f, 0.001f);
delete metric;
}
inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRow) {
Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
auto p_fmat = xgboost::RandomDataGenerator{0, 0, 0}.GenerateDMatrix();
MetaInfo& info = p_fmat->Info();
info.labels = linalg::Matrix<float>{{10.0f, 0.0f, 0.0f, 1.0f, 5.0f}, {5}, ctx.gpu_id};
info.num_row_ = info.labels.Shape(0);
info.group_ptr_.resize(2);
info.group_ptr_[0] = 0;
info.group_ptr_[1] = info.num_row_;
info.data_split_mode = data_split_mode;
HostDeviceVector<float> predt{{0.1f, 0.2f, 0.3f, 4.0f, 70.0f}};
std::unique_ptr<Metric> metric{Metric::Create("ndcg", &ctx)};
Json config{Object{}};
config["name"] = String{"ndcg"};
config["lambdarank_param"] = Object{};
config["lambdarank_param"]["ndcg_exp_gain"] = String{"true"};
config["lambdarank_param"]["lambdarank_num_pair_per_sample"] = String{"32"};
metric->LoadConfig(config);
auto ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 0.409738f, kRtEps);
config["lambdarank_param"]["ndcg_exp_gain"] = String{"false"};
metric->LoadConfig(config);
ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 0.695694f, kRtEps);
predt.HostVector() = info.labels.Data()->HostVector();
ndcg = metric->Evaluate(predt, p_fmat);
ASSERT_NEAR(ndcg, 1.0, kRtEps);
}
} // namespace metric
} // namespace xgboost

View File

@@ -2,105 +2,31 @@
* Copyright (c) by Contributors 2020
*/
#include <gtest/gtest.h>
#include <cmath>
#include "test_survival_metric.h"
#include "xgboost/metric.h"
#include "../helpers.h"
#include "../../../src/common/survival_util.h"
/** Tests for Survival metrics that should run both on CPU and GPU **/
namespace xgboost {
namespace common {
namespace {
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
auto ctx = CreateEmptyGenericParam(device);
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
metric->Configure(Args{});
TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) { VerifyAFTNegLogLik(); }
HostDeviceVector<float> predts;
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
auto &h_predts = predts.HostVector();
SimpleLCG lcg;
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
size_t n_samples = 2048;
h_predts.resize(n_samples);
for (size_t i = 0; i < n_samples; ++i) {
h_predts[i] = dist(&lcg);
}
auto &h_upper = info.labels_upper_bound_.HostVector();
auto &h_lower = info.labels_lower_bound_.HostVector();
h_lower.resize(n_samples);
h_upper.resize(n_samples);
for (size_t i = 0; i < n_samples; ++i) {
h_lower[i] = 1;
h_upper[i] = 10;
}
auto result = metric->Evaluate(predts, p_fmat);
for (size_t i = 0; i < 8; ++i) {
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
}
}
} // anonymous namespace
TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
/**
* Test aggregate output from the AFT metric over a small test data set.
* This is unlike AFTLoss.* tests, which verify metric values over individual data points.
**/
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 4;
info.labels_lower_bound_.HostVector()
= { 100.0f, 0.0f, 60.0f, 16.0f };
info.labels_upper_bound_.HostVector()
= { 100.0f, 20.0f, std::numeric_limits<bst_float>::infinity(), 200.0f };
info.weights_.HostVector() = std::vector<bst_float>();
HostDeviceVector<bst_float> preds(4, std::log(64));
struct TestCase {
std::string dist_type;
bst_float reference_value;
};
for (const auto& test_case : std::vector<TestCase>{ {"normal", 2.1508f}, {"logistic", 2.1804f},
{"extreme", 2.0706f} }) {
std::unique_ptr<Metric> metric(Metric::Create("aft-nloglik", &ctx));
metric->Configure({ {"aft_loss_distribution", test_case.dist_type},
{"aft_loss_distribution_scale", "1.0"} });
EXPECT_NEAR(metric->Evaluate(preds, p_fmat), test_case.reference_value, 1e-4);
}
TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kRow);
}
TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), AFTNegLogLikColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyAFTNegLogLik, DataSplitMode::kCol);
}
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 4;
info.labels_lower_bound_.HostVector() = { 20.0f, 0.0f, 60.0f, 16.0f };
info.labels_upper_bound_.HostVector() = { 80.0f, 20.0f, 80.0f, 200.0f };
info.weights_.HostVector() = std::vector<bst_float>();
HostDeviceVector<bst_float> preds(4, std::log(60.0f));
TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) { VerifyIntervalRegressionAccuracy(); }
std::unique_ptr<Metric> metric(Metric::Create("interval-regression-accuracy", &ctx));
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f);
info.labels_lower_bound_.HostVector()[2] = 70.0f;
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
info.labels_upper_bound_.HostVector()[2] = std::numeric_limits<bst_float>::infinity();
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
info.labels_upper_bound_.HostVector()[3] = std::numeric_limits<bst_float>::infinity();
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
info.labels_lower_bound_.HostVector()[0] = 70.0f;
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyRowSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kRow);
}
CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX);
TEST_F(DeclareUnifiedDistributedTest(MetricTest), IntervalRegressionAccuracyColumnSplit) {
RunWithInMemoryCommunicator(world_size_, &VerifyIntervalRegressionAccuracy, DataSplitMode::kCol);
}
// Test configuration of AFT metric
@@ -118,6 +44,5 @@ TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) {
CheckDeterministicMetricElementWise(StringView{"aft-nloglik"}, GPUIDX);
}
} // namespace common
} // namespace xgboost

View File

@@ -0,0 +1,107 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#pragma once
#include <gtest/gtest.h>
#include <cmath>
#include "../../../src/common/survival_util.h"
#include "../helpers.h"
#include "xgboost/metric.h"
namespace xgboost {
namespace common {
inline void CheckDeterministicMetricElementWise(StringView name, int32_t device) {
auto ctx = CreateEmptyGenericParam(device);
std::unique_ptr<Metric> metric{Metric::Create(name.c_str(), &ctx)};
metric->Configure(Args{});
HostDeviceVector<float> predts;
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
auto &h_predts = predts.HostVector();
SimpleLCG lcg;
SimpleRealUniformDistribution<float> dist{0.0f, 1.0f};
size_t n_samples = 2048;
h_predts.resize(n_samples);
for (size_t i = 0; i < n_samples; ++i) {
h_predts[i] = dist(&lcg);
}
auto &h_upper = info.labels_upper_bound_.HostVector();
auto &h_lower = info.labels_lower_bound_.HostVector();
h_lower.resize(n_samples);
h_upper.resize(n_samples);
for (size_t i = 0; i < n_samples; ++i) {
h_lower[i] = 1;
h_upper[i] = 10;
}
auto result = metric->Evaluate(predts, p_fmat);
for (size_t i = 0; i < 8; ++i) {
ASSERT_EQ(metric->Evaluate(predts, p_fmat), result);
}
}
inline void VerifyAFTNegLogLik(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
/**
* Test aggregate output from the AFT metric over a small test data set.
* This is unlike AFTLoss.* tests, which verify metric values over individual data points.
**/
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 4;
info.labels_lower_bound_.HostVector()
= { 100.0f, 0.0f, 60.0f, 16.0f };
info.labels_upper_bound_.HostVector()
= { 100.0f, 20.0f, std::numeric_limits<bst_float>::infinity(), 200.0f };
info.weights_.HostVector() = std::vector<bst_float>();
info.data_split_mode = data_split_mode;
HostDeviceVector<bst_float> preds(4, std::log(64));
struct TestCase {
std::string dist_type;
bst_float reference_value;
};
for (const auto& test_case : std::vector<TestCase>{ {"normal", 2.1508f}, {"logistic", 2.1804f},
{"extreme", 2.0706f} }) {
std::unique_ptr<Metric> metric(Metric::Create("aft-nloglik", &ctx));
metric->Configure({ {"aft_loss_distribution", test_case.dist_type},
{"aft_loss_distribution_scale", "1.0"} });
EXPECT_NEAR(metric->Evaluate(preds, p_fmat), test_case.reference_value, 1e-4);
}
}
inline void VerifyIntervalRegressionAccuracy(DataSplitMode data_split_mode = DataSplitMode::kRow) {
auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
info.num_row_ = 4;
info.labels_lower_bound_.HostVector() = { 20.0f, 0.0f, 60.0f, 16.0f };
info.labels_upper_bound_.HostVector() = { 80.0f, 20.0f, 80.0f, 200.0f };
info.weights_.HostVector() = std::vector<bst_float>();
info.data_split_mode = data_split_mode;
HostDeviceVector<bst_float> preds(4, std::log(60.0f));
std::unique_ptr<Metric> metric(Metric::Create("interval-regression-accuracy", &ctx));
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.75f);
info.labels_lower_bound_.HostVector()[2] = 70.0f;
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
info.labels_upper_bound_.HostVector()[2] = std::numeric_limits<bst_float>::infinity();
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
info.labels_upper_bound_.HostVector()[3] = std::numeric_limits<bst_float>::infinity();
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.50f);
info.labels_lower_bound_.HostVector()[0] = 70.0f;
EXPECT_FLOAT_EQ(metric->Evaluate(preds, p_fmat), 0.25f);
CheckDeterministicMetricElementWise(StringView{"interval-regression-accuracy"}, GPUIDX);
}
} // namespace common
} // namespace xgboost

View File

@@ -5,6 +5,7 @@
#include <gtest/gtest.h> // for Test, Message, TestPartResult, CmpHel...
#include <algorithm> // for sort
#include <cstddef> // for size_t
#include <initializer_list> // for initializer_list
#include <map> // for map
@@ -13,7 +14,6 @@
#include <string> // for char_traits, basic_string, string
#include <vector> // for vector
#include "../../../src/common/ranking_utils.h" // for LambdaRankParam
#include "../../../src/common/ranking_utils.h" // for NDCGCache, LambdaRankParam
#include "../helpers.h" // for CheckRankingObjFunction, CheckConfigReload
#include "xgboost/base.h" // for GradientPair, bst_group_t, Args
@@ -25,6 +25,126 @@
#include "xgboost/span.h" // for Span
namespace xgboost::obj {
TEST(LambdaRank, NDCGJsonIO) {
Context ctx;
TestNDCGJsonIO(&ctx);
}
void TestNDCGGPair(Context const* ctx) {
{
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
CheckConfigReload(obj, "rank:ndcg");
// No gain in swapping 2 documents.
CheckRankingObjFunction(obj,
{1, 1, 1, 1},
{1, 1, 1, 1},
{1.0f, 1.0f},
{0, 2, 4},
{0.0f, -0.0f, 0.0f, 0.0f},
{0.0f, 0.0f, 0.0f, 0.0f});
}
{
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
// Test with setting sample weight to second query group
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{2.0f, 0.0f},
{0, 2, 4},
{2.06611f, -2.06611f, 0.0f, 0.0f},
{2.169331f, 2.169331f, 0.0f, 0.0f});
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{2.0f, 2.0f},
{0, 2, 4},
{2.06611f, -2.06611f, 2.06611f, -2.06611f},
{2.169331f, 2.169331f, 2.169331f, 2.169331f});
}
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
HostDeviceVector<float> predts{0, 1, 0, 1};
MetaInfo info;
info.labels = linalg::Tensor<float, 2>{{0, 1, 0, 1}, {4, 1}, GPUIDX};
info.group_ptr_ = {0, 2, 4};
info.num_row_ = 4;
HostDeviceVector<GradientPair> gpairs;
obj->GetGradient(predts, info, 0, &gpairs);
ASSERT_EQ(gpairs.Size(), predts.Size());
{
predts = {1, 0, 1, 0};
HostDeviceVector<GradientPair> gpairs;
obj->GetGradient(predts, info, 0, &gpairs);
for (size_t i = 0; i < gpairs.Size(); ++i) {
ASSERT_GT(gpairs.HostSpan()[i].GetHess(), 0);
}
ASSERT_LT(gpairs.HostSpan()[1].GetGrad(), 0);
ASSERT_LT(gpairs.HostSpan()[3].GetGrad(), 0);
ASSERT_GT(gpairs.HostSpan()[0].GetGrad(), 0);
ASSERT_GT(gpairs.HostSpan()[2].GetGrad(), 0);
info.weights_ = {2, 3};
HostDeviceVector<GradientPair> weighted_gpairs;
obj->GetGradient(predts, info, 0, &weighted_gpairs);
auto const& h_gpairs = gpairs.ConstHostSpan();
auto const& h_weighted_gpairs = weighted_gpairs.ConstHostSpan();
for (size_t i : {0ul, 1ul}) {
ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 2.0f);
ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 2.0f);
}
for (size_t i : {2ul, 3ul}) {
ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetGrad(), h_gpairs[i].GetGrad() * 3.0f);
ASSERT_FLOAT_EQ(h_weighted_gpairs[i].GetHess(), h_gpairs[i].GetHess() * 3.0f);
}
}
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(LambdaRank, NDCGGPair) {
Context ctx;
TestNDCGGPair(&ctx);
}
void TestUnbiasedNDCG(Context const* ctx) {
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
obj->Configure(Args{{"lambdarank_pair_method", "topk"},
{"lambdarank_unbiased", "true"},
{"lambdarank_bias_norm", "0"}});
std::shared_ptr<DMatrix> p_fmat{RandomDataGenerator{10, 1, 0.0f}.GenerateDMatrix(true, false, 2)};
auto h_label = p_fmat->Info().labels.HostView().Values();
// Move clicked samples to the beginning.
std::sort(h_label.begin(), h_label.end(), std::greater<>{});
HostDeviceVector<float> predt(p_fmat->Info().num_row_, 1.0f);
HostDeviceVector<GradientPair> out_gpair;
obj->GetGradient(predt, p_fmat->Info(), 0, &out_gpair);
Json config{Object{}};
obj->SaveConfig(&config);
auto ti_plus = get<F32Array const>(config["ti+"]);
ASSERT_FLOAT_EQ(ti_plus[0], 1.0);
// bias is non-increasing when prediction is constant. (constant cost on swapping documents)
for (std::size_t i = 1; i < ti_plus.size(); ++i) {
ASSERT_LE(ti_plus[i], ti_plus[i - 1]);
}
auto tj_minus = get<F32Array const>(config["tj-"]);
ASSERT_FLOAT_EQ(tj_minus[0], 1.0);
}
TEST(LambdaRank, UnbiasedNDCG) {
Context ctx;
TestUnbiasedNDCG(&ctx);
}
void InitMakePairTest(Context const* ctx, MetaInfo* out_info, HostDeviceVector<float>* out_predt) {
out_predt->SetDevice(ctx->gpu_id);
MetaInfo& info = *out_info;
@@ -103,4 +223,125 @@ TEST(LambdaRank, MakePair) {
ASSERT_EQ(n_pairs, info.num_row_ * param.NumPair());
}
}
void TestMAPStat(Context const* ctx) {
auto p_fmat = EmptyDMatrix();
MetaInfo& info = p_fmat->Info();
ltr::LambdaRankParam param;
param.UpdateAllowUnknown(Args{});
{
std::vector<float> h_data{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f};
info.labels.Reshape(h_data.size(), 1);
info.labels.Data()->HostVector() = h_data;
info.num_row_ = h_data.size();
HostDeviceVector<float> predt;
auto& h_predt = predt.HostVector();
h_predt.resize(h_data.size());
std::iota(h_predt.rbegin(), h_predt.rend(), 0.0f);
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
if (ctx->IsCPU()) {
obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
p_cache);
} else {
obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache);
}
Context cpu_ctx;
auto n_rel = p_cache->NumRelevant(&cpu_ctx);
auto acc = p_cache->Acc(&cpu_ctx);
ASSERT_EQ(n_rel[0], 1.0);
ASSERT_EQ(acc[0], 1.0);
ASSERT_EQ(n_rel.back(), h_data.size() - 1.0);
ASSERT_NEAR(acc.back(), 1.95 + (1.0 / h_data.size()), kRtEps);
}
{
info.labels.Reshape(16);
auto& h_label = info.labels.Data()->HostVector();
info.group_ptr_ = {0, 8, 16};
info.num_row_ = info.labels.Shape(0);
std::fill_n(h_label.begin(), 8, 1.0f);
std::fill_n(h_label.begin() + 8, 8, 0.0f);
HostDeviceVector<float> predt;
auto& h_predt = predt.HostVector();
h_predt.resize(h_label.size());
std::iota(h_predt.rbegin(), h_predt.rbegin() + 8, 0.0f);
std::iota(h_predt.rbegin() + 8, h_predt.rend(), 0.0f);
auto p_cache = std::make_shared<ltr::MAPCache>(ctx, info, param);
predt.SetDevice(ctx->gpu_id);
auto rank_idx =
p_cache->SortedIdx(ctx, ctx->IsCPU() ? predt.ConstHostSpan() : predt.ConstDeviceSpan());
if (ctx->IsCPU()) {
obj::cpu_impl::MAPStat(ctx, info.labels.HostView().Slice(linalg::All(), 0), rank_idx,
p_cache);
} else {
obj::cuda_impl::MAPStat(ctx, info, rank_idx, p_cache);
}
Context cpu_ctx;
auto n_rel = p_cache->NumRelevant(&cpu_ctx);
ASSERT_EQ(n_rel[7], 8); // first group
ASSERT_EQ(n_rel.back(), 0); // second group
}
}
TEST(LambdaRank, MAPStat) {
Context ctx;
TestMAPStat(&ctx);
}
void TestMAPGPair(Context const* ctx) {
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:map", ctx)};
Args args;
obj->Configure(args);
CheckConfigReload(obj, "rank:map");
CheckRankingObjFunction(obj, // obj
{0, 0.1f, 0, 0.1f}, // score
{0, 1, 0, 1}, // label
{2.0f, 2.0f}, // weight
{0, 2, 4}, // group
{1.2054923f, -1.2054923f, 1.2054923f, -1.2054923f}, // out grad
{1.2657166f, 1.2657166f, 1.2657166f, 1.2657166f});
// disable the second query group with 0 weight
CheckRankingObjFunction(obj, // obj
{0, 0.1f, 0, 0.1f}, // score
{0, 1, 0, 1}, // label
{2.0f, 0.0f}, // weight
{0, 2, 4}, // group
{1.2054923f, -1.2054923f, .0f, .0f}, // out grad
{1.2657166f, 1.2657166f, .0f, .0f});
}
TEST(LambdaRank, MAPGPair) {
Context ctx;
TestMAPGPair(&ctx);
}
void TestPairWiseGPair(Context const* ctx) {
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:pairwise", ctx)};
Args args;
obj->Configure(args);
args.emplace_back("lambdarank_unbiased", "true");
}
TEST(LambdaRank, Pairwise) {
Context ctx;
TestPairWiseGPair(&ctx);
}
} // namespace xgboost::obj

View File

@@ -12,6 +12,24 @@
#include "test_lambdarank_obj.h"
namespace xgboost::obj {
TEST(LambdaRank, GPUNDCGJsonIO) {
Context ctx;
ctx.gpu_id = 0;
TestNDCGJsonIO(&ctx);
}
TEST(LambdaRank, GPUMAPStat) {
Context ctx;
ctx.gpu_id = 0;
TestMAPStat(&ctx);
}
TEST(LambdaRank, GPUNDCGGPair) {
Context ctx;
ctx.gpu_id = 0;
TestNDCGGPair(&ctx);
}
void TestGPUMakePair() {
Context ctx;
ctx.gpu_id = 0;
@@ -107,6 +125,12 @@ void TestGPUMakePair() {
TEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); }
TEST(LambdaRank, GPUUnbiasedNDCG) {
Context ctx;
ctx.gpu_id = 0;
TestUnbiasedNDCG(&ctx);
}
template <typename CountFunctor>
void RankItemCountImpl(std::vector<std::uint32_t> const &sorted_items, CountFunctor f,
std::uint32_t find_val, std::uint32_t exp_val) {
@@ -135,4 +159,10 @@ TEST(LambdaRank, RankItemCountOnRight) {
RankItemCountImpl(sorted_items, wrapper, 1, static_cast<uint32_t>(1));
RankItemCountImpl(sorted_items, wrapper, 0, static_cast<uint32_t>(0));
}
TEST(LambdaRank, GPUMAPGPair) {
Context ctx;
ctx.gpu_id = 0;
TestMAPGPair(&ctx);
}
} // namespace xgboost::obj

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright (c) 2023, XGBoost Contributors
*/
#ifndef XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
#define XGBOOST_OBJECTIVE_TEST_LAMBDARANK_OBJ_H_
@@ -18,6 +18,29 @@
#include "../helpers.h" // for EmptyDMatrix
namespace xgboost::obj {
void TestMAPStat(Context const* ctx);
inline void TestNDCGJsonIO(Context const* ctx) {
std::unique_ptr<xgboost::ObjFunction> obj{ObjFunction::Create("rank:ndcg", ctx)};
obj->Configure(Args{});
Json j_obj{Object()};
obj->SaveConfig(&j_obj);
ASSERT_EQ(get<String>(j_obj["name"]), "rank:ndcg");
auto const& j_param = j_obj["lambdarank_param"];
ASSERT_EQ(get<String>(j_param["ndcg_exp_gain"]), "1");
ASSERT_EQ(get<String>(j_param["lambdarank_num_pair_per_sample"]),
std::to_string(ltr::LambdaRankParam::NotSet()));
}
void TestNDCGGPair(Context const* ctx);
void TestUnbiasedNDCG(Context const* ctx);
void TestMAPGPair(Context const* ctx);
/**
* \brief Initialize test data for make pair tests.
*/

View File

@@ -1,128 +0,0 @@
// Copyright by Contributors
#include <xgboost/context.h>
#include <xgboost/json.h>
#include <xgboost/objective.h>
#include "../helpers.h"
namespace xgboost {
TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPair)) {
std::vector<std::pair<std::string, std::string>> args;
xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:pairwise", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "rank:pairwise");
// Test with setting sample weight to second query group
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{2.0f, 0.0f},
{0, 2, 4},
{1.9f, -1.9f, 0.0f, 0.0f},
{1.995f, 1.995f, 0.0f, 0.0f});
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{1.0f, 1.0f},
{0, 2, 4},
{0.95f, -0.95f, 0.95f, -0.95f},
{0.9975f, 0.9975f, 0.9975f, 0.9975f});
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(Objective, DeclareUnifiedTest(NDCG_JsonIO)) {
xgboost::Context ctx;
ctx.UpdateAllowUnknown(Args{});
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)};
obj->Configure(Args{});
Json j_obj {Object()};
obj->SaveConfig(&j_obj);
ASSERT_EQ(get<String>(j_obj["name"]), "rank:ndcg");;
auto const& j_param = j_obj["lambda_rank_param"];
ASSERT_EQ(get<String>(j_param["num_pairsample"]), "1");
ASSERT_EQ(get<String>(j_param["fix_list_weight"]), "0");
}
TEST(Objective, DeclareUnifiedTest(PairwiseRankingGPairSameLabels)) {
std::vector<std::pair<std::string, std::string>> args;
xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<ObjFunction> obj{ObjFunction::Create("rank:pairwise", &ctx)};
obj->Configure(args);
// No computation of gradient/hessian, as there is no diversity in labels
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{1, 1, 1, 1},
{2.0f, 0.0f},
{0, 2, 4},
{0.0f, 0.0f, 0.0f, 0.0f},
{0.0f, 0.0f, 0.0f, 0.0f});
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(Objective, DeclareUnifiedTest(NDCGRankingGPair)) {
std::vector<std::pair<std::string, std::string>> args;
xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "rank:ndcg");
// Test with setting sample weight to second query group
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{2.0f, 0.0f},
{0, 2, 4},
{0.7f, -0.7f, 0.0f, 0.0f},
{0.74f, 0.74f, 0.0f, 0.0f});
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{1.0f, 1.0f},
{0, 2, 4},
{0.35f, -0.35f, 0.35f, -0.35f},
{0.368f, 0.368f, 0.368f, 0.368f});
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
TEST(Objective, DeclareUnifiedTest(MAPRankingGPair)) {
std::vector<std::pair<std::string, std::string>> args;
xgboost::Context ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:map", &ctx)};
obj->Configure(args);
CheckConfigReload(obj, "rank:map");
// Test with setting sample weight to second query group
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{2.0f, 0.0f},
{0, 2, 4},
{0.95f, -0.95f, 0.0f, 0.0f},
{0.9975f, 0.9975f, 0.0f, 0.0f});
CheckRankingObjFunction(obj,
{0, 0.1f, 0, 0.1f},
{0, 1, 0, 1},
{1.0f, 1.0f},
{0, 2, 4},
{0.475f, -0.475f, 0.475f, -0.475f},
{0.4988f, 0.4988f, 0.4988f, 0.4988f});
ASSERT_NO_THROW(obj->DefaultEvalMetric());
}
} // namespace xgboost

View File

@@ -1,231 +0,0 @@
/*!
* Copyright 2019-2021 by XGBoost Contributors
*/
#include <thrust/host_vector.h>
#include "test_ranking_obj.cc"
#include "../../../src/objective/rank_obj.cu"
namespace xgboost {
template <typename T = uint32_t, typename Comparator = thrust::greater<T>>
std::unique_ptr<dh::SegmentSorter<T>>
RankSegmentSorterTestImpl(const std::vector<uint32_t> &group_indices,
const std::vector<T> &hlabels,
const std::vector<T> &expected_sorted_hlabels,
const std::vector<uint32_t> &expected_orig_pos
) {
std::unique_ptr<dh::SegmentSorter<T>> seg_sorter_ptr(new dh::SegmentSorter<T>);
dh::SegmentSorter<T> &seg_sorter(*seg_sorter_ptr);
// Create a bunch of unsorted labels on the device and sort it via the segment sorter
dh::device_vector<T> dlabels(hlabels);
seg_sorter.SortItems(dlabels.data().get(), dlabels.size(), group_indices, Comparator());
auto num_items = seg_sorter.GetItemsSpan().size();
EXPECT_EQ(num_items, group_indices.back());
EXPECT_EQ(seg_sorter.GetNumGroups(), group_indices.size() - 1);
// Check the labels
dh::device_vector<T> sorted_dlabels(num_items);
sorted_dlabels.assign(dh::tcbegin(seg_sorter.GetItemsSpan()),
dh::tcend(seg_sorter.GetItemsSpan()));
thrust::host_vector<T> sorted_hlabels(sorted_dlabels);
EXPECT_EQ(expected_sorted_hlabels, sorted_hlabels);
// Check the indices
dh::device_vector<uint32_t> dorig_pos(num_items);
dorig_pos.assign(dh::tcbegin(seg_sorter.GetOriginalPositionsSpan()),
dh::tcend(seg_sorter.GetOriginalPositionsSpan()));
dh::device_vector<uint32_t> horig_pos(dorig_pos);
EXPECT_EQ(expected_orig_pos, horig_pos);
return seg_sorter_ptr;
}
TEST(Objective, RankSegmentSorterTest) {
RankSegmentSorterTestImpl({0, 2, 4, 7, 10, 14, 18, 22, 26}, // Groups
{1, 1, // Labels
1, 2,
3, 2, 1,
1, 2, 1,
1, 3, 4, 2,
1, 2, 1, 1,
1, 2, 2, 3,
3, 3, 1, 2},
{1, 1, // Expected sorted labels
2, 1,
3, 2, 1,
2, 1, 1,
4, 3, 2, 1,
2, 1, 1, 1,
3, 2, 2, 1,
3, 3, 2, 1},
{0, 1, // Expected original positions
3, 2,
4, 5, 6,
8, 7, 9,
12, 11, 13, 10,
15, 14, 16, 17,
21, 19, 20, 18,
22, 23, 25, 24});
}
TEST(Objective, RankSegmentSorterSingleGroupTest) {
RankSegmentSorterTestImpl({0, 7}, // Groups
{6, 1, 4, 3, 0, 5, 2}, // Labels
{6, 5, 4, 3, 2, 1, 0}, // Expected sorted labels
{0, 5, 2, 3, 6, 1, 4}); // Expected original positions
}
TEST(Objective, RankSegmentSorterAscendingTest) {
RankSegmentSorterTestImpl<uint32_t, thrust::less<uint32_t>>(
{0, 4, 7}, // Groups
{3, 1, 4, 2, // Labels
6, 5, 7},
{1, 2, 3, 4, // Expected sorted labels
5, 6, 7},
{1, 3, 0, 2, // Expected original positions
5, 4, 6});
}
TEST(Objective, NDCGLambdaWeightComputerTest) {
std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f, // Labels
7.8f, 5.01f, 6.96f,
10.3f, 8.7f, 11.4f, 9.45f, 11.4f};
dh::device_vector<bst_float> dlabels(hlabels);
auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
{0, 4, 7, 12}, // Groups
hlabels,
{4.4f, 3.1f, 2.3f, 1.2f, // Expected sorted labels
7.8f, 6.96f, 5.01f,
11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
{3, 0, 2, 1, // Expected original positions
4, 6, 5,
9, 11, 7, 10, 8});
// Created segmented predictions for the labels from above
std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
-1.03f, -2.79f, -3.1f,
104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
dh::device_vector<bst_float> dpreds(hpreds);
xgboost::obj::NDCGLambdaWeightComputer ndcg_lw_computer(dpreds.data().get(),
dlabels.data().get(),
*segment_label_sorter);
// Where will the predictions move from its current position, if they were sorted
// descendingly?
auto dsorted_pred_pos = ndcg_lw_computer.GetPredictionSorter().GetIndexableSortedPositionsSpan();
std::vector<uint32_t> hsorted_pred_pos(segment_label_sorter->GetNumItems());
dh::CopyDeviceSpanToVector(&hsorted_pred_pos, dsorted_pred_pos);
std::vector<uint32_t> expected_sorted_pred_pos{2, 0, 1, 3,
4, 5, 6,
7, 8, 11, 9, 10};
EXPECT_EQ(expected_sorted_pred_pos, hsorted_pred_pos);
// Check group DCG values
std::vector<float> hgroup_dcgs(segment_label_sorter->GetNumGroups());
dh::CopyDeviceSpanToVector(&hgroup_dcgs, ndcg_lw_computer.GetGroupDcgsSpan());
std::vector<uint32_t> hgroups(segment_label_sorter->GetNumGroups() + 1);
dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan());
EXPECT_EQ(hgroup_dcgs.size(), segment_label_sorter->GetNumGroups());
std::vector<float> hsorted_labels(segment_label_sorter->GetNumItems());
dh::CopyDeviceSpanToVector(&hsorted_labels, segment_label_sorter->GetItemsSpan());
for (size_t i = 0; i < hgroup_dcgs.size(); ++i) {
// Compute group DCG value on CPU and compare
auto gbegin = hgroups[i];
auto gend = hgroups[i + 1];
EXPECT_NEAR(
hgroup_dcgs[i],
xgboost::obj::NDCGLambdaWeightComputer::ComputeGroupDCGWeight(&hsorted_labels[gbegin],
gend - gbegin),
0.01f);
}
}
TEST(Objective, IndexableSortedItemsTest) {
std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f, // Labels
7.8f, 5.01f, 6.96f,
10.3f, 8.7f, 11.4f, 9.45f, 11.4f};
dh::device_vector<bst_float> dlabels(hlabels);
auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
{0, 4, 7, 12}, // Groups
hlabels,
{4.4f, 3.1f, 2.3f, 1.2f, // Expected sorted labels
7.8f, 6.96f, 5.01f,
11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
{3, 0, 2, 1, // Expected original positions
4, 6, 5,
9, 11, 7, 10, 8});
segment_label_sorter->CreateIndexableSortedPositions();
std::vector<uint32_t> sorted_indices(segment_label_sorter->GetNumItems());
dh::CopyDeviceSpanToVector(&sorted_indices,
segment_label_sorter->GetIndexableSortedPositionsSpan());
std::vector<uint32_t> expected_sorted_indices = {
1, 3, 2, 0,
4, 6, 5,
9, 11, 7, 10, 8};
EXPECT_EQ(expected_sorted_indices, sorted_indices);
}
TEST(Objective, ComputeAndCompareMAPStatsTest) {
std::vector<float> hlabels = {3.1f, 0.0f, 2.3f, 4.4f, // Labels
0.0f, 5.01f, 0.0f,
10.3f, 0.0f, 11.4f, 9.45f, 11.4f};
dh::device_vector<bst_float> dlabels(hlabels);
auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
{0, 4, 7, 12}, // Groups
hlabels,
{4.4f, 3.1f, 2.3f, 0.0f, // Expected sorted labels
5.01f, 0.0f, 0.0f,
11.4f, 11.4f, 10.3f, 9.45f, 0.0f},
{3, 0, 2, 1, // Expected original positions
5, 4, 6,
9, 11, 7, 10, 8});
// Create MAP stats on the device first using the objective
std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
-1.03f, -2.79f, -3.1f,
104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
dh::device_vector<bst_float> dpreds(hpreds);
xgboost::obj::MAPLambdaWeightComputer map_lw_computer(dpreds.data().get(),
dlabels.data().get(),
*segment_label_sorter);
// Get the device MAP stats on host
std::vector<xgboost::obj::MAPLambdaWeightComputer::MAPStats> dmap_stats(
segment_label_sorter->GetNumItems());
dh::CopyDeviceSpanToVector(&dmap_stats, map_lw_computer.GetMapStatsSpan());
// Compute the MAP stats on host next to compare
std::vector<uint32_t> hgroups(segment_label_sorter->GetNumGroups() + 1);
dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan());
for (size_t i = 0; i < hgroups.size() - 1; ++i) {
auto gbegin = hgroups[i];
auto gend = hgroups[i + 1];
std::vector<xgboost::obj::ListEntry> lst_entry;
for (auto j = gbegin; j < gend; ++j) {
lst_entry.emplace_back(hpreds[j], hlabels[j], j);
}
std::stable_sort(lst_entry.begin(), lst_entry.end(), xgboost::obj::ListEntry::CmpPred);
// Compute the MAP stats with this list and compare with the ones computed on the device
std::vector<xgboost::obj::MAPLambdaWeightComputer::MAPStats> hmap_stats;
xgboost::obj::MAPLambdaWeightComputer::GetMAPStats(lst_entry, &hmap_stats);
for (auto j = gbegin; j < gend; ++j) {
EXPECT_EQ(dmap_stats[j].hits, hmap_stats[j - gbegin].hits);
EXPECT_NEAR(dmap_stats[j].ap_acc, hmap_stats[j - gbegin].ap_acc, 0.01f);
EXPECT_NEAR(dmap_stats[j].ap_acc_miss, hmap_stats[j - gbegin].ap_acc_miss, 0.01f);
EXPECT_NEAR(dmap_stats[j].ap_acc_add, hmap_stats[j - gbegin].ap_acc_add, 0.01f);
}
}
}
} // namespace xgboost

View File

@@ -13,25 +13,6 @@
#include "../../../plugin/federated/federated_server.h"
#include "../../../src/collective/communicator-inl.h"
inline int GenerateRandomPort(int low, int high) {
using namespace std::chrono_literals;
// Ensure unique timestamp by introducing a small artificial delay
std::this_thread::sleep_for(100ms);
auto timestamp = static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count());
std::mt19937_64 rng(timestamp);
std::uniform_int_distribution<int> dist(low, high);
int port = dist(rng);
return port;
}
inline std::string GetServerAddress() {
int port = GenerateRandomPort(50000, 60000);
std::string address = std::string("localhost:") + std::to_string(port);
return address;
}
namespace xgboost {
class ServerForTest {
@@ -41,13 +22,14 @@ class ServerForTest {
public:
explicit ServerForTest(std::int32_t world_size) {
server_address_ = GetServerAddress();
server_thread_.reset(new std::thread([this, world_size] {
grpc::ServerBuilder builder;
xgboost::federated::FederatedService service{world_size};
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
int selected_port;
builder.AddListeningPort("localhost:0", grpc::InsecureServerCredentials(), &selected_port);
builder.RegisterService(&service);
server_ = builder.BuildAndStart();
server_address_ = std::string("localhost:") + std::to_string(selected_port);
server_->Wait();
}));
}
@@ -56,7 +38,14 @@ class ServerForTest {
server_->Shutdown();
server_thread_->join();
}
auto Address() const { return server_address_; }
auto Address() const {
using namespace std::chrono_literals;
while (server_address_.empty()) {
std::this_thread::sleep_for(100ms);
}
return server_address_;
}
};
class BaseFederatedTest : public ::testing::Test {
@@ -65,7 +54,7 @@ class BaseFederatedTest : public ::testing::Test {
void TearDown() override { server_.reset(nullptr); }
static int const kWorldSize{3};
static int constexpr kWorldSize{3};
std::unique_ptr<ServerForTest> server_;
};

View File

@@ -62,34 +62,24 @@ class FederatedCommunicatorTest : public BaseFederatedTest {
};
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeTooSmall) {
std::string server_address{GetServerAddress()};
auto construct = [server_address]() {
FederatedCommunicator comm{0, 0, server_address, "", "", ""};
};
auto construct = [] { FederatedCommunicator comm{0, 0, "localhost:0", "", "", ""}; };
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooSmall) {
std::string server_address{GetServerAddress()};
auto construct = [server_address]() {
FederatedCommunicator comm{1, -1, server_address, "", "", ""};
};
auto construct = [] { FederatedCommunicator comm{1, -1, "localhost:0", "", "", ""}; };
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankTooBig) {
std::string server_address{GetServerAddress()};
auto construct = [server_address]() {
FederatedCommunicator comm{1, 1, server_address, "", "", ""};
};
auto construct = [] { FederatedCommunicator comm{1, 1, "localhost:0", "", "", ""}; };
EXPECT_THROW(construct(), dmlc::Error);
}
TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) {
std::string server_address{GetServerAddress()};
auto construct = [server_address]() {
auto construct = [] {
Json config{JsonObject()};
config["federated_server_address"] = server_address;
config["federated_server_address"] = std::string("localhost:0");
config["federated_world_size"] = std::string("1");
config["federated_rank"] = Integer(0);
FederatedCommunicator::Create(config);
@@ -98,10 +88,9 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnWorldSizeNotInteger) {
}
TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) {
std::string server_address{GetServerAddress()};
auto construct = [server_address]() {
auto construct = [] {
Json config{JsonObject()};
config["federated_server_address"] = server_address;
config["federated_server_address"] = std::string("localhost:0");
config["federated_world_size"] = 1;
config["federated_rank"] = std::string("0");
FederatedCommunicator::Create(config);
@@ -110,15 +99,13 @@ TEST(FederatedCommunicatorSimpleTest, ThrowOnRankNotInteger) {
}
TEST(FederatedCommunicatorSimpleTest, GetWorldSizeAndRank) {
std::string server_address{GetServerAddress()};
FederatedCommunicator comm{6, 3, server_address};
FederatedCommunicator comm{6, 3, "localhost:0"};
EXPECT_EQ(comm.GetWorldSize(), 6);
EXPECT_EQ(comm.GetRank(), 3);
}
TEST(FederatedCommunicatorSimpleTest, IsDistributed) {
std::string server_address{GetServerAddress()};
FederatedCommunicator comm{2, 1, server_address};
FederatedCommunicator comm{2, 1, "localhost:0"};
EXPECT_TRUE(comm.IsDistributed());
}

View File

@@ -70,7 +70,7 @@ void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json e
class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
std::unique_ptr<ServerForTest> server_;
static int const kWorldSize{3};
static int constexpr kWorldSize{3};
protected:
void SetUp() override { server_ = std::make_unique<ServerForTest>(kWorldSize); }

View File

@@ -0,0 +1,243 @@
/*!
* Copyright 2023 XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../metric/test_auc.h"
#include "../metric/test_elementwise_metric.h"
#include "../metric/test_multiclass_metric.h"
#include "../metric/test_rank_metric.h"
#include "../metric/test_survival_metric.h"
#include "helpers.h"
namespace {
class FederatedMetricTest : public xgboost::BaseFederatedTest {};
} // anonymous namespace
namespace xgboost {
namespace metric {
TEST_F(FederatedMetricTest, BinaryAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, BinaryAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyBinaryAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiClassAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, RankingAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RankingAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, PRAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, PRAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPRAUC, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiClassPRAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassPRAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassPRAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, RankingPRAUCRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RankingPRAUCColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRankingPRAUC,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, RMSERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RMSEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, RMSLERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, RMSLEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyRMSLE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MAERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MAEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MAPERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MAPEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAPE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MPHERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMPHE, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MPHEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMPHE, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, LogLossRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, LogLossColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyLogLoss, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, ErrorRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, ErrorColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyError, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, PoissonNegLogLikRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, PoissonNegLogLikColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPoissonNegLogLik,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiRMSERowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiRMSEColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiRMSE,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, QuantileRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, QuantileColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyQuantile,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiClassErrorRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassErrorColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassError,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MultiClassLogLossRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MultiClassLogLossColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMultiClassLogLoss,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, PrecisionRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, PrecisionColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyPrecision,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, NDCGRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, NDCGColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCG, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, MAPRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, MAPColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyMAP, DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, NDCGExpGainRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, NDCGExpGainColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyNDCGExpGain,
DataSplitMode::kCol);
}
} // namespace metric
} // namespace xgboost
namespace xgboost {
namespace common {
TEST_F(FederatedMetricTest, AFTNegLogLikRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAFTNegLogLik,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, AFTNegLogLikColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyAFTNegLogLik,
DataSplitMode::kCol);
}
TEST_F(FederatedMetricTest, IntervalRegressionAccuracyRowSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy,
DataSplitMode::kRow);
}
TEST_F(FederatedMetricTest, IntervalRegressionAccuracyColumnSplit) {
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyIntervalRegressionAccuracy,
DataSplitMode::kCol);
}
} // namespace common
} // namespace xgboost

View File

@@ -126,7 +126,8 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/big.libsvm";
CreateBigTestData(tmp_file, 50000);
std::shared_ptr<DMatrix> dmat(xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"));
std::shared_ptr<DMatrix> dmat(
xgboost::DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache"));
EXPECT_FALSE(dmat->SingleColBlock());
size_t num_row = dmat->Info().num_row_;
std::vector<bst_float> labels(num_row);

View File

@@ -203,7 +203,11 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
learner->Save(&mem_out);
ASSERT_EQ(model_at_kiter, serialised_model_tmp);
learner->SetParam("gpu_id", "0");
for (auto const& [key, value] : args) {
if (key == "tree_method" && value == "gpu_hist") {
learner->SetParam("gpu_id", "0");
}
}
// Pull data to device
for (auto &batch : p_dmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(0);

View File

@@ -1,12 +1,13 @@
/*!
* Copyright 2020-2021 by XGBoost Contributors
/**
* Copyright 2020-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include "../../../../src/data/ellpack_page.cuh"
#include "../../../../src/tree/gpu_hist/gradient_based_sampler.cuh"
#include "../../../../src/tree/param.h"
#include "../../filesystem.h" // dmlc::TemporaryDirectory
#include "../../../../src/tree/param.h" // TrainParam
#include "../../filesystem.h" // dmlc::TemporaryDirectory
#include "../../helpers.h"
namespace xgboost {
@@ -31,14 +32,15 @@ void VerifySampling(size_t page_size,
}
gpair.SetDevice(0);
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
if (page_size != 0) {
EXPECT_NE(page->n_rows, kRows);
}
GradientBasedSampler sampler(page, kRows, param, subsample, sampling_method);
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
if (fixed_size_sampling) {
EXPECT_EQ(sample.sample_rows, kRows);
@@ -86,12 +88,13 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
auto gpair = GenerateRandomGradients(kRows);
gpair.SetDevice(0);
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_NE(page->n_rows, kRows);
GradientBasedSampler sampler(page, kRows, param, kSubsample, TrainParam::kUniform);
auto sample = sampler.Sample(gpair.DeviceSpan(), dmat.get());
GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
auto sampled_page = sample.page;
EXPECT_EQ(sample.sample_rows, kRows);
EXPECT_EQ(sample.gpair.size(), gpair.Size());
@@ -103,7 +106,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
ci(buffer.data(), sampled_page->NumSymbols());
size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(param)) {
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
auto page = batch.Impl();
std::vector<common::CompressedByteT> page_buffer(page->gidx_buffer.HostVector());
common::CompressedIterator<common::CompressedByteT>

View File

@@ -1,9 +1,14 @@
/**
* Copyright 2020-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <vector>
#include "../../../../src/common/categorical.h"
#include "../../../../src/tree/gpu_hist/histogram.cuh"
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#include "../../../../src/tree/param.h" // TrainParam
#include "../../categorical_helpers.h"
#include "../../helpers.h"
@@ -11,15 +16,15 @@ namespace xgboost {
namespace tree {
void TestDeterministicHistogram(bool is_dense, int shm_size) {
Context ctx = CreateEmptyGenericParam(0);
Context ctx = MakeCUDACtx(0);
size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
float constexpr kLower = -1e-2, kUpper = 1e2;
float sparsity = is_dense ? 0.0f : 0.5f;
auto matrix = RandomDataGenerator(kRows, kCols, sparsity).GenerateDMatrix();
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
for (auto const& batch : matrix->GetBatches<EllpackPage>(batch_param)) {
for (auto const& batch : matrix->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
tree::RowPartitioner row_partitioner(0, kRows);
@@ -132,13 +137,13 @@ void ValidateCategoricalHistogram(size_t n_categories, common::Span<GradientPair
// Test 1 vs rest categorical histogram is equivalent to one hot encoded data.
void TestGPUHistogramCategorical(size_t num_categories) {
auto ctx = CreateEmptyGenericParam(0);
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows = 340;
size_t constexpr kBins = 256;
auto x = GenerateRandomCategoricalSingleColumn(kRows, num_categories);
auto cat_m = GetDMatrixFromData(x, kRows, 1);
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
auto batch_param = BatchParam{kBins, tree::TrainParam::DftSparseThreshold()};
tree::RowPartitioner row_partitioner(0, kRows);
auto ridx = row_partitioner.GetRows(0);
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
@@ -148,7 +153,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
/**
* Generate hist with cat data.
*/
for (auto const &batch : cat_m->GetBatches<EllpackPage>(batch_param)) {
for (auto const &batch : cat_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
FeatureGroups single_group(page->Cuts());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
@@ -162,7 +167,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
auto x_encoded = OneHotEncodeFeature(x, num_categories);
auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);
dh::device_vector<GradientPairInt64> encode_hist(2 * num_categories);
for (auto const &batch : encode_m->GetBatches<EllpackPage>(batch_param)) {
for (auto const &batch : encode_m->GetBatches<EllpackPage>(&ctx, batch_param)) {
auto* page = batch.Impl();
FeatureGroups single_group(page->Cuts());
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),

View File

@@ -41,7 +41,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
size_t constexpr kMaxBins = 4;
// dense, no missing values
GHistIndexMatrix gmat(dmat.get(), kMaxBins, 0.5, false, AllThreadsForTest());
GHistIndexMatrix gmat(&ctx, dmat.get(), kMaxBins, 0.5, false);
common::RowSetCollection row_set_collection;
std::vector<size_t> &row_indices = *row_set_collection.Data();
row_indices.resize(kRows);
@@ -228,7 +228,7 @@ auto CompareOneHotAndPartition(bool onehot) {
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
common::HistCollection hist;
entries.front().nid = 0;

View File

@@ -25,6 +25,7 @@ void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples
} // anonymous namespace
void TestAddHistRows(bool is_distributed) {
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
int starting_index = std::numeric_limits<int>::max();
@@ -32,9 +33,9 @@ void TestAddHistRows(bool is_distributed) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat =
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
RegTree tree;
@@ -73,6 +74,7 @@ TEST(CPUHistogram, AddRows) {
void TestSyncHist(bool is_distributed) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build_;
std::vector<CPUExpandEntry> nodes_for_subtraction_trick_;
@@ -80,9 +82,9 @@ void TestSyncHist(bool is_distributed) {
int sync_count = 0;
RegTree tree;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto const &gmat =
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
HistogramBuilder<CPUExpandEntry> histogram;
uint32_t total_bins = gmat.cut.Ptrs().back();
@@ -227,12 +229,15 @@ TEST(CPUHistogram, SyncHist) {
void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_col_split) {
size_t constexpr kNRows = 8, kNCols = 16;
int32_t constexpr kMaxBins = 4;
auto p_fmat = RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
if (is_col_split) {
p_fmat = std::shared_ptr<DMatrix>{
p_fmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
}
auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
auto const &gmat =
*(p_fmat->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{kMaxBins, 0.5}).begin());
uint32_t total_bins = gmat.cut.Ptrs().back();
static double constexpr kEps = 1e-6;
@@ -257,9 +262,9 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
CPUExpandEntry node{RegTree::kRoot, tree.GetDepth(0)};
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
histogram.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair, force_read_by_column);
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(&ctx, {kMaxBins, 0.5})) {
histogram.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair, force_read_by_column);
}
// Check if number of histogram bins is correct
@@ -325,6 +330,8 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
auto x = GenerateRandomCategoricalSingleColumn(kRows, n_categories);
auto cat_m = GetDMatrixFromData(x, kRows, 1);
cat_m->Info().feature_types.HostVector().push_back(FeatureType::kCategorical);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
BatchParam batch_param{0, static_cast<int32_t>(kBins)};
RegTree tree;
@@ -345,12 +352,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
* Generate hist with cat data.
*/
HistogramBuilder<CPUExpandEntry> cat_hist;
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair.HostVector(),
force_read_by_column);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector(), force_read_by_column);
}
/**
@@ -359,12 +365,11 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
auto x_encoded = OneHotEncodeFeature(x, n_categories);
auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
HistogramBuilder<CPUExpandEntry> onehot_hist;
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(&ctx, {kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false, false);
onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector(),
force_read_by_column);
gpair.HostVector(), force_read_by_column);
}
auto cat = cat_hist.Histogram()[0];
@@ -382,8 +387,8 @@ TEST(CPUHistogram, Categorical) {
}
}
namespace {
void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool force_read_by_column) {
Context ctx;
void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, bool is_approx,
bool force_read_by_column) {
size_t constexpr kEntries = 1 << 16;
auto m = CreateSparsePageDMatrix(kEntries, "cache");
@@ -410,7 +415,7 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
* Multi page
*/
std::vector<common::RowSetCollection> rows_set;
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
CHECK_LT(page.base_rowid, m->Info().num_row_);
auto n_rows_in_node = page.Size();
partition_size[0] = std::max(partition_size[0], n_rows_in_node);
@@ -426,12 +431,12 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
256};
multi_build.Reset(total_bins, batch_param, ctx.Threads(), rows_set.size(), false, false);
multi_build.Reset(total_bins, batch_param, ctx->Threads(), rows_set.size(), false, false);
size_t page_idx{0};
for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {},
h_gpair, force_read_by_column);
for (auto const &page : m->GetBatches<GHistIndexMatrix>(ctx, batch_param)) {
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {}, h_gpair,
force_read_by_column);
++page_idx;
}
ASSERT_EQ(page_idx, 2);
@@ -447,16 +452,16 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
common::RowSetCollection row_set_collection;
InitRowPartitionForTest(&row_set_collection, n_samples);
single_build.Reset(total_bins, batch_param, ctx.Threads(), 1, false, false);
single_build.Reset(total_bins, batch_param, ctx->Threads(), 1, false, false);
SparsePage concat;
std::vector<float> hess(m->Info().num_row_, 1.0f);
for (auto const& page : m->GetBatches<SparsePage>()) {
concat.Push(page);
}
auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, ctx.Threads(), false, hess);
auto cut = common::SketchOnDMatrix(ctx, m.get(), batch_param.max_bin, false, hess);
GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
std::numeric_limits<double>::quiet_NaN(), ctx.Threads());
std::numeric_limits<double>::quiet_NaN(), ctx->Threads());
single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair, force_read_by_column);
single_page = single_build.Histogram()[0];
}
@@ -470,16 +475,17 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx, bool fo
TEST(CPUHistogram, ExternalMemory) {
int32_t constexpr kBins = 256;
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, false);
TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true, true);
auto ctx = CreateEmptyGenericParam(Context::kCpuId);
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, false);
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, true);
float sparse_thresh{0.5};
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
sparse_thresh = std::numeric_limits<float>::quiet_NaN();
TestHistogramExternalMemory({kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory({kBins, sparse_thresh}, false, true);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
}
} // namespace tree
} // namespace xgboost

View File

@@ -34,7 +34,7 @@ TEST(Approx, Partitioner) {
std::vector<CPUExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
bst_feature_t const split_ind = 0;
{
auto min_value = page.cut.MinValues()[split_ind];
@@ -84,7 +84,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
Context ctx;
ctx.InitAllowUnknown(Args{});
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>({64, *hess, true})) {
for (auto const& page : dmat->GetBatches<GHistIndexMatrix>(&ctx, {64, *hess, true})) {
{
RegTree tree;
CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
@@ -133,7 +133,7 @@ TEST(Approx, PartitionerColSplit) {
Context ctx;
ctx.InitAllowUnknown(Args{});
CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, {64, hess, true})) {
bst_feature_t const split_ind = 0;
min_value = page.cut.MinValues()[split_ind];

View File

@@ -43,7 +43,7 @@ void TestLeafPartition(size_t n_samples) {
std::vector<size_t> h_nptr;
float split_value{0};
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>({Context::kCpuId, 64})) {
for (auto const& page : Xy->GetBatches<GHistIndexMatrix>(&ctx, BatchParam{64, 0.2})) {
bst_feature_t const split_ind = 0;
auto ptr = page.cut.Ptrs()[split_ind + 1];
split_value = page.cut.Values().at(ptr / 2);

View File

@@ -218,17 +218,16 @@ TEST(GpuHist, TestHistogramIndex) {
TestHistogramIndexImpl();
}
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
size_t gpu_page_size, RegTree* tree,
HostDeviceVector<bst_float>* preds, float subsample = 1.0f,
const std::string& sampling_method = "uniform",
void UpdateTree(Context const* ctx, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
size_t gpu_page_size, RegTree* tree, HostDeviceVector<bst_float>* preds,
float subsample = 1.0f, const std::string& sampling_method = "uniform",
int max_bin = 2) {
if (gpu_page_size > 0) {
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, max_bin})) {
for (const auto& batch : dmat->GetBatches<EllpackPage>(
ctx, BatchParam{max_bin, TrainParam::DftSparseThreshold()})) {
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
batch_count++;
row_count += batch.Size();
@@ -249,14 +248,13 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
TrainParam param;
param.UpdateAllowUnknown(args);
Context ctx(CreateEmptyGenericParam(0));
ObjInfo task{ObjInfo::kRegression};
tree::GPUHistMaker hist_maker{&ctx, &task};
tree::GPUHistMaker hist_maker{ctx, &task};
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
{tree});
auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1);
auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);
hist_maker.UpdatePredictionCache(dmat, cache);
}
@@ -274,12 +272,13 @@ TEST(GpuHist, UniformSampling) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
Context ctx(CreateEmptyGenericParam(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
"uniform", kRows);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample, "uniform",
kRows);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -303,12 +302,13 @@ TEST(GpuHist, GradientBasedSampling) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
Context ctx(CreateEmptyGenericParam(0));
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree_sampling, &preds_sampling, kSubsample,
"gradient_based", kRows);
// Make sure the predictions are the same.
@@ -337,12 +337,13 @@ TEST(GpuHist, ExternalMemory) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
Context ctx(CreateEmptyGenericParam(0));
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, 1.0, "uniform", kRows);
// Build another tree using multiple ELLPACK pages.
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, 1.0, "uniform", kRows);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -374,17 +375,17 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
// Build a tree using the in-memory DMatrix.
auto rng = common::GlobalRandom();
Context ctx(CreateEmptyGenericParam(0));
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod,
kRows);
UpdateTree(&ctx, &gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, kRows);
// Build another tree using multiple ELLPACK pages.
common::GlobalRandom() = rng;
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext,
kSubsample, kSamplingMethod, kRows);
UpdateTree(&ctx, &gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, kSubsample,
kSamplingMethod, kRows);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();

View File

@@ -36,7 +36,7 @@ void TestPartitioner(bst_target_t n_targets) {
std::vector<ExpandEntry> candidates{{0, 0}};
candidates.front().split.loss_chg = 0.4;
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);
for (auto const& page : Xy->GetBatches<SparsePage>()) {
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());

View File

@@ -15,16 +15,17 @@ class DMatrixForTest : public data::SimpleDMatrix {
public:
using SimpleDMatrix::SimpleDMatrix;
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override {
BatchSet<GHistIndexMatrix> GetGradientIndex(Context const* ctx,
const BatchParam& param) override {
auto backup = this->gradient_index_;
auto iter = SimpleDMatrix::GetGradientIndex(param);
auto iter = SimpleDMatrix::GetGradientIndex(ctx, param);
n_regen_ += (backup != this->gradient_index_);
return iter;
}
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override {
BatchSet<EllpackPage> GetEllpackBatches(Context const* ctx, const BatchParam& param) override {
auto backup = this->ellpack_page_;
auto iter = SimpleDMatrix::GetEllpackBatches(param);
auto iter = SimpleDMatrix::GetEllpackBatches(ctx, param);
n_regen_ += (backup != this->ellpack_page_);
return iter;
}
@@ -50,8 +51,8 @@ class RegenTest : public ::testing::Test {
HostDeviceVector<float> storage;
auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage);
auto adapter = data::ArrayAdapter(StringView{dense});
p_fmat_ = std::shared_ptr<DMatrix>(new DMatrixForTest{
&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
p_fmat_ = std::shared_ptr<DMatrix>(
new DMatrixForTest{&adapter, std::numeric_limits<float>::quiet_NaN(), AllThreadsForTest()});
p_fmat_->Info().labels.Reshape(256, 1);
auto labels = p_fmat_->Info().labels.Data();
@@ -74,7 +75,7 @@ class RegenTest : public ::testing::Test {
auto for_test = dynamic_cast<DMatrixForTest*>(p_fmat_.get());
CHECK(for_test);
auto backup = for_test->NumRegen();
for_test->GetBatches<Page>(BatchParam{});
for_test->GetBatches<Page>(p_fmat_->Ctx(), BatchParam{});
CHECK_EQ(for_test->NumRegen(), backup);
if (reset) {

View File

@@ -18,6 +18,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_dmatrix_feature_weights(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
data = rng.randn(5, 5)
m = xgb.DMatrix(data)
@@ -26,23 +27,91 @@ class TestQuantileDMatrix:
m.set_info(feature_weights=feature_weights)
cp.testing.assert_array_equal(
cp.array(m.get_float_info('feature_weights')),
feature_weights.astype(np.float32))
cp.array(m.get_float_info("feature_weights")),
feature_weights.astype(np.float32),
)
@pytest.mark.skipif(**tm.no_cupy())
def test_dmatrix_cupy_init(self) -> None:
import cupy as cp
data = cp.random.randn(5, 5)
xgb.QuantileDMatrix(data, cp.ones(5, dtype=np.float64))
@pytest.mark.parametrize(
"on_device,tree_method",
[(True, "hist"), (False, "gpu_hist"), (False, "hist"), (True, "gpu_hist")],
)
def test_initialization(self, on_device: bool, tree_method: str) -> None:
n_samples, n_features, max_bin = 64, 3, 16
X, y, w = tm.make_batches(
n_samples,
n_features=n_features,
n_batches=1,
use_cupy=on_device,
)
# Init SparsePage
Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
# Init GIDX/Ellpack
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin},
Xy,
num_boost_round=1,
)
# query cuts from GIDX/Ellpack
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
tm.predictor_equal(Xy, qXy)
with pytest.raises(ValueError, match="Inconsistent"):
# max_bin changed.
xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
# No error, DMatrix can be modified for different training session.
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin - 1},
Xy,
num_boost_round=1,
)
# Init Ellpack/GIDX
Xy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin)
# Init GIDX/Ellpack
xgb.train(
{"tree_method": tree_method, "max_bin": max_bin},
Xy,
num_boost_round=1,
)
# query cuts from GIDX/Ellpack
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
tm.predictor_equal(Xy, qXy)
with pytest.raises(ValueError, match="Inconsistent"):
# max_bin changed.
xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin - 1, ref=Xy)
Xy = xgb.DMatrix(X[0], y[0], weight=w[0])
booster0 = xgb.train(
{"tree_method": "hist", "max_bin": max_bin, "max_depth": 4},
Xy,
num_boost_round=1,
)
booster1 = xgb.train(
{"tree_method": "gpu_hist", "max_bin": max_bin, "max_depth": 4},
Xy,
num_boost_round=1,
)
qXy = xgb.QuantileDMatrix(X[0], y[0], weight=w[0], max_bin=max_bin, ref=Xy)
predt0 = booster0.predict(qXy)
predt1 = booster1.predict(qXy)
np.testing.assert_allclose(predt0, predt1)
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.parametrize(
"tree_method,max_bin", [
("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)
]
"tree_method,max_bin",
[("hist", 16), ("gpu_hist", 16), ("hist", 64), ("gpu_hist", 64)],
)
def test_interoperability(self, tree_method: str, max_bin: int) -> None:
import cupy as cp
n_samples = 64
n_features = 3
X, y, w = tm.make_batches(
@@ -75,6 +144,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_metainfo(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
rows = 10
@@ -98,6 +168,7 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cudf())
def test_ref_dmatrix(self) -> None:
import cupy as cp
rng = cp.random.RandomState(1994)
self.cputest.run_ref_dmatrix(rng, "gpu_hist", False)
@@ -158,5 +229,6 @@ class TestQuantileDMatrix:
@pytest.mark.skipif(**tm.no_cupy())
def test_check_inf(self) -> None:
import cupy as cp
rng = cp.random.default_rng(1994)
check_inf(rng)

View File

@@ -1,3 +1,4 @@
import json
import sys
import pytest
@@ -36,19 +37,16 @@ class TestGPUEvalMetrics:
Xy = xgboost.DMatrix(X, y, group=group)
cpu = xgboost.train(
booster = xgboost.train(
{"tree_method": "hist", "eval_metric": "auc", "objective": "rank:ndcg"},
Xy,
num_boost_round=10,
)
cpu_auc = float(cpu.eval(Xy).split(":")[1])
gpu = xgboost.train(
{"tree_method": "gpu_hist", "eval_metric": "auc", "objective": "rank:ndcg"},
Xy,
num_boost_round=10,
)
gpu_auc = float(gpu.eval(Xy).split(":")[1])
cpu_auc = float(booster.eval(Xy).split(":")[1])
booster.set_param({"gpu_id": "0"})
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
gpu_auc = float(booster.eval(Xy).split(":")[1])
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
np.testing.assert_allclose(cpu_auc, gpu_auc)

View File

@@ -153,12 +153,18 @@ class TestGPUUpdaters:
tm.dataset_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
def test_gpu_hist_device_dmatrix(
self, param: dict, num_rounds: int, dataset: tm.TestDataset
) -> None:
# We cannot handle empty dataset yet
assume(len(dataset.y) > 0)
param['tree_method'] = 'gpu_hist'
param = dataset.set_params(param)
result = train_result(param, dataset.get_device_dmat(), num_rounds)
result = train_result(
param,
dataset.get_device_dmat(max_bin=param.get("max_bin", None)),
num_rounds
)
note(result)
assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)

View File

@@ -21,8 +21,7 @@ class TestBasic:
assert not lazy_isinstance(a, 'numpy', 'dataframe')
def test_basic(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1,
'objective': 'binary:logistic'}
# specify validations set to watch performance
@@ -61,8 +60,7 @@ class TestBasic:
def test_metric_config(self):
# Make sure that the metric configuration happens in booster so the
# string `['error', 'auc']` doesn't get passed down to core.
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
@@ -78,8 +76,7 @@ class TestBasic:
np.testing.assert_allclose(predt_0, predt_1)
def test_multiclass(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
# specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
@@ -188,7 +185,7 @@ class TestBasic:
assert dm.num_col() == cols
def test_cv(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'}
@@ -198,7 +195,7 @@ class TestBasic:
assert len(cv) == (4)
def test_cv_no_shuffle(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'}
@@ -209,7 +206,7 @@ class TestBasic:
assert len(cv) == (4)
def test_cv_explicit_fold_indices(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
'binary:logistic'}
folds = [
@@ -268,8 +265,7 @@ class TestBasicPathLike:
def test_DMatrix_init_from_path(self):
"""Initialization from the data path."""
dpath = Path('demo/data')
dtrain = xgb.DMatrix(dpath / 'agaricus.txt.train')
dtrain, _ = tm.load_agaricus(__file__)
assert dtrain.num_row() == 6513
assert dtrain.num_col() == 127

View File

@@ -42,8 +42,7 @@ class TestModels:
param = {'verbosity': 0, 'objective': 'binary:logistic',
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
'nthread': 1}
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
@@ -55,8 +54,7 @@ class TestModels:
assert err < 0.2
def test_dart(self):
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 5, 'objective': 'binary:logistic',
'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1}
# specify validations set to watch performance
@@ -122,7 +120,7 @@ class TestModels:
def test_boost_from_prediction(self):
# Re-construct dtrain here to avoid modification
margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
margined, _ = tm.load_agaricus(__file__)
bst = xgb.train({'tree_method': 'hist'}, margined, 1)
predt_0 = bst.predict(margined, output_margin=True)
margined.set_base_margin(predt_0)
@@ -130,13 +128,13 @@ class TestModels:
predt_1 = bst.predict(margined)
assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtrain, _ = tm.load_agaricus(__file__)
bst = xgb.train({'tree_method': 'hist'}, dtrain, 2)
predt_2 = bst.predict(dtrain)
assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
def test_boost_from_existing_model(self):
X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
X, _ = tm.load_agaricus(__file__)
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
assert booster.num_boosted_rounds() == 4
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
@@ -156,8 +154,7 @@ class TestModels:
'objective': 'reg:logistic',
"tree_method": tree_method
}
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 10
@@ -203,8 +200,7 @@ class TestModels:
self.run_custom_objective()
def test_multi_eval_metric(self):
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
'objective': 'binary:logistic'}
@@ -226,7 +222,7 @@ class TestModels:
param['scale_pos_weight'] = ratio
return (dtrain, dtest, param)
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtrain, _ = tm.load_agaricus(__file__)
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'auc'}, seed=0, fpreproc=fpreproc)
@@ -234,7 +230,7 @@ class TestModels:
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'}
num_round = 2
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtrain, _ = tm.load_agaricus(__file__)
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'error'}, seed=0, show_stdv=False)
@@ -392,7 +388,7 @@ class TestModels:
os.remove(model_path)
try:
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtrain, _ = tm.load_agaricus(__file__)
xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
except ValueError as e:
e_str = str(e)

View File

@@ -275,9 +275,7 @@ class TestCallbacks:
"""Test learning rate scheduler, used by both CPU and GPU tests."""
scheduler = xgb.callback.LearningRateScheduler
dpath = tm.data_dir(__file__)
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
@@ -361,9 +359,7 @@ class TestCallbacks:
num_round = 4
scheduler = xgb.callback.LearningRateScheduler
dpath = tm.data_dir(__file__)
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
param = {

View File

@@ -283,7 +283,7 @@ class TestDMatrix:
assert m0.feature_types == m1.feature_types
def test_get_info(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtrain, _ = tm.load_agaricus(__file__)
dtrain.get_float_info('label')
dtrain.get_float_info('weight')
dtrain.get_float_info('base_margin')
@@ -432,7 +432,9 @@ class TestDMatrix:
def test_uri_categorical(self):
path = os.path.join(dpath, 'agaricus.txt.train')
feature_types = ["q"] * 5 + ["c"] + ["q"] * 120
Xy = xgb.DMatrix(path + "?indexing_mode=1", feature_types=feature_types)
Xy = xgb.DMatrix(
path + "?indexing_mode=1&format=libsvm", feature_types=feature_types
)
np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))
def test_base_margin(self):

View File

@@ -88,8 +88,12 @@ class TestInteractionConstraints:
def training_accuracy(self, tree_method):
"""Test accuracy, reused by GPU tests."""
from sklearn.metrics import accuracy_score
dtrain = xgboost.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1')
dtest = xgboost.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1')
dtrain = xgboost.DMatrix(
dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm"
)
dtest = xgboost.DMatrix(
dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm"
)
params = {
'eta': 1,
'max_depth': 6,

View File

@@ -134,8 +134,8 @@ class TestMonotoneConstraints:
@pytest.mark.skipif(**tm.no_sklearn())
def test_training_accuracy(self):
from sklearn.metrics import accuracy_score
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1')
dtrain = xgb.DMatrix(dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm")
dtest = xgb.DMatrix(dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm")
params = {'eta': 1, 'max_depth': 6, 'objective': 'binary:logistic',
'tree_method': 'hist', 'monotone_constraints': '(1, 0)'}
num_boost_round = 5

View File

@@ -13,9 +13,7 @@ pytestmark = tm.timeout(10)
class TestOMP:
def test_omp(self):
dpath = 'demo/data/'
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
dtrain, dtest = tm.load_agaricus(__file__)
param = {'booster': 'gbtree',
'objective': 'binary:logistic',

View File

@@ -13,7 +13,7 @@ rng = np.random.RandomState(1994)
class TestTreesToDataFrame:
def build_model(self, max_depth, num_round):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtrain, _ = tm.load_agaricus(__file__)
param = {'max_depth': max_depth, 'objective': 'binary:logistic',
'verbosity': 1}
num_round = num_round

View File

@@ -17,12 +17,10 @@ except ImportError:
pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(),
tm.no_graphviz()))
dpath = 'demo/data/agaricus.txt.train'
class TestPlotting:
def test_plotting(self):
m = xgb.DMatrix(dpath)
m, _ = tm.load_agaricus(__file__)
booster = xgb.train({'max_depth': 2, 'eta': 1,
'objective': 'binary:logistic'}, m,
num_boost_round=2)

View File

@@ -46,8 +46,8 @@ class TestSHAP:
fscores = bst.get_fscore()
assert scores1 == fscores
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train?format=libsvm')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test?format=libsvm')
def fn(max_depth, num_rounds):
# train

View File

@@ -154,9 +154,7 @@ class TestTreeMethod:
def test_hist_categorical(self):
# hist must be same as exact on all-categorial data
dpath = 'demo/data/'
ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
ag_dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
ag_dtrain, ag_dtest = tm.load_agaricus(__file__)
ag_param = {'max_depth': 2,
'tree_method': 'hist',
'eta': 1,

View File

@@ -222,7 +222,7 @@ class TestPandas:
set_base_margin_info(pd.DataFrame, xgb.DMatrix, "hist")
def test_cv_as_pandas(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': 'error'}

View File

@@ -176,7 +176,7 @@ def test_ranking():
def test_ranking_metric() -> None:
from sklearn.metrics import roc_auc_score
X, y, qid, w = tm.make_ltr(512, 4, 3, 2)
X, y, qid, w = tm.make_ltr(512, 4, 3, 1)
# use auc for test as ndcg_score in sklearn works only on label gain instead of exp
# gain.
# note that the auc in sklearn is different from the one in XGBoost. The one in

View File

@@ -192,6 +192,25 @@ def deterministic_repartition(
return X, y, m
@pytest.mark.parametrize("to_frame", [True, False])
def test_xgbclassifier_classes_type_and_value(to_frame: bool, client: "Client"):
X, y = make_classification(n_samples=1000, n_features=4, random_state=123)
if to_frame:
import pandas as pd
feats = [f"var_{i}" for i in range(4)]
df = pd.DataFrame(X, columns=feats)
df["target"] = y
df = dd.from_pandas(df, npartitions=1)
X, y = df[feats], df["target"]
else:
X = da.from_array(X)
y = da.from_array(y)
est = xgb.dask.DaskXGBClassifier(n_estimators=10).fit(X, y)
assert isinstance(est.classes_, np.ndarray)
np.testing.assert_array_equal(est.classes_, np.array([0, 1]))
def test_from_dask_dataframe() -> None:
with LocalCluster(n_workers=kWorkers, dashboard_address=":0") as cluster:
with Client(cluster) as client:

View File

@@ -1343,61 +1343,94 @@ class XgboostLocalTest(SparkTestCase):
SparkXGBClassifier(evals_result={})
class XgboostRankerLocalTest(SparkTestCase):
def setUp(self):
self.session.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8")
self.ranker_df_train = self.session.createDataFrame(
[
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
],
["features", "label", "qid"],
)
self.ranker_df_test = self.session.createDataFrame(
[
(Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988),
(Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556),
(Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570),
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988),
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612),
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826),
],
["features", "qid", "expected_prediction"],
)
self.ranker_df_train_1 = self.session.createDataFrame(
[
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9),
(Vectors.dense(1.0, 2.0, 3.0), 0, 8),
(Vectors.dense(4.0, 5.0, 6.0), 1, 8),
(Vectors.dense(9.0, 4.0, 8.0), 2, 8),
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7),
(Vectors.dense(1.0, 2.0, 3.0), 0, 6),
(Vectors.dense(4.0, 5.0, 6.0), 1, 6),
(Vectors.dense(9.0, 4.0, 8.0), 2, 6),
]
* 4,
["features", "label", "qid"],
)
LTRData = namedtuple("LTRData", ("df_train", "df_test", "df_train_1"))
def test_ranker(self):
ranker = SparkXGBRanker(qid_col="qid")
@pytest.fixture
def ltr_data(spark: SparkSession) -> Generator[LTRData, None, None]:
spark.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8")
ranker_df_train = spark.createDataFrame(
[
(Vectors.dense(1.0, 2.0, 3.0), 0, 0),
(Vectors.dense(4.0, 5.0, 6.0), 1, 0),
(Vectors.dense(9.0, 4.0, 8.0), 2, 0),
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1),
],
["features", "label", "qid"],
)
X_train = np.array(
[
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
[9.0, 4.0, 8.0],
[np.NaN, 1.0, 5.5],
[np.NaN, 6.0, 7.5],
[np.NaN, 8.0, 9.5],
]
)
qid_train = np.array([0, 0, 0, 1, 1, 1])
y_train = np.array([0, 1, 2, 0, 1, 2])
X_test = np.array(
[
[1.5, 2.0, 3.0],
[4.5, 5.0, 6.0],
[9.0, 4.5, 8.0],
[np.NaN, 1.0, 6.0],
[np.NaN, 6.0, 7.0],
[np.NaN, 8.0, 10.5],
]
)
ltr = xgb.XGBRanker(tree_method="approx", objective="rank:pairwise")
ltr.fit(X_train, y_train, qid=qid_train)
predt = ltr.predict(X_test)
ranker_df_test = spark.createDataFrame(
[
(Vectors.dense(1.5, 2.0, 3.0), 0, float(predt[0])),
(Vectors.dense(4.5, 5.0, 6.0), 0, float(predt[1])),
(Vectors.dense(9.0, 4.5, 8.0), 0, float(predt[2])),
(Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, float(predt[3])),
(Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, float(predt[4])),
(Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, float(predt[5])),
],
["features", "qid", "expected_prediction"],
)
ranker_df_train_1 = spark.createDataFrame(
[
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9),
(Vectors.dense(1.0, 2.0, 3.0), 0, 8),
(Vectors.dense(4.0, 5.0, 6.0), 1, 8),
(Vectors.dense(9.0, 4.0, 8.0), 2, 8),
(Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7),
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7),
(Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7),
(Vectors.dense(1.0, 2.0, 3.0), 0, 6),
(Vectors.dense(4.0, 5.0, 6.0), 1, 6),
(Vectors.dense(9.0, 4.0, 8.0), 2, 6),
]
* 4,
["features", "label", "qid"],
)
yield LTRData(ranker_df_train, ranker_df_test, ranker_df_train_1)
class TestPySparkLocalLETOR:
def test_ranker(self, ltr_data: LTRData) -> None:
ranker = SparkXGBRanker(qid_col="qid", objective="rank:pairwise")
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
model = ranker.fit(self.ranker_df_train)
pred_result = model.transform(self.ranker_df_test).collect()
model = ranker.fit(ltr_data.df_train)
pred_result = model.transform(ltr_data.df_test).collect()
for row in pred_result:
assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3)
def test_ranker_qid_sorted(self):
ranker = SparkXGBRanker(qid_col="qid", num_workers=4)
assert ranker.getOrDefault(ranker.objective) == "rank:pairwise"
model = ranker.fit(self.ranker_df_train_1)
model.transform(self.ranker_df_test).collect()
def test_ranker_qid_sorted(self, ltr_data: LTRData) -> None:
ranker = SparkXGBRanker(qid_col="qid", num_workers=4, objective="rank:ndcg")
assert ranker.getOrDefault(ranker.objective) == "rank:ndcg"
model = ranker.fit(ltr_data.df_train_1)
model.transform(ltr_data.df_test).collect()