Rework the precision metric. (#9222)

- Rework the precision metric for both CPU and GPU. - Mention it in the document. - Cleanup old support code for GPU ranking metric. - Deterministic GPU implementation. * Drop support for classification. * type. * use batch shape. * lint. * cpu build. * cpu build. * lint. * Tests. * Fix. * Cleanup error message.
2023-06-02 20:49:43 +08:00
parent db8288121d
commit 9fbde21e9d
22 changed files with 312 additions and 502 deletions
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -90,7 +90,7 @@ def check_cmd_print_failure_assistance(cmd: List[str]) -> bool:

    subprocess.run([cmd[0], "--version"])
    msg = """
-Please run the following command on your machine to address the formatting error:
+Please run the following command on your machine to address the error:

    """
    msg += " ".join(cmd)
--- a/tests/cpp/metric/test_rank_metric.h
+++ b/tests/cpp/metric/test_rank_metric.h
@@ -17,34 +17,30 @@
 #include "xgboost/host_device_vector.h"  // for HostDeviceVector
 #include "xgboost/json.h"                // for Json, String, Object

-namespace xgboost {
-namespace metric {
+namespace xgboost::metric {

 inline void VerifyPrecision(DataSplitMode data_split_mode = DataSplitMode::kRow) {
-  // When the limit for precision is not given, it takes the limit at
-  // std::numeric_limits<unsigned>::max(); hence all values are very small
-  // NOTE(AbdealiJK): Maybe this should be fixed to be num_row by default.
  auto ctx = xgboost::CreateEmptyGenericParam(GPUIDX);
-  xgboost::Metric * metric = xgboost::Metric::Create("pre", &ctx);
+  std::unique_ptr<xgboost::Metric> metric{Metric::Create("pre", &ctx)};
  ASSERT_STREQ(metric->Name(), "pre");
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0, 1e-7);
-  EXPECT_NEAR(GetMetricEval(metric,
-                            {0.1f, 0.9f, 0.1f, 0.9f},
-                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
-              0, 1e-7);
+  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5, 1e-7);
+  EXPECT_NEAR(
+      GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),
+      0.5, 1e-7);

-  delete metric;
-  metric = xgboost::Metric::Create("pre@2", &ctx);
+  metric.reset(xgboost::Metric::Create("pre@2", &ctx));
  ASSERT_STREQ(metric->Name(), "pre@2");
-  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
-  EXPECT_NEAR(GetMetricEval(metric,
-                            {0.1f, 0.9f, 0.1f, 0.9f},
-                            {  0,   0,   1,   1}, {}, {}, data_split_mode),
-              0.5f, 0.001f);
+  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 1}, {0, 1}, {}, {}, data_split_mode), 0.5f, 1e-7);
+  EXPECT_NEAR(
+      GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}, {}, {}, data_split_mode),
+      0.5f, 0.001f);

-  EXPECT_ANY_THROW(GetMetricEval(metric, {0, 1}, {}, {}, {}, data_split_mode));
+  EXPECT_ANY_THROW(GetMetricEval(metric.get(), {0, 1}, {}, {}, {}, data_split_mode));

-  delete metric;
+  metric.reset(xgboost::Metric::Create("pre@4", &ctx));
+  EXPECT_NEAR(GetMetricEval(metric.get(), {0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
+                            {0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f}, {}, {}, data_split_mode),
+              0.5f, 1e-7);
 }

 inline void VerifyNDCG(DataSplitMode data_split_mode = DataSplitMode::kRow) {
@@ -187,5 +183,4 @@ inline void VerifyNDCGExpGain(DataSplitMode data_split_mode = DataSplitMode::kRo
  ndcg = metric->Evaluate(predt, p_fmat);
  ASSERT_NEAR(ndcg, 1.0, kRtEps);
 }
-}  // namespace metric
-}  // namespace xgboost
+}  // namespace xgboost::metric
--- a/tests/python-gpu/test_gpu_eval_metrics.py
+++ b/tests/python-gpu/test_gpu_eval_metrics.py
@@ -5,7 +5,7 @@ import pytest

 import xgboost
 from xgboost import testing as tm
-from xgboost.testing.metrics import check_quantile_error
+from xgboost.testing.metrics import check_precision_score, check_quantile_error

 sys.path.append("tests/python")
 import test_eval_metrics as test_em  # noqa
@@ -59,6 +59,9 @@ class TestGPUEvalMetrics:
    def test_pr_auc_ltr(self):
        self.cpu_test.run_pr_auc_ltr("gpu_hist")

+    def test_precision_score(self):
+        check_precision_score("gpu_hist")
+
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_quantile_error(self) -> None:
        check_quantile_error("gpu_hist")
--- a/tests/python/test_eval_metrics.py
+++ b/tests/python/test_eval_metrics.py
@@ -3,7 +3,7 @@ import pytest

 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.metrics import check_quantile_error
+from xgboost.testing.metrics import check_precision_score, check_quantile_error

 rng = np.random.RandomState(1337)

@@ -315,6 +315,9 @@ class TestEvalMetrics:
    def test_pr_auc_ltr(self):
        self.run_pr_auc_ltr("hist")

+    def test_precision_score(self):
+        check_precision_score("hist")
+
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_quantile_error(self) -> None:
        check_quantile_error("hist")
--- a/tests/python/test_quantile_dmatrix.py
+++ b/tests/python/test_quantile_dmatrix.py
@@ -55,6 +55,38 @@ class TestQuantileDMatrix:
        r = np.arange(1.0, n_samples)
        np.testing.assert_allclose(Xy.get_data().toarray()[1:, 0], r)

+    def test_error(self):
+        from sklearn.model_selection import train_test_split
+
+        rng = np.random.default_rng(1994)
+        X, y = make_categorical(
+            n_samples=128, n_features=2, n_categories=3, onehot=False
+        )
+        reg = xgb.XGBRegressor(tree_method="hist", enable_categorical=True)
+        w = rng.uniform(0, 1, size=y.shape[0])
+
+        X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
+            X, y, w, random_state=1994
+        )
+
+        with pytest.raises(ValueError, match="sample weight"):
+            reg.fit(
+                X,
+                y,
+                sample_weight=w_train,
+                eval_set=[(X_test, y_test)],
+                sample_weight_eval_set=[w_test],
+            )
+
+        with pytest.raises(ValueError, match="sample weight"):
+            reg.fit(
+                X_train,
+                y_train,
+                sample_weight=w,
+                eval_set=[(X_test, y_test)],
+                sample_weight_eval_set=[w_test],
+            )
+
    @pytest.mark.parametrize("sparsity", [0.0, 0.1, 0.8, 0.9])
    def test_with_iterator(self, sparsity: float) -> None:
        n_samples_per_batch = 317