[SYCL] Implement UpdatePredictionCache and connect updater with learner. (#10701)

--------- Co-authored-by: Dmitry Razdoburdin <>
2024-08-21 20:07:44 +02:00
parent 9b88495840
commit 24d225c1ab
11 changed files with 502 additions and 126 deletions
--- a/tests/cpp/plugin/test_sycl_hist_updater.cc
+++ b/tests/cpp/plugin/test_sycl_hist_updater.cc
@@ -21,10 +21,8 @@ class TestHistUpdater : public HistUpdater<GradientSumT> {
  TestHistUpdater(const Context* ctx,
                  ::sycl::queue qu,
                  const xgboost::tree::TrainParam& param,
-                  std::unique_ptr<TreeUpdater> pruner,
                  FeatureInteractionConstraintHost int_constraints_,
                  DMatrix const* fmat) : HistUpdater<GradientSumT>(ctx, qu, param,
-                                                                   std::move(pruner),
                                                                   int_constraints_, fmat) {}

  void TestInitSampling(const USMVector<GradientPair, MemoryType::on_device> &gpair,
@@ -110,14 +108,12 @@ void TestHistUpdaterSampling(const xgboost::tree::TrainParam& param) {

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());
-  ObjInfo task{ObjInfo::kRegression};

  auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0}.GenerateDMatrix();

  FeatureInteractionConstraintHost int_constraints;
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};

-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());

  USMVector<size_t, MemoryType::on_device> row_indices_0(&qu, num_rows);
  USMVector<size_t, MemoryType::on_device> row_indices_1(&qu, num_rows);
@@ -165,14 +161,12 @@ void TestHistUpdaterInitData(const xgboost::tree::TrainParam& param, bool has_ne

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());
-  ObjInfo task{ObjInfo::kRegression};

  auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0}.GenerateDMatrix();

  FeatureInteractionConstraintHost int_constraints;
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};

-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());

  USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
  GenerateRandomGPairs(&qu, gpair.Data(), num_rows, has_neg_hess);
@@ -221,14 +215,12 @@ void TestHistUpdaterBuildHistogramsLossGuide(const xgboost::tree::TrainParam& pa

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());
-  ObjInfo task{ObjInfo::kRegression};

  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();

  FeatureInteractionConstraintHost int_constraints;
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};

-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());

@@ -285,14 +277,12 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());
-  ObjInfo task{ObjInfo::kRegression};

  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();

  FeatureInteractionConstraintHost int_constraints;
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};

-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());

@@ -345,14 +335,12 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) {

  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());
-  ObjInfo task{ObjInfo::kRegression};

  auto p_fmat = RandomDataGenerator{num_rows, num_columns, 0.0f}.GenerateDMatrix();

  FeatureInteractionConstraintHost int_constraints;
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};

-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());

@@ -423,8 +411,6 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
  DeviceManager device_manager;
  auto qu = device_manager.GetQueue(ctx.Device());

-  ObjInfo task{ObjInfo::kRegression}; 
-
  auto p_fmat = RandomDataGenerator{num_rows, num_columns, sparsity}.GenerateDMatrix();
  sycl::DeviceMatrix dmat;
  dmat.Init(qu, p_fmat.get());
@@ -439,8 +425,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
  nodes.emplace_back(tree::ExpandEntry(0, tree.GetDepth(0)));

  FeatureInteractionConstraintHost int_constraints;
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
  USMVector<GradientPair, MemoryType::on_device> gpair(&qu, num_rows);
  GenerateRandomGPairs(&qu, gpair.Data(), num_rows, false);

@@ -455,8 +440,7 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa
  std::vector<size_t> row_indices_desired_host(num_rows);
  size_t n_left, n_right;
  {
-    std::unique_ptr<TreeUpdater> pruner4verification{TreeUpdater::Create("prune", &ctx, &task)};
-    TestHistUpdater<GradientSumT> updater4verification(&ctx, qu, param, std::move(pruner4verification), int_constraints, p_fmat.get());
+    TestHistUpdater<GradientSumT> updater4verification(&ctx, qu, param, int_constraints, p_fmat.get());
    auto* row_set_collection4verification = updater4verification.TestInitData(gmat, gpair, *p_fmat, tree);

    size_t n_nodes = nodes.size();
@@ -526,9 +510,7 @@ void TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param)

  RegTree tree;
  FeatureInteractionConstraintHost int_constraints;
-  ObjInfo task{ObjInfo::kRegression};
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
@@ -576,9 +558,7 @@ void TestHistUpdaterExpandWithDepthWise(const xgboost::tree::TrainParam& param)

  RegTree tree;
  FeatureInteractionConstraintHost int_constraints;
-  ObjInfo task{ObjInfo::kRegression};
-  std::unique_ptr<TreeUpdater> pruner{TreeUpdater::Create("prune", &ctx, &task)};
-  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, std::move(pruner), int_constraints, p_fmat.get());
+  TestHistUpdater<GradientSumT> updater(&ctx, qu, param, int_constraints, p_fmat.get());
  updater.SetHistSynchronizer(new BatchHistSynchronizer<GradientSumT>());
  updater.SetHistRowsAdder(new BatchHistRowsAdder<GradientSumT>());
  auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree);
--- a/tests/cpp/plugin/test_sycl_prediction_cache.cc
+++ b/tests/cpp/plugin/test_sycl_prediction_cache.cc
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2020-2024 by XGBoost contributors
+ */
+#include <gtest/gtest.h>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
+#pragma GCC diagnostic ignored "-W#pragma-messages"
+#include "../tree/test_prediction_cache.h"
+#pragma GCC diagnostic pop
+
+namespace xgboost::sycl::tree {
+
+class SyclPredictionCache : public xgboost::TestPredictionCache {};
+
+TEST_F(SyclPredictionCache, Hist) {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"device", "sycl"}});
+
+  this->RunTest(&ctx, "grow_quantile_histmaker_sycl", "one_output_per_tree");
+}
+
+}  // namespace xgboost::sycl::tree
--- a/tests/cpp/tree/test_prediction_cache.cc
+++ b/tests/cpp/tree/test_prediction_cache.cc
@@ -2,97 +2,10 @@
 * Copyright 2021-2023 by XGBoost contributors
 */
 #include <gtest/gtest.h>
-#include <xgboost/host_device_vector.h>
-#include <xgboost/tree_updater.h>

-#include <memory>
-
-#include "../../../src/tree/param.h"  // for TrainParam
-#include "../helpers.h"
-#include "xgboost/task.h"             // for ObjInfo
+#include "test_prediction_cache.h"

 namespace xgboost {
-
-class TestPredictionCache : public ::testing::Test {
-  std::shared_ptr<DMatrix> Xy_;
-  std::size_t n_samples_{2048};
-
- protected:
-  void SetUp() override {
-    std::size_t n_features = 13;
-    bst_target_t n_targets = 3;
-    Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
-  }
-
-  void RunLearnerTest(Context const* ctx, std::string updater_name, float subsample,
-                      std::string const& grow_policy, std::string const& strategy) {
-    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
-    learner->SetParam("device", ctx->DeviceName());
-    learner->SetParam("updater", updater_name);
-    learner->SetParam("multi_strategy", strategy);
-    learner->SetParam("grow_policy", grow_policy);
-    learner->SetParam("subsample", std::to_string(subsample));
-    learner->SetParam("nthread", "0");
-    learner->Configure();
-
-    for (size_t i = 0; i < 8; ++i) {
-      learner->UpdateOneIter(i, Xy_);
-    }
-
-    HostDeviceVector<float> out_prediction_cached;
-    learner->Predict(Xy_, false, &out_prediction_cached, 0, 0);
-
-    Json model{Object()};
-    learner->SaveModel(&model);
-
-    HostDeviceVector<float> out_prediction;
-    {
-      std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
-      learner->LoadModel(model);
-      learner->Predict(Xy_, false, &out_prediction, 0, 0);
-    }
-
-    auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
-    auto const h_predt = out_prediction.ConstHostSpan();
-
-    ASSERT_EQ(h_predt.size(), h_predt_cached.size());
-    for (size_t i = 0; i < h_predt.size(); ++i) {
-      ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
-    }
-  }
-
-  void RunTest(Context* ctx, std::string const& updater_name, std::string const& strategy) {
-    {
-      ctx->InitAllowUnknown(Args{{"nthread", "8"}});
-
-      ObjInfo task{ObjInfo::kRegression};
-      std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, ctx, &task)};
-      RegTree tree;
-      std::vector<RegTree*> trees{&tree};
-      auto gpair = GenerateRandomGradients(ctx, n_samples_, 1);
-      tree::TrainParam param;
-      param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
-
-      updater->Configure(Args{});
-      std::vector<HostDeviceVector<bst_node_t>> position(1);
-      updater->Update(&param, &gpair, Xy_.get(), position, trees);
-      HostDeviceVector<float> out_prediction_cached;
-      out_prediction_cached.SetDevice(ctx->Device());
-      out_prediction_cached.Resize(n_samples_);
-      auto cache =
-          linalg::MakeTensorView(ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
-      ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
-    }
-
-    for (auto policy : {"depthwise", "lossguide"}) {
-      for (auto subsample : {1.0f, 0.4f}) {
-        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
-        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
-      }
-    }
-  }
-};
-
 TEST_F(TestPredictionCache, Approx) {
  Context ctx;
  this->RunTest(&ctx, "grow_histmaker", "one_output_per_tree");
@@ -119,4 +32,4 @@ TEST_F(TestPredictionCache, GpuApprox) {
  this->RunTest(&ctx, "grow_gpu_approx", "one_output_per_tree");
 }
 #endif  // defined(XGBOOST_USE_CUDA)
-}  // namespace xgboost
+}  // namespace xgboost
--- a/tests/cpp/tree/test_prediction_cache.h
+++ b/tests/cpp/tree/test_prediction_cache.h
@@ -0,0 +1,97 @@
+/**
+ * Copyright 2021-2024 by XGBoost contributors.
+ */
+#pragma once
+
+#include <gtest/gtest.h>
+
+#include <xgboost/host_device_vector.h>
+#include <xgboost/tree_updater.h>
+
+#include <memory>
+
+#include "../../../src/tree/param.h"  // for TrainParam
+#include "../helpers.h"
+#include "xgboost/task.h"             // for ObjInfo
+
+namespace xgboost {
+class TestPredictionCache : public ::testing::Test {
+  std::shared_ptr<DMatrix> Xy_;
+  std::size_t n_samples_{2048};
+
+ protected:
+  void SetUp() override {
+    std::size_t n_features = 13;
+    bst_target_t n_targets = 3;
+    Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
+  }
+
+  void RunLearnerTest(Context const* ctx, std::string updater_name, float subsample,
+                      std::string const& grow_policy, std::string const& strategy) {
+    std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
+    learner->SetParam("device", ctx->DeviceName());
+    learner->SetParam("updater", updater_name);
+    learner->SetParam("multi_strategy", strategy);
+    learner->SetParam("grow_policy", grow_policy);
+    learner->SetParam("subsample", std::to_string(subsample));
+    learner->SetParam("nthread", "0");
+    learner->Configure();
+
+    for (size_t i = 0; i < 8; ++i) {
+      learner->UpdateOneIter(i, Xy_);
+    }
+
+    HostDeviceVector<float> out_prediction_cached;
+    learner->Predict(Xy_, false, &out_prediction_cached, 0, 0);
+
+    Json model{Object()};
+    learner->SaveModel(&model);
+
+    HostDeviceVector<float> out_prediction;
+    {
+      std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
+      learner->LoadModel(model);
+      learner->Predict(Xy_, false, &out_prediction, 0, 0);
+    }
+
+    auto const h_predt_cached = out_prediction_cached.ConstHostSpan();
+    auto const h_predt = out_prediction.ConstHostSpan();
+
+    ASSERT_EQ(h_predt.size(), h_predt_cached.size());
+    for (size_t i = 0; i < h_predt.size(); ++i) {
+      ASSERT_NEAR(h_predt[i], h_predt_cached[i], kRtEps);
+    }
+  }
+
+  void RunTest(Context* ctx, std::string const& updater_name, std::string const& strategy) {
+    {
+      ctx->InitAllowUnknown(Args{{"nthread", "8"}});
+
+      ObjInfo task{ObjInfo::kRegression};
+      std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, ctx, &task)};
+      RegTree tree;
+      std::vector<RegTree*> trees{&tree};
+      auto gpair = GenerateRandomGradients(ctx, n_samples_, 1);
+      tree::TrainParam param;
+      param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
+
+      updater->Configure(Args{});
+      std::vector<HostDeviceVector<bst_node_t>> position(1);
+      updater->Update(&param, &gpair, Xy_.get(), position, trees);
+      HostDeviceVector<float> out_prediction_cached;
+      out_prediction_cached.SetDevice(ctx->Device());
+      out_prediction_cached.Resize(n_samples_);
+      auto cache =
+          linalg::MakeTensorView(ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
+      ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
+    }
+
+    for (auto policy : {"depthwise", "lossguide"}) {
+      for (auto subsample : {1.0f, 0.4f}) {
+        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
+        this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
+      }
+    }
+  }
+};
+}  // namespace xgboost
--- a/tests/python-sycl/test_sycl_training_continuation.py
+++ b/tests/python-sycl/test_sycl_training_continuation.py
@@ -0,0 +1,59 @@
+import numpy as np
+import xgboost as xgb
+import json
+
+rng = np.random.RandomState(1994)
+
+
+class TestSYCLTrainingContinuation:
+    def run_training_continuation(self, use_json):
+        kRows = 64
+        kCols = 32
+        X = rng.randn(kRows, kCols)  # seeded module-level RNG: reproducible data
+        y = rng.randn(kRows)
+        dtrain = xgb.DMatrix(X, y)
+        params = {
+            "device": "sycl",
+            "max_depth": "2",
+            "gamma": "0.1",
+            "alpha": "0.01",
+            "enable_experimental_json_serialization": use_json,
+        }
+        bst_0 = xgb.train(params, dtrain, num_boost_round=64)  # reference: 64 rounds
+        dump_0 = bst_0.get_dump(dump_format="json")
+
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32)  # 32 rounds ...
+        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)  # + 32
+        dump_1 = bst_1.get_dump(dump_format="json")
+
+        def recursive_compare(obj_0, obj_1):  # walk both JSON dumps in lockstep
+            if isinstance(obj_0, float):
+                assert np.isclose(obj_0, obj_1, atol=1e-6)
+            elif isinstance(obj_0, str):
+                assert obj_0 == obj_1
+            elif isinstance(obj_0, int):
+                assert obj_0 == obj_1
+            elif isinstance(obj_0, dict):
+                keys_0 = list(obj_0.keys())
+                keys_1 = list(obj_1.keys())
+                values_0 = list(obj_0.values())
+                values_1 = list(obj_1.values())
+                for i in range(len(keys_0)):
+                    assert keys_0[i] == keys_1[i]
+                    if keys_0[i] != "missing":  # "missing" values are not compared
+                        recursive_compare(values_0[i], values_1[i])
+            else:
+                for i in range(len(obj_0)):
+                    recursive_compare(obj_0[i], obj_1[i])
+
+        assert len(dump_0) == len(dump_1)  # same number of trees in both models
+        for i in range(len(dump_0)):
+            obj_0 = json.loads(dump_0[i])
+            obj_1 = json.loads(dump_1[i])
+            recursive_compare(obj_0, obj_1)
+
+    def test_sycl_training_continuation_binary(self):
+        self.run_training_continuation(False)
+
+    def test_sycl_training_continuation_json(self):
+        self.run_training_continuation(True)
--- a/tests/python-sycl/test_sycl_updaters.py
+++ b/tests/python-sycl/test_sycl_updaters.py
@@ -0,0 +1,80 @@
+import numpy as np
+import gc
+import pytest
+import xgboost as xgb
+from hypothesis import given, strategies, assume, settings, note
+
+import sys
+import os
+
+# sys.path.append("tests/python")
+# import testing as tm
+from xgboost import testing as tm
+
+parameter_strategy = strategies.fixed_dictionaries(
+    {
+        "max_depth": strategies.integers(0, 11),
+        "max_leaves": strategies.integers(0, 256),
+        "max_bin": strategies.integers(2, 1024),
+        "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
+        "single_precision_histogram": strategies.booleans(),
+        "min_child_weight": strategies.floats(0.5, 2.0),
+        "seed": strategies.integers(0, 10),
+        # We cannot enable subsampling as the training loss can increase
+        # 'subsample': strategies.floats(0.5, 1.0),
+        "colsample_bytree": strategies.floats(0.5, 1.0),
+        "colsample_bylevel": strategies.floats(0.5, 1.0),
+    }
+).filter(
+    lambda x: (x["max_depth"] > 0 or x["max_leaves"] > 0)
+    and (x["max_depth"] > 0 or x["grow_policy"] == "lossguide")
+)
+
+
+def train_result(param, dmat, num_rounds):
+    result = {}
+    xgb.train(
+        param,
+        dmat,
+        num_rounds,
+        [(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
+    return result
+
+
+class TestSYCLUpdaters:
+    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
+    @settings(deadline=None)
+    def test_sycl_hist(self, param, num_rounds, dataset):
+        param["tree_method"] = "hist"
+        param["device"] = "sycl"
+        param["verbosity"] = 0
+        param = dataset.set_params(param)
+        result = train_result(param, dataset.get_dmat(), num_rounds)
+        note(result)
+        assert tm.non_increasing(result["train"][dataset.metric])
+
+    @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
+    @settings(deadline=None)
+    def test_specified_device_id_sycl_update(self, dataset, device_id):
+        # Read the list of SYCL devices reported by the `sycl-ls` tool.
+        sycl_ls = os.popen("sycl-ls").read()
+        devices = sycl_ls.split("\n")
+
+        # The test should run only on a GPU: pick the device_id-th "opencl:gpu"
+        # entry and address it by its position (idx) in the sycl-ls listing
+        # instead of by device_id. Nothing is trained if no such entry exists.
+        target_device_type = "opencl:gpu"
+        found_devices = 0
+        for idx in range(len(devices)):
+            if len(devices[idx]) >= len(target_device_type):
+                if devices[idx][1 : 1 + len(target_device_type)] == target_device_type:
+                    if found_devices == device_id:
+                        param = {"device": f"sycl:gpu:{idx}"}
+                        param = dataset.set_params(param)
+                        result = train_result(param, dataset.get_dmat(), 10)
+                        assert tm.non_increasing(result["train"][dataset.metric])
+                        break  # selected GPU trained; do not also train later ones
+                    found_devices += 1