Quantile DMatrix for CPU. (#8130)
- Add a new `QuantileDMatrix` that works for both CPU and GPU.
- Deprecate `DeviceQuantileDMatrix`.
This commit is contained in:
@@ -1,4 +1,3 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import pytest
|
||||
@@ -6,16 +5,14 @@ import sys
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
import test_quantile_dmatrix as tqd
|
||||
|
||||
|
||||
class TestDeviceQuantileDMatrix:
|
||||
def test_dmatrix_numpy_init(self):
|
||||
data = np.random.randn(5, 5)
|
||||
with pytest.raises(TypeError, match='is not supported'):
|
||||
xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64))
|
||||
cputest = tqd.TestQuantileDMatrix()
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_dmatrix_feature_weights(self):
|
||||
def test_dmatrix_feature_weights(self) -> None:
|
||||
import cupy as cp
|
||||
rng = cp.random.RandomState(1994)
|
||||
data = rng.randn(5, 5)
|
||||
@@ -29,7 +26,7 @@ class TestDeviceQuantileDMatrix:
|
||||
feature_weights.astype(np.float32))
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_dmatrix_cupy_init(self):
|
||||
def test_dmatrix_cupy_init(self) -> None:
|
||||
import cupy as cp
|
||||
data = cp.random.randn(5, 5)
|
||||
xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64))
|
||||
@@ -55,3 +52,10 @@ class TestDeviceQuantileDMatrix:
|
||||
|
||||
cp.testing.assert_allclose(fw, got_fw)
|
||||
cp.testing.assert_allclose(labels, got_labels)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_ref_dmatrix(self) -> None:
    """Run the shared ``ref`` checks of the CPU suite on the GPU.

    Delegates to ``self.cputest.run_ref_dmatrix`` with a CuPy random state
    seeded with 1994, the ``gpu_hist`` tree method and categorical data
    disabled.
    """
    import cupy as cp

    self.cputest.run_ref_dmatrix(cp.random.RandomState(1994), "gpu_hist", False)
|
||||
|
||||
@@ -429,9 +429,10 @@ class TestDistributedGPU:
|
||||
sig = OrderedDict(signature(dxgb.DaskDMatrix).parameters)
|
||||
del sig["client"]
|
||||
ddm_names = list(sig.keys())
|
||||
sig = OrderedDict(signature(dxgb.DaskDeviceQuantileDMatrix).parameters)
|
||||
sig = OrderedDict(signature(dxgb.DaskQuantileDMatrix).parameters)
|
||||
del sig["client"]
|
||||
del sig["max_bin"]
|
||||
del sig["ref"]
|
||||
ddqdm_names = list(sig.keys())
|
||||
assert len(ddm_names) == len(ddqdm_names)
|
||||
|
||||
@@ -442,9 +443,10 @@ class TestDistributedGPU:
|
||||
sig = OrderedDict(signature(xgb.DMatrix).parameters)
|
||||
del sig["nthread"] # no nthread in dask
|
||||
dm_names = list(sig.keys())
|
||||
sig = OrderedDict(signature(xgb.DeviceQuantileDMatrix).parameters)
|
||||
sig = OrderedDict(signature(xgb.QuantileDMatrix).parameters)
|
||||
del sig["nthread"]
|
||||
del sig["max_bin"]
|
||||
del sig["ref"]
|
||||
dqdm_names = list(sig.keys())
|
||||
|
||||
# between single node
|
||||
@@ -499,7 +501,6 @@ class TestDistributedGPU:
|
||||
for arg in rabit_args:
|
||||
if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'):
|
||||
port_env = arg.decode('utf-8')
|
||||
port_env = arg.decode('utf-8')
|
||||
if arg.decode("utf-8").startswith("DMLC_TRACKER_URI"):
|
||||
uri_env = arg.decode("utf-8")
|
||||
port = port_env.split('=')
|
||||
|
||||
@@ -1,32 +1,12 @@
|
||||
import xgboost as xgb
|
||||
from xgboost.data import SingleBatchInternalIter as SingleBatch
|
||||
import numpy as np
|
||||
from testing import IteratorForTest, non_increasing
|
||||
from typing import Tuple, List
|
||||
from testing import IteratorForTest, non_increasing, make_batches
|
||||
import pytest
|
||||
from hypothesis import given, strategies, settings
|
||||
from scipy.sparse import csr_matrix
|
||||
|
||||
|
||||
def make_batches(
    n_samples_per_batch: int, n_features: int, n_batches: int, use_cupy: bool = False
) -> Tuple[List[np.ndarray], List[np.ndarray]]:
    """Generate ``n_batches`` batches of random features and labels.

    Parameters
    ----------
    n_samples_per_batch :
        Number of rows in every batch.
    n_features :
        Number of columns in every feature batch.
    n_batches :
        Number of batches to generate.
    use_cupy :
        Draw from ``cupy.random.RandomState`` instead of the NumPy one.

    Returns
    -------
    A pair ``(X, y)`` of lists, one entry per batch.  The generator is seeded
    with 1994, so repeated calls yield the same values.
    """
    if use_cupy:
        import cupy

        generator = cupy.random.RandomState(1994)
    else:
        generator = np.random.RandomState(1994)

    # Tuple elements are evaluated left to right, so each batch draws its
    # features before its labels.
    pairs = [
        (
            generator.randn(n_samples_per_batch, n_features),
            generator.randn(n_samples_per_batch),
        )
        for _ in range(n_batches)
    ]
    return [pair[0] for pair in pairs], [pair[1] for pair in pairs]
|
||||
|
||||
|
||||
def test_single_batch(tree_method: str = "approx") -> None:
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
@@ -111,8 +91,8 @@ def run_data_iterator(
|
||||
if not subsample:
|
||||
assert non_increasing(results_from_it["Train"]["rmse"])
|
||||
|
||||
X, y = it.as_arrays()
|
||||
Xy = xgb.DMatrix(X, y)
|
||||
X, y, w = it.as_arrays()
|
||||
Xy = xgb.DMatrix(X, y, weight=w)
|
||||
assert Xy.num_row() == n_samples_per_batch * n_batches
|
||||
assert Xy.num_col() == n_features
|
||||
|
||||
|
||||
212
tests/python/test_quantile_dmatrix.py
Normal file
212
tests/python/test_quantile_dmatrix.py
Normal file
@@ -0,0 +1,212 @@
|
||||
from typing import Dict, List, Any
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from scipy import sparse
|
||||
from testing import IteratorForTest, make_batches, make_batches_sparse, make_categorical
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
|
||||
class TestQuantileDMatrix:
    """Tests for ``xgb.QuantileDMatrix`` on the CPU.

    ``run_ref_dmatrix`` is a parameterised helper rather than a test: it takes
    the random generator and tree method as arguments so that other suites can
    call it with different ones.
    """

    @staticmethod
    def _make_iterator(
        n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float
    ) -> IteratorForTest:
        """Build a batch iterator over dense (``sparsity == 0``) or CSR batches."""
        if sparsity == 0.0:
            batches = make_batches(n_samples_per_batch, n_features, n_batches, False)
        else:
            batches = make_batches_sparse(
                n_samples_per_batch, n_features, n_batches, sparsity
            )
        # ``batches`` is (X, y, w); the iterator's ``cache`` argument is None.
        return IteratorForTest(*batches, cache=None)

    @staticmethod
    def _dense_data(rng: Any, n_samples: int, n_features: int) -> Any:
        """Draw a normal ``(n_samples, n_features)`` matrix and a label vector."""
        X = rng.normal(loc=0, scale=3, size=n_samples * n_features).reshape(
            n_samples, n_features
        )
        y = rng.normal(0, 3, size=n_samples)
        return X, y

    @staticmethod
    def _maybe_to_cudf(X: Any, y: Any, tree_method: str) -> Any:
        """Convert pandas inputs to cuDF when training with ``gpu_hist``."""
        if tree_method == "gpu_hist":
            import cudf

            X = cudf.from_pandas(X)
            y = cudf.from_pandas(y)
        return X, y

    @staticmethod
    def _eval_history(
        tree_method: str, dtrain: Any, evals: List[Any]
    ) -> Dict[str, Dict[str, List[float]]]:
        """Train with default rounds and return the recorded evaluation history."""
        results: Dict[str, Dict[str, List[float]]] = {}
        xgb.train(
            {"tree_method": tree_method},
            dtrain,
            evals=evals,
            evals_result=results,
        )
        return results

    def test_basic(self) -> None:
        """Construct from dense and CSR inputs and check the reported shape."""
        n_samples = 234
        n_features = 8

        # Seeded so that a failure can be reproduced.
        rng = np.random.default_rng(1994)
        X, y = self._dense_data(rng, n_samples, n_features)

        inputs = [
            X,
            sparse.random(
                n_samples, n_features, density=0.1, format="csr", random_state=1994
            ),
            sparse.random(
                n_samples, n_features, density=0.8, format="csr", random_state=1994
            ),
        ]
        for data in inputs:
            Xy = xgb.QuantileDMatrix(data, y)
            assert Xy.num_row() == n_samples
            assert Xy.num_col() == n_features

    @pytest.mark.parametrize("sparsity", [0.0, 0.1, 0.8, 0.9])
    def test_with_iterator(self, sparsity: float) -> None:
        """Construct from a batch iterator and check the concatenated shape."""
        n_samples_per_batch = 317
        n_features = 8
        n_batches = 7

        it = self._make_iterator(n_samples_per_batch, n_features, n_batches, sparsity)
        Xy = xgb.QuantileDMatrix(it)
        assert Xy.num_row() == n_samples_per_batch * n_batches
        assert Xy.num_col() == n_features

    @pytest.mark.parametrize("sparsity", [0.0, 0.1, 0.5, 0.8, 0.9])
    def test_training(self, sparsity: float) -> None:
        """Compare ``hist`` training on iterator input against in-memory arrays."""
        n_samples_per_batch = 317
        n_features = 8
        n_batches = 7
        it = self._make_iterator(n_samples_per_batch, n_features, n_batches, sparsity)

        parameters = {"tree_method": "hist", "max_bin": 256}
        Xy_it = xgb.QuantileDMatrix(it, max_bin=parameters["max_bin"])
        from_it = xgb.train(parameters, Xy_it)

        X, y, w = it.as_arrays()
        # Weights fed batch by batch must match the concatenated weights.
        w_it = Xy_it.get_weight()
        np.testing.assert_allclose(w_it, w)

        Xy_arr = xgb.DMatrix(X, y, weight=w)
        from_arr = xgb.train(parameters, Xy_arr)

        # Each booster predicts on the other one's matrix.
        np.testing.assert_allclose(from_arr.predict(Xy_it), from_it.predict(Xy_arr))

        # Shift the labels to be strictly positive before using reg:gamma.
        y -= y.min()
        y += 0.01
        Xy = xgb.QuantileDMatrix(X, y, weight=w)
        parameters = {
            "tree_method": "approx",
            "max_bin": 256,
            "objective": "reg:gamma",
        }
        # Only the training call is expected to raise.
        with pytest.raises(ValueError, match=r"Only.*hist.*"):
            xgb.train(parameters, Xy)

    def run_ref_dmatrix(self, rng: Any, tree_method: str, enable_cat: bool) -> None:
        """Exercise the ``ref`` argument of ``QuantileDMatrix``.

        Parameters
        ----------
        rng :
            Random state providing ``normal``; used for the dense inputs.
        tree_method :
            Tree method passed to ``xgb.train``.  With ``"gpu_hist"`` the
            categorical data is converted to cuDF.
        enable_cat :
            Use categorical data from ``make_categorical`` instead of dense
            normal data.
        """
        n_samples, n_features = 2048, 17
        if enable_cat:
            X, y = make_categorical(
                n_samples, n_features, n_categories=13, onehot=False
            )
            X, y = self._maybe_to_cudf(X, y, tree_method)
        else:
            X, y = self._dense_data(rng, n_samples, n_features)

        # Use ref: validation on the same data must reproduce the training metric.
        Xy = xgb.QuantileDMatrix(X, y, enable_categorical=enable_cat)
        Xy_valid = xgb.QuantileDMatrix(X, y, ref=Xy, enable_categorical=enable_cat)
        qdm_results = self._eval_history(
            tree_method, Xy, [(Xy, "Train"), (Xy_valid, "valid")]
        )
        np.testing.assert_allclose(
            qdm_results["Train"]["rmse"], qdm_results["valid"]["rmse"]
        )

        # No ref
        Xy_valid = xgb.QuantileDMatrix(X, y, enable_categorical=enable_cat)
        qdm_results = self._eval_history(
            tree_method, Xy, [(Xy, "Train"), (Xy_valid, "valid")]
        )
        np.testing.assert_allclose(
            qdm_results["Train"]["rmse"], qdm_results["valid"]["rmse"]
        )

        # Different number of features
        Xy = xgb.QuantileDMatrix(X, y, enable_categorical=enable_cat)
        dXy = xgb.DMatrix(X, y, enable_categorical=enable_cat)

        n_samples, n_features = 256, 15
        X, y = self._dense_data(rng, n_samples, n_features)
        with pytest.raises(ValueError, match=r".*features\."):
            xgb.QuantileDMatrix(X, y, ref=Xy, enable_categorical=enable_cat)

        # Compare training results
        n_samples, n_features = 256, 17
        if enable_cat:
            X, y = make_categorical(n_samples, n_features, 13, onehot=False)
            X, y = self._maybe_to_cudf(X, y, tree_method)
        else:
            X, y = self._dense_data(rng, n_samples, n_features)
        Xy_valid = xgb.QuantileDMatrix(X, y, ref=Xy, enable_categorical=enable_cat)
        # use DMatrix as ref
        Xy_valid_d = xgb.QuantileDMatrix(X, y, ref=dXy, enable_categorical=enable_cat)
        dXy_valid = xgb.DMatrix(X, y, enable_categorical=enable_cat)

        qdm_results = self._eval_history(
            tree_method, Xy, [(Xy, "Train"), (Xy_valid, "valid")]
        )
        dm_results = self._eval_history(
            tree_method,
            dXy,
            [(dXy, "Train"), (dXy_valid, "valid"), (Xy_valid_d, "dvalid")],
        )
        np.testing.assert_allclose(
            dm_results["Train"]["rmse"], qdm_results["Train"]["rmse"]
        )
        np.testing.assert_allclose(
            dm_results["valid"]["rmse"], qdm_results["valid"]["rmse"]
        )
        np.testing.assert_allclose(
            dm_results["dvalid"]["rmse"], qdm_results["valid"]["rmse"]
        )

    def test_ref_dmatrix(self) -> None:
        """Run the ``ref`` checks with ``hist``, with and without categories."""
        rng = np.random.RandomState(1994)
        self.run_ref_dmatrix(rng, "hist", True)
        self.run_ref_dmatrix(rng, "hist", False)

    def test_predict(self) -> None:
        """Predictions on ``QuantileDMatrix`` must match those on ``DMatrix``."""
        n_samples, n_features = 16, 2
        X, y = make_categorical(
            n_samples, n_features, n_categories=13, onehot=False
        )
        Xy = xgb.DMatrix(X, y, enable_categorical=True)

        booster = xgb.train({"tree_method": "hist"}, Xy)

        # A second DMatrix is built for prediction, separate from the training one.
        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        a = booster.predict(Xy)
        qXy = xgb.QuantileDMatrix(X, y, enable_categorical=True)
        b = booster.predict(qXy)
        np.testing.assert_allclose(a, b)
|
||||
@@ -1382,6 +1382,42 @@ class TestWithDask:
|
||||
num_rounds = 30
|
||||
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
|
||||
|
||||
def test_quantile_dmatrix(self, client: Client) -> None:
    """Check that ``DaskQuantileDMatrix`` trains like ``DaskDMatrix``.

    The same categorical data is trained with ``hist`` for 10 rounds, once
    through ``DaskDMatrix`` and once through ``DaskQuantileDMatrix`` (with the
    validation matrix built using ``ref``); the RMSE histories must agree.
    """
    X, y = make_categorical(client, 10000, 30, 13)

    def fit_history(dtrain: Any, dvalid: Any) -> Any:
        # Train with a fixed configuration and return the evaluation history.
        result = xgb.dask.train(
            client,
            {"tree_method": "hist"},
            dtrain,
            num_boost_round=10,
            evals=[(dtrain, "Train"), (dvalid, "Valid")]
        )
        return result["history"]

    plain_train = xgb.dask.DaskDMatrix(client, X, y, enable_categorical=True)
    plain_valid = xgb.dask.DaskDMatrix(client, X, y, enable_categorical=True)
    dmatrix_hist = fit_history(plain_train, plain_valid)

    quantile_train = xgb.dask.DaskQuantileDMatrix(
        client, X, y, enable_categorical=True
    )
    quantile_valid = xgb.dask.DaskQuantileDMatrix(
        client, X, y, enable_categorical=True, ref=quantile_train
    )
    quantile_hist = fit_history(quantile_train, quantile_valid)

    for split in ("Train", "Valid"):
        np.testing.assert_allclose(
            quantile_hist[split]["rmse"], dmatrix_hist[split]["rmse"]
        )
|
||||
|
||||
@given(params=exact_parameter_strategy,
|
||||
dataset=tm.dataset_strategy)
|
||||
@settings(deadline=None, suppress_health_check=suppress, print_blob=True)
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import os
|
||||
import multiprocessing
|
||||
from typing import Tuple, Union
|
||||
from typing import Tuple, Union, List, Sequence, Callable
|
||||
import urllib
|
||||
import zipfile
|
||||
import sys
|
||||
from typing import Optional
|
||||
from typing import Optional, Dict, Any
|
||||
from contextlib import contextmanager
|
||||
from io import StringIO
|
||||
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
|
||||
@@ -180,79 +180,148 @@ def skip_s390x():
|
||||
|
||||
|
||||
class IteratorForTest(xgb.core.DataIter):
|
||||
def __init__(self, X, y):
|
||||
def __init__(
|
||||
self,
|
||||
X: Sequence,
|
||||
y: Sequence,
|
||||
w: Optional[Sequence],
|
||||
cache: Optional[str] = "./"
|
||||
) -> None:
|
||||
assert len(X) == len(y)
|
||||
self.X = X
|
||||
self.y = y
|
||||
self.w = w
|
||||
self.it = 0
|
||||
super().__init__("./")
|
||||
super().__init__(cache)
|
||||
|
||||
def next(self, input_data):
|
||||
def next(self, input_data: Callable) -> int:
|
||||
if self.it == len(self.X):
|
||||
return 0
|
||||
# Use copy to make sure the iterator doesn't hold a reference to the data.
|
||||
input_data(data=self.X[self.it].copy(), label=self.y[self.it].copy())
|
||||
gc.collect() # clear up the copy, see if XGBoost access freed memory.
|
||||
input_data(
|
||||
data=self.X[self.it].copy(),
|
||||
label=self.y[self.it].copy(),
|
||||
weight=self.w[self.it].copy() if self.w else None,
|
||||
)
|
||||
gc.collect() # clear up the copy, see if XGBoost access freed memory.
|
||||
self.it += 1
|
||||
return 1
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
self.it = 0
|
||||
|
||||
def as_arrays(self):
|
||||
X = np.concatenate(self.X, axis=0)
|
||||
def as_arrays(
|
||||
self,
|
||||
) -> Tuple[Union[np.ndarray, sparse.csr_matrix], np.ndarray, np.ndarray]:
|
||||
if isinstance(self.X[0], sparse.csr_matrix):
|
||||
X = sparse.vstack(self.X, format="csr")
|
||||
else:
|
||||
X = np.concatenate(self.X, axis=0)
|
||||
y = np.concatenate(self.y, axis=0)
|
||||
return X, y
|
||||
w = np.concatenate(self.w, axis=0)
|
||||
return X, y, w
|
||||
|
||||
|
||||
def make_batches(
    n_samples_per_batch: int, n_features: int, n_batches: int, use_cupy: bool = False
) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
    """Generate ``n_batches`` batches of random features, labels and weights.

    Parameters
    ----------
    n_samples_per_batch :
        Number of rows in every batch.
    n_features :
        Number of columns in every feature batch.
    n_batches :
        Number of batches to generate.
    use_cupy :
        Draw from ``cupy.random.RandomState`` instead of the NumPy one.

    Returns
    -------
    A triple ``(X, y, w)`` of lists, one entry per batch.  Features and labels
    are standard normal, weights are uniform on [0, 1).  The generator is
    seeded with 1994.
    """
    if use_cupy:
        import cupy

        generator = cupy.random.RandomState(1994)
    else:
        generator = np.random.RandomState(1994)

    # Tuple elements are evaluated left to right, which keeps the per-batch
    # draw order: features, then labels, then weights.
    triples = [
        (
            generator.randn(n_samples_per_batch, n_features),
            generator.randn(n_samples_per_batch),
            generator.uniform(low=0, high=1, size=n_samples_per_batch),
        )
        for _ in range(n_batches)
    ]
    features = [triple[0] for triple in triples]
    labels = [triple[1] for triple in triples]
    weights = [triple[2] for triple in triples]
    return features, labels, weights
|
||||
|
||||
|
||||
def make_batches_sparse(
    n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float
) -> Tuple[List[sparse.csr_matrix], List[np.ndarray], List[np.ndarray]]:
    """Generate ``n_batches`` batches with random CSR feature matrices.

    Parameters
    ----------
    n_samples_per_batch :
        Number of rows in every batch.
    n_features :
        Number of columns in every feature matrix.
    n_batches :
        Number of batches to generate.
    sparsity :
        Fraction of missing entries; the matrices are drawn with density
        ``1.0 - sparsity``.

    Returns
    -------
    A triple ``(X, y, w)`` of lists, one entry per batch.  ``X`` holds float32
    CSR matrices, ``y`` standard-normal labels and ``w`` weights uniform on
    [0, 1).  All draws share one ``RandomState`` seeded with 1994.
    """
    generator = np.random.RandomState(1994)

    def draw_batch() -> Tuple[sparse.csr_matrix, np.ndarray, np.ndarray]:
        # Order matters: matrix, labels, weights all consume the same stream.
        matrix = sparse.random(
            n_samples_per_batch,
            n_features,
            1.0 - sparsity,
            format="csr",
            dtype=np.float32,
            random_state=generator,
        )
        labels = generator.randn(n_samples_per_batch)
        weights = generator.uniform(low=0, high=1, size=n_samples_per_batch)
        return matrix, labels, weights

    drawn = [draw_batch() for _ in range(n_batches)]
    return (
        [batch[0] for batch in drawn],
        [batch[1] for batch in drawn],
        [batch[2] for batch in drawn],
    )
|
||||
|
||||
|
||||
# Contains a dataset in numpy format as well as the relevant objective and metric
|
||||
class TestDataset:
|
||||
def __init__(self, name, get_dataset, objective, metric):
|
||||
def __init__(
|
||||
self, name: str, get_dataset: Callable, objective: str, metric: str
|
||||
) -> None:
|
||||
self.name = name
|
||||
self.objective = objective
|
||||
self.metric = metric
|
||||
self.X, self.y = get_dataset()
|
||||
self.w = None
|
||||
self.w: Optional[np.ndarray] = None
|
||||
self.margin: Optional[np.ndarray] = None
|
||||
|
||||
def set_params(self, params_in):
|
||||
def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:
|
||||
params_in['objective'] = self.objective
|
||||
params_in['eval_metric'] = self.metric
|
||||
if self.objective == "multi:softmax":
|
||||
params_in["num_class"] = int(np.max(self.y) + 1)
|
||||
return params_in
|
||||
|
||||
def get_dmat(self):
|
||||
def get_dmat(self) -> xgb.DMatrix:
|
||||
return xgb.DMatrix(
|
||||
self.X, self.y, self.w, base_margin=self.margin, enable_categorical=True
|
||||
)
|
||||
|
||||
def get_device_dmat(self):
|
||||
def get_device_dmat(self) -> xgb.DeviceQuantileDMatrix:
|
||||
w = None if self.w is None else cp.array(self.w)
|
||||
X = cp.array(self.X, dtype=np.float32)
|
||||
y = cp.array(self.y, dtype=np.float32)
|
||||
return xgb.DeviceQuantileDMatrix(X, y, w, base_margin=self.margin)
|
||||
|
||||
def get_external_dmat(self):
|
||||
def get_external_dmat(self) -> xgb.DMatrix:
|
||||
n_samples = self.X.shape[0]
|
||||
n_batches = 10
|
||||
per_batch = n_samples // n_batches + 1
|
||||
|
||||
predictor = []
|
||||
response = []
|
||||
weight = []
|
||||
for i in range(n_batches):
|
||||
beg = i * per_batch
|
||||
end = min((i + 1) * per_batch, n_samples)
|
||||
assert end != beg
|
||||
X = self.X[beg: end, ...]
|
||||
y = self.y[beg: end]
|
||||
w = self.w[beg: end] if self.w is not None else None
|
||||
predictor.append(X)
|
||||
response.append(y)
|
||||
if w is not None:
|
||||
weight.append(w)
|
||||
|
||||
it = IteratorForTest(predictor, response)
|
||||
it = IteratorForTest(predictor, response, weight if weight else None)
|
||||
return xgb.DMatrix(it)
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> str:
|
||||
return self.name
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
import xgboost as xgb
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.metrics import roc_auc_score
|
||||
|
||||
Reference in New Issue
Block a user