101 lines
3.1 KiB
Python
101 lines
3.1 KiB
Python
import json
|
|
import xgboost as xgb
|
|
import pytest
|
|
import tempfile
|
|
import sys
|
|
import numpy as np
|
|
import os
|
|
|
|
sys.path.append("tests/python")
|
|
import testing as tm # noqa
|
|
import test_with_sklearn as twskl # noqa
|
|
|
|
# Module-wide marker: the skip condition and reason come from tm.no_sklearn().
pytestmark = pytest.mark.skipif(**tm.no_sklearn())

# Shared, fixed-seed random state used by the tests below.
rng = np.random.RandomState(seed=1994)
|
|
|
|
|
|
def test_gpu_binary_classification():
    """Fit the sklearn classifiers with ``gpu_hist`` on two-class digits.

    Both ``XGBClassifier`` and ``XGBRFClassifier`` are trained on each fold of
    a shuffled 2-fold split, and the misclassification rate on the held-out
    fold must stay below 0.1.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    digits = load_digits(n_class=2)
    target = digits['target']
    features = digits['data']
    # The splitter is built once; the shared ``rng`` drives its shuffling.
    folds = KFold(n_splits=2, shuffle=True, random_state=rng)
    for estimator_cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
        for train_idx, test_idx in folds.split(features, target):
            model = estimator_cls(
                random_state=42,
                tree_method='gpu_hist',
                n_estimators=4,
                gpu_id='0',
            )
            model.fit(features[train_idx], target[train_idx])

            predicted = model.predict(features[test_idx])
            expected = target[test_idx]
            # Count predictions whose thresholded value differs from the label.
            mismatches = sum(
                1
                for pred, label in zip(predicted, expected)
                if int(pred > 0.5) != label
            )
            error_rate = mismatches / float(len(predicted))
            assert error_rate < 0.1
|
|
|
|
|
|
def test_boost_from_prediction_gpu_hist():
    """Call ``twskl.run_boost_from_prediction`` with the ``gpu_hist`` tree method."""
    tree_method = 'gpu_hist'
    twskl.run_boost_from_prediction(tree_method)
|
|
|
|
|
|
def test_num_parallel_tree():
    """Call ``twskl.run_boston_housing_rf_regression`` with the ``gpu_hist`` tree method."""
    tree_method = "gpu_hist"
    twskl.run_boston_housing_rf_regression(tree_method)
|
|
|
|
|
|
@pytest.mark.skipif(**tm.no_pandas())
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
def test_categorical():
    """Check categorical-feature handling of the sklearn wrappers with ``gpu_hist``.

    Part one fits a classifier on the agaricus training file with every column
    cast to the pandas ``category`` dtype, saves the model as JSON and inspects
    the ``categories_sizes`` entry of the first tree.

    Part two fits a regressor on a single categorical column, supplied first
    as a pandas frame and then as a cudf frame, and checks that ``predict``
    and ``inplace_predict`` return matching values.
    """
    import pandas as pd
    import cudf
    import cupy as cp
    from sklearn.datasets import load_svmlight_file

    train_path = os.path.join(
        tm.PROJECT_ROOT, "demo", "data", "agaricus.txt.train"
    )
    sparse_features, labels = load_svmlight_file(train_path)
    classifier = xgb.XGBClassifier(
        tree_method="gpu_hist",
        use_label_encoder=False,
        enable_categorical=True,
        n_estimators=10,
    )
    categorical_features = pd.DataFrame(sparse_features.todense()).astype(
        "category"
    )
    classifier.fit(categorical_features, labels)

    # Round-trip the model through a JSON file so its tree data can be read.
    with tempfile.TemporaryDirectory() as workdir:
        model_path = os.path.join(workdir, "categorial.json")
        classifier.save_model(model_path)
        with open(model_path) as handle:
            dumped = json.load(handle)

    first_tree = dumped["learner"]["gradient_booster"]["model"]["trees"][0]
    sizes = np.array(first_tree["categories_sizes"])
    assert sizes.shape[0] != 0
    np.testing.assert_allclose(sizes, 1)

    def check_predt(X, y):
        """Fit a regressor on ``X``/``y`` and compare both prediction paths."""
        regressor = xgb.XGBRegressor(
            tree_method="gpu_hist", enable_categorical=True, n_estimators=64
        )
        regressor.fit(X, y)
        expected = regressor.predict(X)

        booster = regressor.get_booster()
        feature_types = booster.feature_types
        assert "c" in feature_types
        assert len(feature_types) == 1

        inplace = booster.inplace_predict(X)
        # A cupy result is converted to numpy before the comparison.
        if isinstance(inplace, cp.ndarray):
            inplace = cp.asnumpy(inplace)
        np.testing.assert_allclose(expected, inplace)

    targets = [1, 2, 3]
    frame = pd.DataFrame({"f0": ["a", "b", "c"]})
    frame["f0"] = frame["f0"].astype("category")
    check_predt(frame, targets)

    # Same data again, this time held in a cudf frame.
    check_predt(cudf.DataFrame(frame), targets)
|