Deprecate LabelEncoder in XGBClassifier; Enable cuDF/cuPy inputs in XGBClassifier (#6269)
* Deprecate LabelEncoder in XGBClassifier; skip LabelEncoder for cuDF/cuPy inputs
* Add unit tests for cuDF and cuPy inputs with XGBClassifier
* Fix lint
* Clarify warning
* Move use_label_encoder option to XGBClassifier constructor
* Add a test for cudf.Series
* Add use_label_encoder to XGBRFClassifier doc
* Address reviewer feedback
This commit is contained in:
committed by
GitHub
parent
bcfab4d726
commit
c8ec62103a
@@ -172,6 +172,34 @@ Arrow specification.'''
|
||||
_test_cudf_metainfo(xgb.DeviceQuantileDMatrix)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_pandas())
def test_cudf_training_with_sklearn():
    '''Fit XGBClassifier on cuDF inputs with the label encoder switched off.

    The labels are supplied once as a single-column cudf.DataFrame and once
    as a cudf.Series.  In both cases the predictions over the training data
    must contain exactly the classes 0 and 1.
    '''
    import cudf
    import pandas as pd

    n_rows, n_cols = 50, 10

    # The seed pins the generated data, so the draw order below
    # (features, labels, weights, base margin) must stay as it is.
    np.random.seed(1)
    features_pd = pd.DataFrame(np.random.randn(n_rows, n_cols))
    labels_pd = pd.DataFrame((np.random.randn(n_rows) > 0).astype(np.int8))
    host_weights = np.random.random(n_rows) + 1.0
    host_margin = np.random.random(n_rows)

    # Move every input into cuDF containers.
    gpu_weights = cudf.DataFrame.from_pandas(pd.DataFrame(host_weights))
    gpu_margin = cudf.DataFrame.from_pandas(pd.DataFrame(host_margin))
    gpu_features = cudf.DataFrame.from_pandas(features_pd)
    labels_as_frame = cudf.DataFrame.from_pandas(labels_pd)
    labels_as_series = cudf.Series(data=labels_pd.iloc[:, 0])

    expected_classes = np.array([0, 1])
    for gpu_labels in (labels_as_frame, labels_as_series):
        classifier = xgb.XGBClassifier(
            gpu_id=0, tree_method='gpu_hist', use_label_encoder=False)
        classifier.fit(
            gpu_features,
            gpu_labels,
            sample_weight=gpu_weights,
            base_margin=gpu_margin,
            eval_set=[(gpu_features, gpu_labels)])
        predicted_classes = np.unique(classifier.predict(gpu_features))
        assert np.array_equal(predicted_classes, expected_classes)
|
||||
|
||||
|
||||
class IterForDMatrixTest(xgb.core.DataIter):
|
||||
'''A data iterator for XGBoost DMatrix.
|
||||
|
||||
|
||||
@@ -108,6 +108,25 @@ def _test_cupy_metainfo(DMatrixT):
|
||||
dmat_cupy.get_uint_info('group_ptr'))
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_sklearn())
def test_cupy_training_with_sklearn():
    '''Fit XGBClassifier on cuPy arrays with the label encoder switched off.

    Features, labels, sample weights and base margin are all passed as cuPy
    arrays.  The predictions over the training data must contain exactly the
    classes 0 and 1.
    '''
    import cupy as cp

    n_rows, n_cols = 50, 10

    # Seed both generators; the order of draws within each one is kept so
    # the generated data stays the same.
    np.random.seed(1)
    cp.random.seed(1)
    features = cp.random.randn(n_rows, n_cols, dtype='float32')
    labels = (cp.random.randn(n_rows, dtype='float32') > 0).astype('int8')
    host_weights = np.random.random(n_rows) + 1
    host_margin = np.random.random(n_rows)

    # Weights and margin are drawn with NumPy and then copied into cuPy arrays.
    gpu_weights = cp.array(host_weights)
    gpu_margin = cp.array(host_margin)

    classifier = xgb.XGBClassifier(
        gpu_id=0, tree_method='gpu_hist', use_label_encoder=False)
    classifier.fit(
        features,
        labels,
        sample_weight=gpu_weights,
        base_margin=gpu_margin,
        eval_set=[(features, labels)])
    predicted_classes = np.unique(classifier.predict(features))
    assert np.array_equal(predicted_classes, np.array([0, 1]))
|
||||
|
||||
|
||||
class TestFromCupy:
|
||||
'''Tests for constructing DMatrix from data structure conforming Apache
|
||||
Arrow specification.'''
|
||||
|
||||
Reference in New Issue
Block a user