Categorical data support for cuDF. (#7042)
* Add support in DMatrix.
* Add support in DQM, except for iterator.
This commit is contained in:
@@ -171,6 +171,21 @@ Arrow specification.'''
|
||||
def test_cudf_metainfo_device_dmatrix(self):
    """Delegate to ``_test_cudf_metainfo`` with ``xgb.DeviceQuantileDMatrix``."""
    # The helper receives the matrix class itself, not an instance.
    matrix_cls = xgb.DeviceQuantileDMatrix
    _test_cudf_metainfo(matrix_cls)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
def test_categorical(self):
    """Check feature types reported for categorical cuDF input.

    Data produced by ``tm.make_categorical(100, 30, 17, False)`` is
    converted to cuDF objects and fed to both ``xgb.DMatrix`` and
    ``xgb.DeviceQuantileDMatrix`` with ``enable_categorical=True``.
    Each resulting matrix must report one feature type per input column,
    and every reported type must be ``"categorical"``.
    """
    import cudf

    host_features, host_labels = tm.make_categorical(100, 30, 17, False)
    features = cudf.from_pandas(host_features)
    labels = cudf.from_pandas(host_labels)

    # Both matrix types are expected to behave the same way, so the same
    # assertions are applied to each in turn.
    for matrix_cls in (xgb.DMatrix, xgb.DeviceQuantileDMatrix):
        dmat = matrix_cls(features, labels, enable_categorical=True)
        assert len(dmat.feature_types) == features.shape[1]
        assert all(t == "categorical" for t in dmat.feature_types)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
|
||||
@@ -43,22 +43,8 @@ class TestGPUUpdaters:
|
||||
assert tm.non_increasing(result['train'][dataset.metric])
|
||||
|
||||
def run_categorical_basic(self, rows, cols, rounds, cats):
|
||||
import pandas as pd
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
pd_dict = {}
|
||||
for i in range(cols):
|
||||
c = rng.randint(low=0, high=cats+1, size=rows)
|
||||
pd_dict[str(i)] = pd.Series(c, dtype=np.int64)
|
||||
|
||||
df = pd.DataFrame(pd_dict)
|
||||
label = df.iloc[:, 0]
|
||||
for i in range(0, cols-1):
|
||||
label += df.iloc[:, i]
|
||||
label += 1
|
||||
df = df.astype('category')
|
||||
onehot = pd.get_dummies(df)
|
||||
cat = df
|
||||
onehot, label = tm.make_categorical(rows, cols, cats, True)
|
||||
cat, _ = tm.make_categorical(rows, cols, cats, False)
|
||||
|
||||
by_etl_results = {}
|
||||
by_builtin_results = {}
|
||||
|
||||
Reference in New Issue
Block a user