Device dmatrix (#5420)
This commit is contained in:
172
tests/python-gpu/test_from_cudf.py
Normal file
172
tests/python-gpu/test_from_cudf.py
Normal file
@@ -0,0 +1,172 @@
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
|
||||
|
||||
def dmatrix_from_cudf(input_type, DMatrixT, missing=np.nan):
    '''Build ``DMatrixT`` from a random cudf DataFrame and check its shape.

    Parameters
    ----------
    input_type :
        NumPy scalar type that the first two feature columns and the label
        are cast through.
    DMatrixT :
        Callable used as ``DMatrixT(data, missing=..., label=...)``; the
        result must provide ``num_col()`` and ``num_row()``.
    missing :
        Value written into two cells of the data and forwarded to
        ``DMatrixT`` as its ``missing`` argument.  Defaults to ``np.nan``
        (the upper-case ``np.NAN`` alias is gone in NumPy 2.0, and a default
        is evaluated when the module is imported).
    '''
    import cudf
    import pandas as pd

    n_rows = 80
    n_cols = 3

    values = np.random.randn(n_rows, n_cols)
    # Round-trip the first two columns through ``input_type``.  The array
    # keeps randn's floating dtype, so integer types only truncate values.
    values[:, 0:2] = values[:, 0:2].astype(input_type)

    # Plant one missing marker in each of the first two columns.
    values[5, 0] = missing
    values[3, 1] = missing

    host_frame = pd.DataFrame({'0': values[:, 0],
                               '1': values[:, 1],
                               '2': values[:, 2].astype(np.int32)})

    host_label = pd.DataFrame(np.random.randn(n_rows).astype(input_type))

    device_frame = cudf.from_pandas(host_frame)
    # The label is handed over as a single column selected with ``iloc``.
    device_label = cudf.from_pandas(host_label).iloc[:, 0]

    dtrain = DMatrixT(device_frame, missing=missing, label=device_label)
    assert dtrain.num_col() == n_cols
    assert dtrain.num_row() == n_rows
|
||||
|
||||
|
||||
def _test_from_cudf(DMatrixT):
    '''Exercise construction of ``DMatrixT`` from cudf DataFrames and Series.

    Covers float and integer inputs with different missing markers, feature
    name/type inference, a small (< 8 element) column, and inputs that are
    expected to be rejected.

    Parameters
    ----------
    DMatrixT :
        DMatrix-like class under test, called as ``DMatrixT(data, ...)``.
    '''
    import cudf

    # ``np.nan`` instead of ``np.NAN``: the upper-case alias was removed in
    # NumPy 2.0.
    dmatrix_from_cudf(np.float32, DMatrixT, np.nan)
    dmatrix_from_cudf(np.float64, DMatrixT, np.nan)

    dmatrix_from_cudf(np.int8, DMatrixT, 2)
    dmatrix_from_cudf(np.int32, DMatrixT, -2)
    dmatrix_from_cudf(np.int64, DMatrixT, -3)

    # Feature names and types are taken from the DataFrame columns.
    cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
    dtrain = DMatrixT(cd)

    assert dtrain.feature_names == ['x', 'y']
    assert dtrain.feature_types == ['int', 'float']

    # A single cudf Series is accepted as data as well.
    series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
    assert isinstance(series, cudf.Series)
    dtrain = DMatrixT(series)

    assert dtrain.feature_names == ['x']
    assert dtrain.feature_types == ['int']

    # A two-column frame passed as label must be rejected
    # (presumably because a label has to be a single column -- confirm).
    with pytest.raises(Exception):
        DMatrixT(cd, label=cd)

    # Test when number of elements is less than 8
    X = cudf.DataFrame({'x': cudf.Series([0, 1, 2, np.nan, 4],
                                         dtype=np.int32)})
    dtrain = DMatrixT(X)
    assert dtrain.num_col() == 1
    assert dtrain.num_row() == 5

    # Boolean is not supported.
    X_boolean = cudf.DataFrame({'x': cudf.Series([True, False])})
    with pytest.raises(Exception):
        DMatrixT(X_boolean)

    y_boolean = cudf.DataFrame({
        'x': cudf.Series([True, False, True, True, True])})
    # NOTE(review): ``X_boolean`` on its own is already expected to raise
    # (see the check just above), so this does not isolate the boolean-label
    # path.  Consider pairing ``y_boolean`` with the valid 5-row ``X`` once
    # boolean-label rejection is confirmed.
    with pytest.raises(Exception):
        DMatrixT(X_boolean, label=y_boolean)
|
||||
|
||||
|
||||
def _test_cudf_training(DMatrixT):
    '''Train once on cudf inputs and once on host inputs; RMSE must match.

    ``DMatrixT`` is the DMatrix-like class fed with the cudf data; the host
    reference run always uses ``xgb.DMatrix``.
    '''
    from cudf import DataFrame as df
    import pandas as pd

    n_samples = 50

    # Keep the order of the random draws fixed so the data stays reproducible.
    np.random.seed(1)
    host_X = pd.DataFrame(np.random.randn(n_samples, 10))
    host_y = pd.DataFrame(np.random.randn(n_samples))
    host_weights = np.random.random(n_samples) + 1.0
    host_margin = np.random.random(n_samples)

    device_weights = df.from_pandas(pd.DataFrame(host_weights))
    device_margin = df.from_pandas(pd.DataFrame(host_margin))

    params = {'gpu_id': 0, 'tree_method': 'gpu_hist'}

    # Run 1: everything handed over as cudf objects.
    history_cudf = {}
    dtrain_cudf = DMatrixT(
        df.from_pandas(host_X),
        df.from_pandas(host_y),
        weight=device_weights,
        base_margin=device_margin,
    )
    xgb.train(
        params,
        dtrain_cudf,
        evals=[(dtrain_cudf, "train")],
        evals_result=history_cudf,
    )

    # Run 2: the same data from host memory as the reference.
    history_np = {}
    dtrain_np = xgb.DMatrix(
        host_X, host_y, weight=host_weights, base_margin=host_margin
    )
    xgb.train(
        params,
        dtrain_np,
        evals=[(dtrain_np, "train")],
        evals_result=history_np,
    )

    rmse_cudf = history_cudf["train"]["rmse"]
    rmse_np = history_np["train"]["rmse"]
    assert np.array_equal(rmse_cudf, rmse_np)
|
||||
|
||||
|
||||
def _test_cudf_metainfo(DMatrixT):
    '''Meta info set from cudf objects must equal meta info set from NumPy.

    A reference ``xgb.DMatrix`` receives weight/label/base_margin/group from
    NumPy arrays; a ``DMatrixT`` built from cudf receives the same values via
    ``set_interface_info``, first as cudf DataFrames and then as cudf Series.
    '''
    from cudf import DataFrame as df
    import pandas as pd

    n = 100
    X = np.random.random((n, 2))
    dmat_cudf = DMatrixT(df.from_pandas(pd.DataFrame(X)))
    dmat = xgb.DMatrix(X)

    floats = np.random.random(n)
    uints = np.array([4, 2, 8]).astype("uint32")
    cudf_floats = df.from_pandas(pd.DataFrame(floats))
    cudf_uints = df.from_pandas(pd.DataFrame(uints))

    float_fields = ('weight', 'label', 'base_margin')

    # Reference values on the host-backed matrix.
    for field in float_fields:
        dmat.set_float_info(field, floats)
    dmat.set_uint_info('group', uints)

    def set_and_compare(device_floats, device_uints):
        # Push the device-side values, then compare every field to the reference.
        for field in float_fields:
            dmat_cudf.set_interface_info(field, device_floats)
        dmat_cudf.set_interface_info('group', device_uints)

        for field in float_fields:
            assert np.array_equal(dmat.get_float_info(field),
                                  dmat_cudf.get_float_info(field))
        assert np.array_equal(dmat.get_uint_info('group_ptr'),
                              dmat_cudf.get_uint_info('group_ptr'))

    # Test setting info with cudf DataFrame
    set_and_compare(cudf_floats, cudf_uints)

    # Test setting info with cudf Series
    set_and_compare(cudf_floats[cudf_floats.columns[0]],
                    cudf_uints[cudf_uints.columns[0]])
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cudf())
class TestFromColumnar:
    '''Run the cudf construction, training and meta-info checks against both
    ``xgb.DMatrix`` and ``xgb.DeviceQuantileDMatrix``.

    The ``skipif`` mark on the class applies to every test method below.
    '''

    # Construction from cudf DataFrame / Series.
    def test_simple_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DMatrix)

    def test_device_dmatrix_from_cudf(self):
        _test_from_cudf(xgb.DeviceQuantileDMatrix)

    # Training on cudf inputs compared with host inputs.
    def test_cudf_training_simple_dmatrix(self):
        _test_cudf_training(xgb.DMatrix)

    def test_cudf_training_device_dmatrix(self):
        _test_cudf_training(xgb.DeviceQuantileDMatrix)

    # Meta info (weight, label, base_margin, group) set from cudf objects.
    def test_cudf_metainfo_simple_dmatrix(self):
        _test_cudf_metainfo(xgb.DMatrix)

    def test_cudf_metainfo_device_dmatrix(self):
        _test_cudf_metainfo(xgb.DeviceQuantileDMatrix)
|
||||
Reference in New Issue
Block a user