Simplify the data backends. (#5893)
This commit is contained in:
@@ -12,11 +12,12 @@ import testing as tm
|
||||
class TestDeviceQuantileDMatrix(unittest.TestCase):
|
||||
def test_dmatrix_numpy_init(self):
|
||||
data = np.random.randn(5, 5)
|
||||
with pytest.raises(AssertionError, match='is not supported for DeviceQuantileDMatrix'):
|
||||
dm = xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64))
|
||||
with pytest.raises(TypeError,
|
||||
match='is not supported for DeviceQuantileDMatrix'):
|
||||
xgb.DeviceQuantileDMatrix(data, np.ones(5, dtype=np.float64))
|
||||
|
||||
@pytest.mark.skipif(**tm.no_cupy())
|
||||
def test_dmatrix_cupy_init(self):
|
||||
import cupy as cp
|
||||
data = cp.random.randn(5, 5)
|
||||
dm = xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64))
|
||||
xgb.DeviceQuantileDMatrix(data, cp.ones(5, dtype=np.float64))
|
||||
|
||||
@@ -119,10 +119,10 @@ def _test_cudf_metainfo(DMatrixT):
|
||||
dmat.set_float_info('label', floats)
|
||||
dmat.set_float_info('base_margin', floats)
|
||||
dmat.set_uint_info('group', uints)
|
||||
dmat_cudf.set_interface_info('weight', cudf_floats)
|
||||
dmat_cudf.set_interface_info('label', cudf_floats)
|
||||
dmat_cudf.set_interface_info('base_margin', cudf_floats)
|
||||
dmat_cudf.set_interface_info('group', cudf_uints)
|
||||
dmat_cudf.set_info(weight=cudf_floats)
|
||||
dmat_cudf.set_info(label=cudf_floats)
|
||||
dmat_cudf.set_info(base_margin=cudf_floats)
|
||||
dmat_cudf.set_info(group=cudf_uints)
|
||||
|
||||
# Test setting info with cudf DataFrame
|
||||
assert np.array_equal(dmat.get_float_info('weight'), dmat_cudf.get_float_info('weight'))
|
||||
@@ -132,10 +132,10 @@ def _test_cudf_metainfo(DMatrixT):
|
||||
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cudf.get_uint_info('group_ptr'))
|
||||
|
||||
# Test setting info with cudf Series
|
||||
dmat_cudf.set_interface_info('weight', cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_interface_info('label', cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_interface_info('base_margin', cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_interface_info('group', cudf_uints[cudf_uints.columns[0]])
|
||||
dmat_cudf.set_info(weight=cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_info(label=cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_info(base_margin=cudf_floats[cudf_floats.columns[0]])
|
||||
dmat_cudf.set_info(group=cudf_uints[cudf_uints.columns[0]])
|
||||
assert np.array_equal(dmat.get_float_info('weight'), dmat_cudf.get_float_info('weight'))
|
||||
assert np.array_equal(dmat.get_float_info('label'), dmat_cudf.get_float_info('label'))
|
||||
assert np.array_equal(dmat.get_float_info('base_margin'),
|
||||
|
||||
@@ -92,10 +92,10 @@ def _test_cupy_metainfo(DMatrixT):
|
||||
dmat.set_float_info('label', floats)
|
||||
dmat.set_float_info('base_margin', floats)
|
||||
dmat.set_uint_info('group', uints)
|
||||
dmat_cupy.set_interface_info('weight', cupy_floats)
|
||||
dmat_cupy.set_interface_info('label', cupy_floats)
|
||||
dmat_cupy.set_interface_info('base_margin', cupy_floats)
|
||||
dmat_cupy.set_interface_info('group', cupy_uints)
|
||||
dmat_cupy.set_info(weight=cupy_floats)
|
||||
dmat_cupy.set_info(label=cupy_floats)
|
||||
dmat_cupy.set_info(base_margin=cupy_floats)
|
||||
dmat_cupy.set_info(group=cupy_uints)
|
||||
|
||||
# Test setting info with cupy
|
||||
assert np.array_equal(dmat.get_float_info('weight'),
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
try:
|
||||
# python 2
|
||||
from StringIO import StringIO
|
||||
except ImportError:
|
||||
# python 3
|
||||
from io import StringIO
|
||||
from io import StringIO
|
||||
import numpy as np
|
||||
import os
|
||||
import xgboost as xgb
|
||||
import unittest
|
||||
import json
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
dpath = 'demo/data/'
|
||||
rng = np.random.RandomState(1994)
|
||||
@@ -66,16 +63,19 @@ class TestBasic(unittest.TestCase):
|
||||
# error must be smaller than 10%
|
||||
assert err < 0.1
|
||||
|
||||
# save dmatrix into binary buffer
|
||||
dtest.save_binary('dtest.buffer')
|
||||
# save model
|
||||
bst.save_model('xgb.model')
|
||||
# load model and data in
|
||||
bst2 = xgb.Booster(model_file='xgb.model')
|
||||
dtest2 = xgb.DMatrix('dtest.buffer')
|
||||
preds2 = bst2.predict(dtest2)
|
||||
# assert they are the same
|
||||
assert np.sum(np.abs(preds2 - preds)) == 0
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
|
||||
# save dmatrix into binary buffer
|
||||
dtest.save_binary(dtest_path)
|
||||
# save model
|
||||
model_path = os.path.join(tmpdir, 'model.booster')
|
||||
bst.save_model(model_path)
|
||||
# load model and data in
|
||||
bst2 = xgb.Booster(model_file=model_path)
|
||||
dtest2 = xgb.DMatrix(dtest_path)
|
||||
preds2 = bst2.predict(dtest2)
|
||||
# assert they are the same
|
||||
assert np.sum(np.abs(preds2 - preds)) == 0
|
||||
|
||||
def test_record_results(self):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
|
||||
@@ -67,8 +67,7 @@ class TestPandas(unittest.TestCase):
|
||||
# 0 1 1 0 0
|
||||
# 1 2 0 1 0
|
||||
# 2 3 0 0 1
|
||||
pandas_handler = xgb.data.PandasHandler(np.nan, 0, False)
|
||||
result, _, _ = pandas_handler._maybe_pandas_data(dummies, None, None)
|
||||
result, _, _ = xgb.data._transform_pandas_df(dummies)
|
||||
exp = np.array([[1., 1., 0., 0.],
|
||||
[2., 0., 1., 0.],
|
||||
[3., 0., 0., 1.]])
|
||||
@@ -129,18 +128,17 @@ class TestPandas(unittest.TestCase):
|
||||
def test_pandas_label(self):
|
||||
# label must be a single column
|
||||
df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
|
||||
pandas_handler = xgb.data.PandasHandler(np.nan, 0, False)
|
||||
self.assertRaises(ValueError, pandas_handler._maybe_pandas_data, df,
|
||||
self.assertRaises(ValueError, xgb.data._transform_pandas_df, df,
|
||||
None, None, 'label', 'float')
|
||||
|
||||
# label must be supported dtype
|
||||
df = pd.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)})
|
||||
self.assertRaises(ValueError, pandas_handler._maybe_pandas_data, df,
|
||||
self.assertRaises(ValueError, xgb.data._transform_pandas_df, df,
|
||||
None, None, 'label', 'float')
|
||||
|
||||
df = pd.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
|
||||
result, _, _ = pandas_handler._maybe_pandas_data(df, None, None,
|
||||
'label', 'float')
|
||||
result, _, _ = xgb.data._transform_pandas_df(df, None, None,
|
||||
'label', 'float')
|
||||
np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]],
|
||||
dtype=float))
|
||||
dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
|
||||
|
||||
Reference in New Issue
Block a user