Cleanup pandas support
This commit is contained in:
@@ -136,6 +136,51 @@ class TestBasic(unittest.TestCase):
|
||||
assert dm.num_row() == 2
|
||||
assert dm.num_col() == 3
|
||||
|
||||
df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
|
||||
dummies = pd.get_dummies(df)
|
||||
# B A_X A_Y A_Z
|
||||
# 0 1 1 0 0
|
||||
# 1 2 0 1 0
|
||||
# 2 3 0 0 1
|
||||
result, _, _ = xgb.core._maybe_pandas_data(dummies, None, None)
|
||||
exp = np.array([[ 1., 1., 0., 0.],
|
||||
[ 2., 0., 1., 0.],
|
||||
[ 3., 0., 0., 1.]])
|
||||
np.testing.assert_array_equal(result, exp)
|
||||
|
||||
dm = xgb.DMatrix(dummies)
|
||||
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
|
||||
assert dm.feature_types == ['int', 'float', 'float', 'float']
|
||||
assert dm.num_row() == 3
|
||||
assert dm.num_col() == 4
|
||||
|
||||
df = pd.DataFrame({'A=1': [1, 2, 3], 'A=2': [4, 5, 6]})
|
||||
dm = xgb.DMatrix(df)
|
||||
assert dm.feature_names == ['A=1', 'A=2']
|
||||
assert dm.feature_types == ['int', 'int']
|
||||
assert dm.num_row() == 3
|
||||
assert dm.num_col() == 2
|
||||
|
||||
def test_pandas_label(self):
    """Exercise ``xgb.core._maybe_pandas_label`` with DataFrame labels.

    A two-column frame and a frame holding an object-dtype column are both
    expected to raise ``ValueError``.  A single integer column is expected
    to compare equal to the column vector ``[[1.], [2.], [3.]]`` and to be
    accepted as the ``label`` argument of ``xgb.DMatrix``.
    """
    import pandas as pd

    to_label = xgb.core._maybe_pandas_label

    # label must be a single column
    two_columns = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
    with self.assertRaises(ValueError):
        to_label(two_columns)

    # label must be supported dtype
    object_column = pd.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)})
    with self.assertRaises(ValueError):
        to_label(object_column)

    int_column = pd.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
    expected = np.array([[1.], [2.], [3.]], dtype=float)
    np.testing.assert_array_equal(to_label(int_column), expected)

    # the same single-column frame is passed as the label of a DMatrix
    features = np.random.randn(3, 2)
    dm = xgb.DMatrix(features, label=int_column)
    assert dm.num_row() == 3
    assert dm.num_col() == 2
|
||||
|
||||
|
||||
def test_load_file_invalid(self):
|
||||
|
||||
self.assertRaises(ValueError, xgb.Booster,
|
||||
|
||||
Reference in New Issue
Block a user