Support feature names/types for cudf. (#4902)
* Implement most of the pandas procedure for cudf, except for type conversion.
* Requires an array of interfaces in metainfo.
This commit is contained in:
@@ -31,9 +31,10 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out) {
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
|
||||
Json(Boolean(false))};
|
||||
column["data"] = j_data;
|
||||
Json array(std::vector<Json>{column});
|
||||
|
||||
std::stringstream ss;
|
||||
Json::Dump(column, &ss);
|
||||
Json::Dump(array, &ss);
|
||||
std::string str = ss.str();
|
||||
|
||||
return str;
|
||||
|
||||
@@ -42,6 +42,7 @@ Arrow specification.'''
|
||||
@pytest.mark.skipif(**tm.no_cudf())
|
||||
def test_from_cudf(self):
|
||||
'''Test constructing DMatrix from cudf'''
|
||||
import cudf
|
||||
dmatrix_from_cudf(np.float32, np.NAN)
|
||||
dmatrix_from_cudf(np.float64, np.NAN)
|
||||
|
||||
@@ -52,3 +53,19 @@ Arrow specification.'''
|
||||
dmatrix_from_cudf(np.int8, 2)
|
||||
dmatrix_from_cudf(np.int32, -2)
|
||||
dmatrix_from_cudf(np.int64, -3)
|
||||
|
||||
cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
|
||||
dtrain = xgb.DMatrix(cd)
|
||||
|
||||
assert dtrain.feature_names == ['x', 'y']
|
||||
assert dtrain.feature_types == ['int', 'float']
|
||||
|
||||
series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
|
||||
assert isinstance(series, cudf.Series)
|
||||
dtrain = xgb.DMatrix(series)
|
||||
|
||||
assert dtrain.feature_names == ['x']
|
||||
assert dtrain.feature_types == ['int']
|
||||
|
||||
with pytest.raises(Exception):
|
||||
dtrain = xgb.DMatrix(cd, label=cd)
|
||||
|
||||
Reference in New Issue
Block a user