Support feature names/types for cudf. (#4902)

* Implement most of the pandas procedure for cudf, except for type conversion.
* Requires an array of interfaces in metainfo.
2019-09-29 15:07:51 -04:00
parent 2fa8b359e0
commit d30e63a0a5
6 changed files with 90 additions and 25 deletions
--- a/tests/cpp/data/test_metainfo.cu
+++ b/tests/cpp/data/test_metainfo.cu
@@ -31,9 +31,10 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out) {
        Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
        Json(Boolean(false))};
  column["data"] = j_data;
+  Json array(std::vector<Json>{column});

  std::stringstream ss;
-  Json::Dump(column, &ss);
+  Json::Dump(array, &ss);
  std::string str = ss.str();

  return str;
--- a/tests/python-gpu/test_from_columnar.py
+++ b/tests/python-gpu/test_from_columnar.py
@@ -42,6 +42,7 @@ Arrow specification.'''
    @pytest.mark.skipif(**tm.no_cudf())
    def test_from_cudf(self):
        '''Test constructing DMatrix from cudf'''
+        import cudf
        dmatrix_from_cudf(np.float32, np.NAN)
        dmatrix_from_cudf(np.float64, np.NAN)

@@ -52,3 +53,19 @@ Arrow specification.'''
        dmatrix_from_cudf(np.int8, 2)
        dmatrix_from_cudf(np.int32, -2)
        dmatrix_from_cudf(np.int64, -3)
+
+        cd = cudf.DataFrame({'x': [1, 2, 3], 'y': [0.1, 0.2, 0.3]})
+        dtrain = xgb.DMatrix(cd)
+
+        assert dtrain.feature_names == ['x', 'y']
+        assert dtrain.feature_types == ['int', 'float']
+
+        series = cudf.DataFrame({'x': [1, 2, 3]}).iloc[:, 0]
+        assert isinstance(series, cudf.Series)
+        dtrain = xgb.DMatrix(series)
+
+        assert dtrain.feature_names == ['x']
+        assert dtrain.feature_types == ['int']
+
+        with pytest.raises(Exception):
+            dtrain = xgb.DMatrix(cd, label=cd)