Move feature names and types of DMatrix from Python to C++. (#5858)

* Add thread local return entry for DMatrix.
* Save feature name and feature type in binary file.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2020-07-07 09:40:13 +08:00
committed by GitHub
parent 4b0852ee41
commit 93c44a9a64
12 changed files with 451 additions and 84 deletions

View File

@@ -10,7 +10,6 @@
#include "../helpers.h"
#include "../../../src/common/io.h"
TEST(CAPI, XGDMatrixCreateFromMatDT) {
std::vector<int> col0 = {0, -1, 3};
std::vector<float> col1 = {-4.0f, 2.0f, 0.0f};
@@ -148,4 +147,48 @@ TEST(CAPI, CatchDMLCError) {
EXPECT_THROW({ dmlc::Stream::Create("foo", "r"); }, dmlc::Error);
}
// Round-trips feature names and feature types through the C API string
// feature-info setters/getters and checks that what is read back matches
// what was written.
TEST(CAPI, DMatrixSetFeatureName) {
  size_t constexpr kRows = 10;
  bst_feature_t constexpr kCols = 2;

  DMatrixHandle handle;
  std::vector<float> data(kCols * kRows, 1.5);
  // Every C API call is checked against 0 so a failing call is reported at
  // its call site instead of surfacing later as a confusing mismatch.
  ASSERT_EQ(XGDMatrixCreateFromMat_omp(data.data(), kRows, kCols,
                                       std::numeric_limits<float>::quiet_NaN(),
                                       &handle, 0),
            0);

  // Feature names are simply the column indices rendered as strings.
  std::vector<std::string> feature_names;
  for (bst_feature_t i = 0; i < kCols; ++i) {
    feature_names.emplace_back(std::to_string(i));
  }
  // The C API takes an array of C strings; these pointers borrow from
  // `feature_names`, which outlives the setter call below.
  std::vector<char const*> c_feature_names;
  c_feature_names.resize(feature_names.size());
  std::transform(feature_names.cbegin(), feature_names.cend(),
                 c_feature_names.begin(),
                 [](auto const &str) { return str.c_str(); });
  ASSERT_EQ(XGDMatrixSetStrFeatureInfo(handle, u8"feature_name",
                                       c_feature_names.data(),
                                       c_feature_names.size()),
            0);

  bst_ulong out_len = 0;
  char const **c_out_features = nullptr;
  ASSERT_EQ(XGDMatrixGetStrFeatureInfo(handle, u8"feature_name", &out_len,
                                       &c_out_features),
            0);
  ASSERT_EQ(out_len, kCols);
  for (bst_ulong i = 0; i < out_len; ++i) {
    ASSERT_EQ(std::to_string(i), c_out_features[i]);
  }

  char const* feat_types [] {"i", "q"};
  static_assert(sizeof(feat_types)/ sizeof(feat_types[0]) == kCols, "");
  ASSERT_EQ(XGDMatrixSetStrFeatureInfo(handle, "feature_type", feat_types,
                                       kCols),
            0);

  // Reset the length so a stale value from the previous query cannot mask a
  // getter that fails to write it.
  out_len = 0;
  char const **c_out_types = nullptr;
  ASSERT_EQ(XGDMatrixGetStrFeatureInfo(handle, u8"feature_type", &out_len,
                                       &c_out_types),
            0);
  // Guard the loop below: `feat_types` has exactly kCols entries, so the
  // returned length must match before indexing both arrays with it.
  ASSERT_EQ(out_len, kCols);
  for (bst_ulong i = 0; i < out_len; ++i) {
    ASSERT_STREQ(feat_types[i], c_out_types[i]);
  }

  ASSERT_EQ(XGDMatrixFree(handle), 0);
}
} // namespace xgboost