Refactor tests with data generator. (#5439)
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
|
||||
#include "helpers.h"
|
||||
#include "xgboost/c_api.h"
|
||||
|
||||
#include "../../src/data/adapter.h"
|
||||
#include "../../src/gbm/gbtree_model.h"
|
||||
#include "xgboost/predictor.h"
|
||||
|
||||
@@ -155,26 +155,112 @@ SimpleLCG::StateType SimpleLCG::Max() const {
|
||||
return max_value_;
|
||||
}
|
||||
|
||||
std::shared_ptr<xgboost::DMatrix>* CreateDMatrix(int rows, int columns,
|
||||
float sparsity, int seed) {
|
||||
const float missing_value = -1;
|
||||
std::vector<float> test_data(rows * columns);
|
||||
// NOTE(review): this span is a rendered diff hunk, not plain source. The lines
// that mention `gen`, `dis`, `test_data`, `e`, `seed` or `missing_value` look
// like removed lines of the previous CreateDMatrix() body interleaved with the
// new GenerateDense() body -- confirm against the real file before editing.
//
// GenerateDense: resizes *out to rows_ * cols_ entries and fills them using an
// LCG seeded with seed_ and a uniform distribution built from (lower_, upper_).
// For every entry one value is drawn; if it is below the sparsity threshold the
// entry becomes a quiet NaN, otherwise a second draw is stored. When
// device_ >= 0 the vector is additionally assigned to that device.
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
|
||||
SimpleLCG lcg{seed_};
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
|
||||
CHECK(out);
|
||||
|
||||
xgboost::SimpleLCG gen(seed);
|
||||
SimpleRealUniformDistribution<float> dis(0.0f, 1.0f);
|
||||
|
||||
for (auto &e : test_data) {
|
||||
if (dis(&gen) < sparsity) {
|
||||
e = missing_value;
|
||||
out->Resize(rows_ * cols_, 0);
|
||||
auto &h_data = out->HostVector();
|
||||
// Rescale the sparsity_ fraction into the (lower_, upper_) range so it can be
// compared directly against a raw draw from `dist`.
float sparsity = sparsity_ * (upper_ - lower_) + lower_;
|
||||
for (auto &v : h_data) {
|
||||
// First draw only decides whether this entry is missing.
auto g = dist(&lcg);
|
||||
if (g < sparsity) {
|
||||
v = std::numeric_limits<float>::quiet_NaN();
|
||||
} else {
|
||||
e = dis(&gen);
|
||||
// Second, independent draw provides the stored value.
v = dist(&lcg);
|
||||
}
|
||||
}
|
||||
if (device_ >= 0) {
|
||||
out->SetDevice(device_);
|
||||
// Return value is discarded; presumably called to force the data onto the
// device -- TODO confirm against HostDeviceVector.
out->DeviceSpan();
|
||||
}
|
||||
}
|
||||
|
||||
DMatrixHandle handle;
|
||||
XGDMatrixCreateFromMat(test_data.data(), rows, columns, missing_value,
|
||||
&handle);
|
||||
return static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
|
||||
/**
 * Generate dense random data into `storage` and write a JSON description of
 * that buffer into `out`.
 *
 * The emitted object carries four keys:
 *   - "data":    two-element array; slot 0 is the device pointer of `storage`
 *                as an integer, slot 1 is `false` (presumably the
 *                array-interface read-only flag -- confirm with the consumer).
 *   - "shape":   two-element array holding rows_ and cols_.
 *   - "typestr": "<f4".
 *   - "version": 1.
 *
 * \param storage Receives the generated values via GenerateDense().
 * \param out     Receives the serialised JSON; must not be null.
 */
void RandomDataGenerator::GenerateArrayInterface(
    HostDeviceVector<float> *storage, std::string *out) const {
  CHECK(out);
  this->GenerateDense(storage);

  Json iface{Object()};

  // Buffer location.
  iface["data"] = std::vector<Json>(2);
  int64_t address = reinterpret_cast<int64_t>(storage->DevicePointer());
  iface["data"][0] = Integer(address);
  iface["data"][1] = Boolean(false);

  // Buffer extent.
  iface["shape"] = std::vector<Json>(2);
  iface["shape"][0] = rows_;
  iface["shape"][1] = cols_;

  // Element type and interface revision.
  iface["typestr"] = String("<f4");
  iface["version"] = 1;

  Json::Dump(iface, out);
}
|
||||
|
||||
/**
 * Generate a random sparse matrix in CSR form.
 *
 * Every one of the rows_ * cols_ cells gets one draw from a uniform
 * distribution built from (lower_, upper_), using an LCG seeded with seed_.
 * A cell whose draw is at or above the sparsity threshold is kept: a second
 * draw supplies its value and its column index is recorded. Entries are
 * appended to the host vectors of the three outputs. When device_ >= 0 the
 * outputs are then assigned to that device.
 *
 * \param value   Receives the stored values; must not be null.
 * \param row_ptr Receives the row offsets (a leading 0, then one entry per
 *                row); must not be null.
 * \param columns Receives the column index of each stored value; must not be
 *                null.
 */
void RandomDataGenerator::GenerateCSR(
    HostDeviceVector<float>* value, HostDeviceVector<bst_row_t>* row_ptr,
    HostDeviceVector<bst_feature_t>* columns) const {
  // Fail loudly on a null output instead of dereferencing it, matching the
  // CHECK(out) guards used by GenerateDense and GenerateArrayInterface.
  CHECK(value);
  CHECK(row_ptr);
  CHECK(columns);

  auto& h_value = value->HostVector();
  auto& h_rptr = row_ptr->HostVector();
  auto& h_cols = columns->HostVector();

  SimpleLCG lcg{seed_};
  xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
  // Rescale the sparsity_ fraction into the (lower_, upper_) range so it can
  // be compared directly against a raw draw.
  float sparsity = sparsity_ * (upper_ - lower_) + lower_;

  h_rptr.emplace_back(0);
  for (size_t i = 0; i < rows_; ++i) {
    // Running offset for the end of the current row.
    size_t rptr = h_rptr.back();
    for (size_t j = 0; j < cols_; ++j) {
      // First draw decides presence; second draw is the stored value.
      auto g = dist(&lcg);
      if (g >= sparsity) {
        g = dist(&lcg);
        h_value.emplace_back(g);
        rptr++;
        h_cols.emplace_back(j);
      }
    }
    h_rptr.emplace_back(rptr);
  }

  if (device_ >= 0) {
    value->SetDevice(device_);
    value->DeviceSpan();
    row_ptr->SetDevice(device_);
    row_ptr->DeviceSpan();
    columns->SetDevice(device_);
    columns->DeviceSpan();
  }

  // Structural invariants of the generated CSR triple.
  CHECK_LE(h_value.size(), rows_ * cols_);
  CHECK_EQ(value->Size(), h_rptr.back());
  CHECK_EQ(columns->Size(), value->Size());
}
|
||||
|
||||
std::shared_ptr<DMatrix>
|
||||
RandomDataGenerator::GenerateDMatix(bool with_label, bool float_label,
|
||||
size_t classes) const {
|
||||
HostDeviceVector<float> data;
|
||||
HostDeviceVector<bst_row_t> rptrs;
|
||||
HostDeviceVector<bst_feature_t> columns;
|
||||
this->GenerateCSR(&data, &rptrs, &columns);
|
||||
data::CSRAdapter adapter(rptrs.HostPointer(), columns.HostPointer(),
|
||||
data.HostPointer(), rows_, data.Size(), cols_);
|
||||
std::shared_ptr<DMatrix> out{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
|
||||
if (with_label) {
|
||||
RandomDataGenerator gen(rows_, 1, 0);
|
||||
if (!float_label) {
|
||||
gen.Lower(0).Upper(classes).GenerateDense(&out->Info().labels_);
|
||||
auto& h_labels = out->Info().labels_.HostVector();
|
||||
for (auto& v : h_labels) {
|
||||
v = static_cast<float>(static_cast<uint32_t>(v));
|
||||
}
|
||||
} else {
|
||||
gen.GenerateDense(&out->Info().labels_);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(
|
||||
@@ -290,8 +376,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
std::unique_ptr<GradientBooster> gbm {
|
||||
GradientBooster::Create(name, generic_param, learner_model_param)};
|
||||
gbm->Configure(kwargs);
|
||||
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
|
||||
auto p_dmat = *pp_dmat;
|
||||
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
|
||||
|
||||
std::vector<float> labels(kRows);
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
@@ -309,7 +394,6 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
|
||||
gbm->DoBoost(p_dmat.get(), &gpair, &predts);
|
||||
|
||||
delete pp_dmat;
|
||||
return gbm;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user