Add helper for generating batches of data. (#5756)
* Add helper for generating batches of data. * VC keyword clash. * Another clash.
This commit is contained in:
@@ -156,10 +156,10 @@ SimpleLCG::StateType SimpleLCG::Max() const {
|
||||
}
|
||||
|
||||
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
|
||||
SimpleLCG lcg{seed_};
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
|
||||
CHECK(out);
|
||||
|
||||
SimpleLCG lcg{lcg_};
|
||||
out->Resize(rows_ * cols_, 0);
|
||||
auto &h_data = out->HostVector();
|
||||
float sparsity = sparsity_ * (upper_ - lower_) + lower_;
|
||||
@@ -202,7 +202,56 @@ std::string RandomDataGenerator::GenerateArrayInterface(
|
||||
return out;
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::string>, std::string>
|
||||
RandomDataGenerator::GenerateArrayInterfaceBatch(
|
||||
HostDeviceVector<float> *storage, size_t batches) const {
|
||||
this->GenerateDense(storage);
|
||||
std::vector<std::string> result(batches);
|
||||
std::vector<Json> objects;
|
||||
|
||||
size_t const rows_per_batch = rows_ / batches;
|
||||
|
||||
auto make_interface = [storage, this](size_t offset, size_t rows) {
|
||||
Json array_interface{Object()};
|
||||
array_interface["data"] = std::vector<Json>(2);
|
||||
if (device_ >= 0) {
|
||||
array_interface["data"][0] =
|
||||
Integer(reinterpret_cast<int64_t>(storage->DevicePointer() + offset));
|
||||
} else {
|
||||
array_interface["data"][0] =
|
||||
Integer(reinterpret_cast<int64_t>(storage->HostPointer() + offset));
|
||||
}
|
||||
|
||||
array_interface["data"][1] = Boolean(false);
|
||||
|
||||
array_interface["shape"] = std::vector<Json>(2);
|
||||
array_interface["shape"][0] = rows;
|
||||
array_interface["shape"][1] = cols_;
|
||||
|
||||
array_interface["typestr"] = String("<f4");
|
||||
array_interface["version"] = 1;
|
||||
return array_interface;
|
||||
};
|
||||
|
||||
auto j_interface = make_interface(0, rows_);
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < batches - 1; ++i) {
|
||||
objects.emplace_back(make_interface(offset, rows_per_batch));
|
||||
offset += rows_per_batch * cols_;
|
||||
}
|
||||
|
||||
size_t const remaining = rows_ - offset / cols_;
|
||||
CHECK_LE(offset, rows_ * cols_);
|
||||
objects.emplace_back(make_interface(offset, remaining));
|
||||
|
||||
for (size_t i = 0; i < batches; ++i) {
|
||||
Json::Dump(objects[i], &result[i]);
|
||||
}
|
||||
|
||||
std::string interface_str;
|
||||
Json::Dump(j_interface, &interface_str);
|
||||
return {result, interface_str};
|
||||
}
|
||||
|
||||
std::string RandomDataGenerator::GenerateColumnarArrayInterface(
|
||||
std::vector<HostDeviceVector<float>> *data) const {
|
||||
@@ -225,8 +274,8 @@ void RandomDataGenerator::GenerateCSR(
|
||||
auto& h_value = value->HostVector();
|
||||
auto& h_rptr = row_ptr->HostVector();
|
||||
auto& h_cols = columns->HostVector();
|
||||
SimpleLCG lcg{lcg_};
|
||||
|
||||
SimpleLCG lcg{seed_};
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
|
||||
float sparsity = sparsity_ * (upper_ - lower_) + lower_;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user