[dask] Add DaskXGBRanker (#6576)
* Initial support for distributed LTR using dask. * Support `qid` in libxgboost. * Refactor `predict`, `n_features_in_`, and `best_[score/iteration/ntree_limit]` to avoid duplicated code. * Define `DaskXGBRanker`. The dask ranker doesn't support group structure; instead, it uses query ids and converts them to a group ptr internally.
This commit is contained in:
@@ -63,7 +63,7 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
|
||||
|
||||
template <typename T>
|
||||
Json Generate2dArrayInterface(int rows, int cols, std::string typestr,
|
||||
thrust::device_vector<T>* p_data) {
|
||||
thrust::device_vector<T> *p_data) {
|
||||
auto& data = *p_data;
|
||||
thrust::sequence(data.begin(), data.end());
|
||||
|
||||
|
||||
@@ -202,6 +202,24 @@ TEST(MetaInfo, LoadQid) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the host-side "qid" path of MetaInfo::SetInfo: every row is given
// its own query id, and the test expects the resulting group pointer to be
// the sequence 0, 1, ..., num_row_.
TEST(MetaInfo, CPUQid) {
  size_t const kRows = 100;
  xgboost::MetaInfo info;
  info.num_row_ = kRows;

  // Query ids 0..kRows-1, one distinct id per row.
  std::vector<uint32_t> qid(kRows, 0);
  for (size_t row = 0; row < kRows; ++row) {
    qid[row] = row;
  }

  info.SetInfo("qid", qid.data(), xgboost::DataType::kUInt32, info.num_row_);

  // One boundary per row plus the trailing end marker.
  ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);
  ASSERT_EQ(info.group_ptr_.front(), 0);
  ASSERT_EQ(info.group_ptr_.back(), info.num_row_);

  // With unique ids each boundary index equals its own position.
  for (size_t pos = 0; pos <= kRows; ++pos) {
    ASSERT_EQ(info.group_ptr_[pos], pos);
  }
}
|
||||
|
||||
TEST(MetaInfo, Validate) {
|
||||
xgboost::MetaInfo info;
|
||||
info.num_row_ = 10;
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include "test_array_interface.h"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -105,6 +106,28 @@ TEST(MetaInfo, Group) {
|
||||
EXPECT_ANY_THROW(info.SetInfo("group", float_str.c_str()));
|
||||
}
|
||||
|
||||
// Checks the "qid" path of MetaInfo::SetInfo when the ids live in a
// thrust::device_vector and are handed over as a JSON string: every row is
// given its own query id, and the test expects the resulting group pointer
// to be the sequence 0, 1, ..., num_row_.
TEST(MetaInfo, GPUQid) {
  size_t const kRows = 100;
  xgboost::MetaInfo info;
  info.num_row_ = kRows;

  // Query ids 0..kRows-1, one distinct id per row, written element by element.
  thrust::device_vector<uint32_t> qid(kRows, 0);
  for (size_t row = 0; row < kRows; ++row) {
    qid[row] = row;
  }

  // Describe the buffer as a single "<u4" column and serialise the enclosing
  // JSON array to a string, which is the form this SetInfo overload takes.
  auto column = Generate2dArrayInterface(info.num_row_, 1, "<u4", &qid);
  Json columns{std::vector<Json>{column}};
  std::string serialised;
  Json::Dump(columns, &serialised);
  info.SetInfo("qid", serialised.c_str());

  // One boundary per row plus the trailing end marker.
  ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);
  ASSERT_EQ(info.group_ptr_.front(), 0);
  ASSERT_EQ(info.group_ptr_.back(), info.num_row_);

  // With unique ids each boundary index equals its own position.
  for (size_t pos = 0; pos <= kRows; ++pos) {
    ASSERT_EQ(info.group_ptr_[pos], pos);
  }
}
|
||||
|
||||
|
||||
TEST(MetaInfo, DeviceExtend) {
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
size_t const kRows = 100;
|
||||
|
||||
Reference in New Issue
Block a user