[dask] Add DaskXGBRanker (#6576)

* Initial support for distributed LTR using dask.

* Support `qid` in libxgboost.
* Refactor `predict` and `n_features_in_`, `best_[score/iteration/ntree_limit]`
  to avoid duplicated code.
* Define `DaskXGBRanker`.

The dask ranker doesn't support group structure; instead, it uses query IDs and
converts them to group ptr internally.
This commit is contained in:
Jiaming Yuan
2021-01-08 18:35:09 +08:00
committed by GitHub
parent 96d3d32265
commit 80065d571e
18 changed files with 755 additions and 351 deletions

View File

@@ -202,6 +202,24 @@ TEST(MetaInfo, LoadQid) {
}
}
// Checks the group pointers produced when per-row query ids are supplied via
// SetInfo("qid", ...). Each row is given a distinct id, so the expected
// group_ptr_ is the sequence 0, 1, ..., num_row_.
TEST(MetaInfo, CPUQid) {
  xgboost::MetaInfo info;
  info.num_row_ = 100;

  // Assign one unique query id per row: 0, 1, 2, ...
  std::vector<uint32_t> query_ids(info.num_row_, 0);
  size_t row = 0;
  for (auto& id : query_ids) {
    id = row;
    ++row;
  }
  info.SetInfo("qid", query_ids.data(), xgboost::DataType::kUInt32, info.num_row_);

  // One boundary per row plus the terminating entry.
  ASSERT_EQ(info.group_ptr_.size(), info.num_row_ + 1);
  ASSERT_EQ(info.group_ptr_.front(), 0);
  ASSERT_EQ(info.group_ptr_.back(), info.num_row_);

  // With all-distinct ids, the k-th boundary sits at row k.
  for (size_t idx = 0; idx <= info.num_row_; ++idx) {
    ASSERT_EQ(info.group_ptr_[idx], idx);
  }
}
TEST(MetaInfo, Validate) {
xgboost::MetaInfo info;
info.num_row_ = 10;