[dask] Add DaskXGBRanker (#6576)

* Initial support for distributed LTR using dask.

* Support `qid` in libxgboost.
* Refactor `predict` and `n_features_in_`, `best_[score/iteration/ntree_limit]`
  to avoid duplicated code.
* Define `DaskXGBRanker`.

The dask ranker doesn't support group structure; instead, it uses query IDs and
converts them to group ptr internally.
This commit is contained in:
Jiaming Yuan
2021-01-08 18:35:09 +08:00
committed by GitHub
parent 96d3d32265
commit 80065d571e
18 changed files with 755 additions and 351 deletions

View File

@@ -171,6 +171,22 @@ Arrow specification.'''
with pytest.raises(xgb.core.XGBoostError):
m.slice(rindex=[0, 1, 2])
@pytest.mark.skipif(**tm.no_cupy())
def test_qid(self):
    """Check the group pointer produced from query IDs set on a DMatrix.

    Builds a DMatrix from CuPy arrays, attaches sorted query IDs through
    ``set_info(qid=...)``, and verifies that the resulting ``group_ptr``
    starts at 0 and ends at the number of rows.
    """
    import cupy as cp

    n_rows, n_cols = 100, 10
    rng = cp.random.RandomState(1994)

    # Draw features, labels, then query ids, in that order, so the seeded
    # generator is consumed in a fixed sequence.
    features = rng.randn(n_rows, n_cols)
    labels = rng.randn(n_rows)
    query_ids = rng.randint(low=0, high=10, size=n_rows, dtype=np.uint32)

    dmat = xgb.DMatrix(features, labels)
    # Sorting makes rows that share a query id contiguous.
    dmat.set_info(qid=cp.sort(query_ids))

    boundaries = dmat.get_uint_info('group_ptr')
    assert boundaries[0] == 0
    assert boundaries[-1] == n_rows
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_specified_device(self):