Extract dask and spark tests into distributed tests. (#8395)

- Move test files.
- Run spark and dask separately to prevent conflicts.
- Gather common code into the testing module.
This commit is contained in:
Jiaming Yuan
2022-10-28 16:24:32 +08:00
committed by GitHub
parent f73520bfff
commit cfd2a9f872
34 changed files with 405 additions and 337 deletions

View File

@@ -5,6 +5,7 @@ import numpy as np
import pandas as pd
import pytest
from scipy import sparse
from xgboost.testing.shared import validate_leaf_output
import xgboost as xgb
from xgboost import testing as tm
@@ -26,16 +27,6 @@ def run_threaded_predict(X, rows, predict_func):
assert f.result()
def verify_leaf_output(leaf: np.ndarray, num_parallel_tree: int):
    """Assert that all trees within a forest produce the same leaf output.

    The first three axes of ``leaf`` are walked as (n_samples, n_rounds,
    n_classes). For each such position, the remaining slice must have
    ``num_parallel_tree`` entries along its first axis, and every entry must
    equal the first one.
    """
    # Visit every (sample, round, class) position in row-major order.
    for position in np.ndindex(*leaf.shape[:3]):
        forest = leaf[position]
        assert forest.shape[0] == num_parallel_tree
        # No sampling, so the trees of a forest are identical and their
        # outputs must coincide.
        matches_first = forest == forest[0]
        assert np.all(matches_first)
def run_predict_leaf(predictor):
rows = 100
cols = 4
@@ -67,7 +58,7 @@ def run_predict_leaf(predictor):
assert leaf.shape[2] == classes
assert leaf.shape[3] == num_parallel_tree
verify_leaf_output(leaf, num_parallel_tree)
validate_leaf_output(leaf, num_parallel_tree)
ntree_limit = 2
sliced = booster.predict(