Initial support for multioutput regression. (#7514)

* Add the `num_target` model parameter, which is configured from the input labels.
* Change elementwise metric and indexing for weights.
* Add demo.
* Add tests.
This commit is contained in:
Jiaming Yuan
2021-12-18 09:28:38 +08:00
committed by GitHub
parent 9ab73f737e
commit 58a6723eb1
22 changed files with 306 additions and 67 deletions

View File

@@ -60,8 +60,9 @@ def _test_from_cudf(DMatrixT):
assert dtrain.feature_names == ['x']
assert dtrain.feature_types == ['int']
with pytest.raises(Exception):
with pytest.raises(ValueError, match=r".*multi.*"):
dtrain = DMatrixT(cd, label=cd)
xgb.train({"tree_method": "gpu_hist", "objective": "multi:softprob"}, dtrain)
# Test when number of elements is less than 8
X = cudf.DataFrame({'x': cudf.Series([0, 1, 2, np.NAN, 4],

View File

@@ -50,9 +50,10 @@ def _test_from_cupy(DMatrixT):
dmatrix_from_cupy(np.int32, DMatrixT, -2)
dmatrix_from_cupy(np.int64, DMatrixT, -3)
with pytest.raises(Exception):
with pytest.raises(ValueError):
X = cp.random.randn(2, 2, dtype="float32")
DMatrixT(X, label=X)
y = cp.random.randn(2, 2, 3, dtype="float32")
DMatrixT(X, label=y)
def _test_cupy_training(DMatrixT):

View File

@@ -277,7 +277,9 @@ def run_gpu_hist(
X = to_cp(dataset.X, DMatrixT)
X = da.from_array(X, chunks=(chunk, dataset.X.shape[1]))
y = to_cp(dataset.y, DMatrixT)
y = da.from_array(y, chunks=(chunk,))
y_chunk = chunk if len(dataset.y.shape) == 1 else (chunk, dataset.y.shape[1])
y = da.from_array(y, chunks=y_chunk)
if dataset.w is not None:
w = to_cp(dataset.w, DMatrixT)
w = da.from_array(w, chunks=(chunk,))

View File

@@ -52,8 +52,12 @@ def test_boost_from_prediction_gpu_hist():
X, y = load_digits(return_X_y=True)
X, y = cp.array(X), cp.array(y)
twskl.run_boost_from_prediction_multi_clasas(tree_method, X, y, None)
twskl.run_boost_from_prediction_multi_clasas(tree_method, X, y, cudf.DataFrame)
twskl.run_boost_from_prediction_multi_clasas(
xgb.XGBClassifier, tree_method, X, y, None
)
twskl.run_boost_from_prediction_multi_clasas(
xgb.XGBClassifier, tree_method, X, y, cudf.DataFrame
)
def test_num_parallel_tree():