Initial support for multioutput regression. (#7514)
* Add a num-target model parameter, which is configured from the input labels.
* Change the elementwise metric and the indexing for weights.
* Add a demo.
* Add tests.
This commit is contained in:
@@ -127,6 +127,14 @@ def test_continuation_demo():
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.skipif(**tm.no_matplotlib())
def test_multioutput_reg() -> None:
    """Run the multioutput regression demo script as a subprocess.

    The script is launched with ``--plot=0``; ``subprocess.check_call``
    raises ``CalledProcessError`` if it exits with a non-zero status, which
    fails the test.
    """
    demo_path = os.path.join(PYTHON_DEMO_DIR, "multioutput_regression.py")
    subprocess.check_call(["python", demo_path, "--plot=0"])
|
||||
|
||||
|
||||
# gpu_acceleration is not tested because the covertype dataset is too large.
|
||||
# gamma regression is not tested as it requires running an R script first.
|
||||
# aft viz is not tested because plotting cannot be controlled.
|
||||
|
||||
@@ -1114,9 +1114,9 @@ class TestWithDask:
|
||||
return
|
||||
|
||||
chunk = 128
|
||||
X = da.from_array(dataset.X,
|
||||
chunks=(chunk, dataset.X.shape[1]))
|
||||
y = da.from_array(dataset.y, chunks=(chunk,))
|
||||
y_chunk = chunk if len(dataset.y.shape) == 1 else (chunk, dataset.y.shape[1])
|
||||
X = da.from_array(dataset.X, chunks=(chunk, dataset.X.shape[1]))
|
||||
y = da.from_array(dataset.y, chunks=y_chunk)
|
||||
if dataset.w is not None:
|
||||
w = da.from_array(dataset.w, chunks=(chunk,))
|
||||
else:
|
||||
|
||||
@@ -1118,10 +1118,10 @@ def run_boost_from_prediction_binary(tree_method, X, y, as_frame: Optional[Calla
|
||||
|
||||
|
||||
def run_boost_from_prediction_multi_clasas(
|
||||
tree_method, X, y, as_frame: Optional[Callable]
|
||||
estimator, tree_method, X, y, as_frame: Optional[Callable]
|
||||
):
|
||||
# Multi-class
|
||||
model_0 = xgb.XGBClassifier(
|
||||
model_0 = estimator(
|
||||
learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method
|
||||
)
|
||||
model_0.fit(X=X, y=y)
|
||||
@@ -1129,7 +1129,7 @@ def run_boost_from_prediction_multi_clasas(
|
||||
if as_frame is not None:
|
||||
margin = as_frame(margin)
|
||||
|
||||
model_1 = xgb.XGBClassifier(
|
||||
model_1 = estimator(
|
||||
learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method
|
||||
)
|
||||
model_1.fit(X=X, y=y, base_margin=margin)
|
||||
@@ -1137,7 +1137,7 @@ def run_boost_from_prediction_multi_clasas(
|
||||
xgb.DMatrix(X, base_margin=margin), output_margin=True
|
||||
)
|
||||
|
||||
model_2 = xgb.XGBClassifier(
|
||||
model_2 = estimator(
|
||||
learning_rate=0.3, random_state=0, n_estimators=8, tree_method=tree_method
|
||||
)
|
||||
model_2.fit(X=X, y=y)
|
||||
@@ -1152,8 +1152,9 @@ def run_boost_from_prediction_multi_clasas(
|
||||
|
||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
|
||||
def test_boost_from_prediction(tree_method):
|
||||
from sklearn.datasets import load_breast_cancer, load_digits
|
||||
from sklearn.datasets import load_breast_cancer, load_digits, make_regression
|
||||
import pandas as pd
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
|
||||
run_boost_from_prediction_binary(tree_method, X, y, None)
|
||||
@@ -1161,8 +1162,13 @@ def test_boost_from_prediction(tree_method):
|
||||
|
||||
X, y = load_digits(return_X_y=True)
|
||||
|
||||
run_boost_from_prediction_multi_clasas(tree_method, X, y, None)
|
||||
run_boost_from_prediction_multi_clasas(tree_method, X, y, pd.DataFrame)
|
||||
run_boost_from_prediction_multi_clasas(xgb.XGBClassifier, tree_method, X, y, None)
|
||||
run_boost_from_prediction_multi_clasas(
|
||||
xgb.XGBClassifier, tree_method, X, y, pd.DataFrame
|
||||
)
|
||||
|
||||
X, y = make_regression(n_samples=100, n_targets=4)
|
||||
run_boost_from_prediction_multi_clasas(xgb.XGBRegressor, tree_method, X, y, None)
|
||||
|
||||
|
||||
def test_estimator_type():
|
||||
|
||||
@@ -305,26 +305,48 @@ def make_categorical(
|
||||
|
||||
|
||||
_unweighted_datasets_strategy = strategies.sampled_from(
|
||||
[TestDataset('boston', get_boston, 'reg:squarederror', 'rmse'),
|
||||
TestDataset('digits', get_digits, 'multi:softmax', 'mlogloss'),
|
||||
TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
|
||||
TestDataset
|
||||
("sparse", get_sparse, "reg:squarederror", "rmse"),
|
||||
TestDataset("empty", lambda: (np.empty((0, 100)), np.empty(0)), "reg:squarederror",
|
||||
"rmse")])
|
||||
[
|
||||
TestDataset("boston", get_boston, "reg:squarederror", "rmse"),
|
||||
TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
|
||||
TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
|
||||
TestDataset(
|
||||
"mtreg",
|
||||
lambda: datasets.make_regression(n_samples=128, n_targets=3),
|
||||
"reg:squarederror",
|
||||
"rmse",
|
||||
),
|
||||
TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
|
||||
TestDataset(
|
||||
"empty",
|
||||
lambda: (np.empty((0, 100)), np.empty(0)),
|
||||
"reg:squarederror",
|
||||
"rmse",
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@strategies.composite
|
||||
def _dataset_weight_margin(draw):
|
||||
data: TestDataset = draw(_unweighted_datasets_strategy)
|
||||
if draw(strategies.booleans()):
|
||||
data.w = draw(arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0)))
|
||||
data.w = draw(
|
||||
arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0))
|
||||
)
|
||||
if draw(strategies.booleans()):
|
||||
num_class = 1
|
||||
if data.objective == "multi:softmax":
|
||||
num_class = int(np.max(data.y) + 1)
|
||||
elif data.name == "mtreg":
|
||||
num_class = data.y.shape[1]
|
||||
|
||||
data.margin = draw(
|
||||
arrays(np.float64, (len(data.y) * num_class), elements=strategies.floats(0.5, 1.0)))
|
||||
arrays(
|
||||
np.float64,
|
||||
(data.y.shape[0] * num_class),
|
||||
elements=strategies.floats(0.5, 1.0),
|
||||
)
|
||||
)
|
||||
if num_class != 1:
|
||||
data.margin = data.margin.reshape(data.y.shape[0], num_class)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user