Calculate `base_score` from the input labels for MAE. (#8107)
Fit an intercept as the base score for the absolute-error loss.
This commit is contained in:
@@ -102,34 +102,38 @@ def run_scikit_model_check(name, path):
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
def test_model_compatibility():
    """Test model compatibility, can only be run on CI as others don't
    have the credentials.

    """
    # Directory next to this test file where the reference models are unpacked.
    path = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(path, "models")

    # Download and extract the archive only once; subsequent runs reuse it.
    if not os.path.exists(path):
        zip_path, _ = urllib.request.urlretrieve(
            "https://xgboost-ci-jenkins-artifacts.s3-us-west-2"
            + ".amazonaws.com/xgboost_model_compatibility_test.zip"
        )
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(path)

    # Every file under the models directory except the bookkeeping `version`
    # file is a serialized model to check.
    models = [
        os.path.join(root, f)
        for root, subdir, files in os.walk(path)
        for f in files
        if f != "version"
    ]
    assert models

    for path in models:
        name = os.path.basename(path)
        if name.startswith("xgboost-"):
            # Native booster model.
            booster = xgboost.Booster(model_file=path)
            run_booster_check(booster, name)
            # Do full serialization.
            booster = copy.copy(booster)
            run_booster_check(booster, name)
        elif name.startswith("xgboost_scikit"):
            # Pickled scikit-learn wrapper model.
            run_scikit_model_check(name, path)
        else:
            # Unknown file in the archive is an error in the fixture itself.
            assert False
|
||||
# Standard library.
import json
from random import choice
from string import ascii_lowercase
from typing import Any, Dict

# Project-local test utilities.
import testing as tm
||||
@@ -397,3 +397,72 @@ class TestTreeMethod:
|
||||
def test_categorical_missing(self, rows, cols, cats):
|
||||
self.run_categorical_missing(rows, cols, cats, "approx")
|
||||
self.run_categorical_missing(rows, cols, cats, "hist")
|
||||
|
||||
def run_adaptive(self, tree_method, weighted) -> None:
|
||||
rng = np.random.RandomState(1994)
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.utils import stats
|
||||
|
||||
n_samples = 256
|
||||
X, y = make_regression(n_samples, 16, random_state=rng)
|
||||
if weighted:
|
||||
w = rng.normal(size=n_samples)
|
||||
w -= w.min()
|
||||
Xy = xgb.DMatrix(X, y, weight=w)
|
||||
base_score = stats._weighted_percentile(y, w, percentile=50)
|
||||
else:
|
||||
Xy = xgb.DMatrix(X, y)
|
||||
base_score = np.median(y)
|
||||
|
||||
booster_0 = xgb.train(
|
||||
{
|
||||
"tree_method": tree_method,
|
||||
"base_score": base_score,
|
||||
"objective": "reg:absoluteerror",
|
||||
},
|
||||
Xy,
|
||||
num_boost_round=1,
|
||||
)
|
||||
booster_1 = xgb.train(
|
||||
{"tree_method": tree_method, "objective": "reg:absoluteerror"},
|
||||
Xy,
|
||||
num_boost_round=1,
|
||||
)
|
||||
config_0 = json.loads(booster_0.save_config())
|
||||
config_1 = json.loads(booster_1.save_config())
|
||||
|
||||
def get_score(config: Dict) -> float:
|
||||
return float(config["learner"]["learner_model_param"]["base_score"])
|
||||
|
||||
assert get_score(config_0) == get_score(config_1)
|
||||
|
||||
raw_booster = booster_1.save_raw(raw_format="deprecated")
|
||||
booster_2 = xgb.Booster(model_file=raw_booster)
|
||||
config_2 = json.loads(booster_2.save_config())
|
||||
assert get_score(config_1) == get_score(config_2)
|
||||
|
||||
raw_booster = booster_1.save_raw(raw_format="ubj")
|
||||
booster_2 = xgb.Booster(model_file=raw_booster)
|
||||
config_2 = json.loads(booster_2.save_config())
|
||||
assert get_score(config_1) == get_score(config_2)
|
||||
|
||||
booster_0 = xgb.train(
|
||||
{
|
||||
"tree_method": tree_method,
|
||||
"base_score": base_score + 1.0,
|
||||
"objective": "reg:absoluteerror",
|
||||
},
|
||||
Xy,
|
||||
num_boost_round=1,
|
||||
)
|
||||
config_0 = json.loads(booster_0.save_config())
|
||||
np.testing.assert_allclose(get_score(config_0), get_score(config_1) + 1)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
@pytest.mark.parametrize(
|
||||
"tree_method,weighted", [
|
||||
("approx", False), ("hist", False), ("approx", True), ("hist", True)
|
||||
]
|
||||
)
|
||||
def test_adaptive(self, tree_method, weighted) -> None:
|
||||
self.run_adaptive(tree_method, weighted)
|
||||
|
||||
@@ -1537,13 +1537,56 @@ class TestWithDask:
|
||||
@pytest.mark.skipif(**tm.no_dask())
|
||||
@pytest.mark.gtest
|
||||
def test_quantile_same_on_all_workers(self) -> None:
|
||||
self.run_quantile('SameOnAllWorkers')
|
||||
self.run_quantile("SameOnAllWorkers")
|
||||
|
||||
def test_adaptive(self) -> None:
|
||||
def get_score(config: Dict) -> float:
|
||||
return float(config["learner"]["learner_model_param"]["base_score"])
|
||||
|
||||
def local_test(rabit_args: List[bytes], worker_id: int) -> bool:
|
||||
with xgb.dask.RabitContext(rabit_args):
|
||||
if worker_id == 0:
|
||||
y = np.array([0.0, 0.0, 0.0])
|
||||
x = np.array([[0.0]] * 3)
|
||||
else:
|
||||
y = np.array([1000.0])
|
||||
x = np.array(
|
||||
[
|
||||
[0.0],
|
||||
]
|
||||
)
|
||||
|
||||
Xy = xgb.DMatrix(x, y)
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist", "objective": "reg:absoluteerror"},
|
||||
Xy,
|
||||
num_boost_round=1,
|
||||
)
|
||||
config = json.loads(booster.save_config())
|
||||
base_score = get_score(config)
|
||||
assert base_score == 250.0
|
||||
return True
|
||||
|
||||
with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
|
||||
with Client(cluster) as client:
|
||||
workers = _get_client_workers(client)
|
||||
rabit_args = client.sync(
|
||||
xgb.dask._get_rabit_args, len(workers), None, client
|
||||
)
|
||||
futures = []
|
||||
for i, _ in enumerate(workers):
|
||||
f = client.submit(local_test, rabit_args, i)
|
||||
futures.append(f)
|
||||
|
||||
results = client.gather(futures)
|
||||
assert all(results)
|
||||
|
||||
def test_n_workers(self) -> None:
|
||||
with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
|
||||
with Client(cluster) as client:
|
||||
workers = _get_client_workers(client)
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
dX = client.submit(da.from_array, X, workers=[workers[0]]).result()
|
||||
dy = client.submit(da.from_array, y, workers=[workers[0]]).result()
|
||||
|
||||
Reference in New Issue
Block a user