Merge model compatibility fixes from 1.0rc branch. (#5305)
* Port test model compatibility. * Port logit model fix. https://github.com/dmlc/xgboost/pull/5248 https://github.com/dmlc/xgboost/pull/5281
This commit is contained in:
148
tests/python/generate_models.py
Normal file
148
tests/python/generate_models.py
Normal file
@@ -0,0 +1,148 @@
|
||||
import os

import numpy as np

import xgboost

# Training configuration shared by every model generator below.
kRounds = 2
kRows = 1000
kCols = 4
kForests = 2
kMaxDepth = 2
kClasses = 3

# Version string is embedded in every generated file name.
version = xgboost.__version__

# Seed BEFORE drawing any random data: the original code seeded after
# generating X and w, which made the generated models non-reproducible.
np.random.seed(1994)

X = np.random.randn(kRows, kCols)
w = np.random.uniform(size=kRows)

# All model files are written into this directory.
target_dir = 'models'
|
||||
|
||||
|
||||
def booster_bin(model):
    """Return the path of the binary file for a native booster *model*."""
    filename = 'xgboost-' + version + '.' + model + '.bin'
    return os.path.join(target_dir, filename)
|
||||
|
||||
|
||||
def booster_json(model):
    """Return the path of the JSON file for a native booster *model*."""
    filename = 'xgboost-' + version + '.' + model + '.json'
    return os.path.join(target_dir, filename)
|
||||
|
||||
|
||||
def skl_bin(model):
    """Return the path of the binary file for a scikit-learn wrapper *model*."""
    filename = 'xgboost_scikit-' + version + '.' + model + '.bin'
    return os.path.join(target_dir, filename)
|
||||
|
||||
|
||||
def skl_json(model):
    """Return the path of the JSON file for a scikit-learn wrapper *model*."""
    filename = 'xgboost_scikit-' + version + '.' + model + '.json'
    return os.path.join(target_dir, filename)
|
||||
|
||||
|
||||
def generate_regression_model():
    """Train a regression model through both the native and sklearn APIs
    and save it in binary and JSON formats."""
    print('Regression')
    labels = np.random.randn(kRows)

    # Native Booster interface.
    params = {'tree_method': 'hist',
              'num_parallel_tree': kForests,
              'max_depth': kMaxDepth}
    dtrain = xgboost.DMatrix(X, label=labels, weight=w)
    booster = xgboost.train(params, dtrain=dtrain, num_boost_round=kRounds)
    for out in (booster_bin('reg'), booster_json('reg')):
        booster.save_model(out)

    # Scikit-learn wrapper.
    estimator = xgboost.XGBRegressor(tree_method='hist',
                                     num_parallel_tree=kForests,
                                     max_depth=kMaxDepth,
                                     n_estimators=kRounds)
    estimator.fit(X, labels, w)
    for out in (skl_bin('reg'), skl_json('reg')):
        estimator.save_model(out)
|
||||
|
||||
|
||||
def generate_logistic_model():
    """Train a binary-logistic model through both the native and sklearn
    APIs and save it in binary and JSON formats."""
    print('Logistic')
    labels = np.random.randint(0, 2, size=kRows)
    # Both classes must actually appear in the sampled labels.
    assert labels.max() == 1 and labels.min() == 0

    # Native Booster interface.
    params = {'tree_method': 'hist',
              'num_parallel_tree': kForests,
              'max_depth': kMaxDepth,
              'objective': 'binary:logistic'}
    dtrain = xgboost.DMatrix(X, label=labels, weight=w)
    booster = xgboost.train(params, dtrain=dtrain, num_boost_round=kRounds)
    for out in (booster_bin('logit'), booster_json('logit')):
        booster.save_model(out)

    # Scikit-learn wrapper.
    estimator = xgboost.XGBClassifier(tree_method='hist',
                                      num_parallel_tree=kForests,
                                      max_depth=kMaxDepth,
                                      n_estimators=kRounds)
    estimator.fit(X, labels, w)
    for out in (skl_bin('logit'), skl_json('logit')):
        estimator.save_model(out)
|
||||
|
||||
|
||||
def generate_classification_model():
    """Train a multi-class model through both the native and sklearn APIs
    and save it in binary and JSON formats."""
    print('Classification')
    labels = np.random.randint(0, kClasses, size=kRows)

    # Native Booster interface.
    params = {'num_class': kClasses,
              'tree_method': 'hist',
              'num_parallel_tree': kForests,
              'max_depth': kMaxDepth}
    dtrain = xgboost.DMatrix(X, label=labels, weight=w)
    booster = xgboost.train(params, dtrain=dtrain, num_boost_round=kRounds)
    for out in (booster_bin('cls'), booster_json('cls')):
        booster.save_model(out)

    # Scikit-learn wrapper.
    estimator = xgboost.XGBClassifier(tree_method='hist',
                                      num_parallel_tree=kForests,
                                      max_depth=kMaxDepth,
                                      n_estimators=kRounds)
    estimator.fit(X, labels, w)
    for out in (skl_bin('cls'), skl_json('cls')):
        estimator.save_model(out)
|
||||
|
||||
|
||||
def generate_ranking_model():
    """Train a learning-to-rank model through both the native and sklearn
    APIs and save it in binary and JSON formats."""
    print('Learning to Rank')
    labels = np.random.randint(5, size=kRows)
    # 20 query groups of 50 rows each (20 * 50 == kRows); one weight per
    # group — this local deliberately shadows the module-level row weights.
    w = np.random.uniform(size=20)
    groups = np.repeat(50, 20)

    # Native Booster interface.
    params = {'objective': 'rank:ndcg',
              'num_parallel_tree': kForests,
              'tree_method': 'hist',
              'max_depth': kMaxDepth}
    dtrain = xgboost.DMatrix(X, labels, weight=w)
    dtrain.set_group(groups)
    booster = xgboost.train(params, dtrain=dtrain, num_boost_round=kRounds)
    for out in (booster_bin('ltr'), booster_json('ltr')):
        booster.save_model(out)

    # Scikit-learn wrapper.
    ranker = xgboost.sklearn.XGBRanker(n_estimators=kRounds,
                                       tree_method='hist',
                                       objective='rank:ndcg',
                                       max_depth=kMaxDepth,
                                       num_parallel_tree=kForests)
    ranker.fit(X, labels, groups, sample_weight=w)
    for out in (skl_bin('ltr'), skl_json('ltr')):
        ranker.save_model(out)
|
||||
|
||||
|
||||
def write_versions():
    """Record the numpy and xgboost versions used to generate the models."""
    versions = {'numpy': np.__version__, 'xgboost': version}
    out = os.path.join(target_dir, 'version')
    # The compatibility test skips any file literally named 'version'.
    with open(out, 'w') as fd:
        fd.write(str(versions))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    # Generate one model per task type, then record library versions.
    for generate in (generate_regression_model,
                     generate_logistic_model,
                     generate_classification_model,
                     generate_ranking_model):
        generate()
    write_versions()
|
||||
@@ -39,7 +39,7 @@ class TestBasic(unittest.TestCase):
|
||||
def test_basic(self):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
param = {'max_depth': 2, 'eta': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
|
||||
@@ -284,16 +284,31 @@ class TestModels(unittest.TestCase):
|
||||
self.assertRaises(ValueError, bst.predict, dm1)
|
||||
bst.predict(dm2) # success
|
||||
|
||||
def test_model_binary_io(self):
|
||||
model_path = 'test_model_binary_io.bin'
|
||||
parameters = {'tree_method': 'hist', 'booster': 'gbtree',
|
||||
'scale_pos_weight': '0.5'}
|
||||
X = np.random.random((10, 3))
|
||||
y = np.random.random((10,))
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
bst = xgb.train(parameters, dtrain, num_boost_round=2)
|
||||
bst.save_model(model_path)
|
||||
bst = xgb.Booster(model_file=model_path)
|
||||
os.remove(model_path)
|
||||
config = json.loads(bst.save_config())
|
||||
assert float(config['learner']['objective'][
|
||||
'reg_loss_param']['scale_pos_weight']) == 0.5
|
||||
|
||||
def test_model_json_io(self):
|
||||
model_path = './model.json'
|
||||
model_path = 'test_model_json_io.json'
|
||||
parameters = {'tree_method': 'hist', 'booster': 'gbtree'}
|
||||
j_model = json_model(model_path, parameters)
|
||||
assert isinstance(j_model['learner'], dict)
|
||||
|
||||
bst = xgb.Booster(model_file='./model.json')
|
||||
bst = xgb.Booster(model_file=model_path)
|
||||
|
||||
bst.save_model(fname=model_path)
|
||||
with open('./model.json', 'r') as fd:
|
||||
with open(model_path, 'r') as fd:
|
||||
j_model = json.load(fd)
|
||||
assert isinstance(j_model['learner'], dict)
|
||||
|
||||
@@ -302,7 +317,7 @@ class TestModels(unittest.TestCase):
|
||||
@pytest.mark.skipif(**tm.no_json_schema())
|
||||
def test_json_schema(self):
|
||||
import jsonschema
|
||||
model_path = './model.json'
|
||||
model_path = 'test_json_schema.json'
|
||||
path = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
doc = os.path.join(path, 'doc', 'model.schema')
|
||||
|
||||
130
tests/python/test_model_compatibility.py
Normal file
130
tests/python/test_model_compatibility.py
Normal file
@@ -0,0 +1,130 @@
|
||||
import xgboost
|
||||
import os
|
||||
import generate_models as gm
|
||||
import json
|
||||
import zipfile
|
||||
import pytest
|
||||
|
||||
|
||||
def run_model_param_check(config):
    """Validate learner parameters shared by every generated model."""
    learner = config['learner']
    assert learner['learner_model_param']['num_feature'] == str(4)
    assert learner['learner_train_param']['booster'] == 'gbtree'
|
||||
|
||||
|
||||
def run_booster_check(booster, name):
    """Validate a native booster loaded from an old model file.

    *name* encodes which generator produced the model ('cls', 'logit',
    'ltr' or 'reg').
    """
    config = json.loads(booster.save_config())
    run_model_param_check(config)
    model_param = config['learner']['learner_model_param']
    train_param = config['learner']['learner_train_param']
    n_trees = len(booster.get_dump())
    if 'cls' in name:
        assert n_trees == gm.kForests * gm.kRounds * gm.kClasses
        assert float(model_param['base_score']) == 0.5
        assert train_param['objective'] == 'multi:softmax'
    elif 'logit' in name:
        assert n_trees == gm.kForests * gm.kRounds
        assert model_param['num_class'] == str(0)
        assert train_param['objective'] == 'binary:logistic'
    elif 'ltr' in name:
        assert train_param['objective'] == 'rank:ndcg'
    else:
        assert 'reg' in name
        assert n_trees == gm.kForests * gm.kRounds
        assert float(model_param['base_score']) == 0.5
        assert train_param['objective'] == 'reg:squarederror'
|
||||
|
||||
|
||||
def run_scikit_model_check(name, path):
    """Load a scikit-learn wrapper model from *path* and validate it.

    *name* encodes the task type and, for old files, the xgboost version
    ('0.90') that produced the model.
    """
    if 'reg' in name:
        reg = xgboost.XGBRegressor()
        reg.load_model(path)
        config = json.loads(reg.get_booster().save_config())
        train_param = config['learner']['learner_train_param']
        if '0.90' in name:
            # The objective was renamed after 0.90.
            assert train_param['objective'] == 'reg:linear'
        else:
            assert train_param['objective'] == 'reg:squarederror'
        assert len(reg.get_booster().get_dump()) == gm.kRounds * gm.kForests
        run_model_param_check(config)
    elif 'cls' in name:
        cls = xgboost.XGBClassifier()
        cls.load_model(path)
        if '0.90' not in name:
            # 0.90 models did not carry class metadata.
            assert len(cls.classes_) == gm.kClasses
            assert len(cls._le.classes_) == gm.kClasses
            assert cls.n_classes_ == gm.kClasses
        n_trees = len(cls.get_booster().get_dump())
        assert n_trees == gm.kRounds * gm.kForests * gm.kClasses, path
        config = json.loads(cls.get_booster().save_config())
        train_param = config['learner']['learner_train_param']
        assert train_param['objective'] == 'multi:softprob', path
        run_model_param_check(config)
    elif 'ltr' in name:
        ltr = xgboost.XGBRanker()
        ltr.load_model(path)
        assert len(ltr.get_booster().get_dump()) == gm.kRounds * gm.kForests
        config = json.loads(ltr.get_booster().save_config())
        train_param = config['learner']['learner_train_param']
        assert train_param['objective'] == 'rank:ndcg'
        run_model_param_check(config)
    elif 'logit' in name:
        logit = xgboost.XGBClassifier()
        logit.load_model(path)
        assert len(logit.get_booster().get_dump()) == gm.kRounds * gm.kForests
        config = json.loads(logit.get_booster().save_config())
        train_param = config['learner']['learner_train_param']
        assert train_param['objective'] == 'binary:logistic'
    else:
        assert False
|
||||
|
||||
|
||||
@pytest.mark.ci
def test_model_compatibility():
    '''Test model compatibility, can only be run on CI as others don't
    have the credentials.

    Downloads a zip of models generated by older xgboost versions from S3,
    extracts it next to this file, and validates each model through the
    matching checker.
    '''
    path = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(path, 'models')
    try:
        import boto3
        import botocore
    except ImportError:
        pytest.skip(
            'Skipping compatibility tests as boto3 is not installed.')

    try:
        s3_bucket = boto3.resource('s3').Bucket('xgboost-ci-jenkins-artifacts')
        zip_path = 'xgboost_model_compatibility_test.zip'
        s3_bucket.download_file(zip_path, zip_path)
    except botocore.exceptions.NoCredentialsError:
        pytest.skip(
            'Skipping compatibility tests as running on non-CI environment.')

    with zipfile.ZipFile(zip_path, 'r') as z:
        z.extractall(path)

    # Collect every extracted model file; 'version' records the generating
    # library versions and is not a model.
    models = [
        os.path.join(root, f) for root, subdir, files in os.walk(path)
        for f in files
        if f != 'version'
    ]
    assert models

    # Use a distinct loop variable: the original code rebound `path`
    # (the models directory) inside the loop.
    for model_path in models:
        name = os.path.basename(model_path)
        if name.startswith('xgboost-'):
            booster = xgboost.Booster(model_file=model_path)
            run_booster_check(booster, name)
        elif name.startswith('xgboost_scikit'):
            run_scikit_model_check(name, model_path)
        else:
            assert False, 'Unexpected model file: ' + name
|
||||
@@ -115,7 +115,6 @@ class TestRanking(unittest.TestCase):
|
||||
# model training parameters
|
||||
cls.params = {'objective': 'rank:pairwise',
|
||||
'booster': 'gbtree',
|
||||
'silent': 0,
|
||||
'eval_metric': ['ndcg']
|
||||
}
|
||||
|
||||
@@ -143,7 +142,7 @@ class TestRanking(unittest.TestCase):
|
||||
Test cross-validation with a group specified
|
||||
"""
|
||||
cv = xgboost.cv(self.params, self.dtrain, num_boost_round=2500,
|
||||
early_stopping_rounds=10, nfold=10, as_pandas=False)
|
||||
early_stopping_rounds=10, nfold=10, as_pandas=False)
|
||||
assert isinstance(cv, dict)
|
||||
self.assertSetEqual(set(cv.keys()), {'test-ndcg-mean', 'train-ndcg-mean', 'test-ndcg-std', 'train-ndcg-std'},
|
||||
"CV results dict key mismatch")
|
||||
@@ -153,7 +152,8 @@ class TestRanking(unittest.TestCase):
|
||||
Test cross-validation with a group specified
|
||||
"""
|
||||
cv = xgboost.cv(self.params, self.dtrain, num_boost_round=2500,
|
||||
early_stopping_rounds=10, shuffle=False, nfold=10, as_pandas=False)
|
||||
early_stopping_rounds=10, shuffle=False, nfold=10,
|
||||
as_pandas=False)
|
||||
assert isinstance(cv, dict)
|
||||
assert len(cv) == 4
|
||||
|
||||
|
||||
Reference in New Issue
Block a user