Test model compatibility. (#5248)

* Add model compatibility tests.

* Typo.
This commit is contained in:
Jiaming Yuan 2020-01-31 18:46:13 +08:00 committed by GitHub
parent c8d32102fb
commit 7f542d2198
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 183 additions and 5 deletions

View File

@ -24,7 +24,7 @@ void JsonWriter::Visit(JsonArray const* arr) {
for (size_t i = 0; i < size; ++i) {
auto const& value = vec[i];
this->Save(value);
if (i != size-1) { Write(", "); }
if (i != size-1) { Write(","); }
}
this->Write("]");
}
@ -38,7 +38,7 @@ void JsonWriter::Visit(JsonObject const* obj) {
size_t size = obj->getObject().size();
for (auto& value : obj->getObject()) {
this->Write("\"" + value.first + "\": ");
this->Write("\"" + value.first + "\":");
this->Save(value.second);
if (i != size-1) {

View File

@ -0,0 +1,124 @@
import xgboost
import numpy as np
import os
# Shape of the synthetic problem and of the models trained on it.
kRounds = 2      # boosting rounds per model
kRows = 1000     # samples in the synthetic dataset
kCols = 4        # features per sample
kForests = 2     # passed as num_parallel_tree
kMaxDepth = 2    # passed as max_depth
kClasses = 3     # number of distinct labels for the classification model

# Seed the global NumPy RNG *before* the first draw so that the shared
# features and weights below are reproducible, not just the labels drawn
# later inside the generate_* functions.
np.random.seed(1994)

# Features and per-sample weights shared by the generators.
X = np.random.randn(kRows, kCols)
w = np.random.uniform(size=kRows)

# Version string embedded in every generated file name.
version = xgboost.__version__

# Output directory, relative to the current working directory.
target_dir = 'models'
def booster_bin(model):
    """Return the ``.bin`` path in ``target_dir`` for the ``xgboost-`` prefixed model `model`."""
    filename = ''.join(['xgboost-', version, '.', model, '.bin'])
    return os.path.join(target_dir, filename)
def booster_json(model):
    """Return the ``.json`` path in ``target_dir`` for the ``xgboost-`` prefixed model `model`."""
    filename = ''.join(['xgboost-', version, '.', model, '.json'])
    return os.path.join(target_dir, filename)
def skl_bin(model):
    """Return the ``.bin`` path in ``target_dir`` for the ``xgboost_scikit-`` prefixed model `model`."""
    filename = ''.join(['xgboost_scikit-', version, '.', model, '.bin'])
    return os.path.join(target_dir, filename)
def skl_json(model):
    """Return the ``.json`` path in ``target_dir`` for the ``xgboost_scikit-`` prefixed model `model`."""
    filename = ''.join(['xgboost_scikit-', version, '.', model, '.json'])
    return os.path.join(target_dir, filename)
def generate_regression_model():
    """Train regression models through both APIs and save each as .bin and .json.

    Labels are drawn from a standard normal distribution; the module-level
    ``X`` and ``w`` supply features and sample weights.
    """
    print('Regression')
    y = np.random.randn(kRows)

    # Native training API.
    params = {
        'tree_method': 'hist',
        'num_parallel_tree': kForests,
        'max_depth': kMaxDepth,
    }
    dtrain = xgboost.DMatrix(X, label=y, weight=w)
    booster = xgboost.train(params, dtrain=dtrain, num_boost_round=kRounds)
    for out_path in (booster_bin('reg'), booster_json('reg')):
        booster.save_model(out_path)

    # scikit-learn wrapper with the same hyper-parameters.
    regressor = xgboost.XGBRegressor(n_estimators=kRounds,
                                     max_depth=kMaxDepth,
                                     num_parallel_tree=kForests,
                                     tree_method='hist')
    regressor.fit(X, y, sample_weight=w)
    for out_path in (skl_bin('reg'), skl_json('reg')):
        regressor.save_model(out_path)
def generate_classification_model():
    """Train multi-class models through both APIs and save each as .bin and .json.

    Labels are integers drawn uniformly from ``[0, kClasses)``; the
    module-level ``X`` and ``w`` supply features and sample weights.
    """
    print('Classification')
    y = np.random.randint(0, kClasses, size=kRows)

    # Native training API; the number of classes is passed explicitly.
    params = {
        'num_class': kClasses,
        'tree_method': 'hist',
        'num_parallel_tree': kForests,
        'max_depth': kMaxDepth,
    }
    dtrain = xgboost.DMatrix(X, label=y, weight=w)
    booster = xgboost.train(params, dtrain=dtrain, num_boost_round=kRounds)
    for out_path in (booster_bin('cls'), booster_json('cls')):
        booster.save_model(out_path)

    # scikit-learn wrapper; no class count is passed here.
    clf = xgboost.XGBClassifier(n_estimators=kRounds,
                                max_depth=kMaxDepth,
                                num_parallel_tree=kForests,
                                tree_method='hist')
    clf.fit(X, y, sample_weight=w)
    for out_path in (skl_bin('cls'), skl_json('cls')):
        clf.save_model(out_path)
def generate_ranking_model():
    """Train learning-to-rank models through both APIs and save each as .bin and .json.

    The ``kRows`` samples are split into equally sized query groups, each
    with its own weight.  Relevance labels are integers in ``[0, 5)``.
    """
    print('Learning to Rank')
    n_groups = 20
    group_size = kRows // n_groups
    # The group sizes must add up to the number of rows.
    assert group_size * n_groups == kRows

    y = np.random.randint(5, size=kRows)
    # One weight per query group.  Drawn from [0, 1) instead of a normal
    # distribution so that no group ends up with a negative weight.
    w = np.random.uniform(size=n_groups)
    g = np.repeat(group_size, n_groups)

    # Native training API.
    data = xgboost.DMatrix(X, y, weight=w)
    data.set_group(g)
    booster = xgboost.train({'objective': 'rank:ndcg',
                             'num_parallel_tree': kForests,
                             'tree_method': 'hist',
                             'max_depth': kMaxDepth},
                            num_boost_round=kRounds,
                            dtrain=data)
    booster.save_model(booster_bin('ltr'))
    booster.save_model(booster_json('ltr'))

    # scikit-learn wrapper with the same hyper-parameters.
    ranker = xgboost.sklearn.XGBRanker(n_estimators=kRounds,
                                       tree_method='hist',
                                       objective='rank:ndcg',
                                       max_depth=kMaxDepth,
                                       num_parallel_tree=kForests)
    ranker.fit(X, y, g, sample_weight=w)
    ranker.save_model(skl_bin('ltr'))
    ranker.save_model(skl_json('ltr'))
def write_versions():
    """Write the numpy and xgboost versions to the file ``version`` in ``target_dir``.

    The content is the ``str()`` of a dict, not JSON.
    """
    recorded = dict()
    recorded['numpy'] = np.__version__
    recorded['xgboost'] = version
    version_file = os.path.join(target_dir, 'version')
    with open(version_file, 'w') as fd:
        fd.write(str(recorded))
if __name__ == '__main__':
    # Create the output directory on first use.
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)
    # Produce every model family, then record the library versions used.
    for step in (generate_regression_model,
                 generate_classification_model,
                 generate_ranking_model,
                 write_versions):
        step()

View File

@ -0,0 +1 @@
{'numpy': '1.16.4', 'xgboost': '1.0.0-SNAPSHOT'}

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -0,0 +1 @@
{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":0,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":1,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.9823150
0387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_para
m":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"lambda_rank_param":{"fix_list_weight":"0","num_pairsample":"1"},"name":"rank:ndcg"}},"version":[1,0,0]}

Binary file not shown.

View File

@ -0,0 +1 @@
{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[-5.37645816802978516e-01,-4.36891138553619385e-01,-6.70873284339904785e-01,-1.25496864318847656e+00,-4.07270163297653198e-01,-6.88224375247955322e-01,4.64901357889175415e-01],"default_left":[false,false,false,false,false,false,false],"id":0,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[6.49523925781250000e+00,6.53602600097656250e+00,4.57461547851562500e+00,2.30323791503906250e-01,6.39891815185546875e+00,4.40366363525390625e+00,2.28362298011779785e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.89942225813865662e-01,-1.81951093673706055e+00,2.12066125869750977e+00,-1.88245311379432678e-01,-6.10905252397060394e-02,-1.03233657777309418e-01,6.97352066636085510e-02],"split_indices":[1,0,0,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,2.89816162109375000e+02,2.14897293090820312e+02,8.68150043487548828e+00,2.81134674072265625e+02,2.12051849365234375e+02,2.84543561935424805e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-5.37645816802978516e-01,-4.36891138553619385e-01,-6.70873284339904785e-01,-1.25496864318847656e+00,-4.07270163297653198e-01,-6.88224375247955322e-01,4.64901357889175415e-01],"default_left":[false,false,false,false,false,false,false],"id":1,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[6.49523925781250000e+00,6.53602600097656250e+00,4.57461547851562500e+00,2.30323791503906250e-01,6.39891815185546875e+00,4.40366363525390625e+00,2.28362298011779785e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.89942225813865662e-01,-1.81951093673706055e+00,2.12066125869750977e+00,-1.88245311379432678e-01,-6.10905252397060394e-02,-1.03233657777309418e-01,6.97352066636085510e-02],"
split_indices":[1,0,0,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,2.89816162109375000e+02,2.14897293090820312e+02,8.68150043487548828e+00,2.81134674072265625e+02,2.12051849365234375e+02,2.84543561935424805e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.77470612525939941e-01,3.31088960170745850e-01,-3.92237067222595215e-01,8.17872881889343262e-01,1.18046358227729797e-01,-3.00728023052215576e-01,-4.70518797636032104e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[5.42109680175781250e+00,1.03034389019012451e+00,3.41049194335937500e+00,0.00000000000000000e+00,1.19803142547607422e+00,4.23731803894042969e+00,4.69757843017578125e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.07929229736328125e+00,-5.09094715118408203e-01,-8.72411578893661499e-02,1.22680939733982086e-01,1.77069548517465591e-02,-4.51092049479484558e-02,-7.05778226256370544e-02],"split_indices":[3,0,3,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+02,2.13924217224121094e+00,7.72699451446533203e+00,2.30380615234375000e+02,2.64466613769531250e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.77470612525939941e-01,3.31088960170745850e-01,-3.92237067222595215e-01,8.17872881889343262e-01,1.18046358227729797e-01,-3.00728023052215576e-01,-4.70518797636032104e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[5.42109680175781250e+00,1.03034389019012451e+00,3.41049194335937500e+00,0.00000000000000000e+00,1.19803142547607422e+00,4.23731803894042969e+00,4.69757843017578125e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.07929229736328125e+00,-5.09094715118
408203e-01,-8.72411578893661499e-02,1.22680939733982086e-01,1.77069548517465591e-02,-4.51092049479484558e-02,-7.05778226256370544e-02],"split_indices":[3,0,3,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+02,2.13924217224121094e+00,7.72699451446533203e+00,2.30380615234375000e+02,2.64466613769531250e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,0,0]}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"learner":{"attributes":{"scikit_learn":"{\"n_estimators\": 2, \"objective\": \"rank:ndcg\", \"max_depth\": 2, \"learning_rate\": null, \"verbosity\": null, \"booster\": null, \"tree_method\": \"hist\", \"gamma\": null, \"min_child_weight\": null, \"max_delta_step\": null, \"subsample\": null, \"colsample_bytree\": null, \"colsample_bylevel\": null, \"colsample_bynode\": null, \"reg_alpha\": null, \"reg_lambda\": null, \"scale_pos_weight\": null, \"base_score\": null, \"missing\": NaN, \"num_parallel_tree\": 2, \"kwargs\": {}, \"random_state\": null, \"n_jobs\": null, \"monotone_constraints\": null, \"interaction_constraints\": null, \"importance_type\": \"gain\", \"gpu_id\": null, \"type\": \"XGBRanker\"}"},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":0,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":1,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,
3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01
,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"lambda_rank_param":{"fix_list_weight":"0","num_pairsample":"1"},"name":"rank:ndcg"}},"version":[1,0,0]}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,47 @@
import xgboost
import os
import generate_models as gm
def test_model_compability():
    """Load every stored model under ``models/`` and check its number of trees.

    Files prefixed ``xgboost-`` are loaded with ``xgboost.Booster``; files
    prefixed ``xgboost_scikit`` are loaded with the scikit-learn wrapper
    matching the ``reg`` / ``cls`` / ``ltr`` tag in the file name.  Any other
    file (except ``version``) fails the test.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    model_dir = os.path.join(here, 'models')
    models = [
        os.path.join(root, f) for root, subdir, files in os.walk(model_dir)
        for f in files
        if f != 'version'
    ]
    # Expected number of stored model files; list them on mismatch.
    assert len(models) == 12, models

    # Tree count of a single-output model.
    n_trees = gm.kForests * gm.kRounds

    for model_path in models:
        name = os.path.basename(model_path)
        if name.startswith('xgboost-'):
            booster = xgboost.Booster(model_file=model_path)
            if 'cls' in name:
                assert (len(booster.get_dump()) ==
                        n_trees * gm.kClasses), model_path
            else:
                assert len(booster.get_dump()) == n_trees, model_path
        elif name.startswith('xgboost_scikit'):
            if 'reg' in name:
                reg = xgboost.XGBRegressor()
                reg.load_model(model_path)
                assert len(reg.get_booster().get_dump()) == n_trees, model_path
            elif 'cls' in name:
                cls = xgboost.XGBClassifier()
                cls.load_model(model_path)
                # The class metadata must be restored along with the trees.
                assert len(cls.classes_) == gm.kClasses, model_path
                assert len(cls._le.classes_) == gm.kClasses, model_path
                assert cls.n_classes_ == gm.kClasses, model_path
                assert (len(cls.get_booster().get_dump()) ==
                        n_trees * gm.kClasses), model_path
            elif 'ltr' in name:
                ltr = xgboost.XGBRanker()
                ltr.load_model(model_path)
                assert len(ltr.get_booster().get_dump()) == n_trees, model_path
            else:
                assert False, 'Unrecognized scikit-learn model: ' + model_path
        else:
            assert False, 'Unrecognized model file: ' + model_path

View File

@ -115,7 +115,6 @@ class TestRanking(unittest.TestCase):
# model training parameters
cls.params = {'objective': 'rank:pairwise',
'booster': 'gbtree',
'silent': 0,
'eval_metric': ['ndcg']
}
@ -143,7 +142,7 @@ class TestRanking(unittest.TestCase):
Test cross-validation with a group specified
"""
cv = xgboost.cv(self.params, self.dtrain, num_boost_round=2500,
early_stopping_rounds=10, nfold=10, as_pandas=False)
early_stopping_rounds=10, nfold=10, as_pandas=False)
assert isinstance(cv, dict)
self.assertSetEqual(set(cv.keys()), {'test-ndcg-mean', 'train-ndcg-mean', 'test-ndcg-std', 'train-ndcg-std'},
"CV results dict key mismatch")
@ -153,7 +152,8 @@ class TestRanking(unittest.TestCase):
Test cross-validation with a group specified
"""
cv = xgboost.cv(self.params, self.dtrain, num_boost_round=2500,
early_stopping_rounds=10, shuffle=False, nfold=10, as_pandas=False)
early_stopping_rounds=10, shuffle=False, nfold=10,
as_pandas=False)
assert isinstance(cv, dict)
assert len(cv) == 4