xgboost/tests/python/test_parse_tree.py
Philip Hyunsu Cho 9c9070aea2
Use pytest conventions consistently (#6337)
* Do not derive from unittest.TestCase (not needed for pytest)

* assertRaises -> pytest.raises

* Simplify test_empty_dmatrix with test parametrization

* setUpClass -> setup_class, tearDownClass -> teardown_class

* Don't import unittest; import pytest

* Use plain assert

* Use parametrized tests in more places

* Fix test_gpu_with_sklearn.py

* Put back run_empty_dmatrix_reg / run_empty_dmatrix_cls

* Fix test_eta_decay_gpu_hist

* Add parametrized tests for monotone constraints

* Fix test names

* Remove test parametrization

* Revise test_slice to be not flaky
2020-11-19 17:00:15 -08:00

51 lines
1.8 KiB
Python

import xgboost as xgb
import numpy as np
import pytest
import testing as tm
pytestmark = pytest.mark.skipif(**tm.no_pandas())
dpath = 'demo/data/'
rng = np.random.RandomState(1994)
class TestTreesToDataFrame:
def build_model(self, max_depth, num_round):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
param = {'max_depth': max_depth, 'objective': 'binary:logistic',
'verbosity': 1}
num_round = num_round
bst = xgb.train(param, dtrain, num_round)
return bst
def parse_dumped_model(self, booster, item_to_get, splitter):
item_to_get += '='
txt_dump = booster.get_dump(with_stats=True)
tree_list = [tree.split('/n') for tree in txt_dump]
split_trees = [tree[0].split(item_to_get)[1:] for tree in tree_list]
res = sum([float(line.split(splitter)[0])
for tree in split_trees for line in tree])
return res
def test_trees_to_dataframe(self):
bst = self.build_model(max_depth=5, num_round=10)
gain_from_dump = self.parse_dumped_model(booster=bst,
item_to_get='gain',
splitter=',')
cover_from_dump = self.parse_dumped_model(booster=bst,
item_to_get='cover',
splitter='\n')
# method being tested
df = bst.trees_to_dataframe()
# test for equality of gains
gain_from_df = df[df.Feature != 'Leaf'][['Gain']].sum()
assert np.allclose(gain_from_dump, gain_from_df)
# test for equality of covers
cover_from_df = df.Cover.sum()
assert np.allclose(cover_from_dump, cover_from_df)