Update demo for prediction. (#6789)

* Remove use of deprecated ntree_limit.
* Add sklearn demo.
This commit is contained in:
Jiaming Yuan 2021-03-27 03:09:25 +08:00 committed by GitHub
parent 8825670c9c
commit 5c87c2bba8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,21 +1,54 @@
import os import os
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
from sklearn.datasets import load_svmlight_file
# load data and do training
CURRENT_DIR = os.path.dirname(__file__) CURRENT_DIR = os.path.dirname(__file__)
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train")
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test")
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 3
bst = xgb.train(param, dtrain, num_round, watchlist)
print('start testing prediction from first n trees')
# predict using first 1 tree def native_interface():
label = dtest.get_label() # load data and do training
ypred1 = bst.predict(dtest, ntree_limit=1) dtrain = xgb.DMatrix(train)
# by default, we predict using all the trees dtest = xgb.DMatrix(test)
ypred2 = bst.predict(dtest) param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
print('error of ypred1=%f' % (np.sum((ypred1 > 0.5) != label) / float(len(label)))) watchlist = [(dtest, "eval"), (dtrain, "train")]
print('error of ypred2=%f' % (np.sum((ypred2 > 0.5) != label) / float(len(label)))) num_round = 3
bst = xgb.train(param, dtrain, num_round, watchlist)
print("start testing prediction from first n trees")
# predict using first 1 tree
label = dtest.get_label()
ypred1 = bst.predict(dtest, iteration_range=(0, 1))
# by default, we predict using all the trees
ypred2 = bst.predict(dtest)
print("error of ypred1=%f" % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
print("error of ypred2=%f" % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
def sklearn_interface():
    """Run the prediction demo through the scikit-learn wrapper.

    Fits an ``XGBClassifier`` with three boosting rounds on the training
    file, then prints the error rate on the test file twice: once using
    only the first tree and once using every tree.
    """
    # Each load yields a (features, labels) pair read from the given path.
    features_train, labels_train = load_svmlight_file(train)
    X_test, y_test = load_svmlight_file(test)

    clf = xgb.XGBClassifier(
        n_estimators=3,
        max_depth=2,
        eta=1,
        use_label_encoder=False,
    )
    clf.fit(features_train, labels_train, eval_set=[(X_test, y_test)])
    assert clf.n_classes_ == 2

    print("start testing prediction from first n trees")
    # iteration_range=(0, 1) limits prediction to the first tree only.
    ypred1 = clf.predict(X_test, iteration_range=(0, 1))
    # With no range given, every tree takes part in the prediction.
    ypred2 = clf.predict(X_test)

    n_samples = float(len(y_test))
    mistakes_first_tree = np.sum((ypred1 > 0.5) != y_test)
    mistakes_all_trees = np.sum((ypred2 > 0.5) != y_test)
    print("error of ypred1=%f" % (mistakes_first_tree / n_samples))
    print("error of ypred2=%f" % (mistakes_all_trees / n_samples))
if __name__ == "__main__":
    # Run the native-API demo first, then the scikit-learn one.
    for run_demo in (native_interface, sklearn_interface):
        run_demo()