Update demo for prediction. (#6789)

* Remove use of deprecated ntree_limit. * Add sklearn demo.
2021-03-27 03:09:25 +08:00
parent 8825670c9c
commit 5c87c2bba8
1 changed files with 48 additions and 15 deletions
--- a/demo/guide-python/predict_first_ntree.py
+++ b/demo/guide-python/predict_first_ntree.py
@@ -1,21 +1,54 @@
 import os
 import numpy as np
 import xgboost as xgb
 from sklearn.datasets import load_svmlight_file
 # load data and do training
 CURRENT_DIR = os.path.dirname(__file__)
-dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train")
-dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
+test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test")
 param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 3
 bst = xgb.train(param, dtrain, num_round, watchlist)
-print('start testing prediction from first n trees')
+
-# predict using first 1 tree
+def native_interface():
-label = dtest.get_label()
+    # load data and do training
-ypred1 = bst.predict(dtest, ntree_limit=1)
+    dtrain = xgb.DMatrix(train)
-# by default, we predict using all the trees
+    dtest = xgb.DMatrix(test)
-ypred2 = bst.predict(dtest)
+    param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
-print('error of ypred1=%f' % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
+    watchlist = [(dtest, "eval"), (dtrain, "train")]
-print('error of ypred2=%f' % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
+    num_round = 3
    bst = xgb.train(param, dtrain, num_round, watchlist)
    print("start testing prediction from first n trees")
    # predict using only the first tree
    label = dtest.get_label()
    ypred1 = bst.predict(dtest, iteration_range=(0, 1))
    # by default, prediction uses all the trees
    ypred2 = bst.predict(dtest)
    # error = fraction of rows whose thresholded (> 0.5) prediction differs from the label
    print("error of ypred1=%f" % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
    print("error of ypred2=%f" % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
 def sklearn_interface():
    """Scikit-learn flavoured demo of predicting with a subset of the trees.

    Fits a 3-estimator ``XGBClassifier`` on the training file, then prints the
    test error obtained from the first tree only and from all the trees.
    """
    train_features, train_labels = load_svmlight_file(train)
    test_features, test_labels = load_svmlight_file(test)

    classifier = xgb.XGBClassifier(
        n_estimators=3,
        max_depth=2,
        eta=1,
        use_label_encoder=False,
    )
    classifier.fit(
        train_features, train_labels, eval_set=[(test_features, test_labels)]
    )
    assert classifier.n_classes_ == 2

    def error_rate(predictions):
        # Fraction of test rows whose thresholded prediction differs from the label.
        mismatches = (predictions > 0.5) != test_labels
        return np.sum(mismatches) / float(len(test_labels))

    print("start testing prediction from first n trees")
    # Restrict prediction to the first tree.
    first_tree_pred = classifier.predict(test_features, iteration_range=(0, 1))
    # With no iteration_range given, all the trees are used.
    all_trees_pred = classifier.predict(test_features)

    print("error of ypred1=%f" % error_rate(first_tree_pred))
    print("error of ypred2=%f" % error_rate(all_trees_pred))
 # Run both demos, native API first, when the file is executed as a script.
 if __name__ == "__main__":
    native_interface()
    sklearn_interface()