diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py
index d542c55b7..a663e672a 100644
--- a/demo/guide-python/predict_first_ntree.py
+++ b/demo/guide-python/predict_first_ntree.py
@@ -1,21 +1,64 @@
 import os
 import numpy as np
 import xgboost as xgb
+from sklearn.datasets import load_svmlight_file
 
-# load data in do training
 CURRENT_DIR = os.path.dirname(__file__)
-dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
-dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
-param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
-watchlist = [(dtest, 'eval'), (dtrain, 'train')]
-num_round = 3
-bst = xgb.train(param, dtrain, num_round, watchlist)
+train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train")
+test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test")
 
-print('start testing prediction from first n trees')
-# predict using first 1 tree
-label = dtest.get_label()
-ypred1 = bst.predict(dtest, ntree_limit=1)
-# by default, we predict using all the trees
-ypred2 = bst.predict(dtest)
-print('error of ypred1=%f' % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
-print('error of ypred2=%f' % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
+
+def native_interface():
+    """Compare first-tree and all-tree predictions with the native API.
+
+    Trains a 3-round booster with ``xgb.train`` and prints the test error
+    obtained from only the first tree and from all trees.
+    """
+    # load data and do training
+    dtrain = xgb.DMatrix(train)
+    dtest = xgb.DMatrix(test)
+    param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
+    watchlist = [(dtest, "eval"), (dtrain, "train")]
+    num_round = 3
+    bst = xgb.train(param, dtrain, num_round, watchlist)
+
+    print("start testing prediction from first n trees")
+    # predict using first 1 tree
+    label = dtest.get_label()
+    ypred1 = bst.predict(dtest, iteration_range=(0, 1))
+    # by default, we predict using all the trees
+    ypred2 = bst.predict(dtest)
+
+    print("error of ypred1=%f" % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
+    print("error of ypred2=%f" % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
+
+
+def sklearn_interface():
+    """Compare first-tree and all-tree predictions with the sklearn API.
+
+    Fits a 3-estimator ``xgb.XGBClassifier`` and prints the test error
+    obtained from only the first tree and from all trees.
+    """
+    X_train, y_train = load_svmlight_file(train)
+    X_test, y_test = load_svmlight_file(test)
+    clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1, use_label_encoder=False)
+    clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+    assert clf.n_classes_ == 2
+
+    print("start testing prediction from first n trees")
+    # predict using first 1 tree
+    ypred1 = clf.predict(X_test, iteration_range=(0, 1))
+    # by default, we predict using all the trees
+    ypred2 = clf.predict(X_test)
+
+    print(
+        "error of ypred1=%f" % (np.sum((ypred1 > 0.5) != y_test) / float(len(y_test)))
+    )
+    print(
+        "error of ypred2=%f" % (np.sum((ypred2 > 0.5) != y_test) / float(len(y_test)))
+    )
+
+
+if __name__ == "__main__":
+    native_interface()
+    sklearn_interface()