Previously, we used `libsvm` as the default when the format was not specified. However, the dmlc data parser is not particularly robust against errors, and the most common type of error is an undefined format. Along with this change, we recommend that users use other data loaders instead. We will continue to maintain the parsers, as they are currently used for many internal tests, including federated learning.
36 lines
1.1 KiB
Python
36 lines
1.1 KiB
Python
"""
|
|
Demo for boosting from prediction
|
|
=================================
|
|
"""
|
|
import os
|
|
|
|
import xgboost as xgb
|
|
|
|
# Directory containing this script; the data paths below are resolved
# relative to it.
CURRENT_DIR = os.path.dirname(__file__)

# Load the train and test sets. The ``?format=libsvm`` suffix in each path
# states the data format explicitly.
dtrain = xgb.DMatrix(
    os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm")
)
dtest = xgb.DMatrix(
    os.path.join(CURRENT_DIR, "../data/agaricus.txt.test?format=libsvm")
)

# (DMatrix, name) pairs handed to ``xgb.train`` as its evaluation list.
watchlist = [(dtest, "eval"), (dtrain, "train")]

###
# advanced: start from an initial base prediction
#
print("start running example to start from a initial prediction")

# Specify parameters via a map; the definitions are the same as in the C++
# version.
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}

# Train xgboost for 1 round. ``num_boost_round`` and ``evals`` are passed by
# keyword: ``evals`` is keyword-only in the current ``xgb.train`` signature
# and passing it positionally is deprecated.
bst = xgb.train(param, dtrain, num_boost_round=1, evals=watchlist)

# Note: set_base_margin needs the margin value, not the transformed
# prediction. Predicting with output_margin=True always gives the margin
# values before the logistic transformation.
ptrain = bst.predict(dtrain, output_margin=True)
ptest = bst.predict(dtest, output_margin=True)

# Attach the first model's margins to both matrices so that the next round
# of training starts from them.
dtrain.set_base_margin(ptrain)
dtest.set_base_margin(ptest)

print("this is result of running from initial prediction")
bst_from_margin = xgb.train(param, dtrain, num_boost_round=1, evals=watchlist)