Update Python demos with tests. (#5651)
* Remove GPU memory usage demo. * Add tests for demos. * Remove `silent`. * Remove shebang as it's not portable.
This commit is contained in:
1
demo/guide-python/basic_walkthrough.py
Executable file → Normal file
1
demo/guide-python/basic_walkthrough.py
Executable file → Normal file
@@ -1,4 +1,3 @@
|
||||
#!/usr/bin/env python
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
import pickle
|
||||
|
||||
10
demo/guide-python/boost_from_prediction.py
Executable file → Normal file
10
demo/guide-python/boost_from_prediction.py
Executable file → Normal file
@@ -1,15 +1,17 @@
|
||||
#!/usr/bin/python
|
||||
import os
|
||||
import xgboost as xgb
|
||||
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
|
||||
dtest = xgb.DMatrix('../data/agaricus.txt.test')
|
||||
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
###
|
||||
# advanced: start from an initial base prediction
|
||||
#
|
||||
print('start running example to start from a initial prediction')
|
||||
# specify parameters via map; definitions are the same as in the C++ version
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
|
||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
# train xgboost for 1 round
|
||||
bst = xgb.train(param, dtrain, 1, watchlist)
|
||||
# Note: we need the margin value instead of transformed prediction in
|
||||
|
||||
11
demo/guide-python/cross_validation.py
Executable file → Normal file
11
demo/guide-python/cross_validation.py
Executable file → Normal file
@@ -1,10 +1,11 @@
|
||||
#!/usr/bin/python
|
||||
import os
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
|
||||
### load data in do training
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
|
||||
# load data and do training
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic'}
|
||||
num_round = 2
|
||||
|
||||
print('running cross validation')
|
||||
@@ -56,7 +57,7 @@ def evalerror(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1}
|
||||
param = {'max_depth':2, 'eta':1}
|
||||
# train with customized objective
|
||||
xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
|
||||
obj=logregobj, feval=evalerror)
|
||||
|
||||
9
demo/guide-python/custom_objective.py
Executable file → Normal file
9
demo/guide-python/custom_objective.py
Executable file → Normal file
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/python
|
||||
import os
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
###
|
||||
@@ -6,13 +6,14 @@ import xgboost as xgb
|
||||
#
|
||||
print('start running example to used customized objective function')
|
||||
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
|
||||
dtest = xgb.DMatrix('../data/agaricus.txt.test')
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
|
||||
|
||||
# note: for customized objective function, we leave objective as default
|
||||
# note: what we are getting is margin value in prediction
|
||||
# you must know what you are doing
|
||||
param = {'max_depth': 2, 'eta': 1, 'silent': 1}
|
||||
param = {'max_depth': 2, 'eta': 1}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 2
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
##
|
||||
# This script demonstrates how to access the eval metrics in xgboost
|
||||
##
|
||||
|
||||
import os
|
||||
import xgboost as xgb
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train', silent=True)
|
||||
dtest = xgb.DMatrix('../data/agaricus.txt.test', silent=True)
|
||||
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
|
||||
|
||||
param = [('max_depth', 2), ('objective', 'binary:logistic'), ('eval_metric', 'logloss'), ('eval_metric', 'error')]
|
||||
|
||||
|
||||
num_round = 2
|
||||
watchlist = [(dtest,'eval'), (dtrain,'train')]
|
||||
|
||||
|
||||
13
demo/guide-python/external_memory.py
Executable file → Normal file
13
demo/guide-python/external_memory.py
Executable file → Normal file
@@ -1,6 +1,4 @@
|
||||
#!/usr/bin/python
|
||||
import numpy as np
|
||||
import scipy.sparse
|
||||
import os
|
||||
import xgboost as xgb
|
||||
|
||||
### simple example for using external memory version
|
||||
@@ -8,11 +6,12 @@ import xgboost as xgb
|
||||
# this is the only difference, add a # followed by a cache prefix name
|
||||
# several cache files with the prefix will be generated
|
||||
# currently only conversion from a libsvm file is supported
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache')
|
||||
dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache')
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
|
||||
|
||||
# specify validations set to watch performance
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
|
||||
param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic'}
|
||||
|
||||
# performance notice: set nthread to be the number of your real cpu
|
||||
# some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case set nthread=4
|
||||
@@ -21,5 +20,3 @@ param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 2
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist)
|
||||
|
||||
|
||||
|
||||
3
demo/guide-python/gamma_regression.py
Executable file → Normal file
3
demo/guide-python/gamma_regression.py
Executable file → Normal file
@@ -1,4 +1,3 @@
|
||||
#!/usr/bin/python
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
|
||||
@@ -12,7 +11,7 @@ dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34])
|
||||
|
||||
# for gamma regression, we need to set the objective to 'reg:gamma', it also suggests
|
||||
# to set the base_score to a value between 1 to 5 if the number of iteration is small
|
||||
param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3}
|
||||
param = {'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3}
|
||||
|
||||
# the rest of settings are the same
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
|
||||
9
demo/guide-python/generalized_linear_model.py
Executable file → Normal file
9
demo/guide-python/generalized_linear_model.py
Executable file → Normal file
@@ -1,16 +1,17 @@
|
||||
#!/usr/bin/python
|
||||
import os
|
||||
import xgboost as xgb
|
||||
##
|
||||
# this script demonstrates how to fit a generalized linear model in xgboost
|
||||
# basically, we are using linear model, instead of tree for our boosters
|
||||
##
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
|
||||
dtest = xgb.DMatrix('../data/agaricus.txt.test')
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
|
||||
# change booster to gblinear, so that we are fitting a linear model
|
||||
# alpha is the L1 regularizer
|
||||
# lambda is the L2 regularizer
|
||||
# you can also set lambda_bias which is L2 regularizer on the bias term
|
||||
param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
|
||||
param = {'objective':'binary:logistic', 'booster':'gblinear',
|
||||
'alpha': 0.0001, 'lambda': 1}
|
||||
|
||||
# normally, you do not need to set eta (step_size)
|
||||
|
||||
13
demo/guide-python/predict_first_ntree.py
Executable file → Normal file
13
demo/guide-python/predict_first_ntree.py
Executable file → Normal file
@@ -1,17 +1,18 @@
|
||||
#!/usr/bin/python
|
||||
import os
|
||||
import numpy as np
|
||||
import xgboost as xgb
|
||||
|
||||
### load data in do training
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
|
||||
dtest = xgb.DMatrix('../data/agaricus.txt.test')
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
|
||||
# load data and do training
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
|
||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 3
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist)
|
||||
|
||||
print('start testing prediction from first n trees')
|
||||
### predict using first 1 tree
|
||||
# predict using first 1 tree
|
||||
label = dtest.get_label()
|
||||
ypred1 = bst.predict(dtest, ntree_limit=1)
|
||||
# by default, we predict using all the trees
|
||||
|
||||
17
demo/guide-python/predict_leaf_indices.py
Executable file → Normal file
17
demo/guide-python/predict_leaf_indices.py
Executable file → Normal file
@@ -1,19 +1,20 @@
|
||||
#!/usr/bin/python
|
||||
import os
|
||||
import xgboost as xgb
|
||||
|
||||
### load data in do training
|
||||
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
|
||||
dtest = xgb.DMatrix('../data/agaricus.txt.test')
|
||||
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
|
||||
# load data and do training
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
|
||||
dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
|
||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 3
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist)
|
||||
|
||||
print ('start testing predict the leaf indices')
|
||||
### predict using first 2 tree
|
||||
print('start testing predict the leaf indices')
|
||||
# predict using the first 2 trees
|
||||
leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True)
|
||||
print(leafindex.shape)
|
||||
print(leafindex)
|
||||
### predict all trees
|
||||
# predict all trees
|
||||
leafindex = bst.predict(dtest, pred_leaf=True)
|
||||
print(leafindex.shape)
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
#!/bin/bash
|
||||
export PYTHONPATH=$PYTHONPATH:../../python-package
|
||||
python basic_walkthrough.py
|
||||
python custom_objective.py
|
||||
python boost_from_prediction.py
|
||||
python predict_first_ntree.py
|
||||
python generalized_linear_model.py
|
||||
python cross_validation.py
|
||||
python predict_leaf_indices.py
|
||||
python sklearn_examples.py
|
||||
python sklearn_parallel.py
|
||||
python external_memory.py
|
||||
rm -rf *~ *.model *.buffer
|
||||
@@ -20,7 +20,7 @@ clf = xgb.XGBModel(**param_dist)
|
||||
# Or you can use: clf = xgb.XGBClassifier(**param_dist)
|
||||
|
||||
clf.fit(X_train, y_train,
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
eval_metric='logloss',
|
||||
verbose=True)
|
||||
|
||||
@@ -37,7 +37,7 @@ for e_name, e_mtrs in evals_result.items():
|
||||
for e_mtr_name, e_mtr_vals in e_mtrs.items():
|
||||
print(' - {}'.format(e_mtr_name))
|
||||
print(' - {}'.format(e_mtr_vals))
|
||||
|
||||
|
||||
print('')
|
||||
print('Access complete dict:')
|
||||
print(evals_result)
|
||||
|
||||
8
demo/guide-python/sklearn_examples.py
Executable file → Normal file
8
demo/guide-python/sklearn_examples.py
Executable file → Normal file
@@ -1,4 +1,3 @@
|
||||
#!/usr/bin/python
|
||||
'''
|
||||
Created on 1 Apr 2015
|
||||
|
||||
@@ -52,9 +51,9 @@ y = boston['target']
|
||||
X = boston['data']
|
||||
xgb_model = xgb.XGBRegressor()
|
||||
clf = GridSearchCV(xgb_model,
|
||||
{'max_depth': [2,4,6],
|
||||
'n_estimators': [50,100,200]}, verbose=1)
|
||||
clf.fit(X,y)
|
||||
{'max_depth': [2, 4, 6],
|
||||
'n_estimators': [50, 100, 200]}, verbose=1)
|
||||
clf.fit(X, y)
|
||||
print(clf.best_score_)
|
||||
print(clf.best_params_)
|
||||
|
||||
@@ -73,4 +72,3 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
|
||||
clf = xgb.XGBClassifier()
|
||||
clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
|
||||
eval_set=[(X_test, y_test)])
|
||||
|
||||
|
||||
@@ -1,29 +1,11 @@
|
||||
import os
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.datasets import load_boston
|
||||
import xgboost as xgb
|
||||
|
||||
if __name__ == "__main__":
|
||||
# NOTE: on posix systems, this *has* to be here and in the
|
||||
# `__name__ == "__main__"` clause to run XGBoost in parallel processes
|
||||
# using fork, if XGBoost was built with OpenMP support. Otherwise, if you
|
||||
# build XGBoost without OpenMP support, you can use fork, which is the
|
||||
# default backend for joblib, and omit this.
|
||||
try:
|
||||
from multiprocessing import set_start_method
|
||||
except ImportError:
|
||||
raise ImportError("Unable to import multiprocessing.set_start_method."
|
||||
" This example only runs on Python 3.4")
|
||||
set_start_method("forkserver")
|
||||
|
||||
import numpy as np
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.datasets import load_boston
|
||||
import xgboost as xgb
|
||||
|
||||
rng = np.random.RandomState(31337)
|
||||
|
||||
print("Parallel Parameter optimization")
|
||||
boston = load_boston()
|
||||
|
||||
os.environ["OMP_NUM_THREADS"] = "2" # or to whatever you want
|
||||
y = boston['target']
|
||||
X = boston['data']
|
||||
xgb_model = xgb.XGBRegressor()
|
||||
|
||||
Reference in New Issue
Block a user