Update Python demos with tests. (#5651)
* Remove GPU memory usage demo.
* Add tests for demos.
* Remove `silent`.
* Remove shebang as it's not portable.
Parent: 4e64e2ef8e
Commit: 2c1a439869
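The change repeated across most demos below: each script used to load its data with a path relative to the process working directory, which only worked when launched from inside the demo folder. Because the new tests run the demos via subprocess from an arbitrary directory, every script now anchors its data paths at its own location. The pattern in isolation (the agaricus path is the one most demos use):

```python
import os

# Resolve data relative to this script instead of the current working
# directory, so the demo also runs when invoked from the test suite.
CURRENT_DIR = os.path.dirname(__file__)
DATA_PATH = os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')
```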
demo/aft_survival/aft_survival_demo.py
@@ -1,6 +1,7 @@
 """
 Demo for survival analysis (regression) using Accelerated Failure Time (AFT) model
 """
+import os
 from sklearn.model_selection import ShuffleSplit
 import pandas as pd
 import numpy as np
@@ -8,7 +9,8 @@ import xgboost as xgb
 
 # The Veterans' Administration Lung Cancer Trial
 # The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
-df = pd.read_csv('../data/veterans_lung_cancer.csv')
+CURRENT_DIR = os.path.dirname(__file__)
+df = pd.read_csv(os.path.join(CURRENT_DIR, '../data/veterans_lung_cancer.csv'))
 print('Training data:')
 print(df)
 
@@ -1,5 +1,3 @@
 # GPU Acceleration Demo
 
 `cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms.
-
-`memory.py` shows how to repeatedly train xgboost models while freeing memory between iterations.
cover_type.py
@@ -1,5 +1,4 @@
 import xgboost as xgb
-import numpy as np
 from sklearn.datasets import fetch_covtype
 from sklearn.model_selection import train_test_split
 import time
memory.py (removed)
@@ -1,51 +0,0 @@
-import xgboost as xgb
-import numpy as np
-import time
-import pickle
-import GPUtil
-
-n = 10000
-m = 1000
-X = np.random.random((n, m))
-y = np.random.random(n)
-
-param = {'objective': 'binary:logistic',
-         'tree_method': 'gpu_hist'
-         }
-iterations = 5
-dtrain = xgb.DMatrix(X, label=y)
-
-# High memory usage
-# active bst objects with device memory persist across iterations
-boosters = []
-for i in range(iterations):
-    bst = xgb.train(param, dtrain)
-    boosters.append(bst)
-
-print("Example 1")
-GPUtil.showUtilization()
-del boosters
-
-# Better memory usage
-# The bst object can be destroyed by the python gc, freeing device memory
-# The gc may not immediately free the object, so more than one booster can be allocated at a time
-boosters = []
-for i in range(iterations):
-    bst = xgb.train(param, dtrain)
-    boosters.append(pickle.dumps(bst))
-
-print("Example 2")
-GPUtil.showUtilization()
-del boosters
-
-# Best memory usage
-# The gc explicitly frees the booster before starting the next iteration
-boosters = []
-for i in range(iterations):
-    bst = xgb.train(param, dtrain)
-    boosters.append(pickle.dumps(bst))
-    del bst
-
-print("Example 3")
-GPUtil.showUtilization()
-del boosters
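Although the demo itself is deleted, the pattern it documented is still valid: a live Booster pins its memory until Python's garbage collector destroys it, so serializing each trained model and deleting the live object keeps at most one booster's worth of memory allocated at a time. A minimal sketch of the demo's final "best memory usage" variant, using the CPU `hist` method so it runs without a GPU (the GPUtil reporting from the demo is omitted):

```python
import pickle
import numpy as np
import xgboost as xgb

X = np.random.random((10000, 100))
y = np.random.random(10000)
dtrain = xgb.DMatrix(X, label=y)
param = {'objective': 'binary:logistic', 'tree_method': 'hist'}

snapshots = []
for _ in range(5):
    bst = xgb.train(param, dtrain)
    snapshots.append(pickle.dumps(bst))  # keep only the serialized bytes
    del bst  # release the live booster before the next iteration

bst = pickle.loads(snapshots[0])  # restore a model when it is needed again
```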
demo/guide-python/basic_walkthrough.py (1 change; executable file → normal file)
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import numpy as np
 import scipy.sparse
 import pickle
demo/guide-python/boost_from_prediction.py (10 changes; executable file → normal file)
@@ -1,15 +1,17 @@
-#!/usr/bin/python
+import os
 import xgboost as xgb
 
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
+
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 ###
 # advanced: start from a initial base prediction
 #
 print('start running example to start from a initial prediction')
 # specify parameters via map, definition are same as c++ version
-param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
+param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
 # train xgboost for 1 round
 bst = xgb.train(param, dtrain, 1, watchlist)
 # Note: we need the margin value instead of transformed prediction in
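For context, the part of boost_from_prediction.py beyond this hunk continues by handing the first model's untransformed margin back to the DMatrix as the starting point for further boosting. A sketch of that hand-off, continuing the names from the hunk above and assuming the standard `output_margin`/`set_base_margin` API:

```python
# margin, not the 0-1 probability, is the additive quantity boosting works on
ptrain = bst.predict(dtrain, output_margin=True)
ptest = bst.predict(dtest, output_margin=True)
dtrain.set_base_margin(ptrain)
dtest.set_base_margin(ptest)
# subsequent training now starts from the previous model's predictions
bst2 = xgb.train(param, dtrain, 1, watchlist)
```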
demo/guide-python/cross_validation.py (11 changes; executable file → normal file)
@@ -1,10 +1,11 @@
-#!/usr/bin/python
+import os
 import numpy as np
 import xgboost as xgb
 
-### load data in do training
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+# load data in do training
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic'}
 num_round = 2
 
 print('running cross validation')
@@ -56,7 +57,7 @@ def evalerror(preds, dtrain):
     labels = dtrain.get_label()
     return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
 
-param = {'max_depth':2, 'eta':1, 'silent':1}
+param = {'max_depth':2, 'eta':1}
 # train with customized objective
 xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
        obj=logregobj, feval=evalerror)
demo/guide-python/custom_objective.py (9 changes; executable file → normal file)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+import os
 import numpy as np
 import xgboost as xgb
 ###
@@ -6,13 +6,14 @@ import xgboost as xgb
 #
 print('start running example to used customized objective function')
 
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
 
 # note: for customized objective function, we leave objective as default
 # note: what we are getting is margin value in prediction
 # you must know what you are doing
-param = {'max_depth': 2, 'eta': 1, 'silent': 1}
+param = {'max_depth': 2, 'eta': 1}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
 
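The hunk's comments carry the point of this demo: with no `objective` set, predictions arrive as raw margin scores, and the custom objective maps them to a gradient and hessian itself. The demo's objective is of this general form (a sketch; the actual function body sits outside the hunk):

```python
import numpy as np

def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # margin -> probability
    grad = preds - labels                 # first-order gradient of log loss
    hess = preds * (1.0 - preds)          # second-order gradient
    return grad, hess
```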
demo/guide-python/evals_result.py
@@ -1,10 +1,12 @@
 ##
 # This script demonstrate how to access the eval metrics in xgboost
 ##
+import os
 import xgboost as xgb
-dtrain = xgb.DMatrix('../data/agaricus.txt.train', silent=True)
-dtest = xgb.DMatrix('../data/agaricus.txt.test', silent=True)
+
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
 
 param = [('max_depth', 2), ('objective', 'binary:logistic'), ('eval_metric', 'logloss'), ('eval_metric', 'error')]
 
demo/guide-python/external_memory.py (13 changes; executable file → normal file)
@@ -1,6 +1,4 @@
-#!/usr/bin/python
-import numpy as np
-import scipy.sparse
+import os
 import xgboost as xgb
 
 ### simple example for using external memory version
@@ -8,11 +6,12 @@ import xgboost as xgb
 # this is the only difference, add a # followed by a cache prefix name
 # several cache file with the prefix will be generated
 # currently only support convert from libsvm file
-dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache')
-dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache')
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
 
 # specify validations set to watch performance
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic'}
 
 # performance notice: set nthread to be the number of your real cpu
 # some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case set nthread=4
@@ -21,5 +20,3 @@ param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
-
-
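Note that the rewritten lines drop the cache suffix even though the surrounding comments still describe it; the pre-change demo exercised external memory by appending `#` plus a cache prefix to the libsvm path, like so (the prefix name is arbitrary):

```python
import os
import xgboost as xgb

CURRENT_DIR = os.path.dirname(__file__)
# the '#dtrain.cache' suffix tells XGBoost to stream the file through
# on-disk cache files named with that prefix instead of loading it whole
dtrain = xgb.DMatrix(
    os.path.join(CURRENT_DIR, '../data/agaricus.txt.train') + '#dtrain.cache')
```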
demo/guide-python/gamma_regression.py (3 changes; executable file → normal file)
@@ -1,4 +1,3 @@
-#!/usr/bin/python
 import xgboost as xgb
 import numpy as np
 
@@ -12,7 +11,7 @@ dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34])
 
 # for gamma regression, we need to set the objective to 'reg:gamma', it also suggests
 # to set the base_score to a value between 1 to 5 if the number of iteration is small
-param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3}
+param = {'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3}
 
 # the rest of settings are the same
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
demo/guide-python/generalized_linear_model.py (9 changes; executable file → normal file)
@@ -1,16 +1,17 @@
-#!/usr/bin/python
+import os
 import xgboost as xgb
 ##
 # this script demonstrate how to fit generalized linear model in xgboost
 # basically, we are using linear model, instead of tree for our boosters
 ##
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
 # change booster to gblinear, so that we are fitting a linear model
 # alpha is the L1 regularizer
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
-param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
+param = {'objective':'binary:logistic', 'booster':'gblinear',
          'alpha': 0.0001, 'lambda': 1}
 
 # normally, you do not need to set eta (step_size)
demo/guide-python/predict_first_ntree.py (13 changes; executable file → normal file)
@@ -1,17 +1,18 @@
-#!/usr/bin/python
+import os
 import numpy as np
 import xgboost as xgb
 
-### load data in do training
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+# load data in do training
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
+param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 3
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
 print('start testing prediction from first n trees')
-### predict using first 1 tree
+# predict using first 1 tree
 label = dtest.get_label()
 ypred1 = bst.predict(dtest, ntree_limit=1)
 # by default, we predict using all the trees
demo/guide-python/predict_leaf_indices.py (15 changes; executable file → normal file)
@@ -1,19 +1,20 @@
-#!/usr/bin/python
+import os
 import xgboost as xgb
 
-### load data in do training
-dtrain = xgb.DMatrix('../data/agaricus.txt.train')
-dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+# load data in do training
+CURRENT_DIR = os.path.dirname(__file__)
+dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train'))
+dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test'))
+param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 3
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
 print('start testing predict the leaf indices')
-### predict using first 2 tree
+# predict using first 2 tree
 leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True)
 print(leafindex.shape)
 print(leafindex)
-### predict all trees
+# predict all trees
 leafindex = bst.predict(dtest, pred_leaf=True)
 print(leafindex.shape)
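With `pred_leaf=True` each prediction row reports, per tree, the index of the leaf the sample lands in, so the result is an (n_samples, n_trees) matrix; a self-contained sketch on toy data:

```python
import numpy as np
import xgboost as xgb

X = np.random.randn(50, 3)
y = np.random.randint(0, 2, size=50)
d = xgb.DMatrix(X, label=y)
bst = xgb.train({'max_depth': 2, 'objective': 'binary:logistic'}, d,
                num_boost_round=3)

leaves = bst.predict(d, pred_leaf=True)  # leaf index per sample per tree
assert leaves.shape == (50, 3)
```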
@@ -1,13 +0,0 @@
-#!/bin/bash
-export PYTHONPATH=$PYTHONPATH:../../python-package
-python basic_walkthrough.py
-python custom_objective.py
-python boost_from_prediction.py
-python predict_first_ntree.py
-python generalized_linear_model.py
-python cross_validation.py
-python predict_leaf_indices.py
-python sklearn_examples.py
-python sklearn_parallel.py
-python external_memory.py
-rm -rf *~ *.model *.buffer
demo/guide-python/sklearn_examples.py (2 changes; executable file → normal file)
@@ -1,4 +1,3 @@
-#!/usr/bin/python
 '''
 Created on 1 Apr 2015
 
@@ -73,4 +72,3 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 clf = xgb.XGBClassifier()
 clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
         eval_set=[(X_test, y_test)])
-
demo/guide-python/sklearn_parallel.py
@@ -1,29 +1,11 @@
-import os
-
-if __name__ == "__main__":
-    # NOTE: on posix systems, this *has* to be here and in the
-    # `__name__ == "__main__"` clause to run XGBoost in parallel processes
-    # using fork, if XGBoost was built with OpenMP support. Otherwise, if you
-    # build XGBoost without OpenMP support, you can use fork, which is the
-    # default backend for joblib, and omit this.
-    try:
-        from multiprocessing import set_start_method
-    except ImportError:
-        raise ImportError("Unable to import multiprocessing.set_start_method."
-                          " This example only runs on Python 3.4")
-    set_start_method("forkserver")
-
-    import numpy as np
-    from sklearn.model_selection import GridSearchCV
-    from sklearn.datasets import load_boston
-    import xgboost as xgb
-
-    rng = np.random.RandomState(31337)
-
+from sklearn.model_selection import GridSearchCV
+from sklearn.datasets import load_boston
+import xgboost as xgb
+
+if __name__ == "__main__":
     print("Parallel Parameter optimization")
     boston = load_boston()
 
-    os.environ["OMP_NUM_THREADS"] = "2"  # or to whatever you want
     y = boston['target']
     X = boston['data']
     xgb_model = xgb.XGBRegressor()
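The rewrite moves the imports to module scope and leaves only the actual work under the `__main__` guard, which is what lets process-based backends re-import the module in workers without re-running the search. A sketch of the resulting script shape (the parameter grid and `n_jobs` value are illustrative, not taken from the diff):

```python
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston
import xgboost as xgb

if __name__ == "__main__":
    print("Parallel Parameter optimization")
    boston = load_boston()
    y, X = boston['target'], boston['data']
    xgb_model = xgb.XGBRegressor()
    # worker processes re-import this module; the guard keeps them from
    # starting their own grid searches
    clf = GridSearchCV(xgb_model,
                       {'max_depth': [2, 4], 'n_estimators': [50, 100]},
                       n_jobs=2)
    clf.fit(X, y)
    print(clf.best_score_, clf.best_params_)
```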
@@ -8,7 +8,7 @@ label = train[:,32]
 data = train[:,1:31]
 weight = train[:,31]
 dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
-param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
+param = {'max_depth':6, 'eta':0.1, 'objective':'binary:logitraw', 'nthread':4}
 num_round = 120
 
 print ('running cross validation, with preprocessing function')
@@ -37,7 +37,6 @@ param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['eta'] = 0.1
 param['max_depth'] = 6
 param['eval_metric'] = 'auc'
-param['silent'] = 1
 param['nthread'] = 16
 
 # you can directly throw param in, though we want to watch multiple metrics here
@@ -45,6 +45,3 @@ for k, v in res:
 fo.close()
 
 print ('finished writing into prediction file')
-
-
-
@@ -36,7 +36,6 @@ param['scale_pos_weight'] = sum_wneg/sum_wpos
 param['bst:eta'] = 0.1
 param['bst:max_depth'] = 6
 param['eval_metric'] = 'auc'
-param['silent'] = 1
 param['nthread'] = 4
 
 plst = param.items()+[('eval_metric', 'ams@0.15')]
@@ -28,7 +28,6 @@ param['objective'] = 'multi:softmax'
 # scale weight of positive examples
 param['eta'] = 0.1
 param['max_depth'] = 6
-param['silent'] = 1
 param['nthread'] = 4
 param['num_class'] = 6
 
@@ -2,15 +2,17 @@ import os
 import subprocess
 import sys
 import pytest
+import testing as tm
 
 
 CURRENT_DIR = os.path.dirname(__file__)
 ROOT_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
-DEMO_DIR = os.path.join(ROOT_DIR, 'demo', 'guide-python')
+DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
+PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
 
 
 def test_basic_walkthrough():
-    script = os.path.join(DEMO_DIR, 'basic_walkthrough.py')
+    script = os.path.join(PYTHON_DEMO_DIR, 'basic_walkthrough.py')
     cmd = ['python', script]
     subprocess.check_call(cmd)
     os.remove('dump.nice.txt')
@@ -18,7 +20,7 @@ def test_basic_walkthrough():
 
 
 def test_custom_multiclass_objective():
-    script = os.path.join(DEMO_DIR, 'custom_softmax.py')
+    script = os.path.join(PYTHON_DEMO_DIR, 'custom_softmax.py')
     cmd = ['python', script, '--plot=0']
     subprocess.check_call(cmd)
 
@@ -27,6 +29,91 @@ def test_custom_rmsle_objective():
     major, minor = sys.version_info[:2]
     if minor < 6:
         pytest.skip('Skipping RMSLE test due to Python version being too low.')
-    script = os.path.join(DEMO_DIR, 'custom_rmsle.py')
+    script = os.path.join(PYTHON_DEMO_DIR, 'custom_rmsle.py')
     cmd = ['python', script, '--plot=0']
     subprocess.check_call(cmd)
+
+
+@pytest.mark.skipif(**tm.no_sklearn())
+def test_sklearn_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_examples.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+    assert os.path.exists('best_boston.pkl')
+    os.remove('best_boston.pkl')
+
+
+@pytest.mark.skipif(**tm.no_sklearn())
+def test_sklearn_parallel_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_parallel.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+@pytest.mark.skipif(**tm.no_sklearn())
+def test_sklearn_evals_result_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_evals_result.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_boost_from_prediction_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'boost_from_prediction.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_predict_first_ntree_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'predict_first_ntree.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_predict_leaf_indices_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'predict_leaf_indices.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_generalized_linear_model_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'generalized_linear_model.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_custom_objective_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'custom_objective.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_cross_validation_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'cross_validation.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_external_memory_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'external_memory.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_evals_result_demo():
+    script = os.path.join(PYTHON_DEMO_DIR, 'evals_result.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+
+
+def test_aft_demo():
+    script = os.path.join(DEMO_DIR, 'aft_survival', 'aft_survival_demo.py')
+    cmd = ['python', script]
+    subprocess.check_call(cmd)
+    assert os.path.exists('aft_model.json')
+    os.remove('aft_model.json')
+
+
+# gpu_acceleration is not tested because the covertype dataset is too large.
+# gamma regression is not tested as it requires running an R script first.
+# aft viz is not tested because plotting is not controlled.
+# aft tuning is not tested due to an extra dependency.
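`tm.no_sklearn()` is splatted into `pytest.mark.skipif`, so it must return keyword arguments, presumably a `condition` plus a `reason`. A hypothetical sketch of such a helper (the repo's actual `testing` module may differ):

```python
def no_sklearn():
    # kwargs for pytest.mark.skipif: skip the test when sklearn is absent
    try:
        import sklearn  # noqa: F401
        installed = True
    except ImportError:
        installed = False
    return {'condition': not installed,
            'reason': 'scikit-learn is not installed'}
```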