From 2c1a439869506532f48d387e7d39beeab358c76b Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 12 May 2020 12:04:42 +0800 Subject: [PATCH] Update Python demos with tests. (#5651) * Remove GPU memory usage demo. * Add tests for demos. * Remove `silent`. * Remove shebang as it's not portable. --- demo/aft_survival/aft_survival_demo.py | 6 +- demo/c-api/c-api-demo.c | 6 +- demo/gpu_acceleration/README.md | 4 +- demo/gpu_acceleration/cover_type.py | 1 - demo/gpu_acceleration/memory.py | 51 ---------- demo/guide-python/basic_walkthrough.py | 1 - demo/guide-python/boost_from_prediction.py | 10 +- demo/guide-python/cross_validation.py | 11 ++- demo/guide-python/custom_objective.py | 9 +- demo/guide-python/evals_result.py | 10 +- demo/guide-python/external_memory.py | 13 +-- demo/guide-python/gamma_regression.py | 3 +- demo/guide-python/generalized_linear_model.py | 9 +- demo/guide-python/predict_first_ntree.py | 13 +-- demo/guide-python/predict_leaf_indices.py | 17 ++-- demo/guide-python/runall.sh | 13 --- demo/guide-python/sklearn_evals_result.py | 4 +- demo/guide-python/sklearn_examples.py | 8 +- demo/guide-python/sklearn_parallel.py | 24 +---- demo/kaggle-higgs/higgs-cv.py | 2 +- demo/kaggle-higgs/higgs-numpy.py | 1 - demo/kaggle-higgs/higgs-pred.py | 3 - demo/kaggle-higgs/speedtest.py | 1 - demo/multiclass_classification/train.py | 1 - tests/python/test_demos.py | 95 ++++++++++++++++++- 25 files changed, 158 insertions(+), 158 deletions(-) delete mode 100644 demo/gpu_acceleration/memory.py mode change 100755 => 100644 demo/guide-python/basic_walkthrough.py mode change 100755 => 100644 demo/guide-python/boost_from_prediction.py mode change 100755 => 100644 demo/guide-python/cross_validation.py mode change 100755 => 100644 demo/guide-python/custom_objective.py mode change 100755 => 100644 demo/guide-python/external_memory.py mode change 100755 => 100644 demo/guide-python/gamma_regression.py mode change 100755 => 100644 demo/guide-python/generalized_linear_model.py mode change 100755 => 100644 demo/guide-python/predict_first_ntree.py mode change 100755 => 100644 demo/guide-python/predict_leaf_indices.py delete mode 100755 demo/guide-python/runall.sh mode change 100755 => 100644 demo/guide-python/sklearn_examples.py diff --git a/demo/aft_survival/aft_survival_demo.py b/demo/aft_survival/aft_survival_demo.py index 3cdccc1c2..0a659e79e 100644 --- a/demo/aft_survival/aft_survival_demo.py +++ b/demo/aft_survival/aft_survival_demo.py @@ -1,6 +1,7 @@ """ Demo for survival analysis (regression) using Accelerated Failure Time (AFT) model """ +import os from sklearn.model_selection import ShuffleSplit import pandas as pd import numpy as np @@ -8,7 +9,8 @@ import xgboost as xgb # The Veterans' Administration Lung Cancer Trial # The Statistical Analysis of Failure Time Data by Kalbfleisch J. 
and Prentice R (1980) -df = pd.read_csv('../data/veterans_lung_cancer.csv') +CURRENT_DIR = os.path.dirname(__file__) +df = pd.read_csv(os.path.join(CURRENT_DIR, '../data/veterans_lung_cancer.csv')) print('Training data:') print(df) @@ -39,7 +41,7 @@ params = {'verbosity': 0, 'lambda': 0.01, 'alpha': 0.02} bst = xgb.train(params, dtrain, num_boost_round=10000, - evals=[(dtrain, 'train'), (dvalid, 'valid')], + evals=[(dtrain, 'train'), (dvalid, 'valid')], early_stopping_rounds=50) # Run prediction on the validation set diff --git a/demo/c-api/c-api-demo.c b/demo/c-api/c-api-demo.c index 2fd212fe5..c476357bd 100644 --- a/demo/c-api/c-api-demo.c +++ b/demo/c-api/c-api-demo.c @@ -20,12 +20,12 @@ if (err != 0) { \ int main(int argc, char** argv) { int silent = 0; int use_gpu = 0; // set to 1 to use the GPU for training - + // load the data DMatrixHandle dtrain, dtest; safe_xgboost(XGDMatrixCreateFromFile("../data/agaricus.txt.train", silent, &dtrain)); safe_xgboost(XGDMatrixCreateFromFile("../data/agaricus.txt.test", silent, &dtest)); - + // create the booster BoosterHandle booster; DMatrixHandle eval_dmats[2] = {dtrain, dtest}; @@ -49,7 +49,7 @@ int main(int argc, char** argv) { safe_xgboost(XGBoosterSetParam(booster, "gamma", "0.1")); safe_xgboost(XGBoosterSetParam(booster, "max_depth", "3")); safe_xgboost(XGBoosterSetParam(booster, "verbosity", silent ? "0" : "1")); - + // train and evaluate for 10 iterations int n_trees = 10; const char* eval_names[2] = {"train", "test"}; diff --git a/demo/gpu_acceleration/README.md b/demo/gpu_acceleration/README.md index f6b0539fc..7890bcfcf 100644 --- a/demo/gpu_acceleration/README.md +++ b/demo/gpu_acceleration/README.md @@ -1,5 +1,3 @@ # GPU Acceleration Demo -`cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms. - -`memory.py` shows how to repeatedly train xgboost models while freeing memory between iterations. +`cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms. 
\ No newline at end of file diff --git a/demo/gpu_acceleration/cover_type.py b/demo/gpu_acceleration/cover_type.py index 5a073eb2c..8e44a3ddc 100644 --- a/demo/gpu_acceleration/cover_type.py +++ b/demo/gpu_acceleration/cover_type.py @@ -1,5 +1,4 @@ import xgboost as xgb -import numpy as np from sklearn.datasets import fetch_covtype from sklearn.model_selection import train_test_split import time diff --git a/demo/gpu_acceleration/memory.py b/demo/gpu_acceleration/memory.py deleted file mode 100644 index f0f955564..000000000 --- a/demo/gpu_acceleration/memory.py +++ /dev/null @@ -1,51 +0,0 @@ -import xgboost as xgb -import numpy as np -import time -import pickle -import GPUtil - -n = 10000 -m = 1000 -X = np.random.random((n, m)) -y = np.random.random(n) - -param = {'objective': 'binary:logistic', - 'tree_method': 'gpu_hist' - } -iterations = 5 -dtrain = xgb.DMatrix(X, label=y) - -# High memory usage -# active bst objects with device memory persist across iterations -boosters = [] -for i in range(iterations): - bst = xgb.train(param, dtrain) - boosters.append(bst) - -print("Example 1") -GPUtil.showUtilization() -del boosters - -# Better memory usage -# The bst object can be destroyed by the python gc, freeing device memory -# The gc may not immediately free the object, so more than one booster can be allocated at a time -boosters = [] -for i in range(iterations): - bst = xgb.train(param, dtrain) - boosters.append(pickle.dumps(bst)) - -print("Example 2") -GPUtil.showUtilization() -del boosters - -# Best memory usage -# The gc explicitly frees the booster before starting the next iteration -boosters = [] -for i in range(iterations): - bst = xgb.train(param, dtrain) - boosters.append(pickle.dumps(bst)) - del bst - -print("Example 3") -GPUtil.showUtilization() -del boosters diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py old mode 100755 new mode 100644 index ec93968fc..a76def962 --- a/demo/guide-python/basic_walkthrough.py +++ b/demo/guide-python/basic_walkthrough.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import numpy as np import scipy.sparse import pickle diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py old mode 100755 new mode 100644 index 948b47a9f..3936f4f26 --- a/demo/guide-python/boost_from_prediction.py +++ b/demo/guide-python/boost_from_prediction.py @@ -1,15 +1,17 @@ -#!/usr/bin/python +import os import xgboost as xgb -dtrain = xgb.DMatrix('../data/agaricus.txt.train') -dtest = xgb.DMatrix('../data/agaricus.txt.test') + +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) +dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) watchlist = [(dtest, 'eval'), (dtrain, 'train')] ### # advanced: start from a initial base prediction # print('start running example to start from a initial prediction') # specify parameters via map, definition are same as c++ version -param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'} +param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} # train xgboost for 1 round bst = xgb.train(param, dtrain, 1, watchlist) # Note: we need the margin value instead of transformed prediction in diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py old mode 100755 new mode 100644 index 948992cdc..3eb9c45be --- a/demo/guide-python/cross_validation.py +++ b/demo/guide-python/cross_validation.py @@ -1,10 +1,11 @@ 
-#!/usr/bin/python +import os import numpy as np import xgboost as xgb -### load data in do training -dtrain = xgb.DMatrix('../data/agaricus.txt.train') -param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'} +# load data in do training +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) +param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic'} num_round = 2 print('running cross validation') @@ -56,7 +57,7 @@ def evalerror(preds, dtrain): labels = dtrain.get_label() return 'error', float(sum(labels != (preds > 0.0))) / len(labels) -param = {'max_depth':2, 'eta':1, 'silent':1} +param = {'max_depth':2, 'eta':1} # train with customized objective xgb.cv(param, dtrain, num_round, nfold=5, seed=0, obj=logregobj, feval=evalerror) diff --git a/demo/guide-python/custom_objective.py b/demo/guide-python/custom_objective.py old mode 100755 new mode 100644 index 5bbceccfb..4830ae9fe --- a/demo/guide-python/custom_objective.py +++ b/demo/guide-python/custom_objective.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +import os import numpy as np import xgboost as xgb ### @@ -6,13 +6,14 @@ import xgboost as xgb # print('start running example to used customized objective function') -dtrain = xgb.DMatrix('../data/agaricus.txt.train') -dtest = xgb.DMatrix('../data/agaricus.txt.test') +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) +dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) # note: for customized objective function, we leave objective as default # note: what we are getting is margin value in prediction # you must know what you are doing -param = {'max_depth': 2, 'eta': 1, 'silent': 1} +param = {'max_depth': 2, 'eta': 1} watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 2 diff --git a/demo/guide-python/evals_result.py b/demo/guide-python/evals_result.py index 8449b9307..f9eeb23a6 100644 --- a/demo/guide-python/evals_result.py +++ b/demo/guide-python/evals_result.py @@ -1,13 +1,15 @@ ## # This script demonstrate how to access the eval metrics in xgboost ## - +import os import xgboost as xgb -dtrain = xgb.DMatrix('../data/agaricus.txt.train', silent=True) -dtest = xgb.DMatrix('../data/agaricus.txt.test', silent=True) + +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) +dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) param = [('max_depth', 2), ('objective', 'binary:logistic'), ('eval_metric', 'logloss'), ('eval_metric', 'error')] - + num_round = 2 watchlist = [(dtest,'eval'), (dtrain,'train')] diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py old mode 100755 new mode 100644 index 97a74b0ca..385a6c2d7 --- a/demo/guide-python/external_memory.py +++ b/demo/guide-python/external_memory.py @@ -1,6 +1,4 @@ -#!/usr/bin/python -import numpy as np -import scipy.sparse +import os import xgboost as xgb ### simple example for using external memory version @@ -8,11 +6,12 @@ import xgboost as xgb # this is the only difference, add a # followed by a cache prefix name # several cache file with the prefix will be generated # currently only support convert from libsvm file -dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache') -dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache') +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, 
'../data/agaricus.txt.train')) +dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) # specify validations set to watch performance -param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'} +param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic'} # performance notice: set nthread to be the number of your real cpu # some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case set nthread=4 @@ -21,5 +20,3 @@ param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'} watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 2 bst = xgb.train(param, dtrain, num_round, watchlist) - - diff --git a/demo/guide-python/gamma_regression.py b/demo/guide-python/gamma_regression.py old mode 100755 new mode 100644 index af7103b28..62bcf37cf --- a/demo/guide-python/gamma_regression.py +++ b/demo/guide-python/gamma_regression.py @@ -1,4 +1,3 @@ -#!/usr/bin/python import xgboost as xgb import numpy as np @@ -12,7 +11,7 @@ dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34]) # for gamma regression, we need to set the objective to 'reg:gamma', it also suggests # to set the base_score to a value between 1 to 5 if the number of iteration is small -param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3} +param = {'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3} # the rest of settings are the same watchlist = [(dtest, 'eval'), (dtrain, 'train')] diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py old mode 100755 new mode 100644 index c85c5ca9a..f8d4efc79 --- a/demo/guide-python/generalized_linear_model.py +++ b/demo/guide-python/generalized_linear_model.py @@ -1,16 +1,17 @@ -#!/usr/bin/python +import os import xgboost as xgb ## # this script demonstrate how to fit generalized linear model in xgboost # basically, we are using linear model, instead of tree for our boosters ## -dtrain = xgb.DMatrix('../data/agaricus.txt.train') -dtest = xgb.DMatrix('../data/agaricus.txt.test') +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) +dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) # change booster to gblinear, so that we are fitting a linear model # alpha is the L1 regularizer # lambda is the L2 regularizer # you can also set lambda_bias which is L2 regularizer on the bias term -param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', +param = {'objective':'binary:logistic', 'booster':'gblinear', 'alpha': 0.0001, 'lambda': 1} # normally, you do not need to set eta (step_size) diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py old mode 100755 new mode 100644 index 3a8dbbb86..d542c55b7 --- a/demo/guide-python/predict_first_ntree.py +++ b/demo/guide-python/predict_first_ntree.py @@ -1,17 +1,18 @@ -#!/usr/bin/python +import os import numpy as np import xgboost as xgb -### load data in do training -dtrain = xgb.DMatrix('../data/agaricus.txt.train') -dtest = xgb.DMatrix('../data/agaricus.txt.test') -param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'} +# load data in do training +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) +dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) +param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} 
watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 3 bst = xgb.train(param, dtrain, num_round, watchlist) print('start testing prediction from first n trees') -### predict using first 1 tree +# predict using first 1 tree label = dtest.get_label() ypred1 = bst.predict(dtest, ntree_limit=1) # by default, we predict using all the trees diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py old mode 100755 new mode 100644 index 383e8d525..96608d762 --- a/demo/guide-python/predict_leaf_indices.py +++ b/demo/guide-python/predict_leaf_indices.py @@ -1,19 +1,20 @@ -#!/usr/bin/python +import os import xgboost as xgb -### load data in do training -dtrain = xgb.DMatrix('../data/agaricus.txt.train') -dtest = xgb.DMatrix('../data/agaricus.txt.test') -param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'} +# load data in do training +CURRENT_DIR = os.path.dirname(__file__) +dtrain = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.train')) +dtest = xgb.DMatrix(os.path.join(CURRENT_DIR, '../data/agaricus.txt.test')) +param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'} watchlist = [(dtest, 'eval'), (dtrain, 'train')] num_round = 3 bst = xgb.train(param, dtrain, num_round, watchlist) -print ('start testing predict the leaf indices') -### predict using first 2 tree +print('start testing predict the leaf indices') +# predict using first 2 tree leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True) print(leafindex.shape) print(leafindex) -### predict all trees +# predict all trees leafindex = bst.predict(dtest, pred_leaf=True) print(leafindex.shape) diff --git a/demo/guide-python/runall.sh b/demo/guide-python/runall.sh deleted file mode 100755 index 9eda92b5e..000000000 --- a/demo/guide-python/runall.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -export PYTHONPATH=$PYTHONPATH:../../python-package -python basic_walkthrough.py -python custom_objective.py -python boost_from_prediction.py -python predict_first_ntree.py -python generalized_linear_model.py -python cross_validation.py -python predict_leaf_indices.py -python sklearn_examples.py -python sklearn_parallel.py -python external_memory.py -rm -rf *~ *.model *.buffer diff --git a/demo/guide-python/sklearn_evals_result.py b/demo/guide-python/sklearn_evals_result.py index a72cdfc52..410642135 100644 --- a/demo/guide-python/sklearn_evals_result.py +++ b/demo/guide-python/sklearn_evals_result.py @@ -20,7 +20,7 @@ clf = xgb.XGBModel(**param_dist) # Or you can use: clf = xgb.XGBClassifier(**param_dist) clf.fit(X_train, y_train, - eval_set=[(X_train, y_train), (X_test, y_test)], + eval_set=[(X_train, y_train), (X_test, y_test)], eval_metric='logloss', verbose=True) @@ -37,7 +37,7 @@ for e_name, e_mtrs in evals_result.items(): for e_mtr_name, e_mtr_vals in e_mtrs.items(): print(' - {}'.format(e_mtr_name)) print(' - {}'.format(e_mtr_vals)) - + print('') print('Access complete dict:') print(evals_result) diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py old mode 100755 new mode 100644 index d4f9924ad..68a754f46 --- a/demo/guide-python/sklearn_examples.py +++ b/demo/guide-python/sklearn_examples.py @@ -1,4 +1,3 @@ -#!/usr/bin/python ''' Created on 1 Apr 2015 @@ -52,9 +51,9 @@ y = boston['target'] X = boston['data'] xgb_model = xgb.XGBRegressor() clf = GridSearchCV(xgb_model, - {'max_depth': [2,4,6], - 'n_estimators': [50,100,200]}, verbose=1) -clf.fit(X,y) + {'max_depth': [2, 4, 6], + 'n_estimators': [50, 100, 200]}, 
verbose=1) +clf.fit(X, y) print(clf.best_score_) print(clf.best_params_) @@ -73,4 +72,3 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf = xgb.XGBClassifier() clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc", eval_set=[(X_test, y_test)]) - diff --git a/demo/guide-python/sklearn_parallel.py b/demo/guide-python/sklearn_parallel.py index 04f13f75a..62c81e380 100644 --- a/demo/guide-python/sklearn_parallel.py +++ b/demo/guide-python/sklearn_parallel.py @@ -1,29 +1,11 @@ -import os +from sklearn.model_selection import GridSearchCV +from sklearn.datasets import load_boston +import xgboost as xgb if __name__ == "__main__": - # NOTE: on posix systems, this *has* to be here and in the - # `__name__ == "__main__"` clause to run XGBoost in parallel processes - # using fork, if XGBoost was built with OpenMP support. Otherwise, if you - # build XGBoost without OpenMP support, you can use fork, which is the - # default backend for joblib, and omit this. - try: - from multiprocessing import set_start_method - except ImportError: - raise ImportError("Unable to import multiprocessing.set_start_method." - " This example only runs on Python 3.4") - set_start_method("forkserver") - - import numpy as np - from sklearn.model_selection import GridSearchCV - from sklearn.datasets import load_boston - import xgboost as xgb - - rng = np.random.RandomState(31337) - print("Parallel Parameter optimization") boston = load_boston() - os.environ["OMP_NUM_THREADS"] = "2" # or to whatever you want y = boston['target'] X = boston['data'] xgb_model = xgb.XGBRegressor() diff --git a/demo/kaggle-higgs/higgs-cv.py b/demo/kaggle-higgs/higgs-cv.py index d5bbc39ef..fe954e256 100755 --- a/demo/kaggle-higgs/higgs-cv.py +++ b/demo/kaggle-higgs/higgs-cv.py @@ -8,7 +8,7 @@ label = train[:,32] data = train[:,1:31] weight = train[:,31] dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight ) -param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4} +param = {'max_depth':6, 'eta':0.1, 'objective':'binary:logitraw', 'nthread':4} num_round = 120 print ('running cross validation, with preprocessing function') diff --git a/demo/kaggle-higgs/higgs-numpy.py b/demo/kaggle-higgs/higgs-numpy.py index 004dd55ec..41c44c935 100755 --- a/demo/kaggle-higgs/higgs-numpy.py +++ b/demo/kaggle-higgs/higgs-numpy.py @@ -37,7 +37,6 @@ param['scale_pos_weight'] = sum_wneg/sum_wpos param['eta'] = 0.1 param['max_depth'] = 6 param['eval_metric'] = 'auc' -param['silent'] = 1 param['nthread'] = 16 # you can directly throw param in, though we want to watch multiple metrics here diff --git a/demo/kaggle-higgs/higgs-pred.py b/demo/kaggle-higgs/higgs-pred.py index bc669f557..4da3427d9 100755 --- a/demo/kaggle-higgs/higgs-pred.py +++ b/demo/kaggle-higgs/higgs-pred.py @@ -45,6 +45,3 @@ for k, v in res: fo.close() print ('finished writing into prediction file') - - - diff --git a/demo/kaggle-higgs/speedtest.py b/demo/kaggle-higgs/speedtest.py index 93672de25..04f45ab89 100755 --- a/demo/kaggle-higgs/speedtest.py +++ b/demo/kaggle-higgs/speedtest.py @@ -36,7 +36,6 @@ param['scale_pos_weight'] = sum_wneg/sum_wpos param['bst:eta'] = 0.1 param['bst:max_depth'] = 6 param['eval_metric'] = 'auc' -param['silent'] = 1 param['nthread'] = 4 plst = param.items()+[('eval_metric', 'ams@0.15')] diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py index 4dbce8216..9f1721dfc 100755 --- a/demo/multiclass_classification/train.py +++ 
b/demo/multiclass_classification/train.py @@ -28,7 +28,6 @@ param['objective'] = 'multi:softmax' # scale weight of positive examples param['eta'] = 0.1 param['max_depth'] = 6 -param['silent'] = 1 param['nthread'] = 4 param['num_class'] = 6 diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index 4155a48c7..99f368139 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -2,15 +2,17 @@ import os import subprocess import sys import pytest +import testing as tm CURRENT_DIR = os.path.dirname(__file__) ROOT_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR)) -DEMO_DIR = os.path.join(ROOT_DIR, 'demo', 'guide-python') +DEMO_DIR = os.path.join(ROOT_DIR, 'demo') +PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python') def test_basic_walkthrough(): - script = os.path.join(DEMO_DIR, 'basic_walkthrough.py') + script = os.path.join(PYTHON_DEMO_DIR, 'basic_walkthrough.py') cmd = ['python', script] subprocess.check_call(cmd) os.remove('dump.nice.txt') @@ -18,7 +20,7 @@ def test_basic_walkthrough(): def test_custom_multiclass_objective(): - script = os.path.join(DEMO_DIR, 'custom_softmax.py') + script = os.path.join(PYTHON_DEMO_DIR, 'custom_softmax.py') cmd = ['python', script, '--plot=0'] subprocess.check_call(cmd) @@ -27,6 +29,91 @@ def test_custom_rmsle_objective(): major, minor = sys.version_info[:2] if minor < 6: pytest.skip('Skipping RMLSE test due to Python version being too low.') - script = os.path.join(DEMO_DIR, 'custom_rmsle.py') + script = os.path.join(PYTHON_DEMO_DIR, 'custom_rmsle.py') cmd = ['python', script, '--plot=0'] subprocess.check_call(cmd) + + +@pytest.mark.skipif(**tm.no_sklearn()) +def test_sklearn_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_examples.py') + cmd = ['python', script] + subprocess.check_call(cmd) + assert os.path.exists('best_boston.pkl') + os.remove('best_boston.pkl') + + +@pytest.mark.skipif(**tm.no_sklearn()) +def test_sklearn_parallel_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_parallel.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +@pytest.mark.skipif(**tm.no_sklearn()) +def test_sklearn_evals_result_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_evals_result.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_boost_from_prediction_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'boost_from_prediction.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_predict_first_ntree_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'predict_first_ntree.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_predict_leaf_indices_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'predict_leaf_indices.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_generalized_linear_model_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'generalized_linear_model.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_custom_objective_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'custom_objective.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_cross_validation_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'cross_validation.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_external_memory_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'external_memory.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + +def test_evals_result_demo(): + script = os.path.join(PYTHON_DEMO_DIR, 'evals_result.py') + cmd = ['python', 
script] + subprocess.check_call(cmd) + + +def test_aft_demo(): + script = os.path.join(DEMO_DIR, 'aft_survival', 'aft_survival_demo.py') + cmd = ['python', script] + subprocess.check_call(cmd) + assert os.path.exists('aft_model.json') + os.remove('aft_model.json') + + +# gpu_acceleration is not tested because the covertype dataset is too large. +# gamma regression is not tested as it requires running an R script first. +# aft viz is not tested because plotting is not controlled. +# aft tuning is not tested due to an extra dependency.
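
Side note (not part of the patch): the per-demo test functions added to tests/python/test_demos.py all follow the same pattern of joining a script path and calling subprocess.check_call. A minimal sketch of how that repeated pattern could be expressed with pytest.mark.parametrize is given below, under the assumption that the listed demos only need to run to completion; the test name test_python_demo and the DEMOS list are illustrative, not part of the patch.

import os
import subprocess

import pytest

CURRENT_DIR = os.path.dirname(__file__)
ROOT_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
PYTHON_DEMO_DIR = os.path.join(ROOT_DIR, 'demo', 'guide-python')

# Demo scripts that only need to run to completion; names taken from the
# tests introduced in this patch.
DEMOS = [
    'boost_from_prediction.py',
    'predict_first_ntree.py',
    'predict_leaf_indices.py',
    'generalized_linear_model.py',
    'custom_objective.py',
    'cross_validation.py',
    'external_memory.py',
    'evals_result.py',
]


@pytest.mark.parametrize('demo', DEMOS)
def test_python_demo(demo):
    # Run each demo in a subprocess, mirroring the check_call pattern used
    # by the individual test functions in the patch.
    script = os.path.join(PYTHON_DEMO_DIR, demo)
    subprocess.check_call(['python', script])

Demos that leave artifacts behind (basic_walkthrough.py, sklearn_examples.py, aft_survival_demo.py) would still need their own cleanup or assertion steps, which is presumably why the patch keeps them as separate test functions.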