reformat benchmark_tree.py to get rid of lint errors (#4126)

This commit is contained in:
Rong Ou 2019-02-12 21:54:56 -08:00 committed by Rory Mitchell
parent 9b917cda4f
commit 3be1b9ae30
2 changed files with 51 additions and 30 deletions

View File

@ -2,6 +2,8 @@
ignore=tests ignore=tests
extension-pkg-whitelist=numpy
disiable=unexpected-special-method-signature,too-many-nested-blocks disiable=unexpected-special-method-signature,too-many-nested-blocks
dummy-variables-rgx=(unused|)_.* dummy-variables-rgx=(unused|)_.*
@ -19,3 +21,6 @@ attr-naming-style=snake_case
argument-naming-style=snake_case argument-naming-style=snake_case
variable-naming-style=snake_case variable-naming-style=snake_case
class-attribute-naming-style=snake_case class-attribute-naming-style=snake_case
# Allow short variable names, including single letters such as X and y (the first character may be uppercase)
variable-rgx=[a-zA-Z_][a-z0-9_]{0,30}$

View File

@ -1,65 +1,81 @@
# pylint: skip-file """Run benchmark on the tree booster."""
import sys, argparse
import xgboost as xgb import argparse
import ast
import time
import numpy as np import numpy as np
from sklearn.datasets import make_classification from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import time import xgboost as xgb
import ast
rng = np.random.RandomState(1994) RNG = np.random.RandomState(1994)
def run_benchmark(args): def run_benchmark(args):
"""Runs the benchmark."""
try: try:
dtest = xgb.DMatrix('dtest.dm') dtest = xgb.DMatrix('dtest.dm')
dtrain = xgb.DMatrix('dtrain.dm') dtrain = xgb.DMatrix('dtrain.dm')
if not (dtest.num_col() == args.columns \ if not (dtest.num_col() == args.columns
and dtrain.num_col() == args.columns): and dtrain.num_col() == args.columns):
raise ValueError("Wrong cols") raise ValueError("Wrong cols")
if not (dtest.num_row() == args.rows * args.test_size \ if not (dtest.num_row() == args.rows * args.test_size
and dtrain.num_row() == args.rows * (1-args.test_size)): and dtrain.num_row() == args.rows * (1 - args.test_size)):
raise ValueError("Wrong rows") raise ValueError("Wrong rows")
except: except xgb.core.XGBoostError:
print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns)) print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size)) print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
tmp = time.time() tmp = time.time()
X, y = make_classification(args.rows, n_features=args.columns, n_redundant=0, n_informative=args.columns, n_repeated=0, random_state=7) X, y = make_classification(args.rows, n_features=args.columns, n_redundant=0,
n_informative=args.columns, n_repeated=0, random_state=7)
if args.sparsity < 1.0: if args.sparsity < 1.0:
X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x for x in x_row] for x_row in X]) X = np.array([[np.nan if RNG.uniform(0, 1) < args.sparsity else x for x in x_row]
for x_row in X])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size,
print ("Generate Time: %s seconds" % (str(time.time() - tmp))) random_state=7)
print("Generate Time: %s seconds" % (str(time.time() - tmp)))
tmp = time.time() tmp = time.time()
print ("DMatrix Start") print("DMatrix Start")
dtrain = xgb.DMatrix(X_train, y_train) dtrain = xgb.DMatrix(X_train, y_train)
dtest = xgb.DMatrix(X_test, y_test, nthread=-1) dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
print ("DMatrix Time: %s seconds" % (str(time.time() - tmp))) print("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
dtest.save_binary('dtest.dm') dtest.save_binary('dtest.dm')
dtrain.save_binary('dtrain.dm') dtrain.save_binary('dtrain.dm')
param = {'objective': 'binary:logistic'} param = {'objective': 'binary:logistic'}
if args.params is not '': if args.params != '':
param.update(ast.literal_eval(args.params)) param.update(ast.literal_eval(args.params))
param['tree_method'] = args.tree_method param['tree_method'] = args.tree_method
print("Training with '%s'" % param['tree_method']) print("Training with '%s'" % param['tree_method'])
tmp = time.time() tmp = time.time()
xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")]) xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
print ("Train Time: %s seconds" % (str(time.time() - tmp))) print("Train Time: %s seconds" % (str(time.time() - tmp)))
parser = argparse.ArgumentParser()
parser.add_argument('--tree_method', default='gpu_hist')
parser.add_argument('--sparsity', type=float, default=0.0)
parser.add_argument('--rows', type=int, default=1000000)
parser.add_argument('--columns', type=int, default=50)
parser.add_argument('--iterations', type=int, default=500)
parser.add_argument('--test_size', type=float, default=0.25)
parser.add_argument('--params', default='', help='Provide additional parameters as a Python dict string, e.g. --params \"{\'max_depth\':2}\"')
args = parser.parse_args()
run_benchmark(args) def main():
"""The main function.
Defines and parses command line arguments and calls the benchmark.
"""
parser = argparse.ArgumentParser()
parser.add_argument('--tree_method', default='gpu_hist')
parser.add_argument('--sparsity', type=float, default=0.0)
parser.add_argument('--rows', type=int, default=1000000)
parser.add_argument('--columns', type=int, default=50)
parser.add_argument('--iterations', type=int, default=500)
parser.add_argument('--test_size', type=float, default=0.25)
parser.add_argument('--params', default='',
help='Provide additional parameters as a Python dict string, e.g. --params '
'\"{\'max_depth\':2}\"')
args = parser.parse_args()
run_benchmark(args)
if __name__ == '__main__':
main()