From 91af8f710667884d8c841a916f1073b4a7891eaa Mon Sep 17 00:00:00 2001
From: LevineHuang
Date: Fri, 27 Oct 2017 11:12:54 +0800
Subject: [PATCH] Minor edits to coding style (#2835)

* Some minor changes to the code style

Some minor changes to the code style in file basic_walkthrough.py

* coding style changes

* coding style changes arrcording PEP8

* Update basic_walkthrough.py
---
 demo/guide-python/basic_walkthrough.py        | 34 +++++++++----------
 demo/guide-python/boost_from_prediction.py    |  5 +--
 demo/guide-python/cross_validation.py         | 24 ++++++-------
 demo/guide-python/custom_objective.py         |  4 +--
 demo/guide-python/external_memory.py          |  4 +--
 demo/guide-python/gamma_regression.py         |  4 +--
 demo/guide-python/generalized_linear_model.py |  6 ++--
 demo/guide-python/predict_first_ntree.py      | 10 +++---
 demo/guide-python/predict_leaf_indices.py     |  6 ++--
 demo/guide-python/sklearn_examples.py         |  6 ++--
 10 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py
index 725d9543c..25cc4986b 100755
--- a/demo/guide-python/basic_walkthrough.py
+++ b/demo/guide-python/basic_walkthrough.py
@@ -10,22 +10,22 @@ dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 
 # specify parameters via map, definition are same as c++ version
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
 
 # specify validations set to watch performance
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
 # this is prediction
 preds = bst.predict(dtest)
 labels = dtest.get_label()
-print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
+print('error=%f' % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))))
 bst.save_model('0001.model')
 # dump model
 bst.dump_model('dump.raw.txt')
 # dump model with feature map
-bst.dump_model('dump.nice.txt','../data/featmap.txt')
+bst.dump_model('dump.nice.txt', '../data/featmap.txt')
 
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
@@ -36,7 +36,7 @@ bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)
 # assert they are the same
-assert np.sum(np.abs(preds2-preds)) == 0
+assert np.sum(np.abs(preds2 - preds)) == 0
 
 # alternatively, you can pickle the booster
 pks = pickle.dumps(bst2)
@@ -44,11 +44,11 @@ pks = pickle.dumps(bst2)
 bst3 = pickle.loads(pks)
 preds3 = bst3.predict(dtest2)
 # assert they are the same
-assert np.sum(np.abs(preds3-preds)) == 0
+assert np.sum(np.abs(preds3 - preds)) == 0
 
 ###
 # build dmatrix from scipy.sparse
-print ('start running example of build DMatrix from scipy.sparse CSR Matrix')
+print('start running example of build DMatrix from scipy.sparse CSR Matrix')
 labels = []
 row = []; col = []; dat = []
 i = 0
@@ -59,24 +59,22 @@ for l in open('../data/agaricus.txt.train'):
         k,v = it.split(':')
         row.append(i); col.append(int(k)); dat.append(float(v))
     i += 1
-csr = scipy.sparse.csr_matrix((dat, (row,col)))
-dtrain = xgb.DMatrix(csr, label = labels)
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+csr = scipy.sparse.csr_matrix((dat, (row, col)))
+dtrain = xgb.DMatrix(csr, label=labels)
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
-print ('start running example of build DMatrix from scipy.sparse CSC Matrix')
+print('start running example of build DMatrix from scipy.sparse CSC Matrix')
 # we can also construct from csc matrix
-csc = scipy.sparse.csc_matrix((dat, (row,col)))
+csc = scipy.sparse.csc_matrix((dat, (row, col)))
 dtrain = xgb.DMatrix(csc, label=labels)
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
-print ('start running example of build DMatrix from numpy array')
+print('start running example of build DMatrix from numpy array')
 # NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation
 # then convert to DMatrix
 npymat = csr.todense()
-dtrain = xgb.DMatrix(npymat, label = labels)
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+dtrain = xgb.DMatrix(npymat, label=labels)
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 bst = xgb.train(param, dtrain, num_round, watchlist)
-
-
diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py
index dfb7d2ff9..021aa2231 100755
--- a/demo/guide-python/boost_from_prediction.py
+++ b/demo/guide-python/boost_from_prediction.py
@@ -20,5 +20,6 @@ ptest = bst.predict(dtest, output_margin=True)
 
 dtrain.set_base_margin(ptrain)
 dtest.set_base_margin(ptest)
-print ('this is result of running from initial prediction')
-bst = xgb.train(param, dtrain, 1, watchlist)
+
+print('this is result of running from initial prediction')
+bst = xgb.train(param, dtrain, 1, watchlist)
\ No newline at end of file
diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py
index d44ab38ee..3979d027a 100755
--- a/demo/guide-python/cross_validation.py
+++ b/demo/guide-python/cross_validation.py
@@ -7,30 +7,30 @@ dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
 num_round = 2
 
-print ('running cross validation')
+print('running cross validation')
 # do cross validation, this will print result out as
 # [iteration] metric_name:mean_value+std_value
 # std_value is standard deviation of the metric
 xgb.cv(param, dtrain, num_round, nfold=5,
-       metrics={'error'}, seed = 0,
+       metrics={'error'}, seed=0,
        callbacks=[xgb.callback.print_evaluation(show_stdv=True)])
 
-print ('running cross validation, disable standard deviation display')
+print('running cross validation, disable standard deviation display')
 # do cross validation, this will print result out as
 # [iteration] metric_name:mean_value
 res = xgb.cv(param, dtrain, num_boost_round=10, nfold=5,
-             metrics={'error'}, seed = 0,
+             metrics={'error'}, seed=0,
              callbacks=[xgb.callback.print_evaluation(show_stdv=False),
                         xgb.callback.early_stop(3)])
-print (res)
-print ('running cross validation, with preprocessing function')
+print(res)
+print('running cross validation, with preprocessing function')
 # define the preprocessing function
 # used to return the preprocessed training, test data, and parameter
 # we can use this to do weight rescale, etc.
 # as a example, we try to set scale_pos_weight
 def fpreproc(dtrain, dtest, param):
     label = dtrain.get_label()
-    ratio = float(np.sum(label == 0)) / np.sum(label==1)
+    ratio = float(np.sum(label == 0)) / np.sum(label == 1)
     param['scale_pos_weight'] = ratio
     return (dtrain, dtest, param)
@@ -39,18 +39,18 @@ def fpreproc(dtrain, dtest, param):
 # then the return value of fpreproc will be used to generate
 # results of that fold
 xgb.cv(param, dtrain, num_round, nfold=5,
-       metrics={'auc'}, seed = 0, fpreproc = fpreproc)
+       metrics={'auc'}, seed=0, fpreproc=fpreproc)
 
 ###
 # you can also do cross validation with cutomized loss function
 # See custom_objective.py
 ##
-print ('running cross validation, with cutomsized loss function')
+print('running cross validation, with cutomsized loss function')
 def logregobj(preds, dtrain):
     labels = dtrain.get_label()
     preds = 1.0 / (1.0 + np.exp(-preds))
     grad = preds - labels
-    hess = preds * (1.0-preds)
+    hess = preds * (1.0 - preds)
     return grad, hess
 def evalerror(preds, dtrain):
     labels = dtrain.get_label()
@@ -58,5 +58,5 @@ def evalerror(preds, dtrain):
 
 param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
-xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
-       obj = logregobj, feval=evalerror)
+xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
+       obj=logregobj, feval=evalerror)
diff --git a/demo/guide-python/custom_objective.py b/demo/guide-python/custom_objective.py
index 2e462b99e..cf35db0b0 100755
--- a/demo/guide-python/custom_objective.py
+++ b/demo/guide-python/custom_objective.py
@@ -4,7 +4,7 @@ import xgboost as xgb
 ###
 # advanced: customized loss function
 #
-print ('start running example to used customized objective function')
+print('start running example to used customized objective function')
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
@@ -22,7 +22,7 @@ def logregobj(preds, dtrain):
     labels = dtrain.get_label()
     preds = 1.0 / (1.0 + np.exp(-preds))
     grad = preds - labels
-    hess = preds * (1.0-preds)
+    hess = preds * (1.0 - preds)
     return grad, hess
 
 # user defined evaluation function, return a pair metric_name, result
diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py
index eb579c935..97a74b0ca 100755
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -12,13 +12,13 @@ dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache')
 dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache')
 
 # specify validations set to watch performance
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
 
 # performance notice: set nthread to be the number of your real cpu
 # some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case set nthread=4
 #param['nthread']=num_real_cpu
 
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
diff --git a/demo/guide-python/gamma_regression.py b/demo/guide-python/gamma_regression.py
index faf58c2ad..af7103b28 100755
--- a/demo/guide-python/gamma_regression.py
+++ b/demo/guide-python/gamma_regression.py
@@ -15,11 +15,11 @@ dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34])
 param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3}
 
 # the rest of settings are the same
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 30
 
 # training and evaluation
 bst = xgb.train(param, dtrain, num_round, watchlist)
 preds = bst.predict(dtest)
 labels = dtest.get_label()
-print ('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds))))
+print('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds))))
diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py
index 243bd603c..c85c5ca9a 100755
--- a/demo/guide-python/generalized_linear_model.py
+++ b/demo/guide-python/generalized_linear_model.py
@@ -11,7 +11,7 @@ dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
-         'alpha': 0.0001, 'lambda': 1 }
+         'alpha': 0.0001, 'lambda': 1}
 
 # normally, you do not need to set eta (step_size)
 # XGBoost uses a parallel coordinate descent algorithm (shotgun),
@@ -22,9 +22,9 @@ param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
 ##
 # the rest of settings are the same
 ##
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 4
 bst = xgb.train(param, dtrain, num_round, watchlist)
 preds = bst.predict(dtest)
 labels = dtest.get_label()
-print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
+print('error=%f' % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))))
diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py
index 2ea91232e..3a8dbbb86 100755
--- a/demo/guide-python/predict_first_ntree.py
+++ b/demo/guide-python/predict_first_ntree.py
@@ -5,16 +5,16 @@ import xgboost as xgb
 ### load data in do training
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 3
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
-print ('start testing prediction from first n trees')
+print('start testing prediction from first n trees')
 ### predict using first 1 tree
 label = dtest.get_label()
 ypred1 = bst.predict(dtest, ntree_limit=1)
 # by default, we predict using all the trees
 ypred2 = bst.predict(dtest)
-print ('error of ypred1=%f' % (np.sum((ypred1>0.5)!=label) /float(len(label))))
-print ('error of ypred2=%f' % (np.sum((ypred2>0.5)!=label) /float(len(label))))
+print('error of ypred1=%f' % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
+print('error of ypred2=%f' % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py
index cff9e7658..383e8d525 100755
--- a/demo/guide-python/predict_leaf_indices.py
+++ b/demo/guide-python/predict_leaf_indices.py
@@ -4,8 +4,8 @@ import xgboost as xgb
 ### load data in do training
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 3
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
@@ -15,5 +15,5 @@ leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True)
 print(leafindex.shape)
 print(leafindex)
 ### predict all trees
-leafindex = bst.predict(dtest, pred_leaf = True)
+leafindex = bst.predict(dtest, pred_leaf=True)
 print(leafindex.shape)
diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py
index 3908e0333..d4f9924ad 100755
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -20,7 +20,7 @@ y = digits['target']
 X = digits['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -31,7 +31,7 @@ y = iris['target']
 X = iris['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -42,7 +42,7 @@ y = boston['target']
 X = boston['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
+    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(mean_squared_error(actuals, predictions))
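
Note on the reformatted error lines: the expression in basic_walkthrough.py and generalized_linear_model.py is a plain misclassification rate, the fraction of predictions whose 0.5-thresholded value disagrees with the label. The short standalone sketch below is not part of the patch; preds and labels are hypothetical stand-ins for bst.predict(dtest) and dtest.get_label(). It shows the generator-expression form next to the equivalent vectorized form already used in predict_first_ntree.py.

import numpy as np

# Hypothetical stand-ins for bst.predict(dtest) and dtest.get_label().
preds = np.array([0.9, 0.2, 0.7, 0.4])
labels = np.array([1.0, 0.0, 0.0, 0.0])

# Form used in basic_walkthrough.py / generalized_linear_model.py:
# count rows where the 0.5-thresholded prediction differs from the label.
error = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))

# Equivalent vectorized form, as in predict_first_ntree.py.
error_vec = np.sum((preds > 0.5) != labels) / float(len(labels))

assert error == error_vec
print('error=%f' % error)

Both expressions print error=0.250000 for these toy arrays (one mistake out of four rows).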