From 91af8f710667884d8c841a916f1073b4a7891eaa Mon Sep 17 00:00:00 2001
From: LevineHuang
Date: Fri, 27 Oct 2017 11:12:54 +0800
Subject: [PATCH] Minor edits to coding style (#2835)

* Some minor changes to the code style

Some minor changes to the code style in file basic_walkthrough.py

* coding style changes

* coding style changes arrcording PEP8

* Update basic_walkthrough.py
---
 demo/guide-python/basic_walkthrough.py        | 34 +++++++++----------
 demo/guide-python/boost_from_prediction.py    |  5 +--
 demo/guide-python/cross_validation.py         | 24 ++++++-------
 demo/guide-python/custom_objective.py         |  4 +--
 demo/guide-python/external_memory.py          |  4 +--
 demo/guide-python/gamma_regression.py         |  4 +--
 demo/guide-python/generalized_linear_model.py |  6 ++--
 demo/guide-python/predict_first_ntree.py      | 10 +++---
 demo/guide-python/predict_leaf_indices.py     |  6 ++--
 demo/guide-python/sklearn_examples.py         |  6 ++--
 10 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/demo/guide-python/basic_walkthrough.py b/demo/guide-python/basic_walkthrough.py
index 725d9543c..25cc4986b 100755
--- a/demo/guide-python/basic_walkthrough.py
+++ b/demo/guide-python/basic_walkthrough.py
@@ -10,22 +10,22 @@ dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
 
 # specify parameters via map, definition are same as c++ version
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
 
 # specify validations set to watch performance
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
 # this is prediction
 preds = bst.predict(dtest)
 labels = dtest.get_label()
-print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
+print('error=%f' % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))))
 bst.save_model('0001.model')
 # dump model
 bst.dump_model('dump.raw.txt')
 # dump model with feature map
-bst.dump_model('dump.nice.txt','../data/featmap.txt')
+bst.dump_model('dump.nice.txt', '../data/featmap.txt')
 
 # save dmatrix into binary buffer
 dtest.save_binary('dtest.buffer')
@@ -36,7 +36,7 @@ bst2 = xgb.Booster(model_file='xgb.model')
 dtest2 = xgb.DMatrix('dtest.buffer')
 preds2 = bst2.predict(dtest2)
 # assert they are the same
-assert np.sum(np.abs(preds2-preds)) == 0
+assert np.sum(np.abs(preds2 - preds)) == 0
 
 # alternatively, you can pickle the booster
 pks = pickle.dumps(bst2)
@@ -44,11 +44,11 @@ pks = pickle.dumps(bst2)
 bst3 = pickle.loads(pks)
 preds3 = bst3.predict(dtest2)
 # assert they are the same
-assert np.sum(np.abs(preds3-preds)) == 0
+assert np.sum(np.abs(preds3 - preds)) == 0
 
 ###
 # build dmatrix from scipy.sparse
-print ('start running example of build DMatrix from scipy.sparse CSR Matrix')
+print('start running example of build DMatrix from scipy.sparse CSR Matrix')
 labels = []
 row = []; col = []; dat = []
 i = 0
@@ -59,24 +59,22 @@ for l in open('../data/agaricus.txt.train'):
         k,v = it.split(':')
         row.append(i); col.append(int(k)); dat.append(float(v))
     i += 1
-csr = scipy.sparse.csr_matrix((dat, (row,col)))
-dtrain = xgb.DMatrix(csr, label = labels)
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+csr = scipy.sparse.csr_matrix((dat, (row, col)))
+dtrain = xgb.DMatrix(csr, label=labels)
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
-print ('start running example of build DMatrix from scipy.sparse CSC Matrix')
+print('start running example of build DMatrix from scipy.sparse CSC Matrix')
 # we can also construct from csc matrix
-csc = scipy.sparse.csc_matrix((dat, (row,col)))
+csc = scipy.sparse.csc_matrix((dat, (row, col)))
 dtrain = xgb.DMatrix(csc, label=labels)
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
-print ('start running example of build DMatrix from numpy array')
+print('start running example of build DMatrix from numpy array')
 # NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation
 # then convert to DMatrix
 npymat = csr.todense()
-dtrain = xgb.DMatrix(npymat, label = labels)
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+dtrain = xgb.DMatrix(npymat, label=labels)
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 bst = xgb.train(param, dtrain, num_round, watchlist)
-
-
diff --git a/demo/guide-python/boost_from_prediction.py b/demo/guide-python/boost_from_prediction.py
index dfb7d2ff9..021aa2231 100755
--- a/demo/guide-python/boost_from_prediction.py
+++ b/demo/guide-python/boost_from_prediction.py
@@ -20,5 +20,6 @@ ptest = bst.predict(dtest, output_margin=True)
 
 dtrain.set_base_margin(ptrain)
 dtest.set_base_margin(ptest)
-print ('this is result of running from initial prediction')
-bst = xgb.train(param, dtrain, 1, watchlist)
+
+print('this is result of running from initial prediction')
+bst = xgb.train(param, dtrain, 1, watchlist)
\ No newline at end of file
diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py
index d44ab38ee..3979d027a 100755
--- a/demo/guide-python/cross_validation.py
+++ b/demo/guide-python/cross_validation.py
@@ -7,30 +7,30 @@ dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
 num_round = 2
 
-print ('running cross validation')
+print('running cross validation')
 # do cross validation, this will print result out as
 # [iteration] metric_name:mean_value+std_value
 # std_value is standard deviation of the metric
 xgb.cv(param, dtrain, num_round, nfold=5,
-       metrics={'error'}, seed = 0,
+       metrics={'error'}, seed=0,
        callbacks=[xgb.callback.print_evaluation(show_stdv=True)])
 
-print ('running cross validation, disable standard deviation display')
+print('running cross validation, disable standard deviation display')
 # do cross validation, this will print result out as
 # [iteration] metric_name:mean_value
 res = xgb.cv(param, dtrain, num_boost_round=10, nfold=5,
-             metrics={'error'}, seed = 0,
+             metrics={'error'}, seed=0,
              callbacks=[xgb.callback.print_evaluation(show_stdv=False),
                         xgb.callback.early_stop(3)])
-print (res)
-print ('running cross validation, with preprocessing function')
+print(res)
+print('running cross validation, with preprocessing function')
 # define the preprocessing function
 # used to return the preprocessed training, test data, and parameter
 # we can use this to do weight rescale, etc.
 # as a example, we try to set scale_pos_weight
 def fpreproc(dtrain, dtest, param):
     label = dtrain.get_label()
-    ratio = float(np.sum(label == 0)) / np.sum(label==1)
+    ratio = float(np.sum(label == 0)) / np.sum(label == 1)
     param['scale_pos_weight'] = ratio
     return (dtrain, dtest, param)
@@ -39,18 +39,18 @@ def fpreproc(dtrain, dtest, param):
 # then the return value of fpreproc will be used to generate
 # results of that fold
 xgb.cv(param, dtrain, num_round, nfold=5,
-       metrics={'auc'}, seed = 0, fpreproc = fpreproc)
+       metrics={'auc'}, seed=0, fpreproc=fpreproc)
 
 ###
 # you can also do cross validation with cutomized loss function
 # See custom_objective.py
 ##
-print ('running cross validation, with cutomsized loss function')
+print('running cross validation, with cutomsized loss function')
 def logregobj(preds, dtrain):
     labels = dtrain.get_label()
     preds = 1.0 / (1.0 + np.exp(-preds))
     grad = preds - labels
-    hess = preds * (1.0-preds)
+    hess = preds * (1.0 - preds)
     return grad, hess
 def evalerror(preds, dtrain):
     labels = dtrain.get_label()
@@ -58,5 +58,5 @@ def evalerror(preds, dtrain):
 
 param = {'max_depth':2, 'eta':1, 'silent':1}
 # train with customized objective
-xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
-       obj = logregobj, feval=evalerror)
+xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
+       obj=logregobj, feval=evalerror)
diff --git a/demo/guide-python/custom_objective.py b/demo/guide-python/custom_objective.py
index 2e462b99e..cf35db0b0 100755
--- a/demo/guide-python/custom_objective.py
+++ b/demo/guide-python/custom_objective.py
@@ -4,7 +4,7 @@ import xgboost as xgb
 ###
 # advanced: customized loss function
 #
-print ('start running example to used customized objective function')
+print('start running example to used customized objective function')
 
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
@@ -22,7 +22,7 @@ def logregobj(preds, dtrain):
     labels = dtrain.get_label()
     preds = 1.0 / (1.0 + np.exp(-preds))
     grad = preds - labels
-    hess = preds * (1.0-preds)
+    hess = preds * (1.0 - preds)
     return grad, hess
 
 # user defined evaluation function, return a pair metric_name, result
diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py
index eb579c935..97a74b0ca 100755
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -12,13 +12,13 @@ dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache')
 dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache')
 
 # specify validations set to watch performance
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
 
 # performance notice: set nthread to be the number of your real cpu
 # some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case set nthread=4
 #param['nthread']=num_real_cpu
 
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
 bst = xgb.train(param, dtrain, num_round, watchlist)
diff --git a/demo/guide-python/gamma_regression.py b/demo/guide-python/gamma_regression.py
index faf58c2ad..af7103b28 100755
--- a/demo/guide-python/gamma_regression.py
+++ b/demo/guide-python/gamma_regression.py
@@ -15,11 +15,11 @@ dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34])
 param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3}
 
 # the rest of settings are the same
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 30
 
 # training and evaluation
 bst = xgb.train(param, dtrain, num_round, watchlist)
 preds = bst.predict(dtest)
 labels = dtest.get_label()
-print ('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds))))
+print('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds))))
diff --git a/demo/guide-python/generalized_linear_model.py b/demo/guide-python/generalized_linear_model.py
index 243bd603c..c85c5ca9a 100755
--- a/demo/guide-python/generalized_linear_model.py
+++ b/demo/guide-python/generalized_linear_model.py
@@ -11,7 +11,7 @@ dtest = xgb.DMatrix('../data/agaricus.txt.test')
 # lambda is the L2 regularizer
 # you can also set lambda_bias which is L2 regularizer on the bias term
 param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
-         'alpha': 0.0001, 'lambda': 1 }
+         'alpha': 0.0001, 'lambda': 1}
 
 # normally, you do not need to set eta (step_size)
 # XGBoost uses a parallel coordinate descent algorithm (shotgun),
@@ -22,9 +22,9 @@ param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
 ##
 # the rest of settings are the same
 ##
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 4
 bst = xgb.train(param, dtrain, num_round, watchlist)
 preds = bst.predict(dtest)
 labels = dtest.get_label()
-print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
+print('error=%f' % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))))
diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py
index 2ea91232e..3a8dbbb86 100755
--- a/demo/guide-python/predict_first_ntree.py
+++ b/demo/guide-python/predict_first_ntree.py
@@ -5,16 +5,16 @@ import xgboost as xgb
 ### load data in do training
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 3
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
-print ('start testing prediction from first n trees')
+print('start testing prediction from first n trees')
 ### predict using first 1 tree
 label = dtest.get_label()
 ypred1 = bst.predict(dtest, ntree_limit=1)
 # by default, we predict using all the trees
 ypred2 = bst.predict(dtest)
-print ('error of ypred1=%f' % (np.sum((ypred1>0.5)!=label) /float(len(label))))
-print ('error of ypred2=%f' % (np.sum((ypred2>0.5)!=label) /float(len(label))))
+print('error of ypred1=%f' % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
+print('error of ypred2=%f' % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
diff --git a/demo/guide-python/predict_leaf_indices.py b/demo/guide-python/predict_leaf_indices.py
index cff9e7658..383e8d525 100755
--- a/demo/guide-python/predict_leaf_indices.py
+++ b/demo/guide-python/predict_leaf_indices.py
@@ -4,8 +4,8 @@ import xgboost as xgb
 ### load data in do training
 dtrain = xgb.DMatrix('../data/agaricus.txt.train')
 dtest = xgb.DMatrix('../data/agaricus.txt.test')
-param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
-watchlist = [(dtest,'eval'), (dtrain,'train')]
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
+watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 3
 bst = xgb.train(param, dtrain, num_round, watchlist)
 
@@ -15,5 +15,5 @@ leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True)
 print(leafindex.shape)
 print(leafindex)
 ### predict all trees
-leafindex = bst.predict(dtest, pred_leaf = True)
+leafindex = bst.predict(dtest, pred_leaf=True)
 print(leafindex.shape)
diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py
index 3908e0333..d4f9924ad 100755
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -20,7 +20,7 @@ y = digits['target']
 X = digits['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -31,7 +31,7 @@ y = iris['target']
 X = iris['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
+    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -42,7 +42,7 @@ y = boston['target']
 X = boston['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
+    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(mean_squared_error(actuals, predictions))
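
Note on the reformatted error lines: the expression in basic_walkthrough.py and generalized_linear_model.py is a plain misclassification rate, the fraction of predictions whose 0.5-thresholded value disagrees with the label. The short standalone sketch below is not part of the patch; preds and labels are hypothetical stand-ins for bst.predict(dtest) and dtest.get_label(). It shows the generator-expression form next to the equivalent vectorized form already used in predict_first_ntree.py.

import numpy as np

# Hypothetical stand-ins for bst.predict(dtest) and dtest.get_label().
preds = np.array([0.9, 0.2, 0.7, 0.4])
labels = np.array([1.0, 0.0, 0.0, 0.0])

# Form used in basic_walkthrough.py / generalized_linear_model.py:
# count rows where the 0.5-thresholded prediction differs from the label.
error = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))

# Equivalent vectorized form, as in predict_first_ntree.py.
error_vec = np.sum((preds > 0.5) != labels) / float(len(labels))

assert error == error_vec
print('error=%f' % error)

Both expressions print error=0.250000 for these toy arrays (one mistake out of four rows).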