From 6776292951565c8cd72e69afd9d94de1474f00c0 Mon Sep 17 00:00:00 2001
From: "Juang, Yi-Lin"
Date: Fri, 26 May 2017 08:40:41 -0500
Subject: [PATCH] Minor cleanup (#2342)

* Clean up demo of multiclass classification

* Remove extra space
---
 demo/multiclass_classification/train.py | 35 ++++++++++++++-----------
 python-package/xgboost/core.py          |  2 +-
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py
index 9e2a82ed2..b4af52c99 100755
--- a/demo/multiclass_classification/train.py
+++ b/demo/multiclass_classification/train.py
@@ -1,22 +1,25 @@
-#! /usr/bin/python
+#!/usr/bin/python
+
+from __future__ import division
+
 import numpy as np
 import xgboost as xgb
 
 # label need to be 0 to num_class -1
-data = np.loadtxt('./dermatology.data', delimiter=',',converters={33: lambda x:int(x == '?'), 34: lambda x:int(x)-1 } )
+data = np.loadtxt('./dermatology.data', delimiter=',',
+                  converters={33: lambda x:int(x == '?'), 34: lambda x:int(x)-1})
 sz = data.shape
 
 train = data[:int(sz[0] * 0.7), :]
 test = data[int(sz[0] * 0.7):, :]
 
-train_X = train[:,0:33]
+train_X = train[:, :33]
 train_Y = train[:, 34]
 
-
-test_X = test[:,0:33]
+test_X = test[:, :33]
 test_Y = test[:, 34]
 
-xg_train = xgb.DMatrix( train_X, label=train_Y)
+xg_train = xgb.DMatrix(train_X, label=train_Y)
 xg_test = xgb.DMatrix(test_X, label=test_Y)
 # setup parameters for xgboost
 param = {}
@@ -29,20 +32,20 @@ param['silent'] = 1
 param['nthread'] = 4
 param['num_class'] = 6
 
-watchlist = [ (xg_train,'train'), (xg_test, 'test') ]
+watchlist = [(xg_train, 'train'), (xg_test, 'test')]
 num_round = 5
-bst = xgb.train(param, xg_train, num_round, watchlist );
+bst = xgb.train(param, xg_train, num_round, watchlist)
 # get prediction
-pred = bst.predict( xg_test );
-
-print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
+pred = bst.predict(xg_test)
+error_rate = np.sum(pred != test_Y) / test_Y.shape[0]
+print('Test error using softmax = {}'.format(error_rate))
 
 # do the same thing again, but output probabilities
 param['objective'] = 'multi:softprob'
-bst = xgb.train(param, xg_train, num_round, watchlist );
+bst = xgb.train(param, xg_train, num_round, watchlist)
 # Note: this convention has been changed since xgboost-unity
 # get prediction, this is in 1D array, need reshape to (ndata, nclass)
-yprob = bst.predict( xg_test ).reshape( test_Y.shape[0], 6 )
-ylabel = np.argmax(yprob, axis=1)
-
-print ('predicting, classification error=%f' % (sum( int(ylabel[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
+pred_prob = bst.predict(xg_test).reshape(test_Y.shape[0], 6)
+pred_label = np.argmax(pred_prob, axis=1)
+error_rate = np.sum(pred_label != test_Y) / test_Y.shape[0]
+print('Test error using softprob = {}'.format(error_rate))
diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 2d1c5e7c7..c653a540b 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -848,7 +848,7 @@ class Booster(object):
 
     def eval_set(self, evals, iteration=0, feval=None):
         # pylint: disable=invalid-name
-        """Evaluate a set of data. 
+        """Evaluate a set of data.
 
         Parameters
         ----------
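
Reviewer note, not part of the patch: the demo leans on the convention that 'multi:softmax' predictions come back as one class label per row, while 'multi:softprob' predictions come back as per-class probabilities that (per the demo's own comment) arrive as a 1-D array and need reshaping to (ndata, nclass) before the argmax. A minimal self-contained sketch of that convention on synthetic data; the variable names and toy setup here are illustrative assumptions, not code from this repository:

    import numpy as np
    import xgboost as xgb

    # Toy data: 60 rows, 4 features, 3 classes (labels must lie in [0, num_class))
    rng = np.random.RandomState(0)
    X = rng.rand(60, 4)
    y = rng.randint(0, 3, size=60)
    dtrain = xgb.DMatrix(X, label=y)

    params = {'objective': 'multi:softmax', 'num_class': 3}
    bst = xgb.train(params, dtrain, num_boost_round=2)
    labels = bst.predict(dtrain)            # one predicted class label per row

    params['objective'] = 'multi:softprob'
    bst = xgb.train(params, dtrain, num_boost_round=2)
    # Per-class probabilities; reshape to (ndata, nclass) as the demo does,
    # then recover hard labels with an argmax over the class axis
    probs = bst.predict(dtrain).reshape(y.shape[0], 3)
    labels_from_probs = np.argmax(probs, axis=1)

With either objective, the error computation in the patch reduces to np.sum(predicted != test_Y) / test_Y.shape[0], which the added __future__ division import keeps correct under Python 2.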