Minor cleanup (#2342)

* Clean up demo of multiclass classification

* Remove extra space
This commit is contained in:
Juang, Yi-Lin 2017-05-26 08:40:41 -05:00 committed by Yuan (Terry) Tang
parent f1dc82e3e1
commit 6776292951
2 changed files with 20 additions and 17 deletions

View File

@ -1,19 +1,22 @@
#!/usr/bin/python #!/usr/bin/python
from __future__ import division
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
# label need to be 0 to num_class -1 # label need to be 0 to num_class -1
data = np.loadtxt('./dermatology.data', delimiter=',',converters={33: lambda x:int(x == '?'), 34: lambda x:int(x)-1 } ) data = np.loadtxt('./dermatology.data', delimiter=',',
converters={33: lambda x:int(x == '?'), 34: lambda x:int(x)-1})
sz = data.shape sz = data.shape
train = data[:int(sz[0] * 0.7), :] train = data[:int(sz[0] * 0.7), :]
test = data[int(sz[0] * 0.7):, :] test = data[int(sz[0] * 0.7):, :]
train_X = train[:,0:33] train_X = train[:, :33]
train_Y = train[:, 34] train_Y = train[:, 34]
test_X = test[:, :33]
test_X = test[:,0:33]
test_Y = test[:, 34] test_Y = test[:, 34]
xg_train = xgb.DMatrix(train_X, label=train_Y) xg_train = xgb.DMatrix(train_X, label=train_Y)
@ -31,18 +34,18 @@ param['num_class'] = 6
watchlist = [(xg_train, 'train'), (xg_test, 'test')] watchlist = [(xg_train, 'train'), (xg_test, 'test')]
num_round = 5 num_round = 5
bst = xgb.train(param, xg_train, num_round, watchlist ); bst = xgb.train(param, xg_train, num_round, watchlist)
# get prediction # get prediction
pred = bst.predict( xg_test ); pred = bst.predict(xg_test)
error_rate = np.sum(pred != test_Y) / test_Y.shape[0]
print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) )) print('Test error using softmax = {}'.format(error_rate))
# do the same thing again, but output probabilities # do the same thing again, but output probabilities
param['objective'] = 'multi:softprob' param['objective'] = 'multi:softprob'
bst = xgb.train(param, xg_train, num_round, watchlist ); bst = xgb.train(param, xg_train, num_round, watchlist)
# Note: this convention has been changed since xgboost-unity # Note: this convention has been changed since xgboost-unity
# get prediction, this is in 1D array, need reshape to (ndata, nclass) # get prediction, this is in 1D array, need reshape to (ndata, nclass)
yprob = bst.predict( xg_test ).reshape( test_Y.shape[0], 6 ) pred_prob = bst.predict(xg_test).reshape(test_Y.shape[0], 6)
ylabel = np.argmax(yprob, axis=1) pred_label = np.argmax(pred_prob, axis=1)
# Score the labels obtained by argmax over the softprob output; `pred` still
# holds the first model's predictions and would only repeat its error rate.
error_rate = np.sum(pred_label != test_Y) / test_Y.shape[0]
print ('predicting, classification error=%f' % (sum( int(ylabel[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) )) print('Test error using softprob = {}'.format(error_rate))