diff --git a/demo/kaggle-higgs/README.md b/demo/kaggle-higgs/README.md
index a3c208002..b3db23266 100644
--- a/demo/kaggle-higgs/README.md
+++ b/demo/kaggle-higgs/README.md
@@ -16,5 +16,6 @@ make
 
-
+Speed
+=====
 speedtest.py compares xgboost's speed on this dataset with sklearn.GBM
diff --git a/demo/multiclass_classification/README.md b/demo/multiclass_classification/README.md
new file mode 100644
index 000000000..4d66ee06a
--- /dev/null
+++ b/demo/multiclass_classification/README.md
@@ -0,0 +1,8 @@
+Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology)
+
+1. Run runexp.sh
+```bash
+./runexp.sh
+```
+
+Explanations can be found in the [wiki](https://github.com/tqchen/xgboost/wiki)
diff --git a/demo/multiclass_classification/runexp.sh b/demo/multiclass_classification/runexp.sh
new file mode 100755
index 000000000..0af814725
--- /dev/null
+++ b/demo/multiclass_classification/runexp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+if [ -f dermatology.data ]
+then
+    echo "using existing data to run multi-class classification"
+else
+    echo "getting data from UCI; make sure you are connected to the internet"
+    wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data
+fi
+python train.py
diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py
new file mode 100755
index 000000000..38d818890
--- /dev/null
+++ b/demo/multiclass_classification/train.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+import sys
+import numpy as np
+sys.path.append('../../python/')
+import xgboost as xgb
+
+# labels need to be in the range 0 to num_class - 1;
+# '?' in column 33 marks a missing value and is mapped to 0/1
+data = np.loadtxt('./dermatology.data', delimiter=',',
+                  converters={33: lambda x: int(x == '?'), 34: lambda x: int(x) - 1})
+sz = data.shape
+
+# 70/30 train/test split
+train = data[:int(sz[0] * 0.7), :]
+test = data[int(sz[0] * 0.7):, :]
+
+train_X = train[:, 0:33]
+train_Y = train[:, 34]
+
+test_X = test[:, 0:33]
+test_Y = test[:, 34]
+
+xg_train = xgb.DMatrix(train_X, label=train_Y)
+xg_test = xgb.DMatrix(test_X, label=test_Y)
+# set up parameters for xgboost
+param = {}
+# use softmax multi-class classification
+param['objective'] = 'multi:softmax'
+# learning rate and maximum tree depth
+param['bst:eta'] = 0.1
+param['bst:max_depth'] = 6
+param['silent'] = 1
+param['nthread'] = 4
+param['num_class'] = 6
+
+watchlist = [(xg_train, 'train'), (xg_test, 'test')]
+num_round = 5
+bst = xgb.train(param, xg_train, num_round, watchlist)
+# get prediction
+pred = bst.predict(xg_test)
+
+print('predicting, classification error=%f' % (sum(int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y))))
diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h
index b2649735c..e3868002a 100644
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@@ -97,8 +97,8 @@ namespace xgboost{
          */
         inline void InitTrainer(void){
             if( mparam.num_class != 0 ){
-                if( name_obj_ != "softmax" ){
-                    name_obj_ = "softmax";
+                if( name_obj_ != "multi:softmax" ){
+                    name_obj_ = "multi:softmax";
                     printf("auto select objective=softmax to support multi-class classification\n" );
                 }
             }
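
A side note on the new train.py: the per-example loop in its final error computation can be expressed as a single vectorized numpy operation. A minimal sketch, assuming the `pred` and `test_Y` arrays exactly as produced by train.py above (only the `numpy` import is added here):

```python
import numpy as np

# bst.predict() with multi:softmax returns the predicted class ids as
# floats (e.g. 2.0); casting both sides to int mirrors the
# int(pred[i]) != test_Y[i] comparison in the loop, and the mean of
# the resulting boolean mask is the classification error rate.
error_rate = np.mean(pred.astype(int) != test_Y.astype(int))
print('classification error=%f' % error_rate)
```

This avoids the Python-level loop over test examples and should give the same number as the version in train.py.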