Merge branch 'dev' of https://github.com/tqchen/xgboost into dev
This commit is contained in:
commit
a77debc0c5
@ -16,5 +16,6 @@ make
|
||||
|
||||
|
||||
|
||||
|
||||
Speed
|
||||
=====
|
||||
speedtest.py compares xgboost's speed on this dataset with sklearn.GBM
|
||||
|
||||
8
demo/multiclass_classification/README.md
Normal file
8
demo/multiclass_classification/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology)
|
||||
|
||||
1. Run runexp.sh
|
||||
```bash
|
||||
./runexp.sh
|
||||
```
|
||||
|
||||
Explanations can be found in the [wiki](https://github.com/tqchen/xgboost/wiki)
|
||||
9
demo/multiclass_classification/runexp.sh
Executable file
9
demo/multiclass_classification/runexp.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Run the multi-class classification demo end to end:
# fetch the UCI Dermatology dataset if it is not present, then train.
if [ ! -f dermatology.data ]; then
    echo "getting data from uci, make sure you are connected to internet"
    wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data
else
    echo "use existing data to run multi class classification"
fi
python train.py
|
||||
42
demo/multiclass_classification/train.py
Executable file
42
demo/multiclass_classification/train.py
Executable file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/python
"""Multi-class classification demo on the UCI Dermatology dataset.

Trains an XGBoost softmax classifier on a 70/30 split of
``./dermatology.data`` and prints the test classification error.

Fix: the original used the Python-2-only ``print`` statement and
``xrange``; this version runs under both Python 2 and Python 3.
"""
import sys

import numpy as np

sys.path.append('../../python/')
import xgboost as xgb

# Labels need to be 0 .. num_class-1.  Column 33 holds '?' for missing
# values (mapped to 0/1); column 34 is the class label 1-6, shifted to 0-5.
data = np.loadtxt('./dermatology.data', delimiter=',',
                  converters={33: lambda x: int(x == '?'),
                              34: lambda x: int(x) - 1})
sz = data.shape

# 70/30 train/test split, taking rows in file order.
train = data[:int(sz[0] * 0.7), :]
test = data[int(sz[0] * 0.7):, :]

# First 33 columns are features; column 34 is the label.
train_X = train[:, 0:33]
train_Y = train[:, 34]

test_X = test[:, 0:33]
test_Y = test[:, 34]

xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_test = xgb.DMatrix(test_X, label=test_Y)

# Setup parameters for xgboost.
param = {}
# Use softmax multi-class classification.
param['objective'] = 'multi:softmax'
# Scale weight of positive examples.
param['bst:eta'] = 0.1
param['bst:max_depth'] = 6
param['silent'] = 1
param['nthread'] = 4
param['num_class'] = 6

watchlist = [(xg_train, 'train'), (xg_test, 'test')]
num_round = 5
bst = xgb.train(param, xg_train, num_round, watchlist)

# Get prediction and report the fraction of misclassified test rows.
pred = bst.predict(xg_test)
err = sum(int(pred[i]) != test_Y[i]
          for i in range(len(test_Y))) / float(len(test_Y))
print('predicting, classification error=%f' % err)
|
||||
|
||||
|
||||
@ -97,8 +97,8 @@ namespace xgboost{
|
||||
*/
|
||||
inline void InitTrainer(void){
|
||||
if( mparam.num_class != 0 ){
|
||||
if( name_obj_ != "softmax" ){
|
||||
name_obj_ = "softmax";
|
||||
if( name_obj_ != "multi:softmax" ){
|
||||
name_obj_ = "multi:softmax";
|
||||
printf("auto select objective=softmax to support multi-class classification\n" );
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user