This commit is contained in:
antinucleon 2014-05-17 02:59:10 +00:00
parent 2cae28087a
commit c67b098bd6
2 changed files with 44 additions and 0 deletions

View File

@ -0,0 +1,42 @@
import sys
import numpy as np
sys.path.append('../../python/')
import xgboost as xgb
# Multi-class classification demo on the UCI dermatology data set.
#
# Load the comma-separated data file (one sample per row):
#  - column 33 may contain '?' instead of a number; it is mapped to a 0/1
#    "value is missing" flag so that loadtxt can parse the row.  The column is
#    not used as a feature below (only columns 0-32 are).
#  - column 34 is the class label.  Per the UCI data set description the raw
#    labels are 1..6; they are shifted to 0..5 because the multi-class
#    objective expects labels in [0, num_class).
data = np.loadtxt('./dermatology.data', delimiter=',',
                  converters={33: lambda x: int(x in ('?', b'?')),
                              34: lambda x: int(x) - 1})
sz = data.shape

# first 70% of the rows for training, the remaining 30% for testing
n_train = int(sz[0] * 0.7)
train = data[:n_train, :]
test = data[n_train:, :]

train_X = train[:, 0:33]
train_Y = train[:, 34]
test_X = test[:, 0:33]
test_Y = test[:, 34]

xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_test = xgb.DMatrix(test_X, label=test_Y)

# setup parameters for xgboost
param = {}
# softmax multi-class classification: prediction is the winning class index
param['objective'] = 'multi:softmax'
# booster parameters: learning rate (shrinkage) and maximum tree depth
param['bst:eta'] = 0.1
param['bst:max_depth'] = 6
# multi-class classification error rate; 'auc' is a binary ranking metric and
# is not meaningful for hard multi-class predictions
param['eval_metric'] = 'merror'
param['silent'] = 1
param['nthread'] = 4
# six classes, matching the 0..5 labels produced by the converter above
param['num_class'] = 6

# data sets whose metric is reported after every boosting round
watchlist = [(xg_train, 'train'), (xg_test, 'test')]
num_round = 5
bst = xgb.train(param, xg_train, num_round, watchlist)

View File

@ -0,0 +1,2 @@
#! /bin/bash
# Fetch the UCI dermatology data set into ./dermatology.data.
# The file is downloaded under a temporary name and renamed only on success,
# so a failed run never leaves a truncated dermatology.data and a repeated run
# replaces the file instead of creating a "dermatology.data.1" copy.
set -e
wget -O dermatology.data.tmp https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data
mv dermatology.data.tmp dermatology.data