add python interface for xgboost
This commit is contained in:
parent
adc9400736
commit
6fd77cbb24
3
.gitignore
vendored
3
.gitignore
vendored
@ -16,4 +16,5 @@
|
|||||||
*conf
|
*conf
|
||||||
*buffer
|
*buffer
|
||||||
*model
|
*model
|
||||||
xgboost
|
xgboost
|
||||||
|
*pyc
|
||||||
|
|||||||
@ -1,2 +1,4 @@
|
|||||||
beta version:
|
|
||||||
python wrapper for xgboost using ctypes
|
python wrapper for xgboost using ctypes
|
||||||
|
|
||||||
|
see the example directory for usage
|
||||||
|
|
||||||
|
|||||||
3
python/example/README.md
Normal file
3
python/example/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
Example of using the Python xgboost wrapper. The data is generated from demo/binary_classification and is in libsvm format.
|
||||||
|
|
||||||
|
For usage, see demo.py and the comments in it.
|
||||||
1611
python/example/agaricus.txt.test
Normal file
1611
python/example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
python/example/agaricus.txt.train
Normal file
6513
python/example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
74
python/example/demo.py
Executable file
74
python/example/demo.py
Executable file
@ -0,0 +1,74 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import sys
|
||||||
|
import scipy.sparse
|
||||||
|
# append the path to xgboost
|
||||||
|
sys.path.append('../')
|
||||||
|
import xgboost as xgb
|
||||||
|
|
||||||
|
### simple example
|
||||||
|
# load data from a text file, or from a binary buffer generated by xgboost
|
||||||
|
dtrain = xgb.DMatrix('agaricus.txt.train')
|
||||||
|
dtest = xgb.DMatrix('agaricus.txt.test')
|
||||||
|
|
||||||
|
# specify parameters via a map; the definitions are the same as in the C++ version
|
||||||
|
param = {'bst:max_depth':4, 'bst:eta':1, 'silent':1, 'loss_type':2 }
|
||||||
|
|
||||||
|
# specify the validation sets used to watch performance
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
num_round = 2
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
# this is prediction
|
||||||
|
preds = bst.predict( dtest )
|
||||||
|
labels = dtest.get_label()
|
||||||
|
print 'error=%f' % ( sum(1 for i in xrange(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))
|
||||||
|
bst.save_model('0001.model')
|
||||||
|
|
||||||
|
|
||||||
|
###
|
||||||
|
# build dmatrix in python iteratively
|
||||||
|
#
|
||||||
|
print 'start running example of build DMatrix in python'
|
||||||
|
dtrain = xgb.DMatrix()
|
||||||
|
labels = []
|
||||||
|
for l in open('agaricus.txt.train'):
|
||||||
|
arr = l.split()
|
||||||
|
labels.append( int(arr[0]))
|
||||||
|
feats = []
|
||||||
|
for it in arr[1:]:
|
||||||
|
k,v = it.split(':')
|
||||||
|
feats.append( (int(k), float(v)) )
|
||||||
|
dtrain.add_row( feats )
|
||||||
|
dtrain.set_label( labels )
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
###
|
||||||
|
# build dmatrix from scipy.sparse
|
||||||
|
print 'start running example of build DMatrix from scipy.sparse'
|
||||||
|
labels = []
|
||||||
|
row = []; col = []; dat = []
|
||||||
|
i = 0
|
||||||
|
for l in open('agaricus.txt.train'):
|
||||||
|
arr = l.split()
|
||||||
|
labels.append( int(arr[0]))
|
||||||
|
for it in arr[1:]:
|
||||||
|
k,v = it.split(':')
|
||||||
|
row.append(i); col.append(int(k)); dat.append(float(v))
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||||
|
dtrain = xgb.DMatrix( csr )
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
print 'start running example of build DMatrix from numpy array'
|
||||||
|
# NOTE: npymat is a numpy array; the internal implementation converts it to a scipy.sparse.csr_matrix and then to a DMatrix
|
||||||
|
npymat = csr.todense()
|
||||||
|
dtrain = xgb.DMatrix( npymat )
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
126
python/example/featmap.txt
Normal file
126
python/example/featmap.txt
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
0 cap-shape=bell i
|
||||||
|
1 cap-shape=conical i
|
||||||
|
2 cap-shape=convex i
|
||||||
|
3 cap-shape=flat i
|
||||||
|
4 cap-shape=knobbed i
|
||||||
|
5 cap-shape=sunken i
|
||||||
|
6 cap-surface=fibrous i
|
||||||
|
7 cap-surface=grooves i
|
||||||
|
8 cap-surface=scaly i
|
||||||
|
9 cap-surface=smooth i
|
||||||
|
10 cap-color=brown i
|
||||||
|
11 cap-color=buff i
|
||||||
|
12 cap-color=cinnamon i
|
||||||
|
13 cap-color=gray i
|
||||||
|
14 cap-color=green i
|
||||||
|
15 cap-color=pink i
|
||||||
|
16 cap-color=purple i
|
||||||
|
17 cap-color=red i
|
||||||
|
18 cap-color=white i
|
||||||
|
19 cap-color=yellow i
|
||||||
|
20 bruises?=bruises i
|
||||||
|
21 bruises?=no i
|
||||||
|
22 odor=almond i
|
||||||
|
23 odor=anise i
|
||||||
|
24 odor=creosote i
|
||||||
|
25 odor=fishy i
|
||||||
|
26 odor=foul i
|
||||||
|
27 odor=musty i
|
||||||
|
28 odor=none i
|
||||||
|
29 odor=pungent i
|
||||||
|
30 odor=spicy i
|
||||||
|
31 gill-attachment=attached i
|
||||||
|
32 gill-attachment=descending i
|
||||||
|
33 gill-attachment=free i
|
||||||
|
34 gill-attachment=notched i
|
||||||
|
35 gill-spacing=close i
|
||||||
|
36 gill-spacing=crowded i
|
||||||
|
37 gill-spacing=distant i
|
||||||
|
38 gill-size=broad i
|
||||||
|
39 gill-size=narrow i
|
||||||
|
40 gill-color=black i
|
||||||
|
41 gill-color=brown i
|
||||||
|
42 gill-color=buff i
|
||||||
|
43 gill-color=chocolate i
|
||||||
|
44 gill-color=gray i
|
||||||
|
45 gill-color=green i
|
||||||
|
46 gill-color=orange i
|
||||||
|
47 gill-color=pink i
|
||||||
|
48 gill-color=purple i
|
||||||
|
49 gill-color=red i
|
||||||
|
50 gill-color=white i
|
||||||
|
51 gill-color=yellow i
|
||||||
|
52 stalk-shape=enlarging i
|
||||||
|
53 stalk-shape=tapering i
|
||||||
|
54 stalk-root=bulbous i
|
||||||
|
55 stalk-root=club i
|
||||||
|
56 stalk-root=cup i
|
||||||
|
57 stalk-root=equal i
|
||||||
|
58 stalk-root=rhizomorphs i
|
||||||
|
59 stalk-root=rooted i
|
||||||
|
60 stalk-root=missing i
|
||||||
|
61 stalk-surface-above-ring=fibrous i
|
||||||
|
62 stalk-surface-above-ring=scaly i
|
||||||
|
63 stalk-surface-above-ring=silky i
|
||||||
|
64 stalk-surface-above-ring=smooth i
|
||||||
|
65 stalk-surface-below-ring=fibrous i
|
||||||
|
66 stalk-surface-below-ring=scaly i
|
||||||
|
67 stalk-surface-below-ring=silky i
|
||||||
|
68 stalk-surface-below-ring=smooth i
|
||||||
|
69 stalk-color-above-ring=brown i
|
||||||
|
70 stalk-color-above-ring=buff i
|
||||||
|
71 stalk-color-above-ring=cinnamon i
|
||||||
|
72 stalk-color-above-ring=gray i
|
||||||
|
73 stalk-color-above-ring=orange i
|
||||||
|
74 stalk-color-above-ring=pink i
|
||||||
|
75 stalk-color-above-ring=red i
|
||||||
|
76 stalk-color-above-ring=white i
|
||||||
|
77 stalk-color-above-ring=yellow i
|
||||||
|
78 stalk-color-below-ring=brown i
|
||||||
|
79 stalk-color-below-ring=buff i
|
||||||
|
80 stalk-color-below-ring=cinnamon i
|
||||||
|
81 stalk-color-below-ring=gray i
|
||||||
|
82 stalk-color-below-ring=orange i
|
||||||
|
83 stalk-color-below-ring=pink i
|
||||||
|
84 stalk-color-below-ring=red i
|
||||||
|
85 stalk-color-below-ring=white i
|
||||||
|
86 stalk-color-below-ring=yellow i
|
||||||
|
87 veil-type=partial i
|
||||||
|
88 veil-type=universal i
|
||||||
|
89 veil-color=brown i
|
||||||
|
90 veil-color=orange i
|
||||||
|
91 veil-color=white i
|
||||||
|
92 veil-color=yellow i
|
||||||
|
93 ring-number=none i
|
||||||
|
94 ring-number=one i
|
||||||
|
95 ring-number=two i
|
||||||
|
96 ring-type=cobwebby i
|
||||||
|
97 ring-type=evanescent i
|
||||||
|
98 ring-type=flaring i
|
||||||
|
99 ring-type=large i
|
||||||
|
100 ring-type=none i
|
||||||
|
101 ring-type=pendant i
|
||||||
|
102 ring-type=sheathing i
|
||||||
|
103 ring-type=zone i
|
||||||
|
104 spore-print-color=black i
|
||||||
|
105 spore-print-color=brown i
|
||||||
|
106 spore-print-color=buff i
|
||||||
|
107 spore-print-color=chocolate i
|
||||||
|
108 spore-print-color=green i
|
||||||
|
109 spore-print-color=orange i
|
||||||
|
110 spore-print-color=purple i
|
||||||
|
111 spore-print-color=white i
|
||||||
|
112 spore-print-color=yellow i
|
||||||
|
113 population=abundant i
|
||||||
|
114 population=clustered i
|
||||||
|
115 population=numerous i
|
||||||
|
116 population=scattered i
|
||||||
|
117 population=several i
|
||||||
|
118 population=solitary i
|
||||||
|
119 habitat=grasses i
|
||||||
|
120 habitat=leaves i
|
||||||
|
121 habitat=meadows i
|
||||||
|
122 habitat=paths i
|
||||||
|
123 habitat=urban i
|
||||||
|
124 habitat=waste i
|
||||||
|
125 habitat=woods i
|
||||||
@ -1,10 +1,12 @@
|
|||||||
# module for xgboost
|
# module for xgboost
|
||||||
import ctypes
|
import ctypes
|
||||||
|
import os
|
||||||
# optinally have scipy sparse, though not necessary
|
# optinally have scipy sparse, though not necessary
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.sparse as scp
|
import scipy.sparse as scp
|
||||||
|
|
||||||
# set this line correctly
|
# set this line correctly
|
||||||
XGBOOST_PATH = './libxgboostpy.so'
|
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostpy.so'
|
||||||
|
|
||||||
# entry type of sparse matrix
|
# entry type of sparse matrix
|
||||||
class REntry(ctypes.Structure):
|
class REntry(ctypes.Structure):
|
||||||
@ -34,9 +36,9 @@ class DMatrix:
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
csr = scp.csr_matrix(data)
|
csr = scp.csr_matrix(data)
|
||||||
self.__init_from_csr(data)
|
self.__init_from_csr(csr)
|
||||||
except:
|
except:
|
||||||
raise "DMatrix", "can not intialize DMatrix from"+type(data)
|
raise Exception, "can not intialize DMatrix from"+str(type(data))
|
||||||
if label != None:
|
if label != None:
|
||||||
self.set_label(label)
|
self.set_label(label)
|
||||||
|
|
||||||
|
|||||||
@ -32,6 +32,7 @@ namespace xgboost{
|
|||||||
mat.row_data_.resize( mat.row_ptr_.back() + len );
|
mat.row_data_.resize( mat.row_ptr_.back() + len );
|
||||||
memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len );
|
memcpy( &mat.row_data_[mat.row_ptr_.back()], data, sizeof(XGEntry)*len );
|
||||||
mat.row_ptr_.push_back( mat.row_ptr_.back() + len );
|
mat.row_ptr_.push_back( mat.row_ptr_.back() + len );
|
||||||
|
init_col_ = false;
|
||||||
}
|
}
|
||||||
inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
|
inline const XGEntry* GetRow(unsigned ridx, size_t* len) const{
|
||||||
const xgboost::booster::FMatrixS &mat = this->data;
|
const xgboost::booster::FMatrixS &mat = this->data;
|
||||||
@ -72,7 +73,7 @@ namespace xgboost{
|
|||||||
return &(this->info.labels[0]);
|
return &(this->info.labels[0]);
|
||||||
}
|
}
|
||||||
inline void CheckInit(void){
|
inline void CheckInit(void){
|
||||||
if(!this->data.HaveColAccess()){
|
if(!init_col_){
|
||||||
this->data.InitData();
|
this->data.InitData();
|
||||||
}
|
}
|
||||||
utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
|
utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
|
||||||
@ -163,7 +164,9 @@ extern "C"{
|
|||||||
void *XGBoosterCreate( void *dmats[], size_t len ){
|
void *XGBoosterCreate( void *dmats[], size_t len ){
|
||||||
std::vector<const xgboost::regrank::DMatrix*> mats;
|
std::vector<const xgboost::regrank::DMatrix*> mats;
|
||||||
for( size_t i = 0; i < len; ++i ){
|
for( size_t i = 0; i < len; ++i ){
|
||||||
mats.push_back( static_cast<DMatrix*>(dmats[i]) );
|
DMatrix *dtr = static_cast<DMatrix*>(dmats[i]);
|
||||||
|
dtr->CheckInit();
|
||||||
|
mats.push_back( dtr );
|
||||||
}
|
}
|
||||||
return new Booster( mats );
|
return new Booster( mats );
|
||||||
}
|
}
|
||||||
|
|||||||
@ -112,7 +112,9 @@ namespace xgboost{
|
|||||||
unsigned ngptr;
|
unsigned ngptr;
|
||||||
if( fs.Read(&ngptr, sizeof(unsigned) ) != 0 ){
|
if( fs.Read(&ngptr, sizeof(unsigned) ) != 0 ){
|
||||||
info.group_ptr.resize( ngptr );
|
info.group_ptr.resize( ngptr );
|
||||||
utils::Assert( fs.Read(&info.group_ptr[0], sizeof(unsigned) * ngptr) != 0, "Load group file");
|
if( ngptr != 0 ){
|
||||||
|
utils::Assert( fs.Read(&info.group_ptr[0], sizeof(unsigned) * ngptr) != 0, "Load group file");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fs.Close();
|
fs.Close();
|
||||||
@ -143,7 +145,9 @@ namespace xgboost{
|
|||||||
{// write out group ptr
|
{// write out group ptr
|
||||||
unsigned ngptr = static_cast<unsigned>( info.group_ptr.size() );
|
unsigned ngptr = static_cast<unsigned>( info.group_ptr.size() );
|
||||||
fs.Write(&ngptr, sizeof(unsigned) );
|
fs.Write(&ngptr, sizeof(unsigned) );
|
||||||
fs.Write(&info.group_ptr[0], sizeof(unsigned) * ngptr);
|
if( ngptr != 0 ){
|
||||||
|
fs.Write(&info.group_ptr[0], sizeof(unsigned) * ngptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
fs.Close();
|
fs.Close();
|
||||||
if (!silent){
|
if (!silent){
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user