chg
This commit is contained in:
parent
3ba7995754
commit
08a6b92216
1611
wrapper/R-example/agaricus.txt.test
Normal file
1611
wrapper/R-example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
wrapper/R-example/agaricus.txt.train
Normal file
6513
wrapper/R-example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
14
wrapper/R-example/demo.R
Normal file
14
wrapper/R-example/demo.R
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# include xgboost library, must set chdir=TRURE
|
||||||
|
source('../xgboost.R', chdir=TRUE)
|
||||||
|
|
||||||
|
# test code here
|
||||||
|
dtrain <- xgb.DMatrix("agaricus.txt.train")
|
||||||
|
dtest <- xgb.DMatrix("agaricus.txt.test")
|
||||||
|
param = list('bst:max_depth'=2, 'bst:eta'=1, 'silent'=1, 'objective'='binary:logistic')
|
||||||
|
watchlist <- list('train'=dtrain,'test'=dtest)
|
||||||
|
bst <- xgb.train(param, dtrain, watchlist=watchlist, nround=3)
|
||||||
|
|
||||||
|
succ <- xgb.save(bst, "iter.model")
|
||||||
|
print('finsih save model')
|
||||||
|
bst2 <- xgb.Booster(modelfile="iter.model")
|
||||||
|
pred = xgb.predict(bst2, dtest)
|
||||||
126
wrapper/R-example/featmap.txt
Normal file
126
wrapper/R-example/featmap.txt
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
0 cap-shape=bell i
|
||||||
|
1 cap-shape=conical i
|
||||||
|
2 cap-shape=convex i
|
||||||
|
3 cap-shape=flat i
|
||||||
|
4 cap-shape=knobbed i
|
||||||
|
5 cap-shape=sunken i
|
||||||
|
6 cap-surface=fibrous i
|
||||||
|
7 cap-surface=grooves i
|
||||||
|
8 cap-surface=scaly i
|
||||||
|
9 cap-surface=smooth i
|
||||||
|
10 cap-color=brown i
|
||||||
|
11 cap-color=buff i
|
||||||
|
12 cap-color=cinnamon i
|
||||||
|
13 cap-color=gray i
|
||||||
|
14 cap-color=green i
|
||||||
|
15 cap-color=pink i
|
||||||
|
16 cap-color=purple i
|
||||||
|
17 cap-color=red i
|
||||||
|
18 cap-color=white i
|
||||||
|
19 cap-color=yellow i
|
||||||
|
20 bruises?=bruises i
|
||||||
|
21 bruises?=no i
|
||||||
|
22 odor=almond i
|
||||||
|
23 odor=anise i
|
||||||
|
24 odor=creosote i
|
||||||
|
25 odor=fishy i
|
||||||
|
26 odor=foul i
|
||||||
|
27 odor=musty i
|
||||||
|
28 odor=none i
|
||||||
|
29 odor=pungent i
|
||||||
|
30 odor=spicy i
|
||||||
|
31 gill-attachment=attached i
|
||||||
|
32 gill-attachment=descending i
|
||||||
|
33 gill-attachment=free i
|
||||||
|
34 gill-attachment=notched i
|
||||||
|
35 gill-spacing=close i
|
||||||
|
36 gill-spacing=crowded i
|
||||||
|
37 gill-spacing=distant i
|
||||||
|
38 gill-size=broad i
|
||||||
|
39 gill-size=narrow i
|
||||||
|
40 gill-color=black i
|
||||||
|
41 gill-color=brown i
|
||||||
|
42 gill-color=buff i
|
||||||
|
43 gill-color=chocolate i
|
||||||
|
44 gill-color=gray i
|
||||||
|
45 gill-color=green i
|
||||||
|
46 gill-color=orange i
|
||||||
|
47 gill-color=pink i
|
||||||
|
48 gill-color=purple i
|
||||||
|
49 gill-color=red i
|
||||||
|
50 gill-color=white i
|
||||||
|
51 gill-color=yellow i
|
||||||
|
52 stalk-shape=enlarging i
|
||||||
|
53 stalk-shape=tapering i
|
||||||
|
54 stalk-root=bulbous i
|
||||||
|
55 stalk-root=club i
|
||||||
|
56 stalk-root=cup i
|
||||||
|
57 stalk-root=equal i
|
||||||
|
58 stalk-root=rhizomorphs i
|
||||||
|
59 stalk-root=rooted i
|
||||||
|
60 stalk-root=missing i
|
||||||
|
61 stalk-surface-above-ring=fibrous i
|
||||||
|
62 stalk-surface-above-ring=scaly i
|
||||||
|
63 stalk-surface-above-ring=silky i
|
||||||
|
64 stalk-surface-above-ring=smooth i
|
||||||
|
65 stalk-surface-below-ring=fibrous i
|
||||||
|
66 stalk-surface-below-ring=scaly i
|
||||||
|
67 stalk-surface-below-ring=silky i
|
||||||
|
68 stalk-surface-below-ring=smooth i
|
||||||
|
69 stalk-color-above-ring=brown i
|
||||||
|
70 stalk-color-above-ring=buff i
|
||||||
|
71 stalk-color-above-ring=cinnamon i
|
||||||
|
72 stalk-color-above-ring=gray i
|
||||||
|
73 stalk-color-above-ring=orange i
|
||||||
|
74 stalk-color-above-ring=pink i
|
||||||
|
75 stalk-color-above-ring=red i
|
||||||
|
76 stalk-color-above-ring=white i
|
||||||
|
77 stalk-color-above-ring=yellow i
|
||||||
|
78 stalk-color-below-ring=brown i
|
||||||
|
79 stalk-color-below-ring=buff i
|
||||||
|
80 stalk-color-below-ring=cinnamon i
|
||||||
|
81 stalk-color-below-ring=gray i
|
||||||
|
82 stalk-color-below-ring=orange i
|
||||||
|
83 stalk-color-below-ring=pink i
|
||||||
|
84 stalk-color-below-ring=red i
|
||||||
|
85 stalk-color-below-ring=white i
|
||||||
|
86 stalk-color-below-ring=yellow i
|
||||||
|
87 veil-type=partial i
|
||||||
|
88 veil-type=universal i
|
||||||
|
89 veil-color=brown i
|
||||||
|
90 veil-color=orange i
|
||||||
|
91 veil-color=white i
|
||||||
|
92 veil-color=yellow i
|
||||||
|
93 ring-number=none i
|
||||||
|
94 ring-number=one i
|
||||||
|
95 ring-number=two i
|
||||||
|
96 ring-type=cobwebby i
|
||||||
|
97 ring-type=evanescent i
|
||||||
|
98 ring-type=flaring i
|
||||||
|
99 ring-type=large i
|
||||||
|
100 ring-type=none i
|
||||||
|
101 ring-type=pendant i
|
||||||
|
102 ring-type=sheathing i
|
||||||
|
103 ring-type=zone i
|
||||||
|
104 spore-print-color=black i
|
||||||
|
105 spore-print-color=brown i
|
||||||
|
106 spore-print-color=buff i
|
||||||
|
107 spore-print-color=chocolate i
|
||||||
|
108 spore-print-color=green i
|
||||||
|
109 spore-print-color=orange i
|
||||||
|
110 spore-print-color=purple i
|
||||||
|
111 spore-print-color=white i
|
||||||
|
112 spore-print-color=yellow i
|
||||||
|
113 population=abundant i
|
||||||
|
114 population=clustered i
|
||||||
|
115 population=numerous i
|
||||||
|
116 population=scattered i
|
||||||
|
117 population=several i
|
||||||
|
118 population=solitary i
|
||||||
|
119 habitat=grasses i
|
||||||
|
120 habitat=leaves i
|
||||||
|
121 habitat=meadows i
|
||||||
|
122 habitat=paths i
|
||||||
|
123 habitat=urban i
|
||||||
|
124 habitat=waste i
|
||||||
|
125 habitat=woods i
|
||||||
12
wrapper/README.md
Normal file
12
wrapper/README.md
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Wrapper of XGBoost
|
||||||
|
=====
|
||||||
|
This folder provides wrapper of xgboost to other languages
|
||||||
|
|
||||||
|
|
||||||
|
Python
|
||||||
|
=====
|
||||||
|
To make the python module, type ```make``` in the root directory of project
|
||||||
|
|
||||||
|
R
|
||||||
|
=====
|
||||||
|
To make the R wrapper, type ```make R``` in the root directory of project
|
||||||
3
wrapper/python-example/README.md
Normal file
3
wrapper/python-example/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
example to use python xgboost, the data is generated from demo/binary_classification, in libsvm format
|
||||||
|
|
||||||
|
for usage: see demo.py and comments in demo.py
|
||||||
1611
wrapper/python-example/agaricus.txt.test
Normal file
1611
wrapper/python-example/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
wrapper/python-example/agaricus.txt.train
Normal file
6513
wrapper/python-example/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
112
wrapper/python-example/demo.py
Executable file
112
wrapper/python-example/demo.py
Executable file
@ -0,0 +1,112 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import scipy.sparse
|
||||||
|
# append the path to xgboost, you may need to change the following line
|
||||||
|
# alternatively, you can add the path to PYTHONPATH environment variable
|
||||||
|
sys.path.append('../')
|
||||||
|
import xgboost as xgb
|
||||||
|
|
||||||
|
### simple example
|
||||||
|
# load file from text file, also binary buffer generated by xgboost
|
||||||
|
dtrain = xgb.DMatrix('agaricus.txt.train')
|
||||||
|
dtest = xgb.DMatrix('agaricus.txt.test')
|
||||||
|
|
||||||
|
# specify parameters via map, definition are same as c++ version
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||||
|
|
||||||
|
# specify validations set to watch performance
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
num_round = 2
|
||||||
|
bst = xgb.train(param, dtrain, num_round, evallist)
|
||||||
|
|
||||||
|
# this is prediction
|
||||||
|
preds = bst.predict(dtest)
|
||||||
|
labels = dtest.get_label()
|
||||||
|
print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
|
||||||
|
bst.save_model('0001.model')
|
||||||
|
# dump model
|
||||||
|
bst.dump_model('dump.raw.txt')
|
||||||
|
# dump model with feature map
|
||||||
|
bst.dump_model('dump.nice.txt','featmap.txt')
|
||||||
|
|
||||||
|
###
|
||||||
|
# build dmatrix from scipy.sparse
|
||||||
|
print ('start running example of build DMatrix from scipy.sparse')
|
||||||
|
labels = []
|
||||||
|
row = []; col = []; dat = []
|
||||||
|
i = 0
|
||||||
|
for l in open('agaricus.txt.train'):
|
||||||
|
arr = l.split()
|
||||||
|
labels.append( int(arr[0]))
|
||||||
|
for it in arr[1:]:
|
||||||
|
k,v = it.split(':')
|
||||||
|
row.append(i); col.append(int(k)); dat.append(float(v))
|
||||||
|
i += 1
|
||||||
|
csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
|
||||||
|
dtrain = xgb.DMatrix( csr )
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
print ('start running example of build DMatrix from numpy array')
|
||||||
|
# NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
|
||||||
|
npymat = csr.todense()
|
||||||
|
dtrain = xgb.DMatrix( npymat)
|
||||||
|
dtrain.set_label(labels)
|
||||||
|
evallist = [(dtest,'eval'), (dtrain,'train')]
|
||||||
|
bst = xgb.train( param, dtrain, num_round, evallist )
|
||||||
|
|
||||||
|
###
|
||||||
|
# advanced: cutomsized loss function, set loss_type to 0, so that predict get untransformed score
|
||||||
|
#
|
||||||
|
print ('start running example to used cutomized objective function')
|
||||||
|
|
||||||
|
# note: for customized objective function, we leave objective as default
|
||||||
|
# note: what we are getting is margin value in prediction
|
||||||
|
# you must know what you are doing
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
|
||||||
|
|
||||||
|
# user define objective function, given prediction, return gradient and second order gradient
|
||||||
|
# this is loglikelihood loss
|
||||||
|
def logregobj(preds, dtrain):
|
||||||
|
labels = dtrain.get_label()
|
||||||
|
preds = 1.0 / (1.0 + np.exp(-preds))
|
||||||
|
grad = preds - labels
|
||||||
|
hess = preds * (1.0-preds)
|
||||||
|
return grad, hess
|
||||||
|
|
||||||
|
# user defined evaluation function, return a pair metric_name, result
|
||||||
|
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||||
|
# this may make buildin evalution metric not function properly
|
||||||
|
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||||
|
# the buildin evaluation error assumes input is after logistic transformation
|
||||||
|
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||||
|
def evalerror(preds, dtrain):
|
||||||
|
labels = dtrain.get_label()
|
||||||
|
# return a pair metric_name, result
|
||||||
|
# since preds are margin(before logistic transformation, cutoff at 0)
|
||||||
|
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||||
|
|
||||||
|
# training with customized objective, we can also do step by step training
|
||||||
|
# simply look at xgboost.py's implementation of train
|
||||||
|
bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)
|
||||||
|
|
||||||
|
|
||||||
|
###
|
||||||
|
# advanced: start from a initial base prediction
|
||||||
|
#
|
||||||
|
print ('start running example to start from a initial prediction')
|
||||||
|
# specify parameters via map, definition are same as c++ version
|
||||||
|
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
|
||||||
|
# train xgboost for 1 round
|
||||||
|
bst = xgb.train( param, dtrain, 1, evallist )
|
||||||
|
# Note: we need the margin value instead of transformed prediction in set_base_margin
|
||||||
|
# do predict with output_margin=True, will always give you margin values before logistic transformation
|
||||||
|
ptrain = bst.predict(dtrain, output_margin=True)
|
||||||
|
ptest = bst.predict(dtest, output_margin=True)
|
||||||
|
dtrain.set_base_margin(ptrain)
|
||||||
|
dtest.set_base_margin(ptest)
|
||||||
|
|
||||||
|
print ('this is result of running from initial prediction')
|
||||||
|
bst = xgb.train( param, dtrain, 1, evallist )
|
||||||
126
wrapper/python-example/featmap.txt
Normal file
126
wrapper/python-example/featmap.txt
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
0 cap-shape=bell i
|
||||||
|
1 cap-shape=conical i
|
||||||
|
2 cap-shape=convex i
|
||||||
|
3 cap-shape=flat i
|
||||||
|
4 cap-shape=knobbed i
|
||||||
|
5 cap-shape=sunken i
|
||||||
|
6 cap-surface=fibrous i
|
||||||
|
7 cap-surface=grooves i
|
||||||
|
8 cap-surface=scaly i
|
||||||
|
9 cap-surface=smooth i
|
||||||
|
10 cap-color=brown i
|
||||||
|
11 cap-color=buff i
|
||||||
|
12 cap-color=cinnamon i
|
||||||
|
13 cap-color=gray i
|
||||||
|
14 cap-color=green i
|
||||||
|
15 cap-color=pink i
|
||||||
|
16 cap-color=purple i
|
||||||
|
17 cap-color=red i
|
||||||
|
18 cap-color=white i
|
||||||
|
19 cap-color=yellow i
|
||||||
|
20 bruises?=bruises i
|
||||||
|
21 bruises?=no i
|
||||||
|
22 odor=almond i
|
||||||
|
23 odor=anise i
|
||||||
|
24 odor=creosote i
|
||||||
|
25 odor=fishy i
|
||||||
|
26 odor=foul i
|
||||||
|
27 odor=musty i
|
||||||
|
28 odor=none i
|
||||||
|
29 odor=pungent i
|
||||||
|
30 odor=spicy i
|
||||||
|
31 gill-attachment=attached i
|
||||||
|
32 gill-attachment=descending i
|
||||||
|
33 gill-attachment=free i
|
||||||
|
34 gill-attachment=notched i
|
||||||
|
35 gill-spacing=close i
|
||||||
|
36 gill-spacing=crowded i
|
||||||
|
37 gill-spacing=distant i
|
||||||
|
38 gill-size=broad i
|
||||||
|
39 gill-size=narrow i
|
||||||
|
40 gill-color=black i
|
||||||
|
41 gill-color=brown i
|
||||||
|
42 gill-color=buff i
|
||||||
|
43 gill-color=chocolate i
|
||||||
|
44 gill-color=gray i
|
||||||
|
45 gill-color=green i
|
||||||
|
46 gill-color=orange i
|
||||||
|
47 gill-color=pink i
|
||||||
|
48 gill-color=purple i
|
||||||
|
49 gill-color=red i
|
||||||
|
50 gill-color=white i
|
||||||
|
51 gill-color=yellow i
|
||||||
|
52 stalk-shape=enlarging i
|
||||||
|
53 stalk-shape=tapering i
|
||||||
|
54 stalk-root=bulbous i
|
||||||
|
55 stalk-root=club i
|
||||||
|
56 stalk-root=cup i
|
||||||
|
57 stalk-root=equal i
|
||||||
|
58 stalk-root=rhizomorphs i
|
||||||
|
59 stalk-root=rooted i
|
||||||
|
60 stalk-root=missing i
|
||||||
|
61 stalk-surface-above-ring=fibrous i
|
||||||
|
62 stalk-surface-above-ring=scaly i
|
||||||
|
63 stalk-surface-above-ring=silky i
|
||||||
|
64 stalk-surface-above-ring=smooth i
|
||||||
|
65 stalk-surface-below-ring=fibrous i
|
||||||
|
66 stalk-surface-below-ring=scaly i
|
||||||
|
67 stalk-surface-below-ring=silky i
|
||||||
|
68 stalk-surface-below-ring=smooth i
|
||||||
|
69 stalk-color-above-ring=brown i
|
||||||
|
70 stalk-color-above-ring=buff i
|
||||||
|
71 stalk-color-above-ring=cinnamon i
|
||||||
|
72 stalk-color-above-ring=gray i
|
||||||
|
73 stalk-color-above-ring=orange i
|
||||||
|
74 stalk-color-above-ring=pink i
|
||||||
|
75 stalk-color-above-ring=red i
|
||||||
|
76 stalk-color-above-ring=white i
|
||||||
|
77 stalk-color-above-ring=yellow i
|
||||||
|
78 stalk-color-below-ring=brown i
|
||||||
|
79 stalk-color-below-ring=buff i
|
||||||
|
80 stalk-color-below-ring=cinnamon i
|
||||||
|
81 stalk-color-below-ring=gray i
|
||||||
|
82 stalk-color-below-ring=orange i
|
||||||
|
83 stalk-color-below-ring=pink i
|
||||||
|
84 stalk-color-below-ring=red i
|
||||||
|
85 stalk-color-below-ring=white i
|
||||||
|
86 stalk-color-below-ring=yellow i
|
||||||
|
87 veil-type=partial i
|
||||||
|
88 veil-type=universal i
|
||||||
|
89 veil-color=brown i
|
||||||
|
90 veil-color=orange i
|
||||||
|
91 veil-color=white i
|
||||||
|
92 veil-color=yellow i
|
||||||
|
93 ring-number=none i
|
||||||
|
94 ring-number=one i
|
||||||
|
95 ring-number=two i
|
||||||
|
96 ring-type=cobwebby i
|
||||||
|
97 ring-type=evanescent i
|
||||||
|
98 ring-type=flaring i
|
||||||
|
99 ring-type=large i
|
||||||
|
100 ring-type=none i
|
||||||
|
101 ring-type=pendant i
|
||||||
|
102 ring-type=sheathing i
|
||||||
|
103 ring-type=zone i
|
||||||
|
104 spore-print-color=black i
|
||||||
|
105 spore-print-color=brown i
|
||||||
|
106 spore-print-color=buff i
|
||||||
|
107 spore-print-color=chocolate i
|
||||||
|
108 spore-print-color=green i
|
||||||
|
109 spore-print-color=orange i
|
||||||
|
110 spore-print-color=purple i
|
||||||
|
111 spore-print-color=white i
|
||||||
|
112 spore-print-color=yellow i
|
||||||
|
113 population=abundant i
|
||||||
|
114 population=clustered i
|
||||||
|
115 population=numerous i
|
||||||
|
116 population=scattered i
|
||||||
|
117 population=several i
|
||||||
|
118 population=solitary i
|
||||||
|
119 habitat=grasses i
|
||||||
|
120 habitat=leaves i
|
||||||
|
121 habitat=meadows i
|
||||||
|
122 habitat=paths i
|
||||||
|
123 habitat=urban i
|
||||||
|
124 habitat=waste i
|
||||||
|
125 habitat=woods i
|
||||||
136
wrapper/xgboost.R
Normal file
136
wrapper/xgboost.R
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
# load in library
|
||||||
|
dyn.load("./libxgboostR.so")
|
||||||
|
|
||||||
|
# constructing DMatrix
|
||||||
|
xgb.DMatrix <- function(data) {
|
||||||
|
if (typeof(data) == "character") {
|
||||||
|
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE))
|
||||||
|
}else {
|
||||||
|
stop("xgb.DMatrix cannot recognize data type")
|
||||||
|
}
|
||||||
|
return(structure(handle, class="xgb.DMatrix"))
|
||||||
|
}
|
||||||
|
# construct a Booster from cachelist
|
||||||
|
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||||
|
if (typeof(cachelist) != "list") {
|
||||||
|
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||||
|
}
|
||||||
|
for (dm in cachelist) {
|
||||||
|
if (class(dm) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
handle <- .Call("XGBoosterCreate_R", cachelist)
|
||||||
|
for (i in 1:length(params)) {
|
||||||
|
p = params[i]
|
||||||
|
.Call("XGBoosterSetParam_R", handle, names(p), as.character(p))
|
||||||
|
}
|
||||||
|
if (!is.null(modelfile)) {
|
||||||
|
if (typeof(modelfile) != "character"){
|
||||||
|
stop("xgb.Booster: modelfile must be character");
|
||||||
|
}
|
||||||
|
.Call("XGBoosterLoadModel_R", handle, modelfile)
|
||||||
|
}
|
||||||
|
return(structure(handle, class="xgb.Booster"))
|
||||||
|
}
|
||||||
|
# train a model using given parameters
|
||||||
|
xgb.train <- function(params, dtrain, nrounds=10, watchlist=list(), obj=NULL) {
|
||||||
|
if (typeof(params) != "list") {
|
||||||
|
stop("xgb.train: first argument params must be list");
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.train: second argument dtrain must be xgb.DMatrix");
|
||||||
|
}
|
||||||
|
bst <- xgb.Booster(params, append(watchlist,dtrain))
|
||||||
|
for (i in 1:nrounds) {
|
||||||
|
if (is.null(obj)) {
|
||||||
|
succ <- xgb.iter.update(bst, dtrain, i-1)
|
||||||
|
} else {
|
||||||
|
pred = xgb.predict(bst, dtrain)
|
||||||
|
gpair = obj(pred, dtrain)
|
||||||
|
succ <- xgb.iter.boost(bst, dtrain, gpair)
|
||||||
|
}
|
||||||
|
if (length(watchlist) != 0) {
|
||||||
|
msg <- xgb.iter.eval(bst, watchlist, i-1)
|
||||||
|
cat(msg); cat("\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(bst)
|
||||||
|
}
|
||||||
|
# save model or DMatrix to file
|
||||||
|
xgb.save <- function(handle, fname) {
|
||||||
|
if (typeof(fname) != "character") {
|
||||||
|
stop("xgb.save: fname must be character");
|
||||||
|
}
|
||||||
|
if (class(handle) == "xgb.Booster") {
|
||||||
|
.Call("XGBoosterSaveModel_R", handle, fname);
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
if (class(handle) == "xgb.DMatrix") {
|
||||||
|
.Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE))
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
|
||||||
|
return(FALSE)
|
||||||
|
}
|
||||||
|
# predict
|
||||||
|
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dmat) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
ret = .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin))
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
##--------------------------------------
|
||||||
|
# the following are low level iteratively function, not needed
|
||||||
|
# if you do not want to use them
|
||||||
|
#---------------------------------------
|
||||||
|
# iteratively update booster with dtrain
|
||||||
|
xgb.iter.update <- function(booster, dtrain, iter) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain)
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
# iteratively update booster with customized statistics
|
||||||
|
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.iter.update: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (class(dtrain) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
|
||||||
|
}
|
||||||
|
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess)
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
# iteratively evaluate one iteration
|
||||||
|
xgb.iter.eval <- function(booster, watchlist, iter) {
|
||||||
|
if (class(booster) != "xgb.Booster") {
|
||||||
|
stop("xgb.eval: first argument must be type xgb.Booster")
|
||||||
|
}
|
||||||
|
if (typeof(watchlist) != "list") {
|
||||||
|
stop("xgb.eval: only accepts list of DMatrix as watchlist")
|
||||||
|
}
|
||||||
|
for (w in watchlist) {
|
||||||
|
if (class(w) != "xgb.DMatrix") {
|
||||||
|
stop("xgb.eval: watch list can only contain xgb.DMatrix")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
evnames <- list()
|
||||||
|
for (i in 1:length(watchlist)) {
|
||||||
|
w <- watchlist[i]
|
||||||
|
if (length(names(w)) == 0) {
|
||||||
|
stop("xgb.eval: name tag must be presented for every elements in watchlist")
|
||||||
|
}
|
||||||
|
evnames <- append(evnames, names(w))
|
||||||
|
}
|
||||||
|
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)
|
||||||
|
return(msg)
|
||||||
|
}
|
||||||
266
wrapper/xgboost.py
Normal file
266
wrapper/xgboost.py
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
# Author: Tianqi Chen, Bing Xu
|
||||||
|
# module for xgboost
|
||||||
|
import ctypes
|
||||||
|
import os
|
||||||
|
# optinally have scipy sparse, though not necessary
|
||||||
|
import numpy
|
||||||
|
import sys
|
||||||
|
import numpy.ctypeslib
|
||||||
|
import scipy.sparse as scp
|
||||||
|
|
||||||
|
# set this line correctly
|
||||||
|
XGBOOST_PATH = os.path.dirname(__file__)+'/libxgboostwrapper.so'
|
||||||
|
|
||||||
|
# load in xgboost library
|
||||||
|
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||||
|
|
||||||
|
xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
|
||||||
|
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
|
||||||
|
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
||||||
|
|
||||||
|
xglib.XGBoosterCreate.restype = ctypes.c_void_p
|
||||||
|
xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
|
||||||
|
xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
|
||||||
|
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
|
||||||
|
|
||||||
|
|
||||||
|
def ctypes2numpy(cptr, length):
|
||||||
|
# convert a ctypes pointer array to numpy
|
||||||
|
assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
|
||||||
|
res = numpy.zeros(length, dtype='float32')
|
||||||
|
assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0])
|
||||||
|
return res
|
||||||
|
|
||||||
|
# data matrix used in xgboost
|
||||||
|
class DMatrix:
|
||||||
|
# constructor
|
||||||
|
def __init__(self, data, label=None, missing=0.0, weight = None):
|
||||||
|
# force into void_p, mac need to pass things in as void_p
|
||||||
|
if data == None:
|
||||||
|
self.handle = None
|
||||||
|
return
|
||||||
|
if isinstance(data, str):
|
||||||
|
self.handle = ctypes.c_void_p(
|
||||||
|
xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1))
|
||||||
|
elif isinstance(data, scp.csr_matrix):
|
||||||
|
self.__init_from_csr(data)
|
||||||
|
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
|
||||||
|
self.__init_from_npy2d(data, missing)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
csr = scp.csr_matrix(data)
|
||||||
|
self.__init_from_csr(csr)
|
||||||
|
except:
|
||||||
|
raise Exception("can not intialize DMatrix from"+str(type(data)))
|
||||||
|
if label != None:
|
||||||
|
self.set_label(label)
|
||||||
|
if weight !=None:
|
||||||
|
self.set_weight(weight)
|
||||||
|
# convert data from csr matrix
|
||||||
|
def __init_from_csr(self, csr):
|
||||||
|
assert len(csr.indices) == len(csr.data)
|
||||||
|
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR(
|
||||||
|
(ctypes.c_ulong * len(csr.indptr))(*csr.indptr),
|
||||||
|
(ctypes.c_uint * len(csr.indices))(*csr.indices),
|
||||||
|
(ctypes.c_float * len(csr.data))(*csr.data),
|
||||||
|
len(csr.indptr), len(csr.data)))
|
||||||
|
# convert data from numpy matrix
|
||||||
|
def __init_from_npy2d(self,mat,missing):
|
||||||
|
data = numpy.array(mat.reshape(mat.size), dtype='float32')
|
||||||
|
self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat(
|
||||||
|
data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
||||||
|
mat.shape[0], mat.shape[1], ctypes.c_float(missing)))
|
||||||
|
# destructor
|
||||||
|
def __del__(self):
|
||||||
|
xglib.XGDMatrixFree(self.handle)
|
||||||
|
def __get_float_info(self, field):
|
||||||
|
length = ctypes.c_ulong()
|
||||||
|
ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
|
||||||
|
ctypes.byref(length))
|
||||||
|
return ctypes2numpy(ret, length.value)
|
||||||
|
def __set_float_info(self, field, data):
|
||||||
|
xglib.XGDMatrixSetFloatInfo(self.handle,ctypes.c_char_p(field.encode('utf-8')),
|
||||||
|
(ctypes.c_float*len(data))(*data), len(data))
|
||||||
|
# load data from file
|
||||||
|
def save_binary(self, fname, silent=True):
|
||||||
|
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
|
||||||
|
# set label of dmatrix
|
||||||
|
def set_label(self, label):
|
||||||
|
self.__set_float_info('label', label)
|
||||||
|
# set weight of each instances
|
||||||
|
def set_weight(self, weight):
|
||||||
|
self.__set_float_info('weight', weight)
|
||||||
|
# set initialized margin prediction
|
||||||
|
def set_base_margin(self, margin):
|
||||||
|
"""
|
||||||
|
set base margin of booster to start from
|
||||||
|
this can be used to specify a prediction value of
|
||||||
|
existing model to be base_margin
|
||||||
|
However, remember margin is needed, instead of transformed prediction
|
||||||
|
e.g. for logistic regression: need to put in value before logistic transformation
|
||||||
|
see also example/demo.py
|
||||||
|
"""
|
||||||
|
self.__set_float_info('base_margin', margin)
|
||||||
|
# set group size of dmatrix, used for rank
|
||||||
|
def set_group(self, group):
|
||||||
|
xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint*len(group))(*group), len(group))
|
||||||
|
# get label from dmatrix
|
||||||
|
def get_label(self):
|
||||||
|
return self.__get_float_info('label')
|
||||||
|
# get weight from dmatrix
|
||||||
|
def get_weight(self):
|
||||||
|
return self.__get_float_info('weight')
|
||||||
|
# get base_margin from dmatrix
|
||||||
|
def get_base_margin(self):
|
||||||
|
return self.__get_float_info('base_margin')
|
||||||
|
def num_row(self):
|
||||||
|
return xglib.XGDMatrixNumRow(self.handle)
|
||||||
|
# slice the DMatrix to return a new DMatrix that only contains rindex
|
||||||
|
def slice(self, rindex):
|
||||||
|
res = DMatrix(None)
|
||||||
|
res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix(
|
||||||
|
self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex)))
|
||||||
|
return res
|
||||||
|
|
||||||
|
class Booster:
|
||||||
|
"""learner class """
|
||||||
|
def __init__(self, params={}, cache=[], model_name = None):
|
||||||
|
""" constructor, param: """
|
||||||
|
for d in cache:
|
||||||
|
assert isinstance(d, DMatrix)
|
||||||
|
dmats = (ctypes.c_void_p * len(cache))(*[ d.handle for d in cache])
|
||||||
|
self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache)))
|
||||||
|
self.set_param({'seed':0})
|
||||||
|
self.set_param(params)
|
||||||
|
if model_name != None:
|
||||||
|
self.load_model(model_name)
|
||||||
|
def __del__(self):
|
||||||
|
xglib.XGBoosterFree(self.handle)
|
||||||
|
def set_param(self, params, pv=None):
|
||||||
|
if isinstance(params, dict):
|
||||||
|
for k, v in params.items():
|
||||||
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||||
|
elif isinstance(params,str) and pv != None:
|
||||||
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(params.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(pv).encode('utf-8')))
|
||||||
|
else:
|
||||||
|
for k, v in params:
|
||||||
|
xglib.XGBoosterSetParam(
|
||||||
|
self.handle, ctypes.c_char_p(k.encode('utf-8')),
|
||||||
|
ctypes.c_char_p(str(v).encode('utf-8')))
|
||||||
|
def update(self, dtrain, it):
|
||||||
|
"""
|
||||||
|
update
|
||||||
|
dtrain: the training DMatrix
|
||||||
|
it: current iteration number
|
||||||
|
"""
|
||||||
|
assert isinstance(dtrain, DMatrix)
|
||||||
|
xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle)
|
||||||
|
def boost(self, dtrain, grad, hess):
|
||||||
|
""" update """
|
||||||
|
assert len(grad) == len(hess)
|
||||||
|
assert isinstance(dtrain, DMatrix)
|
||||||
|
xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
|
||||||
|
(ctypes.c_float*len(grad))(*grad),
|
||||||
|
(ctypes.c_float*len(hess))(*hess),
|
||||||
|
len(grad))
|
||||||
|
def eval_set(self, evals, it = 0):
|
||||||
|
for d in evals:
|
||||||
|
assert isinstance(d[0], DMatrix)
|
||||||
|
assert isinstance(d[1], str)
|
||||||
|
dmats = (ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals])
|
||||||
|
evnames = (ctypes.c_char_p * len(evals))(
|
||||||
|
* [ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
|
||||||
|
return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
|
||||||
|
def eval(self, mat, name = 'eval', it = 0):
|
||||||
|
return self.eval_set( [(mat,name)], it)
|
||||||
|
def predict(self, data, output_margin=False):
    """Predict on data.

    data: the DMatrix storing the input
    output_margin: whether to output the raw, untransformed margin value
    """
    length = ctypes.c_ulong()
    raw = xglib.XGBoosterPredict(
        self.handle, data.handle, int(output_margin), ctypes.byref(length))
    return ctypes2numpy(raw, length.value)
|
||||||
|
def save_model(self, fname):
    """Serialize the booster to the file named fname."""
    fname_c = ctypes.c_char_p(fname.encode('utf-8'))
    xglib.XGBoosterSaveModel(self.handle, fname_c)
|
||||||
|
def load_model(self, fname):
    """Load booster state from the file named fname."""
    fname_c = ctypes.c_char_p(fname.encode('utf-8'))
    xglib.XGBoosterLoadModel(self.handle, fname_c)
|
||||||
|
def dump_model(self, fo, fmap=''):
    """Dump the model as text into fo.

    fo: a writable file object, or a file name (str) to be opened/closed here.
    fmap: optional feature-map file name passed to get_dump.
    """
    need_close = isinstance(fo, str)
    if need_close:
        fo = open(fo, 'w')
    # try/finally so a file we opened is closed even if get_dump or a
    # write raises (the original leaked the handle on error).
    try:
        for i, booster in enumerate(self.get_dump(fmap)):
            fo.write('booster[%d]:\n' % i)
            fo.write(booster)
    finally:
        if need_close:
            fo.close()
|
||||||
|
def get_dump(self, fmap=''):
    """Return the model dump as a list of strings, one entry per booster."""
    length = ctypes.c_ulong()
    sarr = xglib.XGBoosterDumpModel(
        self.handle, ctypes.c_char_p(fmap.encode('utf-8')), ctypes.byref(length))
    return [str(sarr[i]) for i in range(length.value)]
|
||||||
|
def get_fscore(self, fmap=''):
    """Return feature importance: a dict mapping feature id -> number of
    times the feature is used as a split in the dumped trees.

    fmap: optional feature-map file name passed to get_dump.
    """
    # NOTE: the original contained a leftover Python-2 debug statement
    # (`print tree`) — a syntax error under Python 3 — removed here.
    counts = {}
    for tree in self.get_dump(fmap):
        for line in tree.split('\n'):
            parts = line.split('[')
            if len(parts) == 1:
                continue  # leaf line: no split condition to count
            # split condition looks like "[fid<value]"; extract fid
            fid = parts[1].split(']')[0].split('<')[0]
            counts[fid] = counts.get(fid, 0) + 1
    return counts
|
||||||
|
|
||||||
|
def evaluate(bst, evals, it, feval=None):
    """Evaluate a booster on an evaluation set.

    bst: the Booster to evaluate.
    evals: list of (DMatrix, name) pairs.
    it: current iteration number (prefixed to the result string).
    feval: optional custom metric taking (preds, dmatrix) and returning
           (metric_name, value); when None the builtin eval_set is used.
    Returns the evaluation summary string.
    """
    # 'is None' instead of '== None' (identity comparison, PEP 8)
    if feval is None:
        return bst.eval_set(evals, it)
    res = '[%d]' % it
    for dm, evname in evals:
        name, val = feval(bst.predict(dm), dm)
        res += '\t%s-%s:%f' % (evname, name, val)
    return res
|
||||||
|
|
||||||
|
def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None):
    """Train a booster with the given parameters.

    params: booster parameters (dict or list of pairs).
    dtrain: the training DMatrix.
    num_boost_round: number of boosting iterations.
    evals: sequence of (DMatrix, name) pairs evaluated after each round
           (default changed from a mutable [] to an immutable tuple).
    obj: optional customized objective; called as obj(pred, dtrain) and
         must return (grad, hess).
    feval: optional customized evaluation metric, forwarded to evaluate().
    Returns the trained Booster.
    """
    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    for i in range(num_boost_round):
        if obj is None:
            bst.update(dtrain, i)
        else:
            # customized objective: derive gradients from current prediction
            pred = bst.predict(dtrain)
            grad, hess = obj(pred, dtrain)
            bst.boost(dtrain, grad, hess)
        if len(evals) != 0:
            sys.stderr.write(evaluate(bst, evals, i, feval) + '\n')
    return bst
|
||||||
115
wrapper/xgboost_R.cpp
Normal file
115
wrapper/xgboost_R.cpp
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include "xgboost_wrapper.h"
|
||||||
|
#include "xgboost_R.h"
|
||||||
|
#include "../src/utils/utils.h"
|
||||||
|
#include "../src/utils/omp.h"
|
||||||
|
|
||||||
|
using namespace xgboost;
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
void _DMatrixFinalizer(SEXP ext) {
|
||||||
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
|
XGDMatrixFree(R_ExternalPtrAddr(ext));
|
||||||
|
R_ClearExternalPtr(ext);
|
||||||
|
}
|
||||||
|
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
||||||
|
void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
|
||||||
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
|
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
|
||||||
|
UNPROTECT(1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
||||||
|
XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
||||||
|
CHAR(asChar(fname)), asInteger(silent));
|
||||||
|
}
|
||||||
|
|
||||||
|
// functions related to booster
|
||||||
|
void _BoosterFinalizer(SEXP ext) {
|
||||||
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
|
XGBoosterFree(R_ExternalPtrAddr(ext));
|
||||||
|
R_ClearExternalPtr(ext);
|
||||||
|
}
|
||||||
|
SEXP XGBoosterCreate_R(SEXP dmats) {
|
||||||
|
int len = length(dmats);
|
||||||
|
std::vector<void*> dvec;
|
||||||
|
for (int i = 0; i < len; ++i){
|
||||||
|
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||||
|
}
|
||||||
|
void *handle = XGBoosterCreate(&dvec[0], dvec.size());
|
||||||
|
SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
|
||||||
|
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
|
||||||
|
UNPROTECT(1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
||||||
|
XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
||||||
|
CHAR(asChar(name)),
|
||||||
|
CHAR(asChar(val)));
|
||||||
|
}
|
||||||
|
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
||||||
|
XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
||||||
|
asInteger(iter),
|
||||||
|
R_ExternalPtrAddr(dtrain));
|
||||||
|
}
|
||||||
|
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
||||||
|
utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
|
||||||
|
int len = length(grad);
|
||||||
|
std::vector<float> tgrad(len), thess(len);
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for (int j = 0; j < len; ++j) {
|
||||||
|
tgrad[j] = REAL(grad)[j];
|
||||||
|
thess[j] = REAL(hess)[j];
|
||||||
|
}
|
||||||
|
XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
|
||||||
|
R_ExternalPtrAddr(dtrain),
|
||||||
|
&tgrad[0], &thess[0], len);
|
||||||
|
}
|
||||||
|
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
||||||
|
utils::Check(length(dmats) == length(evnames), "dmats and evnams must have same length");
|
||||||
|
int len = length(dmats);
|
||||||
|
std::vector<void*> vec_dmats;
|
||||||
|
std::vector<std::string> vec_names;
|
||||||
|
std::vector<const char*> vec_sptr;
|
||||||
|
for (int i = 0; i < len; ++i){
|
||||||
|
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
|
||||||
|
vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
|
||||||
|
vec_sptr.push_back(vec_names.back().c_str());
|
||||||
|
}
|
||||||
|
return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
|
||||||
|
asInteger(iter),
|
||||||
|
&vec_dmats[0], &vec_sptr[0], len));
|
||||||
|
}
|
||||||
|
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
|
||||||
|
size_t olen;
|
||||||
|
const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
|
||||||
|
R_ExternalPtrAddr(dmat),
|
||||||
|
asInteger(output_margin),
|
||||||
|
&olen);
|
||||||
|
SEXP ret = PROTECT(allocVector(REALSXP, olen));
|
||||||
|
for (size_t i = 0; i < olen; ++i) {
|
||||||
|
REAL(ret)[i] = res[i];
|
||||||
|
}
|
||||||
|
UNPROTECT(1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
||||||
|
XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||||
|
}
|
||||||
|
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
||||||
|
XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
|
||||||
|
}
|
||||||
|
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
  // Dump the model as text into the file `fname`; `fmap` names the feature
  // map file (may be an empty string).
  size_t olen;
  const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                        CHAR(asChar(fmap)),
                                        &olen);
  FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
  for (size_t i = 0; i < olen; ++i) {
    // cast: %lu expects unsigned long; size_t is not guaranteed to be
    // unsigned long on all platforms (passing it uncast is UB there)
    fprintf(fo, "booster[%lu]:\n", static_cast<unsigned long>(i));
    fprintf(fo, "%s\n", res[i]);
  }
  fclose(fo);
}
|
||||||
|
}
|
||||||
91
wrapper/xgboost_R.h
Normal file
91
wrapper/xgboost_R.h
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
#ifndef XGBOOST_WRAPPER_R_H_
|
||||||
|
#define XGBOOST_WRAPPER_R_H_
|
||||||
|
/*!
|
||||||
|
 * \file xgboost_R.h
|
||||||
|
* \author Tianqi Chen
|
||||||
|
* \brief R wrapper of xgboost
|
||||||
|
*/
|
||||||
|
extern "C" {
|
||||||
|
#include <Rinternals.h>
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix
|
||||||
|
* \param fname name of the content
|
||||||
|
* \param silent whether print messages
|
||||||
|
* \return a loaded data matrix
|
||||||
|
*/
|
||||||
|
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix into binary file
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param fname file name
|
||||||
|
* \param silent print statistics when saving
|
||||||
|
*/
|
||||||
|
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
|
||||||
|
/*!
|
||||||
|
* \brief create xgboost learner
|
||||||
|
* \param dmats a list of dmatrix handles that will be cached
|
||||||
|
*/
|
||||||
|
SEXP XGBoosterCreate_R(SEXP dmats);
|
||||||
|
/*!
|
||||||
|
* \brief set parameters
|
||||||
|
* \param handle handle
|
||||||
|
* \param name parameter name
|
||||||
|
* \param val value of parameter
|
||||||
|
*/
|
||||||
|
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
|
||||||
|
/*!
|
||||||
|
* \brief update the model in one round using dtrain
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dtrain training data
|
||||||
|
*/
|
||||||
|
void XGBoosterUpdateOneIter_R(SEXP ext, SEXP iter, SEXP dtrain);
|
||||||
|
/*!
|
||||||
|
* \brief update the model, by directly specify gradient and second order gradient,
|
||||||
|
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||||
|
* \param handle handle
|
||||||
|
* \param dtrain training data
|
||||||
|
* \param grad gradient statistics
|
||||||
|
* \param hess second order gradient statistics
|
||||||
|
*/
|
||||||
|
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
|
||||||
|
/*!
|
||||||
|
* \brief get evaluation statistics for xgboost
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dmats list of handles to dmatrices
|
||||||
|
* \param evname name of evaluation
|
||||||
|
 * \return the string containing evaluation statistics
|
||||||
|
*/
|
||||||
|
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
|
||||||
|
/*!
|
||||||
|
* \brief make prediction based on dmat
|
||||||
|
* \param handle handle
|
||||||
|
* \param dmat data matrix
|
||||||
|
* \param output_margin whether only output raw margin value
|
||||||
|
*/
|
||||||
|
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
|
||||||
|
/*!
|
||||||
|
* \brief load model from existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
|
||||||
|
/*!
|
||||||
|
* \brief save model into existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
|
||||||
|
/*!
|
||||||
|
* \brief dump model into text file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name of model that can be dumped into
|
||||||
|
* \param fmap name to fmap can be empty string
|
||||||
|
*/
|
||||||
|
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap);
|
||||||
|
};
|
||||||
|
#endif // XGBOOST_WRAPPER_R_H_
|
||||||
249
wrapper/xgboost_wrapper.cpp
Normal file
249
wrapper/xgboost_wrapper.cpp
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
// implementations in ctypes
|
||||||
|
#include <cstdio>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "./xgboost_wrapper.h"
|
||||||
|
#include "../src/data.h"
|
||||||
|
#include "../src/learner/learner-inl.hpp"
|
||||||
|
#include "../src/io/io.h"
|
||||||
|
#include "../src/io/simple_dmatrix-inl.hpp"
|
||||||
|
|
||||||
|
using namespace xgboost;
|
||||||
|
using namespace xgboost::io;
|
||||||
|
|
||||||
|
namespace xgboost {
|
||||||
|
namespace wrapper {
|
||||||
|
// booster wrapper class
|
||||||
|
class Booster: public learner::BoostLearner<FMatrixS> {
|
||||||
|
public:
|
||||||
|
explicit Booster(const std::vector<DataMatrix*>& mats) {
|
||||||
|
this->silent = 1;
|
||||||
|
this->init_model = false;
|
||||||
|
this->SetCacheData(mats);
|
||||||
|
}
|
||||||
|
const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) {
|
||||||
|
this->CheckInitModel();
|
||||||
|
this->Predict(dmat, output_margin, &this->preds_);
|
||||||
|
*len = this->preds_.size();
|
||||||
|
return &this->preds_[0];
|
||||||
|
}
|
||||||
|
inline void BoostOneIter(const DataMatrix &train,
|
||||||
|
float *grad, float *hess, size_t len) {
|
||||||
|
this->gpair_.resize(len);
|
||||||
|
const unsigned ndata = static_cast<unsigned>(len);
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for (unsigned j = 0; j < ndata; ++j) {
|
||||||
|
gpair_[j] = bst_gpair(grad[j], hess[j]);
|
||||||
|
}
|
||||||
|
gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
|
||||||
|
}
|
||||||
|
inline void CheckInitModel(void) {
|
||||||
|
if (!init_model) {
|
||||||
|
this->InitModel(); init_model = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline void LoadModel(const char *fname) {
|
||||||
|
learner::BoostLearner<FMatrixS>::LoadModel(fname);
|
||||||
|
this->init_model = true;
|
||||||
|
}
|
||||||
|
inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
|
||||||
|
model_dump = this->DumpModel(fmap, with_stats);
|
||||||
|
model_dump_cptr.resize(model_dump.size());
|
||||||
|
for (size_t i = 0; i < model_dump.size(); ++i) {
|
||||||
|
model_dump_cptr[i] = model_dump[i].c_str();
|
||||||
|
}
|
||||||
|
*len = model_dump.size();
|
||||||
|
return &model_dump_cptr[0];
|
||||||
|
}
|
||||||
|
// temporal fields
|
||||||
|
// temporal data to save evaluation dump
|
||||||
|
std::string eval_str;
|
||||||
|
// temporal space to save model dump
|
||||||
|
std::vector<std::string> model_dump;
|
||||||
|
std::vector<const char*> model_dump_cptr;
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool init_model;
|
||||||
|
};
|
||||||
|
} // namespace wrapper
|
||||||
|
} // namespace xgboost
|
||||||
|
|
||||||
|
using namespace xgboost::wrapper;
|
||||||
|
|
||||||
|
extern "C"{
|
||||||
|
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
|
||||||
|
return LoadDataMatrix(fname, silent, false);
|
||||||
|
}
|
||||||
|
void* XGDMatrixCreateFromCSR(const size_t *indptr,
|
||||||
|
const unsigned *indices,
|
||||||
|
const float *data,
|
||||||
|
size_t nindptr,
|
||||||
|
size_t nelem) {
|
||||||
|
DMatrixSimple *p_mat = new DMatrixSimple();
|
||||||
|
DMatrixSimple &mat = *p_mat;
|
||||||
|
mat.row_ptr_.resize(nindptr);
|
||||||
|
memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t)*nindptr);
|
||||||
|
mat.row_data_.resize(nelem);
|
||||||
|
for (size_t i = 0; i < nelem; ++i) {
|
||||||
|
mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
|
||||||
|
mat.info.num_col = std::max(mat.info.num_col,
|
||||||
|
static_cast<size_t>(indices[i]+1));
|
||||||
|
}
|
||||||
|
mat.info.num_row = nindptr - 1;
|
||||||
|
return p_mat;
|
||||||
|
}
|
||||||
|
void* XGDMatrixCreateFromMat(const float *data,
|
||||||
|
size_t nrow,
|
||||||
|
size_t ncol,
|
||||||
|
float missing) {
|
||||||
|
DMatrixSimple *p_mat = new DMatrixSimple();
|
||||||
|
DMatrixSimple &mat = *p_mat;
|
||||||
|
mat.info.num_row = nrow;
|
||||||
|
mat.info.num_col = ncol;
|
||||||
|
for (size_t i = 0; i < nrow; ++i, data += ncol) {
|
||||||
|
size_t nelem = 0;
|
||||||
|
for (size_t j = 0; j < ncol; ++j) {
|
||||||
|
if (data[j] != missing) {
|
||||||
|
mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
|
||||||
|
++nelem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
|
||||||
|
}
|
||||||
|
return p_mat;
|
||||||
|
}
|
||||||
|
void* XGDMatrixSliceDMatrix(void *handle,
|
||||||
|
const int *idxset,
|
||||||
|
size_t len) {
|
||||||
|
DMatrixSimple tmp;
|
||||||
|
DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
|
||||||
|
if (dsrc.magic != DMatrixSimple::kMagic) {
|
||||||
|
tmp.CopyFrom(dsrc);
|
||||||
|
}
|
||||||
|
DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
|
||||||
|
*static_cast<DMatrixSimple*>(handle): tmp);
|
||||||
|
DMatrixSimple *p_ret = new DMatrixSimple();
|
||||||
|
DMatrixSimple &ret = *p_ret;
|
||||||
|
|
||||||
|
utils::Check(src.info.group_ptr.size() == 0,
|
||||||
|
"slice does not support group structure");
|
||||||
|
ret.Clear();
|
||||||
|
ret.info.num_row = len;
|
||||||
|
ret.info.num_col = src.info.num_col;
|
||||||
|
|
||||||
|
utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
|
||||||
|
iter->BeforeFirst();
|
||||||
|
utils::Assert(iter->Next(), "slice");
|
||||||
|
const SparseBatch &batch = iter->Value();
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
const int ridx = idxset[i];
|
||||||
|
SparseBatch::Inst inst = batch[ridx];
|
||||||
|
utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceed number of rows");
|
||||||
|
ret.row_data_.resize(ret.row_data_.size() + inst.length);
|
||||||
|
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
|
||||||
|
sizeof(SparseBatch::Entry) * inst.length);
|
||||||
|
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
|
||||||
|
if (src.info.labels.size() != 0) {
|
||||||
|
ret.info.labels.push_back(src.info.labels[ridx]);
|
||||||
|
}
|
||||||
|
if (src.info.weights.size() != 0) {
|
||||||
|
ret.info.weights.push_back(src.info.weights[ridx]);
|
||||||
|
}
|
||||||
|
if (src.info.info.root_index.size() != 0) {
|
||||||
|
ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p_ret;
|
||||||
|
}
|
||||||
|
void XGDMatrixFree(void *handle) {
|
||||||
|
delete static_cast<DataMatrix*>(handle);
|
||||||
|
}
|
||||||
|
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
|
||||||
|
SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
|
||||||
|
}
|
||||||
|
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) {
|
||||||
|
std::vector<float> &vec =
|
||||||
|
static_cast<DataMatrix*>(handle)->info.GetInfo(field);
|
||||||
|
vec.resize(len);
|
||||||
|
memcpy(&vec[0], info, sizeof(float) * len);
|
||||||
|
}
|
||||||
|
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
|
||||||
|
DataMatrix *pmat = static_cast<DataMatrix*>(handle);
|
||||||
|
pmat->info.group_ptr.resize(len + 1);
|
||||||
|
pmat->info.group_ptr[0] = 0;
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i]+group[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) {
|
||||||
|
const std::vector<float> &vec =
|
||||||
|
static_cast<const DataMatrix*>(handle)->info.GetInfo(field);
|
||||||
|
*len = vec.size();
|
||||||
|
return &vec[0];
|
||||||
|
}
|
||||||
|
size_t XGDMatrixNumRow(const void *handle) {
|
||||||
|
return static_cast<const DataMatrix*>(handle)->info.num_row;
|
||||||
|
}
|
||||||
|
|
||||||
|
// xgboost implementation
|
||||||
|
void *XGBoosterCreate(void *dmats[], size_t len) {
|
||||||
|
std::vector<DataMatrix*> mats;
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
|
||||||
|
mats.push_back(dtr);
|
||||||
|
}
|
||||||
|
return new Booster(mats);
|
||||||
|
}
|
||||||
|
void XGBoosterFree(void *handle) {
|
||||||
|
delete static_cast<Booster*>(handle);
|
||||||
|
}
|
||||||
|
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
|
||||||
|
static_cast<Booster*>(handle)->SetParam(name, value);
|
||||||
|
}
|
||||||
|
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||||
|
bst->CheckInitModel();
|
||||||
|
bst->CheckInit(dtr);
|
||||||
|
bst->UpdateOneIter(iter, *dtr);
|
||||||
|
}
|
||||||
|
void XGBoosterBoostOneIter(void *handle, void *dtrain,
|
||||||
|
float *grad, float *hess, size_t len) {
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
|
||||||
|
bst->CheckInitModel();
|
||||||
|
bst->CheckInit(dtr);
|
||||||
|
bst->BoostOneIter(*dtr, grad, hess, len);
|
||||||
|
}
|
||||||
|
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
|
||||||
|
const char *evnames[], size_t len) {
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
std::vector<std::string> names;
|
||||||
|
std::vector<const DataMatrix*> mats;
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
mats.push_back(static_cast<DataMatrix*>(dmats[i]));
|
||||||
|
names.push_back(std::string(evnames[i]));
|
||||||
|
}
|
||||||
|
bst->CheckInitModel();
|
||||||
|
bst->eval_str = bst->EvalOneIter(iter, mats, names);
|
||||||
|
return bst->eval_str.c_str();
|
||||||
|
}
|
||||||
|
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) {
|
||||||
|
return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
|
||||||
|
}
|
||||||
|
void XGBoosterLoadModel(void *handle, const char *fname) {
|
||||||
|
static_cast<Booster*>(handle)->LoadModel(fname);
|
||||||
|
}
|
||||||
|
void XGBoosterSaveModel(const void *handle, const char *fname) {
|
||||||
|
static_cast<const Booster*>(handle)->SaveModel(fname);
|
||||||
|
}
|
||||||
|
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len){
|
||||||
|
utils::FeatMap featmap;
|
||||||
|
if (strlen(fmap) != 0) {
|
||||||
|
featmap.LoadText(fmap);
|
||||||
|
}
|
||||||
|
return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
|
||||||
|
}
|
||||||
|
};
|
||||||
171
wrapper/xgboost_wrapper.h
Normal file
171
wrapper/xgboost_wrapper.h
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
#ifndef XGBOOST_WRAPPER_H_
|
||||||
|
#define XGBOOST_WRAPPER_H_
|
||||||
|
/*!
|
||||||
|
 * \file xgboost_wrapper.h
|
||||||
|
* \author Tianqi Chen
|
||||||
|
* \brief a C style wrapper of xgboost
|
||||||
|
* can be used to create wrapper of other languages
|
||||||
|
*/
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix
|
||||||
|
* \return a loaded data matrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixCreateFromFile(const char *fname, int silent);
|
||||||
|
/*!
|
||||||
|
* \brief create a matrix content from csr format
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param indptr pointer to row headers
|
||||||
|
* \param indices findex
|
||||||
|
* \param data fvalue
|
||||||
|
* \param nindptr number of rows in the matix + 1
|
||||||
|
* \param nelem number of nonzero elements in the matrix
|
||||||
|
* \return created dmatrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixCreateFromCSR(const size_t *indptr,
|
||||||
|
const unsigned *indices,
|
||||||
|
const float *data,
|
||||||
|
size_t nindptr,
|
||||||
|
size_t nelem);
|
||||||
|
/*!
|
||||||
|
* \brief create matrix content from dense matrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param data pointer to the data space
|
||||||
|
* \param nrow number of rows
|
||||||
|
* \param ncol number columns
|
||||||
|
* \param missing which value to represent missing value
|
||||||
|
* \return created dmatrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixCreateFromMat(const float *data,
|
||||||
|
size_t nrow,
|
||||||
|
size_t ncol,
|
||||||
|
float missing);
|
||||||
|
/*!
|
||||||
|
* \brief create a new dmatrix from sliced content of existing matrix
|
||||||
|
* \param handle instance of data matrix to be sliced
|
||||||
|
* \param idxset index set
|
||||||
|
* \param len length of index set
|
||||||
|
* \return a sliced new matrix
|
||||||
|
*/
|
||||||
|
void* XGDMatrixSliceDMatrix(void *handle,
|
||||||
|
const int *idxset,
|
||||||
|
size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief free space in data matrix
|
||||||
|
*/
|
||||||
|
void XGDMatrixFree(void *handle);
|
||||||
|
/*!
|
||||||
|
* \brief load a data matrix into binary file
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param fname file name
|
||||||
|
* \param silent print statistics when saving
|
||||||
|
*/
|
||||||
|
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
|
||||||
|
/*!
|
||||||
|
* \brief set float vector to a content in info
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param field field name, can be label, weight
|
||||||
|
* \param array pointer to float vector
|
||||||
|
* \param len length of array
|
||||||
|
*/
|
||||||
|
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief set label of the training matrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param group pointer to group size
|
||||||
|
* \param len length of array
|
||||||
|
*/
|
||||||
|
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief get float info vector from matrix
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param len used to set result length
|
||||||
|
* \param field field name
|
||||||
|
* \return pointer to the label
|
||||||
|
*/
|
||||||
|
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
|
||||||
|
/*!
|
||||||
|
* \brief return number of rows
|
||||||
|
*/
|
||||||
|
size_t XGDMatrixNumRow(const void *handle);
|
||||||
|
// --- start XGBoost class
|
||||||
|
/*!
|
||||||
|
* \brief create xgboost learner
|
||||||
|
* \param dmats matrices that are set to be cached
|
||||||
|
* \param len length of dmats
|
||||||
|
*/
|
||||||
|
void *XGBoosterCreate(void* dmats[], size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief free obj in handle
|
||||||
|
* \param handle handle to be freed
|
||||||
|
*/
|
||||||
|
void XGBoosterFree(void* handle);
|
||||||
|
/*!
|
||||||
|
* \brief set parameters
|
||||||
|
* \param handle handle
|
||||||
|
* \param name parameter name
|
||||||
|
* \param val value of parameter
|
||||||
|
*/
|
||||||
|
void XGBoosterSetParam(void *handle, const char *name, const char *value);
|
||||||
|
/*!
|
||||||
|
* \brief update the model in one round using dtrain
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dtrain training data
|
||||||
|
*/
|
||||||
|
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
|
||||||
|
/*!
|
||||||
|
* \brief update the model, by directly specify gradient and second order gradient,
|
||||||
|
* this can be used to replace UpdateOneIter, to support customized loss function
|
||||||
|
* \param handle handle
|
||||||
|
* \param dtrain training data
|
||||||
|
* \param grad gradient statistics
|
||||||
|
* \param hess second order gradient statistics
|
||||||
|
* \param len length of grad/hess array
|
||||||
|
*/
|
||||||
|
void XGBoosterBoostOneIter(void *handle, void *dtrain,
|
||||||
|
float *grad, float *hess, size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief get evaluation statistics for xgboost
|
||||||
|
* \param handle handle
|
||||||
|
* \param iter current iteration rounds
|
||||||
|
* \param dmats pointers to data to be evaluated
|
||||||
|
* \param evnames pointers to names of each data
|
||||||
|
* \param len length of dmats
|
||||||
|
 * \return the string containing evaluation statistics
|
||||||
|
*/
|
||||||
|
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
|
||||||
|
const char *evnames[], size_t len);
|
||||||
|
/*!
|
||||||
|
* \brief make prediction based on dmat
|
||||||
|
* \param handle handle
|
||||||
|
* \param dmat data matrix
|
||||||
|
* \param output_margin whether only output raw margin value
|
||||||
|
* \param len used to store length of returning result
|
||||||
|
*/
|
||||||
|
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len);
|
||||||
|
/*!
|
||||||
|
* \brief load model from existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterLoadModel(void *handle, const char *fname);
|
||||||
|
/*!
|
||||||
|
* \brief save model into existing file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
*/
|
||||||
|
void XGBoosterSaveModel(const void *handle, const char *fname);
|
||||||
|
/*!
|
||||||
|
* \brief dump model, return array of strings representing model dump
|
||||||
|
* \param handle handle
|
||||||
|
* \param fmap name to fmap can be empty string
|
||||||
|
* \param out_len length of output array
|
||||||
|
* \return char *data[], representing dump of each model
|
||||||
|
*/
|
||||||
|
const char **XGBoosterDumpModel(void *handle, const char *fmap,
|
||||||
|
size_t *out_len);
|
||||||
|
};
|
||||||
|
#endif // XGBOOST_WRAPPER_H_
|
||||||
Loading…
x
Reference in New Issue
Block a user