tqchen 2014-08-23 14:20:29 -07:00
parent 3ba7995754
commit 08a6b92216
16 changed files with 17669 additions and 0 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large

wrapper/R-example/demo.R Normal file

@@ -0,0 +1,14 @@
# include xgboost library, must set chdir=TRUE
source('../xgboost.R', chdir=TRUE)
# test code here
dtrain <- xgb.DMatrix("agaricus.txt.train")
dtest <- xgb.DMatrix("agaricus.txt.test")
param = list('bst:max_depth'=2, 'bst:eta'=1, 'silent'=1, 'objective'='binary:logistic')
watchlist <- list('train'=dtrain,'test'=dtest)
bst <- xgb.train(param, dtrain, watchlist=watchlist, nround=3)
succ <- xgb.save(bst, "iter.model")
print('finished saving model')
bst2 <- xgb.Booster(modelfile="iter.model")
pred = xgb.predict(bst2, dtest)

wrapper/R-example/featmap.txt Normal file

@@ -0,0 +1,126 @@
0 cap-shape=bell i
1 cap-shape=conical i
2 cap-shape=convex i
3 cap-shape=flat i
4 cap-shape=knobbed i
5 cap-shape=sunken i
6 cap-surface=fibrous i
7 cap-surface=grooves i
8 cap-surface=scaly i
9 cap-surface=smooth i
10 cap-color=brown i
11 cap-color=buff i
12 cap-color=cinnamon i
13 cap-color=gray i
14 cap-color=green i
15 cap-color=pink i
16 cap-color=purple i
17 cap-color=red i
18 cap-color=white i
19 cap-color=yellow i
20 bruises?=bruises i
21 bruises?=no i
22 odor=almond i
23 odor=anise i
24 odor=creosote i
25 odor=fishy i
26 odor=foul i
27 odor=musty i
28 odor=none i
29 odor=pungent i
30 odor=spicy i
31 gill-attachment=attached i
32 gill-attachment=descending i
33 gill-attachment=free i
34 gill-attachment=notched i
35 gill-spacing=close i
36 gill-spacing=crowded i
37 gill-spacing=distant i
38 gill-size=broad i
39 gill-size=narrow i
40 gill-color=black i
41 gill-color=brown i
42 gill-color=buff i
43 gill-color=chocolate i
44 gill-color=gray i
45 gill-color=green i
46 gill-color=orange i
47 gill-color=pink i
48 gill-color=purple i
49 gill-color=red i
50 gill-color=white i
51 gill-color=yellow i
52 stalk-shape=enlarging i
53 stalk-shape=tapering i
54 stalk-root=bulbous i
55 stalk-root=club i
56 stalk-root=cup i
57 stalk-root=equal i
58 stalk-root=rhizomorphs i
59 stalk-root=rooted i
60 stalk-root=missing i
61 stalk-surface-above-ring=fibrous i
62 stalk-surface-above-ring=scaly i
63 stalk-surface-above-ring=silky i
64 stalk-surface-above-ring=smooth i
65 stalk-surface-below-ring=fibrous i
66 stalk-surface-below-ring=scaly i
67 stalk-surface-below-ring=silky i
68 stalk-surface-below-ring=smooth i
69 stalk-color-above-ring=brown i
70 stalk-color-above-ring=buff i
71 stalk-color-above-ring=cinnamon i
72 stalk-color-above-ring=gray i
73 stalk-color-above-ring=orange i
74 stalk-color-above-ring=pink i
75 stalk-color-above-ring=red i
76 stalk-color-above-ring=white i
77 stalk-color-above-ring=yellow i
78 stalk-color-below-ring=brown i
79 stalk-color-below-ring=buff i
80 stalk-color-below-ring=cinnamon i
81 stalk-color-below-ring=gray i
82 stalk-color-below-ring=orange i
83 stalk-color-below-ring=pink i
84 stalk-color-below-ring=red i
85 stalk-color-below-ring=white i
86 stalk-color-below-ring=yellow i
87 veil-type=partial i
88 veil-type=universal i
89 veil-color=brown i
90 veil-color=orange i
91 veil-color=white i
92 veil-color=yellow i
93 ring-number=none i
94 ring-number=one i
95 ring-number=two i
96 ring-type=cobwebby i
97 ring-type=evanescent i
98 ring-type=flaring i
99 ring-type=large i
100 ring-type=none i
101 ring-type=pendant i
102 ring-type=sheathing i
103 ring-type=zone i
104 spore-print-color=black i
105 spore-print-color=brown i
106 spore-print-color=buff i
107 spore-print-color=chocolate i
108 spore-print-color=green i
109 spore-print-color=orange i
110 spore-print-color=purple i
111 spore-print-color=white i
112 spore-print-color=yellow i
113 population=abundant i
114 population=clustered i
115 population=numerous i
116 population=scattered i
117 population=several i
118 population=solitary i
119 habitat=grasses i
120 habitat=leaves i
121 habitat=meadows i
122 habitat=paths i
123 habitat=urban i
124 habitat=waste i
125 habitat=woods i
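The listing above is a feature map (featmap.txt): each line is `<feature index> <feature name> <type>`, where `i` marks a binary indicator feature (the feature-map format also accepts `q` for quantitative and `int` for integer features); `dump_model` uses such a file to print readable split conditions. A minimal sketch of reading one in Python — the helper name is illustrative, not part of the wrapper:

```python
# parse a featmap.txt file: each line is "<index> <name> <type>",
# where type "i" means indicator, "q" quantitative, "int" integer
def load_featmap(fname='featmap.txt'):
    fmap = {}
    for line in open(fname):
        idx, name, ftype = line.split()
        fmap[int(idx)] = (name, ftype)
    return fmap
```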

wrapper/README.md Normal file

@@ -0,0 +1,12 @@
Wrapper of XGBoost
=====
This folder provides wrappers of xgboost for other languages.
Python
=====
To build the Python module, type ```make``` in the root directory of the project.
R
=====
To build the R wrapper, type ```make R``` in the root directory of the project.
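After ```make``` finishes, a quick smoke test of the Python module can look like the sketch below; the paths are assumptions about where you run it from and where the demo data lives:

```python
import sys
# path to the wrapper/ directory produced by the build; adjust as needed
sys.path.append('./wrapper')
import xgboost as xgb

# the import alone confirms libxgboostwrapper.so was found;
# loading one of the bundled demo files goes one step further
dtrain = xgb.DMatrix('wrapper/python-example/agaricus.txt.train')
print('loaded %d rows' % dtrain.num_row())
```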

wrapper/python-example/README.md Normal file

@@ -0,0 +1,3 @@
Example of using xgboost from Python. The data is generated from demo/binary_classification and stored in LibSVM format.
For usage, see demo.py and the comments in it.

File diff suppressed because it is too large

File diff suppressed because it is too large

wrapper/python-example/demo.py Executable file

@@ -0,0 +1,112 @@
#!/usr/bin/python
import sys
import numpy as np
import scipy.sparse
# append the path to xgboost, you may need to change the following line
# alternatively, you can add the path to the PYTHONPATH environment variable
sys.path.append('../')
import xgboost as xgb

### simple example
# load data from a text file, or a binary buffer generated by xgboost
dtrain = xgb.DMatrix('agaricus.txt.train')
dtest = xgb.DMatrix('agaricus.txt.test')
# specify parameters via map, definitions are the same as the C++ version
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
# specify validation sets to watch performance
evallist = [(dtest,'eval'), (dtrain,'train')]
num_round = 2
bst = xgb.train(param, dtrain, num_round, evallist)
# make prediction
preds = bst.predict(dtest)
labels = dtest.get_label()
print ('error=%f' % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))))
bst.save_model('0001.model')
# dump model
bst.dump_model('dump.raw.txt')
# dump model with feature map
bst.dump_model('dump.nice.txt', 'featmap.txt')

###
# build dmatrix from scipy.sparse
print ('start running example of build DMatrix from scipy.sparse')
labels = []
row = []; col = []; dat = []
i = 0
for l in open('agaricus.txt.train'):
    arr = l.split()
    labels.append(int(arr[0]))
    for it in arr[1:]:
        k, v = it.split(':')
        row.append(i); col.append(int(k)); dat.append(float(v))
    i += 1
csr = scipy.sparse.csr_matrix((dat, (row, col)))
dtrain = xgb.DMatrix(csr)
dtrain.set_label(labels)
evallist = [(dtest,'eval'), (dtrain,'train')]
bst = xgb.train(param, dtrain, num_round, evallist)

print ('start running example of build DMatrix from numpy array')
# NOTE: npymat is a dense numpy array; it is converted into a DMatrix internally
npymat = csr.todense()
dtrain = xgb.DMatrix(npymat)
dtrain.set_label(labels)
evallist = [(dtest,'eval'), (dtrain,'train')]
bst = xgb.train(param, dtrain, num_round, evallist)

###
# advanced: customized loss function; leave the objective unset so that predict returns untransformed scores
#
print ('start running example to use customized objective function')
# note: for a customized objective function, we leave objective as default
# note: what we are getting is the margin value in prediction
# you must know what you are doing
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1 }
# user defined objective function: given prediction, return gradient and second order gradient
# this is the gradient and hessian of logistic loss
def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess
# user defined evaluation function: return a pair metric_name, result
# NOTE: with a customized objective, the prediction passed in is the margin value
# this may make built-in evaluation metrics function incorrectly
# for example, with logistic loss the prediction is the score before the logistic transformation,
# while the built-in error metric assumes the input has already been transformed
# keep this in mind when customizing; you may also need a customized evaluation function
def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    # return a pair metric_name, result
    # since preds are margins (before logistic transformation, cutoff at 0)
    return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
# training with customized objective; we can also do step-by-step training
# simply look at xgboost.py's implementation of train
bst = xgb.train(param, dtrain, num_round, evallist, logregobj, evalerror)

###
# advanced: start from an initial base prediction
#
print ('start running example to start from an initial prediction')
# specify parameters via map, definitions are the same as the C++ version
param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }
# train xgboost for 1 round
bst = xgb.train(param, dtrain, 1, evallist)
# Note: set_base_margin needs the margin value instead of the transformed prediction
# predicting with output_margin=True will always give you margin values before the logistic transformation
ptrain = bst.predict(dtrain, output_margin=True)
ptest = bst.predict(dtest, output_margin=True)
dtrain.set_base_margin(ptrain)
dtest.set_base_margin(ptest)
print ('this is the result of running from the initial prediction')
bst = xgb.train(param, dtrain, 1, evallist)
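The margin values used in the customized-objective and base-margin examples above are scores before the logistic transformation; to recover probabilities you apply the sigmoid yourself. A small sketch — the helper name is illustrative:

```python
import numpy as np

def margin_to_prob(margin):
    # logistic transformation: probability = 1 / (1 + exp(-margin))
    return 1.0 / (1.0 + np.exp(-margin))

# e.g. after ptest = bst.predict(dtest, output_margin=True),
# margin_to_prob(ptest) matches bst.predict(dtest) for a booster
# trained with objective 'binary:logistic'
```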

wrapper/python-example/featmap.txt Normal file

@@ -0,0 +1,126 @@
0 cap-shape=bell i
1 cap-shape=conical i
2 cap-shape=convex i
3 cap-shape=flat i
4 cap-shape=knobbed i
5 cap-shape=sunken i
6 cap-surface=fibrous i
7 cap-surface=grooves i
8 cap-surface=scaly i
9 cap-surface=smooth i
10 cap-color=brown i
11 cap-color=buff i
12 cap-color=cinnamon i
13 cap-color=gray i
14 cap-color=green i
15 cap-color=pink i
16 cap-color=purple i
17 cap-color=red i
18 cap-color=white i
19 cap-color=yellow i
20 bruises?=bruises i
21 bruises?=no i
22 odor=almond i
23 odor=anise i
24 odor=creosote i
25 odor=fishy i
26 odor=foul i
27 odor=musty i
28 odor=none i
29 odor=pungent i
30 odor=spicy i
31 gill-attachment=attached i
32 gill-attachment=descending i
33 gill-attachment=free i
34 gill-attachment=notched i
35 gill-spacing=close i
36 gill-spacing=crowded i
37 gill-spacing=distant i
38 gill-size=broad i
39 gill-size=narrow i
40 gill-color=black i
41 gill-color=brown i
42 gill-color=buff i
43 gill-color=chocolate i
44 gill-color=gray i
45 gill-color=green i
46 gill-color=orange i
47 gill-color=pink i
48 gill-color=purple i
49 gill-color=red i
50 gill-color=white i
51 gill-color=yellow i
52 stalk-shape=enlarging i
53 stalk-shape=tapering i
54 stalk-root=bulbous i
55 stalk-root=club i
56 stalk-root=cup i
57 stalk-root=equal i
58 stalk-root=rhizomorphs i
59 stalk-root=rooted i
60 stalk-root=missing i
61 stalk-surface-above-ring=fibrous i
62 stalk-surface-above-ring=scaly i
63 stalk-surface-above-ring=silky i
64 stalk-surface-above-ring=smooth i
65 stalk-surface-below-ring=fibrous i
66 stalk-surface-below-ring=scaly i
67 stalk-surface-below-ring=silky i
68 stalk-surface-below-ring=smooth i
69 stalk-color-above-ring=brown i
70 stalk-color-above-ring=buff i
71 stalk-color-above-ring=cinnamon i
72 stalk-color-above-ring=gray i
73 stalk-color-above-ring=orange i
74 stalk-color-above-ring=pink i
75 stalk-color-above-ring=red i
76 stalk-color-above-ring=white i
77 stalk-color-above-ring=yellow i
78 stalk-color-below-ring=brown i
79 stalk-color-below-ring=buff i
80 stalk-color-below-ring=cinnamon i
81 stalk-color-below-ring=gray i
82 stalk-color-below-ring=orange i
83 stalk-color-below-ring=pink i
84 stalk-color-below-ring=red i
85 stalk-color-below-ring=white i
86 stalk-color-below-ring=yellow i
87 veil-type=partial i
88 veil-type=universal i
89 veil-color=brown i
90 veil-color=orange i
91 veil-color=white i
92 veil-color=yellow i
93 ring-number=none i
94 ring-number=one i
95 ring-number=two i
96 ring-type=cobwebby i
97 ring-type=evanescent i
98 ring-type=flaring i
99 ring-type=large i
100 ring-type=none i
101 ring-type=pendant i
102 ring-type=sheathing i
103 ring-type=zone i
104 spore-print-color=black i
105 spore-print-color=brown i
106 spore-print-color=buff i
107 spore-print-color=chocolate i
108 spore-print-color=green i
109 spore-print-color=orange i
110 spore-print-color=purple i
111 spore-print-color=white i
112 spore-print-color=yellow i
113 population=abundant i
114 population=clustered i
115 population=numerous i
116 population=scattered i
117 population=several i
118 population=solitary i
119 habitat=grasses i
120 habitat=leaves i
121 habitat=meadows i
122 habitat=paths i
123 habitat=urban i
124 habitat=waste i
125 habitat=woods i

wrapper/xgboost.R Normal file

@@ -0,0 +1,136 @@
# load in library
dyn.load("./libxgboostR.so")
# construct a DMatrix
xgb.DMatrix <- function(data) {
  if (typeof(data) == "character") {
    handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE))
  } else {
    stop("xgb.DMatrix: cannot recognize data type")
  }
  return(structure(handle, class = "xgb.DMatrix"))
}
# construct a Booster from cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
  if (typeof(cachelist) != "list") {
    stop("xgb.Booster: only accepts list of DMatrix as cachelist")
  }
  for (dm in cachelist) {
    if (class(dm) != "xgb.DMatrix") {
      stop("xgb.Booster: only accepts list of DMatrix as cachelist")
    }
  }
  handle <- .Call("XGBoosterCreate_R", cachelist)
  # seq_along handles the empty-params case, unlike 1:length(params)
  for (i in seq_along(params)) {
    p <- params[i]
    .Call("XGBoosterSetParam_R", handle, names(p), as.character(p))
  }
  if (!is.null(modelfile)) {
    if (typeof(modelfile) != "character") {
      stop("xgb.Booster: modelfile must be character")
    }
    .Call("XGBoosterLoadModel_R", handle, modelfile)
  }
  return(structure(handle, class = "xgb.Booster"))
}
# train a model using given parameters
xgb.train <- function(params, dtrain, nrounds = 10, watchlist = list(), obj = NULL) {
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.train: second argument dtrain must be xgb.DMatrix")
  }
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  for (i in 1:nrounds) {
    if (is.null(obj)) {
      succ <- xgb.iter.update(bst, dtrain, i - 1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      succ <- xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
      msg <- xgb.iter.eval(bst, watchlist, i - 1)
      cat(msg); cat("\n")
    }
  }
  return(bst)
}
# save model or DMatrix to file
xgb.save <- function(handle, fname) {
  if (typeof(fname) != "character") {
    stop("xgb.save: fname must be character")
  }
  if (class(handle) == "xgb.Booster") {
    .Call("XGBoosterSaveModel_R", handle, fname)
    return(TRUE)
  }
  if (class(handle) == "xgb.DMatrix") {
    .Call("XGDMatrixSaveBinary_R", handle, fname, as.integer(FALSE))
    return(TRUE)
  }
  stop("xgb.save: the input must be either xgb.DMatrix or xgb.Booster")
  return(FALSE)
}
# predict
xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.predict: first argument must be type xgb.Booster")
  }
  if (class(dmat) != "xgb.DMatrix") {
    stop("xgb.predict: second argument must be type xgb.DMatrix")
  }
  ret <- .Call("XGBoosterPredict_R", booster, dmat, as.integer(outputmargin))
  return(ret)
}
##--------------------------------------
# The following are low-level iteration functions;
# they are not needed unless you want to run training step by step.
#---------------------------------------
# run one update iteration of booster on dtrain
xgb.iter.update <- function(booster, dtrain, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.update: first argument must be type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.update: second argument must be type xgb.DMatrix")
  }
  .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain)
  return(TRUE)
}
# boost booster one iteration with customized gradient statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.boost: first argument must be type xgb.Booster")
  }
  if (class(dtrain) != "xgb.DMatrix") {
    stop("xgb.iter.boost: second argument must be type xgb.DMatrix")
  }
  .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess)
  return(TRUE)
}
# evaluate one iteration on the watchlist
xgb.iter.eval <- function(booster, watchlist, iter) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.iter.eval: first argument must be type xgb.Booster")
  }
  if (typeof(watchlist) != "list") {
    stop("xgb.iter.eval: only accepts list of DMatrix as watchlist")
  }
  for (w in watchlist) {
    if (class(w) != "xgb.DMatrix") {
      stop("xgb.iter.eval: watchlist can only contain xgb.DMatrix")
    }
  }
  evnames <- list()
  for (i in seq_along(watchlist)) {
    w <- watchlist[i]
    if (length(names(w)) == 0) {
      stop("xgb.iter.eval: a name tag must be present for every element in watchlist")
    }
    evnames <- append(evnames, names(w))
  }
  msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, evnames)
  return(msg)
}

wrapper/xgboost.py Normal file

@@ -0,0 +1,266 @@
# Author: Tianqi Chen, Bing Xu
# module for xgboost
import ctypes
import os
import sys
import numpy
import numpy.ctypeslib
# optionally have scipy sparse, though not necessary
import scipy.sparse as scp

# set this line correctly
XGBOOST_PATH = os.path.dirname(__file__) + '/libxgboostwrapper.so'
# load in xgboost library
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
# declare return types of the C API entry points we use
xglib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
xglib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p
xglib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p
xglib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p
xglib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float)
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
xglib.XGBoosterCreate.restype = ctypes.c_void_p
xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)

def ctypes2numpy(cptr, length):
    # convert a ctypes float pointer array to a numpy array
    assert isinstance(cptr, ctypes.POINTER(ctypes.c_float))
    res = numpy.zeros(length, dtype='float32')
    assert ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0])
    return res

# data matrix used in xgboost
class DMatrix:
    # constructor
    def __init__(self, data, label=None, missing=0.0, weight=None):
        # force into void_p, mac need to pass things in as void_p
        if data is None:
            self.handle = None
            return
        if isinstance(data, str):
            self.handle = ctypes.c_void_p(
                xglib.XGDMatrixCreateFromFile(ctypes.c_char_p(data.encode('utf-8')), 1))
        elif isinstance(data, scp.csr_matrix):
            self.__init_from_csr(data)
        elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
            self.__init_from_npy2d(data, missing)
        else:
            try:
                csr = scp.csr_matrix(data)
                self.__init_from_csr(csr)
            except Exception:
                raise Exception("cannot initialize DMatrix from " + str(type(data)))
        if label is not None:
            self.set_label(label)
        if weight is not None:
            self.set_weight(weight)
    # convert data from csr matrix
    def __init_from_csr(self, csr):
        assert len(csr.indices) == len(csr.data)
        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR(
            (ctypes.c_ulong * len(csr.indptr))(*csr.indptr),
            (ctypes.c_uint * len(csr.indices))(*csr.indices),
            (ctypes.c_float * len(csr.data))(*csr.data),
            len(csr.indptr), len(csr.data)))
    # convert data from numpy matrix
    def __init_from_npy2d(self, mat, missing):
        data = numpy.array(mat.reshape(mat.size), dtype='float32')
        self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat(
            data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
            mat.shape[0], mat.shape[1], ctypes.c_float(missing)))
    # destructor
    def __del__(self):
        xglib.XGDMatrixFree(self.handle)
    def __get_float_info(self, field):
        length = ctypes.c_ulong()
        ret = xglib.XGDMatrixGetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
                                          ctypes.byref(length))
        return ctypes2numpy(ret, length.value)
    def __set_float_info(self, field, data):
        xglib.XGDMatrixSetFloatInfo(self.handle, ctypes.c_char_p(field.encode('utf-8')),
                                    (ctypes.c_float * len(data))(*data), len(data))
    # save the DMatrix into a binary file
    def save_binary(self, fname, silent=True):
        xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
    # set label of dmatrix
    def set_label(self, label):
        self.__set_float_info('label', label)
    # set weight of each instance
    def set_weight(self, weight):
        self.__set_float_info('weight', weight)
    # set initial margin prediction
    def set_base_margin(self, margin):
        """
        set base margin of booster to start from
        this can be used to specify a prediction value of
        existing model to be base_margin
        However, remember margin is needed, instead of transformed prediction
        e.g. for logistic regression: need to put in value before logistic transformation
        see also example/demo.py
        """
        self.__set_float_info('base_margin', margin)
    # set group size of dmatrix, used for ranking
    def set_group(self, group):
        xglib.XGDMatrixSetGroup(self.handle, (ctypes.c_uint * len(group))(*group), len(group))
    # get label from dmatrix
    def get_label(self):
        return self.__get_float_info('label')
    # get weight from dmatrix
    def get_weight(self):
        return self.__get_float_info('weight')
    # get base_margin from dmatrix
    def get_base_margin(self):
        return self.__get_float_info('base_margin')
    def num_row(self):
        return xglib.XGDMatrixNumRow(self.handle)
    # slice the DMatrix to return a new DMatrix that only contains rindex
    def slice(self, rindex):
        res = DMatrix(None)
        res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix(
            self.handle, (ctypes.c_int * len(rindex))(*rindex), len(rindex)))
        return res

class Booster:
    """learner class"""
    def __init__(self, params={}, cache=[], model_name=None):
        """constructor: create a booster with given params and cached matrices"""
        for d in cache:
            assert isinstance(d, DMatrix)
        dmats = (ctypes.c_void_p * len(cache))(*[d.handle for d in cache])
        self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache)))
        self.set_param({'seed': 0})
        self.set_param(params)
        if model_name is not None:
            self.load_model(model_name)
    def __del__(self):
        xglib.XGBoosterFree(self.handle)
    def set_param(self, params, pv=None):
        if isinstance(params, dict):
            for k, v in params.items():
                xglib.XGBoosterSetParam(
                    self.handle, ctypes.c_char_p(k.encode('utf-8')),
                    ctypes.c_char_p(str(v).encode('utf-8')))
        elif isinstance(params, str) and pv is not None:
            xglib.XGBoosterSetParam(
                self.handle, ctypes.c_char_p(params.encode('utf-8')),
                ctypes.c_char_p(str(pv).encode('utf-8')))
        else:
            for k, v in params:
                xglib.XGBoosterSetParam(
                    self.handle, ctypes.c_char_p(k.encode('utf-8')),
                    ctypes.c_char_p(str(v).encode('utf-8')))
    def update(self, dtrain, it):
        """
        update the booster for one iteration
        dtrain: the training DMatrix
        it: current iteration number
        """
        assert isinstance(dtrain, DMatrix)
        xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle)
    def boost(self, dtrain, grad, hess):
        """update with customized gradient statistics"""
        assert len(grad) == len(hess)
        assert isinstance(dtrain, DMatrix)
        xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle,
                                    (ctypes.c_float * len(grad))(*grad),
                                    (ctypes.c_float * len(hess))(*hess),
                                    len(grad))
    def eval_set(self, evals, it=0):
        for d in evals:
            assert isinstance(d[0], DMatrix)
            assert isinstance(d[1], str)
        dmats = (ctypes.c_void_p * len(evals))(*[d[0].handle for d in evals])
        evnames = (ctypes.c_char_p * len(evals))(
            *[ctypes.c_char_p(d[1].encode('utf-8')) for d in evals])
        return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
    def eval(self, mat, name='eval', it=0):
        return self.eval_set([(mat, name)], it)
    def predict(self, data, output_margin=False):
        """
        predict with data
        data: the dmatrix storing the input
        output_margin: whether to output the raw untransformed margin value
        """
        length = ctypes.c_ulong()
        preds = xglib.XGBoosterPredict(self.handle, data.handle,
                                       int(output_margin), ctypes.byref(length))
        return ctypes2numpy(preds, length.value)
    def save_model(self, fname):
        """save model to file"""
        xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))
    def load_model(self, fname):
        """load model from file"""
        xglib.XGBoosterLoadModel(self.handle, ctypes.c_char_p(fname.encode('utf-8')))
    def dump_model(self, fo, fmap=''):
        """dump model into text file"""
        if isinstance(fo, str):
            fo = open(fo, 'w')
            need_close = True
        else:
            need_close = False
        ret = self.get_dump(fmap)
        for i in range(len(ret)):
            fo.write('booster[%d]:\n' % i)
            fo.write(ret[i])
        if need_close:
            fo.close()
    def get_dump(self, fmap=''):
        """get dump of model as list of strings"""
        length = ctypes.c_ulong()
        sarr = xglib.XGBoosterDumpModel(self.handle, ctypes.c_char_p(fmap.encode('utf-8')),
                                        ctypes.byref(length))
        res = []
        for i in range(length.value):
            res.append(str(sarr[i]))
        return res
    def get_fscore(self, fmap=''):
        """get split count of each feature, a simple measure of importance"""
        trees = self.get_dump(fmap)
        fscore = {}
        for tree in trees:
            for l in tree.split('\n'):
                arr = l.split('[')
                if len(arr) == 1:
                    continue
                fid = arr[1].split(']')[0]
                fid = fid.split('<')[0]
                if fid not in fscore:
                    fscore[fid] = 1
                else:
                    fscore[fid] += 1
        return fscore

def evaluate(bst, evals, it, feval=None):
    """evaluation on eval set"""
    if feval is not None:
        res = '[%d]' % it
        for dm, evname in evals:
            name, val = feval(bst.predict(dm), dm)
            res += '\t%s-%s:%f' % (evname, name, val)
    else:
        res = bst.eval_set(evals, it)
    return res

def train(params, dtrain, num_boost_round=10, evals=[], obj=None, feval=None):
    """train a booster with given parameters"""
    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    if obj is None:
        for i in range(num_boost_round):
            bst.update(dtrain, i)
            if len(evals) != 0:
                sys.stderr.write(evaluate(bst, evals, i, feval) + '\n')
    else:
        # use customized objective function
        for i in range(num_boost_round):
            pred = bst.predict(dtrain)
            grad, hess = obj(pred, dtrain)
            bst.boost(dtrain, grad, hess)
            if len(evals) != 0:
                sys.stderr.write(evaluate(bst, evals, i, feval) + '\n')
    return bst
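As the comment in demo.py notes, train() above is just a loop over Booster.update; a minimal sketch of the equivalent step-by-step training, assuming the agaricus demo data sits in the working directory:

```python
import xgboost as xgb

dtrain = xgb.DMatrix('agaricus.txt.train')
bst = xgb.Booster({'bst:max_depth': 2, 'bst:eta': 1, 'silent': 1,
                   'objective': 'binary:logistic'}, [dtrain])
for i in range(3):
    bst.update(dtrain, i)                         # one boosting round
    print(bst.eval_set([(dtrain, 'train')], i))   # per-round evaluation
```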

wrapper/xgboost_R.cpp Normal file

@@ -0,0 +1,115 @@
#include <vector>
#include <string>
#include "xgboost_wrapper.h"
#include "xgboost_R.h"
#include "../src/utils/utils.h"
#include "../src/utils/omp.h"

using namespace xgboost;

extern "C" {
void _DMatrixFinalizer(SEXP ext) {
  if (R_ExternalPtrAddr(ext) == NULL) return;
  XGDMatrixFree(R_ExternalPtrAddr(ext));
  R_ClearExternalPtr(ext);
}
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
  void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent));
  SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
  R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
  UNPROTECT(1);
  return ret;
}
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
  XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
                      CHAR(asChar(fname)), asInteger(silent));
}
// functions related to booster
void _BoosterFinalizer(SEXP ext) {
  if (R_ExternalPtrAddr(ext) == NULL) return;
  XGBoosterFree(R_ExternalPtrAddr(ext));
  R_ClearExternalPtr(ext);
}
SEXP XGBoosterCreate_R(SEXP dmats) {
  int len = length(dmats);
  std::vector<void*> dvec;
  for (int i = 0; i < len; ++i) {
    dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
  }
  void *handle = XGBoosterCreate(&dvec[0], dvec.size());
  SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
  R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
  UNPROTECT(1);
  return ret;
}
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
  XGBoosterSetParam(R_ExternalPtrAddr(handle),
                    CHAR(asChar(name)),
                    CHAR(asChar(val)));
}
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
  XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
                         asInteger(iter),
                         R_ExternalPtrAddr(dtrain));
}
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
  utils::Check(length(grad) == length(hess), "gradient and hess must have same length");
  int len = length(grad);
  std::vector<float> tgrad(len), thess(len);
  #pragma omp parallel for schedule(static)
  for (int j = 0; j < len; ++j) {
    tgrad[j] = REAL(grad)[j];
    thess[j] = REAL(hess)[j];
  }
  XGBoosterBoostOneIter(R_ExternalPtrAddr(handle),
                        R_ExternalPtrAddr(dtrain),
                        &tgrad[0], &thess[0], len);
}
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
  utils::Check(length(dmats) == length(evnames), "dmats and evnames must have same length");
  int len = length(dmats);
  std::vector<void*> vec_dmats;
  std::vector<std::string> vec_names;
  std::vector<const char*> vec_sptr;
  for (int i = 0; i < len; ++i) {
    vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
    vec_names.push_back(std::string(CHAR(asChar(VECTOR_ELT(evnames, i)))));
  }
  // collect the c_str() pointers only after vec_names stops growing,
  // since vector reallocation would invalidate them
  for (int i = 0; i < len; ++i) {
    vec_sptr.push_back(vec_names[i].c_str());
  }
  return mkString(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
                                       asInteger(iter),
                                       &vec_dmats[0], &vec_sptr[0], len));
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
  size_t olen;
  const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                      R_ExternalPtrAddr(dmat),
                                      asInteger(output_margin),
                                      &olen);
  SEXP ret = PROTECT(allocVector(REALSXP, olen));
  for (size_t i = 0; i < olen; ++i) {
    REAL(ret)[i] = res[i];
  }
  UNPROTECT(1);
  return ret;
}
void XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
  XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
}
void XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
  XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)));
}
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap) {
  size_t olen;
  const char **res = XGBoosterDumpModel(R_ExternalPtrAddr(handle),
                                        CHAR(asChar(fmap)),
                                        &olen);
  FILE *fo = utils::FopenCheck(CHAR(asChar(fname)), "w");
  for (size_t i = 0; i < olen; ++i) {
    fprintf(fo, "booster[%lu]:\n", i);
    fprintf(fo, "%s\n", res[i]);
  }
  fclose(fo);
}
}  // extern "C"

wrapper/xgboost_R.h Normal file

@@ -0,0 +1,91 @@
#ifndef XGBOOST_WRAPPER_R_H_
#define XGBOOST_WRAPPER_R_H_
/*!
 * \file xgboost_R.h
 * \author Tianqi Chen
 * \brief R wrapper of xgboost
 */
extern "C" {
#include <Rinternals.h>
}
extern "C" {
/*!
 * \brief load a data matrix
 * \param fname name of the data file
 * \param silent whether to print messages
 * \return a loaded data matrix
 */
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent);
/*!
 * \brief save a data matrix into a binary file
 * \param handle an instance of data matrix
 * \param fname file name
 * \param silent whether to print statistics when saving
 */
void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent);
/*!
 * \brief create xgboost learner
 * \param dmats a list of dmatrix handles that will be cached
 */
SEXP XGBoosterCreate_R(SEXP dmats);
/*!
 * \brief set parameters
 * \param handle handle
 * \param name parameter name
 * \param val value of parameter
 */
void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val);
/*!
 * \brief update the model in one round using dtrain
 * \param handle handle
 * \param iter current iteration round
 * \param dtrain training data
 */
void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain);
/*!
 * \brief update the model by directly specifying the gradient and second order gradient;
 *        this can be used to replace UpdateOneIter, to support customized loss functions
 * \param handle handle
 * \param dtrain training data
 * \param grad gradient statistics
 * \param hess second order gradient statistics
 */
void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess);
/*!
 * \brief get evaluation statistics for xgboost
 * \param handle handle
 * \param iter current iteration round
 * \param dmats list of handles to dmatrices
 * \param evnames names of each evaluation set
 * \return the string containing evaluation statistics
 */
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames);
/*!
 * \brief make prediction based on dmat
 * \param handle handle
 * \param dmat data matrix
 * \param output_margin whether to only output the raw margin value
 */
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
/*!
 * \brief load model from existing file
 * \param handle handle
 * \param fname file name
 */
void XGBoosterLoadModel_R(SEXP handle, SEXP fname);
/*!
 * \brief save model into file
 * \param handle handle
 * \param fname file name
 */
void XGBoosterSaveModel_R(SEXP handle, SEXP fname);
/*!
 * \brief dump model into a text file
 * \param handle handle
 * \param fname name of the file the model is dumped into
 * \param fmap name of the feature map file, can be an empty string
 */
void XGBoosterDumpModel_R(SEXP handle, SEXP fname, SEXP fmap);
};
#endif  // XGBOOST_WRAPPER_R_H_

wrapper/xgboost_wrapper.cpp Normal file

@@ -0,0 +1,249 @@
// implementation of the C API wrapper, used by the ctypes and R interfaces
#include <cstdio>
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include "./xgboost_wrapper.h"
#include "../src/data.h"
#include "../src/learner/learner-inl.hpp"
#include "../src/io/io.h"
#include "../src/io/simple_dmatrix-inl.hpp"

using namespace xgboost;
using namespace xgboost::io;

namespace xgboost {
namespace wrapper {
// booster wrapper class
class Booster: public learner::BoostLearner<FMatrixS> {
 public:
  explicit Booster(const std::vector<DataMatrix*>& mats) {
    this->silent = 1;
    this->init_model = false;
    this->SetCacheData(mats);
  }
  const float *Pred(const DataMatrix &dmat, int output_margin, size_t *len) {
    this->CheckInitModel();
    this->Predict(dmat, output_margin, &this->preds_);
    *len = this->preds_.size();
    return &this->preds_[0];
  }
  inline void BoostOneIter(const DataMatrix &train,
                           float *grad, float *hess, size_t len) {
    this->gpair_.resize(len);
    const unsigned ndata = static_cast<unsigned>(len);
    #pragma omp parallel for schedule(static)
    for (unsigned j = 0; j < ndata; ++j) {
      gpair_[j] = bst_gpair(grad[j], hess[j]);
    }
    gbm_->DoBoost(train.fmat, train.info.info, &gpair_);
  }
  inline void CheckInitModel(void) {
    if (!init_model) {
      this->InitModel(); init_model = true;
    }
  }
  inline void LoadModel(const char *fname) {
    learner::BoostLearner<FMatrixS>::LoadModel(fname);
    this->init_model = true;
  }
  inline const char** GetModelDump(const utils::FeatMap& fmap, bool with_stats, size_t *len) {
    model_dump = this->DumpModel(fmap, with_stats);
    model_dump_cptr.resize(model_dump.size());
    for (size_t i = 0; i < model_dump.size(); ++i) {
      model_dump_cptr[i] = model_dump[i].c_str();
    }
    *len = model_dump.size();
    return &model_dump_cptr[0];
  }
  // temporary fields
  // temporary data to save evaluation dump
  std::string eval_str;
  // temporary space to save model dump
  std::vector<std::string> model_dump;
  std::vector<const char*> model_dump_cptr;

 private:
  bool init_model;
};
}  // namespace wrapper
}  // namespace xgboost

using namespace xgboost::wrapper;

extern "C" {
void* XGDMatrixCreateFromFile(const char *fname, int silent) {
  return LoadDataMatrix(fname, silent, false);
}
void* XGDMatrixCreateFromCSR(const size_t *indptr,
                             const unsigned *indices,
                             const float *data,
                             size_t nindptr,
                             size_t nelem) {
  DMatrixSimple *p_mat = new DMatrixSimple();
  DMatrixSimple &mat = *p_mat;
  mat.row_ptr_.resize(nindptr);
  memcpy(&mat.row_ptr_[0], indptr, sizeof(size_t) * nindptr);
  mat.row_data_.resize(nelem);
  for (size_t i = 0; i < nelem; ++i) {
    mat.row_data_[i] = SparseBatch::Entry(indices[i], data[i]);
    mat.info.num_col = std::max(mat.info.num_col,
                                static_cast<size_t>(indices[i]+1));
  }
  mat.info.num_row = nindptr - 1;
  return p_mat;
}
void* XGDMatrixCreateFromMat(const float *data,
                             size_t nrow,
                             size_t ncol,
                             float missing) {
  DMatrixSimple *p_mat = new DMatrixSimple();
  DMatrixSimple &mat = *p_mat;
  mat.info.num_row = nrow;
  mat.info.num_col = ncol;
  for (size_t i = 0; i < nrow; ++i, data += ncol) {
    size_t nelem = 0;
    for (size_t j = 0; j < ncol; ++j) {
      if (data[j] != missing) {
        mat.row_data_.push_back(SparseBatch::Entry(j, data[j]));
        ++nelem;
      }
    }
    mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
  }
  return p_mat;
}
void* XGDMatrixSliceDMatrix(void *handle,
                            const int *idxset,
                            size_t len) {
  DMatrixSimple tmp;
  DataMatrix &dsrc = *static_cast<DataMatrix*>(handle);
  if (dsrc.magic != DMatrixSimple::kMagic) {
    tmp.CopyFrom(dsrc);
  }
  DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ?
                     *static_cast<DMatrixSimple*>(handle) : tmp);
  DMatrixSimple *p_ret = new DMatrixSimple();
  DMatrixSimple &ret = *p_ret;
  utils::Check(src.info.group_ptr.size() == 0,
               "slice does not support group structure");
  ret.Clear();
  ret.info.num_row = len;
  ret.info.num_col = src.info.num_col;
  utils::IIterator<SparseBatch> *iter = src.fmat.RowIterator();
  iter->BeforeFirst();
  utils::Assert(iter->Next(), "slice");
  const SparseBatch &batch = iter->Value();
  for (size_t i = 0; i < len; ++i) {
    const int ridx = idxset[i];
    // validate the index before accessing the batch
    utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceed number of rows");
    SparseBatch::Inst inst = batch[ridx];
    ret.row_data_.resize(ret.row_data_.size() + inst.length);
    memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
           sizeof(SparseBatch::Entry) * inst.length);
    ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
    if (src.info.labels.size() != 0) {
      ret.info.labels.push_back(src.info.labels[ridx]);
    }
    if (src.info.weights.size() != 0) {
      ret.info.weights.push_back(src.info.weights[ridx]);
    }
    if (src.info.info.root_index.size() != 0) {
      ret.info.info.root_index.push_back(src.info.info.root_index[ridx]);
    }
  }
  return p_ret;
}
void XGDMatrixFree(void *handle) {
  delete static_cast<DataMatrix*>(handle);
}
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) {
  SaveDataMatrix(*static_cast<DataMatrix*>(handle), fname, silent);
}
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, size_t len) {
  std::vector<float> &vec =
      static_cast<DataMatrix*>(handle)->info.GetInfo(field);
  vec.resize(len);
  memcpy(&vec[0], info, sizeof(float) * len);
}
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len) {
  DataMatrix *pmat = static_cast<DataMatrix*>(handle);
  pmat->info.group_ptr.resize(len + 1);
  pmat->info.group_ptr[0] = 0;
  for (size_t i = 0; i < len; ++i) {
    pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i];
  }
}
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* len) {
  const std::vector<float> &vec =
      static_cast<const DataMatrix*>(handle)->info.GetInfo(field);
  *len = vec.size();
  return &vec[0];
}
size_t XGDMatrixNumRow(const void *handle) {
  return static_cast<const DataMatrix*>(handle)->info.num_row;
}
// xgboost implementation
void *XGBoosterCreate(void *dmats[], size_t len) {
  std::vector<DataMatrix*> mats;
  for (size_t i = 0; i < len; ++i) {
    DataMatrix *dtr = static_cast<DataMatrix*>(dmats[i]);
    mats.push_back(dtr);
  }
  return new Booster(mats);
}
void XGBoosterFree(void *handle) {
  delete static_cast<Booster*>(handle);
}
void XGBoosterSetParam(void *handle, const char *name, const char *value) {
  static_cast<Booster*>(handle)->SetParam(name, value);
}
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) {
  Booster *bst = static_cast<Booster*>(handle);
  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
  bst->CheckInitModel();
  bst->CheckInit(dtr);
  bst->UpdateOneIter(iter, *dtr);
}
void XGBoosterBoostOneIter(void *handle, void *dtrain,
                           float *grad, float *hess, size_t len) {
  Booster *bst = static_cast<Booster*>(handle);
  DataMatrix *dtr = static_cast<DataMatrix*>(dtrain);
  bst->CheckInitModel();
  bst->CheckInit(dtr);
  bst->BoostOneIter(*dtr, grad, hess, len);
}
const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
                                 const char *evnames[], size_t len) {
  Booster *bst = static_cast<Booster*>(handle);
  std::vector<std::string> names;
  std::vector<const DataMatrix*> mats;
  for (size_t i = 0; i < len; ++i) {
    mats.push_back(static_cast<DataMatrix*>(dmats[i]));
    names.push_back(std::string(evnames[i]));
  }
  bst->CheckInitModel();
  bst->eval_str = bst->EvalOneIter(iter, mats, names);
  return bst->eval_str.c_str();
}
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len) {
  return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
}
void XGBoosterLoadModel(void *handle, const char *fname) {
  static_cast<Booster*>(handle)->LoadModel(fname);
}
void XGBoosterSaveModel(const void *handle, const char *fname) {
  static_cast<const Booster*>(handle)->SaveModel(fname);
}
const char** XGBoosterDumpModel(void *handle, const char *fmap, size_t *len) {
  utils::FeatMap featmap;
  if (strlen(fmap) != 0) {
    featmap.LoadText(fmap);
  }
  return static_cast<Booster*>(handle)->GetModelDump(featmap, false, len);
}
}  // extern "C"

wrapper/xgboost_wrapper.h Normal file

@@ -0,0 +1,171 @@
#ifndef XGBOOST_WRAPPER_H_
#define XGBOOST_WRAPPER_H_
/*!
 * \file xgboost_wrapper.h
 * \author Tianqi Chen
 * \brief a C style wrapper of xgboost,
 *        can be used to create wrappers for other languages
 */
#include <cstdio>
extern "C" {
/*!
 * \brief load a data matrix
 * \param fname name of the data file
 * \param silent whether to print messages
 * \return a loaded data matrix
 */
void* XGDMatrixCreateFromFile(const char *fname, int silent);
/*!
 * \brief create a matrix from content in CSR format
 * \param indptr pointer to row headers
 * \param indices feature indices
 * \param data feature values
 * \param nindptr number of rows in the matrix + 1
 * \param nelem number of nonzero elements in the matrix
 * \return created dmatrix
 */
void* XGDMatrixCreateFromCSR(const size_t *indptr,
                             const unsigned *indices,
                             const float *data,
                             size_t nindptr,
                             size_t nelem);
/*!
 * \brief create matrix content from dense matrix
 * \param data pointer to the data space
 * \param nrow number of rows
 * \param ncol number of columns
 * \param missing which value to represent missing value
 * \return created dmatrix
 */
void* XGDMatrixCreateFromMat(const float *data,
                             size_t nrow,
                             size_t ncol,
                             float missing);
/*!
 * \brief create a new dmatrix from sliced content of existing matrix
 * \param handle instance of data matrix to be sliced
 * \param idxset index set
 * \param len length of index set
 * \return a sliced new matrix
 */
void* XGDMatrixSliceDMatrix(void *handle,
                            const int *idxset,
                            size_t len);
/*!
 * \brief free space in data matrix
 */
void XGDMatrixFree(void *handle);
/*!
 * \brief save a data matrix into a binary file
 * \param handle an instance of data matrix
 * \param fname file name
 * \param silent whether to print statistics when saving
 */
void XGDMatrixSaveBinary(void *handle, const char *fname, int silent);
/*!
 * \brief set a float vector as content of a field in info
 * \param handle an instance of data matrix
 * \param field field name, can be label, weight
 * \param array pointer to float vector
 * \param len length of array
 */
void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, size_t len);
/*!
 * \brief set group sizes of the training matrix, used for ranking
 * \param handle an instance of data matrix
 * \param group pointer to group sizes
 * \param len length of array
 */
void XGDMatrixSetGroup(void *handle, const unsigned *group, size_t len);
/*!
 * \brief get float info vector from matrix
 * \param handle an instance of data matrix
 * \param field field name
 * \param out_len used to set result length
 * \return pointer to the field content
 */
const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, size_t* out_len);
/*!
 * \brief return number of rows
 */
size_t XGDMatrixNumRow(const void *handle);
// --- start XGBoost class
/*!
 * \brief create xgboost learner
 * \param dmats matrices that are set to be cached
 * \param len length of dmats
 */
void *XGBoosterCreate(void* dmats[], size_t len);
/*!
 * \brief free obj in handle
 * \param handle handle to be freed
 */
void XGBoosterFree(void* handle);
/*!
 * \brief set parameters
 * \param handle handle
 * \param name parameter name
 * \param value value of parameter
 */
void XGBoosterSetParam(void *handle, const char *name, const char *value);
/*!
 * \brief update the model in one round using dtrain
 * \param handle handle
 * \param iter current iteration round
 * \param dtrain training data
 */
void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain);
/*!
 * \brief update the model by directly specifying the gradient and second order gradient;
 *        this can be used to replace UpdateOneIter, to support customized loss functions
 * \param handle handle
 * \param dtrain training data
 * \param grad gradient statistics
 * \param hess second order gradient statistics
 * \param len length of grad/hess array
 */
void XGBoosterBoostOneIter(void *handle, void *dtrain,
                           float *grad, float *hess, size_t len);
/*!
 * \brief get evaluation statistics for xgboost
 * \param handle handle
 * \param iter current iteration round
 * \param dmats pointers to data to be evaluated
 * \param evnames pointers to names of each data
 * \param len length of dmats
 * \return the string containing evaluation statistics
 */
const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[],
                                 const char *evnames[], size_t len);
/*!
 * \brief make prediction based on dmat
 * \param handle handle
 * \param dmat data matrix
 * \param output_margin whether to only output the raw margin value
 * \param len used to store length of the returned result
 */
const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, size_t *len);
/*!
 * \brief load model from existing file
 * \param handle handle
 * \param fname file name
 */
void XGBoosterLoadModel(void *handle, const char *fname);
/*!
 * \brief save model into file
 * \param handle handle
 * \param fname file name
 */
void XGBoosterSaveModel(const void *handle, const char *fname);
/*!
 * \brief dump model, return array of strings representing model dump
 * \param handle handle
 * \param fmap name of the feature map file, can be an empty string
 * \param out_len length of output array
 * \return char *data[], representing dump of each model
 */
const char **XGBoosterDumpModel(void *handle, const char *fmap,
                                size_t *out_len);
};
#endif  // XGBOOST_WRAPPER_H_
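Since every entry point above has C linkage, the shared library can be driven from any FFI, which is exactly what xgboost.py does via ctypes. A minimal ctypes sketch, assuming libxgboostwrapper.so and the demo data sit in the working directory:

```python
import ctypes

# path to the built shared library is an assumption; adjust to your build
lib = ctypes.cdll.LoadLibrary('./libxgboostwrapper.so')
# declare return types before calling, as xgboost.py does
lib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p
lib.XGDMatrixNumRow.restype = ctypes.c_ulong

# load a DMatrix from file (silent=1), query it, then free it
dmat = ctypes.c_void_p(lib.XGDMatrixCreateFromFile(b'agaricus.txt.train', 1))
print('rows:', lib.XGDMatrixNumRow(dmat))
lib.XGDMatrixFree(dmat)
```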