initial trial package
This commit is contained in:
1611
R-package/inst/examples/agaricus.txt.test
Normal file
1611
R-package/inst/examples/agaricus.txt.test
Normal file
File diff suppressed because it is too large
Load Diff
6513
R-package/inst/examples/agaricus.txt.train
Normal file
6513
R-package/inst/examples/agaricus.txt.train
Normal file
File diff suppressed because it is too large
Load Diff
127
R-package/inst/examples/demo.R
Normal file
127
R-package/inst/examples/demo.R
Normal file
@@ -0,0 +1,127 @@
|
||||
# load xgboost library
|
||||
require(xgboost)
|
||||
require(methods)
|
||||
|
||||
# helper function to read libsvm format
|
||||
# this is very badly written, load in dense, and convert to sparse
|
||||
# use this only for demo purpose
|
||||
# adopted from https://github.com/zygmuntz/r-libsvm-format-read-write/blob/master/f_read.libsvm.r
|
||||
read.libsvm <- function(fname, maxcol) {
|
||||
content <- readLines(fname)
|
||||
nline <- length(content)
|
||||
label <- numeric(nline)
|
||||
mat <- matrix(0, nline, maxcol+1)
|
||||
for (i in 1:nline) {
|
||||
arr <- as.vector(strsplit(content[i], " ")[[1]])
|
||||
label[i] <- as.numeric(arr[[1]])
|
||||
for (j in 2:length(arr)) {
|
||||
kv <- strsplit(arr[j], ":")[[1]]
|
||||
# to avoid 0 index
|
||||
findex <- as.integer(kv[1]) + 1
|
||||
fvalue <- as.numeric(kv[2])
|
||||
mat[i,findex] <- fvalue
|
||||
}
|
||||
}
|
||||
mat <- as(mat, "sparseMatrix")
|
||||
return(list(label=label, data=mat))
|
||||
}
|
||||
|
||||
# test code here
|
||||
dtrain <- xgb.DMatrix("agaricus.txt.train")
|
||||
dtest <- xgb.DMatrix("agaricus.txt.test")
|
||||
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
|
||||
watchlist <- list("eval"=dtest,"train"=dtrain)
|
||||
# training xgboost model
|
||||
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
|
||||
# make prediction
|
||||
preds <- xgb.predict(bst, dtest)
|
||||
labels <- xgb.getinfo(dtest, "label")
|
||||
err <- as.numeric(sum(as.integer(preds > 0.5) != labels)) / length(labels)
|
||||
# print error rate
|
||||
print(paste("error=",err))
|
||||
|
||||
# dump model
|
||||
xgb.dump(bst, "dump.raw.txt")
|
||||
# dump model with feature map
|
||||
xgb.dump(bst, "dump.nice.txt", "featmap.txt")
|
||||
|
||||
# save dmatrix into binary buffer
|
||||
succ <- xgb.save(dtest, "dtest.buffer")
|
||||
# save model into file
|
||||
succ <- xgb.save(bst, "xgb.model")
|
||||
# load model and data in
|
||||
bst2 <- xgb.Booster(modelfile="xgb.model")
|
||||
dtest2 <- xgb.DMatrix("dtest.buffer")
|
||||
preds2 <- xgb.predict(bst2, dtest2)
|
||||
# assert they are the same
|
||||
stopifnot(sum(abs(preds2-preds)) == 0)
|
||||
|
||||
###
|
||||
# build dmatrix from sparseMatrix
|
||||
###
|
||||
print ('start running example of build DMatrix from R.sparseMatrix')
|
||||
csc <- read.libsvm("agaricus.txt.train", 126)
|
||||
label <- csc$label
|
||||
data <- csc$data
|
||||
dtrain <- xgb.DMatrix(data, info=list(label=label) )
|
||||
watchlist <- list("eval"=dtest,"train"=dtrain)
|
||||
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
|
||||
|
||||
###
|
||||
# build dmatrix from dense matrix
|
||||
###
|
||||
print ('start running example of build DMatrix from R.Matrix')
|
||||
mat = as.matrix(data)
|
||||
dtrain <- xgb.DMatrix(mat, info=list(label=label) )
|
||||
watchlist <- list("eval"=dtest,"train"=dtrain)
|
||||
bst <- xgb.train(param, dtrain, nround=2, watchlist=watchlist)
|
||||
|
||||
###
|
||||
# advanced: cutomsized loss function
|
||||
#
|
||||
print("start running example to used cutomized objective function")
|
||||
# note: for customized objective function, we leave objective as default
|
||||
# note: what we are getting is margin value in prediction
|
||||
# you must know what you are doing
|
||||
param <- list("bst:max_depth" = 2, "bst:eta" = 1, "silent" =1)
|
||||
# user define objective function, given prediction, return gradient and second order gradient
|
||||
# this is loglikelihood loss
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- xgb.getinfo(dtrain, "label")
|
||||
preds <- 1.0 / (1.0 + exp(-preds))
|
||||
grad <- preds - labels
|
||||
hess <- preds * (1.0-preds)
|
||||
return(list(grad=grad, hess=hess))
|
||||
}
|
||||
# user defined evaluation function, return a list(metric="metric-name", value="metric-value")
|
||||
# NOTE: when you do customized loss function, the default prediction value is margin
|
||||
# this may make buildin evalution metric not function properly
|
||||
# for example, we are doing logistic loss, the prediction is score before logistic transformation
|
||||
# the buildin evaluation error assumes input is after logistic transformation
|
||||
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||
evalerror <- function(preds, dtrain) {
|
||||
labels <- xgb.getinfo(dtrain, "label")
|
||||
err <- as.numeric(sum(labels != (preds > 0.0))) / length(labels)
|
||||
return(list(metric="error", value=err))
|
||||
}
|
||||
|
||||
# training with customized objective, we can also do step by step training
|
||||
# simply look at xgboost.py"s implementation of train
|
||||
bst <- xgb.train(param, dtrain, nround=2, watchlist, logregobj, evalerror)
|
||||
|
||||
###
|
||||
# advanced: start from a initial base prediction
|
||||
#
|
||||
print ("start running example to start from a initial prediction")
|
||||
# specify parameters via map, definition are same as c++ version
|
||||
param = list("bst:max_depth"=2, "bst:eta"=1, "silent"=1, "objective"="binary:logistic")
|
||||
# train xgboost for 1 round
|
||||
bst <- xgb.train( param, dtrain, 1, watchlist )
|
||||
# Note: we need the margin value instead of transformed prediction in set_base_margin
|
||||
# do predict with output_margin=True, will always give you margin values before logistic transformation
|
||||
ptrain <- xgb.predict(bst, dtrain, outputmargin=TRUE)
|
||||
ptest <- xgb.predict(bst, dtest, outputmargin=TRUE)
|
||||
succ <- xgb.setinfo(dtrain, "base_margin", ptrain)
|
||||
succ <- xgb.setinfo(dtest, "base_margin", ptest)
|
||||
print ("this is result of running from initial prediction")
|
||||
bst <- xgb.train( param, dtrain, 1, watchlist )
|
||||
126
R-package/inst/examples/featmap.txt
Normal file
126
R-package/inst/examples/featmap.txt
Normal file
@@ -0,0 +1,126 @@
|
||||
0 cap-shape=bell i
|
||||
1 cap-shape=conical i
|
||||
2 cap-shape=convex i
|
||||
3 cap-shape=flat i
|
||||
4 cap-shape=knobbed i
|
||||
5 cap-shape=sunken i
|
||||
6 cap-surface=fibrous i
|
||||
7 cap-surface=grooves i
|
||||
8 cap-surface=scaly i
|
||||
9 cap-surface=smooth i
|
||||
10 cap-color=brown i
|
||||
11 cap-color=buff i
|
||||
12 cap-color=cinnamon i
|
||||
13 cap-color=gray i
|
||||
14 cap-color=green i
|
||||
15 cap-color=pink i
|
||||
16 cap-color=purple i
|
||||
17 cap-color=red i
|
||||
18 cap-color=white i
|
||||
19 cap-color=yellow i
|
||||
20 bruises?=bruises i
|
||||
21 bruises?=no i
|
||||
22 odor=almond i
|
||||
23 odor=anise i
|
||||
24 odor=creosote i
|
||||
25 odor=fishy i
|
||||
26 odor=foul i
|
||||
27 odor=musty i
|
||||
28 odor=none i
|
||||
29 odor=pungent i
|
||||
30 odor=spicy i
|
||||
31 gill-attachment=attached i
|
||||
32 gill-attachment=descending i
|
||||
33 gill-attachment=free i
|
||||
34 gill-attachment=notched i
|
||||
35 gill-spacing=close i
|
||||
36 gill-spacing=crowded i
|
||||
37 gill-spacing=distant i
|
||||
38 gill-size=broad i
|
||||
39 gill-size=narrow i
|
||||
40 gill-color=black i
|
||||
41 gill-color=brown i
|
||||
42 gill-color=buff i
|
||||
43 gill-color=chocolate i
|
||||
44 gill-color=gray i
|
||||
45 gill-color=green i
|
||||
46 gill-color=orange i
|
||||
47 gill-color=pink i
|
||||
48 gill-color=purple i
|
||||
49 gill-color=red i
|
||||
50 gill-color=white i
|
||||
51 gill-color=yellow i
|
||||
52 stalk-shape=enlarging i
|
||||
53 stalk-shape=tapering i
|
||||
54 stalk-root=bulbous i
|
||||
55 stalk-root=club i
|
||||
56 stalk-root=cup i
|
||||
57 stalk-root=equal i
|
||||
58 stalk-root=rhizomorphs i
|
||||
59 stalk-root=rooted i
|
||||
60 stalk-root=missing i
|
||||
61 stalk-surface-above-ring=fibrous i
|
||||
62 stalk-surface-above-ring=scaly i
|
||||
63 stalk-surface-above-ring=silky i
|
||||
64 stalk-surface-above-ring=smooth i
|
||||
65 stalk-surface-below-ring=fibrous i
|
||||
66 stalk-surface-below-ring=scaly i
|
||||
67 stalk-surface-below-ring=silky i
|
||||
68 stalk-surface-below-ring=smooth i
|
||||
69 stalk-color-above-ring=brown i
|
||||
70 stalk-color-above-ring=buff i
|
||||
71 stalk-color-above-ring=cinnamon i
|
||||
72 stalk-color-above-ring=gray i
|
||||
73 stalk-color-above-ring=orange i
|
||||
74 stalk-color-above-ring=pink i
|
||||
75 stalk-color-above-ring=red i
|
||||
76 stalk-color-above-ring=white i
|
||||
77 stalk-color-above-ring=yellow i
|
||||
78 stalk-color-below-ring=brown i
|
||||
79 stalk-color-below-ring=buff i
|
||||
80 stalk-color-below-ring=cinnamon i
|
||||
81 stalk-color-below-ring=gray i
|
||||
82 stalk-color-below-ring=orange i
|
||||
83 stalk-color-below-ring=pink i
|
||||
84 stalk-color-below-ring=red i
|
||||
85 stalk-color-below-ring=white i
|
||||
86 stalk-color-below-ring=yellow i
|
||||
87 veil-type=partial i
|
||||
88 veil-type=universal i
|
||||
89 veil-color=brown i
|
||||
90 veil-color=orange i
|
||||
91 veil-color=white i
|
||||
92 veil-color=yellow i
|
||||
93 ring-number=none i
|
||||
94 ring-number=one i
|
||||
95 ring-number=two i
|
||||
96 ring-type=cobwebby i
|
||||
97 ring-type=evanescent i
|
||||
98 ring-type=flaring i
|
||||
99 ring-type=large i
|
||||
100 ring-type=none i
|
||||
101 ring-type=pendant i
|
||||
102 ring-type=sheathing i
|
||||
103 ring-type=zone i
|
||||
104 spore-print-color=black i
|
||||
105 spore-print-color=brown i
|
||||
106 spore-print-color=buff i
|
||||
107 spore-print-color=chocolate i
|
||||
108 spore-print-color=green i
|
||||
109 spore-print-color=orange i
|
||||
110 spore-print-color=purple i
|
||||
111 spore-print-color=white i
|
||||
112 spore-print-color=yellow i
|
||||
113 population=abundant i
|
||||
114 population=clustered i
|
||||
115 population=numerous i
|
||||
116 population=scattered i
|
||||
117 population=several i
|
||||
118 population=solitary i
|
||||
119 habitat=grasses i
|
||||
120 habitat=leaves i
|
||||
121 habitat=meadows i
|
||||
122 habitat=paths i
|
||||
123 habitat=urban i
|
||||
124 habitat=waste i
|
||||
125 habitat=woods i
|
||||
Reference in New Issue
Block a user