re-structure Java API, add Scala API and consolidate the names of Java/Scala API

This commit is contained in:
CodingCat
2015-12-22 03:29:20 -06:00
parent fc4c88fceb
commit 3b246c2420
56 changed files with 2902 additions and 2384 deletions

View File

@@ -0,0 +1,152 @@
package org.dmlc.xgboost4j;
import java.io.IOException;
import java.util.Map;
public interface Booster {
/**
* set parameter
*
* @param key param name
* @param value param value
*/
void setParam(String key, String value) throws XGBoostError;
/**
* set parameters
*
* @param params parameters key-value map
*/
void setParams(Map<String, Object> params) throws XGBoostError;
/**
* Update (one iteration)
*
* @param dtrain training data
* @param iter current iteration number
*/
void update(DMatrix dtrain, int iter) throws XGBoostError;
/**
* update with customize obj func
*
* @param dtrain training data
* @param obj customized objective class
*/
void update(DMatrix dtrain, IObjective obj) throws XGBoostError;
/**
* update with give grad and hess
*
* @param dtrain training data
* @param grad first order of gradient
* @param hess seconde order of gradient
*/
void boost(DMatrix dtrain, float[] grad, float[] hess) throws XGBoostError;
/**
* evaluate with given dmatrixs.
*
* @param evalMatrixs dmatrixs for evaluation
* @param evalNames name for eval dmatrixs, used for check results
* @param iter current eval iteration
* @return eval information
*/
String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XGBoostError;
/**
* evaluate with given customized Evaluation class
*
* @param evalMatrixs evaluation matrix
* @param evalNames evaluation names
* @param eval custom evaluator
* @return eval information
*/
String evalSet(DMatrix[] evalMatrixs, String[] evalNames, IEvaluation eval) throws XGBoostError;
/**
* Predict with data
*
* @param data dmatrix storing the input
* @return predict result
*/
float[][] predict(DMatrix data) throws XGBoostError;
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @return predict result
*/
float[][] predict(DMatrix data, boolean outPutMargin) throws XGBoostError;
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @return predict result
*/
float[][] predict(DMatrix data, boolean outPutMargin, int treeLimit) throws XGBoostError;
/**
* Predict with data
* @param data dmatrix storing the input
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @param predLeaf When this option is on, the output will be a matrix of (nsample, ntrees), nsample = data.numRow
with each record indicating the predicted leaf index of each sample in each tree.
Note that the leaf index of a tree is unique per tree, so you may find leaf 1
in both tree 1 and tree 0.
* @return predict result
* @throws XGBoostError native error
*/
float[][] predict(DMatrix data, int treeLimit, boolean predLeaf) throws XGBoostError;
/**
* save model to modelPath
*
* @param modelPath model path
*/
void saveModel(String modelPath) throws XGBoostError;
/**
* Dump model into a text file.
*
* @param modelPath file to save dumped model info
* @param withStats bool Controls whether the split statistics are output.
*/
void dumpModel(String modelPath, boolean withStats) throws IOException, XGBoostError;
/**
* Dump model into a text file.
*
* @param modelPath file to save dumped model info
* @param featureMap featureMap file
* @param withStats bool
* Controls whether the split statistics are output.
*/
void dumpModel(String modelPath, String featureMap, boolean withStats) throws IOException, XGBoostError;
/**
* get importance of each feature
*
* @return featureMap key: feature index, value: feature importance score
*/
Map<String, Integer> getFeatureScore() throws XGBoostError ;
/**
* get importance of each feature
*
* @param featureMap file to save dumped model info
* @return featureMap key: feature index, value: feature importance score
*/
Map<String, Integer> getFeatureScore(String featureMap) throws XGBoostError;
void dispose();
}

View File

@@ -0,0 +1,265 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
/**
* DMatrix for xgboost, similar to the python wrapper xgboost.py
*
* @author hzx
*/
public class DMatrix {
private static final Log logger = LogFactory.getLog(DMatrix.class);
private long handle = 0;
//load native library
static {
try {
NativeLibLoader.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* sparse matrix type (CSR or CSC)
*/
public static enum SparseType {
CSR,
CSC;
}
public DMatrix(String dataPath) throws XGBoostError {
if (dataPath == null) {
throw new NullPointerException("dataPath: null");
}
long[] out = new long[1];
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1, out));
handle = out[0];
}
public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) throws XGBoostError {
long[] out = new long[1];
if (st == SparseType.CSR) {
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSR(headers, indices, data, out));
} else if (st == SparseType.CSC) {
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSC(headers, indices, data, out));
} else {
throw new UnknownError("unknow sparsetype");
}
handle = out[0];
}
/**
* create DMatrix from dense matrix
*
* @param data data values
* @param nrow number of rows
* @param ncol number of columns
* @throws XGBoostError native error
*/
public DMatrix(float[] data, int nrow, int ncol) throws XGBoostError {
long[] out = new long[1];
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, 0.0f, out));
handle = out[0];
}
/**
* used for DMatrix slice
*
* @param handle
*/
protected DMatrix(long handle) {
this.handle = handle;
}
/**
* set label of dmatrix
*
* @param labels labels
* @throws XGBoostError native error
*/
public void setLabel(float[] labels) throws XGBoostError {
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "label", labels));
}
/**
* set weight of each instance
*
* @param weights weights
* @throws XGBoostError native error
*/
public void setWeight(float[] weights) throws XGBoostError {
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights));
}
/**
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from
*
* @param baseMargin base margin
* @throws XGBoostError native error
*/
public void setBaseMargin(float[] baseMargin) throws XGBoostError {
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin));
}
/**
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from
*
* @param baseMargin base margin
* @throws XGBoostError native error
*/
public void setBaseMargin(float[][] baseMargin) throws XGBoostError {
float[] flattenMargin = flatten(baseMargin);
setBaseMargin(flattenMargin);
}
/**
* Set group sizes of DMatrix (used for ranking)
*
* @param group group size as array
* @throws XGBoostError native error
*/
public void setGroup(int[] group) throws XGBoostError {
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixSetGroup(handle, group));
}
private float[] getFloatInfo(String field) throws XGBoostError {
float[][] infos = new float[1][];
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixGetFloatInfo(handle, field, infos));
return infos[0];
}
private int[] getIntInfo(String field) throws XGBoostError {
int[][] infos = new int[1][];
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixGetUIntInfo(handle, field, infos));
return infos[0];
}
/**
* get label values
*
* @return label
* @throws XGBoostError native error
*/
public float[] getLabel() throws XGBoostError {
return getFloatInfo("label");
}
/**
* get weight of the DMatrix
*
* @return weights
* @throws XGBoostError native error
*/
public float[] getWeight() throws XGBoostError {
return getFloatInfo("weight");
}
/**
* get base margin of the DMatrix
*
* @return base margin
* @throws XGBoostError native error
*/
public float[] getBaseMargin() throws XGBoostError {
return getFloatInfo("base_margin");
}
/**
* Slice the DMatrix and return a new DMatrix that only contains `rowIndex`.
*
* @param rowIndex row index
* @return sliced new DMatrix
* @throws XGBoostError native error
*/
public DMatrix slice(int[] rowIndex) throws XGBoostError {
long[] out = new long[1];
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixSliceDMatrix(handle, rowIndex, out));
long sHandle = out[0];
DMatrix sMatrix = new DMatrix(sHandle);
return sMatrix;
}
/**
* get the row number of DMatrix
*
* @return number of rows
* @throws XGBoostError native error
*/
public long rowNum() throws XGBoostError {
long[] rowNum = new long[1];
JNIErrorHandle.checkCall(XgboostJNI.XGDMatrixNumRow(handle, rowNum));
return rowNum[0];
}
/**
* save DMatrix to filePath
*
* @param filePath file path
*/
public void saveBinary(String filePath) {
XgboostJNI.XGDMatrixSaveBinary(handle, filePath, 1);
}
/**
* Get the handle
*
* @return native handler id
*/
public long getHandle() {
return handle;
}
/**
* flatten a mat to array
*
* @param mat
* @return
*/
private static float[] flatten(float[][] mat) {
int size = 0;
for (float[] array : mat) size += array.length;
float[] result = new float[size];
int pos = 0;
for (float[] ar : mat) {
System.arraycopy(ar, 0, result, pos, ar.length);
pos += ar.length;
}
return result;
}
@Override
protected void finalize() {
dispose();
}
public synchronized void dispose() {
if (handle != 0) {
XgboostJNI.XGDMatrixFree(handle);
handle = 0;
}
}
}

View File

@@ -0,0 +1,39 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
/**
* interface for customized evaluation
*
* @author hzx
*/
public interface IEvaluation {
/**
* get evaluate metric
*
* @return evalMetric
*/
String getMetric();
/**
* evaluate with predicts and data
*
* @param predicts predictions as array
* @param dmat data matrix to evaluate
* @return result of the metric
*/
float eval(float[][] predicts, DMatrix dmat);
}

View File

@@ -0,0 +1,34 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import java.util.List;
/**
* interface for customize Object function
*
* @author hzx
*/
public interface IObjective {
/**
* user define objective function, return gradient and second order gradient
*
* @param predicts untransformed margin predicts
* @param dtrain training data
* @return List with two float array, correspond to first order grad and second order grad
*/
List<float[]> getGradient(float[][] predicts, DMatrix dtrain);
}

View File

@@ -0,0 +1,51 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
/**
* Error handle for Xgboost.
*/
class JNIErrorHandle {
private static final Log logger = LogFactory.getLog(DMatrix.class);
//load native library
static {
try {
NativeLibLoader.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* Check the return value of C API.
*
* @param ret return valud of xgboostJNI C API call
* @throws XGBoostError native error
*/
static void checkCall(int ret) throws XGBoostError {
if (ret != 0) {
throw new XGBoostError(XgboostJNI.XGBGetLastError());
}
}
}

View File

@@ -0,0 +1,467 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Booster for xgboost, similar to the python wrapper xgboost.py
* but custom obj function and eval function not supported at present.
*
* @author hzx
*/
class JavaBoosterImpl implements Booster {
private static final Log logger = LogFactory.getLog(JavaBoosterImpl.class);
long handle = 0;
//load native library
static {
try {
NativeLibLoader.InitXgboost();
} catch (IOException ex) {
logger.error("load native library failed.");
logger.error(ex);
}
}
/**
* init Booster from dMatrixs
*
* @param params parameters
* @param dMatrixs DMatrix array
* @throws XGBoostError native error
*/
JavaBoosterImpl(Map<String, Object> params, DMatrix[] dMatrixs) throws XGBoostError {
init(dMatrixs);
setParam("seed", "0");
setParams(params);
}
/**
* load model from modelPath
*
* @param params parameters
* @param modelPath booster modelPath (model generated by booster.saveModel)
* @throws XGBoostError native error
*/
JavaBoosterImpl(Map<String, Object> params, String modelPath) throws XGBoostError {
init(null);
if (modelPath == null) {
throw new NullPointerException("modelPath : null");
}
loadModel(modelPath);
setParam("seed", "0");
setParams(params);
}
private void init(DMatrix[] dMatrixs) throws XGBoostError {
long[] handles = null;
if (dMatrixs != null) {
handles = dMatrixs2handles(dMatrixs);
}
long[] out = new long[1];
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterCreate(handles, out));
handle = out[0];
}
/**
* set parameter
*
* @param key param name
* @param value param value
* @throws XGBoostError native error
*/
public final void setParam(String key, String value) throws XGBoostError {
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterSetParam(handle, key, value));
}
/**
* set parameters
*
* @param params parameters key-value map
* @throws XGBoostError native error
*/
public void setParams(Map<String, Object> params) throws XGBoostError {
if (params != null) {
for (Map.Entry<String, Object> entry : params.entrySet()) {
setParam(entry.getKey(), entry.getValue().toString());
}
}
}
/**
* Update (one iteration)
*
* @param dtrain training data
* @param iter current iteration number
* @throws XGBoostError native error
*/
public void update(DMatrix dtrain, int iter) throws XGBoostError {
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle()));
}
/**
* update with customize obj func
*
* @param dtrain training data
* @param obj customized objective class
* @throws XGBoostError native error
*/
public void update(DMatrix dtrain, IObjective obj) throws XGBoostError {
float[][] predicts = predict(dtrain, true);
List<float[]> gradients = obj.getGradient(predicts, dtrain);
boost(dtrain, gradients.get(0), gradients.get(1));
}
/**
* update with give grad and hess
*
* @param dtrain training data
* @param grad first order of gradient
* @param hess seconde order of gradient
* @throws XGBoostError native error
*/
public void boost(DMatrix dtrain, float[] grad, float[] hess) throws XGBoostError {
if (grad.length != hess.length) {
throw new AssertionError(String.format("grad/hess length mismatch %s / %s", grad.length,
hess.length));
}
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterBoostOneIter(handle, dtrain.getHandle(), grad, hess));
}
/**
* evaluate with given dmatrixs.
*
* @param evalMatrixs dmatrixs for evaluation
* @param evalNames name for eval dmatrixs, used for check results
* @param iter current eval iteration
* @return eval information
* @throws XGBoostError native error
*/
public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XGBoostError {
long[] handles = dMatrixs2handles(evalMatrixs);
String[] evalInfo = new String[1];
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames, evalInfo));
return evalInfo[0];
}
/**
* evaluate with given customized Evaluation class
*
* @param evalMatrixs evaluation matrix
* @param evalNames evaluation names
* @param eval custom evaluator
* @return eval information
* @throws XGBoostError native error
*/
public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, IEvaluation eval)
throws XGBoostError {
String evalInfo = "";
for (int i = 0; i < evalNames.length; i++) {
String evalName = evalNames[i];
DMatrix evalMat = evalMatrixs[i];
float evalResult = eval.eval(predict(evalMat), evalMat);
String evalMetric = eval.getMetric();
evalInfo += String.format("\t%s-%s:%f", evalName, evalMetric, evalResult);
}
return evalInfo;
}
/**
* base function for Predict
*
* @param data data
* @param outPutMargin output margin
* @param treeLimit limit number of trees
* @param predLeaf prediction minimum to keep leafs
* @return predict results
*/
private synchronized float[][] pred(DMatrix data, boolean outPutMargin, int treeLimit,
boolean predLeaf) throws XGBoostError {
int optionMask = 0;
if (outPutMargin) {
optionMask = 1;
}
if (predLeaf) {
optionMask = 2;
}
float[][] rawPredicts = new float[1][];
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterPredict(handle, data.getHandle(), optionMask,
treeLimit, rawPredicts));
int row = (int) data.rowNum();
int col = rawPredicts[0].length / row;
float[][] predicts = new float[row][col];
int r, c;
for (int i = 0; i < rawPredicts[0].length; i++) {
r = i / col;
c = i % col;
predicts[r][c] = rawPredicts[0][i];
}
return predicts;
}
/**
* Predict with data
*
* @param data dmatrix storing the input
* @return predict result
* @throws XGBoostError native error
*/
public float[][] predict(DMatrix data) throws XGBoostError {
return pred(data, false, 0, false);
}
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @return predict result
* @throws XGBoostError native error
*/
public float[][] predict(DMatrix data, boolean outPutMargin) throws XGBoostError {
return pred(data, outPutMargin, 0, false);
}
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @return predict result
* @throws XGBoostError native error
*/
public float[][] predict(DMatrix data, boolean outPutMargin, int treeLimit) throws XGBoostError {
return pred(data, outPutMargin, treeLimit, false);
}
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @param predLeaf When this option is on, the output will be a matrix of (nsample, ntrees),
* nsample = data.numRow with each record indicating the predicted leaf index
* of each sample in each tree.
* Note that the leaf index of a tree is unique per tree, so you may find leaf 1
* in both tree 1 and tree 0.
* @return predict result
* @throws XGBoostError native error
*/
public float[][] predict(DMatrix data, int treeLimit, boolean predLeaf) throws XGBoostError {
return pred(data, false, treeLimit, predLeaf);
}
/**
* save model to modelPath
*
* @param modelPath model path
*/
public void saveModel(String modelPath) throws XGBoostError{
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterSaveModel(handle, modelPath));
}
private void loadModel(String modelPath) {
XgboostJNI.XGBoosterLoadModel(handle, modelPath);
}
/**
* get the dump of the model as a string array
*
* @param withStats Controls whether the split statistics are output.
* @return dumped model information
* @throws XGBoostError native error
*/
private String[] getDumpInfo(boolean withStats) throws XGBoostError {
int statsFlag = 0;
if (withStats) {
statsFlag = 1;
}
String[][] modelInfos = new String[1][];
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterDumpModel(handle, "", statsFlag, modelInfos));
return modelInfos[0];
}
/**
* get the dump of the model as a string array
*
* @param featureMap featureMap file
* @param withStats Controls whether the split statistics are output.
* @return dumped model information
* @throws XGBoostError native error
*/
private String[] getDumpInfo(String featureMap, boolean withStats) throws XGBoostError {
int statsFlag = 0;
if (withStats) {
statsFlag = 1;
}
String[][] modelInfos = new String[1][];
JNIErrorHandle.checkCall(XgboostJNI.XGBoosterDumpModel(handle, featureMap, statsFlag, modelInfos));
return modelInfos[0];
}
/**
* Dump model into a text file.
*
* @param modelPath file to save dumped model info
* @param withStats bool
* Controls whether the split statistics are output.
* @throws FileNotFoundException file not found
* @throws UnsupportedEncodingException unsupported feature
* @throws IOException error with model writing
* @throws XGBoostError native error
*/
public void dumpModel(String modelPath, boolean withStats) throws IOException, XGBoostError {
File tf = new File(modelPath);
FileOutputStream out = new FileOutputStream(tf);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
String[] modelInfos = getDumpInfo(withStats);
for (int i = 0; i < modelInfos.length; i++) {
writer.write("booster [" + i + "]:\n");
writer.write(modelInfos[i]);
}
writer.close();
out.close();
}
/**
* Dump model into a text file.
*
* @param modelPath file to save dumped model info
* @param featureMap featureMap file
* @param withStats bool
* Controls whether the split statistics are output.
* @throws FileNotFoundException exception
* @throws UnsupportedEncodingException exception
* @throws IOException exception
* @throws XGBoostError native error
*/
public void dumpModel(String modelPath, String featureMap, boolean withStats) throws
IOException, XGBoostError {
File tf = new File(modelPath);
FileOutputStream out = new FileOutputStream(tf);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
String[] modelInfos = getDumpInfo(featureMap, withStats);
for (int i = 0; i < modelInfos.length; i++) {
writer.write("booster [" + i + "]:\n");
writer.write(modelInfos[i]);
}
writer.close();
out.close();
}
/**
* get importance of each feature
*
* @return featureMap key: feature index, value: feature importance score
* @throws XGBoostError native error
*/
public Map<String, Integer> getFeatureScore() throws XGBoostError {
String[] modelInfos = getDumpInfo(false);
Map<String, Integer> featureScore = new HashMap<String, Integer>();
for (String tree : modelInfos) {
for (String node : tree.split("\n")) {
String[] array = node.split("\\[");
if (array.length == 1) {
continue;
}
String fid = array[1].split("\\]")[0];
fid = fid.split("<")[0];
if (featureScore.containsKey(fid)) {
featureScore.put(fid, 1 + featureScore.get(fid));
} else {
featureScore.put(fid, 1);
}
}
}
return featureScore;
}
/**
* get importance of each feature
*
* @param featureMap file to save dumped model info
* @return featureMap key: feature index, value: feature importance score
* @throws XGBoostError native error
*/
public Map<String, Integer> getFeatureScore(String featureMap) throws XGBoostError {
String[] modelInfos = getDumpInfo(featureMap, false);
Map<String, Integer> featureScore = new HashMap<String, Integer>();
for (String tree : modelInfos) {
for (String node : tree.split("\n")) {
String[] array = node.split("\\[");
if (array.length == 1) {
continue;
}
String fid = array[1].split("\\]")[0];
fid = fid.split("<")[0];
if (featureScore.containsKey(fid)) {
featureScore.put(fid, 1 + featureScore.get(fid));
} else {
featureScore.put(fid, 1);
}
}
}
return featureScore;
}
/**
* transfer DMatrix array to handle array (used for native functions)
*
* @param dmatrixs
* @return handle array for input dmatrixs
*/
private static long[] dMatrixs2handles(DMatrix[] dmatrixs) {
long[] handles = new long[dmatrixs.length];
for (int i = 0; i < dmatrixs.length; i++) {
handles[i] = dmatrixs[i].getHandle();
}
return handles;
}
@Override
protected void finalize() throws Throwable {
super.finalize();
dispose();
}
public synchronized void dispose() {
if (handle != 0L) {
XgboostJNI.XGBoosterFree(handle);
handle = 0;
}
}
}

View File

@@ -0,0 +1,168 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.*;
import java.lang.reflect.Field;
/**
* class to load native library
*
* @author hzx
*/
class NativeLibLoader {
private static final Log logger = LogFactory.getLog(NativeLibLoader.class);
private static boolean initialized = false;
private static final String nativePath = "../lib/";
private static final String nativeResourcePath = "/lib/";
private static final String[] libNames = new String[]{"xgboost4j"};
public static synchronized void InitXgboost() throws IOException {
if (!initialized) {
for (String libName : libNames) {
smartLoad(libName);
}
initialized = true;
}
}
/**
* Loads library from current JAR archive
* <p/>
* The file from JAR is copied into system temporary directory and then loaded.
* The temporary file is deleted after exiting.
* Method uses String as filename because the pathname is "abstract", not system-dependent.
* <p/>
* The restrictions of {@link File#createTempFile(java.lang.String, java.lang.String)} apply to {@code path}.
*
* @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext
* @throws IOException If temporary file creation or read/write operation fails
* @throws IllegalArgumentException If source file (param path) does not exist
* @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters
*/
private static void loadLibraryFromJar(String path) throws IOException {
if (!path.startsWith("/")) {
throw new IllegalArgumentException("The path has to be absolute (start with '/').");
}
// Obtain filename from path
String[] parts = path.split("/");
String filename = (parts.length > 1) ? parts[parts.length - 1] : null;
// Split filename to prexif and suffix (extension)
String prefix = "";
String suffix = null;
if (filename != null) {
parts = filename.split("\\.", 2);
prefix = parts[0];
suffix = (parts.length > 1) ? "." + parts[parts.length - 1] : null; // Thanks, davs! :-)
}
// Check if the filename is okay
if (filename == null || prefix.length() < 3) {
throw new IllegalArgumentException("The filename has to be at least 3 characters long.");
}
// Prepare temporary file
File temp = File.createTempFile(prefix, suffix);
temp.deleteOnExit();
if (!temp.exists()) {
throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist.");
}
// Prepare buffer for data copying
byte[] buffer = new byte[1024];
int readBytes;
// Open and check input stream
InputStream is = NativeLibLoader.class.getResourceAsStream(path);
if (is == null) {
throw new FileNotFoundException("File " + path + " was not found inside JAR.");
}
// Open output stream and copy data between source file in JAR and the temporary file
OutputStream os = new FileOutputStream(temp);
try {
while ((readBytes = is.read(buffer)) != -1) {
os.write(buffer, 0, readBytes);
}
} finally {
// If read/write fails, close streams safely before throwing an exception
os.close();
is.close();
}
// Finally, load the library
System.load(temp.getAbsolutePath());
}
/**
* load native library, this method will first try to load library from java.library.path, then
* try to load library in jar package.
*
* @param libName library path
* @throws IOException exception
*/
private static void smartLoad(String libName) throws IOException {
addNativeDir(nativePath);
try {
System.loadLibrary(libName);
System.out.println("======load " + libName + " successfully");
} catch (UnsatisfiedLinkError e) {
try {
String libraryFromJar = nativeResourcePath + System.mapLibraryName(libName);
loadLibraryFromJar(libraryFromJar);
} catch (IOException e1) {
throw e1;
}
}
}
/**
* Add libPath to java.library.path, then native library in libPath would be load properly
*
* @param libPath library path
* @throws IOException exception
*/
private static void addNativeDir(String libPath) throws IOException {
try {
Field field = ClassLoader.class.getDeclaredField("usr_paths");
field.setAccessible(true);
String[] paths = (String[]) field.get(null);
for (String path : paths) {
if (libPath.equals(path)) {
return;
}
}
String[] tmp = new String[paths.length + 1];
System.arraycopy(paths, 0, tmp, 0, paths.length);
tmp[paths.length] = libPath;
field.set(null, tmp);
} catch (IllegalAccessException e) {
logger.error(e.getMessage());
throw new IOException("Failed to get permissions to set library path");
} catch (NoSuchFieldException e) {
logger.error(e.getMessage());
throw new IOException("Failed to get field handle to set library path");
}
}
}

View File

@@ -0,0 +1,336 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.util.*;
/**
* trainer for xgboost
*
* @author hzx
*/
public class XGBoost {
private static final Log logger = LogFactory.getLog(XGBoost.class);
/**
* Train a booster with given parameters.
*
* @param params Booster params.
* @param dtrain Data to be trained.
* @param round Number of boosting iterations.
* @param watches a group of items to be evaluated during training, this allows user to watch
* performance on the validation set.
* @param obj customized objective (set to null if not used)
* @param eval customized evaluation (set to null if not used)
* @return trained booster
* @throws XGBoostError native error
*/
public static Booster train(Map<String, Object> params, DMatrix dtrain, int round,
Map<String, DMatrix> watches, IObjective obj,
IEvaluation eval) throws XGBoostError {
//collect eval matrixs
String[] evalNames;
DMatrix[] evalMats;
List<String> names = new ArrayList<String>();
List<DMatrix> mats = new ArrayList<DMatrix>();
for (Map.Entry<String, DMatrix> evalEntry : watches.entrySet()) {
names.add(evalEntry.getKey());
mats.add(evalEntry.getValue());
}
evalNames = names.toArray(new String[names.size()]);
evalMats = mats.toArray(new DMatrix[mats.size()]);
//collect all data matrixs
DMatrix[] allMats;
if (evalMats != null && evalMats.length > 0) {
allMats = new DMatrix[evalMats.length + 1];
allMats[0] = dtrain;
System.arraycopy(evalMats, 0, allMats, 1, evalMats.length);
} else {
allMats = new DMatrix[1];
allMats[0] = dtrain;
}
//initialize booster
Booster booster = new JavaBoosterImpl(params, allMats);
//begin to train
for (int iter = 0; iter < round; iter++) {
if (obj != null) {
booster.update(dtrain, obj);
} else {
booster.update(dtrain, iter);
}
//evaluation
if (evalMats != null && evalMats.length > 0) {
String evalInfo;
if (eval != null) {
evalInfo = booster.evalSet(evalMats, evalNames, eval);
} else {
evalInfo = booster.evalSet(evalMats, evalNames, iter);
}
logger.info(evalInfo);
}
}
return booster;
}
/**
* init Booster from dMatrixs
*
* @param params parameters
* @param dMatrixs DMatrix array
* @throws XGBoostError native error
*/
public static Booster initBoostingModel(
Map<String, Object> params,
DMatrix[] dMatrixs) throws XGBoostError {
return new JavaBoosterImpl(params, dMatrixs);
}
/**
* load model from modelPath
*
* @param params parameters
* @param modelPath booster modelPath (model generated by booster.saveModel)
* @throws XGBoostError native error
*/
public static Booster loadBoostModel(Map<String, Object> params, String modelPath)
throws XGBoostError {
return new JavaBoosterImpl(params, modelPath);
}
/**
* Cross-validation with given paramaters.
*
* @param params Booster params.
* @param data Data to be trained.
* @param round Number of boosting iterations.
* @param nfold Number of folds in CV.
* @param metrics Evaluation metrics to be watched in CV.
* @param obj customized objective (set to null if not used)
* @param eval customized evaluation (set to null if not used)
* @return evaluation history
* @throws XGBoostError native error
*/
public static String[] crossValiation(
Map<String, Object> params,
DMatrix data,
int round,
int nfold,
String[] metrics,
IObjective obj,
IEvaluation eval) throws XGBoostError {
CVPack[] cvPacks = makeNFold(data, nfold, params, metrics);
String[] evalHist = new String[round];
String[] results = new String[cvPacks.length];
for (int i = 0; i < round; i++) {
for (CVPack cvPack : cvPacks) {
if (obj != null) {
cvPack.update(obj);
} else {
cvPack.update(i);
}
}
for (int j = 0; j < cvPacks.length; j++) {
if (eval != null) {
results[j] = cvPacks[j].eval(eval);
} else {
results[j] = cvPacks[j].eval(i);
}
}
evalHist[i] = aggCVResults(results);
logger.info(evalHist[i]);
}
return evalHist;
}
/**
* make an n-fold array of CVPack from random indices
*
* @param data original data
* @param nfold num of folds
* @param params booster parameters
* @param evalMetrics Evaluation metrics
* @return CV package array
* @throws XGBoostError native error
*/
private static CVPack[] makeNFold(DMatrix data, int nfold, Map<String, Object> params,
String[] evalMetrics) throws XGBoostError {
List<Integer> samples = genRandPermutationNums(0, (int) data.rowNum());
int step = samples.size() / nfold;
int[] testSlice = new int[step];
int[] trainSlice = new int[samples.size() - step];
int testid, trainid;
CVPack[] cvPacks = new CVPack[nfold];
for (int i = 0; i < nfold; i++) {
testid = 0;
trainid = 0;
for (int j = 0; j < samples.size(); j++) {
if (j > (i * step) && j < (i * step + step) && testid < step) {
testSlice[testid] = samples.get(j);
testid++;
} else {
if (trainid < samples.size() - step) {
trainSlice[trainid] = samples.get(j);
trainid++;
} else {
testSlice[testid] = samples.get(j);
testid++;
}
}
}
DMatrix dtrain = data.slice(trainSlice);
DMatrix dtest = data.slice(testSlice);
CVPack cvPack = new CVPack(dtrain, dtest, params);
//set eval types
if (evalMetrics != null) {
for (String type : evalMetrics) {
cvPack.booster.setParam("eval_metric", type);
}
}
cvPacks[i] = cvPack;
}
return cvPacks;
}
private static List<Integer> genRandPermutationNums(int start, int end) {
List<Integer> samples = new ArrayList<Integer>();
for (int i = start; i < end; i++) {
samples.add(i);
}
Collections.shuffle(samples);
return samples;
}
/**
* Aggregate cross-validation results.
*
* @param results eval info from each data sample
* @return cross-validation eval info
*/
private static String aggCVResults(String[] results) {
Map<String, List<Float>> cvMap = new HashMap<String, List<Float>>();
String aggResult = results[0].split("\t")[0];
for (String result : results) {
String[] items = result.split("\t");
for (int i = 1; i < items.length; i++) {
String[] tup = items[i].split(":");
String key = tup[0];
Float value = Float.valueOf(tup[1]);
if (!cvMap.containsKey(key)) {
cvMap.put(key, new ArrayList<Float>());
}
cvMap.get(key).add(value);
}
}
for (String key : cvMap.keySet()) {
float value = 0f;
for (Float tvalue : cvMap.get(key)) {
value += tvalue;
}
value /= cvMap.get(key).size();
aggResult += String.format("\tcv-%s:%f", key, value);
}
return aggResult;
}
/**
* cross validation package for xgb
*
* @author hzx
*/
private static class CVPack {
DMatrix dtrain;
DMatrix dtest;
DMatrix[] dmats;
String[] names;
Booster booster;
/**
* create an cross validation package
*
* @param dtrain train data
* @param dtest test data
* @param params parameters
* @throws XGBoostError native error
*/
public CVPack(DMatrix dtrain, DMatrix dtest, Map<String, Object> params)
throws XGBoostError {
dmats = new DMatrix[]{dtrain, dtest};
booster = XGBoost.initBoostingModel(params, dmats);
names = new String[]{"train", "test"};
this.dtrain = dtrain;
this.dtest = dtest;
}
/**
* update one iteration
*
* @param iter iteration num
* @throws XGBoostError native error
*/
public void update(int iter) throws XGBoostError {
booster.update(dtrain, iter);
}
/**
* update one iteration
*
* @param obj customized objective
* @throws XGBoostError native error
*/
public void update(IObjective obj) throws XGBoostError {
booster.update(dtrain, obj);
}
/**
* evaluation
*
* @param iter iteration num
* @return evaluation
* @throws XGBoostError native error
*/
public String eval(int iter) throws XGBoostError {
return booster.evalSet(dmats, names, iter);
}
/**
* evaluation
*
* @param eval customized eval
* @return evaluation
* @throws XGBoostError native error
*/
public String eval(IEvaluation eval) throws XGBoostError {
return booster.evalSet(dmats, names, eval);
}
}
}

View File

@@ -0,0 +1,27 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
/**
* custom error class for xgboost
*
* @author hzx
*/
public class XGBoostError extends Exception {
public XGBoostError(String message) {
super(message);
}
}

View File

@@ -0,0 +1,83 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
/**
* xgboost jni wrapper functions for xgboost_wrapper.h
* change 2015-7-6: *use a long[] (length=1) as container of handle to get the output DMatrix or Booster
*
* @author hzx
*/
class XgboostJNI {
public final static native String XGBGetLastError();
public final static native int XGDMatrixCreateFromFile(String fname, int silent, long[] out);
public final static native int XGDMatrixCreateFromCSR(long[] indptr, int[] indices, float[] data,
long[] out);
public final static native int XGDMatrixCreateFromCSC(long[] colptr, int[] indices, float[] data,
long[] out);
public final static native int XGDMatrixCreateFromMat(float[] data, int nrow, int ncol,
float missing, long[] out);
public final static native int XGDMatrixSliceDMatrix(long handle, int[] idxset, long[] out);
public final static native int XGDMatrixFree(long handle);
public final static native int XGDMatrixSaveBinary(long handle, String fname, int silent);
public final static native int XGDMatrixSetFloatInfo(long handle, String field, float[] array);
public final static native int XGDMatrixSetUIntInfo(long handle, String field, int[] array);
public final static native int XGDMatrixSetGroup(long handle, int[] group);
public final static native int XGDMatrixGetFloatInfo(long handle, String field, float[][] info);
public final static native int XGDMatrixGetUIntInfo(long handle, String filed, int[][] info);
public final static native int XGDMatrixNumRow(long handle, long[] row);
public final static native int XGBoosterCreate(long[] handles, long[] out);
public final static native int XGBoosterFree(long handle);
public final static native int XGBoosterSetParam(long handle, String name, String value);
public final static native int XGBoosterUpdateOneIter(long handle, int iter, long dtrain);
public final static native int XGBoosterBoostOneIter(long handle, long dtrain, float[] grad,
float[] hess);
public final static native int XGBoosterEvalOneIter(long handle, int iter, long[] dmats,
String[] evnames, String[] eval_info);
public final static native int XGBoosterPredict(long handle, long dmat, int option_mask,
int ntree_limit, float[][] predicts);
public final static native int XGBoosterLoadModel(long handle, String fname);
public final static native int XGBoosterSaveModel(long handle, String fname);
public final static native int XGBoosterLoadModelFromBuffer(long handle, long buf, long len);
public final static native int XGBoosterGetModelRaw(long handle, String[] out_string);
public final static native int XGBoosterDumpModel(long handle, String fmap, int with_stats,
String[][] out_strings);
}

View File

@@ -0,0 +1,172 @@
package org.dmlc.xgboost4j.scala
import java.io.IOException
import scala.collection.mutable
import org.dmlc.xgboost4j.{IEvaluation, IObjective, XGBoostError}
trait Booster {
/**
* set parameter
*
* @param key param name
* @param value param value
*/
@throws(classOf[XGBoostError])
def setParam(key: String, value: String)
/**
* set parameters
*
* @param params parameters key-value map
*/
@throws(classOf[XGBoostError])
def setParams(params: Map[String, AnyRef])
/**
* Update (one iteration)
*
* @param dtrain training data
* @param iter current iteration number
*/
@throws(classOf[XGBoostError])
def update(dtrain: DMatrix, iter: Int)
/**
* update with customize obj func
*
* @param dtrain training data
* @param obj customized objective class
*/
@throws(classOf[XGBoostError])
def update(dtrain: DMatrix, obj: IObjective)
/**
* update with give grad and hess
*
* @param dtrain training data
* @param grad first order of gradient
* @param hess seconde order of gradient
*/
@throws(classOf[XGBoostError])
def boost(dtrain: DMatrix, grad: Array[Float], hess: Array[Float])
/**
* evaluate with given dmatrixs.
*
* @param evalMatrixs dmatrixs for evaluation
* @param evalNames name for eval dmatrixs, used for check results
* @param iter current eval iteration
* @return eval information
*/
@throws(classOf[XGBoostError])
def evalSet(evalMatrixs: Array[DMatrix], evalNames: Array[String], iter: Int): String
/**
* evaluate with given customized Evaluation class
*
* @param evalMatrixs evaluation matrix
* @param evalNames evaluation names
* @param eval custom evaluator
* @return eval information
*/
@throws(classOf[XGBoostError])
def evalSet(evalMatrixs: Array[DMatrix], evalNames: Array[String], eval: IEvaluation): String
/**
* Predict with data
*
* @param data dmatrix storing the input
* @return predict result
*/
@throws(classOf[XGBoostError])
def predict(data: DMatrix): Array[Array[Float]]
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @return predict result
*/
@throws(classOf[XGBoostError])
def predict(data: DMatrix, outPutMargin: Boolean): Array[Array[Float]]
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param outPutMargin Whether to output the raw untransformed margin value.
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @return predict result
*/
@throws(classOf[XGBoostError])
def predict(data: DMatrix, outPutMargin: Boolean, treeLimit: Int): Array[Array[Float]]
/**
* Predict with data
*
* @param data dmatrix storing the input
* @param treeLimit Limit number of trees in the prediction; defaults to 0 (use all trees).
* @param predLeaf When this option is on, the output will be a matrix of (nsample, ntrees), nsample = data.numRow
with each record indicating the predicted leaf index of each sample in each tree.
Note that the leaf index of a tree is unique per tree, so you may find leaf 1
in both tree 1 and tree 0.
* @return predict result
* @throws XGBoostError native error
*/
@throws(classOf[XGBoostError])
def predict(data: DMatrix, treeLimit: Int, predLeaf: Boolean): Array[Array[Float]]
/**
* save model to modelPath
*
* @param modelPath model path
*/
@throws(classOf[XGBoostError])
def saveModel(modelPath: String)
/**
* Dump model into a text file.
*
* @param modelPath file to save dumped model info
* @param withStats bool Controls whether the split statistics are output.
*/
@throws(classOf[IOException])
@throws(classOf[XGBoostError])
def dumpModel(modelPath: String, withStats: Boolean)
/**
* Dump model into a text file.
*
* @param modelPath file to save dumped model info
* @param featureMap featureMap file
* @param withStats bool
* Controls whether the split statistics are output.
*/
@throws(classOf[IOException])
@throws(classOf[XGBoostError])
def dumpModel(modelPath: String, featureMap: String, withStats: Boolean)
/**
* get importance of each feature
*
* @return featureMap key: feature index, value: feature importance score
*/
@throws(classOf[XGBoostError])
def getFeatureScore: mutable.Map[String, Integer]
/**
* get importance of each feature
*
* @param featureMap file to save dumped model info
* @return featureMap key: feature index, value: feature importance score
*/
@throws(classOf[XGBoostError])
def getFeatureScore(featureMap: String): mutable.Map[String, Integer]
def dispose
}

View File

@@ -0,0 +1,161 @@
package org.dmlc.xgboost4j.scala
import org.dmlc.xgboost4j.{DMatrix => JDMatrix, XGBoostError}
class DMatrix private(private[scala] val jDMatrix: JDMatrix) {
/**
* init DMatrix from file (svmlight format)
*
* @param dataPath path of data file
* @throws XGBoostError native error
*/
def this(dataPath: String) {
this(new JDMatrix(dataPath))
}
/**
* create DMatrix from sparse matrix
*
* @param headers index to headers (rowHeaders for CSR or colHeaders for CSC)
* @param indices Indices (colIndexs for CSR or rowIndexs for CSC)
* @param data non zero values (sequence by row for CSR or by col for CSC)
* @param st sparse matrix type (CSR or CSC)
*/
@throws(classOf[XGBoostError])
def this(headers: Array[Long], indices: Array[Int], data: Array[Float], st: JDMatrix.SparseType) {
this(new JDMatrix(headers, indices, data, st))
}
/**
* create DMatrix from dense matrix
*
* @param data data values
* @param nrow number of rows
* @param ncol number of columns
*/
@throws(classOf[XGBoostError])
def this(data: Array[Float], nrow: Int, ncol: Int) {
this(new JDMatrix(data, nrow, ncol))
}
/**
* set label of dmatrix
*
* @param labels labels
*/
@throws(classOf[XGBoostError])
def setLabel(labels: Array[Float]): Unit = {
jDMatrix.setLabel(labels)
}
/**
* set weight of each instance
*
* @param weights weights
*/
@throws(classOf[XGBoostError])
def setWeight(weights: Array[Float]): Unit = {
jDMatrix.setWeight(weights)
}
/**
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from
*
* @param baseMargin base margin
*/
@throws(classOf[XGBoostError])
def setBaseMargin(baseMargin: Array[Float]): Unit = {
jDMatrix.setBaseMargin(baseMargin)
}
/**
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from
*
* @param baseMargin base margin
*/
@throws(classOf[XGBoostError])
def setBaseMargin(baseMargin: Array[Array[Float]]): Unit = {
jDMatrix.setBaseMargin(baseMargin)
}
/**
* Set group sizes of DMatrix (used for ranking)
*
* @param group group size as array
*/
@throws(classOf[XGBoostError])
def setGroup(group: Array[Int]): Unit = {
jDMatrix.setGroup(group)
}
/**
* get label values
*
* @return label
*/
@throws(classOf[XGBoostError])
def getLabel: Array[Float] = {
jDMatrix.getLabel
}
/**
* get weight of the DMatrix
*
* @return weights
*/
@throws(classOf[XGBoostError])
def getWeight: Array[Float] = {
jDMatrix.getWeight
}
/**
* get base margin of the DMatrix
*
* @return base margin
*/
@throws(classOf[XGBoostError])
def getBaseMargin: Array[Float] = {
jDMatrix.getBaseMargin
}
/**
* Slice the DMatrix and return a new DMatrix that only contains `rowIndex`.
*
* @param rowIndex row index
* @return sliced new DMatrix
*/
@throws(classOf[XGBoostError])
def slice(rowIndex: Array[Int]): DMatrix = {
new DMatrix(jDMatrix.slice(rowIndex))
}
/**
* get the row number of DMatrix
*
* @return number of rows
*/
@throws(classOf[XGBoostError])
def rowNum: Long = {
jDMatrix.rowNum
}
/**
* save DMatrix to filePath
*
* @param filePath file path
*/
def saveBinary(filePath: String): Unit = {
jDMatrix.saveBinary(filePath)
}
def getHandle: Long = {
jDMatrix.getHandle
}
def delete(): Unit = {
jDMatrix.dispose()
}
}

View File

@@ -0,0 +1,82 @@
package org.dmlc.xgboost4j.scala
import scala.collection.JavaConverters._
import scala.collection.mutable
import org.dmlc.xgboost4j.{Booster => JBooster, IEvaluation, IObjective}
private[scala] class ScalaBoosterImpl private[xgboost4j](booster: JBooster) extends Booster {
override def setParam(key: String, value: String): Unit = {
booster.setParam(key, value)
}
override def update(dtrain: DMatrix, iter: Int): Unit = {
booster.update(dtrain.jDMatrix, iter)
}
override def update(dtrain: DMatrix, obj: IObjective): Unit = {
booster.update(dtrain.jDMatrix, obj)
}
override def dumpModel(modelPath: String, withStats: Boolean): Unit = {
booster.dumpModel(modelPath, withStats)
}
override def dumpModel(modelPath: String, featureMap: String, withStats: Boolean): Unit = {
booster.dumpModel(modelPath, featureMap, withStats)
}
override def setParams(params: Map[String, AnyRef]): Unit = {
booster.setParams(params.asJava)
}
override def evalSet(evalMatrixs: Array[DMatrix], evalNames: Array[String], iter: Int): String = {
booster.evalSet(evalMatrixs.map(_.jDMatrix), evalNames, iter)
}
override def evalSet(evalMatrixs: Array[DMatrix], evalNames: Array[String], eval: IEvaluation): String = {
booster.evalSet(evalMatrixs.map(_.jDMatrix), evalNames, eval)
}
override def dispose: Unit = {
booster.dispose()
}
override def predict(data: DMatrix): Array[Array[Float]] = {
booster.predict(data.jDMatrix)
}
override def predict(data: DMatrix, outPutMargin: Boolean): Array[Array[Float]] = {
booster.predict(data.jDMatrix, outPutMargin)
}
override def predict(data: DMatrix, outPutMargin: Boolean, treeLimit: Int): Array[Array[Float]] = {
booster.predict(data.jDMatrix, outPutMargin, treeLimit)
}
override def predict(data: DMatrix, treeLimit: Int, predLeaf: Boolean): Array[Array[Float]] = {
booster.predict(data.jDMatrix, treeLimit, predLeaf)
}
override def boost(dtrain: DMatrix, grad: Array[Float], hess: Array[Float]): Unit = {
booster.boost(dtrain.jDMatrix, grad, hess)
}
override def getFeatureScore: mutable.Map[String, Integer] = {
booster.getFeatureScore.asScala
}
override def getFeatureScore(featureMap: String): mutable.Map[String, Integer] = {
booster.getFeatureScore(featureMap).asScala
}
override def saveModel(modelPath: String): Unit = {
booster.saveModel(modelPath)
}
override def finalize(): Unit = {
super.finalize()
dispose
}
}

View File

@@ -0,0 +1,36 @@
package org.dmlc.xgboost4j.scala
import _root_.scala.collection.JavaConverters._
import org.dmlc.xgboost4j
import org.dmlc.xgboost4j.{XGBoost => JXGBoost, IEvaluation, IObjective}
object XGBoost {
def train(params: Map[String, AnyRef], dtrain: xgboost4j.DMatrix, round: Int,
watches: Map[String, xgboost4j.DMatrix], obj: IObjective, eval: IEvaluation): Booster = {
val xgboostInJava = JXGBoost.train(params.asJava, dtrain, round, watches.asJava, obj, eval)
new ScalaBoosterImpl(xgboostInJava)
}
def crossValiation(params: Map[String, AnyRef],
data: DMatrix,
round: Int,
nfold: Int,
metrics: Array[String],
obj: IObjective,
eval: IEvaluation): Array[String] = {
JXGBoost.crossValiation(params.asJava, data.jDMatrix, round, nfold, metrics, obj,
eval)
}
def initBoostModel(params: Map[String, AnyRef], dMatrixs: Array[DMatrix]): Booster = {
val xgboostInJava = JXGBoost.initBoostingModel(params.asJava, dMatrixs.map(_.jDMatrix))
new ScalaBoosterImpl(xgboostInJava)
}
def loadBoostModel(params: Map[String, AnyRef], modelPath: String): Booster = {
val xgboostInJava = JXGBoost.loadBoostModel(params.asJava, modelPath)
new ScalaBoosterImpl(xgboostInJava)
}
}

View File

@@ -0,0 +1,513 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "xgboost/c_api.h"
#include "xgboost4j.h"
#include <cstring>
//helper functions
//set handle
void setHandle(JNIEnv *jenv, jlongArray jhandle, void* handle) {
long out[1];
out[0] = (long) handle;
jenv->SetLongArrayRegion(jhandle, 0, 1, (const jlong*) out);
}
JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBGetLastError
(JNIEnv *jenv, jclass jcls) {
jstring jresult = 0 ;
const char* result = XGBGetLastError();
if (result) jresult = jenv->NewStringUTF(result);
return jresult;
}
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromFile
(JNIEnv *jenv, jclass jcls, jstring jfname, jint jsilent, jlongArray jout) {
DMatrixHandle result;
const char* fname = jenv->GetStringUTFChars(jfname, 0);
int ret = XGDMatrixCreateFromFile(fname, jsilent, &result);
if (fname) jenv->ReleaseStringUTFChars(jfname, fname);
setHandle(jenv, jout, result);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixCreateFromCSR
* Signature: ([J[J[F)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromCSR
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) {
DMatrixHandle result;
jlong* indptr = jenv->GetLongArrayElements(jindptr, 0);
jint* indices = jenv->GetIntArrayElements(jindices, 0);
jfloat* data = jenv->GetFloatArrayElements(jdata, 0);
bst_ulong nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
bst_ulong nelem = (bst_ulong)jenv->GetArrayLength(jdata);
int ret = (jint) XGDMatrixCreateFromCSR((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem, &result);
setHandle(jenv, jout, result);
//Release
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
jenv->ReleaseIntArrayElements(jindices, indices, 0);
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixCreateFromCSC
* Signature: ([J[J[F)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromCSC
(JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) {
DMatrixHandle result;
jlong* indptr = jenv->GetLongArrayElements(jindptr, NULL);
jint* indices = jenv->GetIntArrayElements(jindices, 0);
jfloat* data = jenv->GetFloatArrayElements(jdata, NULL);
bst_ulong nindptr = (bst_ulong)jenv->GetArrayLength(jindptr);
bst_ulong nelem = (bst_ulong)jenv->GetArrayLength(jdata);
int ret = (jint) XGDMatrixCreateFromCSC((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem, &result);
setHandle(jenv, jout, result);
//release
jenv->ReleaseLongArrayElements(jindptr, indptr, 0);
jenv->ReleaseIntArrayElements(jindices, indices, 0);
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixCreateFromMat
* Signature: ([FIIF)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromMat
(JNIEnv *jenv, jclass jcls, jfloatArray jdata, jint jnrow, jint jncol, jfloat jmiss, jlongArray jout) {
DMatrixHandle result;
jfloat* data = jenv->GetFloatArrayElements(jdata, 0);
bst_ulong nrow = (bst_ulong)jnrow;
bst_ulong ncol = (bst_ulong)jncol;
int ret = (jint) XGDMatrixCreateFromMat((float const *)data, nrow, ncol, jmiss, &result);
setHandle(jenv, jout, result);
//release
jenv->ReleaseFloatArrayElements(jdata, data, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSliceDMatrix
* Signature: (J[I)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSliceDMatrix
(JNIEnv *jenv, jclass jcls, jlong jhandle, jintArray jindexset, jlongArray jout) {
DMatrixHandle result;
DMatrixHandle handle = (DMatrixHandle) jhandle;
jint* indexset = jenv->GetIntArrayElements(jindexset, 0);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jindexset);
int ret = XGDMatrixSliceDMatrix(handle, (int const *)indexset, len, &result);
setHandle(jenv, jout, result);
//release
jenv->ReleaseIntArrayElements(jindexset, indexset, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixFree
* Signature: (J)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixFree
(JNIEnv *jenv, jclass jcls, jlong jhandle) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
int ret = XGDMatrixFree(handle);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSaveBinary
* Signature: (JLjava/lang/String;I)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSaveBinary
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname, jint jsilent) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
const char* fname = jenv->GetStringUTFChars(jfname, 0);
int ret = XGDMatrixSaveBinary(handle, fname, jsilent);
if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSetFloatInfo
* Signature: (JLjava/lang/String;[F)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSetFloatInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jfloatArray jarray) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
const char* field = jenv->GetStringUTFChars(jfield, 0);
jfloat* array = jenv->GetFloatArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetFloatInfo(handle, field, (float const *)array, len);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, field);
jenv->ReleaseFloatArrayElements(jarray, array, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSetUIntInfo
* Signature: (JLjava/lang/String;[I)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSetUIntInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jintArray jarray) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
const char* field = jenv->GetStringUTFChars(jfield, 0);
jint* array = jenv->GetIntArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
jenv->ReleaseIntArrayElements(jarray, array, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSetGroup
* Signature: (J[I)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSetGroup
(JNIEnv * jenv, jclass jcls, jlong jhandle, jintArray jarray) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
jint* array = jenv->GetIntArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetGroup(handle, (unsigned int const *)array, len);
//release
jenv->ReleaseIntArrayElements(jarray, array, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixGetFloatInfo
* Signature: (JLjava/lang/String;)[F
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixGetFloatInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
const char* field = jenv->GetStringUTFChars(jfield, 0);
bst_ulong len;
float *result;
int ret = XGDMatrixGetFloatInfo(handle, field, &len, (const float**) &result);
if (field) jenv->ReleaseStringUTFChars(jfield, field);
jsize jlen = (jsize) len;
jfloatArray jarray = jenv->NewFloatArray(jlen);
jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) result);
jenv->SetObjectArrayElement(jout, 0, (jobject) jarray);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixGetUIntInfo
* Signature: (JLjava/lang/String;)[I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixGetUIntInfo
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
const char* field = jenv->GetStringUTFChars(jfield, 0);
bst_ulong len;
unsigned int *result;
int ret = (jint) XGDMatrixGetUIntInfo(handle, field, &len, (const unsigned int **) &result);
if (field) jenv->ReleaseStringUTFChars(jfield, field);
jsize jlen = (jsize) len;
jintArray jarray = jenv->NewIntArray(jlen);
jenv->SetIntArrayRegion(jarray, 0, jlen, (jint *) result);
jenv->SetObjectArrayElement(jout, 0, jarray);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixNumRow
* Signature: (J)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixNumRow
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlongArray jout) {
DMatrixHandle handle = (DMatrixHandle) jhandle;
bst_ulong result[1];
int ret = (jint) XGDMatrixNumRow(handle, result);
jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) result);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterCreate
* Signature: ([J)J
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterCreate
(JNIEnv *jenv, jclass jcls, jlongArray jhandles, jlongArray jout) {
DMatrixHandle* handles;
bst_ulong len = 0;
jlong* cjhandles = 0;
BoosterHandle result;
if(jhandles) {
len = (bst_ulong)jenv->GetArrayLength(jhandles);
handles = new DMatrixHandle[len];
//put handle from jhandles to chandles
cjhandles = jenv->GetLongArrayElements(jhandles, 0);
for(bst_ulong i=0; i<len; i++) {
handles[i] = (DMatrixHandle) cjhandles[i];
}
}
int ret = XGBoosterCreate(handles, len, &result);
//release
if(jhandles) {
delete[] handles;
jenv->ReleaseLongArrayElements(jhandles, cjhandles, 0);
}
setHandle(jenv, jout, result);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterFree
* Signature: (J)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterFree
(JNIEnv *jenv, jclass jcls, jlong jhandle) {
BoosterHandle handle = (BoosterHandle) jhandle;
return XGBoosterFree(handle);
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterSetParam
* Signature: (JLjava/lang/String;Ljava/lang/String;)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterSetParam
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jname, jstring jvalue) {
BoosterHandle handle = (BoosterHandle) jhandle;
const char* name = jenv->GetStringUTFChars(jname, 0);
const char* value = jenv->GetStringUTFChars(jvalue, 0);
int ret = XGBoosterSetParam(handle, name, value);
//release
if (name) jenv->ReleaseStringUTFChars(jname, name);
if (value) jenv->ReleaseStringUTFChars(jvalue, value);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterUpdateOneIter
* Signature: (JIJ)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterUpdateOneIter
(JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlong jdtrain) {
BoosterHandle handle = (BoosterHandle) jhandle;
DMatrixHandle dtrain = (DMatrixHandle) jdtrain;
return XGBoosterUpdateOneIter(handle, jiter, dtrain);
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterBoostOneIter
* Signature: (JJ[F[F)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterBoostOneIter
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdtrain, jfloatArray jgrad, jfloatArray jhess) {
BoosterHandle handle = (BoosterHandle) jhandle;
DMatrixHandle dtrain = (DMatrixHandle) jdtrain;
jfloat* grad = jenv->GetFloatArrayElements(jgrad, 0);
jfloat* hess = jenv->GetFloatArrayElements(jhess, 0);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jgrad);
int ret = XGBoosterBoostOneIter(handle, dtrain, grad, hess, len);
//release
jenv->ReleaseFloatArrayElements(jgrad, grad, 0);
jenv->ReleaseFloatArrayElements(jhess, hess, 0);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterEvalOneIter
* Signature: (JI[J[Ljava/lang/String;)Ljava/lang/String;
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterEvalOneIter
(JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlongArray jdmats, jobjectArray jevnames, jobjectArray jout) {
BoosterHandle handle = (BoosterHandle) jhandle;
DMatrixHandle* dmats = 0;
char **evnames = 0;
char *result = 0;
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jdmats);
if(len > 0) {
dmats = new DMatrixHandle[len];
evnames = new char*[len];
}
//put handle from jhandles to chandles
jlong* cjdmats = jenv->GetLongArrayElements(jdmats, 0);
for(bst_ulong i=0; i<len; i++) {
dmats[i] = (DMatrixHandle) cjdmats[i];
}
//transfer jObjectArray to char**, user strcpy and release JNI char* inplace
for(bst_ulong i=0; i<len; i++) {
jstring jevname = (jstring)jenv->GetObjectArrayElement(jevnames, i);
const char* cevname = jenv->GetStringUTFChars(jevname, 0);
evnames[i] = new char[jenv->GetStringLength(jevname)];
strcpy(evnames[i], cevname);
jenv->ReleaseStringUTFChars(jevname, cevname);
}
int ret = XGBoosterEvalOneIter(handle, jiter, dmats, (char const *(*)) evnames, len, (const char **) &result);
if(len > 0) {
delete[] dmats;
//release string chars
for(bst_ulong i=0; i<len; i++) {
delete[] evnames[i];
}
delete[] evnames;
jenv->ReleaseLongArrayElements(jdmats, cjdmats, 0);
}
jstring jinfo = 0;
if (result) jinfo = jenv->NewStringUTF((const char *) result);
jenv->SetObjectArrayElement(jout, 0, jinfo);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterPredict
* Signature: (JJIJ)[F
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterPredict
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdmat, jint joption_mask, jint jntree_limit, jobjectArray jout) {
BoosterHandle handle = (BoosterHandle) jhandle;
DMatrixHandle dmat = (DMatrixHandle) jdmat;
bst_ulong len;
float *result;
int ret = XGBoosterPredict(handle, dmat, joption_mask, (unsigned int) jntree_limit, &len, (const float **) &result);
jsize jlen = (jsize) len;
jfloatArray jarray = jenv->NewFloatArray(jlen);
jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) result);
jenv->SetObjectArrayElement(jout, 0, jarray);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterLoadModel
* Signature: (JLjava/lang/String;)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterLoadModel
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) {
BoosterHandle handle = (BoosterHandle) jhandle;
const char* fname = jenv->GetStringUTFChars(jfname, 0);
int ret = XGBoosterLoadModel(handle, fname);
if (fname) jenv->ReleaseStringUTFChars(jfname,fname);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterSaveModel
* Signature: (JLjava/lang/String;)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterSaveModel
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) {
BoosterHandle handle = (BoosterHandle) jhandle;
const char* fname = jenv->GetStringUTFChars(jfname, 0);
int ret = XGBoosterSaveModel(handle, fname);
if (fname) jenv->ReleaseStringUTFChars(jfname, fname);
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterLoadModelFromBuffer
* Signature: (JJJ)V
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterLoadModelFromBuffer
(JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jbuf, jlong jlen) {
BoosterHandle handle = (BoosterHandle) jhandle;
void *buf = (void*) jbuf;
return XGBoosterLoadModelFromBuffer(handle, (void const *)buf, (bst_ulong) jlen);
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterGetModelRaw
* Signature: (J)Ljava/lang/String;
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterGetModelRaw
(JNIEnv * jenv, jclass jcls, jlong jhandle, jobjectArray jout) {
BoosterHandle handle = (BoosterHandle) jhandle;
bst_ulong len = 0;
char *result;
int ret = XGBoosterGetModelRaw(handle, &len, (const char **) &result);
if (result){
jstring jinfo = jenv->NewStringUTF((const char *) result);
jenv->SetObjectArrayElement(jout, 0, jinfo);
}
return ret;
}
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterDumpModel
* Signature: (JLjava/lang/String;I)[Ljava/lang/String;
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterDumpModel
(JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfmap, jint jwith_stats, jobjectArray jout) {
BoosterHandle handle = (BoosterHandle) jhandle;
const char *fmap = jenv->GetStringUTFChars(jfmap, 0);
bst_ulong len = 0;
char **result;
int ret = XGBoosterDumpModel(handle, fmap, jwith_stats, &len, (const char ***) &result);
jsize jlen = (jsize) len;
jobjectArray jinfos = jenv->NewObjectArray(jlen, jenv->FindClass("java/lang/String"), jenv->NewStringUTF(""));
for(int i=0 ; i<jlen; i++) {
jenv->SetObjectArrayElement(jinfos, i, jenv->NewStringUTF((const char*) result[i]));
}
jenv->SetObjectArrayElement(jout, 0, jinfos);
if (fmap) jenv->ReleaseStringUTFChars(jfmap, (const char *)fmap);
return ret;
}

View File

@@ -0,0 +1,221 @@
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_dmlc_xgboost4j_XgboostJNI */
#ifndef _Included_org_dmlc_xgboost4j_XgboostJNI
#define _Included_org_dmlc_xgboost4j_XgboostJNI
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBGetLastError
* Signature: ()Ljava/lang/String;
*/
JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBGetLastError
(JNIEnv *, jclass);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixCreateFromFile
* Signature: (Ljava/lang/String;I[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromFile
(JNIEnv *, jclass, jstring, jint, jlongArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixCreateFromCSR
* Signature: ([J[I[F[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromCSR
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixCreateFromCSC
* Signature: ([J[I[F[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromCSC
(JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixCreateFromMat
* Signature: ([FIIF[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixCreateFromMat
(JNIEnv *, jclass, jfloatArray, jint, jint, jfloat, jlongArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSliceDMatrix
* Signature: (J[I[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSliceDMatrix
(JNIEnv *, jclass, jlong, jintArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixFree
* Signature: (J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixFree
(JNIEnv *, jclass, jlong);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSaveBinary
* Signature: (JLjava/lang/String;I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSaveBinary
(JNIEnv *, jclass, jlong, jstring, jint);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSetFloatInfo
* Signature: (JLjava/lang/String;[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSetFloatInfo
(JNIEnv *, jclass, jlong, jstring, jfloatArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSetUIntInfo
* Signature: (JLjava/lang/String;[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSetUIntInfo
(JNIEnv *, jclass, jlong, jstring, jintArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixSetGroup
* Signature: (J[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixSetGroup
(JNIEnv *, jclass, jlong, jintArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixGetFloatInfo
* Signature: (JLjava/lang/String;[[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixGetFloatInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixGetUIntInfo
* Signature: (JLjava/lang/String;[[I)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixGetUIntInfo
(JNIEnv *, jclass, jlong, jstring, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGDMatrixNumRow
* Signature: (J[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGDMatrixNumRow
(JNIEnv *, jclass, jlong, jlongArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterCreate
* Signature: ([J[J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterCreate
(JNIEnv *, jclass, jlongArray, jlongArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterFree
* Signature: (J)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterFree
(JNIEnv *, jclass, jlong);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterSetParam
* Signature: (JLjava/lang/String;Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterSetParam
(JNIEnv *, jclass, jlong, jstring, jstring);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterUpdateOneIter
* Signature: (JIJ)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterUpdateOneIter
(JNIEnv *, jclass, jlong, jint, jlong);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterBoostOneIter
* Signature: (JJ[F[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterBoostOneIter
(JNIEnv *, jclass, jlong, jlong, jfloatArray, jfloatArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterEvalOneIter
* Signature: (JI[J[Ljava/lang/String;[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterEvalOneIter
(JNIEnv *, jclass, jlong, jint, jlongArray, jobjectArray, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterPredict
* Signature: (JJIJ[[F)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterPredict
(JNIEnv *, jclass, jlong, jlong, jint, jint, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterLoadModel
* Signature: (JLjava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterLoadModel
(JNIEnv *, jclass, jlong, jstring);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterSaveModel
* Signature: (JLjava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterSaveModel
(JNIEnv *, jclass, jlong, jstring);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterLoadModelFromBuffer
* Signature: (JJJ)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterLoadModelFromBuffer
(JNIEnv *, jclass, jlong, jlong, jlong);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterGetModelRaw
* Signature: (J[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterGetModelRaw
(JNIEnv *, jclass, jlong, jobjectArray);
/*
* Class: org_dmlc_xgboost4j_XgboostJNI
* Method: XGBoosterDumpModel
* Signature: (JLjava/lang/String;I[[Ljava/lang/String;)I
*/
JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_XgboostJNI_XGBoosterDumpModel
(JNIEnv *, jclass, jlong, jstring, jint, jobjectArray);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,139 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Test;
import java.util.*;
import java.util.Map.Entry;
/**
* test cases for Booster
*
* @author hzx
*/
public class BoosterImplTest {
public static class EvalError implements IEvaluation {
private static final Log logger = LogFactory.getLog(EvalError.class);
String evalMetric = "custom_error";
public EvalError() {
}
@Override
public String getMetric() {
return evalMetric;
}
@Override
public float eval(float[][] predicts, org.dmlc.xgboost4j.DMatrix dmat) {
float error = 0f;
float[] labels;
try {
labels = dmat.getLabel();
} catch (XGBoostError ex) {
logger.error(ex);
return -1f;
}
int nrow = predicts.length;
for (int i = 0; i < nrow; i++) {
if (labels[i] == 0f && predicts[i][0] > 0) {
error++;
} else if (labels[i] == 1f && predicts[i][0] <= 0) {
error++;
}
}
return error / labels.length;
}
}
@Test
public void testBoosterBasic() throws XGBoostError {
org.dmlc.xgboost4j.DMatrix trainMat = new org.dmlc.xgboost4j.DMatrix("../../demo/data/agaricus.txt.train");
org.dmlc.xgboost4j.DMatrix testMat = new org.dmlc.xgboost4j.DMatrix("../../demo/data/agaricus.txt.test");
//set params
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("eta", 1.0);
put("max_depth", 2);
put("silent", 1);
put("objective", "binary:logistic");
}
};
//set watchList
HashMap<String, org.dmlc.xgboost4j.DMatrix> watches = new HashMap<>();
watches.put("train", trainMat);
watches.put("test", testMat);
//set round
int round = 2;
//train a boost model
Booster booster = XGBoost.train(paramMap, trainMat, round, watches, null, null);
//predict raw output
float[][] predicts = booster.predict(testMat, true);
//eval
IEvaluation eval = new EvalError();
//error must be less than 0.1
TestCase.assertTrue(eval.eval(predicts, testMat) < 0.1f);
//test dump model
}
/**
* test cross valiation
*
* @throws XGBoostError
*/
@Test
public void testCV() throws XGBoostError {
//load train mat
org.dmlc.xgboost4j.DMatrix trainMat = new org.dmlc.xgboost4j.DMatrix("../../demo/data/agaricus.txt.train");
//set params
Map<String, Object> param = new HashMap<String, Object>() {
{
put("eta", 1.0);
put("max_depth", 3);
put("silent", 1);
put("nthread", 6);
put("objective", "binary:logistic");
put("gamma", 1.0);
put("eval_metric", "error");
}
};
//do 5-fold cross validation
int round = 2;
int nfold = 5;
//set additional eval_metrics
String[] metrics = null;
String[] evalHist = XGBoost.crossValiation(param, trainMat, round, nfold, metrics,
null, null);
}
}

View File

@@ -0,0 +1,104 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.dmlc.xgboost4j;
import junit.framework.TestCase;
import org.dmlc.xgboost4j.*;
import org.junit.Test;
import java.util.Arrays;
import java.util.Random;
/**
* test cases for DMatrix
*
* @author hzx
*/
public class DMatrixTest {
@Test
public void testCreateFromFile() throws XGBoostError {
//create DMatrix from file
org.dmlc.xgboost4j.DMatrix dmat = new org.dmlc.xgboost4j.DMatrix("../../demo/data/agaricus.txt.test");
//get label
float[] labels = dmat.getLabel();
//check length
TestCase.assertTrue(dmat.rowNum() == labels.length);
//set weights
float[] weights = Arrays.copyOf(labels, labels.length);
dmat.setWeight(weights);
float[] dweights = dmat.getWeight();
TestCase.assertTrue(Arrays.equals(weights, dweights));
}
@Test
public void testCreateFromCSR() throws XGBoostError {
//create Matrix from csr format sparse Matrix and labels
/**
* sparse matrix
* 1 0 2 3 0
* 4 0 2 3 5
* 3 1 2 5 0
*/
float[] data = new float[]{1, 2, 3, 4, 2, 3, 5, 3, 1, 2, 5};
int[] colIndex = new int[]{0, 2, 3, 0, 2, 3, 4, 0, 1, 2, 3};
long[] rowHeaders = new long[]{0, 3, 7, 11};
org.dmlc.xgboost4j.DMatrix dmat1 = new org.dmlc.xgboost4j.DMatrix(rowHeaders, colIndex, data, org.dmlc.xgboost4j.DMatrix.SparseType.CSR);
//check row num
System.out.println(dmat1.rowNum());
TestCase.assertTrue(dmat1.rowNum() == 3);
//test set label
float[] label1 = new float[]{1, 0, 1};
dmat1.setLabel(label1);
float[] label2 = dmat1.getLabel();
TestCase.assertTrue(Arrays.equals(label1, label2));
}
@Test
public void testCreateFromDenseMatrix() throws XGBoostError {
//create DMatrix from 10*5 dense matrix
int nrow = 10;
int ncol = 5;
float[] data0 = new float[nrow * ncol];
//put random nums
Random random = new Random();
for (int i = 0; i < nrow * ncol; i++) {
data0[i] = random.nextFloat();
}
//create label
float[] label0 = new float[nrow];
for (int i = 0; i < nrow; i++) {
label0[i] = random.nextFloat();
}
org.dmlc.xgboost4j.DMatrix dmat0 = new org.dmlc.xgboost4j.DMatrix(data0, nrow, ncol);
dmat0.setLabel(label0);
//check
TestCase.assertTrue(dmat0.rowNum() == 10);
TestCase.assertTrue(dmat0.getLabel().length == 10);
//set weights for each instance
float[] weights = new float[nrow];
for (int i = 0; i < nrow; i++) {
weights[i] = random.nextFloat();
}
dmat0.setWeight(weights);
TestCase.assertTrue(Arrays.equals(weights, dmat0.getWeight()));
}
}