diff --git a/NEWS.md b/NEWS.md index 0da657019..6fc6a37a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,8 @@ This file records the changes in xgboost library in reverse chronological order. - This could fix some of the previous problem which runs xgboost on multiple threads. * JVM Package - Enable xgboost4j for java and scala + - XGBoost distributed now runs on Flink and Spark. + ## v0.47 (2016.01.14) diff --git a/doc/jvm/index.md b/doc/jvm/index.md new file mode 100644 index 000000000..8b1f22d5e --- /dev/null +++ b/doc/jvm/index.md @@ -0,0 +1,20 @@ +XGBoost JVM Package +=================== +[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost) +[![GitHub license](http://dmlc.github.io/img/apache2.svg)](../LICENSE) + +You have found the XGBoost JVM Package! + +Installation +------------ +Building XGBoost4J takes two steps. +- First, run the following command to build the JNI library. +```bash +./create_jni.sh +``` +- Then package the library. You can run `mvn package` in the xgboost4j folder, or use an IDE (Eclipse/NetBeans) to open this Maven project and build it. 
+ +Contents +-------- +* [Java Overview Tutorial](java_intro.md) +* [Code Examples](https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j-example) diff --git a/jvm-packages/doc/xgboost4j.md b/doc/jvm/java_intro.md similarity index 80% rename from jvm-packages/doc/xgboost4j.md rename to doc/jvm/java_intro.md index 201b3cc05..a944628fd 100644 --- a/jvm-packages/doc/xgboost4j.md +++ b/doc/jvm/java_intro.md @@ -1,23 +1,8 @@ -xgboost4j : java wrapper for xgboost -==== +XGBoost4J Java API +================== +This tutorial introduces the Java API of XGBoost4J. -This page will introduce xgboost4j, the java wrapper for xgboost, including: -* [Building](#build-xgboost4j) -* [Data Interface](#data-interface) -* [Setting Parameters](#setting-parameters) -* [Train Model](#training-model) -* [Prediction](#prediction) - -= -#### Build xgboost4j -* Build native library -first make sure you have installed jdk and `JAVA_HOME` has been setted properly, then simply run `./create_wrap.sh`. - -* Package xgboost4j -to package xgboost4j, you can run `mvn package` in xgboost4j folder or just use IDE(eclipse/netbeans) to open this maven project and build. - -= -#### Data Interface +## Data Interface Like the xgboost python module, xgboost4j use ```DMatrix``` to handle data, libsvm txt format file, sparse matrix in CSR/CSC format, and dense matrix is supported. 
* To import ```DMatrix``` : @@ -30,11 +15,11 @@ import org.dmlc.xgboost4j.DMatrix; DMatrix dmat = new DMatrix("train.svm.txt"); ``` -* To load sparse matrix in CSR/CSC format is a little complicated, the usage is like : -suppose a sparse matrix : -1 0 2 0 -4 0 0 3 -3 1 2 0 +* To load sparse matrix in CSR/CSC format is a little complicated, the usage is like : +suppose a sparse matrix : +1 0 2 0 +4 0 0 3 +3 1 2 0 for CSR format ```java @@ -52,12 +37,12 @@ int[] rowIndex = new int[] {0,1,2,2,0,2,1}; DMatrix dmat = new DMatrix(colHeaders, rowIndex, data, DMatrix.SparseType.CSC); ``` -* To load 3*2 dense matrix, the usage is like : -suppose a matrix : -1 2 -3 4 -5 6 - +* To load 3*2 dense matrix, the usage is like : +suppose a matrix : +1 2 +3 4 +5 6 + ```java float[] data = new float[] {1f,2f,3f,4f,5f,6f}; int nrow = 3; @@ -72,7 +57,7 @@ float[] weights = new float[] {1f,2f,1f}; dmat.setWeight(weights); ``` -#### Setting Parameters +## Setting Parameters * in xgboost4j any ```Iterable>``` object could be used as parameters. * to set parameters, for non-multiple value params, you can simply use entrySet of an Map: @@ -100,7 +85,7 @@ List> params = new ArrayList>() { }; ``` -#### Training Model +## Training Model With parameters and data, you are able to train a booster model. 
* Import ```Trainer``` and ```Booster``` : ```java @@ -145,7 +130,7 @@ Params param = new Params() { Booster booster = new Booster(param, "model.bin"); ``` -####Prediction +## Prediction after training and loading a model, you use it to predict other data, the predict results will be a two-dimension float array (nsample, nclass) ,for predict leaf, it would be (nsample, nclass*ntrees) ```java DMatrix dtest = new DMatrix("test.svm.txt"); diff --git a/jvm-packages/README.md b/jvm-packages/README.md index f7f3fa707..8e2e51233 100644 --- a/jvm-packages/README.md +++ b/jvm-packages/README.md @@ -1,33 +1,73 @@ -# xgboost4j -this is a java wrapper for xgboost +# XGBoost4J: Distributed XGBoost for Scala/Java +[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost) +[![GitHub license](http://dmlc.github.io/img/apache2.svg)](../LICENSE) -the structure of this wrapper is almost the same as the official python wrapper. +[Documentation](https://xgboost.readthedocs.org/en/latest/jvm/index.html) | +[Resources](../demo/README.md) | +[Release Notes](../NEWS.md) -core of this wrapper is two classes: +XGBoost4J is the JVM package of xgboost. It brings all the optimizations +and power of xgboost into the JVM ecosystem. -* DMatrix: for handling data +- Train XGBoost models in Scala and Java with easy customizations. +- Run distributed xgboost natively on JVM frameworks such as Flink and Spark. -* Booster: for train and predict +You can find more about XGBoost on [Documentation](https://xgboost.readthedocs.org/en/latest/jvm/index.html) and [Resource Page](../demo/README.md). -## usage: - please refer to [xgboost4j.md](doc/xgboost4j.md) for more information. 
+## Hello World +### XGBoost Scala +```scala +import ml.dmlc.xgboost4j.scala.DMatrix +import ml.dmlc.xgboost4j.scala.XGBoost - besides, simple examples could be found in [xgboost4j-demo](xgboost4j-demo/README.md) - +object XGBoostScalaExample { + def main(args: Array[String]) { + // read training data, available at xgboost/demo/data + val trainData = + new DMatrix("/path/to/agaricus.txt.train") + // define parameters + val paramMap = List( + "eta" -> 0.1, + "max_depth" -> 2, + "objective" -> "binary:logistic").toMap + // number of iterations + val round = 2 + // train the model + val model = XGBoost.train(paramMap, trainData, round) + // run prediction + val predTrain = model.predict(trainData) + // save model to the file. + model.saveModel("/local/path/to/model") + } +} +``` -## build native library +### XGBoost Flink +```scala +import ml.dmlc.xgboost4j.scala.flink.XGBoost +import org.apache.flink.api.scala._ +import org.apache.flink.api.scala.ExecutionEnvironment +import org.apache.flink.ml.MLUtils -for windows: open the xgboost.sln in "../windows" folder, you will found the xgboost4j project, you should do the following steps to build wrapper library: - * Select x64/win32 and Release in build - * (if you have setted `JAVA_HOME` properly in windows environment variables, escape this step) right click on xgboost4j project -> choose "Properties" -> click on "C/C++" in the window -> change the "Additional Include Directories" to fit your jdk install path. 
- * rebuild all - * double click "create_wrap.bat" to set library to proper place +object DistTrainWithFlink { + def main(args: Array[String]) { + val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment + // read training data + val trainData = + MLUtils.readLibSVM(env, "/path/to/data/agaricus.txt.train") + // define parameters + val paramMap = List( + "eta" -> 0.1, + "max_depth" -> 2, + "objective" -> "binary:logistic").toMap + // number of iterations + val round = 2 + // train the model + val model = XGBoost.train(paramMap, trainData, round) + val predTrain = model.predict(trainData.map{x => x.vector}) + model.saveModelToHadoop("file:///path/to/xgboost.model") + } +} +``` -for linux: - * make sure you have installed jdk and `JAVA_HOME` has been setted properly - * run "create_wrap.sh" - -for osx: - * make sure you have installed jdk - * for single thread xgboost, simply run "create_wrap.sh" - * for build with openMP, please refer to [build.md](../doc/build.md) to get openmp supported compiler first, and change the line "dis_omp=1" to "dis_omp=0" in "create_wrap.sh", then run "create_wrap.sh" \ No newline at end of file +### XGBoost Spark diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index db6bc8a98..dff6e0359 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -5,8 +5,8 @@ 4.0.0 ml.dmlc - xgboostjvm - 0.1 + xgboost-jvm + 0.5 pom UTF-8 @@ -19,7 +19,7 @@ xgboost4j - xgboost4j-demo + xgboost4j-example xgboost4j-spark xgboost4j-flink diff --git a/jvm-packages/xgboost4j-demo/README.md b/jvm-packages/xgboost4j-demo/README.md deleted file mode 100644 index c9cb35e4b..000000000 --- a/jvm-packages/xgboost4j-demo/README.md +++ /dev/null @@ -1,10 +0,0 @@ -xgboost4j examples -==== -* [Basic walkthrough of wrappers](src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java) -* [Cutomize loss function, and evaluation metric](src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java) -* [Boosting from existing 
prediction](src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java) -* [Predicting using first n trees](src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java) -* [Generalized Linear Model](src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java) -* [Cross validation](src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java) -* [Predicting leaf indices](src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java) -* [External Memory](src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java) diff --git a/jvm-packages/xgboost4j-demo/LICENSE b/jvm-packages/xgboost4j-example/LICENSE similarity index 100% rename from jvm-packages/xgboost4j-demo/LICENSE rename to jvm-packages/xgboost4j-example/LICENSE diff --git a/jvm-packages/xgboost4j-example/README.md b/jvm-packages/xgboost4j-example/README.md new file mode 100644 index 000000000..07b399242 --- /dev/null +++ b/jvm-packages/xgboost4j-example/README.md @@ -0,0 +1,18 @@ +XGBoost4J Code Examples +======================= + +## Java API +* [Basic walkthrough of wrappers](src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java) +* [Customize loss function, and evaluation metric](src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java) +* [Boosting from existing prediction](src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java) +* [Predicting using first n trees](src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java) +* [Generalized Linear Model](src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java) +* [Cross validation](src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java) +* [Predicting leaf indices](src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java) +* [External Memory](src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java) + +## Spark API +* [Distributed Training with Spark](src/main/scala/ml/dmlc/xgboost4j/scala/spark/example/DistTrainWithSpark.scala) + +## Flink API +* 
[Distributed Training with Flink](src/main/scala/ml/dmlc/xgboost4j/scala/flink/example/DistTrainWithFlink.scala) diff --git a/jvm-packages/xgboost4j-demo/pom.xml b/jvm-packages/xgboost4j-example/pom.xml similarity index 75% rename from jvm-packages/xgboost4j-demo/pom.xml rename to jvm-packages/xgboost4j-example/pom.xml index e076af63d..e4670dee9 100644 --- a/jvm-packages/xgboost4j-demo/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboostjvm - 0.1 + xgboost-jvm + 0.5 - xgboost4j-demo - 0.1 + xgboost4j-example + 0.5 jar @@ -26,7 +26,12 @@ ml.dmlc xgboost4j-spark - 0.1 + 0.5 + + + ml.dmlc + xgboost4j-flink + 0.5 org.apache.commons diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/BasicWalkThrough.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java similarity index 97% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/BasicWalkThrough.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java index d13bfbcd5..2fe829b41 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/BasicWalkThrough.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BasicWalkThrough.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.io.File; import java.io.IOException; @@ -24,7 +24,7 @@ import ml.dmlc.xgboost4j.java.Booster; import ml.dmlc.xgboost4j.java.DMatrix; import ml.dmlc.xgboost4j.java.XGBoost; import ml.dmlc.xgboost4j.java.XGBoostError; -import ml.dmlc.xgboost4j.java.demo.util.DataLoader; +import ml.dmlc.xgboost4j.java.example.util.DataLoader; /** * a simple example of java wrapper for xgboost diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/BoostFromPrediction.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java similarity index 98% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/BoostFromPrediction.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java index 5076892cb..0649de2ae 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/BoostFromPrediction.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/BoostFromPrediction.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.util.HashMap; diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/CrossValidation.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java similarity index 97% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/CrossValidation.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java index 8f467c8d3..dbaa4ff0f 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/CrossValidation.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.io.IOException; import java.util.HashMap; diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/CustomObjective.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java similarity index 99% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/CustomObjective.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java index fecc5f687..5fc132fa6 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/CustomObjective.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/CustomObjective.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.util.ArrayList; import java.util.HashMap; diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/ExternalMemory.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java similarity index 98% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/ExternalMemory.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java index e73eb058d..26434a1a4 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/ExternalMemory.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.util.HashMap; diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/GeneralizedLinearModel.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java similarity index 96% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/GeneralizedLinearModel.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java index 4ec2fc1a2..37c4e756a 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/GeneralizedLinearModel.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.util.HashMap; @@ -21,7 +21,7 @@ import ml.dmlc.xgboost4j.java.Booster; import ml.dmlc.xgboost4j.java.DMatrix; import ml.dmlc.xgboost4j.java.XGBoost; import ml.dmlc.xgboost4j.java.XGBoostError; -import ml.dmlc.xgboost4j.java.demo.util.CustomEval; +import ml.dmlc.xgboost4j.java.example.util.CustomEval; /** * this is an example of fit generalized linear model in xgboost diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/PredictFirstNtree.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java similarity index 95% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/PredictFirstNtree.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java index 67a965d64..9b3f3e27a 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/PredictFirstNtree.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictFirstNtree.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.util.HashMap; @@ -21,7 +21,7 @@ import ml.dmlc.xgboost4j.java.Booster; import ml.dmlc.xgboost4j.java.DMatrix; import ml.dmlc.xgboost4j.java.XGBoost; import ml.dmlc.xgboost4j.java.XGBoostError; -import ml.dmlc.xgboost4j.java.demo.util.CustomEval; +import ml.dmlc.xgboost4j.java.example.util.CustomEval; /** * predict first ntree diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/PredictLeafIndices.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java similarity index 98% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/PredictLeafIndices.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java index 9c3f20b90..c063df368 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/PredictLeafIndices.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ml.dmlc.xgboost4j.java.demo; +package ml.dmlc.xgboost4j.java.example; import java.util.Arrays; import java.util.HashMap; diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/util/CustomEval.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/CustomEval.java similarity index 97% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/util/CustomEval.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/CustomEval.java index b698bf410..b34b97e87 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/util/CustomEval.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/CustomEval.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package ml.dmlc.xgboost4j.java.demo.util; +package ml.dmlc.xgboost4j.java.example.util; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/util/DataLoader.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/DataLoader.java similarity index 98% rename from jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/util/DataLoader.java rename to jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/DataLoader.java index 5aa456fad..5f1ac6e3b 100644 --- a/jvm-packages/xgboost4j-demo/src/main/java/ml/dmlc/xgboost4j/java/demo/util/DataLoader.java +++ b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/util/DataLoader.java @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package ml.dmlc.xgboost4j.java.demo.util; +package ml.dmlc.xgboost4j.java.example.util; import java.io.*; import java.util.ArrayList; diff --git a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/Test.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/flink/example/DistTrainWithFlink.scala similarity index 58% rename from jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/Test.scala rename to jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/flink/example/DistTrainWithFlink.scala index 3637badf0..d3ad97428 100644 --- a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/Test.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/flink/example/DistTrainWithFlink.scala @@ -13,33 +13,29 @@ See the License for the specific language governing permissions and limitations under the License. */ +package ml.dmlc.xgboost4j.scala.flink.example -package ml.dmlc.xgboost4j.flink - -import org.apache.commons.logging.Log -import org.apache.commons.logging.LogFactory -import org.apache.flink.api.common.functions.RichMapPartitionFunction +import ml.dmlc.xgboost4j.scala.flink.XGBoost import org.apache.flink.api.scala._ -import org.apache.flink.api.scala.DataSet import org.apache.flink.api.scala.ExecutionEnvironment -import org.apache.flink.ml.common.LabeledVector import org.apache.flink.ml.MLUtils -import org.apache.flink.util.Collector - - -object Test { - val log = LogFactory.getLog(this.getClass) +object DistTrainWithFlink { def main(args: Array[String]) { val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment - val data = MLUtils.readLibSVM(env, "/home/tqchen/github/xgboost/demo/data/agaricus.txt.train") - val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "1", + // read training data + val trainData = + MLUtils.readLibSVM(env, "/path/to/data/agaricus.txt.train") + // define parameters + val paramMap = List( + 
"eta" -> 0.1, + "max_depth" -> 2, "objective" -> "binary:logistic").toMap + // number of iterations val round = 2 - val model = XGBoost.train(paramMap, data, round) - - - log.info(model) + // train the model + val model = XGBoost.train(paramMap, trainData, round) + val predTrain = model.predict(trainData.map{x => x.vector}) + model.saveModelToHadoop("file:///path/to/xgboost.model") } } - diff --git a/jvm-packages/xgboost4j-demo/src/main/scala/ml/dmlc/xgboost4j/scala/spark/demo/DistTrainWithSpark.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/spark/example/DistTrainWithSpark.scala similarity index 98% rename from jvm-packages/xgboost4j-demo/src/main/scala/ml/dmlc/xgboost4j/scala/spark/demo/DistTrainWithSpark.scala rename to jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/spark/example/DistTrainWithSpark.scala index 8fd794423..686948379 100644 --- a/jvm-packages/xgboost4j-demo/src/main/scala/ml/dmlc/xgboost4j/scala/spark/demo/DistTrainWithSpark.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/spark/example/DistTrainWithSpark.scala @@ -14,7 +14,7 @@ limitations under the License. 
*/ -package ml.dmlc.xgboost4j.scala.spark.demo +package ml.dmlc.xgboost4j.scala.spark.example import java.io.File diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 7e6ed2b06..a52f993a5 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboostjvm - 0.1 + xgboost-jvm + 0.5 xgboost4j-flink - 0.1 + 0.5 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j - 0.1 + 0.5 org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/XGBoost.scala b/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoost.scala similarity index 94% rename from jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/XGBoost.scala rename to jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoost.scala index 8f71cdd09..acbd6e656 100644 --- a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/XGBoost.scala +++ b/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoost.scala @@ -14,7 +14,8 @@ limitations under the License. */ -package ml.dmlc.xgboost4j.flink +package ml.dmlc.xgboost4j.scala.flink + import scala.collection.JavaConverters.asScalaIteratorConverter; import ml.dmlc.xgboost4j.LabeledPoint import ml.dmlc.xgboost4j.java.{RabitTracker, Rabit} @@ -35,7 +36,7 @@ object XGBoost { * * @param workerEnvs */ - private class MapFunction(paramMap: Map[String, AnyRef], + private class MapFunction(paramMap: Map[String, Any], round: Int, workerEnvs: java.util.Map[String, String]) extends RichMapPartitionFunction[LabeledVector, XGBoostModel] { @@ -69,7 +70,7 @@ object XGBoost { * @param modelPath The path that is accessible by hadoop filesystem API. 
* @return The loaded model */ - def loadModel(modelPath: String) : XGBoostModel = { + def loadModelFromHadoop(modelPath: String) : XGBoostModel = { new XGBoostModel( XGBoostScala.loadModel( FileSystem @@ -84,7 +85,7 @@ object XGBoost { * @param dtrain The training data. * @param round Number of rounds to train. */ - def train(params: Map[String, AnyRef], + def train(params: Map[String, Any], dtrain: DataSet[LabeledVector], round: Int): XGBoostModel = { val tracker = new RabitTracker(dtrain.getExecutionEnvironment.getParallelism) diff --git a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/XGBoostModel.scala b/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoostModel.scala similarity index 95% rename from jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/XGBoostModel.scala rename to jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoostModel.scala index ce072fd10..6391e2a39 100644 --- a/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/flink/XGBoostModel.scala +++ b/jvm-packages/xgboost4j-flink/src/main/scala/ml/dmlc/xgboost4j/scala/flink/XGBoostModel.scala @@ -14,7 +14,7 @@ limitations under the License. */ -package ml.dmlc.xgboost4j.flink +package ml.dmlc.xgboost4j.scala.flink import ml.dmlc.xgboost4j.LabeledPoint import ml.dmlc.xgboost4j.scala.{DMatrix, Booster} @@ -31,7 +31,7 @@ class XGBoostModel (booster: Booster) extends Serializable { * * @param modelPath The model path as in Hadoop path. 
*/ - def saveModel(modelPath: String): Unit = { + def saveModelToHadoop(modelPath: String): Unit = { booster.saveModel(FileSystem .get(new Configuration) .create(new Path(modelPath))) diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 4e40b83e4..a1f005f83 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -5,8 +5,8 @@ 4.0.0 ml.dmlc - xgboostjvm - 0.1 + xgboost-jvm + 0.5 xgboost4j-spark @@ -24,7 +24,7 @@ ml.dmlc xgboost4j - 0.1 + 0.5 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 6e1d733a4..f0e18a108 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboostjvm - 0.1 + xgboost-jvm + 0.5 xgboost4j - 0.1 + 0.5 jar diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala index 63627ed8d..6ed9cfb62 100644 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala +++ b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/scala/XGBoost.scala @@ -39,14 +39,20 @@ object XGBoost { */ @throws(classOf[XGBoostError]) def train( - params: Map[String, AnyRef], + params: Map[String, Any], dtrain: DMatrix, round: Int, watches: Map[String, DMatrix] = Map[String, DMatrix](), obj: ObjectiveTrait = null, eval: EvalTrait = null): Booster = { + + val jWatches = watches.map{case (name, matrix) => (name, matrix.jDMatrix)} - val xgboostInJava = JXGBoost.train(params.asJava, dtrain.jDMatrix, round, jWatches.asJava, + val xgboostInJava = JXGBoost.train( + params.map{ + case (key: String, value) => (key, value.toString) + }.toMap[String, AnyRef].asJava, + dtrain.jDMatrix, round, jWatches.asJava, obj, eval) new Booster(xgboostInJava) } @@ -65,14 +71,17 @@ object XGBoost { */ @throws(classOf[XGBoostError]) def crossValidation( - params: Map[String, AnyRef], + 
params: Map[String, Any], data: DMatrix, round: Int, nfold: Int = 5, metrics: Array[String] = null, obj: ObjectiveTrait = null, eval: EvalTrait = null): Array[String] = { - JXGBoost.crossValidation(params.asJava, data.jDMatrix, round, nfold, metrics, obj, eval) + JXGBoost.crossValidation(params.map{ + case (key: String, value) => (key, value.toString) + }.toMap[String, AnyRef].asJava, + data.jDMatrix, round, nfold, metrics, obj, eval) } /**