From a6abdccf01de0fb2d6c53403eca9b88df3f89719 Mon Sep 17 00:00:00 2001 From: Jeremy ATIA Date: Mon, 8 Jun 2015 23:31:12 +0200 Subject: [PATCH 01/59] Update understandingXGBoostModel.Rmd a typo for the dimension of the test set --- demo/kaggle-otto/understandingXGBoostModel.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/kaggle-otto/understandingXGBoostModel.Rmd b/demo/kaggle-otto/understandingXGBoostModel.Rmd index f0858e2da..6bd64401d 100644 --- a/demo/kaggle-otto/understandingXGBoostModel.Rmd +++ b/demo/kaggle-otto/understandingXGBoostModel.Rmd @@ -45,7 +45,7 @@ dim(train) train[1:6,1:5, with =F] # Test dataset dimensions -dim(train) +dim(test) # Test content test[1:6,1:5, with =F] @@ -228,4 +228,4 @@ There are 4 documents you may also be interested in: * [xgboostPresentation.Rmd](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd): general presentation * [discoverYourData.Rmd](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/discoverYourData.Rmd): explaining feature analysus * [Feature Importance Analysis with XGBoost in Tax audit](http://fr.slideshare.net/MichaelBENESTY/feature-importance-analysis-with-xgboost-in-tax-audit): use case -* [The Elements of Statistical Learning](http://statweb.stanford.edu/~tibs/ElemStatLearn/): very good book to have a good understanding of the model \ No newline at end of file +* [The Elements of Statistical Learning](http://statweb.stanford.edu/~tibs/ElemStatLearn/): very good book to have a good understanding of the model From f91a098770ce7339f40fa33b0868b5e6948f67ad Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Tue, 9 Jun 2015 23:14:50 -0700 Subject: [PATCH 02/59] add java wrapper --- .gitignore | 8 + Makefile | 13 +- java/README.md | 28 + java/create_wrap.sh | 15 + java/doc/xgboost4j.md | 157 +++++ java/xgboost4j-demo/LICENSE | 15 + java/xgboost4j-demo/README.md | 10 + java/xgboost4j-demo/pom.xml | 36 + .../dmlc/xgboost4j/demo/BasicWalkThrough.java | 117 ++++ .../xgboost4j/demo/BoostFromPrediction.java | 61 ++ .../dmlc/xgboost4j/demo/CrossValidation.java | 53 ++ .../dmlc/xgboost4j/demo/CustomObjective.java | 154 +++++ .../dmlc/xgboost4j/demo/ExternalMemory.java | 59 ++ .../demo/GeneralizedLinearModel.java | 68 ++ .../xgboost4j/demo/PredictFirstNtree.java | 63 ++ .../xgboost4j/demo/PredictLeafIndices.java | 64 ++ .../dmlc/xgboost4j/demo/util/CustomEval.java | 50 ++ .../dmlc/xgboost4j/demo/util/DataLoader.java | 129 ++++ java/xgboost4j/LICENSE | 15 + java/xgboost4j/README.md | 23 + java/xgboost4j/pom.xml | 35 + .../main/java/org/dmlc/xgboost4j/Booster.java | 438 ++++++++++++ .../main/java/org/dmlc/xgboost4j/DMatrix.java | 217 ++++++ .../java/org/dmlc/xgboost4j/IEvaluation.java | 36 + .../java/org/dmlc/xgboost4j/IObjective.java | 32 + .../java/org/dmlc/xgboost4j/util/CVPack.java | 85 +++ .../org/dmlc/xgboost4j/util/Initializer.java | 92 +++ .../org/dmlc/xgboost4j/util/NativeUtils.java | 99 +++ .../java/org/dmlc/xgboost4j/util/Params.java | 54 ++ .../java/org/dmlc/xgboost4j/util/Trainer.java | 230 +++++++ .../org/dmlc/xgboost4j/util/TransferUtil.java | 55 ++ .../dmlc/xgboost4j/wrapper/XgboostJNI.java | 48 ++ .../src/main/resources/lib/README.md | 1 + java/xgboost4j_wrapper.cpp | 634 ++++++++++++++++++ java/xgboost4j_wrapper.h | 213 ++++++ windows/xgboost.sln | 10 + .../xgboostjavawrapper.vcxproj | 129 ++++ 37 files changed, 3545 insertions(+), 1 deletion(-) create mode 100644 java/README.md create mode 100755 java/create_wrap.sh create mode 100644 java/doc/xgboost4j.md 
create mode 100644 java/xgboost4j-demo/LICENSE create mode 100644 java/xgboost4j-demo/README.md create mode 100644 java/xgboost4j-demo/pom.xml create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java create mode 100644 java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java create mode 100644 java/xgboost4j/LICENSE create mode 100644 java/xgboost4j/README.md create mode 100644 java/xgboost4j/pom.xml create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IEvaluation.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IObjective.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Initializer.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/NativeUtils.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Params.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/TransferUtil.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/wrapper/XgboostJNI.java create mode 100644 java/xgboost4j/src/main/resources/lib/README.md create mode 100644 java/xgboost4j_wrapper.cpp create mode 100644 java/xgboost4j_wrapper.h create mode 100644 windows/xgboostjavawrapper/xgboostjavawrapper.vcxproj diff --git a/.gitignore b/.gitignore index c38e16aed..44a215435 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,11 @@ R-package.Rproj *.cache* R-package/inst R-package/src +#java +java/xgboost4j/target +java/xgboost4j/tmp +java/xgboost4j-demo/target +java/xgboost4j-demo/data/ +java/xgboost4j-demo/tmp/ +java/xgboost4j-demo/model/ +nb-configuration* diff --git a/Makefile b/Makefile index e568222c2..74ea4cc63 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,8 @@ export CXX = g++ export MPICXX = mpicxx export LDFLAGS= -pthread -lm export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops +# java include path +export JAVAINCFLAGS = -I${JAVA_HOME}/include -I${JAVA_HOME}/include/linux -I./java ifeq ($(OS), Windows_NT) export CXX = g++ -m64 @@ -53,6 +55,9 @@ else SLIB = wrapper/libxgboostwrapper.so endif +# java lib +JLIB = java/libxgboostjavawrapper.so + # specify tensor path BIN = xgboost MOCKBIN = xgboost.mock @@ -79,6 +84,9 @@ main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner xgboost: updater.o gbm.o io.o main.o $(LIBRABIT) $(LIBDMLC) 
wrapper/xgboost_wrapper.dll wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
+java: java/libxgboostjavawrapper.so
+java/libxgboostjavawrapper.so: java/xgboost4j_wrapper.cpp wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC)
+
 # dependency on rabit
 subtree/rabit/lib/librabit.a: subtree/rabit/src/engine.cc
 	cd subtree/rabit;make lib/librabit.a; cd ../..
@@ -98,6 +106,9 @@ $(MOCKBIN) :
 $(SLIB) :
 	$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS)
 
+$(JLIB) :
+	$(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.so %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS)
+
 $(OBJ) :
 	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
@@ -105,7 +116,7 @@ $(MPIOBJ) :
 	$(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
 $(MPIBIN) :
-	$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 
+	$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
 install:
 	cp -f -r $(BIN) $(INSTALL_PATH)
diff --git a/java/README.md b/java/README.md
new file mode 100644
index 000000000..161d594d8
--- /dev/null
+++ b/java/README.md
@@ -0,0 +1,28 @@
+# xgboost4j
+This is a Java wrapper for xgboost.
+
+The structure of this wrapper is almost the same as that of the official Python wrapper.
+
+The core of this wrapper is two classes:
+
+* DMatrix: for handling data
+
+* Booster: for training and prediction
+
+## usage
+Please refer to [xgboost4j.md](doc/xgboost4j.md) for more information.
+
+Besides, simple examples can be found in [xgboost4j-demo](xgboost4j-demo/README.md); a minimal training sketch also follows this README.
+
+## build native library
+
+For Windows: open xgboost.sln in the windows folder; you will find the xgboostjavawrapper project. Do the following steps to build the wrapper library:
+ * Select x64/win32 and Release in the build configuration
+ * (if you have set `JAVA_HOME` properly in the Windows environment variables, skip this step) right click on the xgboostjavawrapper project -> choose "Properties" -> click on "C/C++" in the window -> change the "Additional Include Directories" to fit your JDK install path.
+ * Rebuild all
+ * Move "xgboostjavawrapper.dll" to "xgboost4j/src/main/resources/lib/" (create this folder if necessary)
+
+For Linux:
+ * make sure you have installed a JDK and `JAVA_HOME` is set properly
+ * run "create_wrap.sh"
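For quick orientation, here is a minimal end-to-end sketch using the classes this patch introduces (`DMatrix`, `Params`, `Trainer`, `Booster`); the data paths and the `QuickStart` class name are illustrative placeholders:

```java
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Params;
import org.dmlc.xgboost4j.util.Trainer;

public class QuickStart {
    public static void main(String[] args) {
        // load training and test data in libsvm text format (placeholder paths)
        DMatrix trainMat = new DMatrix("train.svm.txt");
        DMatrix testMat = new DMatrix("test.svm.txt");

        // boosting parameters
        Params params = new Params() {
            {
                put("eta", "1.0");
                put("max_depth", "2");
                put("objective", "binary:logistic");
            }
        };

        // train for 2 rounds, printing evaluation on both sets each round
        DMatrix[] evalMats = new DMatrix[] {trainMat, testMat};
        String[] evalNames = new String[] {"train", "test"};
        Booster booster = Trainer.train(params, trainMat, 2, evalMats, evalNames, null, null);

        // predict: one float[] of per-class outputs per row of testMat
        float[][] predicts = booster.predict(testMat);
    }
}
```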
-d "$libPath" ]; then + mkdir "$libPath" +fi + +rm -f xgboost4j/src/main/resources/lib/libxgboostjavawrapper.so +mv libxgboostjavawrapper.so xgboost4j/src/main/resources/lib/ + +echo "complete" diff --git a/java/doc/xgboost4j.md b/java/doc/xgboost4j.md new file mode 100644 index 000000000..f23ff509a --- /dev/null +++ b/java/doc/xgboost4j.md @@ -0,0 +1,157 @@ +xgboost4j : java wrapper for xgboost +==== + +This page will introduce xgboost4j, the java wrapper for xgboost, including: +* [Building](#build-xgboost4j) +* [Data Interface](#data-interface) +* [Setting Parameters](#setting-parameters) +* [Train Model](#training-model) +* [Prediction](#prediction) + += +#### Build xgboost4j +* Build native library +first make sure you have installed jdk and `JAVA_HOME` has been setted properly, then simply run `./create_wrap.sh`. + +* Package xgboost4j +to package xgboost4j, you can run `mvn package` in xgboost4j folder or just use IDE(eclipse/netbeans) to open this maven project and build. + += +#### Data Interface +Like the xgboost python module, xgboost4j use ```DMatrix``` to handle data, libsvm txt format file, sparse matrix in CSR/CSC format, and dense matrix is supported. + +* To import ```DMatrix``` : +```java +import org.dmlc.xgboost4j.DMatrix; +``` + +* To load libsvm text format file, the usage is like : +```java +DMatrix dmat = new DMatrix("train.svm.txt"); +``` + +* To load sparse matrix in CSR/CSC format is a little complicated, the usage is like : +suppose a sparse matrix : +1 0 2 0 +4 0 0 3 +3 1 2 0 + + for CSR format +```java +long[] rowHeaders = new long[] {0,2,4,7}; +float[] data = new float[] {1f,2f,4f,3f,3f,1f,2f}; +int[] colIndex = new int[] {0,2,0,3,0,1,2}; +DMatrix dmat = new DMatrix(rowHeaders, colIndex, data, DMatrix.SparseType.CSR); +``` + + for CSC format +```java +long[] colHeaders = new long[] {0,3,4,6,7}; +float[] data = new float[] {1f,4f,3f,1f,2f,2f,3f}; +int[] rowIndex = new int[] {0,1,2,2,0,2,1}; +DMatrix dmat = new DMatrix(colHeaders, rowIndex, data, DMatrix.SparseType.CSC); +``` + +* To load 3*2 dense matrix, the usage is like : +suppose a matrix : +1 2 +3 4 +5 6 + +```java +float[] data = new float[] {1f,2f,3f,4f,5f,6f}; +int nrow = 3; +int ncol = 2; +float missing = 0.0f; +DMatrix dmat = new Matrix(data, nrow, ncol, missing); +``` + +* To set weight : +```java +float[] weights = new float[] {1f,2f,1f}; +dmat.setWeight(weights); +``` + +#### Setting Parameters +* A util class ```Params``` in xgboost4j is used to handle parameters. +* To import ```Params``` : +```java +import org.dmlc.xgboost4j.util.Params; +``` +* to set parameters : +```java +Params params = new Params() { + { + put("eta", "1.0"); + put("max_depth", "2"); + put("silent", "1"); + put("objective", "binary:logistic"); + put("eval_metric", "logloss"); + } +}; +``` +* Multiple values with same param key is handled naturally in ```Params```, e.g. : +```java +Params params = new Params() { + { + put("eta", "1.0"); + put("max_depth", "2"); + put("silent", "1"); + put("objective", "binary:logistic"); + put("eval_metric", "logloss"); + put("eval_metric", "error"); + } +}; +``` + +#### Training Model +With parameters and data, you are able to train a booster model. 
+#### Training Model
+With parameters and data, you are able to train a booster model.
+
+* Import ```Trainer``` and ```Booster``` :
+```java
+import org.dmlc.xgboost4j.Booster;
+import org.dmlc.xgboost4j.util.Trainer;
+```
+
+* Training
+```java
+DMatrix trainMat = new DMatrix("train.svm.txt");
+DMatrix validMat = new DMatrix("valid.svm.txt");
+DMatrix[] evalMats = new DMatrix[] {trainMat, validMat};
+String[] evalNames = new String[] {"train", "valid"};
+int round = 2;
+Booster booster = Trainer.train(params, trainMat, round, evalMats, evalNames, null, null);
+```
+
+* Saving model
+After training, you can save the model and dump it out.
+```java
+booster.saveModel("model.bin");
+```
+
+* Dump Model and Feature Map
+```java
+booster.dumpModel("modelInfo.txt", false);
+//dump with featureMap
+booster.dumpModel("modelInfo.txt", "featureMap.txt", false);
+```
+
+* Load a model
+```java
+Params param = new Params() {
+  {
+    put("silent", "1");
+    put("nthread", "6");
+  }
+};
+Booster booster = new Booster(param, "model.bin");
+```
+
+#### Prediction
+After training or loading a model, you can use it to predict on other data. The prediction result is a two-dimensional float array of shape (nsample, nclass); for leaf prediction it is (nsample, nclass*ntrees).
+```java
+DMatrix dtest = new DMatrix("test.svm.txt");
+//predict
+float[][] predicts = booster.predict(dtest);
+//predict leaf
+float[][] leafPredicts = booster.predict(dtest, 0, true);
+```
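The document above does not cover cross-validation; the `Trainer` class added later in this patch also exposes a `crossValiation` helper (spelled as in the source), which returns one aggregated evaluation string per round. A minimal sketch with a placeholder data path:

```java
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Params;
import org.dmlc.xgboost4j.util.Trainer;

public class CrossValidationSketch {
    public static void main(String[] args) {
        DMatrix data = new DMatrix("train.svm.txt"); // placeholder path
        Params params = new Params() {
            {
                put("eta", "1.0");
                put("max_depth", "3");
                put("objective", "binary:logistic");
            }
        };
        // 5-fold cross-validation for 3 rounds, watching the "error" metric
        String[] evalHist = Trainer.crossValiation(params, data, 3, 5,
                new String[] {"error"}, null, null);
        for (String line : evalHist) {
            System.out.println(line);
        }
    }
}
```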
diff --git a/java/xgboost4j-demo/LICENSE b/java/xgboost4j-demo/LICENSE
new file mode 100644
index 000000000..9a1673be2
--- /dev/null
+++ b/java/xgboost4j-demo/LICENSE
@@ -0,0 +1,15 @@
+/*
+Copyright (c) 2014 by Contributors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
\ No newline at end of file
diff --git a/java/xgboost4j-demo/README.md b/java/xgboost4j-demo/README.md
new file mode 100644
index 000000000..c9cb35e4b
--- /dev/null
+++ b/java/xgboost4j-demo/README.md
@@ -0,0 +1,10 @@
+xgboost4j examples
+====
+* [Basic walkthrough of wrappers](src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java)
+* [Customize loss function and evaluation metric](src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java)
+* [Boosting from existing prediction](src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java)
+* [Predicting using first n trees](src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java)
+* [Generalized Linear Model](src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java)
+* [Cross validation](src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java)
+* [Predicting leaf indices](src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java)
+* [External Memory](src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java)
diff --git a/java/xgboost4j-demo/pom.xml b/java/xgboost4j-demo/pom.xml
new file mode 100644
index 000000000..28c51bc13
--- /dev/null
+++ b/java/xgboost4j-demo/pom.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.dmlc</groupId>
+    <artifactId>xgboost4j-demo</artifactId>
+    <version>1.0</version>
+    <packaging>jar</packaging>
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <maven.compiler.source>1.7</maven.compiler.source>
+        <maven.compiler.target>1.7</maven.compiler.target>
+    </properties>
+    <dependencies>
+        <dependency>
+            <groupId>org.dmlc</groupId>
+            <artifactId>xgboost4j</artifactId>
+            <version>1.1</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.4</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.4</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.11</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
new file mode 100644
index 000000000..778d05a4d
--- /dev/null
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
@@ -0,0 +1,117 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */ +package org.dmlc.xgboost4j.demo; + +import java.io.File; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import org.dmlc.xgboost4j.Booster; +import org.dmlc.xgboost4j.DMatrix; +import org.dmlc.xgboost4j.demo.util.DataLoader; +import org.dmlc.xgboost4j.util.Params; +import org.dmlc.xgboost4j.util.Trainer; + +/** + * a simple example of java wrapper for xgboost + * @author hzx + */ +public class BasicWalkThrough { + public static boolean checkPredicts(float[][] fPredicts, float[][] sPredicts) { + if(fPredicts.length != sPredicts.length) { + return false; + } + + for(int i=0; i getGradient(float[][] predicts, DMatrix dtrain) { + int nrow = predicts.length; + List gradients = new ArrayList<>(); + float[] labels = dtrain.getLabel(); + float[] grad = new float[nrow]; + float[] hess = new float[nrow]; + + float[][] transPredicts = transform(predicts); + + for(int i=0; i0) { + error++; + } + else if(labels[i]==1f && predicts[i][0]<=0) { + error++; + } + } + + return error/labels.length; + } + } + + public static void main(String[] args) { + //load train mat (svmlight format) + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); + //load valid mat (svmlight format) + DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + + //set params + //set params + Params param = new Params() { + { + put("eta", "1.0"); + put("max_depth", "2"); + put("silent", "1"); + } + }; + + //set round + int round = 2; + + //set evaluation data + DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; + String[] evalNames = new String[] {"train", "eval"}; + + //user define obj and eval + IObjective obj = new LogRegObj(); + IEvaluation eval = new EvalError(); + + //train a booster + System.out.println("begin to train the booster model"); + Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, obj, eval); + } +} diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java new file mode 100644 index 000000000..2912d43eb --- /dev/null +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java @@ -0,0 +1,59 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */
+package org.dmlc.xgboost4j.demo;
+
+import org.dmlc.xgboost4j.Booster;
+import org.dmlc.xgboost4j.DMatrix;
+import org.dmlc.xgboost4j.util.Params;
+import org.dmlc.xgboost4j.util.Trainer;
+
+/**
+ * simple example of using the external memory version
+ * @author hzx
+ */
+public class ExternalMemory {
+    public static void main(String[] args) {
+        //this is the only difference: add a '#' followed by a cache prefix name
+        //several cache files with this prefix will be generated
+        //currently only conversion from a libsvm file is supported
+        DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache");
+        DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache");
+
+        //specify parameters
+        Params param = new Params() {
+            {
+                put("eta", "1.0");
+                put("max_depth", "2");
+                put("silent", "1");
+                put("objective", "binary:logistic");
+            }
+        };
+
+        //performance notice: set nthread to the number of physical cores
+        //some cpus offer two threads per core; for example, on a 4-core cpu with 8 threads, set nthread=4
+        //param.put("nthread", "num_real_cpu");
+
+        //specify evaluation datasets and their names
+        DMatrix[] dmats = new DMatrix[] {trainMat, testMat};
+        String[] evalNames = new String[] {"train", "test"};
+
+        //set round
+        int round = 2;
+
+        //train a boosting model
+        Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null);
+    }
+}
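A related data-handling option: a `DMatrix` parsed from a libsvm text file can be saved as an xgboost binary buffer via `saveBinary` (defined on `DMatrix` later in this patch) and reloaded directly on later runs. A small sketch, assuming the demo data path used above:

```java
import org.dmlc.xgboost4j.DMatrix;

public class BinaryBufferSketch {
    public static void main(String[] args) {
        // parse the libsvm text file once
        DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
        // save it as an xgboost binary buffer
        trainMat.saveBinary("dtrain.buffer");
        // later runs can load the binary buffer directly, which is much faster
        DMatrix cached = new DMatrix("dtrain.buffer");
        System.out.println("rows: " + cached.rowNum());
    }
}
```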
diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java
new file mode 100644
index 000000000..6bdc02ab5
--- /dev/null
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java
@@ -0,0 +1,68 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.dmlc.xgboost4j.demo;
+
+import org.dmlc.xgboost4j.Booster;
+import org.dmlc.xgboost4j.DMatrix;
+import org.dmlc.xgboost4j.demo.util.CustomEval;
+import org.dmlc.xgboost4j.util.Params;
+import org.dmlc.xgboost4j.util.Trainer;
+
+/**
+ * this is an example of fitting a generalized linear model in xgboost;
+ * basically, we use a linear model instead of trees for our boosters
+ * @author hzx
+ */
+public class GeneralizedLinearModel {
+    public static void main(String[] args) {
+        // load file from text file, also binary buffer generated by xgboost4j
+        DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
+        DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
+
+        //specify parameters
+        //change booster to gblinear, so that we are fitting a linear model
+        //alpha is the L1 regularizer
+        //lambda is the L2 regularizer
+        //you can also set lambda_bias, which is the L2 regularizer on the bias term
+        Params param = new Params() {
+            {
+                put("alpha", "0.0001");
+                put("silent", "1");
+                put("objective", "binary:logistic");
+                put("booster", "gblinear");
+            }
+        };
+        //normally, you do not need to set eta (step size)
+        //XGBoost uses a parallel coordinate descent algorithm (shotgun);
+        //parallelization can affect convergence in certain cases
+        //setting eta to a smaller value, e.g. 0.5, can make the optimization more stable
+        //param.put("eta", "0.5");
+
+        //specify evaluation datasets and their names
+        DMatrix[] dmats = new DMatrix[] {trainMat, testMat};
+        String[] evalNames = new String[] {"train", "test"};
+
+        //train a booster
+        int round = 4;
+        Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null);
+
+        float[][] predicts = booster.predict(testMat);
+
+        CustomEval eval = new CustomEval();
+        System.out.println("error=" + eval.eval(predicts, testMat));
+    }
+}
diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java
new file mode 100644
index 000000000..51604e8ec
--- /dev/null
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java
@@ -0,0 +1,63 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */ +package org.dmlc.xgboost4j.demo; + +import org.dmlc.xgboost4j.Booster; +import org.dmlc.xgboost4j.DMatrix; +import org.dmlc.xgboost4j.util.Params; +import org.dmlc.xgboost4j.util.Trainer; + +import org.dmlc.xgboost4j.demo.util.CustomEval; + +/** + * predict first ntree + * @author hzx + */ +public class PredictFirstNtree { + public static void main(String[] args) { + // load file from text file, also binary buffer generated by xgboost4j + DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); + DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); + + //specify parameters + Params param = new Params() { + { + put("eta", "1.0"); + put("max_depth", "2"); + put("silent", "1"); + put("objective", "binary:logistic"); + } + }; + + //specify evaluate datasets and evaluate names + DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; + String[] evalNames = new String[] {"train", "test"}; + + //train a booster + int round = 3; + Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null); + + //predict use 1 tree + float[][] predicts1 = booster.predict(testMat, false, 1); + //by default all trees are used to do predict + float[][] predicts2 = booster.predict(testMat); + + //use a simple evaluation class to check error result + CustomEval eval = new CustomEval(); + System.out.println("error of predicts1: " + eval.eval(predicts1, testMat)); + System.out.println("error of predicts2: " + eval.eval(predicts2, testMat)); + } +} diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java new file mode 100644 index 000000000..ced309b03 --- /dev/null +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java @@ -0,0 +1,64 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */
+package org.dmlc.xgboost4j.demo;
+
+import java.util.Arrays;
+import org.dmlc.xgboost4j.Booster;
+import org.dmlc.xgboost4j.DMatrix;
+import org.dmlc.xgboost4j.util.Params;
+import org.dmlc.xgboost4j.util.Trainer;
+
+/**
+ * predict leaf indices
+ * @author hzx
+ */
+public class PredictLeafIndices {
+    public static void main(String[] args) {
+        // load file from text file, also binary buffer generated by xgboost4j
+        DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
+        DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
+
+        //specify parameters
+        Params param = new Params() {
+            {
+                put("eta", "1.0");
+                put("max_depth", "2");
+                put("silent", "1");
+                put("objective", "binary:logistic");
+            }
+        };
+
+        //specify evaluation datasets and their names
+        DMatrix[] dmats = new DMatrix[] {trainMat, testMat};
+        String[] evalNames = new String[] {"train", "test"};
+
+        //train a booster
+        int round = 3;
+        Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null);
+
+        //predict using the first 2 trees
+        float[][] leafindex = booster.predict(testMat, 2, true);
+        for(float[] leafs : leafindex) {
+            System.out.println(Arrays.toString(leafs));
+        }
+
+        //predict using all trees
+        leafindex = booster.predict(testMat, 0, true);
+        for(float[] leafs : leafindex) {
+            System.out.println(Arrays.toString(leafs));
+        }
+    }
+}
diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java
new file mode 100644
index 000000000..ad3a9124b
--- /dev/null
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.dmlc.xgboost4j.demo.util;
+
+import org.dmlc.xgboost4j.DMatrix;
+import org.dmlc.xgboost4j.IEvaluation;
+
+/**
+ * a util evaluation class for examples
+ * @author hzx
+ */
+public class CustomEval implements IEvaluation {
+
+    String evalMetric = "custom_error";
+
+    @Override
+    public String getMetric() {
+        return evalMetric;
+    }
+
+    @Override
+    public float eval(float[][] predicts, DMatrix dmat) {
+        float error = 0f;
+        float[] labels = dmat.getLabel();
+        int nrow = predicts.length;
+        for(int i=0; i<nrow; i++) {
+            if(labels[i]==0f && predicts[i][0]>0.5) {
+                error++;
+            }
+            else if(labels[i]==1f && predicts[i][0]<=0.5) {
+                error++;
+            }
+        }
+
+        return error/labels.length;
+    }
+}
diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java
new file mode 100644
index 000000000..0a020c761
--- /dev/null
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java
@@ -0,0 +1,129 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package org.dmlc.xgboost4j.demo.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.lang3.ArrayUtils; + +/** + * util class for loading data + * @author hzx + */ +public class DataLoader { + public static class DenseData { + public float[] labels; + public float[] data; + public int nrow; + public int ncol; + } + + public static class CSRSparseData { + public float[] labels; + public float[] data; + public long[] rowHeaders; + public int[] colIndex; + } + + public static DenseData loadCSVFile(String filePath) throws FileNotFoundException, UnsupportedEncodingException, IOException { + DenseData denseData = new DenseData(); + + File f = new File(filePath); + FileInputStream in = new FileInputStream(f); + BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + + denseData.nrow = 0; + denseData.ncol = -1; + String line; + List tlabels = new ArrayList<>(); + List tdata = new ArrayList<>(); + + while((line=reader.readLine()) != null) { + String[] items = line.trim().split(","); + if(items.length==0) { + continue; + } + denseData.nrow++; + if(denseData.ncol == -1) { + denseData.ncol = items.length - 1; + } + + tlabels.add(Float.valueOf(items[items.length-1])); + for(int i=0; i tlabels = new ArrayList<>(); + List tdata = new ArrayList<>(); + List theaders = new ArrayList<>(); + List tindex = new ArrayList<>(); + + File f = new File(filePath); + FileInputStream in = new FileInputStream(f); + BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + + String line; + long rowheader = 0; + theaders.add(rowheader); + while((line=reader.readLine()) != null) { + String[] items = line.trim().split(" "); + if(items.length==0) { + continue; + } + + rowheader += items.length - 1; + theaders.add(rowheader); + tlabels.add(Float.valueOf(items[0])); + + for(int i=1; i + + 4.0.0 + org.dmlc + xgboost4j + 1.1 + jar + + UTF-8 + 1.7 + 1.7 + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.3 + + + + + + junit + junit + 4.11 + test + + + commons-logging + commons-logging + 1.2 + + + \ No newline at end of file diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java new file mode 100644 index 000000000..91a2bd40b --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java @@ -0,0 +1,438 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.dmlc.xgboost4j;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.dmlc.xgboost4j.util.Initializer;
+import org.dmlc.xgboost4j.util.Params;
+import org.dmlc.xgboost4j.util.TransferUtil;
+import org.dmlc.xgboost4j.wrapper.XgboostJNI;
+
+
+/**
+ * Booster for xgboost, similar to the python wrapper xgboost.py
+ * @author hzx
+ */
+public final class Booster {
+    private static final Log logger = LogFactory.getLog(Booster.class);
+
+    long handle = 0;
+
+    //load native library
+    static {
+        try {
+            Initializer.InitXgboost();
+        } catch (IOException ex) {
+            logger.error("load native library failed.");
+            logger.error(ex);
+        }
+    }
+
+    /**
+     * init Booster from a DMatrix array
+     * @param params parameters
+     * @param dMatrixs DMatrix array
+     */
+    public Booster(Params params, DMatrix[] dMatrixs) {
+        init(dMatrixs);
+        setParam("seed","0");
+        setParams(params);
+    }
+
+    /**
+     * load model from modelPath
+     * @param params parameters
+     * @param modelPath booster modelPath (model generated by booster.saveModel)
+     */
+    public Booster(Params params, String modelPath) {
+        handle = XgboostJNI.XGBoosterCreate(new long[] {});
+        loadModel(modelPath);
+        setParam("seed","0");
+        setParams(params);
+    }
+
+    private void init(DMatrix[] dMatrixs) {
+        long[] handles = null;
+        if(dMatrixs != null) {
+            handles = TransferUtil.dMatrixs2handles(dMatrixs);
+        }
+        handle = XgboostJNI.XGBoosterCreate(handles);
+    }
+
+    /**
+     * set parameter
+     * @param key param name
+     * @param value param value
+     */
+    public final void setParam(String key, String value) {
+        XgboostJNI.XGBoosterSetParam(handle, key, value);
+    }
+
+    /**
+     * set parameters
+     * @param params parameter key-value pairs
+     */
+    public void setParams(Params params) {
+        if(params!=null) {
+            for(Map.Entry<String, String> entry : params) {
+                setParam(entry.getKey(), entry.getValue());
+            }
+        }
+    }
+
+    /**
+     * Update (one iteration)
+     * @param dtrain training data
+     * @param iter current iteration number
+     */
+    public void update(DMatrix dtrain, int iter) {
+        XgboostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle());
+    }
+
+    /**
+     * update with customized objective function
+     * @param dtrain training data
+     * @param iter current iteration number
+     * @param obj customized objective class
+     */
+    public void update(DMatrix dtrain, int iter, IObjective obj) {
+        float[][] predicts = predict(dtrain, true);
+        List<float[]> gradients = obj.getGradient(predicts, dtrain);
+        boost(dtrain, gradients.get(0), gradients.get(1));
+    }
+
+    /**
+     * update with given grad and hess
+     * @param dtrain training data
+     * @param grad first order gradient
+     * @param hess second order gradient
+     */
+    public void boost(DMatrix dtrain, float[] grad, float[] hess) {
+        if(grad.length != hess.length) {
+            throw new AssertionError(String.format("grad/hess length mismatch %s / %s", grad.length, hess.length));
+        }
+        XgboostJNI.XGBoosterBoostOneIter(handle, dtrain.getHandle(), grad, hess);
+    }
+
+    /**
+     * evaluate with given dmatrixs.
+ * @param evalMatrixs dmatrixs for evaluation + * @param evalNames name for eval dmatrixs, used for check results + * @param iter current eval iteration + * @return eval information + */ + public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) { + long[] handles = TransferUtil.dMatrixs2handles(evalMatrixs); + String evalInfo = XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames); + return evalInfo; + } + + /** + * evaluate with given customized Evaluation class + * @param evalMatrixs + * @param evalNames + * @param iter + * @param eval + * @return eval information + */ + public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, IEvaluation eval) { + String evalInfo = ""; + for(int i=0; i getFeatureScore() { + String[] modelInfos = getDumpInfo(false); + Map featureScore = new HashMap<>(); + for(String tree : modelInfos) { + for(String node : tree.split("\n")) { + String[] array = node.split("\\["); + if(array.length == 1) { + continue; + } + String fid = array[1].split("\\]")[0]; + fid = fid.split("<")[0]; + if(featureScore.containsKey(fid)) { + featureScore.put(fid, 1 + featureScore.get(fid)); + } + else { + featureScore.put(fid, 1); + } + } + } + return featureScore; + } + + + /** + * get importance of each feature + * @param featureMap file to save dumped model info + * @return featureMap key: feature index, value: feature importance score + */ + public Map getFeatureScore(String featureMap) { + String[] modelInfos = getDumpInfo(featureMap, false); + Map featureScore = new HashMap<>(); + for(String tree : modelInfos) { + for(String node : tree.split("\n")) { + String[] array = node.split("\\["); + if(array.length == 1) { + continue; + } + String fid = array[1].split("\\]")[0]; + fid = fid.split("<")[0]; + if(featureScore.containsKey(fid)) { + featureScore.put(fid, 1 + featureScore.get(fid)); + } + else { + featureScore.put(fid, 1); + } + } + } + return featureScore; + } + + @Override + protected void finalize() { + delete(); + } + + public synchronized void delete() { + if(handle != 0l) { + XgboostJNI.XGBoosterFree(handle); + handle=0; + } + } +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java new file mode 100644 index 000000000..58f241f89 --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java @@ -0,0 +1,217 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ +package org.dmlc.xgboost4j; + +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dmlc.xgboost4j.util.Initializer; +import org.dmlc.xgboost4j.util.TransferUtil; +import org.dmlc.xgboost4j.wrapper.XgboostJNI; + +/** + * DMatrix for xgboost, similar to the python wrapper xgboost.py + * @author hzx + */ +public class DMatrix { + private static final Log logger = LogFactory.getLog(DMatrix.class); + long handle = 0; + + //load native library + static { + try { + Initializer.InitXgboost(); + } catch (IOException ex) { + logger.error("load native library failed."); + logger.error(ex); + } + } + + /** + * sparse matrix type (CSR or CSC) + */ + public static enum SparseType { + CSR, + CSC; + } + + /** + * init DMatrix from file (svmlight format) + * @param dataPath + */ + public DMatrix(String dataPath) { + handle = XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1); + } + + /** + * create DMatrix from sparse matrix + * @param headers index to headers (rowHeaders for CSR or colHeaders for CSC) + * @param indices Indices (colIndexs for CSR or rowIndexs for CSC) + * @param data non zero values (sequence by row for CSR or by col for CSC) + * @param st sparse matrix type (CSR or CSC) + */ + public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) { + if(st == SparseType.CSR) { + handle = XgboostJNI.XGDMatrixCreateFromCSR(headers, indices, data); + } + else if(st == SparseType.CSC) { + handle = XgboostJNI.XGDMatrixCreateFromCSC(headers, indices, data); + } + else { + throw new UnknownError("unknow sparsetype"); + } + } + + /** + * create DMatrix from dense matrix + * @param data data values + * @param nrow number of rows + * @param ncol number of columns + */ + public DMatrix(float[] data, int nrow, int ncol) { + handle = XgboostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, 0.0f); + } + + /** + * used for DMatrix slice + * @param handle + */ + private DMatrix(long handle) { + this.handle = handle; + } + + + + /** + * set label of dmatrix + * @param labels + */ + public void setLabel(float[] labels) { + XgboostJNI.XGDMatrixSetFloatInfo(handle, "label", labels); + } + + /** + * set weight of each instance + * @param weights + */ + public void setWeight(float[] weights) { + XgboostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights); + } + + /** + * if specified, xgboost will start from this init margin + * can be used to specify initial prediction to boost from + * @param baseMargin + */ + public void setBaseMargin(float[] baseMargin) { + XgboostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin); + } + + /** + * if specified, xgboost will start from this init margin + * can be used to specify initial prediction to boost from + * @param baseMargin + */ + public void setBaseMargin(float[][] baseMargin) { + float[] flattenMargin = TransferUtil.flatten(baseMargin); + setBaseMargin(flattenMargin); + } + + /** + * Set group sizes of DMatrix (used for ranking) + * @param group + */ + public void setGroup(int[] group) { + XgboostJNI.XGDMatrixSetGroup(handle, group); + } + + private float[] getFloatInfo(String field) { + float[] infos = XgboostJNI.XGDMatrixGetFloatInfo(handle, field); + return infos; + } + + private int[] getIntInfo(String field) { + int[] infos = XgboostJNI.XGDMatrixGetUIntInfo(handle, field); + return infos; + } + + /** + * get label values + * @return label + */ + public float[] getLabel() { + return getFloatInfo("label"); + } + + /** + * get weight of the DMatrix + * @return weights + */ + 
public float[] getWeight() { + return getFloatInfo("weight"); + } + + /** + * get base margin of the DMatrix + * @return base margin + */ + public float[] getBaseMargin() { + return getFloatInfo("base_margin"); + } + + /** + * Slice the DMatrix and return a new DMatrix that only contains `rowIndex`. + * @param rowIndex + * @return sliced new DMatrix + */ + public DMatrix slice(int[] rowIndex) { + long sHandle = XgboostJNI.XGDMatrixSliceDMatrix(handle, rowIndex); + DMatrix sMatrix = new DMatrix(sHandle); + return sMatrix; + } + + /** + * get the row number of DMatrix + * @return number of rows + */ + public long rowNum() { + return XgboostJNI.XGDMatrixNumRow(handle); + } + + /** + * save DMatrix to filePath + * @param filePath + */ + public void saveBinary(String filePath) { + XgboostJNI.XGDMatrixSaveBinary(handle, filePath, 1); + } + + public long getHandle() { + return handle; + } + + @Override + protected void finalize() { + delete(); + } + + public synchronized void delete() { + if(handle != 0) { + XgboostJNI.XGDMatrixFree(handle); + handle = 0; + } + } +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IEvaluation.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IEvaluation.java new file mode 100644 index 000000000..a02746dfa --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IEvaluation.java @@ -0,0 +1,36 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package org.dmlc.xgboost4j; + +/** + * interface for customized evaluation + * @author hzx + */ +public interface IEvaluation { + /** + * get evaluate metric + * @return evalMetric + */ + public abstract String getMetric(); + + /** + * evaluate with predicts and data + * @param predicts + * @param dmat + * @return + */ + public abstract float eval(float[][] predicts, DMatrix dmat); +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IObjective.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IObjective.java new file mode 100644 index 000000000..640f46e6d --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/IObjective.java @@ -0,0 +1,32 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ +package org.dmlc.xgboost4j; + +import java.util.List; + +/** + * interface for customize Object function + * @author hzx + */ +public interface IObjective { + /** + * user define objective function, return gradient and second order gradient + * @param predicts untransformed margin predicts + * @param dtrain training data + * @return List with two float array, correspond to first order grad and second order grad + */ + public abstract List getGradient(float[][] predicts, DMatrix dtrain); +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java new file mode 100644 index 000000000..09bd7b3d8 --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java @@ -0,0 +1,85 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package org.dmlc.xgboost4j.util; + +import org.dmlc.xgboost4j.IEvaluation; +import org.dmlc.xgboost4j.Booster; +import org.dmlc.xgboost4j.DMatrix; +import org.dmlc.xgboost4j.IObjective; + +/** + * cross validation package for xgb + * @author hzx + */ +public class CVPack { + DMatrix dtrain; + DMatrix dtest; + DMatrix[] dmats; + long[] dataArray; + String[] names; + Booster booster; + + /** + * create an cross validation package + * @param dtrain train data + * @param dtest test data + * @param params parameters + */ + public CVPack(DMatrix dtrain, DMatrix dtest, Params params) { + dmats = new DMatrix[] {dtrain, dtest}; + booster = new Booster(params, dmats); + dataArray = TransferUtil.dMatrixs2handles(dmats); + names = new String[] {"train", "test"}; + this.dtrain = dtrain; + this.dtest = dtest; + } + + /** + * update one iteration + * @param iter iteration num + */ + public void update(int iter) { + booster.update(dtrain, iter); + } + + /** + * update one iteration + * @param iter iteration num + * @param obj customized objective + */ + public void update(int iter, IObjective obj) { + booster.update(dtrain, iter, obj); + } + + /** + * evaluation + * @param iter iteration num + * @return + */ + public String eval(int iter) { + return booster.evalSet(dataArray, names, iter); + } + + /** + * evaluation + * @param iter iteration num + * @param eval customized eval + * @return + */ + public String eval(int iter, IEvaluation eval) { + return booster.evalSet(dmats, names, iter, eval); + } +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Initializer.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Initializer.java new file mode 100644 index 000000000..b34a21109 --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Initializer.java @@ -0,0 +1,92 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package org.dmlc.xgboost4j.util; + +import java.io.IOException; +import java.lang.reflect.Field; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * class to load native library + * @author hzx + */ +public class Initializer { + private static final Log logger = LogFactory.getLog(Initializer.class); + + static boolean initialized = false; + public static final String nativePath = "./lib"; + public static final String nativeResourcePath = "/lib/"; + public static final String[] libNames = new String[] {"xgboostjavawrapper"}; + + public static synchronized void InitXgboost() throws IOException { + if(initialized == false) { + for(String libName: libNames) { + smartLoad(libName); + } + initialized = true; + } + } + + /** + * load native library, this method will first try to load library from java.library.path, then try to load from library in jar package. + * @param libName + * @throws IOException + */ + private static void smartLoad(String libName) throws IOException { + addNativeDir(nativePath); + try { + System.loadLibrary(libName); + } + catch (UnsatisfiedLinkError e) { + try { + NativeUtils.loadLibraryFromJar(nativeResourcePath + System.mapLibraryName(libName)); + } + catch (IOException e1) { + throw e1; + } + } + } + + /** + * add libPath to java.library.path, then native library in libPath would be load properly + * @param libPath + * @throws IOException + */ + public static void addNativeDir(String libPath) throws IOException { + try { + Field field = ClassLoader.class.getDeclaredField("usr_paths"); + field.setAccessible(true); + String[] paths = (String[]) field.get(null); + for (String path : paths) { + if (libPath.equals(path)) { + return; + } + } + String[] tmp = new String[paths.length+1]; + System.arraycopy(paths,0,tmp,0,paths.length); + tmp[paths.length] = libPath; + field.set(null, tmp); + } catch (IllegalAccessException e) { + logger.error(e.getMessage()); + throw new IOException("Failed to get permissions to set library path"); + } catch (NoSuchFieldException e) { + logger.error(e.getMessage()); + throw new IOException("Failed to get field handle to set library path"); + } + } +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/NativeUtils.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/NativeUtils.java new file mode 100644 index 000000000..65409603e --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/NativeUtils.java @@ -0,0 +1,99 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package org.dmlc.xgboost4j.util; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + + +/** + * Simple library class for working with JNI (Java Native Interface) + * + * @see http://adamheinrich.com/2012/how-to-load-native-jni-library-from-jar + * + * @author Adam Heirnich <adam@adamh.cz>, http://www.adamh.cz + */ +public class NativeUtils { + + /** + * Private constructor - this class will never be instanced + */ + private NativeUtils() { + } + + /** + * Loads library from current JAR archive + * + * The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting. + * Method uses String as filename because the pathname is "abstract", not system-dependent. + * + * @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext + * @throws IOException If temporary file creation or read/write operation fails + * @throws IllegalArgumentException If source file (param path) does not exist + * @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@see File#createTempFile(java.lang.String, java.lang.String)}). + */ + public static void loadLibraryFromJar(String path) throws IOException { + + if (!path.startsWith("/")) { + throw new IllegalArgumentException("The path has to be absolute (start with '/')."); + } + + // Obtain filename from path + String[] parts = path.split("/"); + String filename = (parts.length > 1) ? parts[parts.length - 1] : null; + + // Split filename to prexif and suffix (extension) + String prefix = ""; + String suffix = null; + if (filename != null) { + parts = filename.split("\\.", 2); + prefix = parts[0]; + suffix = (parts.length > 1) ? "."+parts[parts.length - 1] : null; // Thanks, davs! :-) + } + + // Check if the filename is okay + if (filename == null || prefix.length() < 3) { + throw new IllegalArgumentException("The filename has to be at least 3 characters long."); + } + + // Prepare temporary file + File temp = File.createTempFile(prefix, suffix); + temp.deleteOnExit(); + + if (!temp.exists()) { + throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist."); + } + + // Prepare buffer for data copying + byte[] buffer = new byte[1024]; + int readBytes; + + // Open and check input stream + InputStream is = NativeUtils.class.getResourceAsStream(path); + if (is == null) { + throw new FileNotFoundException("File " + path + " was not found inside JAR."); + } + + // Open output stream and copy data between source file in JAR and the temporary file + OutputStream os = new FileOutputStream(temp); + try { + while ((readBytes = is.read(buffer)) != -1) { + os.write(buffer, 0, readBytes); + } + } finally { + // If read/write fails, close streams safely before throwing an exception + os.close(); + is.close(); + } + + // Finally, load the library + System.load(temp.getAbsolutePath()); + } +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Params.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Params.java new file mode 100644 index 000000000..42da77126 --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Params.java @@ -0,0 +1,54 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.dmlc.xgboost4j.util;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.AbstractMap;
+
+
+/**
+ * a util class for handling params
+ * @author hzx
+ */
+public class Params implements Iterable<Entry<String, String>> {
+    List<Entry<String, String>> params = new ArrayList<>();
+
+    /**
+     * put param key-value pair
+     * @param key
+     * @param value
+     */
+    public void put(String key, String value) {
+        params.add(new AbstractMap.SimpleEntry<>(key, value));
+    }
+
+    @Override
+    public String toString(){
+        String paramsInfo = "";
+        for(Entry<String, String> param : params) {
+            paramsInfo += param.getKey() + ":" + param.getValue() + "\n";
+        }
+        return paramsInfo;
+    }
+
+    @Override
+    public Iterator<Entry<String, String>> iterator() {
+        return params.iterator();
+    }
+}
diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java
new file mode 100644
index 000000000..76f5f58bc
--- /dev/null
+++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java
@@ -0,0 +1,230 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.dmlc.xgboost4j.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.dmlc.xgboost4j.IEvaluation;
+import org.dmlc.xgboost4j.Booster;
+import org.dmlc.xgboost4j.DMatrix;
+import org.dmlc.xgboost4j.IObjective;
+
+
+/**
+ * trainer for xgboost
+ * @author hzx
+ */
+public class Trainer {
+    private static final Log logger = LogFactory.getLog(Trainer.class);
+
+    /**
+     * Train a booster with given parameters.
+     * @param params Booster params.
+     * @param dtrain Data to be trained.
+     * @param round Number of boosting iterations.
+ * @param evalMats Data to be evaluated (may include dtrain) + * @param evalNames name of data (used for evaluation info) + * @param obj customized objective (set to null if not used) + * @param eval customized evaluation (set to null if not used) + * @return trained booster + */ + public static Booster train(Params params, DMatrix dtrain, int round, + DMatrix[] evalMats, String[] evalNames, IObjective obj, IEvaluation eval) { + //collect all data matrixs + DMatrix[] allMats; + if(evalMats!=null && evalMats.length>0) { + allMats = new DMatrix[evalMats.length+1]; + allMats[0] = dtrain; + System.arraycopy(evalMats, 0, allMats, 1, evalMats.length); + } + else { + allMats = new DMatrix[1]; + allMats[0] = dtrain; + } + + //initialize booster + Booster booster = new Booster(params, allMats); + + //used for evaluation + long[] dataArray = null; + String[] names = null; + + if(dataArray==null || names==null) { + //prepare data for evaluation + dataArray = TransferUtil.dMatrixs2handles(evalMats); + names = evalNames; + } + + //begin to train + for(int iter=0; iter0) { + String evalInfo; + if(eval != null) { + evalInfo = booster.evalSet(evalMats, evalNames, iter, eval); + } + else { + evalInfo = booster.evalSet(dataArray, names, iter); + } + logger.info(evalInfo); + } + } + return booster; + } + + /** + * Cross-validation with given paramaters. + * @param params Booster params. + * @param data Data to be trained. + * @param round Number of boosting iterations. + * @param nfold Number of folds in CV. + * @param metrics Evaluation metrics to be watched in CV. + * @param obj customized objective (set to null if not used) + * @param eval customized evaluation (set to null if not used) + * @return evaluation history + */ + public static String[] crossValiation(Params params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) { + CVPack[] cvPacks = makeNFold(data, nfold, params, metrics); + String[] evalHist = new String[round]; + String[] results = new String[cvPacks.length]; + for(int i=0; i samples = genRandPermutationNums(0, (int) data.rowNum()); + int step = samples.size()/nfold; + int[] testSlice = new int[step]; + int[] trainSlice = new int[samples.size()-step]; + int testid, trainid; + CVPack[] cvPacks = new CVPack[nfold]; + for(int i=0; i(i*step) && j<(i*step+step) && testid genRandPermutationNums(int start, int end) { + List samples = new ArrayList<>(); + for(int i=start; i > cvMap = new HashMap<>(); + String aggResult = results[0].split("\t")[0]; + for(String result : results) { + String[] items = result.split("\t"); + for(int i=1; i()); + } + cvMap.get(key).add(value); + } + } + + for(String key : cvMap.keySet()) { + float value = 0f; + for(Float tvalue : cvMap.get(key)) { + value += tvalue; + } + value /= cvMap.get(key).size(); + aggResult += String.format("\tcv-%s:%f", key, value); + } + + return aggResult; + } +} diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/TransferUtil.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/TransferUtil.java new file mode 100644 index 000000000..99478ba95 --- /dev/null +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/TransferUtil.java @@ -0,0 +1,55 @@ +/* + Copyright (c) 2014 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package org.dmlc.xgboost4j.util; + +import org.dmlc.xgboost4j.DMatrix; + +/** + * + * @author hzx + */ +public class TransferUtil { + /** + * transfer DMatrix array to handle array (used for native functions) + * @param dmatrixs + * @return handle array for input dmatrixs + */ + public static long[] dMatrixs2handles(DMatrix[] dmatrixs) { + long[] handles = new long[dmatrixs.length]; + for(int i=0; i +#include "../wrapper/xgboost_wrapper.h" +#include "xgboost4j_wrapper.h" + +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile + (JNIEnv *jenv, jclass jcls, jstring jfname, jint jsilent) { + jlong jresult = 0 ; + char *fname = (char *) 0 ; + int silent; + void *result = 0 ; + fname = 0; + if (jfname) { + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + if (!fname) return 0; + } + silent = (int)jsilent; + result = (void *)XGDMatrixCreateFromFile((char const *)fname, silent); + *(void **)&jresult = result; + if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromCSR + * Signature: ([J[J[F)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR + (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata) { + jlong jresult = 0 ; + bst_ulong nindptr ; + bst_ulong nelem; + void *result = 0 ; + + jlong* indptr = jenv->GetLongArrayElements(jindptr, 0); + jint* indices = jenv->GetIntArrayElements(jindices, 0); + jfloat* data = jenv->GetFloatArrayElements(jdata, 0); + nindptr = (bst_ulong)jenv->GetArrayLength(jindptr); + nelem = (bst_ulong)jenv->GetArrayLength(jdata); + + result = (void *)XGDMatrixCreateFromCSR((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem); + *(void **)&jresult = result; + + //release + jenv->ReleaseLongArrayElements(jindptr, indptr, 0); + jenv->ReleaseIntArrayElements(jindices, indices, 0); + jenv->ReleaseFloatArrayElements(jdata, data, 0); + + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromCSC + * Signature: ([J[J[F)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC + (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata) { + jlong jresult = 0 ; + bst_ulong nindptr ; + bst_ulong nelem; + void *result = 0 ; + + jlong* indptr = jenv->GetLongArrayElements(jindptr, NULL); + jint* indices = jenv->GetIntArrayElements(jindices, 0); + jfloat* data = jenv->GetFloatArrayElements(jdata, NULL); + nindptr = (bst_ulong)jenv->GetArrayLength(jindptr); + nelem = (bst_ulong)jenv->GetArrayLength(jdata); + + result = (void *)XGDMatrixCreateFromCSC((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem); + *(void **)&jresult = result; + + //release + jenv->ReleaseLongArrayElements(jindptr, indptr, 0); + jenv->ReleaseIntArrayElements(jindices, indices, 0); + jenv->ReleaseFloatArrayElements(jdata, data, 0); + + return jresult; +} + +/* + * 
Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromMat + * Signature: ([FIIF)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat + (JNIEnv *jenv, jclass jcls, jfloatArray jdata, jint jnrow, jint jncol, jfloat jmiss) { + jlong jresult = 0 ; + bst_ulong nrow ; + bst_ulong ncol ; + float miss ; + void *result = 0 ; + + + jfloat* data = jenv->GetFloatArrayElements(jdata, 0); + nrow = (bst_ulong)jnrow; + ncol = (bst_ulong)jncol; + miss = (float)jmiss; + result = (void *)XGDMatrixCreateFromMat((float const *)data, nrow, ncol, miss); + *(void **)&jresult = result; + + //release + jenv->ReleaseFloatArrayElements(jdata, data, 0); + + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSliceDMatrix + * Signature: (J[I)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix + (JNIEnv *jenv, jclass jcls, jlong jhandle, jintArray jindexset) { + jlong jresult = 0 ; + void *handle = (void *) 0 ; + bst_ulong len; + void *result = 0 ; + + jint* indexset = jenv->GetIntArrayElements(jindexset, 0); + handle = *(void **)&jhandle; + len = (bst_ulong)jenv->GetArrayLength(jindexset); + + result = (void *)XGDMatrixSliceDMatrix(handle, (int const *)indexset, len); + *(void **)&jresult = result; + + //release + jenv->ReleaseIntArrayElements(jindexset, indexset, 0); + + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixFree + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree + (JNIEnv *jenv, jclass jcls, jlong jhandle) { + void *handle = (void *) 0 ; + handle = *(void **)&jhandle; + XGDMatrixFree(handle); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSaveBinary + * Signature: (JLjava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname, jint jsilent) { + void *handle = (void *) 0 ; + char *fname = (char *) 0 ; + int silent ; + handle = *(void **)&jhandle; + fname = 0; + if (jfname) { + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + if (!fname) return ; + } + silent = (int)jsilent; + XGDMatrixSaveBinary(handle, (char const *)fname, silent); + if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSetFloatInfo + * Signature: (JLjava/lang/String;[F)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jfloatArray jarray) { + void *handle = (void *) 0 ; + char *field = (char *) 0 ; + bst_ulong len; + + + handle = *(void **)&jhandle; + field = 0; + if (jfield) { + field = (char *)jenv->GetStringUTFChars(jfield, 0); + if (!field) return ; + } + + jfloat* array = jenv->GetFloatArrayElements(jarray, NULL); + len = (bst_ulong)jenv->GetArrayLength(jarray); + XGDMatrixSetFloatInfo(handle, (char const *)field, (float const *)array, len); + + //release + if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); + jenv->ReleaseFloatArrayElements(jarray, array, 0); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSetUIntInfo + * Signature: (JLjava/lang/String;[I)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo + (JNIEnv *jenv, jclass jcls, jlong 
jhandle, jstring jfield, jintArray jarray) { + void *handle = (void *) 0 ; + char *field = (char *) 0 ; + bst_ulong len ; + handle = *(void **)&jhandle; + field = 0; + if (jfield) { + field = (char *)jenv->GetStringUTFChars(jfield, 0); + if (!field) return ; + } + + jint* array = jenv->GetIntArrayElements(jarray, NULL); + len = (bst_ulong)jenv->GetArrayLength(jarray); + + XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len); + //release + if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); + jenv->ReleaseIntArrayElements(jarray, array, 0); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSetGroup + * Signature: (J[I)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup + (JNIEnv * jenv, jclass jcls, jlong jhandle, jintArray jarray) { + void *handle = (void *) 0 ; + bst_ulong len ; + + handle = *(void **)&jhandle; + jint* array = jenv->GetIntArrayElements(jarray, NULL); + len = (bst_ulong)jenv->GetArrayLength(jarray); + + XGDMatrixSetGroup(handle, (unsigned int const *)array, len); + + //release + jenv->ReleaseIntArrayElements(jarray, array, 0); + +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixGetFloatInfo + * Signature: (JLjava/lang/String;)[F + */ +JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield) { + void *handle = (void *) 0 ; + char *field = (char *) 0 ; + bst_ulong len[1]; + *len = 0; + float *result = 0 ; + + handle = *(void **)&jhandle; + field = 0; + if (jfield) { + field = (char *)jenv->GetStringUTFChars(jfield, 0); + if (!field) return 0; + } + + result = (float *)XGDMatrixGetFloatInfo((void const *)handle, (char const *)field, len); + + if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); + + jsize jlen = (jsize)*len; + jfloatArray jresult = jenv->NewFloatArray(jlen); + jenv->SetFloatArrayRegion(jresult, 0, jlen, (jfloat *)result); + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixGetUIntInfo + * Signature: (JLjava/lang/String;)[I + */ +JNIEXPORT jintArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield) { + void *handle = (void *) 0 ; + char *field = (char *) 0 ; + bst_ulong len[1]; + *len = 0; + unsigned int *result = 0 ; + + handle = *(void **)&jhandle; + field = 0; + if (jfield) { + field = (char *)jenv->GetStringUTFChars(jfield, 0); + if (!field) return 0; + } + + result = (unsigned int *)XGDMatrixGetUIntInfo((void const *)handle, (char const *)field, len); + + if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); + + jsize jlen = (jsize)*len; + jintArray jresult = jenv->NewIntArray(jlen); + jenv->SetIntArrayRegion(jresult, 0, jlen, (jint *)result); + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixNumRow + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow + (JNIEnv *jenv, jclass jcls, jlong jhandle) { + jlong jresult = 0 ; + void *handle = (void *) 0 ; + bst_ulong result; + handle = *(void **)&jhandle; + result = (bst_ulong)XGDMatrixNumRow((void const *)handle); + jresult = (jlong)result; + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterCreate + * Signature: ([J)J + */ +JNIEXPORT jlong JNICALL 
Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate + (JNIEnv *jenv, jclass jcls, jlongArray jhandles) { + jlong jresult = 0 ; + void **handles = 0; + bst_ulong len = 0; + void *result = 0 ; + jlong* cjhandles = 0; + + if(jhandles) { + len = (bst_ulong)jenv->GetArrayLength(jhandles); + handles = new void*[len]; + //put handle from jhandles to chandles + cjhandles = jenv->GetLongArrayElements(jhandles, 0); + for(bst_ulong i=0; iReleaseLongArrayElements(jhandles, cjhandles, 0); + } + + *(void **)&jresult = result; + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterFree + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree + (JNIEnv *jenv, jclass jcls, jlong jhandle) { + void *handle = (void *) 0 ; + handle = *(void **)&jhandle; + XGBoosterFree(handle); +} + + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterSetParam + * Signature: (JLjava/lang/String;Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jname, jstring jvalue) { + void *handle = (void *) 0 ; + char *name = (char *) 0 ; + char *value = (char *) 0 ; + handle = *(void **)&jhandle; + + name = 0; + if (jname) { + name = (char *)jenv->GetStringUTFChars(jname, 0); + if (!name) return ; + } + + value = 0; + if (jvalue) { + value = (char *)jenv->GetStringUTFChars(jvalue, 0); + if (!value) return ; + } + XGBoosterSetParam(handle, (char const *)name, (char const *)value); + if (name) jenv->ReleaseStringUTFChars(jname, (const char *)name); + if (value) jenv->ReleaseStringUTFChars(jvalue, (const char *)value); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterUpdateOneIter + * Signature: (JIJ)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter + (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlong jdtrain) { + void *handle = (void *) 0 ; + int iter ; + void *dtrain = (void *) 0 ; + handle = *(void **)&jhandle; + iter = (int)jiter; + dtrain = *(void **)&jdtrain; + XGBoosterUpdateOneIter(handle, iter, dtrain); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterBoostOneIter + * Signature: (JJ[F[F)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter + (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdtrain, jfloatArray jgrad, jfloatArray jhess) { + void *handle = (void *) 0 ; + void *dtrain = (void *) 0 ; + bst_ulong len ; + + handle = *(void **)&jhandle; + dtrain = *(void **)&jdtrain; + jfloat* grad = jenv->GetFloatArrayElements(jgrad, 0); + jfloat* hess = jenv->GetFloatArrayElements(jhess, 0); + len = (bst_ulong)jenv->GetArrayLength(jgrad); + XGBoosterBoostOneIter(handle, dtrain, grad, hess, len); + + //release + jenv->ReleaseFloatArrayElements(jgrad, grad, 0); + jenv->ReleaseFloatArrayElements(jhess, hess, 0); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterEvalOneIter + * Signature: (JI[J[Ljava/lang/String;)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter + (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlongArray jdmats, jobjectArray jevnames) { + jstring jresult = 0 ; + void *handle = (void *) 0 ; + int iter ; + void **dmats = 0; + char **evnames = 0; + bst_ulong len ; + char *result = 0 ; + + handle = *(void **)&jhandle; + iter = (int)jiter; + 
len = (bst_ulong)jenv->GetArrayLength(jdmats); + + + if(len > 0) { + dmats = new void*[len]; + evnames = new char*[len]; + } + + //put handle from jhandles to chandles + jlong* cjdmats = jenv->GetLongArrayElements(jdmats, 0); + for(bst_ulong i=0; iGetObjectArrayElement(jevnames, i); + evnames[i] = (char *)jenv->GetStringUTFChars(jevname, 0); + } + + result = (char *)XGBoosterEvalOneIter(handle, iter, dmats, (char const *(*))evnames, len); + + if(len > 0) { + delete[] dmats; + //release string chars + for(bst_ulong i=0; iGetObjectArrayElement(jevnames, i); + jenv->ReleaseStringUTFChars(jevname, (const char*)evnames[i]); + } + delete[] evnames; + jenv->ReleaseLongArrayElements(jdmats, cjdmats, 0); + } + + if (result) jresult = jenv->NewStringUTF((const char *)result); + + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterPredict + * Signature: (JJIJ)[F + */ +JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict + (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdmat, jint joption_mask, jlong jntree_limit) { + void *handle = (void *) 0 ; + void *dmat = (void *) 0 ; + int option_mask ; + unsigned int ntree_limit ; + bst_ulong len[1]; + *len = 0; + float *result = 0 ; + + handle = *(void **)&jhandle; + dmat = *(void **)&jdmat; + option_mask = (int)joption_mask; + ntree_limit = (unsigned int)jntree_limit; + + result = (float *)XGBoosterPredict(handle, dmat, option_mask, ntree_limit, len); + + jsize jlen = (jsize)*len; + jfloatArray jresult = jenv->NewFloatArray(jlen); + jenv->SetFloatArrayRegion(jresult, 0, jlen, (jfloat *)result); + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterLoadModel + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) { + void *handle = (void *) 0 ; + char *fname = (char *) 0 ; + handle = *(void **)&jhandle; + fname = 0; + if (jfname) { + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + if (!fname) return ; + } + XGBoosterLoadModel(handle,(char const *)fname); + if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterSaveModel + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) { + void *handle = (void *) 0 ; + char *fname = (char *) 0 ; + handle = *(void **)&jhandle; + fname = 0; + if (jfname) { + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + if (!fname) return ; + } + XGBoosterSaveModel(handle, (char const *)fname); + if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterLoadModelFromBuffer + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer + (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jbuf, jlong jlen) { + void *handle = (void *) 0 ; + void *buf = (void *) 0 ; + bst_ulong len ; + handle = *(void **)&jhandle; + buf = *(void **)&jbuf; + len = (bst_ulong)jlen; + XGBoosterLoadModelFromBuffer(handle, (void const *)buf, len); +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterGetModelRaw + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL 
Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw + (JNIEnv * jenv, jclass jcls, jlong jhandle) { + jstring jresult = 0 ; + void *handle = (void *) 0 ; + bst_ulong len[1]; + *len = 0; + char *result = 0 ; + handle = *(void **)&jhandle; + + result = (char *)XGBoosterGetModelRaw(handle, len); + if (result) jresult = jenv->NewStringUTF((const char *)result); + return jresult; +} + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterDumpModel + * Signature: (JLjava/lang/String;I)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfmap, jint jwith_stats) { + void *handle = (void *) 0 ; + char *fmap = (char *) 0 ; + int with_stats ; + bst_ulong len[1]; + *len = 0; + + char **result = 0 ; + handle = *(void **)&jhandle; + fmap = 0; + if (jfmap) { + fmap = (char *)jenv->GetStringUTFChars(jfmap, 0); + if (!fmap) return 0; + } + with_stats = (int)jwith_stats; + + result = (char **)XGBoosterDumpModel(handle, (char const *)fmap, with_stats, len); + + jsize jlen = (jsize)*len; + jobjectArray jresult = jenv->NewObjectArray(jlen, jenv->FindClass("java/lang/String"), jenv->NewStringUTF("")); + for(int i=0 ; iSetObjectArrayElement(jresult, i, jenv->NewStringUTF((const char*)result[i])); + } + + if (fmap) jenv->ReleaseStringUTFChars(jfmap, (const char *)fmap); + return jresult; +} \ No newline at end of file diff --git a/java/xgboost4j_wrapper.h b/java/xgboost4j_wrapper.h new file mode 100644 index 000000000..d13b86f8c --- /dev/null +++ b/java/xgboost4j_wrapper.h @@ -0,0 +1,213 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_dmlc_xgboost4j_wrapper_XgboostJNI */ + +#ifndef _Included_org_dmlc_xgboost4j_wrapper_XgboostJNI +#define _Included_org_dmlc_xgboost4j_wrapper_XgboostJNI +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromFile + * Signature: (Ljava/lang/String;I)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile + (JNIEnv *, jclass, jstring, jint); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromCSR + * Signature: ([J[J[F)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR + (JNIEnv *, jclass, jlongArray, jintArray, jfloatArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromCSC + * Signature: ([J[J[F)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC + (JNIEnv *, jclass, jlongArray, jintArray, jfloatArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromMat + * Signature: ([FIIF)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat + (JNIEnv *, jclass, jfloatArray, jint, jint, jfloat); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSliceDMatrix + * Signature: (J[I)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix + (JNIEnv *, jclass, jlong, jintArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixFree + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree + (JNIEnv *, jclass, jlong); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSaveBinary + * 
Signature: (JLjava/lang/String;I)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary + (JNIEnv *, jclass, jlong, jstring, jint); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSetFloatInfo + * Signature: (JLjava/lang/String;[F)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo + (JNIEnv *, jclass, jlong, jstring, jfloatArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSetUIntInfo + * Signature: (JLjava/lang/String;[I)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo + (JNIEnv *, jclass, jlong, jstring, jintArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixSetGroup + * Signature: (J[I)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup + (JNIEnv *, jclass, jlong, jintArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixGetFloatInfo + * Signature: (JLjava/lang/String;)[F + */ +JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixGetUIntInfo + * Signature: (JLjava/lang/String;)[I + */ +JNIEXPORT jintArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixNumRow + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow + (JNIEnv *, jclass, jlong); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterCreate + * Signature: ([J)J + */ +JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate + (JNIEnv *, jclass, jlongArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterFree + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree + (JNIEnv *, jclass, jlong); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterSetParam + * Signature: (JLjava/lang/String;Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam + (JNIEnv *, jclass, jlong, jstring, jstring); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterUpdateOneIter + * Signature: (JIJ)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter + (JNIEnv *, jclass, jlong, jint, jlong); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterBoostOneIter + * Signature: (JJ[F[F)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter + (JNIEnv *, jclass, jlong, jlong, jfloatArray, jfloatArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterEvalOneIter + * Signature: (JI[J[Ljava/lang/String;)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter + (JNIEnv *, jclass, jlong, jint, jlongArray, jobjectArray); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterPredict + * Signature: (JJIJ)[F + */ +JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict + (JNIEnv *, jclass, jlong, jlong, jint, jlong); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI 
+ * Method: XGBoosterLoadModel + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterSaveModel + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterLoadModelFromBuffer + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer + (JNIEnv *, jclass, jlong, jlong, jlong); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterGetModelRaw + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw + (JNIEnv *, jclass, jlong); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGBoosterDumpModel + * Signature: (JLjava/lang/String;I)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel + (JNIEnv *, jclass, jlong, jstring, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/windows/xgboost.sln b/windows/xgboost.sln index f2b08a456..7bd8db5b2 100644 --- a/windows/xgboost.sln +++ b/windows/xgboost.sln @@ -10,6 +10,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xgboost_wrapper", "xgboost_ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rabit", "..\subtree\rabit\windows\rabit\rabit.vcxproj", "{D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xgboostjavawrapper", "xgboostjavawrapper\xgboostjavawrapper.vcxproj", "{20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -41,6 +43,14 @@ Global {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Release|Win32.Build.0 = Release|Win32 {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Release|x64.ActiveCfg = Release|x64 {D7B77D06-4F5F-4BD7-B81E-7CC8EBBE684F}.Release|x64.Build.0 = Release|x64 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Debug|Win32.ActiveCfg = Debug|Win32 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Debug|Win32.Build.0 = Debug|Win32 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Debug|x64.ActiveCfg = Debug|x64 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Debug|x64.Build.0 = Debug|x64 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Release|Win32.ActiveCfg = Release|Win32 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Release|Win32.Build.0 = Release|Win32 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Release|x64.ActiveCfg = Release|x64 + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/windows/xgboostjavawrapper/xgboostjavawrapper.vcxproj b/windows/xgboostjavawrapper/xgboostjavawrapper.vcxproj new file mode 100644 index 000000000..e55dfff71 --- /dev/null +++ b/windows/xgboostjavawrapper/xgboostjavawrapper.vcxproj @@ -0,0 +1,129 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + {20A0E4D7-20C7-4EC1-BDF6-0D469CE239AA} + xgboost_wrapper + + + + DynamicLibrary + true + MultiByte + + + DynamicLibrary + true + MultiByte + + + DynamicLibrary + false + true + MultiByte + + + DynamicLibrary + false + true + MultiByte + + + + + + + + + + + + + 
+ + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + + + + Level3 + Disabled + + + true + + + + + Level3 + Disabled + + + true + + + + + Level3 + MaxSpeed + true + true + true + $(JAVA_HOME)\include;$(JAVA_HOME)\include\win32;%(AdditionalIncludeDirectories) + + + true + true + true + + + + + Level3 + MaxSpeed + true + true + true + MultiThreaded + $(JAVA_HOME)\include\win32;$(JAVA_HOME)\include;%(AdditionalIncludeDirectories) + + + true + true + true + ws2_32.lib;%(AdditionalDependencies) + + + + + + \ No newline at end of file From 1e03be4e08172cf096e72b950ae65561052195b4 Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Tue, 9 Jun 2015 23:30:00 -0700 Subject: [PATCH 03/59] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 74ea4cc63..360d55e84 100644 --- a/Makefile +++ b/Makefile @@ -107,7 +107,7 @@ $(SLIB) : $(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS) $(JLIB) : - $(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.so %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS) + $(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS) $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) From c110111f521ae3b95096e4b73cf3233a9e21de1b Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Wed, 10 Jun 2015 20:09:49 -0700 Subject: [PATCH 04/59] make some fix --- java/README.md | 4 +- java/create_wrap.bat | 20 +++++++ java/create_wrap.sh | 2 +- java/doc/xgboost4j.md | 25 +++++---- .../dmlc/xgboost4j/demo/BasicWalkThrough.java | 23 +++++--- .../xgboost4j/demo/BoostFromPrediction.java | 18 +++--- .../dmlc/xgboost4j/demo/CrossValidation.java | 10 ++-- .../dmlc/xgboost4j/demo/CustomObjective.java | 17 +++--- .../dmlc/xgboost4j/demo/ExternalMemory.java | 18 +++--- .../demo/GeneralizedLinearModel.java | 14 +++-- .../xgboost4j/demo/PredictFirstNtree.java | 16 +++--- .../xgboost4j/demo/PredictLeafIndices.java | 16 +++--- .../main/java/org/dmlc/xgboost4j/Booster.java | 22 ++++++-- .../main/java/org/dmlc/xgboost4j/DMatrix.java | 21 ++++++- .../java/org/dmlc/xgboost4j/util/CVPack.java | 4 +- .../org/dmlc/xgboost4j/util/Initializer.java | 2 +- .../org/dmlc/xgboost4j/util/NativeUtils.java | 16 +++++- .../java/org/dmlc/xgboost4j/util/Params.java | 10 ++-- .../java/org/dmlc/xgboost4j/util/Trainer.java | 33 ++++++----- .../org/dmlc/xgboost4j/util/TransferUtil.java | 55 ------------------- .../org/dmlc/xgboost4j/util/WatchList.java | 49 +++++++++++++++++ .../src/main/resources/lib/README.md | 1 - 22 files changed, 234 insertions(+), 162 deletions(-) create mode 100644 java/create_wrap.bat delete mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/TransferUtil.java create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/WatchList.java delete mode 100644 java/xgboost4j/src/main/resources/lib/README.md diff --git a/java/README.md b/java/README.md index 161d594d8..12cbb4582 100644 --- a/java/README.md +++ b/java/README.md @@ -17,11 +17,11 @@ core of this wrapper is two classes: ## build native library -for windows: open the xgboost.sln in windows folder, you will found the xgboostjavawrapper project, you should do the following steps to build wrapper library: +for windows: open the xgboost.sln in "../windows" folder, you will found the xgboostjavawrapper project, you should do the following steps to build wrapper library: * Select x64/win32 and Release in build * (if you have setted `JAVA_HOME` properly in 
windows environment variables, escape this step) right click on xgboostjavawrapper project -> choose "Properties" -> click on "C/C++" in the window -> change the "Additional Include Directories" to fit your jdk install path. * rebuild all - * move the dll "xgboostjavawrapper.dll" to "xgboost4j/src/main/resources/lib/"(you may need to create this folder if necessary.) + * double click "create_wrap.bat" to set library to proper place for linux: * make sure you have installed jdk and `JAVA_HOME` has been setted properly diff --git a/java/create_wrap.bat b/java/create_wrap.bat new file mode 100644 index 000000000..e7f8603cd --- /dev/null +++ b/java/create_wrap.bat @@ -0,0 +1,20 @@ +echo "move native library" +set libsource=..\windows\x64\Release\xgboostjavawrapper.dll + +if not exist %libsource% ( +goto end +) + +set libfolder=xgboost4j\src\main\resources\lib +set libpath=%libfolder%\xgboostjavawrapper.dll +if not exist %libfolder% (mkdir %libfolder%) +if exist %libpath% (del %libpath%) +move %libsource% %libfolder% +echo complete +pause +exit + +:end + echo "source library not found, please build it first from ..\windows\xgboost.sln" + pause + exit \ No newline at end of file diff --git a/java/create_wrap.sh b/java/create_wrap.sh index 08b3f6792..d66e4dbd4 100755 --- a/java/create_wrap.sh +++ b/java/create_wrap.sh @@ -6,7 +6,7 @@ echo "move native lib" libPath="xgboost4j/src/main/resources/lib" if [ ! -d "$libPath" ]; then - mkdir "$libPath" + mkdir -p "$libPath" fi rm -f xgboost4j/src/main/resources/lib/libxgboostjavawrapper.so diff --git a/java/doc/xgboost4j.md b/java/doc/xgboost4j.md index f23ff509a..b383e9a04 100644 --- a/java/doc/xgboost4j.md +++ b/java/doc/xgboost4j.md @@ -82,9 +82,9 @@ import org.dmlc.xgboost4j.util.Params; ```java Params params = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); put("objective", "binary:logistic"); put("eval_metric", "logloss"); } @@ -94,9 +94,9 @@ Params params = new Params() { ```java Params params = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); put("objective", "binary:logistic"); put("eval_metric", "logloss"); put("eval_metric", "error"); @@ -110,16 +110,19 @@ With parameters and data, you are able to train a booster model. 
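The snippets below cover each step in isolation; stitched together, a whole run looks like the following sketch. It is illustrative only, not text from the original patch: it assumes the `Params`/`WatchList`/`Trainer.train` API introduced in this commit, a hypothetical class name `TrainSketch`, and placeholder LibSVM file paths.
```java
import org.dmlc.xgboost4j.Booster;
import org.dmlc.xgboost4j.DMatrix;
import org.dmlc.xgboost4j.util.Params;
import org.dmlc.xgboost4j.util.Trainer;
import org.dmlc.xgboost4j.util.WatchList;

public class TrainSketch {
    public static void main(String[] args) throws Exception {
        // data in LibSVM format; the paths are placeholders
        DMatrix trainMat = new DMatrix("train.svm.txt");
        DMatrix testMat = new DMatrix("test.svm.txt");

        // booster parameters (Params keeps insertion order, so repeated keys are allowed)
        Params params = new Params() {
            {
                put("eta", 1.0);
                put("max_depth", 2);
                put("silent", 1);
                put("objective", "binary:logistic");
            }
        };

        // datasets whose evaluation metrics are logged every round
        WatchList watchs = new WatchList();
        watchs.put("train", trainMat);
        watchs.put("test", testMat);

        int round = 2;
        // no custom objective or evaluation: pass null for both
        Booster booster = Trainer.train(params, trainMat, round, watchs, null, null);
        booster.saveModel("model.bin");
    }
}
```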
```java import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.WatchList; ``` * Training ```java DMatrix trainMat = new DMatrix("train.svm.txt"); DMatrix validMat = new DMatrix("valid.svm.txt"); -DMatrix[] evalMats = new DMatrix[] {trainMat, validMat}; -String[] evalNames = new String[] {"train", "valid"}; +//specifiy a watchList to see the performance +WatchList watchs = new WatchList(); +watchs.put("train", trainMat); +watchs.put("test", testMat); int round = 2; -Booster booster = Trainer.train(params, trainMat, round, evalMats, evalNames, null, null); +Booster booster = Trainer.train(params, trainMat, round, watchs, null, null); ``` * Saving model @@ -139,8 +142,8 @@ booster.dumpModel("modelInfo.txt", "featureMap.txt", false) ```java Params param = new Params() { { - put("silent", "1"); - put("nthread", "6"); + put("silent", 1); + put("nthread", 6); } }; Booster booster = new Booster(param, "model.bin"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java index 778d05a4d..0a80ae314 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java @@ -24,6 +24,7 @@ import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.DataLoader; import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.WatchList; /** * a simple example of java wrapper for xgboost @@ -53,22 +54,23 @@ public class BasicWalkThrough { //specify parameters Params param = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); put("objective", "binary:logistic"); } }; - //specify evaluate datasets and evaluate names - DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; - String[] evalNames = new String[] {"train", "test"}; + //specify watchList + WatchList watchs = new WatchList(); + watchs.put("train", trainMat); + watchs.put("test", testMat); //set round int round = 2; //train a boost model - Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null); + Booster booster = Trainer.train(param, trainMat, round, watchs, null, null); //predict float[][] predicts = booster.predict(testMat); @@ -107,8 +109,11 @@ public class BasicWalkThrough { DMatrix trainMat2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data, DMatrix.SparseType.CSR); trainMat2.setLabel(spData.labels); - dmats = new DMatrix[] {trainMat2, testMat}; - Booster booster3 = Trainer.train(param, trainMat2, round, dmats, evalNames, null, null); + //specify watchList + WatchList watchs2 = new WatchList(); + watchs2.put("train", trainMat2); + watchs2.put("test", testMat); + Booster booster3 = Trainer.train(param, trainMat2, round, watchs2, null, null); float[][] predicts3 = booster3.predict(testMat2); //check predicts diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java index ed029a6a1..54c8c1bee 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java @@ -19,6 +19,7 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; 
import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.WatchList; /** * example for start from a initial base prediction @@ -35,19 +36,20 @@ public class BoostFromPrediction { //specify parameters Params param = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); put("objective", "binary:logistic"); } }; - //specify evaluate datasets and evaluate names - DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; - String[] evalNames = new String[] {"train", "test"}; + //specify watchList + WatchList watchs = new WatchList(); + watchs.put("train", trainMat); + watchs.put("test", testMat); //train xgboost for 1 round - Booster booster = Trainer.train(param, trainMat, 1, dmats, evalNames, null, null); + Booster booster = Trainer.train(param, trainMat, 1, watchs, null, null); float[][] trainPred = booster.predict(trainMat, true); float[][] testPred = booster.predict(testMat, true); @@ -56,6 +58,6 @@ public class BoostFromPrediction { testMat.setBaseMargin(testPred); System.out.println("result of running from initial prediction"); - Booster booster2 = Trainer.train(param, trainMat, 1, dmats, evalNames, null, null); + Booster booster2 = Trainer.train(param, trainMat, 1, watchs, null, null); } } diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java index 754ae072c..793ffb61d 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java @@ -32,12 +32,12 @@ public class CrossValidation { //set params Params param = new Params() { { - put("eta", "1.0"); - put("max_depth", "3"); - put("silent", "1"); - put("nthread", "6"); + put("eta", 1.0); + put("max_depth", 3); + put("silent", 1); + put("nthread", 6); put("objective", "binary:logistic"); - put("gamma", "1.0"); + put("gamma", 1.0); put("eval_metric", "error"); } }; diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java index d0caaf53f..ed8c9a9a9 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java @@ -16,7 +16,6 @@ package org.dmlc.xgboost4j.demo; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.IEvaluation; @@ -24,6 +23,7 @@ import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.IObjective; import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.WatchList; /** * an example user define objective and eval @@ -130,18 +130,19 @@ public class CustomObjective { //set params Params param = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); } }; //set round int round = 2; - //set evaluation data - DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; - String[] evalNames = new String[] {"train", "eval"}; + //specify watchList + WatchList watchs = new WatchList(); + watchs.put("train", trainMat); + watchs.put("test", testMat); //user define obj and eval IObjective obj = new LogRegObj(); @@ -149,6 +150,6 @@ 
public class CustomObjective { //train a booster System.out.println("begin to train the booster model"); - Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, obj, eval); + Booster booster = Trainer.train(param, trainMat, round, watchs, obj, eval); } } diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java index 2912d43eb..698245bf1 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java @@ -19,6 +19,7 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.WatchList; /** * simple example for using external memory version @@ -35,25 +36,26 @@ public class ExternalMemory { //specify parameters Params param = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); put("objective", "binary:logistic"); } }; //performance notice: set nthread to be the number of your real cpu //some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case set nthread=4 - //param.put("nthread", "num_real_cpu"); + //param.put("nthread", num_real_cpu); - //specify evaluate datasets and evaluate names - DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; - String[] evalNames = new String[] {"train", "test"}; + //specify watchList + WatchList watchs = new WatchList(); + watchs.put("train", trainMat); + watchs.put("test", testMat); //set round int round = 2; //train a boost model - Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null); + Booster booster = Trainer.train(param, trainMat, round, watchs, null, null); } } diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java index 6bdc02ab5..a9b3ba8df 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java @@ -20,6 +20,7 @@ import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.CustomEval; import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.WatchList; /** * this is an example of fit generalized linear model in xgboost @@ -39,8 +40,8 @@ public class GeneralizedLinearModel { //you can also set lambda_bias which is L2 regularizer on the bias term Params param = new Params() { { - put("alpha", "0.0001"); - put("silent", "1"); + put("alpha", 0.0001); + put("silent", 1); put("objective", "binary:logistic"); put("booster", "gblinear"); } @@ -52,13 +53,14 @@ public class GeneralizedLinearModel { //param.put("eta", "0.5"); - //specify evaluate datasets and evaluate names - DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; - String[] evalNames = new String[] {"train", "test"}; + //specify watchList + WatchList watchs = new WatchList(); + watchs.put("train", trainMat); + watchs.put("test", testMat); //train a booster int round = 4; - Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null); + Booster booster = Trainer.train(param, trainMat, round, watchs, null, null); float[][] predicts = 
booster.predict(testMat); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java index 51604e8ec..bfcc04d06 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java @@ -21,6 +21,7 @@ import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; import org.dmlc.xgboost4j.demo.util.CustomEval; +import org.dmlc.xgboost4j.util.WatchList; /** * predict first ntree @@ -35,20 +36,21 @@ public class PredictFirstNtree { //specify parameters Params param = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); put("objective", "binary:logistic"); } }; - //specify evaluate datasets and evaluate names - DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; - String[] evalNames = new String[] {"train", "test"}; + //specify watchList + WatchList watchs = new WatchList(); + watchs.put("train", trainMat); + watchs.put("test", testMat); //train a booster int round = 3; - Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null); + Booster booster = Trainer.train(param, trainMat, round, watchs, null, null); //predict use 1 tree float[][] predicts1 = booster.predict(testMat, false, 1); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java index ced309b03..5f1c2e5ac 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java @@ -20,6 +20,7 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.WatchList; /** * predict leaf indices @@ -34,20 +35,21 @@ public class PredictLeafIndices { //specify parameters Params param = new Params() { { - put("eta", "1.0"); - put("max_depth", "2"); - put("silent", "1"); + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); put("objective", "binary:logistic"); } }; - //specify evaluate datasets and evaluate names - DMatrix[] dmats = new DMatrix[] {trainMat, testMat}; - String[] evalNames = new String[] {"train", "test"}; + //specify watchList + WatchList watchs = new WatchList(); + watchs.put("train", trainMat); + watchs.put("test", testMat); //train a booster int round = 3; - Booster booster = Trainer.train(param, trainMat, round, dmats, evalNames, null, null); + Booster booster = Trainer.train(param, trainMat, round, watchs, null, null); //predict using first 2 tree float[][] leafindex = booster.predict(testMat, 2, true); diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java index 91a2bd40b..3140b184e 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java @@ -30,7 +30,6 @@ import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.util.Initializer; import org.dmlc.xgboost4j.util.Params; -import org.dmlc.xgboost4j.util.TransferUtil; import org.dmlc.xgboost4j.wrapper.XgboostJNI; @@ -85,7 +84,7 @@ public final class Booster { private void 
init(DMatrix[] dMatrixs) { long[] handles = null; if(dMatrixs != null) { - handles = TransferUtil.dMatrixs2handles(dMatrixs); + handles = dMatrixs2handles(dMatrixs); } handle = XgboostJNI.XGBoosterCreate(handles); } @@ -105,8 +104,8 @@ public final class Booster { */ public void setParams(Params params) { if(params!=null) { - for(Map.Entry entry : params) { - setParam(entry.getKey(), entry.getValue()); + for(Map.Entry entry : params) { + setParam(entry.getKey(), entry.getValue().toString()); } } } @@ -154,7 +153,7 @@ public final class Booster { * @return eval information */ public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) { - long[] handles = TransferUtil.dMatrixs2handles(evalMatrixs); + long[] handles = dMatrixs2handles(evalMatrixs); String evalInfo = XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames); return evalInfo; } @@ -424,6 +423,19 @@ public final class Booster { return featureScore; } + /** + * transfer DMatrix array to handle array (used for native functions) + * @param dmatrixs + * @return handle array for input dmatrixs + */ + private static long[] dMatrixs2handles(DMatrix[] dmatrixs) { + long[] handles = new long[dmatrixs.length]; + for(int i=0; i>{ - List> params = new ArrayList<>(); +public class Params implements Iterable>{ + List> params = new ArrayList<>(); /** * put param key-value pair * @param key * @param value */ - public void put(String key, String value) { + public void put(String key, Object value) { params.add(new AbstractMap.SimpleEntry<>(key, value)); } @Override public String toString(){ String paramsInfo = ""; - for(Entry param : params) { + for(Entry param : params) { paramsInfo += param.getKey() + ":" + param.getValue() + "\n"; } return paramsInfo; } @Override - public Iterator> iterator() { + public Iterator> iterator() { return params.iterator(); } } diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java index 76f5f58bc..a53437477 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.IEvaluation; @@ -40,14 +41,26 @@ public class Trainer { * @param params Booster params. * @param dtrain Data to be trained. * @param round Number of boosting iterations. - * @param evalMats Data to be evaluated (may include dtrain) - * @param evalNames name of data (used for evaluation info) + * @param watchs a group of items to be evaluated during training, this allows user to watch performance on the validation set. 
* @param obj customized objective (set to null if not used) * @param eval customized evaluation (set to null if not used) * @return trained booster */ - public static Booster train(Params params, DMatrix dtrain, int round, - DMatrix[] evalMats, String[] evalNames, IObjective obj, IEvaluation eval) { + public static Booster train(Params params, DMatrix dtrain, int round, + WatchList watchs, IObjective obj, IEvaluation eval) { + + //collect eval matrixs + int len = watchs.size(); + int i = 0; + String[] evalNames = new String[len]; + DMatrix[] evalMats = new DMatrix[len]; + + for(Entry evalEntry : watchs) { + evalNames[i] = evalEntry.getKey(); + evalMats[i] = evalEntry.getValue(); + i++; + } + //collect all data matrixs DMatrix[] allMats; if(evalMats!=null && evalMats.length>0) { @@ -63,16 +76,6 @@ public class Trainer { //initialize booster Booster booster = new Booster(params, allMats); - //used for evaluation - long[] dataArray = null; - String[] names = null; - - if(dataArray==null || names==null) { - //prepare data for evaluation - dataArray = TransferUtil.dMatrixs2handles(evalMats); - names = evalNames; - } - //begin to train for(int iter=0; iter >{ + List> watchList = new ArrayList<>(); + + /** + * put eval dmatrix and it's name + * @param name + * @param dmat + */ + public void put(String name, DMatrix dmat) { + watchList.add(new AbstractMap.SimpleEntry<>(name, dmat)); + } + + public int size() { + return watchList.size(); + } + + @Override + public Iterator> iterator() { + return watchList.iterator(); + } +} diff --git a/java/xgboost4j/src/main/resources/lib/README.md b/java/xgboost4j/src/main/resources/lib/README.md deleted file mode 100644 index 9c4e25ae2..000000000 --- a/java/xgboost4j/src/main/resources/lib/README.md +++ /dev/null @@ -1 +0,0 @@ -please put native library in this package. From 4e8a1c65168f314e8b34bde1fa01ec91e4bf87be Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Wed, 10 Jun 2015 23:34:52 -0700 Subject: [PATCH 05/59] rm WatchList class, take Iterable> as eval param, change Params to Iterable> --- java/doc/xgboost4j.md | 38 ++++++------ .../dmlc/xgboost4j/demo/BasicWalkThrough.java | 61 ++++++++++++++++--- .../xgboost4j/demo/BoostFromPrediction.java | 15 +++-- .../dmlc/xgboost4j/demo/CrossValidation.java | 2 +- .../dmlc/xgboost4j/demo/CustomObjective.java | 11 ++-- .../dmlc/xgboost4j/demo/ExternalMemory.java | 13 ++-- .../demo/GeneralizedLinearModel.java | 13 ++-- .../xgboost4j/demo/PredictFirstNtree.java | 13 ++-- .../xgboost4j/demo/PredictLeafIndices.java | 13 ++-- .../org/dmlc/xgboost4j/demo}/util/Params.java | 2 +- .../main/java/org/dmlc/xgboost4j/Booster.java | 8 +-- .../java/org/dmlc/xgboost4j/util/CVPack.java | 3 +- .../java/org/dmlc/xgboost4j/util/Trainer.java | 24 ++++---- .../org/dmlc/xgboost4j/util/WatchList.java | 49 --------------- 14 files changed, 136 insertions(+), 129 deletions(-) rename java/{xgboost4j/src/main/java/org/dmlc/xgboost4j => xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo}/util/Params.java (97%) delete mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/WatchList.java diff --git a/java/doc/xgboost4j.md b/java/doc/xgboost4j.md index b383e9a04..201b3cc05 100644 --- a/java/doc/xgboost4j.md +++ b/java/doc/xgboost4j.md @@ -73,14 +73,11 @@ dmat.setWeight(weights); ``` #### Setting Parameters -* A util class ```Params``` in xgboost4j is used to handle parameters. -* To import ```Params``` : +* in xgboost4j any ```Iterable>``` object could be used as parameters. 
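+* each value is converted with `toString()` before it is handed to the native library (see `Booster.setParams` in this patch series), which is why numeric and string values can be mixed; a minimal sketch of that idea, using only plain `java.util` types:
+```java
+Map<String, Object> paramMap = new HashMap<String, Object>();
+paramMap.put("max_depth", 2);                  // an Integer value
+paramMap.put("objective", "binary:logistic");  // a String value
+for (Map.Entry<String, Object> e : paramMap.entrySet()) {
+  // each pair reaches the native side as (key, value.toString())
+  System.out.println(e.getKey() + "=" + e.getValue());
+}
+```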
+
+* to set parameters whose keys are not repeated, you can simply use the entrySet of a Map:
 ```java
-import org.dmlc.xgboost4j.util.Params;
-```
-* to set parameters :
-```java
-Params params = new Params() {
+Map<String, Object> paramMap = new HashMap<String, Object>() {
   {
     put("eta", 1.0);
     put("max_depth", 2);
@@ -89,18 +86,17 @@ Params params = new Params() {
     put("eval_metric", "logloss");
   }
 };
+Iterable<Entry<String, Object>> params = paramMap.entrySet();
 ```
-* Multiple values with same param key is handled naturally in ```Params```, e.g. :
+* when multiple values share the same param key, a ```List<Entry<String, Object>>``` is a good choice, e.g.:
 ```java
-Params params = new Params() {
-  {
-    put("eta", 1.0);
-    put("max_depth", 2);
-    put("silent", 1);
-    put("objective", "binary:logistic");
-    put("eval_metric", "logloss");
-    put("eval_metric", "error");
-  }
+List<Entry<String, Object>> params = new ArrayList<Entry<String, Object>>() {
+    {
+        add(new SimpleEntry<String, Object>("eta", 1.0));
+        add(new SimpleEntry<String, Object>("max_depth", 2.0));
+        add(new SimpleEntry<String, Object>("silent", 1));
+        add(new SimpleEntry<String, Object>("objective", "binary:logistic"));
+    }
 };
 ```

@@ -110,7 +106,6 @@ With parameters and data, you are able to train a booster model.
 ```java
 import org.dmlc.xgboost4j.Booster;
 import org.dmlc.xgboost4j.util.Trainer;
-import org.dmlc.xgboost4j.util.WatchList;
 ```
 * Training
 ```java
 DMatrix trainMat = new DMatrix("train.svm.txt");
 DMatrix validMat = new DMatrix("valid.svm.txt");
 //specify a watchList to see the performance
+//any Iterable<Map.Entry<String, DMatrix>> object could be used as the watchList
 List<Map.Entry<String, DMatrix>> watchs = new ArrayList<>();
 watchs.add(new SimpleEntry<>("train", trainMat));
 watchs.add(new SimpleEntry<>("test", testMat));
 int round = 2;
 Booster booster = Trainer.train(params, trainMat, round, watchs, null, null);
 ```
diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
index 0a80ae314..a0c7a3ae1 100644
--- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
@@ -18,13 +18,19 @@ package org.dmlc.xgboost4j.demo;
 import java.io.File;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.util.AbstractMap;
+import java.util.AbstractMap.SimpleEntry;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 import org.dmlc.xgboost4j.Booster;
 import org.dmlc.xgboost4j.DMatrix;
 import org.dmlc.xgboost4j.demo.util.DataLoader;
-import org.dmlc.xgboost4j.util.Params;
+import org.dmlc.xgboost4j.demo.util.Params;
 import org.dmlc.xgboost4j.util.Trainer;
-import org.dmlc.xgboost4j.util.WatchList;

 /**
  * a simple example of java wrapper for xgboost
@@ -51,8 +57,32 @@ public class BasicWalkThrough {
     DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
     DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
+
     //specify parameters
-    Params param = new Params() {
+    //note: any Iterable<Map.Entry<String, Object>> object could be used as parameters
+    //e.g.
+ // Map paramMap = new HashMap() { + // { + // put("eta", 1.0); + // put("max_depth", 2); + // put("silent", 1); + // put("objective", "binary:logistic"); + // } + // }; + // Iterable> param = paramMap.entrySet(); + + //or + // List> param = new ArrayList>() { + // { + // add(new SimpleEntry("eta", 1.0)); + // add(new SimpleEntry("max_depth", 2.0)); + // add(new SimpleEntry("silent", 1)); + // add(new SimpleEntry("objective", "binary:logistic")); + // } + // }; + + //we use a util class Params to handle parameters as example + Iterable> param = new Params() { { put("eta", 1.0); put("max_depth", 2); @@ -61,10 +91,21 @@ public class BasicWalkThrough { } }; - //specify watchList - WatchList watchs = new WatchList(); - watchs.put("train", trainMat); - watchs.put("test", testMat); + + + //specify watchList to set evaluation dmats + //note: any Iterable> object would be used as watchList + //e.g. + //an entrySet of Map is good + // Map watchMap = new HashMap<>(); + // watchMap.put("train", trainMat); + // watchMap.put("test", testMat); + // Iterable> watchs = watchMap.entrySet(); + + //we use a List of Entry WatchList as example + List> watchs = new ArrayList<>(); + watchs.add(new SimpleEntry<>("train", trainMat)); + watchs.add(new SimpleEntry<>("test", testMat)); //set round int round = 2; @@ -110,9 +151,9 @@ public class BasicWalkThrough { trainMat2.setLabel(spData.labels); //specify watchList - WatchList watchs2 = new WatchList(); - watchs2.put("train", trainMat2); - watchs2.put("test", testMat); + List> watchs2 = new ArrayList<>(); + watchs2.add(new SimpleEntry<>("train", trainMat2)); + watchs2.add(new SimpleEntry<>("test", testMat2)); Booster booster3 = Trainer.train(param, trainMat2, round, watchs2, null, null); float[][] predicts3 = booster3.predict(testMat2); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java index 54c8c1bee..733c49503 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java @@ -15,11 +15,14 @@ */ package org.dmlc.xgboost4j.demo; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; -import org.dmlc.xgboost4j.util.Params; +import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.WatchList; /** * example for start from a initial base prediction @@ -43,10 +46,10 @@ public class BoostFromPrediction { } }; - //specify watchList - WatchList watchs = new WatchList(); - watchs.put("train", trainMat); - watchs.put("test", testMat); + //specify watchList + List> watchs = new ArrayList<>(); + watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat)); + watchs.add(new AbstractMap.SimpleEntry<>("test", testMat)); //train xgboost for 1 round Booster booster = Trainer.train(param, trainMat, 1, watchs, null, null); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java index 793ffb61d..0c470bf17 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java @@ -18,7 +18,7 @@ package org.dmlc.xgboost4j.demo; import 
java.io.IOException; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.Params; +import org.dmlc.xgboost4j.demo.util.Params; /** * an example of cross validation diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java index ed8c9a9a9..03c9c4b52 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java @@ -15,15 +15,16 @@ */ package org.dmlc.xgboost4j.demo; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.List; +import java.util.Map; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.IEvaluation; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.IObjective; -import org.dmlc.xgboost4j.util.Params; +import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.WatchList; /** * an example user define objective and eval @@ -140,9 +141,9 @@ public class CustomObjective { int round = 2; //specify watchList - WatchList watchs = new WatchList(); - watchs.put("train", trainMat); - watchs.put("test", testMat); + List> watchs = new ArrayList<>(); + watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat)); + watchs.add(new AbstractMap.SimpleEntry<>("test", testMat)); //user define obj and eval IObjective obj = new LogRegObj(); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java index 698245bf1..6ac687289 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java @@ -15,11 +15,14 @@ */ package org.dmlc.xgboost4j.demo; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; -import org.dmlc.xgboost4j.util.Params; +import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.WatchList; /** * simple example for using external memory version @@ -48,9 +51,9 @@ public class ExternalMemory { //param.put("nthread", num_real_cpu); //specify watchList - WatchList watchs = new WatchList(); - watchs.put("train", trainMat); - watchs.put("test", testMat); + List> watchs = new ArrayList<>(); + watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat)); + watchs.add(new AbstractMap.SimpleEntry<>("test", testMat)); //set round int round = 2; diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java index a9b3ba8df..2a20edbff 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java @@ -15,12 +15,15 @@ */ package org.dmlc.xgboost4j.demo; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.CustomEval; -import org.dmlc.xgboost4j.util.Params; +import org.dmlc.xgboost4j.demo.util.Params; import 
org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.WatchList; /** * this is an example of fit generalized linear model in xgboost @@ -54,9 +57,9 @@ public class GeneralizedLinearModel { //specify watchList - WatchList watchs = new WatchList(); - watchs.put("train", trainMat); - watchs.put("test", testMat); + List> watchs = new ArrayList<>(); + watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat)); + watchs.add(new AbstractMap.SimpleEntry<>("test", testMat)); //train a booster int round = 4; diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java index bfcc04d06..8e3f3abfb 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java @@ -15,13 +15,16 @@ */ package org.dmlc.xgboost4j.demo; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; -import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; import org.dmlc.xgboost4j.demo.util.CustomEval; -import org.dmlc.xgboost4j.util.WatchList; +import org.dmlc.xgboost4j.demo.util.Params; /** * predict first ntree @@ -44,9 +47,9 @@ public class PredictFirstNtree { }; //specify watchList - WatchList watchs = new WatchList(); - watchs.put("train", trainMat); - watchs.put("test", testMat); + List> watchs = new ArrayList<>(); + watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat)); + watchs.add(new AbstractMap.SimpleEntry<>("test", testMat)); //train a booster int round = 3; diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java index 5f1c2e5ac..697f40379 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java @@ -15,12 +15,15 @@ */ package org.dmlc.xgboost4j.demo; +import java.util.AbstractMap; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import java.util.Map; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; -import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.WatchList; +import org.dmlc.xgboost4j.demo.util.Params; /** * predict leaf indices @@ -43,9 +46,9 @@ public class PredictLeafIndices { }; //specify watchList - WatchList watchs = new WatchList(); - watchs.put("train", trainMat); - watchs.put("test", testMat); + List> watchs = new ArrayList<>(); + watchs.add(new AbstractMap.SimpleEntry<>("train", trainMat)); + watchs.add(new AbstractMap.SimpleEntry<>("test", testMat)); //train a booster int round = 3; diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Params.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/Params.java similarity index 97% rename from java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Params.java rename to java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/Params.java index 582620174..0f4c5c738 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Params.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/Params.java @@ -13,7 +13,7 @@ See the License for the specific language 
governing permissions and limitations under the License. */ -package org.dmlc.xgboost4j.util; +package org.dmlc.xgboost4j.demo.util; import java.util.ArrayList; import java.util.Iterator; diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java index 3140b184e..c5d8b1006 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java @@ -25,11 +25,11 @@ import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.util.Initializer; -import org.dmlc.xgboost4j.util.Params; import org.dmlc.xgboost4j.wrapper.XgboostJNI; @@ -58,7 +58,7 @@ public final class Booster { * @param params parameters * @param dMatrixs DMatrix array */ - public Booster(Params params, DMatrix[] dMatrixs) { + public Booster(Iterable> params, DMatrix[] dMatrixs) { init(dMatrixs); setParam("seed","0"); setParams(params); @@ -71,7 +71,7 @@ public final class Booster { * @param params parameters * @param modelPath booster modelPath (model generated by booster.saveModel) */ - public Booster(Params params, String modelPath) { + public Booster(Iterable> params, String modelPath) { handle = XgboostJNI.XGBoosterCreate(new long[] {}); loadModel(modelPath); setParam("seed","0"); @@ -102,7 +102,7 @@ public final class Booster { * set parameters * @param params parameters key-value map */ - public void setParams(Params params) { + public void setParams(Iterable> params) { if(params!=null) { for(Map.Entry entry : params) { setParam(entry.getKey(), entry.getValue().toString()); diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java index a0d145636..3e67dc669 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java @@ -15,6 +15,7 @@ */ package org.dmlc.xgboost4j.util; +import java.util.Map; import org.dmlc.xgboost4j.IEvaluation; import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; @@ -37,7 +38,7 @@ public class CVPack { * @param dtest test data * @param params parameters */ - public CVPack(DMatrix dtrain, DMatrix dtest, Params params) { + public CVPack(DMatrix dtrain, DMatrix dtest, Iterable> params) { dmats = new DMatrix[] {dtrain, dtest}; booster = new Booster(params, dmats); names = new String[] {"train", "test"}; diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java index a53437477..8a336b1a8 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java @@ -46,21 +46,23 @@ public class Trainer { * @param eval customized evaluation (set to null if not used) * @return trained booster */ - public static Booster train(Params params, DMatrix dtrain, int round, - WatchList watchs, IObjective obj, IEvaluation eval) { + public static Booster train(Iterable> params, DMatrix dtrain, int round, + Iterable> watchs, IObjective obj, IEvaluation eval) { //collect eval matrixs - int len = watchs.size(); - int i = 0; - String[] evalNames = new String[len]; - DMatrix[] evalMats = new DMatrix[len]; + String[] evalNames; + DMatrix[] 
evalMats; + List names = new ArrayList<>(); + List mats = new ArrayList<>(); for(Entry evalEntry : watchs) { - evalNames[i] = evalEntry.getKey(); - evalMats[i] = evalEntry.getValue(); - i++; + names.add(evalEntry.getKey()); + mats.add(evalEntry.getValue()); } + evalNames = names.toArray(new String[names.size()]); + evalMats = mats.toArray(new DMatrix[mats.size()]); + //collect all data matrixs DMatrix[] allMats; if(evalMats!=null && evalMats.length>0) { @@ -110,7 +112,7 @@ public class Trainer { * @param eval customized evaluation (set to null if not used) * @return evaluation history */ - public static String[] crossValiation(Params params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) { + public static String[] crossValiation(Iterable> params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) { CVPack[] cvPacks = makeNFold(data, nfold, params, metrics); String[] evalHist = new String[round]; String[] results = new String[cvPacks.length]; @@ -147,7 +149,7 @@ public class Trainer { * @param evalMetrics Evaluation metrics * @return CV package array */ - public static CVPack[] makeNFold(DMatrix data, int nfold, Params params, String[] evalMetrics) { + public static CVPack[] makeNFold(DMatrix data, int nfold, Iterable> params, String[] evalMetrics) { List samples = genRandPermutationNums(0, (int) data.rowNum()); int step = samples.size()/nfold; int[] testSlice = new int[step]; diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/WatchList.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/WatchList.java deleted file mode 100644 index a08b96208..000000000 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/WatchList.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- */ -package org.dmlc.xgboost4j.util; - -import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map.Entry; -import org.dmlc.xgboost4j.DMatrix; - -/** - * class to handle evaluation dmatrix - * @author hzx - */ -public class WatchList implements Iterable >{ - List> watchList = new ArrayList<>(); - - /** - * put eval dmatrix and it's name - * @param name - * @param dmat - */ - public void put(String name, DMatrix dmat) { - watchList.add(new AbstractMap.SimpleEntry<>(name, dmat)); - } - - public int size() { - return watchList.size(); - } - - @Override - public Iterator> iterator() { - return watchList.iterator(); - } -} From 61142f203b27682fcc2c0cb751fe6e21d2cae36e Mon Sep 17 00:00:00 2001 From: Tong He Date: Thu, 11 Jun 2015 14:04:43 -0700 Subject: [PATCH 06/59] check whether objective is character --- R-package/R/utils.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 4a5d99c7d..f7f6b9192 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -220,7 +220,8 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) { stop("nfold must be bigger than 1") } if(is.null(folds)) { - if (exists('objective', where=param) && strtrim(param[['objective']], 5) == 'rank:') { + if (exists('objective', where=param) && is.character(param$objective) && + strtrim(param[['objective']], 5) == 'rank:') { stop("\tAutomatic creation of CV-folds is not implemented for ranking!\n", "\tConsider providing pre-computed CV-folds through the folds parameter.") } @@ -234,7 +235,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) { # For classification, need to convert y labels to factor before making the folds, # and then do stratification by factor levels. # For regression, leave y numeric and do stratification by quantiles. 
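      # (For example: with objective = 'binary:logistic' and 25% positive labels,
      # each stratified fold keeps roughly that 1:3 class ratio; with 'reg:linear',
      # the folds are instead balanced across the quantiles of y.)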
- if (exists('objective', where=param)) { + if (exists('objective', where=param) && is.character(param$objective)) { # If 'objective' provided in params, assume that y is a classification label # unless objective is reg:linear if (param[['objective']] != 'reg:linear') y <- factor(y) From 7cb449c4a75c2a16f6dfea5244ce959d998344b1 Mon Sep 17 00:00:00 2001 From: Tong He Date: Thu, 11 Jun 2015 14:16:20 -0700 Subject: [PATCH 07/59] Update xgb.cv.R --- R-package/R/xgb.cv.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index df7fd5648..e7687ac3d 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -124,14 +124,14 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = if (!is.null(params$objective)) if (class(params$objective)=='function') { obj = params$objective - params$objective = NULL + params[['objective']] = NULL } if (!is.null(params$eval_metric) && !is.null(feval)) stop("xgb.cv: cannot assign two different evaluation metrics") if (!is.null(params$eval_metric)) if (class(params$eval_metric)=='function') { feval = params$eval_metric - params$eval_metric = NULL + params[['eval_metric']] = NULL } # Early Stopping From c51d71b033da8b963bf6ff86dc5a293c5a67ad1b Mon Sep 17 00:00:00 2001 From: hetong007 Date: Fri, 12 Jun 2015 16:48:01 -0700 Subject: [PATCH 08/59] check duplicated params --- R-package/R/xgb.cv.R | 11 ++++++++--- R-package/R/xgb.train.R | 8 +++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index e7687ac3d..1d747ba57 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -112,7 +112,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = } else { dtrain <- xgb.get.DMatrix(data, label, missing) } - params <- append(params, list(...)) + dot.params = list(...) + nms.params = names(params) + nms.dot.params = names(dot.params) + if (length(intersect(nms.params,nms.dot.params))>0) + stop("Duplicated defined term in parameters. Please check your list of params.") + params <- append(params, dot.params) params <- append(params, list(silent=1)) for (mc in metrics) { params <- append(params, list("eval_metric"=mc)) @@ -126,8 +131,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = obj = params$objective params[['objective']] = NULL } - if (!is.null(params$eval_metric) && !is.null(feval)) - stop("xgb.cv: cannot assign two different evaluation metrics") + # if (!is.null(params$eval_metric) && !is.null(feval)) + # stop("xgb.cv: cannot assign two different evaluation metrics") if (!is.null(params$eval_metric)) if (class(params$eval_metric)=='function') { feval = params$eval_metric diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 0700577f7..fb403143a 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -136,7 +136,13 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(), if (length(watchlist) != 0 && verbose == 0) { warning('watchlist is provided but verbose=0, no evaluation information will be printed') } - params = append(params, list(...)) + + dot.params = list(...) + nms.params = names(params) + nms.dot.params = names(dot.params) + if (length(intersect(nms.params,nms.dot.params))>0) + stop("Duplicated term in parameters. 
Please check your list of params.") + params = append(params, dot.params) # customized objective and evaluation metric interface if (!is.null(params$objective) && !is.null(obj)) From 7a92d4008ee429c0e208d198bc160a6220c1ba79 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 15 Jun 2015 09:24:10 -0700 Subject: [PATCH 09/59] fix col from dense --- src/io/simple_fmatrix-inl.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp index fc6aab8f9..af5b31321 100644 --- a/src/io/simple_fmatrix-inl.hpp +++ b/src/io/simple_fmatrix-inl.hpp @@ -153,7 +153,7 @@ class FMatrixS : public IFMatrix { pcol->Clear(); utils::ParallelGroupBuilder builder(&pcol->offset, &pcol->data); - builder.InitBudget(0, nthread); + builder.InitBudget(info_.num_col(), nthread); // start working iter_->BeforeFirst(); while (iter_->Next()) { @@ -204,7 +204,8 @@ class FMatrixS : public IFMatrix { } } - utils::Assert(pcol->Size() == info_.num_col(), "inconsistent col data"); + utils::Assert(pcol->Size() == info_.num_col(), + "inconsistent col data"); // sort columns bst_omp_uint ncol = static_cast(pcol->Size()); #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) From 0bbb4a07b2731e102a9234fb5396c8c254de9b5c Mon Sep 17 00:00:00 2001 From: hetong007 Date: Mon, 15 Jun 2015 15:25:40 -0700 Subject: [PATCH 10/59] add travis conf, waiting for setting on travis-ci.org --- .travis.yml | 31 +++++++++++++++++++++++++++++++ README.md | 2 ++ 2 files changed, 33 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..6a908ea99 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,31 @@ +language: c + +env: + global: + - _R_CHECK_TIMINGS_=0 + +warnings_are_errors: false + +sudo: required + +before_install: + - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh + - chmod 755 ./travis-tool.sh + - ./travis-tool.sh bootstrap + +install: + - cd ./R-package + - ../travis-tool.sh install_deps + +script: ../travis-tool.sh run_tests + +on_failure: + - ../travis-tool.sh dump_logs + +notifications: + email: + recipients: + - hetong007@gmail.com + on_success: change + on_failure: always + diff --git a/README.md b/README.md index 415bf771b..59f2028c8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ XGBoost: eXtreme Gradient Boosting ================================== +[![Build Status](https://travis-ci.org/dmlc/xgboost.png)](https://travis-ci.org/dmlc/xgboost) + An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version. It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). 
XGBoost can also be distributed and scale to Terascale data From 7d9ac3f97d367bb9b7bd251eb423a676f2aa3119 Mon Sep 17 00:00:00 2001 From: Tong He Date: Mon, 15 Jun 2015 19:15:34 -0700 Subject: [PATCH 11/59] Update .travis.yml --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6a908ea99..d716fa520 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ env: warnings_are_errors: false -sudo: required +sudo: true before_install: - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh @@ -26,6 +26,7 @@ notifications: email: recipients: - hetong007@gmail.com + - tqchen@cs.washington.edu on_success: change on_failure: always From b08c3c5baad5e7336ec6bb89e1b073c3b7050886 Mon Sep 17 00:00:00 2001 From: Tong He Date: Mon, 15 Jun 2015 22:16:11 -0700 Subject: [PATCH 12/59] Update .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index d716fa520..e05c25fe2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,7 @@ script: ../travis-tool.sh run_tests on_failure: - ../travis-tool.sh dump_logs + - cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out notifications: email: From 5568f83a6c8a1ef6ddf10a7e719d25f57bb30bb2 Mon Sep 17 00:00:00 2001 From: Tong He Date: Mon, 15 Jun 2015 22:40:15 -0700 Subject: [PATCH 13/59] Update .travis.yml --- .travis.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index e05c25fe2..651a9bc2f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,11 +17,17 @@ install: - cd ./R-package - ../travis-tool.sh install_deps -script: ../travis-tool.sh run_tests +script: + - bash ../travis-tool.sh run_tests + - bash cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out + +after_failure: cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out on_failure: - - ../travis-tool.sh dump_logs - cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out + - ../travis-tool.sh dump_logs + +after_script: cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out notifications: email: From 67f0b69a4ceeeba25bdc005e7621fa2b500dc33f Mon Sep 17 00:00:00 2001 From: hetong007 Date: Tue, 16 Jun 2015 11:30:11 -0700 Subject: [PATCH 14/59] change makefile to be compatible with r-travis --- .travis.yml | 11 +++-------- Makefile | 5 +++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 651a9bc2f..2910c15b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,21 +14,16 @@ before_install: - ./travis-tool.sh bootstrap install: - - cd ./R-package + - make Rpack + - cd ./xgboost - ../travis-tool.sh install_deps script: - bash ../travis-tool.sh run_tests - - bash cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out after_failure: cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out -on_failure: - - cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out - - ../travis-tool.sh dump_logs - -after_script: cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out - + notifications: email: recipients: diff --git a/Makefile b/Makefile index 360d55e84..49356021a 100644 --- a/Makefile +++ b/Makefile @@ -145,6 +145,11 @@ Rpack: cp xgboost/src/Makevars xgboost/src/Makevars.win # R CMD build --no-build-vignettes xgboost R CMD build xgboost + # rm -rf xgboost + # R CMD check --as-cran xgboost*.tar.gz + +Rcheck: + make 
Rpack rm -rf xgboost R CMD check --as-cran xgboost*.tar.gz From 9987fb24f8b45a1a4a313dc08e299677084c5038 Mon Sep 17 00:00:00 2001 From: hetong007 Date: Tue, 16 Jun 2015 11:43:04 -0700 Subject: [PATCH 15/59] update makefile --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 49356021a..d7e24cd49 100644 --- a/Makefile +++ b/Makefile @@ -144,13 +144,17 @@ Rpack: cat R-package/src/Makevars|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars cp xgboost/src/Makevars xgboost/src/Makevars.win # R CMD build --no-build-vignettes xgboost - R CMD build xgboost + # R CMD build xgboost # rm -rf xgboost # R CMD check --as-cran xgboost*.tar.gz -Rcheck: +Rbuild: make Rpack rm -rf xgboost + R CMD build xgboost + +Rcheck: + make Rbuild R CMD check --as-cran xgboost*.tar.gz clean: From 1595d3672132d1eb4df692487eb77caf99a1bc73 Mon Sep 17 00:00:00 2001 From: Tong He Date: Tue, 16 Jun 2015 14:22:51 -0700 Subject: [PATCH 16/59] ask travis to compile vignette --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 2910c15b8..b3475bb23 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,8 @@ language: c env: global: - _R_CHECK_TIMINGS_=0 + - R_BUILD_ARGS="" + - R_CHECK_ARGS="" warnings_are_errors: false From 70c5c12067aefe1abf923b772a9d89df8d870f2b Mon Sep 17 00:00:00 2001 From: Tong He Date: Tue, 16 Jun 2015 14:39:04 -0700 Subject: [PATCH 17/59] update knitr dependency --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index b3475bb23..d4b68dbe0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,7 @@ warnings_are_errors: false sudo: true before_install: + - sudo apt-get install texinfo - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh - chmod 755 ./travis-tool.sh - ./travis-tool.sh bootstrap From 777c5ce992187bf51920029f6a73012afcb17595 Mon Sep 17 00:00:00 2001 From: Tong He Date: Tue, 16 Jun 2015 15:08:01 -0700 Subject: [PATCH 18/59] temporarily do not compile vignette --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index d4b68dbe0..8eca7c0ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,15 +3,14 @@ language: c env: global: - _R_CHECK_TIMINGS_=0 - - R_BUILD_ARGS="" - - R_CHECK_ARGS="" + - R_BUILD_ARGS="--no-build-vignettes --no-manual" + - R_CHECK_ARGS="--no-vignettes --no-manual" warnings_are_errors: false sudo: true before_install: - - sudo apt-get install texinfo - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh - chmod 755 ./travis-tool.sh - ./travis-tool.sh bootstrap From 561e51871ee2e50a4f8361799ed0fae2401b5c0d Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 17 Jun 2015 21:00:34 -0700 Subject: [PATCH 19/59] ok --- src/io/simple_fmatrix-inl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp index af5b31321..1d704c4f8 100644 --- a/src/io/simple_fmatrix-inl.hpp +++ b/src/io/simple_fmatrix-inl.hpp @@ -39,7 +39,7 @@ class FMatrixS : public IFMatrix { /*! \brief get number of colmuns */ virtual size_t NumCol(void) const { utils::Check(this->HaveColAccess(), "NumCol:need column access"); - return col_size_.size() - 1; + return col_size_.size(); } /*! 
\brief get number of buffered rows */ virtual const std::vector &buffered_rowset(void) const { From 6b254ec4959d72f32f2dda745d04ca7c014704a1 Mon Sep 17 00:00:00 2001 From: Tong He Date: Sun, 21 Jun 2015 19:25:09 -0700 Subject: [PATCH 20/59] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d7e24cd49..295de4064 100644 --- a/Makefile +++ b/Makefile @@ -150,8 +150,8 @@ Rpack: Rbuild: make Rpack - rm -rf xgboost R CMD build xgboost + rm -rf xgboost Rcheck: make Rbuild From 704d9e0a135318a1cc9706cac2b211a69fabe6a7 Mon Sep 17 00:00:00 2001 From: Tong He Date: Sun, 21 Jun 2015 19:46:31 -0700 Subject: [PATCH 21/59] fix early stopping and prediction --- R-package/R/xgb.cv.R | 277 +++++++++++++++++++++---------------------- 1 file changed, 138 insertions(+), 139 deletions(-) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 1d747ba57..06e2cf82e 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -95,157 +95,156 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = prediction = FALSE, showsd = TRUE, metrics=list(), obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, verbose = T, print.every.n=1L, early.stop.round = NULL, maximize = NULL, ...) { - if (typeof(params) != "list") { - stop("xgb.cv: first argument params must be list") - } - if(!is.null(folds)) { - if(class(folds)!="list" | length(folds) < 2) { - stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold") + if (typeof(params) != "list") { + stop("xgb.cv: first argument params must be list") } - nfold <- length(folds) - } - if (nfold <= 1) { - stop("nfold must be bigger than 1") - } - if (is.null(missing)) { - dtrain <- xgb.get.DMatrix(data, label) - } else { - dtrain <- xgb.get.DMatrix(data, label, missing) - } - dot.params = list(...) - nms.params = names(params) - nms.dot.params = names(dot.params) - if (length(intersect(nms.params,nms.dot.params))>0) - stop("Duplicated defined term in parameters. 
Please check your list of params.") - params <- append(params, dot.params) - params <- append(params, list(silent=1)) - for (mc in metrics) { - params <- append(params, list("eval_metric"=mc)) - } - - # customized objective and evaluation metric interface - if (!is.null(params$objective) && !is.null(obj)) - stop("xgb.cv: cannot assign two different objectives") - if (!is.null(params$objective)) - if (class(params$objective)=='function') { - obj = params$objective - params[['objective']] = NULL + if(!is.null(folds)) { + if(class(folds)!="list" | length(folds) < 2) { + stop("folds must be a list with 2 or more elements that are vectors of indices for each CV-fold") + } + nfold <- length(folds) } - # if (!is.null(params$eval_metric) && !is.null(feval)) - # stop("xgb.cv: cannot assign two different evaluation metrics") - if (!is.null(params$eval_metric)) - if (class(params$eval_metric)=='function') { - feval = params$eval_metric - params[['eval_metric']] = NULL + if (nfold <= 1) { + stop("nfold must be bigger than 1") } - - # Early Stopping - if (!is.null(early.stop.round)){ - if (!is.null(feval) && is.null(maximize)) - stop('Please set maximize to note whether the model is maximizing the evaluation or not.') - if (is.null(maximize) && is.null(params$eval_metric)) - stop('Please set maximize to note whether the model is maximizing the evaluation or not.') - if (is.null(maximize)) - { - if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) { - maximize = FALSE - } else { - maximize = TRUE - } - } - - if (maximize) { - bestScore = 0 + if (is.null(missing)) { + dtrain <- xgb.get.DMatrix(data, label) } else { - bestScore = Inf + dtrain <- xgb.get.DMatrix(data, label, missing) + } + dot.params = list(...) + nms.params = names(params) + nms.dot.params = names(dot.params) + if (length(intersect(nms.params,nms.dot.params))>0) + stop("Duplicated defined term in parameters. 
Please check your list of params.") + params <- append(params, dot.params) + params <- append(params, list(silent=1)) + for (mc in metrics) { + params <- append(params, list("eval_metric"=mc)) } - bestInd = 0 - earlyStopflag = FALSE - if (length(metrics)>1) - warning('Only the first metric is used for early stopping process.') - } - - xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds) - obj_type = params[['objective']] - mat_pred = FALSE - if (!is.null(obj_type) && obj_type=='multi:softprob') - { - num_class = params[['num_class']] - if (is.null(num_class)) - stop('must set num_class to use softmax') - predictValues <- matrix(0,xgb.numrow(dtrain),num_class) - mat_pred = TRUE - } - else - predictValues <- rep(0,xgb.numrow(dtrain)) - history <- c() - print.every.n = max(as.integer(print.every.n), 1L) - for (i in 1:nrounds) { - msg <- list() - for (k in 1:nfold) { - fd <- xgb_folds[[k]] - succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj) - if (i% str_split("\t") %>% .[[1]] - } else { - if (!prediction) { - msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]] - } else { - res <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction) - if (mat_pred) { - pred_mat = matrix(res[[2]],num_class,length(fd$index)) - predictValues[fd$index,] <- t(pred_mat) - } else { - predictValues[fd$index] <- res[[2]] - } - msg[[k]] <- res[[1]] %>% str_split("\t") %>% .[[1]] + # customized objective and evaluation metric interface + if (!is.null(params$objective) && !is.null(obj)) + stop("xgb.cv: cannot assign two different objectives") + if (!is.null(params$objective)) + if (class(params$objective)=='function') { + obj = params$objective + params[['objective']] = NULL + } + # if (!is.null(params$eval_metric) && !is.null(feval)) + # stop("xgb.cv: cannot assign two different evaluation metrics") + if (!is.null(params$eval_metric)) + if (class(params$eval_metric)=='function') { + feval = params$eval_metric + params[['eval_metric']] = NULL } - } - } - ret <- xgb.cv.aggcv(msg, showsd) - history <- c(history, ret) - if(verbose) - if (0==(i-1L)%%print.every.n) - cat(ret, "\n", sep="") - # early_Stopping + # Early Stopping if (!is.null(early.stop.round)){ - score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2] - score = strsplit(score,'\\+|:')[[1]][[2]] - score = as.numeric(score) - if ((maximize && score>bestScore) || (!maximize && score=early.stop.round) { - earlyStopflag = TRUE - cat('Stopping. 
Best iteration:',bestInd) - break + if (!is.null(feval) && is.null(maximize)) + stop('Please set maximize to note whether the model is maximizing the evaluation or not.') + if (is.null(maximize) && is.null(params$eval_metric)) + stop('Please set maximize to note whether the model is maximizing the evaluation or not.') + if (is.null(maximize)) + { + if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) { + maximize = FALSE + } else { + maximize = TRUE + } } - } + + if (maximize) { + bestScore = 0 + } else { + bestScore = Inf + } + bestInd = 0 + earlyStopflag = FALSE + + if (length(metrics)>1) + warning('Only the first metric is used for early stopping process.') } - } - - colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".") - colnamesMean <- paste(colnames, "mean") - if(showsd) colnamesStd <- paste(colnames, "std") - - colnames <- c() - if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i]) - else colnames <- colnamesMean - - type <- rep(x = "numeric", times = length(colnames)) - dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table - split <- str_split(string = history, pattern = "\t") - - for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)} - - if (prediction) { - return(list(dt = dt,pred = predictValues)) - } - return(dt) + xgb_folds <- xgb.cv.mknfold(dtrain, nfold, params, stratified, folds) + obj_type = params[['objective']] + mat_pred = FALSE + if (!is.null(obj_type) && obj_type=='multi:softprob') + { + num_class = params[['num_class']] + if (is.null(num_class)) + stop('must set num_class to use softmax') + predictValues <- matrix(0,xgb.numrow(dtrain),num_class) + mat_pred = TRUE + } + else + predictValues <- rep(0,xgb.numrow(dtrain)) + history <- c() + print.every.n = max(as.integer(print.every.n), 1L) + for (i in 1:nrounds) { + msg <- list() + for (k in 1:nfold) { + fd <- xgb_folds[[k]] + succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj) + msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]] + } + ret <- xgb.cv.aggcv(msg, showsd) + history <- c(history, ret) + if(verbose) + if (0==(i-1L)%%print.every.n) + cat(ret, "\n", sep="") + + # early_Stopping + if (!is.null(early.stop.round)){ + score = strsplit(ret,'\\s+')[[1]][1+length(metrics)+2] + score = strsplit(score,'\\+|:')[[1]][[2]] + score = as.numeric(score) + if ((maximize && score>bestScore) || (!maximize && score=early.stop.round) { + earlyStopflag = TRUE + cat('Stopping. 
Best iteration:',bestInd) + break + } + } + } + + } + + if (prediction) { + for (k in 1:nfold) { + fd = xgb_folds[[k]] + res = xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction) + if (mat_pred) { + pred_mat = matrix(res[[2]],num_class,length(fd$index)) + predictValues[fd$index,] = t(pred_mat) + } else { + predictValues[fd$index] = res[[2]] + } + } + } + + + colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".") + colnamesMean <- paste(colnames, "mean") + if(showsd) colnamesStd <- paste(colnames, "std") + + colnames <- c() + if(showsd) for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i]) + else colnames <- colnamesMean + + type <- rep(x = "numeric", times = length(colnames)) + dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table + split <- str_split(string = history, pattern = "\t") + + for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)} + + if (prediction) { + return(list(dt = dt,pred = predictValues)) + } + return(dt) } # Avoid error messages during CRAN check. From 48e19c196401c2774012768ab0d64ee3de55e137 Mon Sep 17 00:00:00 2001 From: Tong He Date: Mon, 22 Jun 2015 12:42:12 -0700 Subject: [PATCH 22/59] Update xgb.cv.R --- R-package/R/xgb.cv.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 06e2cf82e..793d904cd 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -216,7 +216,11 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = if (prediction) { for (k in 1:nfold) { fd = xgb_folds[[k]] - res = xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval, prediction) + if (!is.null(early.stop.round) && earlyStopflag) { + res = xgb.iter.eval(fd$booster, fd$watchlist, bestInd - 1, feval, prediction) + } else { + res = xgb.iter.eval(fd$booster, fd$watchlist, nrounds - 1, feval, prediction) + } if (mat_pred) { pred_mat = matrix(res[[2]],num_class,length(fd$index)) predictValues[fd$index,] = t(pred_mat) From 009f692f49abe24ad71962c3bf2d006f40d11bb0 Mon Sep 17 00:00:00 2001 From: Ajinkya Kale Date: Wed, 1 Jul 2015 12:12:47 -0700 Subject: [PATCH 23/59] Some typo and formatting fixes --- doc/build.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/build.md b/doc/build.md index a5261b604..f9a626603 100644 --- a/doc/build.md +++ b/doc/build.md @@ -17,13 +17,15 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost 1. Obtain gcc with openmp support by `brew install gcc --without-multilib` **or** clang with openmp by `brew install clang-omp`. The clang one is recommended because the first method requires us compiling gcc inside the machine (more than an hour in mine)! (BTW, `brew` is the de facto standard of `apt-get` on OS X. So installing [HPC](http://hpc.sourceforge.net/) separately is not recommended, but it should work.) -2. **if plaing to use clang-omp** in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to +2. 
**if you are planning to use clang-omp** - in step 3 and/or 4, change line 9 in `xgboost/src/utils/omp.h` to

   ```C++
   #include <libiomp/omp.h> /* instead of #include <omp.h> */
   ```

   to make it work, otherwise you might get this error

   `src/tree/../utils/omp.h:9:10: error: 'omp.h' file not found...`



@@ -47,7 +49,7 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost

 4. Set the `Makevars` file with the highest priority for R.

-   The point is, there are three `Makevars` inside the machine: `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by runing `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has highest piority (surprise!).
+   The point is, there are three `Makevars` : `~/.R/Makevars`, `xgboost/R-package/src/Makevars`, and `/usr/local/Cellar/r/3.2.0/R.framework/Resources/etc/Makeconf` (the last one obtained by running `file.path(R.home("etc"), "Makeconf")` in R), and `SHLIB_OPENMP_CXXFLAGS` is not set by default!! After trying, it seems that the first one has the highest priority (surprise!).

    So, **add** or **change** `~/.R/Makevars` to the following lines:

From c70a73f38de92e6a93f7cb9e1725530e32943477 Mon Sep 17 00:00:00 2001
From: Ajinkya Kale 
Date: Wed, 1 Jul 2015 22:35:41 -0700
Subject: [PATCH 24/59] fixing some typos

---
 demo/binary_classification/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/demo/binary_classification/README.md b/demo/binary_classification/README.md
index 02c06e550..8d1e5e2a5 100644
--- a/demo/binary_classification/README.md
+++ b/demo/binary_classification/README.md
@@ -147,7 +147,7 @@ Run the command again, we can find the log file becomes
 ```
 The rule is eval[name-printed-in-log] = filename, then the file will be added to monitoring process, and evaluated each round.
-xgboost also support monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
+xgboost also supports monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure. Run again, we can find the log file becomes
 ```
 [0]     test-error:0.016139     test-negllik:0.029795   trainname-error:0.014433        trainname-negllik:0.027023
 [1]     test-error:0.000000     test-negllik:0.000000   trainname-error:0.001228        trainname-negllik:0.002457
@@ -166,7 +166,7 @@ When you are working with a large dataset, you may want to take advantage of par
 #### Additional Notes
 * What are ```agaricus.txt.test.buffer``` and ```agaricus.txt.train.buffer``` generated during runexp.sh?
-  - By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. When next time you run xgboost, it detects i
-Demonstrating how to use XGBoost accomplish binary classification tasks on UCI mushroom dataset http://archive.ics.uci.edu/ml/datasets/Mushroom
+  - By default xgboost will automatically generate a binary format buffer of input data, with suffix ```buffer```. The next time you run xgboost, it will detect these binary files.
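+  - For example, the first run of `runexp.sh` parses the text inputs and writes ```agaricus.txt.train.buffer``` and ```agaricus.txt.test.buffer```; later runs load these buffers directly, which is faster than re-parsing the text files. If the underlying text data changes, delete the old ```buffer``` files and xgboost will regenerate them on the next run.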
+ From 0162bb7034f224dfcb3ecd290b2c7d2ad316fe86 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 18:31:52 -0700 Subject: [PATCH 25/59] lint half way --- src/gbm/gblinear-inl.hpp | 17 +-- src/gbm/gbm.cpp | 1 + src/gbm/gbm.h | 27 ++-- src/gbm/gbtree-inl.hpp | 46 +++---- src/io/io.h | 9 +- src/tree/model.h | 80 ++++++------ src/tree/param.h | 49 ++++---- src/tree/updater.cpp | 1 + src/tree/updater.h | 20 +-- src/tree/updater_basemaker-inl.hpp | 77 +++++++----- src/tree/updater_colmaker-inl.hpp | 141 +++++++++++++-------- src/tree/updater_distcol-inl.hpp | 32 +++-- src/tree/updater_histmaker-inl.hpp | 92 +++++++------- src/tree/updater_prune-inl.hpp | 15 ++- src/tree/updater_refresh-inl.hpp | 12 +- src/tree/updater_skmaker-inl.hpp | 59 +++++---- src/tree/updater_sync-inl.hpp | 13 +- src/utils/config.h | 32 ++--- src/utils/group_data.h | 20 +-- src/utils/io.h | 26 ++-- src/utils/thread.h | 195 +++++++++++++++++++++-------- 21 files changed, 573 insertions(+), 391 deletions(-) diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp index 3d2f36f5f..17d90e556 100644 --- a/src/gbm/gblinear-inl.hpp +++ b/src/gbm/gblinear-inl.hpp @@ -1,11 +1,13 @@ -#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_ -#define XGBOOST_GBM_GBLINEAR_INL_HPP_ /*! + * Copyright by Contributors * \file gblinear-inl.hpp * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net * the update rule is parallel coordinate descent (shotgun) * \author Tianqi Chen */ +#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_ +#define XGBOOST_GBM_GBLINEAR_INL_HPP_ + #include #include #include @@ -33,10 +35,10 @@ class GBLinear : public IGradBooster { model.param.SetParam(name, val); } } - virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { + virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*) model.LoadModel(fi); } - virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { + virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*) model.SaveModel(fo); } virtual void InitModel(void) { @@ -92,7 +94,8 @@ class GBLinear : public IGradBooster { sum_hess += p.hess * v * v; } float &w = model[fid][gid]; - bst_float dw = static_cast(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w)); + bst_float dw = static_cast(param.learning_rate * + param.CalcDelta(sum_grad, sum_hess, w)); w += dw; // update grad value for (bst_uint j = 0; j < col.length; ++j) { @@ -258,12 +261,12 @@ class GBLinear : public IGradBooster { std::fill(weight.begin(), weight.end(), 0.0f); } // save the model to file - inline void SaveModel(utils::IStream &fo) const { + inline void SaveModel(utils::IStream &fo) const { // NOLINT(*) fo.Write(¶m, sizeof(Param)); fo.Write(weight); } // load model from file - inline void LoadModel(utils::IStream &fi) { + inline void LoadModel(utils::IStream &fi) { // NOLINT(*) utils::Assert(fi.Read(¶m, sizeof(Param)) != 0, "Load LinearBooster"); fi.Read(&weight); } diff --git a/src/gbm/gbm.cpp b/src/gbm/gbm.cpp index fe8d778e4..13ad44c57 100644 --- a/src/gbm/gbm.cpp +++ b/src/gbm/gbm.cpp @@ -1,3 +1,4 @@ +// Copyright by Contributors #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h index f07d277ac..60b7474e1 100644 --- a/src/gbm/gbm.h +++ b/src/gbm/gbm.h @@ -1,11 +1,14 @@ -#ifndef XGBOOST_GBM_GBM_H_ -#define XGBOOST_GBM_GBM_H_ /*! 
+ * Copyright by Contributors * \file gbm.h * \brief interface of gradient booster, that learns through gradient statistics * \author Tianqi Chen */ +#ifndef XGBOOST_GBM_GBM_H_ +#define XGBOOST_GBM_GBM_H_ + #include +#include #include "../data.h" #include "../utils/io.h" #include "../utils/fmap.h" @@ -13,7 +16,7 @@ namespace xgboost { /*! \brief namespace for gradient booster */ namespace gbm { -/*! +/*! * \brief interface of gradient boosting model */ class IGradBooster { @@ -29,26 +32,26 @@ class IGradBooster { * \param fi input stream * \param with_pbuffer whether the incoming data contains pbuffer */ - virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; + virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; // NOLINT(*) /*! * \brief save model to stream * \param fo output stream * \param with_pbuffer whether save out pbuffer */ - virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; + virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; // NOLINT(*) /*! * \brief initialize the model */ virtual void InitModel(void) = 0; - /*! + /*! * \brief reset the predict buffer * this will invalidate all the previous cached results * and recalculate from scratch */ virtual void ResetPredBuffer(size_t num_pbuffer) {} - /*! + /*! * \brief whether the model allow lazy checkpoint - * return true if model is only updated in DoBoost + * return true if model is only updated in DoBoost * after all Allreduce calls */ virtual bool AllowLazyCheckPoint(void) const { @@ -76,20 +79,20 @@ class IGradBooster { * the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size") * \param info extra side information that may be needed for prediction * \param out_preds output vector to hold the predictions - * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means + * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means * we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear */ virtual void Predict(IFMatrix *p_fmat, int64_t buffer_offset, const BoosterInfo &info, std::vector *out_preds, - unsigned ntree_limit = 0) = 0; + unsigned ntree_limit = 0) = 0; /*! * \brief online prediction funciton, predict score for one instance at a time * NOTE: use the batch prediction interface if possible, batch prediction is usually * more efficient than online prediction * This function is NOT threadsafe, make sure you only call from one thread - * + * * \param inst the instance you want to predict * \param out_preds output vector to hold the predictions * \param ntree_limit limit the number of trees used in prediction @@ -106,7 +109,7 @@ class IGradBooster { * \param p_fmat feature matrix * \param info extra side information that may be needed for prediction * \param out_preds output vector to hold the predictions - * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means + * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means * we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear */ virtual void PredictLeaf(IFMatrix *p_fmat, diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index c868c302a..9335ef8e7 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -1,13 +1,16 @@ -#ifndef XGBOOST_GBM_GBTREE_INL_HPP_ -#define XGBOOST_GBM_GBTREE_INL_HPP_ /*! 
+ * Copyright by Contributors * \file gbtree-inl.hpp * \brief gradient boosted tree implementation * \author Tianqi Chen */ +#ifndef XGBOOST_GBM_GBTREE_INL_HPP_ +#define XGBOOST_GBM_GBTREE_INL_HPP_ + #include #include #include +#include #include "./gbm.h" #include "../utils/omp.h" #include "../tree/updater.h" @@ -39,7 +42,7 @@ class GBTree : public IGradBooster { tparam.SetParam(name, val); if (trees.size() == 0) mparam.SetParam(name, val); } - virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { + virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*) this->Clear(); utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0, "GBTree: invalid model file"); @@ -62,10 +65,10 @@ class GBTree : public IGradBooster { "GBTree: invalid model file"); } } - virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { + virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*) utils::Assert(mparam.num_trees == static_cast(trees.size()), "GBTree"); if (with_pbuffer) { - fo.Write(&mparam, sizeof(ModelParam)); + fo.Write(&mparam, sizeof(ModelParam)); } else { ModelParam p = mparam; p.num_pbuffer = 0; @@ -129,7 +132,7 @@ class GBTree : public IGradBooster { int64_t buffer_offset, const BoosterInfo &info, std::vector *out_preds, - unsigned ntree_limit = 0) { + unsigned ntree_limit = 0) { int nthread; #pragma omp parallel { @@ -160,12 +163,12 @@ class GBTree : public IGradBooster { this->Pred(batch[i], buffer_offset < 0 ? -1 : buffer_offset + ridx, gid, info.GetRoot(ridx), &feats, - &preds[ridx * mparam.num_output_group + gid], stride, + &preds[ridx * mparam.num_output_group + gid], stride, ntree_limit); } } } - } + } virtual void Predict(const SparseBatch::Inst &inst, std::vector *out_preds, unsigned ntree_limit, @@ -178,10 +181,10 @@ class GBTree : public IGradBooster { // loop over output groups for (int gid = 0; gid < mparam.num_output_group; ++gid) { this->Pred(inst, -1, gid, root_index, &thread_temp[0], - &(*out_preds)[gid], mparam.num_output_group, + &(*out_preds)[gid], mparam.num_output_group, ntree_limit); } - } + } virtual void PredictLeaf(IFMatrix *p_fmat, const BoosterInfo &info, std::vector *out_preds, @@ -196,7 +199,6 @@ class GBTree : public IGradBooster { thread_temp[i].Init(mparam.num_feature); } this->PredPath(p_fmat, info, out_preds, ntree_limit); - } virtual std::vector DumpModel(const utils::FeatMap& fmap, int option) { std::vector dump; @@ -260,7 +262,7 @@ class GBTree : public IGradBooster { // update the trees for (size_t i = 0; i < updaters.size(); ++i) { updaters[i]->Update(gpair, p_fmat, info, new_trees); - } + } // optimization, update buffer, if possible // this is only under distributed column mode // for safety check of lazy checkpoint @@ -287,7 +289,7 @@ class GBTree : public IGradBooster { } // update buffer by pre-cached position inline void UpdateBufferByPosition(IFMatrix *p_fmat, - int64_t buffer_offset, + int64_t buffer_offset, int bst_group, const tree::RegTree &new_tree, const int* leaf_position) { @@ -313,11 +315,11 @@ class GBTree : public IGradBooster { int bst_group, unsigned root_index, tree::RegTree::FVec *p_feats, - float *out_pred, size_t stride, + float *out_pred, size_t stride, unsigned ntree_limit) { size_t itop = 0; float psum = 0.0f; - // sum of leaf vector + // sum of leaf vector std::vector vec_psum(mparam.size_leaf_vector, 0.0f); const int64_t bid = mparam.BufferOffset(buffer_index, bst_group); // number of valid trees @@ -339,7 +341,7 @@ class GBTree : public IGradBooster { for (int j = 
0; j < mparam.size_leaf_vector; ++j) { vec_psum[j] += trees[i]->leafvec(tid)[j]; } - if(--treeleft == 0) break; + if (--treeleft == 0) break; } } p_feats->Drop(inst); @@ -365,7 +367,7 @@ class GBTree : public IGradBooster { // number of valid trees if (ntree_limit == 0 || ntree_limit > trees.size()) { ntree_limit = static_cast(trees.size()); - } + } std::vector &preds = *out_preds; preds.resize(info.num_row * ntree_limit); // start collecting the prediction @@ -389,7 +391,7 @@ class GBTree : public IGradBooster { } } } - + // --- data structure --- /*! \brief training parameters */ struct TrainParam { @@ -442,10 +444,10 @@ class GBTree : public IGradBooster { int num_feature; /*! \brief size of predicton buffer allocated used for buffering */ int64_t num_pbuffer; - /*! + /*! * \brief how many output group a single instance can produce * this affects the behavior of number of output we have: - * suppose we have n instance and k group, output will be k*n + * suppose we have n instance and k group, output will be k*n */ int num_output_group; /*! \brief size of leaf vector needed in tree */ @@ -478,8 +480,8 @@ class GBTree : public IGradBooster { inline size_t PredBufferSize(void) const { return num_output_group * num_pbuffer * (size_leaf_vector + 1); } - /*! - * \brief get the buffer offset given a buffer index and group id + /*! + * \brief get the buffer offset given a buffer index and group id * \return calculated buffer offset */ inline int64_t BufferOffset(int64_t buffer_index, int bst_group) const { diff --git a/src/io/io.h b/src/io/io.h index ed075977c..267bb0bff 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -1,11 +1,13 @@ -#ifndef XGBOOST_IO_IO_H_ -#define XGBOOST_IO_IO_H_ /*! + * Copyright 2014 by Contributors * \file io.h * \brief handles input data format of xgboost * I/O module handles a specific DMatrix format * \author Tianqi Chen */ +#ifndef XGBOOST_IO_IO_H_ +#define XGBOOST_IO_IO_H_ + #include "../data.h" #include "../learner/dmatrix.h" @@ -32,7 +34,7 @@ DataMatrix* LoadDataMatrix(const char *fname, bool loadsplit, const char *cache_file = NULL); /*! - * \brief save DataMatrix into stream, + * \brief save DataMatrix into stream, * note: the saved dmatrix format may not be in exactly same as input * SaveDMatrix will choose the best way to materialize the dmatrix. * \param dmat the dmatrix to be saved @@ -40,7 +42,6 @@ DataMatrix* LoadDataMatrix(const char *fname, * \param silent whether print message during saving */ void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false); - } // namespace io } // namespace xgboost #endif // XGBOOST_IO_IO_H_ diff --git a/src/tree/model.h b/src/tree/model.h index 4eea34911..6a22aa5f1 100644 --- a/src/tree/model.h +++ b/src/tree/model.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_MODEL_H_ -#define XGBOOST_TREE_MODEL_H_ /*! + * Copyright 2014 by Contributors * \file model.h * \brief model structure for tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_MODEL_H_ +#define XGBOOST_TREE_MODEL_H_ + #include #include #include @@ -19,7 +21,7 @@ namespace xgboost { namespace tree { /*! - * \brief template class of TreeModel + * \brief template class of TreeModel * \tparam TSplitCond data type to indicate split condition * \tparam TNodeStat auxiliary statistics of node to help tree building */ @@ -42,7 +44,7 @@ class TreeModel { int max_depth; /*! \brief number of features used for tree construction */ int num_feature; - /*! + /*! 
* \brief leaf vector size, used for vector tree * used to store more than one dimensional information in tree */ @@ -55,8 +57,8 @@ class TreeModel { size_leaf_vector = 0; std::memset(reserved, 0, sizeof(reserved)); } - /*! - * \brief set parameters from outside + /*! + * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter */ @@ -70,7 +72,7 @@ class TreeModel { /*! \brief tree node */ class Node { public: - Node(void) : sindex_(0) {} + Node(void) : sindex_(0) {} /*! \brief index of left child */ inline int cleft(void) const { return this->cleft_; @@ -119,15 +121,15 @@ class TreeModel { inline bool is_root(void) const { return parent_ == -1; } - /*! - * \brief set the right child + /*! + * \brief set the right child * \param nide node id to right child */ inline void set_right_child(int nid) { this->cright_ = nid; } - /*! - * \brief set split condition of current node + /*! + * \brief set split condition of current node * \param split_index feature index to split * \param split_cond split condition * \param default_left the default direction when feature is unknown @@ -138,10 +140,10 @@ class TreeModel { this->sindex_ = split_index; (this->info_).split_cond = split_cond; } - /*! + /*! * \brief set the leaf value of the node * \param value leaf value - * \param right right index, could be used to store + * \param right right index, could be used to store * additional information */ inline void set_leaf(float value, int right = -1) { @@ -153,12 +155,12 @@ class TreeModel { inline void mark_delete(void) { this->sindex_ = std::numeric_limits::max(); } - + private: friend class TreeModel; - /*! - * \brief in leaf node, we have weights, in non-leaf nodes, - * we have split condition + /*! + * \brief in leaf node, we have weights, in non-leaf nodes, + * we have split condition */ union Info{ float leaf_value; @@ -203,7 +205,7 @@ class TreeModel { "number of nodes in the tree exceed 2^31"); nodes.resize(param.num_nodes); stats.resize(param.num_nodes); - leaf_vector.resize(param.num_nodes * param.size_leaf_vector); + leaf_vector.resize(param.num_nodes * param.size_leaf_vector); return nd; } // delete a tree node, keep the parent field to allow trace back @@ -215,7 +217,7 @@ class TreeModel { } public: - /*! + /*! * \brief change a non leaf node to a leaf node, delete its children * \param rid node id of the node * \param new leaf value @@ -229,7 +231,7 @@ class TreeModel { this->DeleteNode(nodes[rid].cright()); nodes[rid].set_leaf(value); } - /*! + /*! * \brief collapse a non leaf node to a leaf node, delete its children * \param rid node id of the node * \param new leaf value @@ -273,7 +275,7 @@ class TreeModel { return &leaf_vector[nid * param.size_leaf_vector]; } /*! \brief get leaf vector given nid */ - inline const bst_float* leafvec(int nid) const{ + inline const bst_float* leafvec(int nid) const { if (leaf_vector.size() == 0) return NULL; return &leaf_vector[nid * param.size_leaf_vector]; } @@ -288,15 +290,15 @@ class TreeModel { nodes[i].set_parent(-1); } } - /*! + /*! 
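 * (A reading note on the format, inferred from the Read/Write calls below
 * rather than stated anywhere: the on-disk layout mirrors the in-memory
 * arrays -- one Param header, then num_nodes raw Node records, then
 * num_nodes NodeStat records, then the optional leaf vector -- which is
 * why the vectors are resized from param.num_nodes before the bulk reads.)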
* \brief load model from stream * \param fi input stream */ - inline void LoadModel(utils::IStream &fi) { + inline void LoadModel(utils::IStream &fi) { // NOLINT(*) utils::Check(fi.Read(¶m, sizeof(Param)) > 0, "TreeModel: wrong format"); nodes.resize(param.num_nodes); stats.resize(param.num_nodes); - utils::Assert(param.num_nodes != 0, "invalid model"); + utils::Assert(param.num_nodes != 0, "invalid model"); utils::Check(fi.Read(BeginPtr(nodes), sizeof(Node) * nodes.size()) > 0, "TreeModel: wrong format"); utils::Check(fi.Read(BeginPtr(stats), sizeof(NodeStat) * stats.size()) > 0, @@ -313,22 +315,22 @@ class TreeModel { "number of deleted nodes do not match, num_deleted=%d, dnsize=%lu, num_nodes=%d", param.num_deleted, deleted_nodes.size(), param.num_nodes); } - /*! + /*! * \brief save model to stream * \param fo output stream */ - inline void SaveModel(utils::IStream &fo) const { + inline void SaveModel(utils::IStream &fo) const { // NOLINT(*) utils::Assert(param.num_nodes == static_cast(nodes.size()), "Tree::SaveModel"); utils::Assert(param.num_nodes == static_cast(stats.size()), "Tree::SaveModel"); fo.Write(¶m, sizeof(Param)); - utils::Assert(param.num_nodes != 0, "invalid model"); + utils::Assert(param.num_nodes != 0, "invalid model"); fo.Write(BeginPtr(nodes), sizeof(Node) * nodes.size()); fo.Write(BeginPtr(stats), sizeof(NodeStat) * nodes.size()); if (param.size_leaf_vector != 0) fo.Write(leaf_vector); } - /*! + /*! * \brief add child nodes to node * \param nid node id to add childs */ @@ -340,8 +342,8 @@ class TreeModel { nodes[nodes[nid].cleft() ].set_parent(nid, true); nodes[nodes[nid].cright()].set_parent(nid, false); } - /*! - * \brief only add a right child to a leaf node + /*! + * \brief only add a right child to a leaf node * \param node id to add right child */ inline void AddRightChild(int nid) { @@ -385,7 +387,7 @@ class TreeModel { inline int num_extra_nodes(void) const { return param.num_nodes - param.num_roots - param.num_deleted; } - /*! + /*! * \brief dump model to text string * \param fmap feature map of feature types * \param with_stats whether dump out statistics as well @@ -400,7 +402,7 @@ class TreeModel { } private: - void Dump(int nid, std::stringstream &fo, + void Dump(int nid, std::stringstream &fo, // NOLINT(*) const utils::FeatMap& fmap, int depth, bool with_stats) { for (int i = 0; i < depth; ++i) { fo << '\t'; @@ -469,7 +471,7 @@ struct RTreeNodeStat { /*! \brief number of child that is leaf node known up to now */ int leaf_child_cnt; /*! \brief print information of current stats to fo */ - inline void Print(std::stringstream &fo, bool is_leaf) const { + inline void Print(std::stringstream &fo, bool is_leaf) const { // NOLINT(*) if (!is_leaf) { fo << ",gain=" << loss_chg << ",cover=" << sum_hess; } else { @@ -481,13 +483,13 @@ struct RTreeNodeStat { /*! \brief define regression tree to be the most common tree model */ class RegTree: public TreeModel{ public: - /*! + /*! * \brief dense feature vector that can be taken by RegTree * to do tranverse efficiently * and can be construct from sparse feature vector */ struct FVec { - /*! + /*! * \brief a union value of value and flag * when flag == -1, this indicate the value is missing */ @@ -510,7 +512,7 @@ class RegTree: public TreeModel{ } } /*! 
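 * (Fill and Drop together implement a reusable dense scratch vector: Init
 * marks every slot missing once, Fill copies one sparse instance into the
 * dense array, the tree walk reads it, and Drop resets exactly the touched
 * entries back to flag == -1, so the next instance starts clean without
 * re-clearing the whole vector.)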
\brief drop the trace after fill, must be called after fill */ - inline void Drop(const RowBatch::Inst &inst) { + inline void Drop(const RowBatch::Inst &inst) { for (bst_uint i = 0; i < inst.length; ++i) { if (inst[i].index >= data.size()) continue; data[inst[i].index].flag = -1; @@ -526,10 +528,10 @@ class RegTree: public TreeModel{ } }; /*! - * \brief get the leaf index + * \brief get the leaf index * \param feats dense feature vector, if the feature is missing the field is set to NaN * \param root_gid starting root index of the instance - * \return the leaf index of the given feature + * \return the leaf index of the given feature */ inline int GetLeafIndex(const FVec&feat, unsigned root_id = 0) const { // start from groups that belongs to current data @@ -545,7 +547,7 @@ class RegTree: public TreeModel{ * \brief get the prediction of regression tree, only accepts dense feature vector * \param feats dense feature vector, if the feature is missing the field is set to NaN * \param root_gid starting root index of the instance - * \return the leaf index of the given feature + * \return the leaf index of the given feature */ inline float Predict(const FVec &feat, unsigned root_id = 0) const { int pid = this->GetLeafIndex(feat, root_id); diff --git a/src/tree/param.h b/src/tree/param.h index 20ba1e6c0..f06365a17 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -1,10 +1,13 @@ -#ifndef XGBOOST_TREE_PARAM_H_ -#define XGBOOST_TREE_PARAM_H_ /*! + * Copyright 2014 by Contributors * \file param.h * \brief training parameters, statistics used to support tree construction * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_PARAM_H_ +#define XGBOOST_TREE_PARAM_H_ + +#include #include #include "../data.h" @@ -27,7 +30,7 @@ struct TrainParam{ // L1 regularization factor float reg_alpha; // default direction choice - int default_direction; + int default_direction; // maximum delta update we can add in weight estimation // this parameter can be used to stablize update // default=0 means no constraint on weight delta @@ -45,7 +48,7 @@ struct TrainParam{ // accuracy of sketch float sketch_ratio; // leaf vector size - int size_leaf_vector; + int size_leaf_vector; // option for parallelization int parallel_option; // option to open cacheline optimizaton @@ -74,11 +77,11 @@ struct TrainParam{ sketch_ratio = 2.0f; cache_opt = 1; } - /*! - * \brief set parameters from outside + /*! + * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter - */ + */ inline void SetParam(const char *name, const char *val) { using namespace std; // sync-names @@ -116,7 +119,7 @@ struct TrainParam{ if (reg_alpha == 0.0f) { return Sqr(sum_grad) / (sum_hess + reg_lambda); } else { - return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda); + return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda); } } else { double w = CalcWeight(sum_grad, sum_hess); @@ -213,7 +216,7 @@ struct GradStats { inline static void CheckInfo(const BoosterInfo &info) { } /*! - * \brief accumulate statistics + * \brief accumulate statistics * \param p the gradient pair */ inline void Add(bst_gpair p) { @@ -222,7 +225,7 @@ struct GradStats { /*! 
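 * (Worked example of the structure score defined by CalcGain above, with
 * hypothetical statistics and reg_alpha = 0, reg_lambda = 1:
 *   parent: g = -3.0, h = 10  ->  gain = 9.00 / 11 ~ 0.818
 *   left:   g = -2.5, h = 4   ->  gain = 6.25 / 5  = 1.250
 *   right:  g = -0.5, h = 6   ->  gain = 0.25 / 7  ~ 0.036
 * The tree makers later score this candidate split as
 *   loss_chg = 1.250 + 0.036 - 0.818 ~ 0.468 > 0,
 * i.e. splitting reduces the regularized training loss.)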
* \brief accumulate statistics, more complicated version * \param gpair the vector storing the gradient statistics - * \param info the additional information + * \param info the additional information * \param ridx instance index of this instance */ inline void Add(const std::vector &gpair, @@ -244,7 +247,7 @@ struct GradStats { this->Add(b.sum_grad, b.sum_hess); } /*! \brief same as add, reduce is used in All Reduce */ - inline static void Reduce(GradStats &a, const GradStats &b) { + inline static void Reduce(GradStats &a, const GradStats &b) { // NOLINT(*) a.Add(b); } /*! \brief set current value to a - b */ @@ -257,8 +260,8 @@ struct GradStats { return sum_hess == 0.0; } /*! \brief set leaf vector value based on statistics */ - inline void SetLeafVec(const TrainParam ¶m, bst_float *vec) const{ - } + inline void SetLeafVec(const TrainParam ¶m, bst_float *vec) const { + } // constructor to allow inheritance GradStats(void) {} /*! \brief add statistics to the data */ @@ -311,7 +314,7 @@ struct CVGradStats : public GradStats { ret += param.CalcGain(train[i].sum_grad, train[i].sum_hess, vsize * valid[i].sum_grad, - vsize * valid[i].sum_hess); + vsize * valid[i].sum_hess); } return ret / vsize; } @@ -324,7 +327,7 @@ struct CVGradStats : public GradStats { } } /*! \brief same as add, reduce is used in All Reduce */ - inline static void Reduce(CVGradStats &a, const CVGradStats &b) { + inline static void Reduce(CVGradStats &a, const CVGradStats &b) { // NOLINT(*) a.Add(b); } /*! \brief set current value to a - b */ @@ -344,8 +347,8 @@ struct CVGradStats : public GradStats { } }; -/*! - * \brief statistics that is helpful to store +/*! + * \brief statistics that is helpful to store * and represent a split solution for the tree */ struct SplitEntry{ @@ -357,12 +360,12 @@ struct SplitEntry{ float split_value; /*! \brief constructor */ SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {} - /*! - * \brief decides whether a we can replace current entry with the statistics given + /*! + * \brief decides whether a we can replace current entry with the statistics given * This function gives better priority to lower index when loss_chg equals * not the best way, but helps to give consistent result during multi-thread execution * \param loss_chg the loss reduction get through the split - * \param split_index the feature index where the split is on + * \param split_index the feature index where the split is on */ inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { if (this->split_index() <= split_index) { @@ -371,7 +374,7 @@ struct SplitEntry{ return !(this->loss_chg > new_loss_chg); } } - /*! + /*! * \brief update the split entry, replace it if e is better * \param e candidate split solution * \return whether the proposed split is better and can replace current split @@ -386,7 +389,7 @@ struct SplitEntry{ return false; } } - /*! + /*! * \brief update the split entry, replace it if e is better * \param loss_chg loss reduction of new candidate * \param split_index feature index to split on @@ -407,7 +410,7 @@ struct SplitEntry{ } } /*! 
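 * (Illustration of the tie-breaking rule above: if the current best split
 * is on feature 7 with loss_chg = 0.5 and a thread proposes feature 3 with
 * the same loss_chg, Update accepts the replacement; proposed in the other
 * order, feature 3 is kept. Either arrival order converges to the lower
 * feature index, which is what makes multi-threaded runs repeatable.)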
\brief same as update, used by AllReduce*/ - inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { + inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { // NOLINT(*) dst.Update(src); } /*!\return feature index to split on */ diff --git a/src/tree/updater.cpp b/src/tree/updater.cpp index 5d2e99820..eb2e06925 100644 --- a/src/tree/updater.cpp +++ b/src/tree/updater.cpp @@ -1,3 +1,4 @@ +// Copyright 2014 by Contributors #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX diff --git a/src/tree/updater.h b/src/tree/updater.h index 4ced21e5e..1cf74a699 100644 --- a/src/tree/updater.h +++ b/src/tree/updater.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_H_ -#define XGBOOST_TREE_UPDATER_H_ /*! + * Copyright 2014 by Contributors * \file updater.h * \brief interface to update the tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_H_ +#define XGBOOST_TREE_UPDATER_H_ + #include #include "../data.h" @@ -12,7 +14,7 @@ namespace xgboost { namespace tree { -/*! +/*! * \brief interface of tree update module, that performs update of a tree */ class IUpdater { @@ -21,7 +23,7 @@ class IUpdater { * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter - */ + */ virtual void SetParam(const char *name, const char *val) = 0; /*! * \brief peform update to the tree models @@ -29,8 +31,8 @@ class IUpdater { * \param p_fmat feature matrix that provide access to features * \param info extra side information that may be need, such as root index * \param trees pointer to the trees to be updated, upater will change the content of the tree - * note: all the trees in the vector are updated, with the same statistics, - * but maybe different random seeds, usually one tree is passed in at a time, + * note: all the trees in the vector are updated, with the same statistics, + * but maybe different random seeds, usually one tree is passed in at a time, * there can be multiple trees when we train random forest style model */ virtual void Update(const std::vector &gpair, @@ -38,7 +40,7 @@ class IUpdater { const BoosterInfo &info, const std::vector &trees) = 0; - /*! + /*! * \brief this is simply a function for optimizing performance * this function asks the updater to return the leaf position of each instance in the p_fmat, * if it is cached in the updater, if it is not available, return NULL @@ -50,8 +52,8 @@ class IUpdater { // destructor virtual ~IUpdater(void) {} }; -/*! - * \brief create a updater based on name +/*! + * \brief create a updater based on name * \param name name of updater * \return return the updater instance */ diff --git a/src/tree/updater_basemaker-inl.hpp b/src/tree/updater_basemaker-inl.hpp index f144ae199..6204c47b7 100644 --- a/src/tree/updater_basemaker-inl.hpp +++ b/src/tree/updater_basemaker-inl.hpp @@ -1,12 +1,14 @@ -#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_basemaker-inl.hpp * \brief implement a common tree constructor * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ #include #include +#include #include #include "../sync/sync.h" #include "../utils/random.h" @@ -14,7 +16,7 @@ namespace xgboost { namespace tree { -/*! +/*! 
* \brief base tree maker class that defines common operation * needed in tree making */ @@ -26,7 +28,7 @@ class BaseMaker: public IUpdater { virtual void SetParam(const char *name, const char *val) { param.SetParam(name, val); } - + protected: // helper to collect and query feature meta information struct FMetaHelper { @@ -60,8 +62,11 @@ class BaseMaker: public IUpdater { bst_float a = fminmax[fid * 2]; bst_float b = fminmax[fid * 2 + 1]; if (a == -std::numeric_limits::max()) return 0; - if (-a == b) return 1; - else return 2; + if (-a == b) { + return 1; + } else { + return 2; + } } inline bst_float MaxValue(bst_uint fid) const { return fminmax[fid *2 + 1]; @@ -70,7 +75,7 @@ class BaseMaker: public IUpdater { std::vector &findex = *p_findex; findex.clear(); for (size_t i = 0; i < fminmax.size(); i += 2) { - const bst_uint fid = static_cast(i / 2); + const bst_uint fid = static_cast(i / 2); if (this->Type(fid) != 0) findex.push_back(fid); } unsigned n = static_cast(p * findex.size()); @@ -86,7 +91,7 @@ class BaseMaker: public IUpdater { rabit::Broadcast(&s_cache, 0); fs.Read(&findex); } - + private: std::vector fminmax; }; @@ -116,7 +121,7 @@ class BaseMaker: public IUpdater { } return nthread; } - // ------class member helpers--------- + // ------class member helpers--------- /*! \brief initialize temp data structure */ inline void InitData(const std::vector &gpair, const IFMatrix &fmat, @@ -124,7 +129,8 @@ class BaseMaker: public IUpdater { const RegTree &tree) { utils::Assert(tree.param.num_nodes == tree.param.num_roots, "TreeMaker: can only grow new tree"); - {// setup position + { + // setup position position.resize(gpair.size()); if (root_index.size() == 0) { std::fill(position.begin(), position.end(), 0); @@ -147,7 +153,8 @@ class BaseMaker: public IUpdater { } } } - {// expand query + { + // expand query qexpand.reserve(256); qexpand.clear(); for (int i = 0; i < tree.param.num_roots; ++i) { qexpand.push_back(i); @@ -170,7 +177,7 @@ class BaseMaker: public IUpdater { this->UpdateNode2WorkIndex(tree); } // return decoded position - inline int DecodePosition(bst_uint ridx) const{ + inline int DecodePosition(bst_uint ridx) const { const int pid = position[ridx]; return pid < 0 ? ~pid : pid; } @@ -182,23 +189,24 @@ class BaseMaker: public IUpdater { position[ridx] = nid; } } - /*! + /*! 
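 * (Note on the encoding used by DecodePosition/SetEncodePosition above:
 * position stores nid for rows still taking part in statistics collection
 * and the bitwise complement ~nid -- e.g. ~5 == -6 -- for rows parked in a
 * non-fresh leaf; the sign acts as the "ignore me" flag and complementing
 * again recovers the node id.)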
* \brief this is helper function uses column based data structure, * reset the positions to the lastest one * \param nodes the set of nodes that contains the split to be used * \param p_fmat feature matrix needed for tree construction * \param tree the regression tree structure */ - inline void ResetPositionCol(const std::vector &nodes, IFMatrix *p_fmat, const RegTree &tree) { + inline void ResetPositionCol(const std::vector &nodes, + IFMatrix *p_fmat, const RegTree &tree) { // set the positions in the nondefault this->SetNonDefaultPositionCol(nodes, p_fmat, tree); // set rest of instances to default position const std::vector &rowset = p_fmat->buffered_rowset(); // set default direct nodes to default - // for leaf nodes that are not fresh, mark then to ~nid, + // for leaf nodes that are not fresh, mark then to ~nid, // so that they are ignored in future statistics collection const bst_omp_uint ndata = static_cast(rowset.size()); - + #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; @@ -237,7 +245,7 @@ class BaseMaker: public IUpdater { } std::sort(fsplits.begin(), fsplits.end()); fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin()); - + utils::IIterator *iter = p_fmat->ColIterator(fsplits); while (iter->Next()) { const ColBatch &batch = iter->Value(); @@ -252,7 +260,7 @@ class BaseMaker: public IUpdater { const int nid = this->DecodePosition(ridx); // go back to parent, correct those who are not default if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { - if(fvalue < tree[nid].split_cond()) { + if (fvalue < tree[nid].split_cond()) { this->SetEncodePosition(ridx, tree[nid].cleft()); } else { this->SetEncodePosition(ridx, tree[nid].cright()); @@ -324,7 +332,7 @@ class BaseMaker: public IUpdater { sketch->temp.size = 0; } /*! 
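 * (Note on the rank bookkeeping in Push below, as far as this excerpt
 * shows: rmin accumulates the weight strictly below the current value and
 * wmin the weight tied at it, so rmax = rmin + wmin; an entry is flushed
 * into the summary whenever rmax crosses next_goal, which advances in
 * steps of roughly sum_total / max_size, yielding about max_size cut
 * candidates evenly spaced in weighted rank.)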
- * \brief push a new element to sketch + * \brief push a new element to sketch * \param fvalue feature value, comes in sorted ascending order * \param w weight * \param max_size @@ -337,31 +345,32 @@ class BaseMaker: public IUpdater { return; } if (last_fvalue != fvalue) { - double rmax = rmin + wmin; + double rmax = rmin + wmin; if (rmax >= next_goal && sketch->temp.size != max_size) { - if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { + if (sketch->temp.size == 0 || + last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { // push to sketch sketch->temp.data[sketch->temp.size] = utils::WXQuantileSketch:: Entry(static_cast(rmin), - static_cast(rmax), - static_cast(wmin), last_fvalue); + static_cast(rmax), + static_cast(wmin), last_fvalue); utils::Assert(sketch->temp.size < max_size, "invalid maximum size max_size=%u, stemp.size=%lu\n", max_size, sketch->temp.size); ++sketch->temp.size; } if (sketch->temp.size == max_size) { - next_goal = sum_total * 2.0f + 1e-5f; - } else{ + next_goal = sum_total * 2.0f + 1e-5f; + } else { next_goal = static_cast(sketch->temp.size * sum_total / max_size); } } else { - if (rmax >= next_goal) { - rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n", - rmax, sum_total, next_goal, sketch->temp.size); - } - } + if (rmax >= next_goal) { + rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n", + rmax, sum_total, next_goal, sketch->temp.size); + } + } rmin = rmax; wmin = w; last_fvalue = fvalue; @@ -375,13 +384,13 @@ class BaseMaker: public IUpdater { if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { utils::Assert(sketch->temp.size <= max_size, "Finalize: invalid maximum size, max_size=%u, stemp.size=%lu", - sketch->temp.size, max_size ); + sketch->temp.size, max_size); // push to sketch sketch->temp.data[sketch->temp.size] = utils::WXQuantileSketch:: Entry(static_cast(rmin), - static_cast(rmax), - static_cast(wmin), last_fvalue); + static_cast(rmax), + static_cast(wmin), last_fvalue); ++sketch->temp.size; } sketch->PushTemp(); @@ -415,4 +424,4 @@ class BaseMaker: public IUpdater { }; } // namespace tree } // namespace xgboost -#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ +#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index db3581aac..e3070d495 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ /*! 
+ * Copyright 2014 by Contributors * \file updater_colmaker-inl.hpp * \brief use columnwise update to construct a tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ + #include #include #include @@ -114,10 +116,13 @@ class ColMaker: public IUpdater { // initialize temp data structure inline void InitData(const std::vector &gpair, const IFMatrix &fmat, - const std::vector &root_index, const RegTree &tree) { - utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree"); + const std::vector &root_index, + const RegTree &tree) { + utils::Assert(tree.param.num_nodes == tree.param.num_roots, + "ColMaker: can only grow new tree"); const std::vector &rowset = fmat.buffered_rowset(); - {// setup position + { + // setup position position.resize(gpair.size()); if (root_index.size() == 0) { for (size_t i = 0; i < rowset.size(); ++i) { @@ -127,7 +132,8 @@ class ColMaker: public IUpdater { for (size_t i = 0; i < rowset.size(); ++i) { const bst_uint ridx = rowset[i]; position[ridx] = root_index[ridx]; - utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, "root index exceed setting"); + utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, + "root index exceed setting"); } } // mark delete for the deleted datas @@ -154,11 +160,12 @@ class ColMaker: public IUpdater { } unsigned n = static_cast(param.colsample_bytree * feat_index.size()); random::Shuffle(feat_index); - //utils::Check(n > 0, "colsample_bytree is too small that no feature can be included"); - utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included", param.colsample_bytree); + utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included", + param.colsample_bytree); feat_index.resize(n); } - {// setup temp space for each thread + { + // setup temp space for each thread #pragma omp parallel { this->nthread = omp_get_num_threads(); @@ -171,20 +178,25 @@ class ColMaker: public IUpdater { } snode.reserve(256); } - {// expand query + { + // expand query qexpand_.reserve(256); qexpand_.clear(); for (int i = 0; i < tree.param.num_roots; ++i) { qexpand_.push_back(i); } } } - /*! \brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */ + /*! 
+ * \brief initialize the base_weight, root_gain, + * and NodeEntry for all the new nodes in qexpand + */ inline void InitNewNode(const std::vector &qexpand, const std::vector &gpair, const IFMatrix &fmat, const BoosterInfo &info, const RegTree &tree) { - {// setup statistics space for each tree node + { + // setup statistics space for each tree node for (size_t i = 0; i < stemp.size(); ++i) { stemp[i].resize(tree.param.num_nodes, ThreadEntry(param)); } @@ -226,7 +238,7 @@ class ColMaker: public IUpdater { } // use new nodes for qexpand qexpand = newnodes; - } + } // parallel find the best split of current fid // this function does not support nested functions inline void ParallelFindSplit(const ColBatch::Inst &col, @@ -280,26 +292,30 @@ class ColMaker: public IUpdater { ThreadEntry &e = stemp[tid][nid]; float fsplit; if (tid != 0) { - if(std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) { + if (std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) { fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f; } else { continue; } } else { fsplit = e.first_fvalue - rt_eps; - } + } if (need_forward && tid != 0) { c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + e.stats.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, fsplit, false); } } if (need_backward) { tmp.SetSubstract(sum, e.stats); c.SetSubstract(snode[nid].stats, tmp); - if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + tmp.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(tmp.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, fsplit, true); } } @@ -308,8 +324,10 @@ class ColMaker: public IUpdater { tmp = sum; ThreadEntry &e = stemp[nthread-1][nid]; c.SetSubstract(snode[nid].stats, tmp); - if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + tmp.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(tmp.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true); } } @@ -335,25 +353,31 @@ class ColMaker: public IUpdater { e.first_fvalue = fvalue; } else { // forward default right - if (std::abs(fvalue - e.first_fvalue) > rt_2eps){ - if (need_forward) { + if (std::abs(fvalue - e.first_fvalue) > rt_2eps) { + if (need_forward) { c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + e.stats.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - + snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false); } } if 
(need_backward) { cright.SetSubstract(e.stats_extra, e.stats); c.SetSubstract(snode[nid].stats, cright); - if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + cright.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(cright.CalcGain(param) + + c.CalcGain(param) - + snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true); } } - } + } e.stats.Add(gpair, info, ridx); - e.first_fvalue = fvalue; + e.first_fvalue = fvalue; } } } @@ -361,7 +385,7 @@ class ColMaker: public IUpdater { // update enumeration solution inline void UpdateEnumeration(int nid, bst_gpair gstats, float fvalue, int d_step, bst_uint fid, - TStats &c, std::vector &temp) { + TStats &c, std::vector &temp) { // NOLINT(*) // get the statistics of nid ThreadEntry &e = temp[nid]; // test if first hit, this is fine, because we set 0 during init @@ -370,10 +394,12 @@ class ColMaker: public IUpdater { e.last_fvalue = fvalue; } else { // try to find a split - if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { + if (std::abs(fvalue - e.last_fvalue) > rt_2eps && + e.stats.sum_hess >= param.min_child_weight) { c.SetSubstract(snode[nid].stats, e.stats); if (c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); } } @@ -388,7 +414,7 @@ class ColMaker: public IUpdater { int d_step, bst_uint fid, const std::vector &gpair, - std::vector &temp) { + std::vector &temp) { // NOLINT(*) const std::vector &qexpand = qexpand_; // clear all the temp statistics for (size_t j = 0; j < qexpand.size(); ++j) { @@ -423,7 +449,7 @@ class ColMaker: public IUpdater { this->UpdateEnumeration(nid, buf_gpair[i], p->fvalue, d_step, fid, c, temp); - } + } } // finish up the ending piece for (it = align_end, i = 0; it != end; ++i, it += d_step) { @@ -436,14 +462,15 @@ class ColMaker: public IUpdater { this->UpdateEnumeration(nid, buf_gpair[i], it->fvalue, d_step, fid, c, temp); - } + } // finish updating all statistics, check if it is possible to include all sum statistics for (size_t i = 0; i < qexpand.size(); ++i) { const int nid = qexpand[i]; ThreadEntry &e = temp[nid]; c.SetSubstract(snode[nid].stats, e.stats); if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); const float gap = std::abs(e.last_fvalue) + rt_eps; const float delta = d_step == +1 ? 
gap: -gap; e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); @@ -458,7 +485,7 @@ class ColMaker: public IUpdater { bst_uint fid, const std::vector &gpair, const BoosterInfo &info, - std::vector &temp) { + std::vector &temp) { // NOLINT(*) // use cacheline aware optimization if (TStats::kSimpleStats != 0 && param.cache_opt != 0) { EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp); @@ -471,7 +498,7 @@ class ColMaker: public IUpdater { } // left statistics TStats c(param); - for(const ColBatch::Entry *it = begin; it != end; it += d_step) { + for (const ColBatch::Entry *it = begin; it != end; it += d_step) { const bst_uint ridx = it->index; const int nid = position[ridx]; if (nid < 0) continue; @@ -485,10 +512,12 @@ class ColMaker: public IUpdater { e.last_fvalue = fvalue; } else { // try to find a split - if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { + if (std::abs(fvalue - e.last_fvalue) > rt_2eps && + e.stats.sum_hess >= param.min_child_weight) { c.SetSubstract(snode[nid].stats, e.stats); if (c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); } } @@ -503,7 +532,8 @@ class ColMaker: public IUpdater { ThreadEntry &e = temp[nid]; c.SetSubstract(snode[nid].stats, e.stats); if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); const float gap = std::abs(e.last_fvalue) + rt_eps; const float delta = d_step == +1 ? 
gap: -gap; e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); @@ -511,14 +541,14 @@ class ColMaker: public IUpdater { } } - // update the solution candidate + // update the solution candidate virtual void UpdateSolution(const ColBatch &batch, const std::vector &gpair, const IFMatrix &fmat, const BoosterInfo &info) { // start enumeration const bst_omp_uint nsize = static_cast(batch.size); - #if defined(_OPENMP) + #if defined(_OPENMP) const int batch_size = std::max(static_cast(nsize / this->nthread / 32), 1); #endif int poption = param.parallel_option; @@ -533,11 +563,11 @@ class ColMaker: public IUpdater { const ColBatch::Inst c = batch[i]; const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue; if (param.need_forward_search(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data, c.data + c.length, +1, + this->EnumerateSplit(c.data, c.data + c.length, +1, fid, gpair, info, stemp[tid]); } if (param.need_backward_search(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, + this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, fid, gpair, info, stemp[tid]); } } @@ -546,7 +576,7 @@ class ColMaker: public IUpdater { this->ParallelFindSplit(batch[i], batch.col_index[i], fmat, gpair, info); } - } + } } // find splits at current level, do split per level inline void FindSplit(int depth, @@ -571,7 +601,7 @@ class ColMaker: public IUpdater { // get the best result, we can synchronize the solution for (size_t i = 0; i < qexpand.size(); ++i) { const int nid = qexpand[i]; - NodeEntry &e = snode[nid]; + NodeEntry &e = snode[nid]; // now we know the solution in snode[nid], set split if (e.best.loss_chg > rt_eps) { p_tree->AddChilds(nid); @@ -582,19 +612,20 @@ class ColMaker: public IUpdater { } else { (*p_tree)[nid].set_leaf(e.weight * param.learning_rate); } - } + } } // reset position of each data points after split is created in the tree - inline void ResetPosition(const std::vector &qexpand, IFMatrix *p_fmat, const RegTree &tree) { + inline void ResetPosition(const std::vector &qexpand, + IFMatrix *p_fmat, const RegTree &tree) { // set the positions in the nondefault - this->SetNonDefaultPosition(qexpand, p_fmat, tree); + this->SetNonDefaultPosition(qexpand, p_fmat, tree); // set rest of instances to default position const std::vector &rowset = p_fmat->buffered_rowset(); // set default direct nodes to default - // for leaf nodes that are not fresh, mark then to ~nid, + // for leaf nodes that are not fresh, mark then to ~nid, // so that they are ignored in future statistics collection const bst_omp_uint ndata = static_cast(rowset.size()); - + #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; @@ -655,7 +686,7 @@ class ColMaker: public IUpdater { const float fvalue = col[j].fvalue; // go back to parent, correct those who are not default if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { - if(fvalue < tree[nid].split_cond()) { + if (fvalue < tree[nid].split_cond()) { this->SetEncodePosition(ridx, tree[nid].cleft()); } else { this->SetEncodePosition(ridx, tree[nid].cright()); @@ -667,7 +698,7 @@ class ColMaker: public IUpdater { } // utils to get/set position, with encoded format // return decoded position - inline int DecodePosition(bst_uint ridx) const{ + inline int DecodePosition(bst_uint ridx) const { const int pid = position[ridx]; return pid < 0 ? 
~pid : pid; } @@ -679,7 +710,7 @@ class ColMaker: public IUpdater { position[ridx] = nid; } } - //--data fields-- + // --data fields-- const TrainParam ¶m; // number of omp thread used during training int nthread; diff --git a/src/tree/updater_distcol-inl.hpp b/src/tree/updater_distcol-inl.hpp index c989f4e47..e3d3f8b59 100644 --- a/src/tree/updater_distcol-inl.hpp +++ b/src/tree/updater_distcol-inl.hpp @@ -1,11 +1,15 @@ -#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ -#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_distcol-inl.hpp - * \brief beta distributed version that takes a sub-column + * \brief beta distributed version that takes a sub-column * and construct a tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ +#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ + +#include +#include #include "../sync/sync.h" #include "../utils/bitmap.h" #include "../utils/io.h" @@ -27,7 +31,7 @@ class DistColMaker : public ColMaker { virtual void Update(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, - const std::vector &trees) { + const std::vector &trees) { TStats::CheckInfo(info); utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time"); // build the tree @@ -39,11 +43,12 @@ class DistColMaker : public ColMaker { } virtual const int* GetLeafPosition(void) const { return builder.GetLeafPosition(); - } + } + private: struct Builder : public ColMaker::Builder { public: - Builder(const TrainParam ¶m) + explicit Builder(const TrainParam ¶m) : ColMaker::Builder(param) { } inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) { @@ -63,7 +68,8 @@ class DistColMaker : public ColMaker { virtual const int* GetLeafPosition(void) const { return BeginPtr(this->position); } - protected: + + protected: virtual void SetNonDefaultPosition(const std::vector &qexpand, IFMatrix *p_fmat, const RegTree &tree) { // step 2, classify the non-default data into right places @@ -87,7 +93,7 @@ class DistColMaker : public ColMaker { #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { boolmap[j] = 0; - } + } } utils::IIterator *iter = p_fmat->ColIterator(fsplits); while (iter->Next()) { @@ -111,7 +117,7 @@ class DistColMaker : public ColMaker { } } } - + bitmap.InitFromBool(boolmap); // communicate bitmap rabit::Allreduce(BeginPtr(bitmap.data), bitmap.data.size()); @@ -142,7 +148,7 @@ class DistColMaker : public ColMaker { } vec.push_back(this->snode[nid].best); } - // TODO, lazy version + // TODO(tqchen) lazy version // communicate best solution reducer.Allreduce(BeginPtr(vec), vec.size()); // assign solution back @@ -151,7 +157,7 @@ class DistColMaker : public ColMaker { this->snode[nid].best = vec[i]; } } - + private: utils::BitMap bitmap; std::vector boolmap; @@ -162,8 +168,8 @@ class DistColMaker : public ColMaker { // training parameter TrainParam param; // pointer to the builder - Builder builder; + Builder builder; }; } // namespace tree } // namespace xgboost -#endif +#endif // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ diff --git a/src/tree/updater_histmaker-inl.hpp b/src/tree/updater_histmaker-inl.hpp index f739f23f3..d86204e4b 100644 --- a/src/tree/updater_histmaker-inl.hpp +++ b/src/tree/updater_histmaker-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ /*! 
+ * Copyright 2014 by Contributors * \file updater_histmaker-inl.hpp * \brief use histogram counting to construct a tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ + #include #include #include "../sync/sync.h" @@ -38,7 +40,7 @@ class HistMaker: public BaseMaker { struct HistUnit { /*! \brief cutting point of histogram, contains maximum point */ const bst_float *cut; - /*! \brief content of statistics data */ + /*! \brief content of statistics data */ TStats *data; /*! \brief size of histogram */ unsigned size; @@ -48,13 +50,13 @@ class HistMaker: public BaseMaker { HistUnit(const bst_float *cut, TStats *data, unsigned size) : cut(cut), data(data), size(size) {} /*! \brief add a histogram to data */ - inline void Add(bst_float fv, + inline void Add(bst_float fv, const std::vector &gpair, const BoosterInfo &info, const bst_uint ridx) { unsigned i = std::upper_bound(cut, cut + size, fv) - cut; utils::Assert(size != 0, "try insert into size=0"); - utils::Assert(i < size, + utils::Assert(i < size, "maximum value must be in cut, fv = %g, cutmax=%g", fv, cut[size-1]); data[i].Add(gpair, info, ridx); } @@ -74,7 +76,7 @@ class HistMaker: public BaseMaker { rptr[fid+1] - rptr[fid]); } }; - // thread workspace + // thread workspace struct ThreadWSpace { /*! \brief actual unit pointer */ std::vector rptr; @@ -92,7 +94,7 @@ class HistMaker: public BaseMaker { } hset[tid].rptr = BeginPtr(rptr); hset[tid].cut = BeginPtr(cut); - hset[tid].data.resize(cut.size(), TStats(param)); + hset[tid].data.resize(cut.size(), TStats(param)); } } // aggregate all statistics to hset[0] @@ -147,7 +149,7 @@ class HistMaker: public BaseMaker { } // this function does two jobs // (1) reset the position in array position, to be the latest leaf id - // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly + // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly virtual void ResetPosAndPropose(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, @@ -171,8 +173,9 @@ class HistMaker: public BaseMaker { const BoosterInfo &info, const std::vector &fset, const RegTree &tree) = 0; + private: - inline void EnumerateSplit(const HistUnit &hist, + inline void EnumerateSplit(const HistUnit &hist, const TStats &node_sum, bst_uint fid, SplitEntry *best, @@ -187,7 +190,7 @@ class HistMaker: public BaseMaker { c.SetSubstract(node_sum, s); if (c.sum_hess >= param.min_child_weight) { double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; - if (best->Update((float)loss_chg, fid, hist.cut[i], false)) { + if (best->Update(static_cast(loss_chg), fid, hist.cut[i], false)) { *left_sum = s; } } @@ -200,7 +203,7 @@ class HistMaker: public BaseMaker { c.SetSubstract(node_sum, s); if (c.sum_hess >= param.min_child_weight) { double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; - if (best->Update((float)loss_chg, fid, hist.cut[i-1], true)) { + if (best->Update(static_cast(loss_chg), fid, hist.cut[i-1], true)) { *left_sum = c; } } @@ -216,22 +219,22 @@ class HistMaker: public BaseMaker { const size_t num_feature = fset.size(); // get the best split condition for each node std::vector sol(qexpand.size()); - std::vector left_sum(qexpand.size()); + std::vector left_sum(qexpand.size()); bst_omp_uint nexpand = static_cast(qexpand.size()); #pragma omp parallel for schedule(dynamic, 1) - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { 
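      // (one iteration = one expanded node scanning every sampled feature's
      // histogram; nodes differ widely in work, hence schedule(dynamic, 1)
      // above to balance them across threads)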
const int nid = qexpand[wid]; utils::Assert(node2workindex[nid] == static_cast(wid), "node2workindex inconsistent"); SplitEntry &best = sol[wid]; TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; - for (size_t i = 0; i < fset.size(); ++ i) { + for (size_t i = 0; i < fset.size(); ++i) { EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)], node_sum, fset[i], &best, &left_sum[wid]); } } // get the best result, we can synchronize the solution - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { const int nid = qexpand[wid]; const SplitEntry &best = sol[wid]; const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; @@ -244,7 +247,7 @@ class HistMaker: public BaseMaker { (*p_tree)[nid].set_split(best.split_index(), best.split_value, best.default_left()); // mark right child as 0, to indicate fresh leaf - (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0); // right side sum TStats right_sum; @@ -256,11 +259,11 @@ class HistMaker: public BaseMaker { } } } - + inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) { p_tree->stat(nid).base_weight = static_cast(node_sum.CalcWeight(param)); p_tree->stat(nid).sum_hess = static_cast(node_sum.sum_hess); - node_sum.SetLeafVec(param, p_tree->leafvec(nid)); + node_sum.SetLeafVec(param, p_tree->leafvec(nid)); } }; @@ -270,7 +273,7 @@ class CQHistMaker: public HistMaker { struct HistEntry { typename HistMaker::HistUnit hist; unsigned istart; - /*! + /*! * \brief add a histogram to data, * do linear scan, start from istart */ @@ -282,7 +285,7 @@ class CQHistMaker: public HistMaker { utils::Assert(istart != hist.size, "the bound variable must be max"); hist.data[istart].Add(gpair, info, ridx); } - /*! + /*! 
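 * (Because istart persists between calls, feeding a column in ascending
 * feature-value order lets the scan resume where the previous value
 * stopped, so a whole column costs O(entries + bins) rather than one
 * search per entry.)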
* \brief add a histogram to data, * do linear scan, start from istart */ @@ -302,7 +305,7 @@ class CQHistMaker: public HistMaker { feat_helper.InitByCol(p_fmat, tree); feat_helper.SampleCol(this->param.colsample_bytree, p_fset); } - // code to create histogram + // code to create histogram virtual void CreateHist(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, @@ -313,7 +316,7 @@ class CQHistMaker: public HistMaker { std::fill(feat2workindex.begin(), feat2workindex.end(), -1); for (size_t i = 0; i < fset.size(); ++i) { feat2workindex[fset[i]] = static_cast(i); - } + } // start to work this->wspace.Init(this->param, 1); // if it is C++11, use lazy evaluation for Allreduce, @@ -350,11 +353,11 @@ class CQHistMaker: public HistMaker { // sync the histogram // if it is C++11, use lazy evaluation for Allreduce #if __cplusplus >= 201103L - this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), + this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size(), lazy_get_hist); #else - this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size()); -#endif + this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size()); +#endif } virtual void ResetPositionAfterSplit(IFMatrix *p_fmat, const RegTree &tree) { @@ -374,11 +377,11 @@ class CQHistMaker: public HistMaker { feat2workindex[fset[i]] = static_cast(freal_set.size()); freal_set.push_back(fset[i]); } else { - feat2workindex[fset[i]] = -2; + feat2workindex[fset[i]] = -2; } } this->GetNodeStats(gpair, *p_fmat, tree, info, - &thread_stats, &node_stats); + &thread_stats, &node_stats); sketchs.resize(this->qexpand.size() * freal_set.size()); for (size_t i = 0; i < sketchs.size(); ++i) { sketchs[i].Init(info.num_row, this->param.sketch_eps); @@ -394,7 +397,8 @@ class CQHistMaker: public HistMaker { #if __cplusplus >= 201103L auto lazy_get_summary = [&]() #endif - {// get smmary + { + // get smmary thread_sketch.resize(this->get_nthread()); // number of rows in const size_t nrows = p_fmat->buffered_rowset().size(); @@ -457,9 +461,9 @@ class CQHistMaker: public HistMaker { this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); } else { utils::Assert(offset == -2, "BUG in mark"); - bst_float cpt = feat_helper.MaxValue(fset[i]); + bst_float cpt = feat_helper.MaxValue(fset[i]); this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps); - this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); + this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); } } // reserve last value for global statistics @@ -470,7 +474,7 @@ class CQHistMaker: public HistMaker { (fset.size() + 1) * this->qexpand.size() + 1, "cut space inconsistent"); } - + private: inline void UpdateHistCol(const std::vector &gpair, const ColBatch::Inst &c, @@ -554,9 +558,9 @@ class CQHistMaker: public HistMaker { } } else { for (size_t i = 0; i < this->qexpand.size(); ++i) { - const unsigned nid = this->qexpand[i]; + const unsigned nid = this->qexpand[i]; sbuilder[nid].sum_total = static_cast(nstats[nid].sum_hess); - } + } } // if only one value, no need to do second pass if (c[0].fvalue == c[c.length-1].fvalue) { @@ -589,7 +593,7 @@ class CQHistMaker: public HistMaker { if (nid >= 0) { sbuilder[nid].Push(c[j + i].fvalue, buf_hess[i], max_size); } - } + } } for (bst_uint j = align_length; j < c.length; ++j) { const bst_uint ridx = c[j].index; @@ -617,7 +621,7 @@ class CQHistMaker: public HistMaker { // temp space to map feature id to working index 
std::vector feat2workindex; // set of index from fset that are real - std::vector freal_set; + std::vector freal_set; // thread temp data std::vector< std::vector > thread_sketch; // used to hold statistics @@ -631,18 +635,18 @@ class CQHistMaker: public HistMaker { // reducer for summary rabit::SerializeReducer sreducer; // per node, per feature sketch - std::vector< utils::WXQuantileSketch > sketchs; + std::vector< utils::WXQuantileSketch > sketchs; }; template -class QuantileHistMaker: public HistMaker { +class QuantileHistMaker: public HistMaker { protected: typedef utils::WXQuantileSketch WXQSketch; virtual void ResetPosAndPropose(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, const std::vector &fset, - const RegTree &tree) { + const RegTree &tree) { // initialize the data structure int nthread = BaseMaker::get_nthread(); sketchs.resize(this->qexpand.size() * tree.param.num_feature); @@ -658,7 +662,7 @@ class QuantileHistMaker: public HistMaker { utils::ParallelGroupBuilder builder(&col_ptr, &col_data, &thread_col_ptr); builder.InitBudget(tree.param.num_feature, nthread); - const bst_omp_uint nbatch = static_cast(batch.size); + const bst_omp_uint nbatch = static_cast(batch.size); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nbatch; ++i) { RowBatch::Inst inst = batch[i]; @@ -667,11 +671,11 @@ class QuantileHistMaker: public HistMaker { if (nid >= 0) { if (!tree[nid].is_leaf()) { this->position[ridx] = nid = HistMaker::NextLevel(inst, tree, nid); - } + } if (this->node2workindex[nid] < 0) { this->position[ridx] = ~nid; - } else{ - for (bst_uint j = 0; j < inst.length; ++j) { + } else { + for (bst_uint j = 0; j < inst.length; ++j) { builder.AddBudget(inst[j].index, omp_get_thread_num()); } } @@ -712,8 +716,8 @@ class QuantileHistMaker: public HistMaker { summary_array[i].Reserve(max_size); summary_array[i].SetPrune(out, max_size); } - - size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); + + size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); sreducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size()); // now we get the final result of sketch, setup the cut this->wspace.cut.clear(); diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index e7e5f9f0b..dc99e94e4 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ -#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_prune-inl.hpp - * \brief prune a tree given the statistics + * \brief prune a tree given the statistics * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ +#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ + #include #include "./param.h" #include "./updater.h" @@ -37,9 +39,10 @@ class TreePruner: public IUpdater { param.learning_rate = lr; syncher.Update(gpair, p_fmat, info, trees); } + private: // try to prune off current leaf - inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { + inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*) if (tree[nid].is_root()) return npruned; int pid = tree[nid].parent(); RegTree::NodeStat &s = tree.stat(pid); @@ -51,10 +54,10 @@ class TreePruner: public IUpdater { return this->TryPruneLeaf(tree, pid, depth - 1, npruned+2); } else { return npruned; - } + } } /*! 
\brief do prunning of a tree */ - inline void DoPrune(RegTree &tree) { + inline void DoPrune(RegTree &tree) { // NOLINT(*) int npruned = 0; // initialize auxiliary statistics for (int nid = 0; nid < tree.param.num_nodes; ++nid) { diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index 8613c8ea6..b6c5ee89e 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ -#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_refresh-inl.hpp * \brief refresh the statistics and leaf value on the tree on the dataset * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ +#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ + #include #include #include "../sync/sync.h" @@ -27,7 +29,7 @@ class TreeRefresher: public IUpdater { virtual void Update(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, - const std::vector &trees) { + const std::vector &trees) { if (trees.size() == 0) return; // number of threads // thread temporal space @@ -100,7 +102,7 @@ class TreeRefresher: public IUpdater { float lr = param.learning_rate; param.learning_rate = lr / trees.size(); int offset = 0; - for (size_t i = 0; i < trees.size(); ++i) { + for (size_t i = 0; i < trees.size(); ++i) { for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) { this->Refresh(BeginPtr(stemp[0]) + offset, rid, trees[i]); } @@ -147,7 +149,7 @@ class TreeRefresher: public IUpdater { // training parameter TrainParam param; // reducer - rabit::Reducer reducer; + rabit::Reducer reducer; }; } // namespace tree diff --git a/src/tree/updater_skmaker-inl.hpp b/src/tree/updater_skmaker-inl.hpp index 6bc2fc39a..ade22011b 100644 --- a/src/tree/updater_skmaker-inl.hpp +++ b/src/tree/updater_skmaker-inl.hpp @@ -1,11 +1,13 @@ -#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_skmaker-inl.hpp * \brief use approximation sketch to construct a tree, a refresh is needed to make the statistics exactly correct * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ + #include #include #include "../sync/sync.h" @@ -30,7 +32,7 @@ class SketchMaker: public BaseMaker { } param.learning_rate = lr; } - + protected: inline void Update(const std::vector &gpair, IFMatrix *p_fmat, @@ -79,9 +81,9 @@ class SketchMaker: public BaseMaker { double pos_grad; /*! \brief sum of all negative gradient */ double neg_grad; - /*! \brief sum of hessian statistics */ + /*! \brief sum of hessian statistics */ double sum_hess; - explicit SKStats(void) {} + SKStats(void) {} // constructor explicit SKStats(const TrainParam ¶m) { this->Clear(); @@ -123,7 +125,7 @@ class SketchMaker: public BaseMaker { sum_hess += b.sum_hess; } /*! \brief same as add, reduce is used in All Reduce */ - inline static void Reduce(SKStats &a, const SKStats &b) { + inline static void Reduce(SKStats &a, const SKStats &b) { // NOLINT(*) a.Add(b); } /*! 
\brief set leaf vector value based on statistics */ @@ -139,7 +141,7 @@ class SketchMaker: public BaseMaker { sketchs[i].Init(info.num_row, this->param.sketch_eps); } thread_sketch.resize(this->get_nthread()); - // number of rows in + // number of rows in const size_t nrows = p_fmat->buffered_rowset().size(); // start accumulating statistics utils::IIterator *iter = p_fmat->ColIterator(); @@ -156,7 +158,7 @@ class SketchMaker: public BaseMaker { batch[i].length == nrows, &thread_sketch[omp_get_thread_num()]); } - } + } // setup maximum size unsigned max_size = param.max_sketch_size(); // synchronize sketch @@ -167,8 +169,8 @@ class SketchMaker: public BaseMaker { summary_array[i].Reserve(max_size); summary_array[i].SetPrune(out, max_size); } - size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); - sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size()); + size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); + sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size()); } // update sketch information in column fid inline void UpdateSketchCol(const std::vector &gpair, @@ -209,7 +211,7 @@ class SketchMaker: public BaseMaker { const unsigned nid = this->qexpand[i]; sbuilder[3 * nid + 0].sum_total = static_cast(nstats[nid].pos_grad); sbuilder[3 * nid + 1].sum_total = static_cast(nstats[nid].neg_grad); - sbuilder[3 * nid + 2].sum_total = static_cast(nstats[nid].sum_hess); + sbuilder[3 * nid + 2].sum_total = static_cast(nstats[nid].sum_hess); } } // if only one value, no need to do second pass @@ -217,7 +219,9 @@ class SketchMaker: public BaseMaker { for (size_t i = 0; i < this->qexpand.size(); ++i) { const int nid = this->qexpand[i]; for (int k = 0; k < 3; ++k) { - sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, static_cast(sbuilder[3 * nid + k].sum_total)); + sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, + static_cast( + sbuilder[3 * nid + k].sum_total)); } } return; @@ -250,7 +254,7 @@ class SketchMaker: public BaseMaker { sbuilder[3 * nid + k].Finalize(max_size); } } - } + } inline void SyncNodeStats(void) { utils::Assert(qexpand.size() != 0, "qexpand must not be empty"); std::vector tmp(qexpand.size()); @@ -272,12 +276,12 @@ class SketchMaker: public BaseMaker { std::vector sol(qexpand.size()); bst_omp_uint nexpand = static_cast(qexpand.size()); #pragma omp parallel for schedule(dynamic, 1) - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { const int nid = qexpand[wid]; utils::Assert(node2workindex[nid] == static_cast(wid), "node2workindex inconsistent"); SplitEntry &best = sol[wid]; - for (bst_uint fid = 0; fid < num_feature; ++ fid) { + for (bst_uint fid = 0; fid < num_feature; ++fid) { unsigned base = (wid * p_tree->param.num_feature + fid) * 3; EnumerateSplit(summary_array[base + 0], summary_array[base + 1], @@ -286,7 +290,7 @@ class SketchMaker: public BaseMaker { } } // get the best result, we can synchronize the solution - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { const int nid = qexpand[wid]; const SplitEntry &best = sol[wid]; // set up the values @@ -337,7 +341,7 @@ class SketchMaker: public BaseMaker { feat_sum.neg_grad = neg_grad.data[neg_grad.size - 1].rmax; feat_sum.sum_hess = sum_hess.data[sum_hess.size - 1].rmax; size_t ipos = 0, ineg = 0, ihess = 0; - for (size_t i = 1; i < fsplits.size(); ++i) { + for (size_t i = 1; i < fsplits.size(); ++i) { WXQSketch::Entry pos = 
pos_grad.Query(fsplits[i], ipos); WXQSketch::Entry neg = neg_grad.Query(fsplits[i], ineg); WXQSketch::Entry hess = sum_hess.Query(fsplits[i], ihess); @@ -345,11 +349,11 @@ class SketchMaker: public BaseMaker { s.pos_grad = 0.5f * (pos.rmin + pos.rmax - pos.wmin); s.neg_grad = 0.5f * (neg.rmin + neg.rmax - neg.wmin); s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin); - c.SetSubstract(node_sum, s); + c.SetSubstract(node_sum, s); // forward if (s.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; best->Update(static_cast(loss_chg), fid, fsplits[i], false); } // backward @@ -357,22 +361,23 @@ class SketchMaker: public BaseMaker { s.SetSubstract(node_sum, c); if (s.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; best->Update(static_cast(loss_chg), fid, fsplits[i], true); - } + } } - {// all including + { + // all including SKStats s = feat_sum, c; c.SetSubstract(node_sum, s); if (s.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { bst_float cpt = fsplits.back(); - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; best->Update(static_cast(loss_chg), fid, cpt + fabsf(cpt) + 1.0f, false); } } } - + // thread temp data // used to hold temporal sketch std::vector< std::vector > thread_sketch; @@ -389,6 +394,6 @@ class SketchMaker: public BaseMaker { // per node, per feature sketch std::vector< utils::WXQuantileSketch > sketchs; }; -} // tree -} // xgboost -#endif +} // namespace tree +} // namespace xgboost +#endif // XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ diff --git a/src/tree/updater_sync-inl.hpp b/src/tree/updater_sync-inl.hpp index 2aa534aa8..e76d1f76d 100644 --- a/src/tree/updater_sync-inl.hpp +++ b/src/tree/updater_sync-inl.hpp @@ -1,18 +1,21 @@ -#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ -#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_sync-inl.hpp * \brief synchronize the tree in all distributed nodes * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ +#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ + #include +#include #include #include "../sync/sync.h" #include "./updater.h" namespace xgboost { namespace tree { -/*! +/*! * \brief syncher that synchronize the tree in all distributed nodes * can implement various strategies, so far it is always set to node 0's tree */ @@ -28,7 +31,7 @@ class TreeSyncher: public IUpdater { const std::vector &trees) { this->SyncTrees(trees); } - + private: // synchronize the trees in different nodes, take tree from rank 0 inline void SyncTrees(const std::vector &trees) { @@ -43,7 +46,7 @@ class TreeSyncher: public IUpdater { } fs.Seek(0); rabit::Broadcast(&s_model, 0); - for (size_t i = 0; i < trees.size(); ++i) { + for (size_t i = 0; i < trees.size(); ++i) { trees[i]->LoadModel(fs); } } diff --git a/src/utils/config.h b/src/utils/config.h index 19f4980cf..43d7bc8bd 100644 --- a/src/utils/config.h +++ b/src/utils/config.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_UTILS_CONFIG_H_ -#define XGBOOST_UTILS_CONFIG_H_ /*! 
+ * Copyright 2014 by Contributors * \file config.h * \brief helper class to load in configures from file * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_CONFIG_H_ +#define XGBOOST_UTILS_CONFIG_H_ + #include #include #include @@ -14,26 +16,26 @@ namespace xgboost { namespace utils { -/*! +/*! * \brief base implementation of config reader */ class ConfigReaderBase { public: - /*! + /*! * \brief get current name, called after Next returns true - * \return current parameter name + * \return current parameter name */ inline const char *name(void) const { return s_name.c_str(); } - /*! + /*! * \brief get current value, called after Next returns true - * \return current parameter value + * \return current parameter value */ inline const char *val(void) const { return s_val.c_str(); } - /*! + /*! * \brief move iterator to next position * \return true if there is value in next position */ @@ -55,7 +57,7 @@ class ConfigReaderBase { protected: /*! * \brief to be implemented by subclass, - * get next token, return EOF if end of file + * get next token, return EOF if end of file */ virtual char GetChar(void) = 0; /*! \brief to be implemented by child, check if end of stream */ @@ -144,9 +146,9 @@ class ConfigReaderBase { */ class ConfigStreamReader: public ConfigReaderBase { public: - /*! - * \brief constructor - * \param istream input stream + /*! + * \brief constructor + * \param istream input stream */ explicit ConfigStreamReader(std::istream &fin) : fin(fin) {} @@ -163,13 +165,13 @@ class ConfigStreamReader: public ConfigReaderBase { std::istream &fin; }; -/*! +/*! * \brief an iterator that iterates over a configure file and gets the configures */ class ConfigIterator: public ConfigStreamReader { public: - /*! - * \brief constructor + /*! + * \brief constructor * \param fname name of configure file */ explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) { diff --git a/src/utils/group_data.h b/src/utils/group_data.h index 6e12a39ff..29d391aa8 100644 --- a/src/utils/group_data.h +++ b/src/utils/group_data.h @@ -1,6 +1,5 @@ -#ifndef XGBOOST_UTILS_GROUP_DATA_H_ -#define XGBOOST_UTILS_GROUP_DATA_H_ /*! + * Copyright 2014 by Contributors * \file group_data.h * \brief this file defines utils to group data by integer keys * Input: given input sequence (key,value), (k1,v1), (k2,v2) @@ -12,6 +11,11 @@ * The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_GROUP_DATA_H_ +#define XGBOOST_UTILS_GROUP_DATA_H_ + +#include + namespace xgboost { namespace utils { /*! @@ -32,10 +36,10 @@ struct ParallelGroupBuilder { std::vector< std::vector > *p_thread_rptr) : rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) { } - + public: /*! - * \brief step 1: initialize the helper, with hint of number keys + * \brief step 1: initialize the helper, with hint of number keys * and thread used in the construction * \param nkeys number of keys in the matrix, can be smaller than expected * \param nthread number of thread that will be used in construction @@ -56,7 +60,7 @@ struct ParallelGroupBuilder { inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) { std::vector &trptr = thread_rptr[threadid]; if (trptr.size() < key + 1) { - trptr.resize(key + 1, 0); + trptr.resize(key + 1, 0); } trptr[key] += nelem; } @@ -84,13 +88,13 @@ struct ParallelGroupBuilder { data.resize(start); } /*! 
- * \brief step 4: add data to the allocated space, + * \brief step 4: add data to the allocated space, * the calls to this function should be exactly match previous call to AddBudget * - * \param key the key of + * \param key the key of * \param threadid the id of thread that calls this function */ - inline void Push(size_t key, ValueType value, int threadid) { + inline void Push(size_t key, ValueType value, int threadid) { SizeType &rp = thread_rptr[threadid][key]; data[rp++] = value; } diff --git a/src/utils/io.h b/src/utils/io.h index d96d16e2a..5b366e51c 100644 --- a/src/utils/io.h +++ b/src/utils/io.h @@ -1,16 +1,19 @@ -#ifndef XGBOOST_UTILS_IO_H -#define XGBOOST_UTILS_IO_H +/*! + * Copyright 2014 by Contributors + * \file io.h + * \brief general stream interface for serialization, I/O + * \author Tianqi Chen + */ + +#ifndef XGBOOST_UTILS_IO_H_ +#define XGBOOST_UTILS_IO_H_ #include #include #include #include #include "./utils.h" #include "../sync/sync.h" -/*! - * \file io.h - * \brief general stream interface for serialization, I/O - * \author Tianqi Chen - */ + namespace xgboost { namespace utils { // reuse the definitions of streams @@ -23,7 +26,7 @@ typedef rabit::utils::MemoryBufferStream MemoryBufferStream; class FileStream : public ISeekStream { public: explicit FileStream(std::FILE *fp) : fp(fp) {} - explicit FileStream(void) { + FileStream(void) { this->fp = NULL; } virtual size_t Read(void *ptr, size_t size) { @@ -33,7 +36,7 @@ class FileStream : public ISeekStream { std::fwrite(ptr, size, 1, fp); } virtual void Seek(size_t pos) { - std::fseek(fp, static_cast(pos), SEEK_SET); + std::fseek(fp, static_cast(pos), SEEK_SET); // NOLINT(*) } virtual size_t Tell(void) { return std::ftell(fp); @@ -42,7 +45,7 @@ class FileStream : public ISeekStream { return std::feof(fp) != 0; } inline void Close(void) { - if (fp != NULL){ + if (fp != NULL) { std::fclose(fp); fp = NULL; } } @@ -52,6 +55,5 @@ class FileStream : public ISeekStream { }; } // namespace utils } // namespace xgboost - #include "./base64-inl.h" -#endif +#endif // XGBOOST_UTILS_IO_H_ diff --git a/src/utils/thread.h b/src/utils/thread.h index ef6335a74..78b488cff 100644 --- a/src/utils/thread.h +++ b/src/utils/thread.h @@ -1,16 +1,17 @@ -#ifndef XGBOOST_UTILS_THREAD_H -#define XGBOOST_UTILS_THREAD_H /*! + * Copyright by Contributors * \file thread.h - * \brief this header include the minimum necessary resource for multi-threading + * \brief this header include the minimum necessary resource + * for multi-threading that can be compiled in windows, linux, mac * \author Tianqi Chen - * Acknowledgement: this file is adapted from SVDFeature project, by same author. - * The MAC support part of this code is provided by Artemy Kolchinsky */ +#ifndef XGBOOST_UTILS_THREAD_H_ // NOLINT(*) +#define XGBOOST_UTILS_THREAD_H_ // NOLINT(*) + #ifdef _MSC_VER -#include "utils.h" #include #include +#include "../xgboost/utils.h" namespace xgboost { namespace utils { /*! 
\brief simple semaphore used for synchronization */ @@ -18,29 +19,80 @@ class Semaphore { public : inline void Init(int init_val) { sem = CreateSemaphore(NULL, init_val, 10, NULL); - utils::Assert(sem != NULL, "create Semaphore error"); + utils::Check(sem != NULL, "create Semaphore error"); } inline void Destroy(void) { CloseHandle(sem); } inline void Wait(void) { - utils::Assert(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error"); + utils::Check(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error"); } inline void Post(void) { - utils::Assert(ReleaseSemaphore(sem, 1, NULL) != 0, "ReleaseSemaphore error"); + utils::Check(ReleaseSemaphore(sem, 1, NULL) != 0, "ReleaseSemaphore error"); } + private: HANDLE sem; }; + +/*! \brief mutex under windows */ +class Mutex { + public: + inline void Init(void) { + utils::Check(InitializeCriticalSectionAndSpinCount(&mutex, 0x00000400) != 0, + "Mutex::Init fail"); + } + inline void Lock(void) { + EnterCriticalSection(&mutex); + } + inline void Unlock(void) { + LeaveCriticalSection(&mutex); + } + inline void Destroy(void) { + DeleteCriticalSection(&mutex); + } + + private: + friend class ConditionVariable; + CRITICAL_SECTION mutex; +}; + +// conditional variable that uses pthread +class ConditionVariable { + public: + // initialize conditional variable + inline void Init(void) { + InitializeConditionVariable(&cond); + } + // destroy the thread + inline void Destroy(void) { + // DeleteConditionVariable(&cond); + } + // wait on the conditional variable + inline void Wait(Mutex *mutex) { + utils::Check(SleepConditionVariableCS(&cond, &(mutex->mutex), INFINITE) != 0, + "ConditionVariable:Wait fail"); + } + inline void Broadcast(void) { + WakeAllConditionVariable(&cond); + } + inline void Signal(void) { + WakeConditionVariable(&cond); + } + + private: + CONDITION_VARIABLE cond; +}; + /*! 
\brief simple thread that wraps windows thread */ class Thread { private: HANDLE thread_handle; - unsigned thread_id; + unsigned thread_id; public: - inline void Start(unsigned int __stdcall entry(void*), void *param) { + inline void Start(unsigned int __stdcall entry(void*p), void *param) { thread_handle = (HANDLE)_beginthreadex(NULL, 0, entry, param, 0, &thread_id); - } + } inline int Join(void) { WaitForSingleObject(thread_handle, INFINITE); return 0; @@ -54,39 +106,41 @@ inline void ThreadExit(void *status) { } // namespace utils } // namespace xgboost #else -// thread interface using g++ -extern "C" { +// thread interface using g++ #include #include -} +#include namespace xgboost { namespace utils { /*!\brief semaphore class */ class Semaphore { #ifdef __APPLE__ + private: sem_t* semPtr; - char sema_name[20]; + char sema_name[20]; + private: inline void GenRandomString(char *s, const int len) { - static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" ; + static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; for (int i = 0; i < len; ++i) { s[i] = alphanum[rand() % (sizeof(alphanum) - 1)]; } s[len] = 0; } + public: inline void Init(int init_val) { - sema_name[0]='/'; - sema_name[1]='s'; - sema_name[2]='e'; - sema_name[3]='/'; + sema_name[0] = '/'; + sema_name[1] = 's'; + sema_name[2] = 'e'; + sema_name[3] = '/'; GenRandomString(&sema_name[4], 16); - if((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) { + if ((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) { perror("sem_open"); exit(1); } - utils::Assert(semPtr != NULL, "create Semaphore error"); + utils::Check(semPtr != NULL, "create Semaphore error"); } inline void Destroy(void) { if (sem_close(semPtr) == -1) { @@ -103,53 +157,93 @@ class Semaphore { } inline void Post(void) { sem_post(semPtr); - } + } #else + private: sem_t sem; + public: inline void Init(int init_val) { - sem_init(&sem, 0, init_val); + if (sem_init(&sem, 0, init_val) != 0) { + utils::Error("Semaphore.Init:%s", strerror(errno)); + } } inline void Destroy(void) { - sem_destroy(&sem); + if (sem_destroy(&sem) != 0) { + utils::Error("Semaphore.Destroy:%s", strerror(errno)); + } } inline void Wait(void) { - sem_wait(&sem); + if (sem_wait(&sem) != 0) { + utils::Error("Semaphore.Wait:%s", strerror(errno)); + } } inline void Post(void) { - sem_post(&sem); + if (sem_post(&sem) != 0) { + utils::Error("Semaphore.Post:%s", strerror(errno)); + } } - #endif + #endif }; -// helper for c thread -// used to strictly call c++ function from pthread -struct ThreadContext { - void *(*entry)(void*); - void *param; -}; -extern "C" { - inline void *RunThreadContext(void *ctx_) { - ThreadContext *ctx = reinterpret_cast(ctx_); - void *ret = (*ctx->entry)(ctx->param); - delete ctx; - return ret; +// mutex that works with pthread +class Mutex { + public: + inline void Init(void) { + pthread_mutex_init(&mutex, NULL); } -} + inline void Lock(void) { + pthread_mutex_lock(&mutex); + } + inline void Unlock(void) { + pthread_mutex_unlock(&mutex); + } + inline void Destroy(void) { + pthread_mutex_destroy(&mutex); + } + + private: + friend class ConditionVariable; + pthread_mutex_t mutex; +}; + +// conditional variable that uses pthread +class ConditionVariable { + public: + // initialize conditional variable + inline void Init(void) { + pthread_cond_init(&cond, NULL); + } + // destroy the thread + inline void Destroy(void) { + pthread_cond_destroy(&cond); + } + // wait on the conditional variable + inline void Wait(Mutex 
*mutex) { + pthread_cond_wait(&cond, &(mutex->mutex)); + } + inline void Broadcast(void) { + pthread_cond_broadcast(&cond); + } + inline void Signal(void) { + pthread_cond_signal(&cond); + } + + private: + pthread_cond_t cond; +}; + /*!\brief simple thread class */ class Thread { private: - pthread_t thread; - + pthread_t thread; public : - inline void Start(void *entry(void*), void *param) { + inline void Start(void * entry(void*), void *param) { // NOLINT(*) pthread_attr_t attr; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - ThreadContext *ctx = new ThreadContext(); - ctx->entry = entry; ctx->param = param; - pthread_create(&thread, &attr, RunThreadContext, ctx); + pthread_create(&thread, &attr, entry, param); } inline int Join(void) { void *status; @@ -159,9 +253,8 @@ class Thread { inline void ThreadExit(void *status) { pthread_exit(status); } - } // namespace utils } // namespace xgboost #define XGBOOST_THREAD_PREFIX void * -#endif -#endif +#endif // Linux +#endif // XGBOOST_UTILS_THREAD_H_ NOLINT(*) From 1581de08da0e1fb5919ab85168389eb8e72633cf Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 18:44:01 -0700 Subject: [PATCH 26/59] fix all utils --- src/utils/base64-inl.h | 37 ++++++++------ src/utils/bitmap.h | 22 ++++---- src/utils/fmap.h | 8 +-- src/utils/group_data.h | 3 +- src/utils/iterator.h | 12 +++-- src/utils/math.h | 9 ++-- src/utils/omp.h | 12 +++-- src/utils/quantile.h | 105 ++++++++++++++++++++------------------ src/utils/random.h | 27 +++++----- src/utils/thread_buffer.h | 44 ++++++++-------- src/utils/utils.h | 24 +++++---- 11 files changed, 164 insertions(+), 139 deletions(-) diff --git a/src/utils/base64-inl.h b/src/utils/base64-inl.h index 9fd5fc49f..49cd65254 100644 --- a/src/utils/base64-inl.h +++ b/src/utils/base64-inl.h @@ -1,13 +1,16 @@ -#ifndef XGBOOST_UTILS_BASE64_INL_H_ -#define XGBOOST_UTILS_BASE64_INL_H_ /*! + * Copyright 2014 by Contributors * \file base64.h * \brief data stream support to input and output from/to base64 stream * base64 is easier to store and pass as text format in mapreduce * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_BASE64_INL_H_ +#define XGBOOST_UTILS_BASE64_INL_H_ + #include #include +#include #include "./io.h" namespace xgboost { @@ -15,7 +18,7 @@ namespace utils { /*! \brief buffer reader of the stream that allows you to get */ class StreamBufferReader { public: - StreamBufferReader(size_t buffer_size) + explicit StreamBufferReader(size_t buffer_size) :stream_(NULL), read_len_(1), read_ptr_(1) { buffer_.resize(buffer_size); @@ -45,7 +48,7 @@ class StreamBufferReader { inline bool AtEnd(void) const { return read_len_ == 0; } - + private: /*! \brief the underlying stream */ IStream *stream_; @@ -75,7 +78,7 @@ const char DecodeTable[] = { }; static const char EncodeTable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -} // namespace base64 +} // namespace base64 /*! \brief the stream that reads from base64, note we take from file pointers */ class Base64InStream: public IStream { public: @@ -83,8 +86,8 @@ class Base64InStream: public IStream { reader_.set_stream(fs); num_prev = 0; tmp_ch = 0; } - /*! - * \brief initialize the stream position to beginning of next base64 stream + /*! 
+ * \brief initialize the stream position to beginning of next base64 stream * call this function before actually start read */ inline void InitPosition(void) { @@ -132,19 +135,19 @@ class Base64InStream: public IStream { { // second byte utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), - "invalid base64 format"); + "invalid base64 format"); nvalue |= DecodeTable[tmp_ch] << 12; *cptr++ = (nvalue >> 16) & 0xFF; --tlen; } { // third byte utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), - "invalid base64 format"); + "invalid base64 format"); // handle termination if (tmp_ch == '=') { utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format"); utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), - "invalid base64 format"); + "invalid base64 format"); break; } nvalue |= DecodeTable[tmp_ch] << 6; @@ -157,10 +160,10 @@ class Base64InStream: public IStream { { // fourth byte utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), - "invalid base64 format"); + "invalid base64 format"); if (tmp_ch == '=') { utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), - "invalid base64 format"); + "invalid base64 format"); break; } nvalue |= DecodeTable[tmp_ch]; @@ -240,13 +243,13 @@ class Base64OutStream: public IStream { if (endch != EOF) PutChar(endch); this->Flush(); } - - private: + + private: IStream *fp; int buf_top; unsigned char buf[4]; std::string out_buf; - const static size_t kBufferSize = 256; + static const size_t kBufferSize = 256; inline void PutChar(char ch) { out_buf += ch; @@ -260,5 +263,5 @@ class Base64OutStream: public IStream { } }; } // namespace utils -} // namespace rabit -#endif // RABIT_LEARN_UTILS_BASE64_INL_H_ +} // namespace xgboost +#endif // XGBOOST_UTILS_BASE64_INL_H_ diff --git a/src/utils/bitmap.h b/src/utils/bitmap.h index ba12caf41..eecccbda5 100644 --- a/src/utils/bitmap.h +++ b/src/utils/bitmap.h @@ -1,11 +1,13 @@ -#ifndef XGBOOST_UTILS_BITMAP_H_ -#define XGBOOST_UTILS_BITMAP_H_ /*! + * Copyright 2014 by Contributors * \file bitmap.h * \brief a simple implement of bitmap * NOTE: bitmap is only threadsafe per word access, remember this when using bitmap * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_BITMAP_H_ +#define XGBOOST_UTILS_BITMAP_H_ + #include #include "./utils.h" #include "./omp.h" @@ -16,22 +18,22 @@ namespace utils { struct BitMap { /*! \brief internal data structure */ std::vector data; - /*! - * \brief resize the bitmap to be certain size + /*! + * \brief resize the bitmap to be certain size * \param size the size of bitmap */ inline void Resize(size_t size) { data.resize((size + 31U) >> 5, 0); } - /*! - * \brief query the i-th position of bitmap - * \param i the position in + /*! + * \brief query the i-th position of bitmap + * \param i the position in */ inline bool Get(size_t i) const { return (data[i >> 5] >> (i & 31U)) & 1U; } - /*! - * \brief set i-th position to true + /*! + * \brief set i-th position to true * \param i position index */ inline void SetTrue(size_t i) { @@ -63,4 +65,4 @@ struct BitMap { }; } // namespace utils } // namespace xgboost -#endif +#endif // XGBOOST_UTILS_BITMAP_H_ diff --git a/src/utils/fmap.h b/src/utils/fmap.h index 607f37013..218a61aa4 100644 --- a/src/utils/fmap.h +++ b/src/utils/fmap.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_UTILS_FMAP_H_ -#define XGBOOST_UTILS_FMAP_H_ /*! 
+ * Copyright 2014 by Contributors * \file fmap.h * \brief helper class that holds the feature names and interpretations * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_FMAP_H_ +#define XGBOOST_UTILS_FMAP_H_ + #include #include #include @@ -78,4 +80,4 @@ class FeatMap { } // namespace utils } // namespace xgboost -#endif // XGBOOST_FMAP_H_ +#endif // XGBOOST_UTILS_FMAP_H_ diff --git a/src/utils/group_data.h b/src/utils/group_data.h index 29d391aa8..31f9c3a50 100644 --- a/src/utils/group_data.h +++ b/src/utils/group_data.h @@ -111,5 +111,4 @@ struct ParallelGroupBuilder { }; } // namespace utils } // namespace xgboost -#endif - +#endif // XGBOOST_UTILS_GROUP_DATA_H_ diff --git a/src/utils/iterator.h b/src/utils/iterator.h index 3f5b23310..5d986b2e4 100644 --- a/src/utils/iterator.h +++ b/src/utils/iterator.h @@ -1,11 +1,13 @@ -#ifndef XGBOOST_UTILS_ITERATOR_H -#define XGBOOST_UTILS_ITERATOR_H -#include /*! + * Copyright 2014 by Contributors * \file iterator.h * \brief itertator interface * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_ITERATOR_H_ +#define XGBOOST_UTILS_ITERATOR_H_ +#include + namespace xgboost { namespace utils { /*! @@ -16,7 +18,7 @@ template class IIterator { public: /*! - * \brief set the parameter + * \brief set the parameter * \param name name of parameter * \param val value of parameter */ @@ -36,5 +38,5 @@ class IIterator { } // namespace utils } // namespace xgboost -#endif +#endif // XGBOOST_UTILS_ITERATOR_H_ diff --git a/src/utils/math.h b/src/utils/math.h index e0bf8c466..7609df076 100644 --- a/src/utils/math.h +++ b/src/utils/math.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_UTILS_MATH_H_ -#define XGBOOST_UTILS_MATH_H_ /*! + * Copyright 2014 by Contributors * \file math.h * \brief support additional math * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_MATH_H_ +#define XGBOOST_UTILS_MATH_H_ + #include namespace xgboost { @@ -28,7 +30,8 @@ inline T LogGamma(T v) { #if _MSC_VER >= 1800 return lgamma(v); #else -#pragma message ("Warning: lgamma function was not available until VS2013, poisson regression will be disabled") +#pragma message("Warning: lgamma function was not available until VS2013"\ + ", poisson regression will be disabled") utils::Error("lgamma function was not available until VS2013"); return static_cast(1.0); #endif diff --git a/src/utils/omp.h b/src/utils/omp.h index 87cad380e..ddd3467d9 100644 --- a/src/utils/omp.h +++ b/src/utils/omp.h @@ -1,16 +1,20 @@ -#ifndef XGBOOST_UTILS_OMP_H_ -#define XGBOOST_UTILS_OMP_H_ /*! + * Copyright 2014 by Contributors * \file omp.h * \brief header to handle OpenMP compatibility issues * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_OMP_H_ +#define XGBOOST_UTILS_OMP_H_ + #if defined(_OPENMP) #include #else #ifndef DISABLE_OPENMP // use pragma message instead of warning -#pragma message ("Warning: OpenMP is not available, xgboost will be compiled into single-thread code. 
Use OpenMP-enabled compiler to get benefit of multi-threading") +#pragma message("Warning: OpenMP is not available,"\ + "xgboost will be compiled into single-thread code."\ + "Use OpenMP-enabled compiler to get benefit of multi-threading") #endif inline int omp_get_thread_num() { return 0; } inline int omp_get_num_threads() { return 1; } @@ -25,6 +29,6 @@ typedef int bst_omp_uint; #else typedef unsigned bst_omp_uint; #endif -} // namespace xgboost +} // namespace xgboost #endif // XGBOOST_UTILS_OMP_H_ diff --git a/src/utils/quantile.h b/src/utils/quantile.h index 4e885e254..ffd9142da 100644 --- a/src/utils/quantile.h +++ b/src/utils/quantile.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_UTILS_QUANTILE_H_ -#define XGBOOST_UTILS_QUANTILE_H_ /*! + * Copyright 2014 by Contributors * \file quantile.h - * \brief util to compute quantiles + * \brief util to compute quantiles * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_QUANTILE_H_ +#define XGBOOST_UTILS_QUANTILE_H_ + #include #include #include @@ -37,8 +39,8 @@ struct WQSummary { // constructor Entry(RType rmin, RType rmax, RType wmin, DType value) : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {} - /*! - * \brief debug function, check Valid + /*! + * \brief debug function, check Valid * \param eps the tolerate level for violating the relation */ inline void CheckValid(RType eps = 0) const { @@ -65,7 +67,7 @@ struct WQSummary { // default constructor QEntry(void) {} // constructor - QEntry(DType value, RType weight) + QEntry(DType value, RType weight) : value(value), weight(weight) {} // comparator on value inline bool operator<(const QEntry &b) const { @@ -83,11 +85,11 @@ struct WQSummary { } else { queue[qtail - 1].weight += w; } - } + } inline void MakeSummary(WQSummary *out) { std::sort(queue.begin(), queue.begin() + qtail); out->size = 0; - // start update sketch + // start update sketch RType wsum = 0; // construct data with unique weights for (size_t i = 0; i < qtail;) { @@ -106,7 +108,7 @@ struct WQSummary { /*! \brief number of elements in the summary */ size_t size; // constructor - WQSummary(Entry *data, size_t size) + WQSummary(Entry *data, size_t size) : data(data), size(size) {} /*! * \return the maximum error of the Summary @@ -119,12 +121,12 @@ struct WQSummary { } return res; } - /*! + /*! * \brief query qvalue, start from istart * \param qvalue the value we query for * \param istart starting position */ - inline Entry Query(DType qvalue, size_t &istart) const { + inline Entry Query(DType qvalue, size_t &istart) const { // NOLINT(*) while (istart < size && qvalue > data[istart].value) { ++istart; } @@ -136,7 +138,7 @@ struct WQSummary { return data[istart]; } else { if (istart == 0) { - return Entry(0.0f, 0.0f, 0.0f, qvalue); + return Entry(0.0f, 0.0f, 0.0f, qvalue); } else { return Entry(data[istart - 1].rmin_next(), data[istart].rmax_prev(), @@ -154,12 +156,12 @@ struct WQSummary { */ inline void CopyFrom(const WQSummary &src) { size = src.size; - std::memcpy(data, src.data, sizeof(Entry) * size); - } - /*! - * \brief debug function, validate whether the summary + std::memcpy(data, src.data, sizeof(Entry) * size); + } + /*! 
+ * \brief debug function, validate whether the summary * run consistency check to check if it is a valid summary - * \param eps the tolerate error level, used when RType is floating point and + * \param eps the tolerate error level, used when RType is floating point and * some inconsistency could occur due to rounding error */ inline void CheckValid(RType eps) const { @@ -199,8 +201,8 @@ struct WQSummary { size_t i = 1, lastidx = 0; for (size_t k = 1; k < n; ++k) { RType dx2 = 2 * ((k * range) / n + begin); - // find first i such that d < (rmax[i+1] + rmin[i+1]) / 2 - while (i < src.size - 1 + // find first i such that d < (rmax[i+1] + rmin[i+1]) / 2 + while (i < src.size - 1 && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; utils::Assert(i != src.size - 1, "this cannot happen"); if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { @@ -217,7 +219,7 @@ struct WQSummary { data[size++] = src.data[src.size - 1]; } } - /*! + /*! * \brief set current summary to be merged summary of sa and sb * \param sa first input summary to be merged * \param sb second input summar to be merged @@ -230,7 +232,7 @@ struct WQSummary { if (sb.size == 0) { this->CopyFrom(sa); return; } - utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); + utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); const Entry *a = sa.data, *a_end = sa.data + sa.size; const Entry *b = sb.data, *b_end = sb.data + sb.size; // extended rmin value @@ -297,7 +299,7 @@ struct WXQSummary : public WQSummary { RType begin = src.data[0].rmax; size_t n = maxsize - 1, nbig = 0; RType range = src.data[src.size - 1].rmin - begin; - // prune off zero weights + // prune off zero weights if (range == 0.0f) { // special case, contain only two effective data pts this->data[0] = src.data[0]; @@ -331,7 +333,7 @@ struct WXQSummary : public WQSummary { utils::Printf("LOG: check quantile stats, nbig=%lu, n=%lu\n", nbig, n); utils::Printf("LOG: srcsize=%lu, maxsize=%lu, range=%g, chunk=%g\n", src.size, maxsize, static_cast(range), - static_cast(chunk)); + static_cast(chunk)); for (size_t i = 0; i < src.size; ++i) { utils::Printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i, src.data[i].rmin, src.data[i].rmax, src.data[i].wmin, @@ -352,7 +354,7 @@ struct WXQSummary : public WQSummary { RType maxdx2 = src.data[end].rmax_prev() * 2; for (; k < n; ++k) { RType dx2 = 2 * ((k * mrange) / n + begin); - if (dx2 >= maxdx2) break; + if (dx2 >= maxdx2) break; while (i < end && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { @@ -371,13 +373,13 @@ struct WXQSummary : public WQSummary { lastidx = end; } bid = end; - // shift base by the gap + // shift base by the gap begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev(); } } } }; -/*! +/*! * \brief traditional GK summary */ template @@ -405,7 +407,7 @@ struct GKSummary { // push data to the queue inline void Push(DType x, RType w) { queue[qtail++] = x; - } + } inline void MakeSummary(GKSummary *out) { std::sort(queue.begin(), queue.begin() + qtail); out->size = qtail; @@ -419,7 +421,7 @@ struct GKSummary { /*! \brief number of elements in the summary */ size_t size; GKSummary(Entry *data, size_t size) - : data(data), size(size) {} + : data(data), size(size) {} /*! \brief the maximum error of the summary */ inline RType MaxError(void) const { RType res = 0; @@ -432,7 +434,7 @@ struct GKSummary { inline RType MaxRank(void) const { return data[size - 1].rmax; } - /*! + /*! 
* \brief copy content from src * \param src source sketch */ @@ -450,8 +452,8 @@ struct GKSummary { << "[" << data[i].rmin << "," << data[i].rmax << "]" << std::endl; } - } - /*! + } + /*! * \brief set current summary to be pruned summary of src * assume data field is already allocated to be at least maxsize * \param src source summary @@ -486,8 +488,8 @@ struct GKSummary { } if (sb.size == 0) { this->CopyFrom(sa); return; - } - utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); + } + utils::Assert(sa.size > 0 && sb.size > 0, "invalid input for merge"); const Entry *a = sa.data, *a_end = sa.data + sa.size; const Entry *b = sb.data, *b_end = sb.data + sb.size; this->size = sa.size + sb.size; @@ -500,7 +502,7 @@ struct GKSummary { aprev_rmin = a->rmin; ++dst; ++a; } else { - *dst = Entry(aprev_rmin + b->rmin, + *dst = Entry(aprev_rmin + b->rmin, b->rmax + a->rmax - 1, b->value); bprev_rmin = b->rmin; ++dst; ++b; @@ -537,15 +539,15 @@ class QuantileSketchTemplate { /*! \brief type of summary type */ typedef TSummary Summary; /*! \brief the entry type */ - typedef typename Summary::Entry Entry; + typedef typename Summary::Entry Entry; /*! \brief same as summary, but use STL to backup the space */ struct SummaryContainer : public Summary { std::vector space; - SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) { + SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) { this->space = src.space; this->data = BeginPtr(this->space); } - SummaryContainer(void) : Summary(NULL, 0) { + SummaryContainer(void) : Summary(NULL, 0) { } /*! \brief reserve space for summary */ inline void Reserve(size_t size) { @@ -554,7 +556,7 @@ class QuantileSketchTemplate { this->data = BeginPtr(space); } } - /*! + /*! * \brief set the space to be merge of all Summary arrays * \param begin begining position in th summary array * \param end ending position in the Summary array @@ -597,7 +599,7 @@ class QuantileSketchTemplate { } /*! \brief save the data structure into stream */ template - inline void Save(TStream &fo) const { + inline void Save(TStream &fo) const { // NOLINT(*) fo.Write(&(this->size), sizeof(this->size)); if (this->size != 0) { fo.Write(this->data, this->size * sizeof(Entry)); @@ -605,15 +607,16 @@ class QuantileSketchTemplate { } /*! \brief load data structure from input stream */ template - inline void Load(TStream &fi) { + inline void Load(TStream &fi) { // NOLINT(*) utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1"); this->Reserve(this->size); if (this->size != 0) { - utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2"); + utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, + "invalid SummaryArray 2"); } } }; - /*! + /*! * \brief intialize the quantile sketch, given the performance specification * \param maxn maximum number of data points can be feed into sketch * \param eps accuracy level of summary @@ -741,8 +744,8 @@ class QuantileSketchTemplate { * \tparam DType type of data content * \tparam RType type of rank */ -template -class WQuantileSketch : +template +class WQuantileSketch : public QuantileSketchTemplate >{ }; @@ -751,8 +754,8 @@ class WQuantileSketch : * \tparam DType type of data content * \tparam RType type of rank */ -template -class WXQuantileSketch : +template +class WXQuantileSketch : public QuantileSketchTemplate >{ }; /*! 
@@ -760,11 +763,11 @@ class WXQuantileSketch : * \tparam DType type of data content * \tparam RType type of rank */ -template -class GKQuantileSketch : +template +class GKQuantileSketch : public QuantileSketchTemplate >{ }; -} // utils -} // xgboost -#endif +} // namespace utils +} // namespace xgboost +#endif // XGBOOST_UTILS_QUANTILE_H_ diff --git a/src/utils/random.h b/src/utils/random.h index 1e3e617f9..7d52c2ae7 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -1,12 +1,14 @@ -#ifndef XGBOOST_UTILS_RANDOM_H_ -#define XGBOOST_UTILS_RANDOM_H_ /*! + * Copyright 2014 by Contributors * \file xgboost_random.h * \brief PRNG to support random number generation * \author Tianqi Chen: tianqi.tchen@gmail.com * * Use standard PRNG from stdlib */ +#ifndef XGBOOST_UTILS_RANDOM_H_ +#define XGBOOST_UTILS_RANDOM_H_ + #include #include #include @@ -23,11 +25,11 @@ inline void Seed(unsigned seed) { } /*! \brief basic function, uniform */ inline double Uniform(void) { - return static_cast(rand()) / (static_cast(RAND_MAX)+1.0); + return static_cast(rand()) / (static_cast(RAND_MAX)+1.0); // NOLINT(*) } /*! \brief return a real numer uniform in (0,1) */ inline double NextDouble2(void) { - return (static_cast(rand()) + 1.0) / (static_cast(RAND_MAX)+2.0); + return (static_cast(rand()) + 1.0) / (static_cast(RAND_MAX)+2.0); // NOLINT(*) } /*! \brief return x~N(0,1) */ inline double Normal(void) { @@ -73,7 +75,7 @@ inline void Shuffle(T *data, size_t sz) { } // random shuffle the data inside, require PRNG template -inline void Shuffle(std::vector &data) { +inline void Shuffle(std::vector &data) { // NOLINT(*) Shuffle(&data[0], data.size()); } @@ -81,17 +83,18 @@ inline void Shuffle(std::vector &data) { struct Random{ /*! \brief set random number seed */ inline void Seed(unsigned sd) { - this->rseed = sd; -#if defined(_MSC_VER)||defined(_WIN32) - ::xgboost::random::Seed(sd); + this->rseed = sd; +#if defined(_MSC_VER) || defined(_WIN32) + ::xgboost::random::Seed(sd); #endif } /*! \brief return a real number uniform in [0,1) */ inline double RandDouble(void) { - // use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe - // For cygwin and mingw, this can slows down parallelism, but rand_r is only used in objective-inl.hpp, won't affect speed in general - // todo, replace with another PRNG -#if defined(_MSC_VER)||defined(_WIN32)||defined(XGBOOST_STRICT_CXX98_) + // use rand instead of rand_r in windows, for MSVC it is fine since rand is threadsafe + // For cygwin and mingw, this can slows down parallelism, + // but rand_r is only used in objective-inl.hpp, won't affect speed in general + // todo, replace with another PRNG +#if defined(_MSC_VER) || defined(_WIN32) || defined(XGBOOST_STRICT_CXX98_) return Uniform(); #else return static_cast(rand_r(&rseed)) / (static_cast(RAND_MAX) + 1.0); diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h index 45da6ec84..2119f53ab 100644 --- a/src/utils/thread_buffer.h +++ b/src/utils/thread_buffer.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_ -#define XGBOOST_UTILS_THREAD_BUFFER_H_ /*! 
+ * Copyright 2014 by Contributors * \file thread_buffer.h * \brief multi-thread buffer, iterator, can be used to create parallel pipeline * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_THREAD_BUFFER_H_ +#define XGBOOST_UTILS_THREAD_BUFFER_H_ + #include #include #include @@ -27,7 +29,7 @@ class ThreadBuffer { this->buf_size = 30; } ~ThreadBuffer(void) { - if(init_end) this->Destroy(); + if (init_end) this->Destroy(); } /*!\brief set parameter, will also pass the parameter to factory */ inline void SetParam(const char *name, const char *val) { @@ -38,7 +40,7 @@ class ThreadBuffer { /*! * \brief initalize the buffered iterator * \param param a initialize parameter that will pass to factory, ignore it if not necessary - * \return false if the initlization can't be done, e.g. buffer file hasn't been created + * \return false if the initlization can't be done, e.g. buffer file hasn't been created */ inline bool Init(void) { if (!factory.Init()) return false; @@ -49,7 +51,7 @@ class ThreadBuffer { this->init_end = true; this->StartLoader(); return true; - } + } /*!\brief place the iterator before first value */ inline void BeforeFirst(void) { // wait till last loader end @@ -70,7 +72,7 @@ class ThreadBuffer { loading_need.Post(); // set buffer value buf_index = 0; - } + } /*! \brief destroy the buffer iterator, will deallocate the buffer */ inline void Destroy(void) { // wait until the signal is consumed @@ -78,7 +80,7 @@ class ThreadBuffer { loading_need.Post(); loader_thread.Join(); loading_need.Destroy(); - loading_end.Destroy(); + loading_end.Destroy(); for (size_t i = 0; i < bufA.size(); ++i) { factory.FreeSpace(bufA[i]); } @@ -88,37 +90,38 @@ class ThreadBuffer { bufA.clear(); bufB.clear(); factory.Destroy(); this->init_end = false; - } + } /*! * \brief get the next element needed in buffer * \param elem element to store into * \return whether reaches end of data */ - inline bool Next(Elem &elem) { + inline bool Next(Elem &elem) { // NOLINT(*) // end of buffer try to switch if (buf_index == buf_size) { this->SwitchBuffer(); buf_index = 0; } - if (buf_index >= (current_buf ? endA : endB)) { + if (buf_index >= (current_buf ? endA : endB)) { return false; } std::vector &buf = current_buf ? bufA : bufB; elem = buf[buf_index]; ++buf_index; return true; - } + } /*! * \brief get the factory object */ inline ElemFactory &get_factory(void) { return factory; } - inline const ElemFactory &get_factory(void) const{ + inline const ElemFactory &get_factory(void) const { return factory; } // size of buffer int buf_size; + private: // factory object used to load configures ElemFactory factory; @@ -147,15 +150,15 @@ class ThreadBuffer { * this implementation is like producer-consumer style */ inline void RunLoader(void) { - while(!destroy_signal) { + while (!destroy_signal) { // sleep until loading is needed - loading_need.Wait(); + loading_need.Wait(); std::vector &buf = current_buf ? bufB : bufA; int i; for (i = 0; i < buf_size ; ++i) { if (!factory.LoadNext(buf[i])) { int &end = current_buf ? 
endB : endA; - end = i; // marks the termination + end = i; // marks the termination break; } } @@ -166,14 +169,14 @@ class ThreadBuffer { } /*!\brief entry point of loader thread */ inline static XGBOOST_THREAD_PREFIX LoaderEntry(void *pthread) { - static_cast< ThreadBuffer* >(pthread)->RunLoader(); + static_cast< ThreadBuffer* >(pthread)->RunLoader(); return NULL; } /*!\brief start loader thread */ inline void StartLoader(void) { destroy_signal = false; // set param - current_buf = 1; + current_buf = 1; loading_need.Init(1); loading_end .Init(0); // reset terminate limit @@ -185,8 +188,8 @@ class ThreadBuffer { current_buf = 0; // wake loader for next part data_loaded = false; - loading_need.Post(); - buf_index = 0; + loading_need.Post(); + buf_index = 0; } /*!\brief switch double buffer */ inline void SwitchBuffer(void) { @@ -198,7 +201,6 @@ class ThreadBuffer { loading_need.Post(); } }; - } // namespace utils } // namespace xgboost -#endif +#endif // XGBOOST_UTILS_THREAD_BUFFER_H_ diff --git a/src/utils/utils.h b/src/utils/utils.h index e6026c3a6..2066634d6 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_UTILS_UTILS_H_ -#define XGBOOST_UTILS_UTILS_H_ /*! + * Copyright 2014 by Contributors * \file utils.h * \brief simple utils to support the code * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_UTILS_H_ +#define XGBOOST_UTILS_UTILS_H_ + #define _CRT_SECURE_NO_WARNINGS #include #include @@ -19,18 +21,18 @@ #define fopen64 std::fopen #endif #ifdef _MSC_VER -// NOTE: sprintf_s is not equivalent to snprintf, +// NOTE: sprintf_s is not equivalent to snprintf, // they are equivalent when success, which is sufficient for our case #define snprintf sprintf_s #define vsnprintf vsprintf_s #else #ifdef _FILE_OFFSET_BITS #if _FILE_OFFSET_BITS == 32 -#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit") +#pragma message("Warning: FILE OFFSET BITS defined to be 32 bit") #endif #endif -#ifdef __APPLE__ +#ifdef __APPLE__ #define off64_t off_t #define fopen64 std::fopen #endif @@ -58,17 +60,17 @@ namespace utils { const int kPrintBuffer = 1 << 12; #ifndef XGBOOST_CUSTOMIZE_MSG_ -/*! +/*! * \brief handling of Assert error, caused by in-apropriate input - * \param msg error message + * \param msg error message */ inline void HandleAssertError(const char *msg) { fprintf(stderr, "AssertError:%s\n", msg); exit(-1); } -/*! +/*! * \brief handling of Check error, caused by in-apropriate input - * \param msg error message + * \param msg error message */ inline void HandleCheckError(const char *msg) { fprintf(stderr, "%s\n", msg); @@ -158,7 +160,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) { // easy utils that can be directly acessed in xgboost /*! 
\brief get the beginning address of a vector */ template -inline T *BeginPtr(std::vector &vec) { +inline T *BeginPtr(std::vector &vec) { // NOLINT(*) if (vec.size() == 0) { return NULL; } else { @@ -174,7 +176,7 @@ inline const T *BeginPtr(const std::vector &vec) { return &vec[0]; } } -inline char* BeginPtr(std::string &str) { +inline char* BeginPtr(std::string &str) { // NOLINT(*) if (str.length() == 0) return NULL; return &str[0]; } From aba41d07cdeb1dcdee6f84feaf956cd1a6a97ff1 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 19:20:45 -0700 Subject: [PATCH 27/59] lint learner finish --- src/learner/dmatrix.h | 28 +++++++------- src/learner/evaluation-inl.hpp | 71 ++++++++++++++++++---------------- src/learner/evaluation.h | 8 ++-- src/learner/helper_utils.h | 8 ++-- src/learner/learner-inl.hpp | 65 ++++++++++++++++--------------- src/learner/objective-inl.hpp | 28 +++++++------- src/learner/objective.h | 21 +++++----- src/sync/sync.h | 10 ++--- 8 files changed, 127 insertions(+), 112 deletions(-) diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h index b58f7b2bb..3fbc579de 100644 --- a/src/learner/dmatrix.h +++ b/src/learner/dmatrix.h @@ -1,11 +1,13 @@ -#ifndef XGBOOST_LEARNER_DMATRIX_H_ -#define XGBOOST_LEARNER_DMATRIX_H_ /*! + * Copyright 2014 by Contributors * \file dmatrix.h - * \brief meta data and template data structure + * \brief meta data and template data structure * used for regression/classification/ranking * \author Tianqi Chen */ +#ifndef XGBOOST_LEARNER_DMATRIX_H_ +#define XGBOOST_LEARNER_DMATRIX_H_ + #include #include #include "../data.h" @@ -16,8 +18,8 @@ namespace learner { * \brief meta information needed in training, including label, weight */ struct MetaInfo { - /*! - * \brief information needed by booster + /*! + * \brief information needed by booster * BoosterInfo does not implement save and load, * all serialization is done in MetaInfo */ @@ -31,7 +33,7 @@ struct MetaInfo { std::vector group_ptr; /*! \brief weights of each instance, optional */ std::vector weights; - /*! + /*! 
* \brief initialized margins, * if specified, xgboost will start from this init margin * can be used to specify initial prediction to boost from @@ -66,7 +68,7 @@ struct MetaInfo { return 1.0f; } } - inline void SaveBinary(utils::IStream &fo) const { + inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*) int version = kVersion; fo.Write(&version, sizeof(version)); fo.Write(&info.num_row, sizeof(info.num_row)); @@ -77,7 +79,7 @@ struct MetaInfo { fo.Write(info.root_index); fo.Write(base_margin); } - inline void LoadBinary(utils::IStream &fi) { + inline void LoadBinary(utils::IStream &fi) { // NOLINT(*) int version; utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format"); utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format"); @@ -114,7 +116,7 @@ struct MetaInfo { return labels; } inline const std::vector& GetFloatInfo(const char *field) const { - return ((MetaInfo*)this)->GetFloatInfo(field); + return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*) } inline std::vector &GetUIntInfo(const char *field) { using namespace std; @@ -124,7 +126,7 @@ struct MetaInfo { return info.root_index; } inline const std::vector &GetUIntInfo(const char *field) const { - return ((MetaInfo*)this)->GetUIntInfo(field); + return ((MetaInfo*)this)->GetUIntInfo(field); // NOLINT(*) } // try to load weight information from file, if exists inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) { @@ -149,14 +151,14 @@ struct MetaInfo { * \tparam FMatrix type of feature data source */ struct DMatrix { - /*! - * \brief magic number associated with this object + /*! + * \brief magic number associated with this object * used to check if it is specific instance */ const int magic; /*! \brief meta information about the dataset */ MetaInfo info; - /*! + /*! * \brief cache pointer to verify if the data structure is cached in some learner * used to verify if DMatrix is cached */ diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 433b5a00b..2b69a43a8 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -1,10 +1,12 @@ +/*! + * Copyright 2014 by Contributors + * \file xgboost_evaluation-inl.hpp + * \brief evaluation metrics for regression and classification and rank + * \author Kailong Chen, Tianqi Chen + */ #ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_ #define XGBOOST_LEARNER_EVALUATION_INL_HPP_ -/*! -* \file xgboost_evaluation-inl.hpp -* \brief evaluation metrics for regression and classification and rank -* \author Kailong Chen, Tianqi Chen -*/ + #include #include #include @@ -18,8 +20,8 @@ namespace xgboost { namespace learner { -/*! - * \brief base class of elementwise evaluation +/*! + * \brief base class of elementwise evaluation * \tparam Derived the name of subclass */ template @@ -47,15 +49,15 @@ struct EvalEWiseBase : public IEvaluator { } return Derived::GetFinal(dat[0], dat[1]); } - /*! - * \brief to be implemented by subclass, - * get evaluation result from one row + /*! + * \brief to be implemented by subclass, + * get evaluation result from one row * \param label label of current instance * \param pred prediction value of current instance */ inline static float EvalRow(float label, float pred); - /*! - * \brief to be overide by subclas, final trasnformation + /*! 
+ * \brief to be overide by subclas, final trasnformation * \param esum the sum statistics returned by EvalRow * \param wsum sum of weight */ @@ -87,9 +89,9 @@ struct EvalLogLoss : public EvalEWiseBase { const float eps = 1e-16f; const float pneg = 1.0f - py; if (py < eps) { - return -y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps); + return -y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps); } else if (pneg < eps) { - return -y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps); + return -y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps); } else { return -y * std::log(py) - (1.0f - y) * std::log(pneg); } @@ -119,7 +121,7 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase { } }; -/*! +/*! * \brief base class of multi-class evaluation * \tparam Derived the name of subclass */ @@ -139,7 +141,7 @@ struct EvalMClassBase : public IEvaluator { float sum = 0.0, wsum = 0.0; int label_error = 0; #pragma omp parallel for reduction(+: sum, wsum) schedule(static) - for (bst_omp_uint i = 0; i < ndata; ++i) { + for (bst_omp_uint i = 0; i < ndata; ++i) { const float wt = info.GetWeight(i); int label = static_cast(info.labels[i]); if (label >= 0 && label < static_cast(nclass)) { @@ -161,18 +163,18 @@ struct EvalMClassBase : public IEvaluator { } return Derived::GetFinal(dat[0], dat[1]); } - /*! - * \brief to be implemented by subclass, - * get evaluation result from one row + /*! + * \brief to be implemented by subclass, + * get evaluation result from one row * \param label label of current instance - * \param pred prediction value of current instance + * \param pred prediction value of current instance * \param nclass number of class in the prediction */ inline static float EvalRow(int label, const float *pred, size_t nclass); - /*! - * \brief to be overide by subclas, final trasnformation + /*! + * \brief to be overide by subclas, final trasnformation * \param esum the sum statistics returned by EvalRow * \param wsum sum of weight */ @@ -208,7 +210,7 @@ struct EvalMultiLogLoss : public EvalMClassBase { } else { return -std::log(eps); } - } + } }; /*! 
\brief ctest */ @@ -240,7 +242,7 @@ struct EvalCTest: public IEvaluator { tpred.push_back(preds[i + (k + 1) * ndata]); tinfo.labels.push_back(info.labels[i]); tinfo.weights.push_back(info.GetWeight(i)); - } + } } wsum += base_->Eval(tpred, tinfo); } @@ -328,7 +330,7 @@ struct EvalPrecisionRatio : public IEvaluator{ const MetaInfo &info, bool distributed) const { utils::Check(!distributed, "metric %s do not support distributed evaluation", Name()); - utils::Check(info.labels.size() != 0, "label set cannot be empty"); + utils::Check(info.labels.size() != 0, "label set cannot be empty"); utils::Assert(preds.size() % info.labels.size() == 0, "label size predict size not match"); std::vector< std::pair > rec; @@ -344,7 +346,8 @@ struct EvalPrecisionRatio : public IEvaluator{ } protected: - inline double CalcPRatio(const std::vector< std::pair >& rec, const MetaInfo &info) const { + inline double CalcPRatio(const std::vector< std::pair >& rec, + const MetaInfo &info) const { size_t cutoff = static_cast(ratio_ * rec.size()); double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0; for (size_t j = 0; j < cutoff; ++j) { @@ -372,7 +375,7 @@ struct EvalAuc : public IEvaluator { utils::Check(info.labels.size() != 0, "label set cannot be empty"); utils::Check(preds.size() % info.labels.size() == 0, "label size predict size not match"); - std::vector tgptr(2, 0); + std::vector tgptr(2, 0); tgptr[1] = static_cast(info.labels.size()); const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; @@ -417,8 +420,8 @@ struct EvalAuc : public IEvaluator { } if (distributed) { float dat[2]; - dat[0] = static_cast(sum_auc); - dat[1] = static_cast(ngroup); + dat[0] = static_cast(sum_auc); + dat[1] = static_cast(ngroup); // approximately estimate auc using mean rabit::Allreduce(dat, 2); return dat[0] / dat[1]; @@ -463,8 +466,8 @@ struct EvalRankList : public IEvaluator { } if (distributed) { float dat[2]; - dat[0] = static_cast(sum_metric); - dat[1] = static_cast(ngroup); + dat[0] = static_cast(sum_metric); + dat[1] = static_cast(ngroup); // approximately estimate auc using mean rabit::Allreduce(dat, 2); return dat[0] / dat[1]; @@ -489,7 +492,7 @@ struct EvalRankList : public IEvaluator { } } /*! \return evaluation metric, given the pair_sort record, (pred,label) */ - virtual float EvalMetric(std::vector< std::pair > &pair_sort) const = 0; + virtual float EvalMetric(std::vector< std::pair > &pair_sort) const = 0; // NOLINT(*) protected: unsigned topn_; @@ -524,13 +527,13 @@ struct EvalNDCG : public EvalRankList{ double sumdcg = 0.0; for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) { const unsigned rel = rec[i].second; - if (rel != 0) { + if (rel != 0) { sumdcg += ((1 << rel) - 1) / std::log(i + 2.0); } } return static_cast(sumdcg); } - virtual float EvalMetric(std::vector< std::pair > &rec) const { + virtual float EvalMetric(std::vector< std::pair > &rec) const { // NOLINT(*) std::stable_sort(rec.begin(), rec.end(), CmpFirst); float dcg = this->CalcDCG(rec); std::stable_sort(rec.begin(), rec.end(), CmpSecond); diff --git a/src/learner/evaluation.h b/src/learner/evaluation.h index 85358e72e..a98c47495 100644 --- a/src/learner/evaluation.h +++ b/src/learner/evaluation.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_LEARNER_EVALUATION_H_ -#define XGBOOST_LEARNER_EVALUATION_H_ /*! 
+ * Copyright 2014 by Contributors * \file evaluation.h * \brief interface of evaluation function supported in xgboost * \author Tianqi Chen, Kailong Chen */ +#ifndef XGBOOST_LEARNER_EVALUATION_H_ +#define XGBOOST_LEARNER_EVALUATION_H_ + #include #include #include @@ -19,7 +21,7 @@ struct IEvaluator{ * \brief evaluate a specific metric * \param preds prediction * \param info information, including label etc. - * \param distributed whether a call to Allreduce is needed to gather + * \param distributed whether a call to Allreduce is needed to gather * the average statistics across all the node, * this is only supported by some metrics */ diff --git a/src/learner/helper_utils.h b/src/learner/helper_utils.h index d318cf8bd..7ca7ba59c 100644 --- a/src/learner/helper_utils.h +++ b/src/learner/helper_utils.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_ -#define XGBOOST_LEARNER_HELPER_UTILS_H_ /*! + * Copyright 2014 by Contributors * \file helper_utils.h * \brief useful helper functions * \author Tianqi Chen, Kailong Chen */ +#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_ +#define XGBOOST_LEARNER_HELPER_UTILS_H_ + #include #include #include @@ -61,7 +63,7 @@ inline float LogSum(const float *rec, size_t size) { for (size_t i = 0; i < size; ++i) { sum += std::exp(rec[i] - mx); } - return mx + std::log(sum); + return mx + std::log(sum); } inline static bool CmpFirst(const std::pair &a, diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp index 45e312aa7..f051992d3 100644 --- a/src/learner/learner-inl.hpp +++ b/src/learner/learner-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_ -#define XGBOOST_LEARNER_LEARNER_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file learner-inl.hpp - * \brief learning algorithm + * \brief learning algorithm * \author Tianqi Chen */ +#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_ +#define XGBOOST_LEARNER_LEARNER_INL_HPP_ + #include #include #include @@ -19,7 +21,7 @@ namespace xgboost { /*! \brief namespace for learning algorithm */ namespace learner { -/*! +/*! * \brief learner that takes do gradient boosting on specific objective functions * and do training and prediction */ @@ -30,7 +32,7 @@ class BoostLearner : public rabit::Serializable { gbm_ = NULL; name_obj_ = "reg:linear"; name_gbm_ = "gbtree"; - silent= 0; + silent = 0; prob_buffer_row = 1.0f; distributed_mode = 0; updater_mode = 0; @@ -47,10 +49,10 @@ class BoostLearner : public rabit::Serializable { * \brief add internal cache space for mat, this can speedup prediction for matrix, * please cache prediction for training and eval data * warning: if the model is loaded from file from some previous training history - * set cache data must be called with exactly SAME + * set cache data must be called with exactly SAME * data matrices to continue training otherwise it will cause error * \param mats array of pointers to matrix whose prediction result need to be cached - */ + */ inline void SetCacheData(const std::vector& mats) { utils::Assert(cache_.size() == 0, "can only call cache data once"); // assign buffer index @@ -67,10 +69,10 @@ class BoostLearner : public rabit::Serializable { buffer_size += mats[i]->info.num_row(); } char str_temp[25]; - utils::SPrintf(str_temp, sizeof(str_temp), "%lu", - static_cast(buffer_size)); + utils::SPrintf(str_temp, sizeof(str_temp), "%lu", + static_cast(buffer_size)); // NOLINT(*) this->SetParam("num_pbuffer", str_temp); - this->pred_buffer_size = buffer_size; + this->pred_buffer_size = buffer_size; } /*! 
* \brief set parameters from outside @@ -79,7 +81,7 @@ class BoostLearner : public rabit::Serializable { */ inline void SetParam(const char *name, const char *val) { using namespace std; - // in this version, bst: prefix is no longer required + // in this version, bst: prefix is no longer required if (strncmp(name, "bst:", 4) != 0) { std::string n = "bst:"; n += name; this->SetParam(n.c_str(), val); @@ -119,7 +121,7 @@ class BoostLearner : public rabit::Serializable { if (!strcmp(name, "objective")) name_obj_ = val; if (!strcmp(name, "booster")) name_gbm_ = val; mparam.SetParam(name, val); - } + } if (gbm_ != NULL) gbm_->SetParam(name, val); if (obj_ != NULL) obj_->SetParam(name, val); if (gbm_ == NULL || obj_ == NULL) { @@ -133,16 +135,16 @@ class BoostLearner : public rabit::Serializable { // estimate feature bound unsigned num_feature = 0; for (size_t i = 0; i < cache_.size(); ++i) { - num_feature = std::max(num_feature, + num_feature = std::max(num_feature, static_cast(cache_[i].mat_->info.num_col())); } // run allreduce on num_feature to find the maximum value rabit::Allreduce(&num_feature, 1); if (num_feature > mparam.num_feature) mparam.num_feature = num_feature; - } + } char str_temp[25]; utils::SPrintf(str_temp, sizeof(str_temp), "%d", mparam.num_feature); - this->SetParam("bst:num_feature", str_temp); + this->SetParam("bst:num_feature", str_temp); } /*! * \brief initialize the model @@ -161,13 +163,13 @@ class BoostLearner : public rabit::Serializable { * \param fi input stream * \param calc_num_feature whether call InitTrainer with calc_num_feature */ - inline void LoadModel(utils::IStream &fi, + inline void LoadModel(utils::IStream &fi, // NOLINT(*) bool calc_num_feature = true) { utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0, "BoostLearner: wrong model format"); { // backward compatibility code for compatible with old model type - // for new model, Read(&name_obj_) is suffice + // for new model, Read(&name_obj_) is suffice uint64_t len; utils::Check(fi.Read(&len, sizeof(len)) != 0, "BoostLearner: wrong model format"); if (len >= std::numeric_limits::max()) { @@ -226,9 +228,9 @@ class BoostLearner : public rabit::Serializable { fi = utils::IStream::Create(fname, "r"); this->LoadModel(*fi, true); } - delete fi; + delete fi; } - inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { + inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*) ModelParam p = mparam; p.saved_with_pbuffer = static_cast(with_pbuffer); fo.Write(&p, sizeof(ModelParam)); @@ -247,7 +249,7 @@ class BoostLearner : public rabit::Serializable { fo->Write("bs64\t", 5); utils::Base64OutStream bout(fo); this->SaveModel(bout, with_pbuffer); - bout.Finish('\n'); + bout.Finish('\n'); } else { fo->Write("binf", 4); this->SaveModel(*fo, with_pbuffer); @@ -260,7 +262,7 @@ class BoostLearner : public rabit::Serializable { * \param p_train pointer to the matrix used by training */ inline void CheckInit(DMatrix *p_train) { - int ncol = static_cast(p_train->info.info.num_col); + int ncol = static_cast(p_train->info.info.num_col); std::vector enabled(ncol, true); // set max row per batch to limited value // in distributed mode, use safe choice otherwise @@ -345,10 +347,9 @@ class BoostLearner : public rabit::Serializable { bool output_margin, std::vector *out_preds, unsigned ntree_limit = 0, - bool pred_leaf = false - ) const { + bool pred_leaf = false) const { if (pred_leaf) { - gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit); + 
gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit); } else { this->PredictRaw(data, out_preds, ntree_limit); if (!output_margin) { @@ -361,7 +362,7 @@ class BoostLearner : public rabit::Serializable { * NOTE: use the batch prediction interface if possible, batch prediction is usually * more efficient than online prediction * This function is NOT threadsafe, make sure you only call from one thread - * + * * \param inst the instance you want to predict * \param output_margin whether to only predict margin value instead of transformed prediction * \param out_preds output vector to hold the predictions @@ -387,8 +388,8 @@ class BoostLearner : public rabit::Serializable { } protected: - /*! - * \brief initialize the objective function and GBM, + /*! + * \brief initialize the objective function and GBM, * if not yet done */ inline void InitObjGBM(void) { @@ -401,12 +402,12 @@ class BoostLearner : public rabit::Serializable { for (size_t i = 0; i < cfg_.size(); ++i) { obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str()); gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str()); - } + } if (evaluator_.Size() == 0) { evaluator_.AddEval(obj_->DefaultEvalMetric()); } } - /*! + /*! * \brief additional default value for specific objs */ inline void InitAdditionDefaultParam(void) { @@ -415,12 +416,12 @@ class BoostLearner : public rabit::Serializable { gbm_->SetParam("max_delta_step", "0.7"); } } - /*! + /*! * \brief get un-transformed prediction * \param data training data matrix * \param out_preds output vector that stores the prediction * \param ntree_limit limit number of trees used for boosted tree - * predictor, when it equals 0, this means we are using all the trees + * predictor, when it equals 0, this means we are using all the trees */ inline void PredictRaw(const DMatrix &data, std::vector *out_preds, @@ -517,7 +518,7 @@ class BoostLearner : public rabit::Serializable { protected: // magic number to transform random seed - const static int kRandSeedMagic = 127; + static const int kRandSeedMagic = 127; // cache entry object that helps handle feature caching struct CacheEntry { const DMatrix *mat_; diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index d0ecf7a27..b6d388e3c 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ -#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ /*! 
+ * Copyright 2014 by Contributors * \file objective-inl.hpp * \brief objective function implementations * \author Tianqi Chen, Kailong Chen */ +#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ +#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ + #include #include #include @@ -176,14 +178,14 @@ class RegLossObj : public IObjFunction { // poisson regression for count class PoissonRegression : public IObjFunction { public: - explicit PoissonRegression(void) { + PoissonRegression(void) { max_delta_step = 0.0f; } virtual ~PoissonRegression(void) {} - + virtual void SetParam(const char *name, const char *val) { using namespace std; - if (!strcmp( "max_delta_step", name )) { + if (!strcmp("max_delta_step", name)) { max_delta_step = static_cast(atof(val)); } } @@ -201,9 +203,9 @@ class PoissonRegression : public IObjFunction { // check if label in range bool label_correct = true; // start calculating gradient - const long ndata = static_cast(preds.size()); + const long ndata = static_cast(preds.size()); // NOLINT(*) #pragma omp parallel for schedule(static) - for (long i = 0; i < ndata; ++i) { + for (long i = 0; i < ndata; ++i) { // NOLINT(*) float p = preds[i]; float w = info.GetWeight(i); float y = info.labels[i]; @@ -219,9 +221,9 @@ class PoissonRegression : public IObjFunction { } virtual void PredTransform(std::vector *io_preds) { std::vector &preds = *io_preds; - const long ndata = static_cast(preds.size()); + const long ndata = static_cast(preds.size()); // NOLINT(*) #pragma omp parallel for schedule(static) - for (long j = 0; j < ndata; ++j) { + for (long j = 0; j < ndata; ++j) { // NOLINT(*) preds[j] = std::exp(preds[j]); } } @@ -234,7 +236,7 @@ class PoissonRegression : public IObjFunction { virtual const char* DefaultEvalMetric(void) const { return "poisson-nloglik"; } - + private: float max_delta_step; }; @@ -467,7 +469,7 @@ class LambdaRankObj : public IObjFunction { : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {} }; /*! - * \brief get lambda weight for existing pairs + * \brief get lambda weight for existing pairs * \param list a list that is sorted by pred score * \param io_pairs record of pairs, containing the pairs to fill in weights */ @@ -555,10 +557,10 @@ class LambdaRankObjMAP : public LambdaRankObj { float ap_acc; /*! * \brief the accumulated precision, - * assuming a positive instance is missing + * assuming a positive instance is missing */ float ap_acc_miss; - /*! + /*! * \brief the accumulated precision, * assuming that one more positive instance is inserted ahead */ diff --git a/src/learner/objective.h b/src/learner/objective.h index c0a525a43..08b57f528 100644 --- a/src/learner/objective.h +++ b/src/learner/objective.h @@ -1,11 +1,14 @@ -#ifndef XGBOOST_LEARNER_OBJECTIVE_H_ -#define XGBOOST_LEARNER_OBJECTIVE_H_ /*! + * Copyright 2014 by Contributors * \file objective.h * \brief interface of objective function used for gradient boosting * \author Tianqi Chen, Kailong Chen */ -#include "dmatrix.h" +#ifndef XGBOOST_LEARNER_OBJECTIVE_H_ +#define XGBOOST_LEARNER_OBJECTIVE_H_ + +#include +#include "./dmatrix.h" namespace xgboost { namespace learner { @@ -13,13 +16,13 @@ namespace learner { class IObjFunction{ public: /*! \brief virtual destructor */ - virtual ~IObjFunction(void){} + virtual ~IObjFunction(void) {} /*! * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter */ - virtual void SetParam(const char *name, const char *val) = 0; + virtual void SetParam(const char *name, const char *val) = 0; /*! 
 * \brief get gradient over each of predictions, given existing information
 * \param preds prediction of current round
@@ -38,9 +41,9 @@ class IObjFunction{
    * \brief transform prediction values, this is only called when Prediction is called
    * \param io_preds prediction values, saves to this vector as well
    */
-  virtual void PredTransform(std::vector<float> *io_preds){}
+  virtual void PredTransform(std::vector<float> *io_preds) {}
   /*!
-   * \brief transform prediction values, this is only called when Eval is called, 
+   * \brief transform prediction values, this is only called when Eval is called,
    * usually it redirect to PredTransform
    * \param io_preds prediction values, saves to this vector as well
    */
@@ -49,7 +52,7 @@ class IObjFunction{
   }
   /*!
   * \brief transform probability value back to margin
-  * this is used to transform user-set base_score back to margin 
+  * this is used to transform user-set base_score back to margin
   * used by gradient boosting
   * \return transformed value
   */
@@ -77,7 +80,7 @@ inline IObjFunction* CreateObjFunction(const char *name) {
   if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1);
   if (!strcmp("rank:pairwise", name )) return new PairwiseRankObj();
   if (!strcmp("rank:ndcg", name)) return new LambdaRankObjNDCG();
-  if (!strcmp("rank:map", name)) return new LambdaRankObjMAP(); 
+  if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();
   utils::Error("unknown objective function type: %s", name);
   return NULL;
 }
diff --git a/src/sync/sync.h b/src/sync/sync.h
index 3a371b03c..b9bdf89fe 100644
--- a/src/sync/sync.h
+++ b/src/sync/sync.h
@@ -1,13 +1,13 @@
-#ifndef XGBOOST_SYNC_H_
-#define XGBOOST_SYNC_H_
 /*!
+ * Copyright 2014 by Contributors
 * \file sync.h
 * \brief the synchronization module of rabit
 * redirects to subtree rabit header
 * \author Tianqi Chen
 */
+#ifndef XGBOOST_SYNC_SYNC_H_
+#define XGBOOST_SYNC_SYNC_H_
+
 #include "../../subtree/rabit/include/rabit.h"
 #include "../../subtree/rabit/include/rabit/timer.h"
-#endif  // XGBOOST_SYNC_H_
-
-
+#endif  // XGBOOST_SYNC_SYNC_H_

From 1123253f79e1288058ae72072cd2736a709738ed Mon Sep 17 00:00:00 2001
From: tqchen
Date: Fri, 3 Jul 2015 19:35:23 -0700
Subject: [PATCH 28/59] lint all
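
Beyond whitespace, the bulk of this patch renames include guards to the form
cpplint's build/header_guard rule derives from the file path, e.g.
XGBOOST_DATA_H becomes XGBOOST_DATA_H_ for src/data.h, moves the doc comment
above the guard so the guard wraps only code, and gives the closing #endif a
matching comment. A minimal sketch of the header layout the patch converges
on; the file name and contents below are illustrative only:

    /*!
     * Copyright 2014 by Contributors
     * \file example.h
     * \brief an illustrative header following the lint conventions
     */
    #ifndef XGBOOST_IO_EXAMPLE_H_
    #define XGBOOST_IO_EXAMPLE_H_

    #include <vector>

    namespace xgboost {
    namespace io {
    // declarations go here, inside the guard
    }  // namespace io
    }  // namespace xgboost
    #endif  // XGBOOST_IO_EXAMPLE_H_

The same convention explains the sync.h change in the previous patch, where
the guard becomes XGBOOST_SYNC_SYNC_H_ to mirror the src/sync/sync.h path.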
---
 src/data.h                    | 16 +++++----
 src/io/dmlc_simple.cpp        | 24 +++++++------
 src/io/io.cpp                 |  9 ++---
 src/io/libsvm_parser.h        | 11 +++---
 src/io/page_dmatrix-inl.hpp   | 32 +++++++++--------
 src/io/page_fmatrix-inl.hpp   | 42 ++++++++++++----------
 src/io/simple_dmatrix-inl.hpp | 41 +++++++++++----------
 src/io/simple_fmatrix-inl.hpp | 52 ++++++++++++++-------------
 src/io/sparse_batch_page.h    | 26 ++++++++------
 src/xgboost_main.cpp          | 68 +++++++++++++++++++----------------
 10 files changed, 178 insertions(+), 143 deletions(-)

diff --git a/src/data.h b/src/data.h
index 63dd2d78f..3c4a14987 100644
--- a/src/data.h
+++ b/src/data.h
@@ -1,10 +1,12 @@
-#ifndef XGBOOST_DATA_H
-#define XGBOOST_DATA_H
 /*!
+ * Copyright (c) 2014 by Contributors
 * \file data.h
 * \brief the input data structure for gradient boosting
 * \author Tianqi Chen
 */
+#ifndef XGBOOST_DATA_H_
+#define XGBOOST_DATA_H_
+
 #include
 #include
 #include "utils/utils.h"
@@ -32,7 +34,7 @@ struct bst_gpair {
   bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {}
 };
-/*! 
+/*!
 * \brief extra information that might needed by gbm and tree module
 * these information are not necessarily presented, and can be empty
 */
@@ -102,7 +104,7 @@ struct RowBatch : public SparseBatch {
     return Inst(data_ptr + ind_ptr[i],
                 static_cast<bst_uint>(ind_ptr[i+1] - ind_ptr[i]));
   }
 };
-/*! 
+/*!
 * \brief read-only column batch, used to access columns,
 * the columns are not required to be continuous
 */
@@ -131,7 +133,7 @@ class IFMatrix {
  /*!\brief get column iterator */
  virtual utils::IIterator<ColBatch> *ColIterator(void) = 0;
  /*!
-  * \brief get the column iterator associated with FMatrix with subset of column features 
+  * \brief get the column iterator associated with FMatrix with subset of column features
   * \param fset is the list of column index set that must be contained in the returning Column iterator
   * \return the column iterator, initialized so that it reads the elements in fset
   */
@@ -154,11 +156,11 @@ class IFMatrix {
  /*! \brief get number of non-missing entries in column */
  virtual size_t GetColSize(size_t cidx) const = 0;
  /*! \brief get column density */
-  virtual float GetColDensity(size_t cidx) const = 0; 
+  virtual float GetColDensity(size_t cidx) const = 0;
  /*! \brief reference of buffered rowset */
  virtual const std::vector<bst_uint> &buffered_rowset(void) const = 0;
  // virtual destructor
  virtual ~IFMatrix(void){}
 };
 }  // namespace xgboost
-#endif  // XGBOOST_DATA_H
+#endif  // XGBOOST_DATA_H_
diff --git a/src/io/dmlc_simple.cpp b/src/io/dmlc_simple.cpp
index 065877a19..3fbf34734 100644
--- a/src/io/dmlc_simple.cpp
+++ b/src/io/dmlc_simple.cpp
@@ -1,6 +1,8 @@
+// Copyright by Contributors
 #define _CRT_SECURE_NO_WARNINGS
 #define _CRT_SECURE_NO_DEPRECATE
 #define NOMINMAX
+#include
 #include "../utils/io.h"

 // implements a single no split version of DMLC
@@ -9,7 +11,7 @@
 namespace xgboost {
 namespace utils {
 /*!
- * \brief line split implementation from single FILE 
+ * \brief line split implementation from single FILE
  * simply returns lines of files, used for stdin
  */
 class SingleFileSplit : public dmlc::InputSplit {
@@ -32,7 +34,7 @@ class SingleFileSplit : public dmlc::InputSplit {
   }
   virtual size_t Read(void *ptr, size_t size) {
     return std::fread(ptr, 1, size, fp_);
-  } 
+  }
   virtual void Write(const void *ptr, size_t size) {
     utils::Error("cannot do write in inputsplit");
   }
@@ -47,13 +49,13 @@ class SingleFileSplit : public dmlc::InputSplit {
                                            chunk_end_);
     out_rec->dptr = chunk_begin_;
     out_rec->size = next - chunk_begin_;
-    chunk_begin_ = next; 
+    chunk_begin_ = next;
     return true;
   }
   virtual bool NextChunk(Blob *out_chunk) {
     if (chunk_begin_ == chunk_end_) {
       if (!LoadChunk()) return false;
-    } 
+    }
     out_chunk->dptr = chunk_begin_;
     out_chunk->size = chunk_end_ - chunk_begin_;
     chunk_begin_ = chunk_end_;
@@ -64,8 +66,8 @@
     if (max_size <= overflow_.length()) {
       *size = 0; return true;
     }
-    if (overflow_.length() != 0) { 
-      std::memcpy(buf, BeginPtr(overflow_), overflow_.length()); 
+    if (overflow_.length() != 0) {
+      std::memcpy(buf, BeginPtr(overflow_), overflow_.length());
     }
     size_t olen = overflow_.length();
     overflow_.resize(0);
@@ -88,13 +90,13 @@
       return true;
     }
   }
- 
+
 protected:
  inline const char* FindLastRecordBegin(const char *begin,
                                         const char *end) {
    if (begin == end) return begin;
    for (const char *p = end - 1; p != begin; --p) {
-      if (*p == '\n' || *p == '\r') return p + 1; 
+      if (*p == '\n' || *p == '\r') return p + 1;
    }
    return begin;
  }
@@ -143,7 +145,7 @@ class StdFile : public dmlc::Stream {
  public:
   explicit StdFile(std::FILE *fp, bool use_stdio)
       : fp(fp), use_stdio(use_stdio) {
-  } 
+  }
   virtual ~StdFile(void) {
     this->Close();
   }
@@ -154,7 +156,7 @@
     std::fwrite(ptr, size, 1, fp);
   }
   virtual void Seek(size_t pos) {
-    std::fseek(fp, static_cast(pos), SEEK_SET);
+
std::fseek(fp, static_cast(pos), SEEK_SET); // NOLINT(*) } virtual size_t Tell(void) { return std::ftell(fp); @@ -197,7 +199,7 @@ Stream *Stream::Create(const char *fname, const char * const mode, bool allow_nu "to use hdfs, s3 or distributed version, compile with make dmlc=1"; utils::Check(strncmp(fname, "s3://", 5) != 0, msg); utils::Check(strncmp(fname, "hdfs://", 7) != 0, msg); - + std::FILE *fp = NULL; bool use_stdio = false; using namespace std; diff --git a/src/io/io.cpp b/src/io/io.cpp index dd4336170..b3713f0c5 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1,3 +1,4 @@ +// Copyright 2014 by Contributors #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX @@ -17,7 +18,7 @@ DataMatrix* LoadDataMatrix(const char *fname, const char *cache_file) { using namespace std; std::string fname_ = fname; - + const char *dlm = strchr(fname, '#'); if (dlm != NULL) { utils::Check(strchr(dlm + 1, '#') == NULL, @@ -29,7 +30,7 @@ DataMatrix* LoadDataMatrix(const char *fname, cache_file = dlm +1; } - if (cache_file == NULL) { + if (cache_file == NULL) { if (!std::strcmp(fname, "stdin") || !std::strncmp(fname, "s3://", 5) || !std::strncmp(fname, "hdfs://", 7) || @@ -42,7 +43,7 @@ DataMatrix* LoadDataMatrix(const char *fname, utils::FileStream fs(utils::FopenCheck(fname, "rb")); utils::Check(fs.Read(&magic, sizeof(magic)) != 0, "invalid input file format"); fs.Seek(0); - if (magic == DMatrixSimple::kMagic) { + if (magic == DMatrixSimple::kMagic) { DMatrixSimple *dmat = new DMatrixSimple(); dmat->LoadBinary(fs, silent, fname); fs.Close(); @@ -81,7 +82,7 @@ DataMatrix* LoadDataMatrix(const char *fname, } } -void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) { +void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) { if (dmat.magic == DMatrixSimple::kMagic) { const DMatrixSimple *p_dmat = static_cast(&dmat); p_dmat->SaveBinary(fname, silent); diff --git a/src/io/libsvm_parser.h b/src/io/libsvm_parser.h index 0e69d0467..92eeaf35d 100644 --- a/src/io/libsvm_parser.h +++ b/src/io/libsvm_parser.h @@ -22,7 +22,7 @@ namespace io { /*! \brief page returned by libsvm parser */ struct LibSVMPage : public SparsePage { std::vector label; - // overload clear + // overload clear inline void Clear() { SparsePage::Clear(); label.clear(); @@ -35,7 +35,7 @@ struct LibSVMPage : public SparsePage { */ class LibSVMPageFactory { public: - explicit LibSVMPageFactory() + LibSVMPageFactory() : bytes_read_(0), at_head_(true) { } inline bool Init(void) { @@ -85,7 +85,7 @@ class LibSVMPageFactory { data->resize(nthread); bytes_read_ += chunk.size; utils::Assert(chunk.size != 0, "LibSVMParser.FileData"); - char *head = reinterpret_cast(chunk.dptr); + char *head = reinterpret_cast(chunk.dptr); #pragma omp parallel num_threads(nthread_) { // threadid @@ -150,7 +150,7 @@ class LibSVMPageFactory { } return begin; } - + private: // nthread int nthread_; @@ -199,12 +199,13 @@ class LibSVMParser : public utils::IIterator { inline size_t bytes_read(void) const { return itr.get_factory().bytes_read(); } + private: bool at_end_; size_t data_ptr_; std::vector *data_; utils::ThreadBuffer*, LibSVMPageFactory> itr; -}; +}; } // namespace io } // namespace xgboost diff --git a/src/io/page_dmatrix-inl.hpp b/src/io/page_dmatrix-inl.hpp index 79455d130..3012af564 100644 --- a/src/io/page_dmatrix-inl.hpp +++ b/src/io/page_dmatrix-inl.hpp @@ -1,11 +1,15 @@ -#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_ -#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_ /*! 
+ * Copyright (c) 2014 by Contributors * \file page_dmatrix-inl.hpp * row iterator based on sparse page * \author Tianqi Chen */ +#ifndef XGBOOST_IO_PAGE_DMATRIX_INL_HPP_ +#define XGBOOST_IO_PAGE_DMATRIX_INL_HPP_ + #include +#include +#include #include "../data.h" #include "../utils/iterator.h" #include "../utils/thread_buffer.h" @@ -94,12 +98,12 @@ class DMatrixPageBase : public DataMatrix { fbin.Close(); if (!silent) { utils::Printf("DMatrixPage: %lux%lu is saved to %s\n", - static_cast(mat.info.num_row()), - static_cast(mat.info.num_col()), fname_); + static_cast(mat.info.num_row()), // NOLINT(*) + static_cast(mat.info.num_col()), fname_); // NOLINT(*) } } /*! \brief load and initialize the iterator with fi */ - inline void LoadBinary(utils::FileStream &fi, + inline void LoadBinary(utils::FileStream &fi, // NOLINT(*) bool silent, const char *fname_) { this->set_cache_file(fname_); @@ -114,8 +118,8 @@ class DMatrixPageBase : public DataMatrix { iter_->Load(fs); if (!silent) { utils::Printf("DMatrixPage: %lux%lu matrix is loaded", - static_cast(info.num_row()), - static_cast(info.num_col())); + static_cast(info.num_row()), // NOLINT(*) + static_cast(info.num_col())); // NOLINT(*) if (fname_ != NULL) { utils::Printf(" from %s\n", fname_); } else { @@ -141,7 +145,7 @@ class DMatrixPageBase : public DataMatrix { } this->set_cache_file(cache_file); std::string fname_row = std::string(cache_file) + ".row.blob"; - utils::FileStream fo(utils::FopenCheck(fname_row.c_str(), "wb")); + utils::FileStream fo(utils::FopenCheck(fname_row.c_str(), "wb")); SparsePage page; size_t bytes_write = 0; double tstart = rabit::utils::GetTime(); @@ -178,8 +182,8 @@ class DMatrixPageBase : public DataMatrix { if (page.data.size() != 0) { page.Save(&fo); } - fo.Close(); - iter_->Load(utils::FileStream(utils::FopenCheck(fname_row.c_str(), "rb"))); + fo.Close(); + iter_->Load(utils::FileStream(utils::FopenCheck(fname_row.c_str(), "rb"))); // save data matrix utils::FileStream fs(utils::FopenCheck(cache_file, "wb")); int tmagic = kMagic; @@ -188,8 +192,8 @@ class DMatrixPageBase : public DataMatrix { fs.Close(); if (!silent) { utils::Printf("DMatrixPage: %lux%lu is parsed from %s\n", - static_cast(info.num_row()), - static_cast(info.num_col()), + static_cast(info.num_row()), // NOLINT(*) + static_cast(info.num_col()), // NOLINT(*) uri); } } @@ -241,12 +245,12 @@ class DMatrixHalfRAM : public DMatrixPageBase<0xffffab03> { virtual IFMatrix *fmat(void) const { return fmat_; } - virtual void set_cache_file(const std::string &cache_file) { + virtual void set_cache_file(const std::string &cache_file) { } virtual void CheckMagic(int tmagic) { utils::Check(tmagic == DMatrixPageBase<0xffffab02>::kMagic || tmagic == DMatrixPageBase<0xffffab03>::kMagic, - "invalid format,magic number mismatch"); + "invalid format,magic number mismatch"); } /*! \brief the real fmatrix */ IFMatrix *fmat_; diff --git a/src/io/page_fmatrix-inl.hpp b/src/io/page_fmatrix-inl.hpp index 18f4c6dee..2aaec5b19 100644 --- a/src/io/page_fmatrix-inl.hpp +++ b/src/io/page_fmatrix-inl.hpp @@ -1,10 +1,16 @@ -#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_ -#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_ /*! + * Copyright (c) 2014 by Contributors * \file page_fmatrix-inl.hpp * col iterator based on sparse page * \author Tianqi Chen */ +#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_ +#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_ + +#include +#include +#include + namespace xgboost { namespace io { /*! 
\brief thread buffer iterator */ @@ -42,9 +48,9 @@ class ThreadColPageIterator: public utils::IIterator { } // set index set inline void SetIndexSet(const std::vector &fset, bool load_all) { - itr.get_factory().SetIndexSet(fset, load_all); + itr.get_factory().SetIndexSet(fset, load_all); } - + private: // output data ColBatch out_; @@ -96,7 +102,7 @@ struct ColConvertFactory { return true; } } - if (tmp_.Size() != 0){ + if (tmp_.Size() != 0) { this->MakeColPage(tmp_, BeginPtr(*buffered_rowset_) + btop, *enabled_, val); return true; @@ -104,7 +110,7 @@ struct ColConvertFactory { return false; } } - inline void Destroy(void) {} + inline void Destroy(void) {} inline void BeforeFirst(void) {} inline void MakeColPage(const SparsePage &prow, const bst_uint *ridx, @@ -115,7 +121,7 @@ struct ColConvertFactory { #pragma omp parallel { nthread = omp_get_num_threads(); - int max_nthread = std::max(omp_get_num_procs() / 2 - 4, 1); + int max_nthread = std::max(omp_get_num_procs() / 2 - 4, 1); if (nthread > max_nthread) { nthread = max_nthread; } @@ -130,10 +136,10 @@ struct ColConvertFactory { int tid = omp_get_thread_num(); for (size_t j = prow.offset[i]; j < prow.offset[i+1]; ++j) { const SparseBatch::Entry &e = prow.data[j]; - if (enabled[e.index]) { + if (enabled[e.index]) { builder.AddBudget(e.index, tid); } - } + } } builder.InitStorage(); #pragma omp parallel for schedule(static) num_threads(nthread) @@ -169,7 +175,7 @@ struct ColConvertFactory { // buffered rowset std::vector *buffered_rowset_; // enabled marks - const std::vector *enabled_; + const std::vector *enabled_; // internal temp cache SparsePage tmp_; /*! \brief page size 256 M */ @@ -191,7 +197,7 @@ class FMatrixPage : public IFMatrix { if (iter_ != NULL) delete iter_; } /*! \return whether column access is enabled */ - virtual bool HaveColAccess(void) const { + virtual bool HaveColAccess(void) const { return col_size_.size() != 0; } /*! \brief get number of colmuns */ @@ -212,7 +218,7 @@ class FMatrixPage : public IFMatrix { size_t nmiss = num_buffered_row_ - (col_size_[cidx]); return 1.0f - (static_cast(nmiss)) / num_buffered_row_; } - virtual void InitColAccess(const std::vector &enabled, + virtual void InitColAccess(const std::vector &enabled, float pkeep, size_t max_row_perbatch) { if (this->HaveColAccess()) return; if (TryLoadColData()) return; @@ -242,11 +248,11 @@ class FMatrixPage : public IFMatrix { /*! 
* \brief colmun based iterator */ - virtual utils::IIterator *ColIterator(const std::vector &fset) { + virtual utils::IIterator *ColIterator(const std::vector &fset) { size_t ncol = this->NumCol(); col_index_.resize(0); for (size_t i = 0; i < fset.size(); ++i) { - if (fset[i] < ncol) col_index_.push_back(fset[i]); + if (fset[i] < ncol) col_index_.push_back(fset[i]); } col_iter_.SetIndexSet(col_index_, false); col_iter_.BeforeFirst(); @@ -255,13 +261,13 @@ class FMatrixPage : public IFMatrix { // set the cache file name inline void set_cache_file(const std::string &cache_file) { col_data_name_ = std::string(cache_file) + ".col.blob"; - col_meta_name_ = std::string(cache_file) + ".col.meta"; + col_meta_name_ = std::string(cache_file) + ".col.meta"; } protected: inline bool TryLoadColData(void) { std::FILE *fi = fopen64(col_meta_name_.c_str(), "rb"); - if (fi == NULL) return false; + if (fi == NULL) return false; utils::FileStream fs(fi); LoadMeta(&fs); fs.Close(); @@ -306,12 +312,12 @@ class FMatrixPage : public IFMatrix { SparsePage *pcol; while (citer.Next(pcol)) { for (size_t i = 0; i < pcol->Size(); ++i) { - col_size_[i] += pcol->offset[i + 1] - pcol->offset[i]; + col_size_[i] += pcol->offset[i + 1] - pcol->offset[i]; } pcol->Save(&fo); size_t spage = pcol->MemCostBytes(); bytes_write += spage; - double tnow = rabit::utils::GetTime(); + double tnow = rabit::utils::GetTime(); double tdiff = tnow - tstart; utils::Printf("Writting to %s in %g MB/s, %lu MB written current speed:%g MB/s\n", col_data_name_.c_str(), diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp index 3876c21ad..190cbdcdf 100644 --- a/src/io/simple_dmatrix-inl.hpp +++ b/src/io/simple_dmatrix-inl.hpp @@ -1,13 +1,15 @@ -#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_ -#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_ /*! 
+ * Copyright 2014 by Contributors * \file simple_dmatrix-inl.hpp - * \brief simple implementation of DMatrixS that can be used + * \brief simple implementation of DMatrixS that can be used * the data format of xgboost is templatized, which means it can accept * any data structure that implements the function defined by FMatrix * this file is a specific implementation of input data structure that can be used by BoostLearner * \author Tianqi Chen */ +#ifndef XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_ +#define XGBOOST_IO_SIMPLE_DMATRIX_INL_HPP_ + #include #include #include @@ -119,13 +121,13 @@ class DMatrixSimple : public DataMatrix { for (size_t i = 0; i < batch.data.size(); ++i) { info.info.num_col = std::max(info.info.num_col, static_cast(batch.data[i].index+1)); - } + } } if (!silent) { utils::Printf("%lux%lu matrix with %lu entries is loaded from %s\n", - static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size()), uri); + static_cast(info.num_row()), // NOLINT(*) + static_cast(info.num_col()), // NOLINT(*) + static_cast(row_data_.size()), uri); // NOLINT(*) } // try to load in additional file if (!loadsplit) { @@ -141,7 +143,7 @@ class DMatrixSimple : public DataMatrix { "DMatrix: weight data does not match the number of rows in features"); } std::string mname = name + ".base_margin"; - if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) { + if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) { } } } @@ -165,10 +167,11 @@ class DMatrixSimple : public DataMatrix { * \param silent whether print information during loading * \param fname file name, used to print message */ - inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) { + inline void LoadBinary(utils::IStream &fs, bool silent = false, const char *fname = NULL) { // NOLINT(*) int tmagic; utils::Check(fs.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format"); - utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch", fname == NULL ? "" : fname); + utils::Check(tmagic == kMagic, "\"%s\" invalid format, magic number mismatch", + fname == NULL ? 
"" : fname); info.LoadBinary(fs); LoadBinary(fs, &row_ptr_, &row_data_); @@ -176,9 +179,9 @@ class DMatrixSimple : public DataMatrix { if (!silent) { utils::Printf("%lux%lu matrix with %lu entries is loaded", - static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size())); + static_cast(info.num_row()), // NOLINT(*) + static_cast(info.num_col()), // NOLINT(*) + static_cast(row_data_.size())); // NOLINT(*) if (fname != NULL) { utils::Printf(" from %s\n", fname); } else { @@ -205,9 +208,9 @@ class DMatrixSimple : public DataMatrix { if (!silent) { utils::Printf("%lux%lu matrix with %lu entries is saved to %s\n", - static_cast(info.num_row()), - static_cast(info.num_col()), - static_cast(row_data_.size()), fname); + static_cast(info.num_row()), // NOLINT(*) + static_cast(info.num_col()), // NOLINT(*) + static_cast(row_data_.size()), fname); // NOLINT(*) if (info.group_ptr.size() != 0) { utils::Printf("data contains %u groups\n", static_cast(info.group_ptr.size()-1)); @@ -256,7 +259,7 @@ class DMatrixSimple : public DataMatrix { * \param ptr pointer data * \param data data content */ - inline static void SaveBinary(utils::IStream &fo, + inline static void SaveBinary(utils::IStream &fo, // NOLINT(*) const std::vector &ptr, const std::vector &data) { size_t nrow = ptr.size() - 1; @@ -272,7 +275,7 @@ class DMatrixSimple : public DataMatrix { * \param out_ptr pointer data * \param out_data data content */ - inline static void LoadBinary(utils::IStream &fi, + inline static void LoadBinary(utils::IStream &fi, // NOLINT(*) std::vector *out_ptr, std::vector *out_data) { size_t nrow; @@ -314,7 +317,7 @@ class DMatrixSimple : public DataMatrix { DMatrixSimple *parent_; // temporal space for batch RowBatch batch_; - }; + }; }; } // namespace io } // namespace xgboost diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp index 1d704c4f8..0e0da4461 100644 --- a/src/io/simple_fmatrix-inl.hpp +++ b/src/io/simple_fmatrix-inl.hpp @@ -1,11 +1,15 @@ -#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_ -#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file simple_fmatrix-inl.hpp * \brief the input data structure for gradient boosting * \author Tianqi Chen */ +#ifndef XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_ +#define XGBOOST_IO_SIMPLE_FMATRIX_INL_HPP_ + #include +#include +#include #include "../data.h" #include "../utils/utils.h" #include "../utils/random.h" @@ -30,7 +34,7 @@ class FMatrixS : public IFMatrix { } // destructor virtual ~FMatrixS(void) { - if (iter_ != NULL) delete iter_; + if (iter_ != NULL) delete iter_; } /*! 
\return whether column access is enabled */ virtual bool HaveColAccess(void) const { @@ -54,7 +58,7 @@ class FMatrixS : public IFMatrix { size_t nmiss = buffered_rowset_.size() - col_size_[cidx]; return 1.0f - (static_cast(nmiss)) / buffered_rowset_.size(); } - virtual void InitColAccess(const std::vector &enabled, + virtual void InitColAccess(const std::vector &enabled, float pkeep, size_t max_row_perbatch) { if (this->HaveColAccess()) return; this->InitColData(enabled, pkeep, max_row_perbatch); @@ -85,7 +89,7 @@ class FMatrixS : public IFMatrix { size_t ncol = this->NumCol(); col_iter_.col_index_.resize(0); for (size_t i = 0; i < fset.size(); ++i) { - if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]); + if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]); } col_iter_.BeforeFirst(); return &col_iter_; @@ -94,7 +98,7 @@ class FMatrixS : public IFMatrix { * \brief save column access data into stream * \param fo output stream to save to */ - inline void SaveColAccess(utils::IStream &fo) const { + inline void SaveColAccess(utils::IStream &fo) const { // NOLINT(*) size_t n = 0; fo.Write(&n, sizeof(n)); } @@ -102,10 +106,10 @@ class FMatrixS : public IFMatrix { * \brief load column access data from stream * \param fo output stream to load from */ - inline void LoadColAccess(utils::IStream &fi) { + inline void LoadColAccess(utils::IStream &fi) { // NOLINT(*) // do nothing in load col access } - + protected: /*! * \brief intialize column data @@ -129,7 +133,7 @@ class FMatrixS : public IFMatrix { for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) { SparsePage *pcol = col_iter_.cpages_[i]; for (size_t j = 0; j < pcol->Size(); ++j) { - col_size_[j] += pcol->offset[j + 1] - pcol->offset[j]; + col_size_[j] += pcol->offset[j + 1] - pcol->offset[j]; } } } @@ -139,7 +143,7 @@ class FMatrixS : public IFMatrix { * \param pcol the target column */ inline void MakeOneBatch(const std::vector &enabled, - float pkeep, + float pkeep, SparsePage *pcol) { // clear rowset buffered_rowset_.clear(); @@ -159,8 +163,8 @@ class FMatrixS : public IFMatrix { while (iter_->Next()) { const RowBatch &batch = iter_->Value(); bmap.resize(bmap.size() + batch.size, true); - long batch_size = static_cast(batch.size); - for (long i = 0; i < batch_size; ++i) { + long batch_size = static_cast(batch.size); // NOLINT(*) + for (long i = 0; i < batch_size; ++i) { // NOLINT(*) bst_uint ridx = static_cast(batch.base_rowid + i); if (pkeep == 1.0f || random::SampleBinary(pkeep)) { buffered_rowset_.push_back(ridx); @@ -169,13 +173,13 @@ class FMatrixS : public IFMatrix { } } #pragma omp parallel for schedule(static) - for (long i = 0; i < batch_size; ++i) { + for (long i = 0; i < batch_size; ++i) { // NOLINT(*) int tid = omp_get_thread_num(); bst_uint ridx = static_cast(batch.base_rowid + i); if (bmap[ridx]) { RowBatch::Inst inst = batch[i]; for (bst_uint j = 0; j < inst.length; ++j) { - if (enabled[inst[j].index]){ + if (enabled[inst[j].index]) { builder.AddBudget(inst[j].index, tid); } } @@ -183,18 +187,18 @@ class FMatrixS : public IFMatrix { } } builder.InitStorage(); - + iter_->BeforeFirst(); while (iter_->Next()) { const RowBatch &batch = iter_->Value(); #pragma omp parallel for schedule(static) - for (long i = 0; i < static_cast(batch.size); ++i) { + for (long i = 0; i < static_cast(batch.size); ++i) { // NOLINT(*) int tid = omp_get_thread_num(); bst_uint ridx = static_cast(batch.base_rowid + i); if (bmap[ridx]) { RowBatch::Inst inst = batch[i]; for (bst_uint j = 0; j < inst.length; ++j) { - if 
(enabled[inst[j].index]) { + if (enabled[inst[j].index]) { builder.Push(inst[j].index, Entry((bst_uint)(batch.base_rowid+i), inst[j].fvalue), tid); @@ -261,7 +265,7 @@ class FMatrixS : public IFMatrix { #pragma omp parallel { nthread = omp_get_num_threads(); - int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1); + int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1); if (nthread > max_nthread) { nthread = max_nthread; } @@ -277,7 +281,7 @@ class FMatrixS : public IFMatrix { RowBatch::Inst inst = batch[i]; for (bst_uint j = 0; j < inst.length; ++j) { const SparseBatch::Entry &e = inst[j]; - if (enabled[e.index]) { + if (enabled[e.index]) { builder.AddBudget(e.index, tid); } } @@ -330,10 +334,10 @@ class FMatrixS : public IFMatrix { static_cast(pcol->offset[ridx + 1] - pcol->offset[ridx])); } batch_.col_index = BeginPtr(col_index_); - batch_.col_data = BeginPtr(col_data_); + batch_.col_data = BeginPtr(col_data_); return true; } - virtual const ColBatch &Value(void) const { + virtual const ColBatch &Value(void) const { return batch_; } inline void Clear(void) { @@ -347,7 +351,7 @@ class FMatrixS : public IFMatrix { // column content std::vector col_data_; // column sparse pages - std::vector cpages_; + std::vector cpages_; // data pointer size_t data_ptr_; // temporal space for batch @@ -357,7 +361,7 @@ class FMatrixS : public IFMatrix { // column iterator ColBatchIter col_iter_; // shared meta info with DMatrix - const learner::MetaInfo &info_; + const learner::MetaInfo &info_; // row iterator utils::IIterator *iter_; /*! \brief list of row index that are buffered */ @@ -367,4 +371,4 @@ class FMatrixS : public IFMatrix { }; } // namespace io } // namespace xgboost -#endif // XGBOOST_IO_SLICE_FMATRIX_INL_HPP +#endif // XGBOOST_IO_SLICE_FMATRIX_INL_HPP_ diff --git a/src/io/sparse_batch_page.h b/src/io/sparse_batch_page.h index d94141a6e..24546f785 100644 --- a/src/io/sparse_batch_page.h +++ b/src/io/sparse_batch_page.h @@ -1,18 +1,22 @@ -#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_ -#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_ /*! + * Copyright (c) 2014 by Contributors * \file sparse_batch_page.h * content holder of sparse batch that can be saved to disk * the representation can be effectively * use in external memory computation * \author Tianqi Chen */ +#ifndef XGBOOST_IO_SPARSE_BATCH_PAGE_H_ +#define XGBOOST_IO_SPARSE_BATCH_PAGE_H_ + +#include +#include #include "../data.h" namespace xgboost { namespace io { /*! - * \brief storage unit of sparse batch + * \brief storage unit of sparse batch */ class SparsePage { public: @@ -96,7 +100,7 @@ class SparsePage { } /*! * \brief save the data to fo, when a page was written - * to disk it must contain all the elements in the + * to disk it must contain all the elements in the * \param fo output stream */ inline void Save(utils::IStream *fo) const { @@ -124,7 +128,7 @@ class SparsePage { */ inline bool PushLoad(utils::IStream *fi) { if (!fi->Read(&disk_offset_)) return false; - data.resize(offset.back() + disk_offset_.back()); + data.resize(offset.back() + disk_offset_.back()); if (disk_offset_.back() != 0) { utils::Check(fi->Read(BeginPtr(data) + offset.back(), disk_offset_.back() * sizeof(SparseBatch::Entry)) != 0, @@ -138,7 +142,7 @@ class SparsePage { } return true; } - /*! + /*! * \brief Push row batch into the page * \param batch the row batch */ @@ -154,7 +158,7 @@ class SparsePage { offset[i + begin] = top + batch.ind_ptr[i + 1] - batch.ind_ptr[0]; } } - /*! + /*! 
* \brief Push a sparse page * \param batch the row page */ @@ -170,7 +174,7 @@ class SparsePage { offset[i + begin] = top + batch.offset[i + 1]; } } - /*! + /*! * \brief Push one instance into page * \param row an instance row */ @@ -202,7 +206,7 @@ class SparsePage { }; /*! * \brief factory class for SparsePage, - * used in threadbuffer template + * used in threadbuffer template */ class SparsePageFactory { public: @@ -217,7 +221,7 @@ class SparsePageFactory { return action_index_set_; } // set index set, will be used after next before first - inline void SetIndexSet(const std::vector &index_set, + inline void SetIndexSet(const std::vector &index_set, bool load_all) { set_load_all_ = load_all; if (!set_load_all_) { @@ -229,7 +233,7 @@ class SparsePageFactory { return true; } inline void SetParam(const char *name, const char *val) {} - inline bool LoadNext(SparsePage *val) { + inline bool LoadNext(SparsePage *val) { if (!action_load_all_) { if (action_index_set_.size() == 0) { return false; diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp index 769e3be3b..773001503 100644 --- a/src/xgboost_main.cpp +++ b/src/xgboost_main.cpp @@ -1,18 +1,20 @@ +// Copyright 2014 by Contributors #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX #include #include #include +#include #include "./sync/sync.h" -#include "io/io.h" -#include "utils/utils.h" -#include "utils/config.h" -#include "learner/learner-inl.hpp" +#include "./io/io.h" +#include "./utils/utils.h" +#include "./utils/config.h" +#include "./learner/learner-inl.hpp" namespace xgboost { /*! - * \brief wrapping the training process + * \brief wrapping the training process */ class BoostLearnTask { public: @@ -20,7 +22,7 @@ class BoostLearnTask { if (argc < 2) { printf("Usage: \n"); return 0; - } + } utils::ConfigIterator itr(argv[1]); while (itr.Next()) { this->SetParam(itr.name(), itr.val()); @@ -44,10 +46,10 @@ class BoostLearnTask { } if (rabit::IsDistributed() && data_split == "NONE") { this->SetParam("dsplit", "row"); - } + } if (rabit::GetRank() != 0) { this->SetParam("silent", "2"); - } + } this->InitData(); if (task == "train") { @@ -90,12 +92,14 @@ class BoostLearnTask { if (!strcmp("save_pbuffer", name)) save_with_pbuffer = atoi(val); if (!strncmp("eval[", name, 5)) { char evname[256]; - utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display"); + utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, + "must specify evaluation name for display"); eval_data_names.push_back(std::string(evname)); eval_data_paths.push_back(std::string(val)); } learner.SetParam(name, val); } + public: BoostLearnTask(void) { // default parameters @@ -119,12 +123,13 @@ class BoostLearnTask { save_with_pbuffer = 0; data = NULL; } - ~BoostLearnTask(void){ - for (size_t i = 0; i < deval.size(); i++){ + ~BoostLearnTask(void) { + for (size_t i = 0; i < deval.size(); i++) { delete deval[i]; } if (data != NULL) delete data; } + private: inline void InitData(void) { if (strchr(train_path.c_str(), '%') != NULL) { @@ -151,14 +156,14 @@ class BoostLearnTask { loadsplit)); devalall.push_back(deval.back()); } - + std::vector dcache(1, data); - for (size_t i = 0; i < deval.size(); ++ i) { + for (size_t i = 0; i < deval.size(); ++i) { dcache.push_back(deval[i]); } // set cache data to be all training and evaluation data learner.SetCacheData(dcache); - + // add training set to evaluation set if needed if (eval_train != 0) { devalall.push_back(data); @@ -178,13 +183,13 @@ class BoostLearnTask { int 
version = rabit::LoadCheckPoint(&learner); if (version == 0) this->InitLearner(); const time_t start = time(NULL); - unsigned long elapsed = 0; + unsigned long elapsed = 0; // NOLINT(*) learner.CheckInit(data); bool allow_lazy = learner.AllowLazyCheckPoint(); for (int i = version / 2; i < num_round; ++i) { - elapsed = (unsigned long)(time(NULL) - start); - if (version % 2 == 0) { + elapsed = (unsigned long)(time(NULL) - start); // NOLINT(*) + if (version % 2 == 0) { if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed); learner.UpdateOneIter(i, *data); if (allow_lazy) { @@ -196,7 +201,7 @@ class BoostLearnTask { } utils::Assert(version == rabit::VersionNumber(), "consistent check"); std::string res = learner.EvalOneIter(i, devalall, eval_data_names); - if (rabit::IsDistributed()){ + if (rabit::IsDistributed()) { if (rabit::GetRank() == 0) { rabit::TrackerPrintf("%s\n", res.c_str()); } @@ -215,29 +220,29 @@ class BoostLearnTask { } version += 1; utils::Assert(version == rabit::VersionNumber(), "consistent check"); - elapsed = (unsigned long)(time(NULL) - start); + elapsed = (unsigned long)(time(NULL) - start); // NOLINT(*) } // always save final round if ((save_period == 0 || num_round % save_period != 0) && model_out != "NONE") { - if (model_out == "NULL"){ + if (model_out == "NULL") { this->SaveModel(num_round - 1); } else { this->SaveModel(model_out.c_str()); } } - if (!silent){ + if (!silent) { printf("\nupdating end, %lu sec in all\n", elapsed); } } inline void TaskEval(void) { learner.EvalOneIter(0, devalall, eval_data_names); } - inline void TaskDump(void){ + inline void TaskDump(void) { FILE *fo = utils::FopenCheck(name_dump.c_str(), "w"); std::vector dump = learner.DumpModel(fmap, dump_model_stats != 0); - for (size_t i = 0; i < dump.size(); ++ i) { - fprintf(fo,"booster[%lu]:\n", i); - fprintf(fo,"%s", dump[i].c_str()); + for (size_t i = 0; i < dump.size(); ++i) { + fprintf(fo, "booster[%lu]:\n", i); + fprintf(fo, "%s", dump[i].c_str()); } fclose(fo); } @@ -247,14 +252,15 @@ class BoostLearnTask { } inline void SaveModel(int i) const { char fname[256]; - sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1); + utils::SPrintf(fname, sizeof(fname), + "%s/%04d.model", model_dir_path.c_str(), i + 1); this->SaveModel(fname); } inline void TaskPred(void) { std::vector preds; if (!silent) printf("start prediction...\n"); learner.Predict(*data, pred_margin != 0, &preds, ntree_limit); - if (!silent) printf("writing prediction to %s\n", name_pred.c_str()); + if (!silent) printf("writing prediction to %s\n", name_pred.c_str()); FILE *fo; if (name_pred != "stdout") { fo = utils::FopenCheck(name_pred.c_str(), "w"); @@ -266,6 +272,7 @@ class BoostLearnTask { } if (fo != stdout) fclose(fo); } + private: /*! \brief whether silent */ int silent; @@ -273,7 +280,7 @@ class BoostLearnTask { int load_part; /*! \brief whether use auto binary buffer */ int use_buffer; - /*! \brief whether evaluate training statistics */ + /*! \brief whether evaluate training statistics */ int eval_train; /*! \brief number of boosting iterations */ int num_round; @@ -309,6 +316,7 @@ class BoostLearnTask { std::vector eval_data_paths; /*! 
\brief the names of the evaluation data used in output log */ std::vector eval_data_names; + private: io::DataMatrix* data; std::vector deval; @@ -316,9 +324,9 @@ class BoostLearnTask { utils::FeatMap fmap; learner::BoostLearner learner; }; -} +} // namespace xgboost -int main(int argc, char *argv[]){ +int main(int argc, char *argv[]) { xgboost::BoostLearnTask tsk; tsk.SetParam("seed", "0"); int ret = tsk.Run(argc, argv); From 57ec9222145621e915a412196959604850e89474 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 19:42:44 -0700 Subject: [PATCH 29/59] fix all cpp lint --- R-package/src/xgboost_R.cpp | 33 +-- R-package/src/xgboost_R.h | 32 +-- R-package/src/xgboost_assert.c | 9 +- wrapper/xgboost_wrapper.cpp | 470 +++++++++++++++++---------------- wrapper/xgboost_wrapper.h | 46 ++-- 5 files changed, 302 insertions(+), 288 deletions(-) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index de6ed339f..436faaa5a 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -1,9 +1,10 @@ +// Copyright (c) 2014 by Contributors #include #include #include #include #include -#include +#include #include "wrapper/xgboost_wrapper.h" #include "src/utils/utils.h" #include "src/utils/omp.h" @@ -34,7 +35,7 @@ bool CheckNAN(double v) { bool LogGamma(double v) { return lgammafn(v); } -} // namespace utils +} // namespace utils namespace random { void Seed(unsigned seed) { @@ -62,7 +63,7 @@ extern "C" { SEXP XGCheckNullPtr_R(SEXP handle) { return ScalarLogical(R_ExternalPtrAddr(handle) == NULL); } - void _DMatrixFinalizer(SEXP ext) { + void _DMatrixFinalizer(SEXP ext) { if (R_ExternalPtrAddr(ext) == NULL) return; XGDMatrixFree(R_ExternalPtrAddr(ext)); R_ClearExternalPtr(ext); @@ -76,7 +77,7 @@ extern "C" { UNPROTECT(1); return ret; } - SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing) { _WrapperBegin(); SEXP dim = getAttrib(mat, R_DimSymbol); @@ -95,7 +96,7 @@ extern "C" { SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); - return ret; + return ret; } SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, @@ -138,7 +139,7 @@ extern "C" { SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); UNPROTECT(1); - return ret; + return ret; } void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { _WrapperBegin(); @@ -152,7 +153,7 @@ extern "C" { const char *name = CHAR(asChar(field)); if (!strcmp("group", name)) { std::vector vec(len); - #pragma omp parallel for schedule(static) + #pragma omp parallel for schedule(static) for (int i = 0; i < len; ++i) { vec[i] = static_cast(INTEGER(array)[i]); } @@ -163,7 +164,7 @@ extern "C" { for (int i = 0; i < len; ++i) { vec[i] = REAL(array)[i]; } - XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), + XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), BeginPtr(vec), len); } @@ -187,7 +188,7 @@ extern "C" { return ScalarInteger(static_cast(nrow)); } // functions related to booster - void _BoosterFinalizer(SEXP ext) { + void _BoosterFinalizer(SEXP ext) { if (R_ExternalPtrAddr(ext) == NULL) return; XGBoosterFree(R_ExternalPtrAddr(ext)); R_ClearExternalPtr(ext); @@ -196,7 +197,7 @@ extern "C" { _WrapperBegin(); int len = length(dmats); std::vector dvec; - for (int i = 0; i < len; ++i){ + for (int i = 0; i < len; ++i) { dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); } void *handle = 
XGBoosterCreate(BeginPtr(dvec), dvec.size()); @@ -252,7 +253,7 @@ extern "C" { const char *ret = XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), asInteger(iter), - BeginPtr(vec_dmats), BeginPtr(vec_sptr), len); + BeginPtr(vec_dmats), BeginPtr(vec_sptr), len); _WrapperEnd(); return mkString(ret); } @@ -282,7 +283,7 @@ extern "C" { XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); _WrapperEnd(); } - void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { + void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { _WrapperBegin(); XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle), RAW(raw), @@ -298,7 +299,7 @@ extern "C" { if (olen != 0) { memcpy(RAW(ret), raw, olen); } - UNPROTECT(1); + UNPROTECT(1); return ret; } SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) { @@ -310,10 +311,10 @@ extern "C" { asInteger(with_stats), &olen); _WrapperEnd(); - SEXP out = PROTECT(allocVector(STRSXP, olen)); - for (size_t i = 0; i < olen; ++i) { + SEXP out = PROTECT(allocVector(STRSXP, olen)); + for (size_t i = 0; i < olen; ++i) { stringstream stream; - stream << "booster["< #include @@ -19,7 +21,7 @@ extern "C" { */ SEXP XGCheckNullPtr_R(SEXP handle); /*! - * \brief load a data matrix + * \brief load a data matrix * \param fname name of the content * \param silent whether print messages * \return a loaded data matrix @@ -32,9 +34,9 @@ extern "C" { * \param missing which value to represent missing value * \return created dmatrix */ - SEXP XGDMatrixCreateFromMat_R(SEXP mat, + SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing); - /*! + /*! * \brief create a matrix content from CSC format * \param indptr pointer to column headers * \param indices row indices @@ -70,26 +72,26 @@ extern "C" { * \param handle a instance of data matrix * \param field field name * \return info vector - */ + */ SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field); /*! * \brief return number of rows * \param handle a instance of data matrix */ SEXP XGDMatrixNumRow_R(SEXP handle); - /*! - * \brief create xgboost learner + /*! + * \brief create xgboost learner * \param dmats a list of dmatrix handles that will be cached - */ + */ SEXP XGBoosterCreate_R(SEXP dmats); - /*! - * \brief set parameters + /*! + * \brief set parameters * \param handle handle * \param name parameter name * \param val value of parameter */ void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val); - /*! + /*! * \brief update the model in one round using dtrain * \param handle handle * \param iter current iteration rounds @@ -132,12 +134,12 @@ extern "C" { * \brief save model into existing file * \param handle handle * \param fname file name - */ + */ void XGBoosterSaveModel_R(SEXP handle, SEXP fname); /*! * \brief load model from raw array * \param handle handle - */ + */ void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw); /*! * \brief save model into R's raw array @@ -153,4 +155,4 @@ extern "C" { */ SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats); } -#endif // XGBOOST_WRAPPER_R_H_ +#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*) diff --git a/R-package/src/xgboost_assert.c b/R-package/src/xgboost_assert.c index 20b789492..072074243 100644 --- a/R-package/src/xgboost_assert.c +++ b/R-package/src/xgboost_assert.c @@ -1,3 +1,4 @@ +// Copyright (c) 2014 by Contributors #include #include #include @@ -6,17 +7,17 @@ void XGBoostAssert_R(int exp, const char *fmt, ...) 
{ char buf[1024]; if (exp == 0) { - va_list args; + va_list args; va_start(args, fmt); vsprintf(buf, fmt, args); va_end(args); error("AssertError:%s\n", buf); - } + } } void XGBoostCheck_R(int exp, const char *fmt, ...) { char buf[1024]; if (exp == 0) { - va_list args; + va_list args; va_start(args, fmt); vsprintf(buf, fmt, args); va_end(args); @@ -25,7 +26,7 @@ void XGBoostCheck_R(int exp, const char *fmt, ...) { } int XGBoostSPrintf_R(char *buf, size_t size, const char *fmt, ...) { int ret; - va_list args; + va_list args; va_start(args, fmt); ret = vsnprintf(buf, size, fmt, args); va_end(args); diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 4d7828faf..8572316f0 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -1,3 +1,4 @@ +// Copyright (c) 2014 by Contributors // implementations in ctypes #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE @@ -31,9 +32,11 @@ class Booster: public learner::BoostLearner { this->init_model = false; this->SetCacheData(mats); } - inline const float *Pred(const DataMatrix &dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) { + inline const float *Pred(const DataMatrix &dmat, int option_mask, + unsigned ntree_limit, bst_ulong *len) { this->CheckInitModel(); - this->Predict(dmat, (option_mask&1) != 0, &this->preds_, ntree_limit, (option_mask&2) != 0); + this->Predict(dmat, (option_mask&1) != 0, &this->preds_, + ntree_limit, (option_mask&2) != 0); *len = static_cast(this->preds_.size()); return BeginPtr(this->preds_); } @@ -57,9 +60,9 @@ class Booster: public learner::BoostLearner { this->init_model = true; } inline void LoadModelFromBuffer(const void *buf, size_t size) { - utils::MemoryFixSizeBuffer fs((void*)buf, size); + utils::MemoryFixSizeBuffer fs((void*)buf, size); // NOLINT(*) learner::BoostLearner::LoadModel(fs, true); - this->init_model = true; + this->init_model = true; } inline const char *GetModelRaw(bst_ulong *out_len) { this->CheckInitModel(); @@ -99,246 +102,247 @@ class Booster: public learner::BoostLearner { using namespace xgboost::wrapper; -extern "C"{ - void* XGDMatrixCreateFromFile(const char *fname, int silent) { - return LoadDataMatrix(fname, silent != 0, false, false); +void* XGDMatrixCreateFromFile(const char *fname, int silent) { + return LoadDataMatrix(fname, silent != 0, false, false); +} +void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem) { + DMatrixSimple *p_mat = new DMatrixSimple(); + DMatrixSimple &mat = *p_mat; + mat.row_ptr_.resize(nindptr); + for (bst_ulong i = 0; i < nindptr; ++i) { + mat.row_ptr_[i] = static_cast(indptr[i]); } - void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem) { - DMatrixSimple *p_mat = new DMatrixSimple(); - DMatrixSimple &mat = *p_mat; - mat.row_ptr_.resize(nindptr); - for (bst_ulong i = 0; i < nindptr; ++i) { - mat.row_ptr_[i] = static_cast(indptr[i]); - } - mat.row_data_.resize(nelem); - for (bst_ulong i = 0; i < nelem; ++i) { - mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]); - mat.info.info.num_col = std::max(mat.info.info.num_col, - static_cast(indices[i]+1)); - } - mat.info.info.num_row = nindptr - 1; - return p_mat; + mat.row_data_.resize(nelem); + for (bst_ulong i = 0; i < nelem; ++i) { + mat.row_data_[i] = RowBatch::Entry(indices[i], data[i]); + mat.info.info.num_col = std::max(mat.info.info.num_col, + 
static_cast(indices[i]+1)); } - XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem) { - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } - - DMatrixSimple *p_mat = new DMatrixSimple(); - DMatrixSimple &mat = *p_mat; - utils::ParallelGroupBuilder builder(&mat.row_ptr_, &mat.row_data_); - builder.InitBudget(0, nthread); - long ncol = static_cast(nindptr - 1); - #pragma omp parallel for schedule(static) - for (long i = 0; i < ncol; ++i) { - int tid = omp_get_thread_num(); - for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) { - builder.AddBudget(indices[j], tid); - } - } - builder.InitStorage(); - #pragma omp parallel for schedule(static) - for (long i = 0; i < ncol; ++i) { - int tid = omp_get_thread_num(); - for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) { - builder.Push(indices[j], - RowBatch::Entry(static_cast(i), data[j]), - tid); - } - } - mat.info.info.num_row = mat.row_ptr_.size() - 1; - mat.info.info.num_col = static_cast(ncol); - return p_mat; + mat.info.info.num_row = nindptr - 1; + return p_mat; +} +void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem) { + int nthread; + #pragma omp parallel + { + nthread = omp_get_num_threads(); } - void* XGDMatrixCreateFromMat(const float *data, - bst_ulong nrow, - bst_ulong ncol, - float missing) { - bool nan_missing = utils::CheckNAN(missing); - DMatrixSimple *p_mat = new DMatrixSimple(); - DMatrixSimple &mat = *p_mat; - mat.info.info.num_row = nrow; - mat.info.info.num_col = ncol; - for (bst_ulong i = 0; i < nrow; ++i, data += ncol) { - bst_ulong nelem = 0; - for (bst_ulong j = 0; j < ncol; ++j) { - if (utils::CheckNAN(data[j])) { - utils::Check(nan_missing, - "There are NAN in the matrix, however, you did not set missing=NAN"); - } else { - if (nan_missing || data[j] != missing) { - mat.row_data_.push_back(RowBatch::Entry(j, data[j])); - ++nelem; - } + + DMatrixSimple *p_mat = new DMatrixSimple(); + DMatrixSimple &mat = *p_mat; + utils::ParallelGroupBuilder builder(&mat.row_ptr_, &mat.row_data_); + builder.InitBudget(0, nthread); + long ncol = static_cast(nindptr - 1); // NOLINT(*) + #pragma omp parallel for schedule(static) + for (long i = 0; i < ncol; ++i) { // NOLINT(*) + int tid = omp_get_thread_num(); + for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) { + builder.AddBudget(indices[j], tid); + } + } + builder.InitStorage(); + #pragma omp parallel for schedule(static) + for (long i = 0; i < ncol; ++i) { // NOLINT(*) + int tid = omp_get_thread_num(); + for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) { + builder.Push(indices[j], + RowBatch::Entry(static_cast(i), data[j]), + tid); + } + } + mat.info.info.num_row = mat.row_ptr_.size() - 1; + mat.info.info.num_col = static_cast(ncol); + return p_mat; +} +void* XGDMatrixCreateFromMat(const float *data, + bst_ulong nrow, + bst_ulong ncol, + float missing) { + bool nan_missing = utils::CheckNAN(missing); + DMatrixSimple *p_mat = new DMatrixSimple(); + DMatrixSimple &mat = *p_mat; + mat.info.info.num_row = nrow; + mat.info.info.num_col = ncol; + for (bst_ulong i = 0; i < nrow; ++i, data += ncol) { + bst_ulong nelem = 0; + for (bst_ulong j = 0; j < ncol; ++j) { + if (utils::CheckNAN(data[j])) { + utils::Check(nan_missing, + "There are NAN in the matrix, however, you did not set missing=NAN"); + } else { + if (nan_missing || data[j] != missing) { + 
mat.row_data_.push_back(RowBatch::Entry(j, data[j])); + ++nelem; } } - mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem); } - return p_mat; + mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem); } - void* XGDMatrixSliceDMatrix(void *handle, - const int *idxset, - bst_ulong len) { - DMatrixSimple tmp; - DataMatrix &dsrc = *static_cast(handle); - if (dsrc.magic != DMatrixSimple::kMagic) { - tmp.CopyFrom(dsrc); - } - DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ? - *static_cast(handle): tmp); - DMatrixSimple *p_ret = new DMatrixSimple(); - DMatrixSimple &ret = *p_ret; + return p_mat; +} +void* XGDMatrixSliceDMatrix(void *handle, + const int *idxset, + bst_ulong len) { + DMatrixSimple tmp; + DataMatrix &dsrc = *static_cast(handle); + if (dsrc.magic != DMatrixSimple::kMagic) { + tmp.CopyFrom(dsrc); + } + DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ? + *static_cast(handle): tmp); + DMatrixSimple *p_ret = new DMatrixSimple(); + DMatrixSimple &ret = *p_ret; - utils::Check(src.info.group_ptr.size() == 0, - "slice does not support group structure"); - ret.Clear(); - ret.info.info.num_row = len; - ret.info.info.num_col = src.info.num_col(); + utils::Check(src.info.group_ptr.size() == 0, + "slice does not support group structure"); + ret.Clear(); + ret.info.info.num_row = len; + ret.info.info.num_col = src.info.num_col(); - utils::IIterator *iter = src.fmat()->RowIterator(); - iter->BeforeFirst(); - utils::Assert(iter->Next(), "slice"); - const RowBatch &batch = iter->Value(); - for (bst_ulong i = 0; i < len; ++i) { - const int ridx = idxset[i]; - RowBatch::Inst inst = batch[ridx]; - utils::Check(static_cast(ridx) < batch.size, "slice index exceed number of rows"); - ret.row_data_.resize(ret.row_data_.size() + inst.length); - memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, - sizeof(RowBatch::Entry) * inst.length); - ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length); - if (src.info.labels.size() != 0) { - ret.info.labels.push_back(src.info.labels[ridx]); - } - if (src.info.weights.size() != 0) { - ret.info.weights.push_back(src.info.weights[ridx]); - } - if (src.info.info.root_index.size() != 0) { - ret.info.info.root_index.push_back(src.info.info.root_index[ridx]); - } - if (src.info.info.fold_index.size() != 0) { - ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]); - } + utils::IIterator *iter = src.fmat()->RowIterator(); + iter->BeforeFirst(); + utils::Assert(iter->Next(), "slice"); + const RowBatch &batch = iter->Value(); + for (bst_ulong i = 0; i < len; ++i) { + const int ridx = idxset[i]; + RowBatch::Inst inst = batch[ridx]; + utils::Check(static_cast(ridx) < batch.size, "slice index exceed number of rows"); + ret.row_data_.resize(ret.row_data_.size() + inst.length); + memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, + sizeof(RowBatch::Entry) * inst.length); + ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length); + if (src.info.labels.size() != 0) { + ret.info.labels.push_back(src.info.labels[ridx]); } - return p_ret; - } - void XGDMatrixFree(void *handle) { - delete static_cast(handle); - } - void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { - SaveDataMatrix(*static_cast(handle), fname, silent != 0); - } - void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) { - std::vector &vec = - static_cast(handle)->info.GetFloatInfo(field); - vec.resize(len); - memcpy(BeginPtr(vec), info, sizeof(float) * len); - } - void XGDMatrixSetUIntInfo(void *handle, const char *field, const 
unsigned *info, bst_ulong len) { - std::vector &vec = - static_cast(handle)->info.GetUIntInfo(field); - vec.resize(len); - memcpy(BeginPtr(vec), info, sizeof(unsigned) * len); - } - void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) { - DataMatrix *pmat = static_cast(handle); - pmat->info.group_ptr.resize(len + 1); - pmat->info.group_ptr[0] = 0; - for (uint64_t i = 0; i < len; ++i) { - pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i]; + if (src.info.weights.size() != 0) { + ret.info.weights.push_back(src.info.weights[ridx]); + } + if (src.info.info.root_index.size() != 0) { + ret.info.info.root_index.push_back(src.info.info.root_index[ridx]); + } + if (src.info.info.fold_index.size() != 0) { + ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]); } } - const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) { - const std::vector &vec = - static_cast(handle)->info.GetFloatInfo(field); - *len = static_cast(vec.size()); - return BeginPtr(vec); - } - const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) { - const std::vector &vec = - static_cast(handle)->info.GetUIntInfo(field); - *len = static_cast(vec.size()); - return BeginPtr(vec); - } - bst_ulong XGDMatrixNumRow(const void *handle) { - return static_cast(static_cast(handle)->info.num_row()); - } - - // xgboost implementation - void *XGBoosterCreate(void *dmats[], bst_ulong len) { - std::vector mats; - for (bst_ulong i = 0; i < len; ++i) { - DataMatrix *dtr = static_cast(dmats[i]); - mats.push_back(dtr); - } - return new Booster(mats); - } - void XGBoosterFree(void *handle) { - delete static_cast(handle); - } - void XGBoosterSetParam(void *handle, const char *name, const char *value) { - static_cast(handle)->SetParam(name, value); - } - void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) { - Booster *bst = static_cast(handle); - DataMatrix *dtr = static_cast(dtrain); - bst->CheckInitModel(); - bst->CheckInit(dtr); - bst->UpdateOneIter(iter, *dtr); - } - void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, bst_ulong len) { - Booster *bst = static_cast(handle); - DataMatrix *dtr = static_cast(dtrain); - bst->CheckInitModel(); - bst->CheckInit(dtr); - bst->BoostOneIter(*dtr, grad, hess, len); - } - const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], bst_ulong len) { - Booster *bst = static_cast(handle); - std::vector names; - std::vector mats; - for (bst_ulong i = 0; i < len; ++i) { - mats.push_back(static_cast(dmats[i])); - names.push_back(std::string(evnames[i])); - } - bst->CheckInitModel(); - bst->eval_str = bst->EvalOneIter(iter, mats, names); - return bst->eval_str.c_str(); - } - const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, unsigned ntree_limit, bst_ulong *len) { - return static_cast(handle)->Pred(*static_cast(dmat), option_mask, ntree_limit, len); - } - void XGBoosterLoadModel(void *handle, const char *fname) { - static_cast(handle)->LoadModel(fname); - } - void XGBoosterSaveModel(void *handle, const char *fname) { - Booster *bst = static_cast(handle); - bst->CheckInitModel(); - bst->SaveModel(fname, false); - } - void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len) { - static_cast(handle)->LoadModelFromBuffer(buf, len); - } - const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len) { - return static_cast(handle)->GetModelRaw(out_len); - } - const char** 
XGBoosterDumpModel(void *handle, const char *fmap, int with_stats, bst_ulong *len){ - utils::FeatMap featmap; - if (strlen(fmap) != 0) { - featmap.LoadText(fmap); - } - return static_cast(handle)->GetModelDump(featmap, with_stats != 0, len); + return p_ret; +} +void XGDMatrixFree(void *handle) { + delete static_cast(handle); +} +void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { + SaveDataMatrix(*static_cast(handle), fname, silent != 0); +} +void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) { + std::vector &vec = + static_cast(handle)->info.GetFloatInfo(field); + vec.resize(len); + memcpy(BeginPtr(vec), info, sizeof(float) * len); +} +void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) { + std::vector &vec = + static_cast(handle)->info.GetUIntInfo(field); + vec.resize(len); + memcpy(BeginPtr(vec), info, sizeof(unsigned) * len); +} +void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) { + DataMatrix *pmat = static_cast(handle); + pmat->info.group_ptr.resize(len + 1); + pmat->info.group_ptr[0] = 0; + for (uint64_t i = 0; i < len; ++i) { + pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i]; } } +const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) { + const std::vector &vec = + static_cast(handle)->info.GetFloatInfo(field); + *len = static_cast(vec.size()); + return BeginPtr(vec); +} +const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) { + const std::vector &vec = + static_cast(handle)->info.GetUIntInfo(field); + *len = static_cast(vec.size()); + return BeginPtr(vec); +} +bst_ulong XGDMatrixNumRow(const void *handle) { + return static_cast(static_cast(handle)->info.num_row()); +} + +// xgboost implementation +void *XGBoosterCreate(void *dmats[], bst_ulong len) { + std::vector mats; + for (bst_ulong i = 0; i < len; ++i) { + DataMatrix *dtr = static_cast(dmats[i]); + mats.push_back(dtr); + } + return new Booster(mats); +} +void XGBoosterFree(void *handle) { + delete static_cast(handle); +} +void XGBoosterSetParam(void *handle, const char *name, const char *value) { + static_cast(handle)->SetParam(name, value); +} +void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) { + Booster *bst = static_cast(handle); + DataMatrix *dtr = static_cast(dtrain); + bst->CheckInitModel(); + bst->CheckInit(dtr); + bst->UpdateOneIter(iter, *dtr); +} +void XGBoosterBoostOneIter(void *handle, void *dtrain, + float *grad, float *hess, bst_ulong len) { + Booster *bst = static_cast(handle); + DataMatrix *dtr = static_cast(dtrain); + bst->CheckInitModel(); + bst->CheckInit(dtr); + bst->BoostOneIter(*dtr, grad, hess, len); +} +const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], + const char *evnames[], bst_ulong len) { + Booster *bst = static_cast(handle); + std::vector names; + std::vector mats; + for (bst_ulong i = 0; i < len; ++i) { + mats.push_back(static_cast(dmats[i])); + names.push_back(std::string(evnames[i])); + } + bst->CheckInitModel(); + bst->eval_str = bst->EvalOneIter(iter, mats, names); + return bst->eval_str.c_str(); +} +const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, + unsigned ntree_limit, bst_ulong *len) { + return static_cast(handle)->Pred(*static_cast(dmat), + option_mask, ntree_limit, len); +} +void XGBoosterLoadModel(void *handle, const char *fname) { + static_cast(handle)->LoadModel(fname); +} +void XGBoosterSaveModel(void 
*handle, const char *fname) { + Booster *bst = static_cast(handle); + bst->CheckInitModel(); + bst->SaveModel(fname, false); +} +void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len) { + static_cast(handle)->LoadModelFromBuffer(buf, len); +} +const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len) { + return static_cast(handle)->GetModelRaw(out_len); +} +const char** XGBoosterDumpModel(void *handle, const char *fmap, + int with_stats, bst_ulong *len) { + utils::FeatMap featmap; + if (strlen(fmap) != 0) { + featmap.LoadText(fmap); + } + return static_cast(handle)->GetModelDump(featmap, with_stats != 0, len); +} diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index 88a327d0d..3540a3be0 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -1,24 +1,26 @@ -#ifndef XGBOOST_WRAPPER_H_ -#define XGBOOST_WRAPPER_H_ /*! + * Copyright (c) 2014 by Contributors * \file xgboost_wrapper.h * \author Tianqi Chen * \brief a C style wrapper of xgboost * can be used to create wrapper of other languages */ +#ifndef XGBOOST_WRAPPER_H_ +#define XGBOOST_WRAPPER_H_ + #if defined(_MSC_VER) || defined(_WIN32) #define XGB_DLL __declspec(dllexport) #else #define XGB_DLL #endif // manually define unsign long -typedef unsigned long bst_ulong; +typedef unsigned long bst_ulong; // NOLINT(*) #ifdef __cplusplus extern "C" { #endif /*! - * \brief load a data matrix + * \brief load a data matrix * \param fname the name of the file * \param silent whether print messages during loading * \return a loaded data matrix @@ -29,7 +31,7 @@ extern "C" { * \param indptr pointer to row headers * \param indices findex * \param data fvalue - * \param nindptr number of rows in the matix + 1 + * \param nindptr number of rows in the matix + 1 * \param nelem number of nonzero elements in the matrix * \return created dmatrix */ @@ -51,7 +53,7 @@ extern "C" { const unsigned *indices, const float *data, bst_ulong nindptr, - bst_ulong nelem); + bst_ulong nelem); /*! * \brief create matrix content from dense matrix * \param data pointer to the data space @@ -92,7 +94,8 @@ extern "C" { * \param array pointer to float vector * \param len length of array */ - XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *array, bst_ulong len); + XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, + const float *array, bst_ulong len); /*! * \brief set uint32 vector to a content in info * \param handle a instance of data matrix @@ -100,7 +103,8 @@ extern "C" { * \param array pointer to float vector * \param len length of array */ - XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *array, bst_ulong len); + XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, + const unsigned *array, bst_ulong len); /*! * \brief set label of the training matrix * \param handle a instance of data matrix @@ -115,7 +119,8 @@ extern "C" { * \param out_len used to set result length * \return pointer to the result */ - XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* out_len); + XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, + const char *field, bst_ulong* out_len); /*! 
* \brief get uint32 info vector from matrix * \param handle a instance of data matrix @@ -123,31 +128,32 @@ extern "C" { * \param field field name * \param out_len used to set result length * \return pointer to the result */ - XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* out_len); + XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, + const char *field, bst_ulong* out_len); /*! * \brief return number of rows */ XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle); // --- start XGBoost class - /*! - * \brief create xgboost learner + /*! + * \brief create xgboost learner * \param dmats matrices that are set to be cached * \param len length of dmats */ XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len); - /*! - * \brief free obj in handle + /*! + * \brief free obj in handle * \param handle handle to be freed */ XGB_DLL void XGBoosterFree(void* handle); - /*! - * \brief set parameters + /*! + * \brief set parameters * \param handle handle * \param name parameter name * \param val value of parameter - */ + */ XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value); - /*! + /*! * \brief update the model in one round using dtrain * \param handle handle * \param iter current iteration rounds @@ -188,8 +194,8 @@ extern "C" { * when the parameter is set to 0, we will use all the trees * \param len used to store length of returning result */ - XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, - int option_mask, + XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, + int option_mask, unsigned ntree_limit, bst_ulong *len); /*! From 59b91cf205f294ead9242b1895cae4ebb4b466c7 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 20:36:41 -0700 Subject: [PATCH 30/59] make python lint --- wrapper/setup.py | 5 +- wrapper/xgboost.py | 427 +++++++++++++++++++++++++++------------------ 2 files changed, 263 insertions(+), 169 deletions(-) diff --git a/wrapper/setup.py b/wrapper/setup.py index 52bf1cf82..5365d61b0 100644 --- a/wrapper/setup.py +++ b/wrapper/setup.py @@ -1,9 +1,12 @@ +# pylint: disable=invalid-name +"""Setup xgboost package.""" import os import platform from setuptools import setup class XGBoostLibraryNotFound(Exception): + """Exception to raise when xgboost library cannot be found.""" pass @@ -15,7 +18,7 @@ if os.name == 'nt': dll_path.append(os.path.join(curr_dir, '../windows/x64/Release/')) else: dll_path.append(os.path.join(curr_dir, '../windows/Release/')) - + if os.name == 'nt': dll_path = [os.path.join(p, 'xgboost_wrapper.dll') for p in dll_path] diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 0280d87b3..c21545b4c 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -6,7 +6,7 @@ Version: 0.40 Authors: Tianqi Chen, Bing Xu Early stopping by Zygmunt Zając """ - +# pylint: disable=too-many-arguments, too-many-locals, too-many-lines from __future__ import absolute_import import os @@ -28,20 +28,25 @@ except ImportError: SKLEARN_INSTALLED = False class XGBoostLibraryNotFound(Exception): + """Error thrown when the xgboost library is not found.""" pass class XGBoostError(Exception): + """Error thrown by the xgboost trainer.""" pass __all__ = ['DMatrix', 'CVPack', 'Booster', 'aggcv', 'cv', 'mknfold', 'train'] if sys.version_info[0] == 3: + # pylint: disable=invalid-name STRING_TYPES = str, else: + # pylint: disable=invalid-name STRING_TYPES = basestring, def load_xglib(): + """Load the xgboost library.""" curr_path =
os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) dll_path = [curr_path] if os.name == 'nt': @@ -55,7 +60,8 @@ def load_xglib(): dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] if len(dll_path) == 0: - raise XGBoostLibraryNotFound('cannot find find the files in the candicate path ' + str(dll_path)) + raise XGBoostLibraryNotFound( + 'cannot find the files in the candidate path ' + str(dll_path)) lib = ctypes.cdll.LoadLibrary(lib_path[0]) # DMatrix functions @@ -79,12 +85,11 @@ def load_xglib(): return lib # load the XGBoost library globally -xglib = load_xglib() +_LIB = load_xglib() def ctypes2numpy(cptr, length, dtype): - """ - Convert a ctypes pointer array to a numpy array. + """Convert a ctypes pointer array to a numpy array. """ if not isinstance(cptr, ctypes.POINTER(ctypes.c_float)): raise RuntimeError('expected float pointer') @@ -95,6 +100,7 @@ def ctypes2numpy(cptr, length, dtype): def ctypes2buffer(cptr, length): + """Convert ctypes pointer to buffer type.""" if not isinstance(cptr, ctypes.POINTER(ctypes.c_char)): raise RuntimeError('expected char pointer') res = bytearray(length) @@ -105,14 +111,17 @@ def ctypes2buffer(cptr, length): def c_str(string): + """Convert a python string to cstring.""" return ctypes.c_char_p(string.encode('utf-8')) def c_array(ctype, values): + """Convert a python list to c array.""" return (ctype * len(values))(*values) class DMatrix(object): + """Data Matrix used in XGBoost.""" def __init__(self, data, label=None, missing=0.0, weight=None, silent=False): """ Data matrix used in XGBoost. @@ -135,8 +144,8 @@ class DMatrix(object): if data is None: self.handle = None return - if isinstance(data, string_types): - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), int(silent))) + if isinstance(data, STRING_TYPES): + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromFile(c_str(data), int(silent))) elif isinstance(data, scipy.sparse.csr_matrix): self._init_from_csr(data) elif isinstance(data, scipy.sparse.csc_matrix): @@ -160,7 +169,7 @@ class DMatrix(object): """ if len(csr.indices) != len(csr.data): raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data))) - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR( + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSR( c_array(ctypes.c_ulong, csr.indptr), c_array(ctypes.c_uint, csr.indices), c_array(ctypes.c_float, csr.data), @@ -172,7 +181,7 @@ class DMatrix(object): """ if len(csc.indices) != len(csc.data): raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data))) - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSC( + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSC( c_array(ctypes.c_ulong, csc.indptr), c_array(ctypes.c_uint, csc.indices), c_array(ctypes.c_float, csc.data), @@ -183,34 +192,77 @@ class DMatrix(object): Initialize data from a 2-D numpy matrix. """ data = np.array(mat.reshape(mat.size), dtype=np.float32) - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat( + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromMat( data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), mat.shape[0], mat.shape[1], ctypes.c_float(missing))) def __del__(self): - xglib.XGDMatrixFree(self.handle) + _LIB.XGDMatrixFree(self.handle) def get_float_info(self, field): + """Get float property from the DMatrix.
+ + Parameters + ---------- + field: str + The field name of the information + + Returns + ------- + info : array + a numpy array of float information of the data + """ length = ctypes.c_ulong() - ret = xglib.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = _LIB.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length)) return ctypes2numpy(ret, length.value, np.float32) def get_uint_info(self, field): + """Get unsigned integer property from the DMatrix. + + Parameters + ---------- + field: str + The field name of the information + + Returns + ------- + info : array + a numpy array of unsigned integer information of the data + """ length = ctypes.c_ulong() - ret = xglib.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = _LIB.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length)) return ctypes2numpy(ret, length.value, np.uint32) def set_float_info(self, field, data): - xglib.XGDMatrixSetFloatInfo(self.handle, c_str(field), - c_array(ctypes.c_float, data), len(data)) + """Set float type property into the DMatrix. + + Parameters + ---------- + field: str + The field name of the information + + data: numpy array + The array of data to be set + """ + _LIB.XGDMatrixSetFloatInfo(self.handle, c_str(field), + c_array(ctypes.c_float, data), len(data)) def set_uint_info(self, field, data): - xglib.XGDMatrixSetUIntInfo(self.handle, c_str(field), - c_array(ctypes.c_uint, data), len(data)) + """Set uint type property into the DMatrix. + + Parameters + ---------- + field: str + The field name of the information + + data: numpy array + The array of data to be set + """ + _LIB.XGDMatrixSetUIntInfo(self.handle, c_str(field), + c_array(ctypes.c_uint, data), len(data)) def save_binary(self, fname, silent=True): - """ - Save DMatrix to an XGBoost buffer. + """Save DMatrix to an XGBoost buffer. Parameters ---------- @@ -219,74 +271,74 @@ class DMatrix(object): silent : bool (optional; default: True) If set, the output is suppressed. """ - xglib.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent)) + _LIB.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent)) def set_label(self, label): - """set label of dmatrix - Args: - label: list - label for DMatrix - Returns: - None + """Set label of dmatrix + + Parameters + ---------- + label: array like + The label information to be set into DMatrix """ self.set_float_info('label', label) def set_weight(self, weight): - """ - Set weight of each instance. + """ Set weight of each instance. Parameters ---------- - weight : float - Weight for positive instance. + weight : array like + Weight for each data point """ self.set_float_info('weight', weight) def set_base_margin(self, margin): - """ - set base margin of booster to start from - this can be used to specify a prediction value of + """ Set base margin of booster to start from. + + This can be used to specify a prediction value of existing model to be base_margin However, remember margin is needed, instead of transformed prediction e.g. for logistic regression: need to put in value before logistic transformation see also example/demo.py + + Parameters + ---------- + margin: array like + Prediction margin of each datapoint """ self.set_float_info('base_margin', margin) def set_group(self, group): - """ - Set group size of DMatrix (used for ranking). + """Set group size of DMatrix (used for ranking). Parameters ---------- - group : int - Group size.
+ group : array like + Group size of each group """ - xglib.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group)) + _LIB.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group)) def get_label(self): - """ - Get the label of the DMatrix. + """Get the label of the DMatrix. Returns ------- - label : list + label : array """ return self.get_float_info('label') def get_weight(self): - """ - Get the weight of the DMatrix. + """Get the weight of the DMatrix. Returns ------- - weight : float + weight : array """ return self.get_float_info('weight') def get_base_margin(self): - """ - Get the base margin of the DMatrix. + """Get the base margin of the DMatrix. Returns ------- @@ -295,18 +347,16 @@ class DMatrix(object): return self.get_float_info('base_margin') def num_row(self): - """ - Get the number of rows in the DMatrix. + """Get the number of rows in the DMatrix. Returns ------- number of rows : int """ - return xglib.XGDMatrixNumRow(self.handle) + return _LIB.XGDMatrixNumRow(self.handle) def slice(self, rindex): - """ - Slice the DMatrix and return a new DMatrix that only contains `rindex`. + """Slice the DMatrix and return a new DMatrix that only contains `rindex`. Parameters ---------- @@ -319,13 +369,15 @@ class DMatrix(object): A new DMatrix containing only selected indices. """ res = DMatrix(None) - res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix( + res.handle = ctypes.c_void_p(_LIB.XGDMatrixSliceDMatrix( self.handle, c_array(ctypes.c_int, rindex), len(rindex))) return res class Booster(object): + """A Booster of XGBoost.""" def __init__(self, params=None, cache=(), model_file=None): + # pylint: disable=invalid-name """ Learner class. @@ -342,14 +394,14 @@ class Booster(object): if not isinstance(d, DMatrix): raise TypeError('invalid cache item: {}'.format(type(d).__name__)) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) - self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache))) + self.handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, len(cache))) self.set_param({'seed': 0}) self.set_param(params or {}) if model_file is not None: self.load_model(model_file) def __del__(self): - xglib.XGBoosterFree(self.handle) + _LIB.XGBoosterFree(self.handle) def __getstate__(self): # can't pickle ctypes pointers @@ -367,10 +419,10 @@ class Booster(object): if handle is not None: buf = handle dmats = c_array(ctypes.c_void_p, []) - handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, 0)) + handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, 0)) length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - xglib.XGBoosterLoadModelFromBuffer(handle, ptr, length) + _LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length) state['handle'] = handle self.__dict__.update(state) self.set_param({'seed': 0}) @@ -379,11 +431,10 @@ class Booster(object): return self.__deepcopy__() def __deepcopy__(self): - return Booster(model_file = self.save_raw()) + return Booster(model_file=self.save_raw()) def copy(self): - """ - Copy the booster object + """Copy the booster object.
Returns -------- booster: a copied booster object """ return self.__copy__() - def set_param(self, params, pv=None): + def set_param(self, params, value=None): + """Set parameters into the Booster.""" if isinstance(params, collections.Mapping): params = params.items() - elif isinstance(params, string_types) and pv is not None: - params = [(params, pv)] - for k, v in params: - xglib.XGBoosterSetParam(self.handle, c_str(k), c_str(str(v))) + elif isinstance(params, STRING_TYPES) and value is not None: + params = [(params, value)] + for key, val in params: + _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) - def update(self, dtrain, it, fobj=None): + def update(self, dtrain, iteration, fobj=None): """ Update (one iteration). @@ -407,7 +459,7 @@ class Booster(object): ---------- dtrain : DMatrix Training data. - it : int + iteration : int Current iteration number. fobj : function Customized objective function. @@ -415,7 +467,7 @@ class Booster(object): if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) if fobj is None: - xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) + _LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle) else: pred = self.predict(dtrain) grad, hess = fobj(pred, dtrain) @@ -438,20 +490,20 @@ class Booster(object): raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess))) if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) - xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle, - c_array(ctypes.c_float, grad), - c_array(ctypes.c_float, hess), - len(grad)) + _LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, + c_array(ctypes.c_float, grad), + c_array(ctypes.c_float, hess), + len(grad)) - def eval_set(self, evals, it=0, feval=None): + def eval_set(self, evals, iteration=0, feval=None): + # pylint: disable=invalid-name - """ - Evaluate by a metric. + """Evaluate a set of data. Parameters ---------- evals : list of tuples (DMatrix, string) List of items to be evaluated. - it : int + iteration : int Current iteration. feval : function Custom evaluation function. @@ -464,20 +516,35 @@ class Booster(object): for d in evals: if not isinstance(d[0], DMatrix): raise TypeError('expected DMatrix, got {}'.format(type(d[0]).__name__)) - if not isinstance(d[1], string_types): + if not isinstance(d[1], STRING_TYPES): raise TypeError('expected string, got {}'.format(type(d[1]).__name__)) dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals]) evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals]) - return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) + return _LIB.XGBoosterEvalOneIter(self.handle, iteration, dmats, evnames, len(evals)) else: - res = '[%d]' % it - for dm, evname in evals: - name, val = feval(self.predict(dm), dm) + res = '[%d]' % iteration + for dmat, evname in evals: + name, val = feval(self.predict(dmat), dmat) res += '\t%s-%s:%f' % (evname, name, val) return res - def eval(self, mat, name='eval', it=0): - return self.eval_set([(mat, name)], it) + def eval(self, data, name='eval', iteration=0): + """Evaluate the model on data. + + + Parameters + --------- + data : DMatrix + The dmatrix storing the input.
+ + name : str (default = 'eval') + The name of the dataset + + + iteration : int (default = 0) + The current iteration number + """ return self.eval_set([(data, name)], iteration) def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False): """ @@ -492,10 +559,13 @@ class Booster(object): ---------- data : DMatrix The dmatrix storing the input. + output_margin : bool Whether to output the raw untransformed margin value. + ntree_limit : int Limit number of trees in the prediction; defaults to 0 (use all trees). + pred_leaf : bool When this option is on, the output will be a matrix of (nsample, ntrees) with each record indicating the predicted leaf index of each sample in each tree. @@ -512,8 +582,8 @@ class Booster(object): if pred_leaf: option_mask |= 0x02 length = ctypes.c_ulong() - preds = xglib.XGBoosterPredict(self.handle, data.handle, - option_mask, ntree_limit, ctypes.byref(length)) + preds = _LIB.XGBoosterPredict(self.handle, data.handle, + option_mask, ntree_limit, ctypes.byref(length)) preds = ctypes2numpy(preds, length.value, np.float32) if pred_leaf: preds = preds.astype(np.int32) @@ -531,8 +601,8 @@ class Booster(object): fname : string Output file name """ - if isinstance(fname, string_types): # assume file name - xglib.XGBoosterSaveModel(self.handle, c_str(fname)) + if isinstance(fname, STRING_TYPES): # assume file name + _LIB.XGBoosterSaveModel(self.handle, c_str(fname)) else: raise TypeError("fname must be a string") @@ -545,8 +615,8 @@ class Booster(object): an in-memory buffer representation of the model """ length = ctypes.c_ulong() - cptr = xglib.XGBoosterGetModelRaw(self.handle, - ctypes.byref(length)) + cptr = _LIB.XGBoosterGetModelRaw(self.handle, + ctypes.byref(length)) return ctypes2buffer(cptr, length.value) def load_model(self, fname): @@ -559,59 +629,63 @@ class Booster(object): Input file name or memory buffer (see also save_raw) """ if isinstance(fname, str): # assume file name - xglib.XGBoosterLoadModel(self.handle, c_str(fname)) + _LIB.XGBoosterLoadModel(self.handle, c_str(fname)) else: buf = fname length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - xglib.XGBoosterLoadModelFromBuffer(self.handle, ptr, length) + _LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length) - def dump_model(self, fo, fmap='', with_stats=False): + def dump_model(self, fout, fmap='', with_stats=False): """ Dump model into a text file. Parameters ---------- - fo : string + fout : string Output file name. fmap : string, optional Name of the file containing feature map names. with_stats : bool (optional) Controls whether the split statistics are output. """ - if isinstance(fo, string_types): - fo = open(fo, 'w') + if isinstance(fout, STRING_TYPES): + fout = open(fout, 'w') need_close = True else: need_close = False ret = self.get_dump(fmap, with_stats) for i in range(len(ret)): - fo.write('booster[{}]:\n'.format(i)) - fo.write(ret[i]) + fout.write('booster[{}]:\n'.format(i)) + fout.write(ret[i]) if need_close: - fo.close() + fout.close() def get_dump(self, fmap='', with_stats=False): """ Returns the model dump as a list of strings. """ length = ctypes.c_ulong() - sarr = xglib.XGBoosterDumpModel(self.handle, c_str(fmap), - int(with_stats), ctypes.byref(length)) + sarr = _LIB.XGBoosterDumpModel(self.handle, c_str(fmap), + int(with_stats), ctypes.byref(length)) res = [] for i in range(length.value): res.append(str(sarr[i].decode('ascii'))) return res def get_fscore(self, fmap=''): - """ - Get feature importance of each feature.
+ """Get feature importance of each feature. + + Parameters + ---------- + fmap: str (optional) + The name of the feature map file """ trees = self.get_dump(fmap) fmap = {} for tree in trees: - for l in tree.split('\n'): - arr = l.split('[') + for line in tree.split('\n'): + arr = line.split('[') if len(arr) == 1: continue fid = arr[1].split(']')[0] @@ -624,9 +698,9 @@ class Booster(object): def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, - early_stopping_rounds=None,evals_result=None): + early_stopping_rounds=None, evals_result=None): + # pylint: disable=too-many-statements,too-many-branches, attribute-defined-outside-init - """ - Train a booster with given parameters. + """Train a booster with given parameters. Parameters ---------- @@ -663,7 +737,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, bst = Booster(params, [dtrain] + [d[0] for d in evals]) if evals_result is not None: - if type(evals_result) is not dict: + if not isinstance(evals_result, dict): raise TypeError('evals_result has to be a dictionary') else: evals_name = [d[1] for d in evals] @@ -675,37 +749,38 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, bst.update(dtrain, i, obj) if len(evals) != 0: bst_eval_set = bst.eval_set(evals, i, feval) - if isinstance(bst_eval_set, string_types): + if isinstance(bst_eval_set, STRING_TYPES): msg = bst_eval_set else: msg = bst_eval_set.decode() sys.stderr.write(msg + '\n') if evals_result is not None: - res = re.findall(":([0-9.]+).",msg) - for key,val in zip(evals_name,res): + res = re.findall(":([0-9.]+).", msg) + for key, val in zip(evals_name, res): evals_result[key].append(val) return bst else: # early stopping - if len(evals) < 1: raise ValueError('For early stopping you need at least one set in evals.') - sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds)) + sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(\ + evals[-1][1], early_stopping_rounds)) # is params a list of tuples? are we using multiple eval metrics? - if type(params) == list: + if isinstance(params, list): if len(params) != len(dict(params).items()): - raise ValueError('Check your params.
Early stopping works with single eval metric only.') + raise ValueError('Check your params.'\ + 'Early stopping works with single eval metric only.') params = dict(params) # either minimize loss or maximize AUC/MAP/NDCG maximize_score = False if 'eval_metric' in params: maximize_metrics = ('auc', 'map', 'ndcg') - if list(filter(lambda x: params['eval_metric'].startswith(x), maximize_metrics)): + if any(params['eval_metric'].startswith(x) for x in maximize_metrics): maximize_score = True if maximize_score: @@ -720,7 +795,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, bst.update(dtrain, i, obj) bst_eval_set = bst.eval_set(evals, i, feval) - if isinstance(bst_eval_set, string_types): + if isinstance(bst_eval_set, STRING_TYPES): msg = bst_eval_set else: msg = bst_eval_set.decode() @@ -728,8 +803,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, sys.stderr.write(msg + '\n') if evals_result is not None: - res = re.findall(":([0-9.]+).",msg) - for key,val in zip(evals_name,res): + res = re.findall(":([0-9.]+).", msg) + for key, val in zip(evals_name, res): evals_result[key].append(val) score = float(msg.rsplit(':', 1)[1]) @@ -748,17 +823,21 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, return bst class CVPack(object): + """Auxiliary data structure to hold one fold of CV.""" def __init__(self, dtrain, dtest, param): + """Initialize the CVPack.""" self.dtrain = dtrain self.dtest = dtest self.watchlist = [(dtrain, 'train'), (dtest, 'test')] self.bst = Booster(param, [dtrain, dtest]) - def update(self, r, fobj): - self.bst.update(self.dtrain, r, fobj) + def update(self, iteration, fobj): + """Update the boosters for one iteration.""" + self.bst.update(self.dtrain, iteration, fobj) - def eval(self, r, feval): - return self.bst.eval_set(self.watchlist, r, feval) + def eval(self, iteration, feval): + """Evaluate the CVPack for one iteration.""" + return self.bst.eval_set(self.watchlist, iteration, feval) def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None): @@ -785,6 +864,7 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None): def aggcv(rlist, show_stdv=True): + # pylint: disable=invalid-name """ Aggregate cross-validation results. """ @@ -794,7 +874,7 @@ def aggcv(rlist, show_stdv=True): arr = line.split() assert ret == arr[0] for it in arr[1:]: - if not isinstance(it, string_types): + if not isinstance(it, STRING_TYPES): it = it.decode() k, v = it.split(':') if k not in cvmap: @@ -802,7 +882,7 @@ def aggcv(rlist, show_stdv=True): cvmap[k].append(float(v)) for k, v in sorted(cvmap.items(), key=lambda x: x[0]): v = np.array(v) - if not isinstance(ret, string_types): + if not isinstance(ret, STRING_TYPES): ret = ret.decode() if show_stdv: ret += '\tcv-%s:%f+%f' % (k, np.mean(v), np.std(v)) @@ -813,8 +893,8 @@ def aggcv(rlist, show_stdv=True): def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(), obj=None, feval=None, fpreproc=None, show_stdv=True, seed=0): - """ - Cross-validation with given paramaters. + # pylint: disable = invalid-name + """Cross-validation with given parameters.
Parameters ---------- @@ -847,8 +927,8 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(), results = [] cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc) for i in range(num_boost_round): - for f in cvfolds: - f.update(i, obj) + for fold in cvfolds: + fold.update(i, obj) res = aggcv([f.eval(i, feval) for f in cvfolds], show_stdv) sys.stderr.write(res + '\n') results.append(res) @@ -857,16 +937,16 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(), # used for compatibility without sklearn XGBModelBase = object -XGBClassifier = object -XGBRegressor = object +XGBClassifierBase = object +XGBRegressorBase = object if SKLEARN_INSTALLED: XGBModelBase = BaseEstimator - XGBRegressor = RegressorMixin - XGBClassifier = ClassifierMixin + XGBRegressorBase = RegressorMixin + XGBClassifierBase = ClassifierMixin class XGBModel(XGBModelBase): - """ - Implementation of the Scikit-Learn API for XGBoost. + # pylint: disable=too-many-arguments, too-many-instance-attributes, invalid-name + """Implementation of the Scikit-Learn API for XGBoost. Parameters ---------- @@ -902,8 +982,10 @@ class XGBModel(XGBModelBase): Value in the data which needs to be present as a missing value. If None, defaults to np.nan. """ - def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear", - nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, + def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, + silent=True, objective="reg:linear", + nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, + subsample=1, colsample_bytree=1, base_score=0.5, seed=0, missing=None): if not SKLEARN_INSTALLED: raise XGBoostError('sklearn needs to be installed in order to use this module') @@ -923,7 +1005,6 @@ class XGBModel(XGBModelBase): self.base_score = base_score self.seed = seed self.missing = missing if missing is not None else np.nan - self._Booster = None def __setstate__(self, state): @@ -936,9 +1017,9 @@ class XGBModel(XGBModelBase): self.__dict__.update(state) def booster(self): - """ - get the underlying xgboost Booster of this model - will raise an exception when fit was not called + """Get the underlying xgboost Booster of this model. + + This will raise an exception when fit was not called Returns ------- @@ -949,12 +1030,14 @@ class XGBModel(XGBModelBase): return self._Booster def get_params(self, deep=False): + """Get parameters.""" params = super(XGBModel, self).get_params(deep=deep) if params['missing'] is np.nan: params['missing'] = None # sklearn doesn't handle nan.
see #4725 return params def get_xgb_params(self): + """Get xgboost type parameters.""" xgb_params = self.get_params() xgb_params['silent'] = 1 if self.silent else 0 @@ -963,30 +1046,39 @@ class XGBModel(XGBModelBase): xgb_params.pop('nthread', None) return xgb_params - def fit(self, X, y): - trainDmatrix = DMatrix(X, label=y, missing=self.missing) - self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_estimators) + def fit(self, data, y): + # pylint: disable=missing-docstring,invalid-name + train_dmatrix = DMatrix(data, label=y, missing=self.missing) + self._Booster = train(self.get_xgb_params(), train_dmatrix, self.n_estimators) return self - def predict(self, X): - testDmatrix = DMatrix(X, missing=self.missing) - return self.booster().predict(testDmatrix) + def predict(self, data): + # pylint: disable=missing-docstring,invalid-name + test_dmatrix = DMatrix(data, missing=self.missing) + return self.booster().predict(test_dmatrix) -class XGBClassifier(XGBModel, XGBClassifier): +class XGBClassifier(XGBModel, XGBClassifierBase): + # pylint: disable=missing-docstring,too-many-arguments,invalid-name __doc__ = """ Implementation of the scikit-learn API for XGBoost classification """ + "\n".join(XGBModel.__doc__.split('\n')[2:]) - def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic", - nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, + def __init__(self, max_depth=3, learning_rate=0.1, + n_estimators=100, silent=True, + objective="binary:logistic", + nthread=-1, gamma=0, min_child_weight=1, + max_delta_step=0, subsample=1, colsample_bytree=1, base_score=0.5, seed=0, missing=None): - super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective, - nthread, gamma, min_child_weight, max_delta_step, subsample, + super(XGBClassifier, self).__init__(max_depth, learning_rate, + n_estimators, silent, objective, + nthread, gamma, min_child_weight, + max_delta_step, subsample, colsample_bytree, base_score, seed, missing) def fit(self, X, y, sample_weight=None): + # pylint: disable = attribute-defined-outside-init,arguments-differ self.classes_ = list(np.unique(y)) self.n_classes_ = len(self.classes_) if self.n_classes_ > 2: @@ -1001,29 +1093,29 @@ class XGBClassifier(XGBModel, XGBClassifier): training_labels = self._le.transform(y) if sample_weight is not None: - trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight, - missing=self.missing) + train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight, + missing=self.missing) else: - trainDmatrix = DMatrix(X, label=training_labels, - missing=self.missing) + train_dmatrix = DMatrix(X, label=training_labels, + missing=self.missing) - self._Booster = train(xgb_options, trainDmatrix, self.n_estimators) + self._Booster = train(xgb_options, train_dmatrix, self.n_estimators) return self - def predict(self, X): - testDmatrix = DMatrix(X, missing=self.missing) - class_probs = self.booster().predict(testDmatrix) + def predict(self, data): + test_dmatrix = DMatrix(data, missing=self.missing) + class_probs = self.booster().predict(test_dmatrix) if len(class_probs.shape) > 1: column_indexes = np.argmax(class_probs, axis=1) else: - column_indexes = np.repeat(0, X.shape[0]) + column_indexes = np.repeat(0, data.shape[0]) column_indexes[class_probs > 0.5] = 1 return self._le.inverse_transform(column_indexes) - def predict_proba(self, X): - testDmatrix = DMatrix(X, missing=self.missing) - class_probs = 
self.booster().predict(testDmatrix) + def predict_proba(self, data): + test_dmatrix = DMatrix(data, missing=self.missing) + class_probs = self.booster().predict(test_dmatrix) if self.objective == "multi:softprob": return class_probs else: @@ -1031,9 +1123,8 @@ class XGBClassifier(XGBModel, XGBClassifier): classzero_probs = 1.0 - classone_probs return np.vstack((classzero_probs, classone_probs)).transpose() -class XGBRegressor(XGBModel, XGBRegressor): +class XGBRegressor(XGBModel, XGBRegressorBase): + # pylint: disable=missing-docstring __doc__ = """ Implementation of the scikit-learn API for XGBoost regression """ + "\n".join(XGBModel.__doc__.split('\n')[2:]) - - pass From af0a451dc424a7ffdb6af153bddefebe5409643c Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 21:08:36 -0700 Subject: [PATCH 31/59] refactor and ci --- .gitignore | 1 + .travis.yml | 61 +++++++++++++++++++-------------- Makefile | 36 ++++++++++--------- scripts/travis_R_script.sh | 14 ++++++++ scripts/travis_after_failure.sh | 5 +++ scripts/travis_scripts.sh | 28 +++++++++++++++ 6 files changed, 103 insertions(+), 42 deletions(-) create mode 100755 scripts/travis_R_script.sh create mode 100755 scripts/travis_after_failure.sh create mode 100755 scripts/travis_scripts.sh diff --git a/.gitignore b/.gitignore index 44a215435..73ae6748e 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ java/xgboost4j-demo/data/ java/xgboost4j-demo/tmp/ java/xgboost4j-demo/model/ nb-configuration* +dmlc-core diff --git a/.travis.yml b/.travis.yml index 8eca7c0ad..1a82699f5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,36 +1,45 @@ -language: c - -env: - global: - - _R_CHECK_TIMINGS_=0 - - R_BUILD_ARGS="--no-build-vignettes --no-manual" - - R_CHECK_ARGS="--no-vignettes --no-manual" - -warnings_are_errors: false - sudo: true +# Use Build Matrix to do lint and build seperately +env: + matrix: + - TASK=lint LINT_LANG=cpp + - TASK=lint LINT_LANG=python + - TASK="R-package" + - TASK="python-package" + - TASK=build CXX=g++ + - TASK=build-with-dmlc CXX=g++ + +# dependent apt packages +addons: + apt: + packages: + - doxygen + - libopenmpi-dev + - wget + - libcurl4-openssl-dev + - unzip + - python-numpy + - python-nose + before_install: - - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh - - chmod 755 ./travis-tool.sh - - ./travis-tool.sh bootstrap + - git clone https://github.com/dmlc/dmlc-core + - export TRAVIS=dmlc-core/scripts/travis/ + - export PYTHONPATH=${PYTHONPATH}:${PWD}/wrapper + - source ${TRAVIS}/travis_setup_env.sh install: - - make Rpack - - cd ./xgboost - - ../travis-tool.sh install_deps + - pip install cpplint pylint --user `whoami` -script: - - bash ../travis-tool.sh run_tests +script: scripts/travis_script.sh + + +after_failure: + - scripts/travis_after_failure.sh -after_failure: cat /home/travis/build/dmlc/xgboost/R-package/xgboost.Rcheck/00install.out - notifications: - email: - recipients: - - hetong007@gmail.com - - tqchen@cs.washington.edu - on_success: change - on_failure: always + email: + on_success: change + on_failure: always diff --git a/Makefile b/Makefile index 295de4064..7d2ff5273 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx -export LDFLAGS= -pthread -lm +export LDFLAGS= -pthread -lm export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops # java include path export JAVAINCFLAGS = -I${JAVA_HOME}/include -I${JAVA_HOME}/include/linux -I./java @@ -12,8 +12,8 @@ ifeq ($(OS), Windows_NT) endif ifeq 
($(no_omp),1) - CFLAGS += -DDISABLE_OPENMP -else + CFLAGS += -DDISABLE_OPENMP +else CFLAGS += -fopenmp endif @@ -29,7 +29,7 @@ ifdef dmlc config = $(dmlc)/config.mk else config = $(dmlc)/make/config.mk - endif + endif endif include $(config) include $(dmlc)/make/dmlc.mk @@ -43,7 +43,7 @@ ifndef WITH_FPIC WITH_FPIC = 1 endif ifeq ($(WITH_FPIC), 1) - CFLAGS += -fPIC + CFLAGS += -fPIC endif @@ -69,7 +69,7 @@ else TARGET = $(BIN) endif -.PHONY: clean all mpi python Rpack +.PHONY: clean all mpi python Rpack lint all: $(TARGET) mpi: $(MPIBIN) @@ -78,9 +78,9 @@ python: wrapper/libxgboostwrapper.so # now the wrapper takes in two files. io and wrapper part updater.o: src/tree/updater.cpp src/tree/*.hpp src/*.h src/tree/*.h src/utils/*.h dmlc_simple.o: src/io/dmlc_simple.cpp src/utils/*.h -gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h +gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h -main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h +main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h xgboost: updater.o gbm.o io.o main.o $(LIBRABIT) $(LIBDMLC) wrapper/xgboost_wrapper.dll wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o $(LIBRABIT) $(LIBDMLC) @@ -97,11 +97,11 @@ subtree/rabit/lib/librabit_mock.a: subtree/rabit/src/engine_mock.cc subtree/rabit/lib/librabit_mpi.a: subtree/rabit/src/engine_mpi.cc + cd subtree/rabit;make lib/librabit_mpi.a; cd ../.. -$(BIN) : - $(CXX) $(CFLAGS) -fPIC -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) +$(BIN) : + $(CXX) $(CFLAGS) -fPIC -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -$(MOCKBIN) : - $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) +$(MOCKBIN) : + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) $(SLIB) : $(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(DLLFLAGS) @@ -109,13 +109,13 @@ $(SLIB) : $(JLIB) : $(CXX) $(CFLAGS) -fPIC -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(JAVAINCFLAGS) -$(OBJ) : +$(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) -$(MPIOBJ) : - $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) +$(MPIOBJ) : + $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) -$(MPIBIN) : +$(MPIBIN) : $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) install: @@ -157,6 +157,10 @@ Rcheck: make Rbuild R CMD check --as-cran xgboost*.tar.gz +# lint requires dmlc to be in current folder +lint: + dmlc-core/scripts/lint.py xgboost $(LINT_LANG) src wrapper R-package + clean: $(RM) -rf $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) $(SLIB) *.o */*.o */*/*.o *~ */*~ */*/*~ cd subtree/rabit; make clean; cd .. 
diff --git a/scripts/travis_R_script.sh b/scripts/travis_R_script.sh new file mode 100755 index 000000000..5a9ea7528 --- /dev/null +++ b/scripts/travis_R_script.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Test R package of xgboost +set -e +export _R_CHECK_TIMINGS_=0 +export R_BUILD_ARGS="--no-build-vignettes --no-manual" +export R_CHECK_ARGS="--no-vignettes --no-manual" + +curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh +chmod 755 ./travis-tool.sh +./travis-tool.sh bootstrap +make Rpack +cd ./xgboost +../travis-tool.sh install_deps +../travis-tool.sh run_tests \ No newline at end of file diff --git a/scripts/travis_after_failure.sh b/scripts/travis_after_failure.sh new file mode 100755 index 000000000..230f3348c --- /dev/null +++ b/scripts/travis_after_failure.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +if [ ${TASK} == "R-package" ]; then + cat R-package/xgboost.Rcheck/00install.out +fi \ No newline at end of file diff --git a/scripts/travis_scripts.sh b/scripts/travis_scripts.sh new file mode 100755 index 000000000..b90f8d3ee --- /dev/null +++ b/scripts/travis_scripts.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# main script of travis +if [ ${TASK} == "lint" ]; then + make lint || exit -1 +fi + +if [ ${TASK} == "build" ]; then + make all CXX=${CXX} || exit -1 +fi + +if [ ${TASK} == "build-with-dmlc" ]; then + cd dmlc-core + cp make/config.mk . + echo "USE_S3=1" >> config.mk + make all CXX=${CXX}|| exit -1 + cd .. + make dmlc=dmlc-core CXX=${CXX} || exit -1 +fi + +if [ ${TASK} == "R-package" ]; then + scripts/travis_R_script.sh || exit -1 +fi + +if [ ${TASK} == "python-package" ]; then + make all CXX=${CXX} || exit -1 + nosetests tests/python || exit -1 +fi From fe3464b763d7798964dd785f61aadbe492183521 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 21:11:01 -0700 Subject: [PATCH 32/59] update script --- README.md | 4 ++-- scripts/{travis_scripts.sh => travis_script.sh} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename scripts/{travis_scripts.sh => travis_script.sh} (100%) diff --git a/README.md b/README.md index 59f2028c8..cdd4c02f7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -XGBoost: eXtreme Gradient Boosting +XGBoost: eXtreme Gradient Boosting ================================== -[![Build Status](https://travis-ci.org/dmlc/xgboost.png)](https://travis-ci.org/dmlc/xgboost) +[![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost) An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version. It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). 
XGBoost can also be distributed and scale to Terascale data

diff --git a/scripts/travis_scripts.sh b/scripts/travis_script.sh
similarity index 100%
rename from scripts/travis_scripts.sh
rename to scripts/travis_script.sh

From 39913d6ee802a0223cd0151f872fee6b681a9e57 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Fri, 3 Jul 2015 21:14:49 -0700
Subject: [PATCH 33/59] add scipy dep

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 1a82699f5..4599095b9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,6 +20,7 @@ addons:
     - libcurl4-openssl-dev
     - unzip
     - python-numpy
+    - python-scipy
     - python-nose
 
 before_install:

From ccf21ec061d85b247dc4bd99e2aa08024cc9e1f5 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Fri, 3 Jul 2015 21:15:10 -0700
Subject: [PATCH 34/59] add scipy dep

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 4599095b9..005692d45 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,8 +5,8 @@ env:
   matrix:
     - TASK=lint LINT_LANG=cpp
     - TASK=lint LINT_LANG=python
-    - TASK="R-package"
-    - TASK="python-package"
+    - TASK=R-package
+    - TASK=python-package
     - TASK=build CXX=g++
     - TASK=build-with-dmlc CXX=g++

From 93319841ed2f8139789806184be973c8bc7096c0 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Fri, 3 Jul 2015 21:20:56 -0700
Subject: [PATCH 35/59] ok

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 005692d45..102c87353 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,8 +5,8 @@ env:
   matrix:
     - TASK=lint LINT_LANG=cpp
     - TASK=lint LINT_LANG=python
-    - TASK=R-package
-    - TASK=python-package
+    - TASK=R-package CXX=g++
+    - TASK=python-package CXX=g++
     - TASK=build CXX=g++
     - TASK=build-with-dmlc CXX=g++

From f0421e94550129701b23002490a1e75c5c4a97dc Mon Sep 17 00:00:00 2001
From: tqchen
Date: Fri, 3 Jul 2015 21:27:29 -0700
Subject: [PATCH 36/59] last check

---
 tests/README.md            |  1 +
 tests/python/test_basic.py | 31 +++++++++++++++++++++++++++++++
 wrapper/xgboost.py         |  2 +-
 3 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 tests/README.md
 create mode 100644 tests/python/test_basic.py

diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 000000000..19e34d5df
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1 @@
+This folder contains test cases for xgboost.
\ No newline at end of file
diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py
new file mode 100644
index 000000000..77d19595b
--- /dev/null
+++ b/tests/python/test_basic.py
@@ -0,0 +1,31 @@
+import numpy as np
+import xgboost as xgb
+
+dpath = 'demo/data/'
+
+def test_basic():
+    dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+    dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+    param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+    # specify validation sets to watch performance
+    watchlist = [(dtest,'eval'), (dtrain,'train')]
+    num_round = 2
+    bst = xgb.train(param, dtrain, num_round, watchlist)
+    # this is prediction
+    preds = bst.predict(dtest)
+    labels = dtest.get_label()
+    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
+    # error must be smaller than 10%
+    assert err < 0.1
+
+    # save dmatrix into binary buffer
+    dtest.save_binary('dtest.buffer')
+    # save model
+    bst.save_model('xgb.model')
+    # load model and data in
+    bst2 = xgb.Booster(model_file='xgb.model')
+    dtest2 = xgb.DMatrix('dtest.buffer')
+    preds2 = bst2.predict(dtest2)
+    # assert they are the same
+    assert np.sum(np.abs(preds2-preds)) == 0
+
diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index c21545b4c..a009ad81b 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -6,7 +6,7 @@ Version: 0.40
 Authors: Tianqi Chen, Bing Xu
 Early stopping by Zygmunt Zając
 """
-# pylint: disable=too-many-arguments, too-many-locals, too-many-lines
+# pylint: disable=too-many-arguments, too-many-locals, too-many-lines, invalid-name
 from __future__ import absolute_import
 
 import os

From 4d436a3cb002b9b9eb1ac29963f9acf3a2b58f04 Mon Sep 17 00:00:00 2001
From: Tianqi Chen
Date: Fri, 3 Jul 2015 21:59:40 -0700
Subject: [PATCH 37/59] Update README.md

---
 doc/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/README.md b/doc/README.md
index d9884c682..371e18f21 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -29,3 +29,7 @@ This section is about blogposts, presentation and videos discussing how to use x
 Contribution
 ====
 Contribution of documents and use-cases are welcomed!
+* This package uses the Google C++ style
+* To check the code style:
+  - clone https://github.com/dmlc/dmlc-core into the root directory
+  - type ```make lint``` and fix possible errors.
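The patch below ([PATCH 38/59]) reworks the C wrapper API so that every entry point returns an int status code, 0 on success and -1 on failure, with the failure message recorded in a thread-local buffer that callers fetch through XGBGetLastError(). A minimal sketch of the new calling convention from C, assuming the refactored xgboost_wrapper.h is on the include path and reusing the demo dataset path from test_basic.py above:

```c
#include <stdio.h>
#include "xgboost_wrapper.h"

int main(void) {
  DMatrixHandle dtrain;
  bst_ulong nrow;

  /* every wrapper call now returns 0 on success and -1 on failure */
  if (XGDMatrixCreateFromFile("demo/data/agaricus.txt.train", 1, &dtrain) != 0) {
    /* the message set by the failing call is retrieved separately */
    fprintf(stderr, "xgboost error: %s\n", XGBGetLastError());
    return 1;
  }
  /* results now come back through output parameters instead of return values */
  if (XGDMatrixNumRow(dtrain, &nrow) != 0) {
    fprintf(stderr, "xgboost error: %s\n", XGBGetLastError());
    return 1;
  }
  printf("loaded %lu rows\n", (unsigned long)nrow);
  XGDMatrixFree(dtrain);
  return 0;
}
```

The language bindings in the patch wrap the same convention: the Python wrapper routes every call through _check_call(), which raises XGBoostError with the XGBGetLastError() message on a nonzero return, and the R wrapper funnels each call through CheckErr().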
From cc767add881b5cc91376f67d01a5e164c5a0fadb Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 4 Jul 2015 18:12:44 -0700 Subject: [PATCH 38/59] API refactor to make fault handling easy --- Makefile | 4 + R-package/src/xgboost_R.cpp | 105 ++++--- src/utils/utils.h | 4 +- wrapper/xgboost.py | 166 ++++++----- wrapper/xgboost_wrapper.cpp | 306 ++++++++++++++++----- wrapper/xgboost_wrapper.h | 534 +++++++++++++++++++++--------------- 6 files changed, 725 insertions(+), 394 deletions(-) diff --git a/Makefile b/Makefile index 7d2ff5273..a24bea327 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,10 @@ else TARGET = $(BIN) endif +ifndef LINT_LANG + LINT_LANG= "all" +endif + .PHONY: clean all mpi python Rpack lint all: $(TARGET) diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp index 436faaa5a..a8084b206 100644 --- a/R-package/src/xgboost_R.cpp +++ b/R-package/src/xgboost_R.cpp @@ -59,6 +59,10 @@ inline void _WrapperEnd(void) { PutRNGstate(); } +// do nothing, check error +inline void CheckErr(int ret) { +} + extern "C" { SEXP XGCheckNullPtr_R(SEXP handle) { return ScalarLogical(R_ExternalPtrAddr(handle) == NULL); @@ -70,7 +74,8 @@ extern "C" { } SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { _WrapperBegin(); - void *handle = XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent)); + DMatrixHandle handle; + CheckErr(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -91,7 +96,8 @@ extern "C" { data[i * ncol +j] = din[i + nrow * j]; } } - void *handle = XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing)); + DMatrixHandle handle; + CheckErr(XGDMatrixCreateFromMat(BeginPtr(data), nrow, ncol, asReal(missing), &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -119,8 +125,10 @@ extern "C" { indices_[i] = static_cast(p_indices[i]); data_[i] = static_cast(p_data[i]); } - void *handle = XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), - BeginPtr(data_), nindptr, ndata); + DMatrixHandle handle; + CheckErr(XGDMatrixCreateFromCSC(BeginPtr(col_ptr_), BeginPtr(indices_), + BeginPtr(data_), nindptr, ndata, + &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -134,7 +142,10 @@ extern "C" { for (int i = 0; i < len; ++i) { idxvec[i] = INTEGER(idxset)[i] - 1; } - void *res = XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), BeginPtr(idxvec), len); + DMatrixHandle res; + CheckErr(XGDMatrixSliceDMatrix(R_ExternalPtrAddr(handle), + BeginPtr(idxvec), len, + &res)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); @@ -143,8 +154,8 @@ extern "C" { } void XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) { _WrapperBegin(); - XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), - CHAR(asChar(fname)), asInteger(silent)); + CheckErr(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle), + CHAR(asChar(fname)), asInteger(silent))); _WrapperEnd(); } void XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) { @@ -157,24 +168,27 @@ extern "C" { for (int i = 0; i < len; ++i) { vec[i] = static_cast(INTEGER(array)[i]); } - XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len); + 
CheckErr(XGDMatrixSetGroup(R_ExternalPtrAddr(handle), BeginPtr(vec), len)); } else { std::vector vec(len); #pragma omp parallel for schedule(static) for (int i = 0; i < len; ++i) { vec[i] = REAL(array)[i]; } - XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), - CHAR(asChar(field)), - BeginPtr(vec), len); + CheckErr(XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + BeginPtr(vec), len)); } _WrapperEnd(); } SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) { _WrapperBegin(); bst_ulong olen; - const float *res = XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), - CHAR(asChar(field)), &olen); + const float *res; + CheckErr(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), + CHAR(asChar(field)), + &olen, + &res)); _WrapperEnd(); SEXP ret = PROTECT(allocVector(REALSXP, olen)); for (size_t i = 0; i < olen; ++i) { @@ -184,13 +198,14 @@ extern "C" { return ret; } SEXP XGDMatrixNumRow_R(SEXP handle) { - bst_ulong nrow = XGDMatrixNumRow(R_ExternalPtrAddr(handle)); + bst_ulong nrow; + CheckErr(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow)); return ScalarInteger(static_cast(nrow)); } // functions related to booster void _BoosterFinalizer(SEXP ext) { if (R_ExternalPtrAddr(ext) == NULL) return; - XGBoosterFree(R_ExternalPtrAddr(ext)); + CheckErr(XGBoosterFree(R_ExternalPtrAddr(ext))); R_ClearExternalPtr(ext); } SEXP XGBoosterCreate_R(SEXP dmats) { @@ -200,7 +215,8 @@ extern "C" { for (int i = 0; i < len; ++i) { dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i))); } - void *handle = XGBoosterCreate(BeginPtr(dvec), dvec.size()); + BoosterHandle handle; + CheckErr(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle)); _WrapperEnd(); SEXP ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); @@ -209,16 +225,16 @@ extern "C" { } void XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) { _WrapperBegin(); - XGBoosterSetParam(R_ExternalPtrAddr(handle), - CHAR(asChar(name)), - CHAR(asChar(val))); + CheckErr(XGBoosterSetParam(R_ExternalPtrAddr(handle), + CHAR(asChar(name)), + CHAR(asChar(val)))); _WrapperEnd(); } void XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) { _WrapperBegin(); - XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), - asInteger(iter), - R_ExternalPtrAddr(dtrain)); + CheckErr(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + R_ExternalPtrAddr(dtrain))); _WrapperEnd(); } void XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) { @@ -231,9 +247,10 @@ extern "C" { tgrad[j] = REAL(grad)[j]; thess[j] = REAL(hess)[j]; } - XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), - R_ExternalPtrAddr(dtrain), - BeginPtr(tgrad), BeginPtr(thess), len); + CheckErr(XGBoosterBoostOneIter(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dtrain), + BeginPtr(tgrad), BeginPtr(thess), + len)); _WrapperEnd(); } SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) { @@ -250,21 +267,24 @@ extern "C" { for (int i = 0; i < len; ++i) { vec_sptr.push_back(vec_names[i].c_str()); } - const char *ret = - XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), - asInteger(iter), - BeginPtr(vec_dmats), BeginPtr(vec_sptr), len); + const char *ret; + CheckErr(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle), + asInteger(iter), + BeginPtr(vec_dmats), + BeginPtr(vec_sptr), + len, &ret)); _WrapperEnd(); return mkString(ret); } SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) { _WrapperBegin(); bst_ulong olen; - const float *res = 
XGBoosterPredict(R_ExternalPtrAddr(handle), - R_ExternalPtrAddr(dmat), - asInteger(option_mask), - asInteger(ntree_limit), - &olen); + const float *res; + CheckErr(XGBoosterPredict(R_ExternalPtrAddr(handle), + R_ExternalPtrAddr(dmat), + asInteger(option_mask), + asInteger(ntree_limit), + &olen, &res)); _WrapperEnd(); SEXP ret = PROTECT(allocVector(REALSXP, olen)); for (size_t i = 0; i < olen; ++i) { @@ -275,12 +295,12 @@ extern "C" { } void XGBoosterLoadModel_R(SEXP handle, SEXP fname) { _WrapperBegin(); - XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + CheckErr(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); _WrapperEnd(); } void XGBoosterSaveModel_R(SEXP handle, SEXP fname) { _WrapperBegin(); - XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))); + CheckErr(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname)))); _WrapperEnd(); } void XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { @@ -293,7 +313,8 @@ extern "C" { SEXP XGBoosterModelToRaw_R(SEXP handle) { bst_ulong olen; _WrapperBegin(); - const char *raw = XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen); + const char *raw; + CheckErr(XGBoosterGetModelRaw(R_ExternalPtrAddr(handle), &olen, &raw)); _WrapperEnd(); SEXP ret = PROTECT(allocVector(RAWSXP, olen)); if (olen != 0) { @@ -305,11 +326,11 @@ extern "C" { SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats) { _WrapperBegin(); bst_ulong olen; - const char **res = - XGBoosterDumpModel(R_ExternalPtrAddr(handle), - CHAR(asChar(fmap)), - asInteger(with_stats), - &olen); + const char **res; + CheckErr(XGBoosterDumpModel(R_ExternalPtrAddr(handle), + CHAR(asChar(fmap)), + asInteger(with_stats), + &olen, &res)); _WrapperEnd(); SEXP out = PROTECT(allocVector(STRSXP, olen)); for (size_t i = 0; i < olen; ++i) { diff --git a/src/utils/utils.h b/src/utils/utils.h index 2066634d6..7a8f18390 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef XGBOOST_STRICT_CXX98_ #include @@ -73,8 +74,7 @@ inline void HandleAssertError(const char *msg) { * \param msg error message */ inline void HandleCheckError(const char *msg) { - fprintf(stderr, "%s\n", msg); - exit(-1); + throw std::runtime_error(msg); } inline void HandlePrint(const char *msg) { printf("%s", msg); diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index a009ad81b..96f6c2573 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -44,7 +44,6 @@ else: # pylint: disable=invalid-name STRING_TYPES = basestring, - def load_xglib(): """Load the xgboost library.""" curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) @@ -63,30 +62,27 @@ def load_xglib(): raise XGBoostLibraryNotFound( 'cannot find find the files in the candicate path ' + str(dll_path)) lib = ctypes.cdll.LoadLibrary(lib_path[0]) - - # DMatrix functions - lib.XGDMatrixCreateFromFile.restype = ctypes.c_void_p - lib.XGDMatrixCreateFromCSR.restype = ctypes.c_void_p - lib.XGDMatrixCreateFromCSC.restype = ctypes.c_void_p - lib.XGDMatrixCreateFromMat.restype = ctypes.c_void_p - lib.XGDMatrixSliceDMatrix.restype = ctypes.c_void_p - lib.XGDMatrixGetFloatInfo.restype = ctypes.POINTER(ctypes.c_float) - lib.XGDMatrixGetUIntInfo.restype = ctypes.POINTER(ctypes.c_uint) - lib.XGDMatrixNumRow.restype = ctypes.c_ulong - - # Booster functions - lib.XGBoosterCreate.restype = ctypes.c_void_p - lib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float) - lib.XGBoosterEvalOneIter.restype = ctypes.c_char_p - 
lib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p) - lib.XGBoosterGetModelRaw.restype = ctypes.POINTER(ctypes.c_char) - lib.XGBoosterLoadModelFromBuffer.restype = ctypes.c_void_p + lib.XGBGetLastError.restype = ctypes.c_char_p return lib # load the XGBoost library globally _LIB = load_xglib() +def _check_call(ret): + """Check the return value of C API call + + This function will raise exception when error occurs. + Wrap every API call with this function + + Parameters + ---------- + ret : int + return value from API calls + """ + if ret != 0: + raise XGBoostError(_LIB.XGBGetLastError()) + def ctypes2numpy(cptr, length, dtype): """Convert a ctypes pointer array to a numpy array. @@ -145,7 +141,10 @@ class DMatrix(object): self.handle = None return if isinstance(data, STRING_TYPES): - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromFile(c_str(data), int(silent))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromFile(c_str(data), + int(silent), + ctypes.byref(self.handle))) elif isinstance(data, scipy.sparse.csr_matrix): self._init_from_csr(data) elif isinstance(data, scipy.sparse.csc_matrix): @@ -169,11 +168,12 @@ class DMatrix(object): """ if len(csr.indices) != len(csr.data): raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data))) - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSR( - c_array(ctypes.c_ulong, csr.indptr), - c_array(ctypes.c_uint, csr.indices), - c_array(ctypes.c_float, csr.data), - len(csr.indptr), len(csr.data))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromCSR(c_array(ctypes.c_ulong, csr.indptr), + c_array(ctypes.c_uint, csr.indices), + c_array(ctypes.c_float, csr.data), + len(csr.indptr), len(csr.data), + ctypes.byref(self.handle))) def _init_from_csc(self, csc): """ @@ -181,23 +181,26 @@ class DMatrix(object): """ if len(csc.indices) != len(csc.data): raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data))) - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSC( - c_array(ctypes.c_ulong, csc.indptr), - c_array(ctypes.c_uint, csc.indices), - c_array(ctypes.c_float, csc.data), - len(csc.indptr), len(csc.data))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromCSC(c_array(ctypes.c_ulong, csc.indptr), + c_array(ctypes.c_uint, csc.indices), + c_array(ctypes.c_float, csc.data), + len(csc.indptr), len(csc.data), + ctypes.byref(self.handle))) def _init_from_npy2d(self, mat, missing): """ Initialize data from a 2-D numpy matrix. """ data = np.array(mat.reshape(mat.size), dtype=np.float32) - self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromMat( - data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), - mat.shape[0], mat.shape[1], ctypes.c_float(missing))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), + mat.shape[0], mat.shape[1], + ctypes.c_float(missing), + ctypes.byref(self.handle))) def __del__(self): - _LIB.XGDMatrixFree(self.handle) + _check_call(_LIB.XGDMatrixFree(self.handle)) def get_float_info(self, field): """Get float property from the DMatrix. 
@@ -213,7 +216,11 @@ class DMatrix(object): a numpy array of float information of the data """ length = ctypes.c_ulong() - ret = _LIB.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = ctypes.POINTER(ctypes.c_float)() + _check_call(_LIB.XGDMatrixGetFloatInfo(self.handle, + c_str(field), + ctypes.byref(length), + ctypes.byref(ret))) return ctypes2numpy(ret, length.value, np.float32) def get_uint_info(self, field): @@ -230,7 +237,11 @@ class DMatrix(object): a numpy array of float information of the data """ length = ctypes.c_ulong() - ret = _LIB.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = ctypes.POINTER(ctypes.c_uint)() + _check_call(_LIB.XGDMatrixGetUIntInfo(self.handle, + c_str(field), + ctypes.byref(length), + ctypes.byref(ret))) return ctypes2numpy(ret, length.value, np.uint32) def set_float_info(self, field, data): @@ -244,8 +255,10 @@ class DMatrix(object): data: numpy array The array ofdata to be set """ - _LIB.XGDMatrixSetFloatInfo(self.handle, c_str(field), - c_array(ctypes.c_float, data), len(data)) + _check_call(_LIB.XGDMatrixSetFloatInfo(self.handle, + c_str(field), + c_array(ctypes.c_float, data), + len(data))) def set_uint_info(self, field, data): """Set uint type property into the DMatrix. @@ -258,8 +271,10 @@ class DMatrix(object): data: numpy array The array ofdata to be set """ - _LIB.XGDMatrixSetUIntInfo(self.handle, c_str(field), - c_array(ctypes.c_uint, data), len(data)) + _check_call(_LIB.XGDMatrixSetUIntInfo(self.handle, + c_str(field), + c_array(ctypes.c_uint, data), + len(data))) def save_binary(self, fname, silent=True): """Save DMatrix to an XGBoost buffer. @@ -271,7 +286,9 @@ class DMatrix(object): silent : bool (optional; default: True) If set, the output is suppressed. """ - _LIB.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent)) + _check_call(_LIB.XGDMatrixSaveBinary(self.handle, + c_str(fname), + int(silent))) def set_label(self, label): """Set label of dmatrix @@ -317,7 +334,9 @@ class DMatrix(object): group : array like Group size of each group """ - _LIB.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group)) + _check_call(_LIB.XGDMatrixSetGroup(self.handle, + c_array(ctypes.c_uint, group), + len(group))) def get_label(self): """Get the label of the DMatrix. @@ -353,7 +372,10 @@ class DMatrix(object): ------- number of rows : int """ - return _LIB.XGDMatrixNumRow(self.handle) + ret = ctypes.c_ulong() + _check_call(_LIB.XGDMatrixNumRow(self.handle, + ctypes.byref(ret))) + return ret.value def slice(self, rindex): """Slice the DMatrix and return a new DMatrix that only contains `rindex`. @@ -369,8 +391,11 @@ class DMatrix(object): A new DMatrix containing only selected indices. 
""" res = DMatrix(None) - res.handle = ctypes.c_void_p(_LIB.XGDMatrixSliceDMatrix( - self.handle, c_array(ctypes.c_int, rindex), len(rindex))) + res.handle = ctypes.c_void_p() + _check_call(_LIB.XGDMatrixSliceDMatrix(self.handle, + c_array(ctypes.c_int, rindex), + len(rindex), + ctypes.byref(res.handle))) return res @@ -394,7 +419,8 @@ class Booster(object): if not isinstance(d, DMatrix): raise TypeError('invalid cache item: {}'.format(type(d).__name__)) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) - self.handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, len(cache))) + self.handle = ctypes.c_void_p() + _check_call(_LIB.XGBoosterCreate(dmats, len(cache), ctypes.byref(self.handle))) self.set_param({'seed': 0}) self.set_param(params or {}) if model_file is not None: @@ -419,10 +445,11 @@ class Booster(object): if handle is not None: buf = handle dmats = c_array(ctypes.c_void_p, []) - handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, 0)) + handle = ctypes.c_void_p() + _check_call(_LIB.XGBoosterCreate(dmats, 0, ctypes.byref(handle))) length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - _LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length) + _check_call(_LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length)) state['handle'] = handle self.__dict__.update(state) self.set_param({'seed': 0}) @@ -449,7 +476,7 @@ class Booster(object): elif isinstance(params, STRING_TYPES) and value is not None: params = [(params, value)] for key, val in params: - _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) + _check_call(_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val)))) def update(self, dtrain, iteration, fobj=None): """ @@ -467,7 +494,7 @@ class Booster(object): if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) if fobj is None: - _LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle) + _check_call(_LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle)) else: pred = self.predict(dtrain) grad, hess = fobj(pred, dtrain) @@ -490,10 +517,10 @@ class Booster(object): raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess))) if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) - _LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, - c_array(ctypes.c_float, grad), - c_array(ctypes.c_float, hess), - len(grad)) + _check_call(_LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, + c_array(ctypes.c_float, grad), + c_array(ctypes.c_float, hess), + len(grad))) def eval_set(self, evals, iteration=0, feval=None): # pylint: disable=invalid-name @@ -520,7 +547,11 @@ class Booster(object): raise TypeError('expected string, got {}'.format(type(d[1]).__name__)) dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals]) evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals]) - return _LIB.XGBoosterEvalOneIter(self.handle, iteration, dmats, evnames, len(evals)) + msg = ctypes.c_char_p() + _check_call(_LIB.XGBoosterEvalOneIter(self.handle, iteration, + dmats, evnames, len(evals), + ctypes.byref(msg))) + return msg.value else: res = '[%d]' % iteration for dmat, evname in evals: @@ -582,8 +613,11 @@ class Booster(object): if pred_leaf: option_mask |= 0x02 length = ctypes.c_ulong() - preds = _LIB.XGBoosterPredict(self.handle, data.handle, - option_mask, ntree_limit, ctypes.byref(length)) + preds = ctypes.POINTER(ctypes.c_float)() + 
_check_call(_LIB.XGBoosterPredict(self.handle, data.handle, + option_mask, ntree_limit, + ctypes.byref(length), + ctypes.byref(preds))) preds = ctypes2numpy(preds, length.value, np.float32) if pred_leaf: preds = preds.astype(np.int32) @@ -602,7 +636,7 @@ class Booster(object): Output file name """ if isinstance(fname, STRING_TYPES): # assume file name - _LIB.XGBoosterSaveModel(self.handle, c_str(fname)) + _check_call(_LIB.XGBoosterSaveModel(self.handle, c_str(fname))) else: raise TypeError("fname must be a string") @@ -615,8 +649,10 @@ class Booster(object): a in memory buffer represetation of the model """ length = ctypes.c_ulong() - cptr = _LIB.XGBoosterGetModelRaw(self.handle, - ctypes.byref(length)) + cptr = ctypes.POINTER(ctypes.c_char)() + _check_call(_LIB.XGBoosterGetModelRaw(self.handle, + ctypes.byref(length), + ctypes.byref(cptr))) return ctypes2buffer(cptr, length.value) def load_model(self, fname): @@ -634,7 +670,7 @@ class Booster(object): buf = fname length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - _LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length) + _check_call(_LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length)) def dump_model(self, fout, fmap='', with_stats=False): """ @@ -666,8 +702,12 @@ class Booster(object): Returns the dump the model as a list of strings. """ length = ctypes.c_ulong() - sarr = _LIB.XGBoosterDumpModel(self.handle, c_str(fmap), - int(with_stats), ctypes.byref(length)) + sarr = ctypes.POINTER(ctypes.c_char_p)() + _check_call(_LIB.XGBoosterDumpModel(self.handle, + c_str(fmap), + int(with_stats), + ctypes.byref(length), + ctypes.byref(sarr))) res = [] for i in range(length.value): res.append(str(sarr[i].decode('ascii'))) diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 8572316f0..e1ce01119 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -8,6 +8,7 @@ #include #include #include +#include // include all std functions using namespace std; #include "./xgboost_wrapper.h" @@ -102,15 +103,79 @@ class Booster: public learner::BoostLearner { using namespace xgboost::wrapper; -void* XGDMatrixCreateFromFile(const char *fname, int silent) { - return LoadDataMatrix(fname, silent != 0, false, false); +/*! \brief macro to guard beginning and end section of all functions */ +#define API_BEGIN() try { +/*! + * \brief every function starts with API_BEGIN(); and finishes with API_END(); + * \param Finalize optionally put in a finalizer + */ +#define API_END(Finalize) } catch(std::exception &e) { \ + Finalize; return XGBHandleException(e); \ + } return 0; + +// do not use threadlocal on OSX since it is not always available +#ifndef DISABLE_THREAD_LOCAL +#ifdef __GNUC__ + #define XGB_TREAD_LOCAL __thread +#elif __STDC_VERSION__ >= 201112L + #define XGB_TREAD_LOCAL _Thread_local +#elif defined(_MSC_VER) + #define XGB_TREAD_LOCAL __declspec(thread) +#endif +#endif + +#ifndef XGB_TREAD_LOCAL +#pragma message("Warning: Threadlocal not enabled, used single thread error handling") +#define XGB_TREAD_LOCAL +#endif + +/*! 
+ * \brief a helper function for error handling + * will set the last error to be str_set when it is not NULL + * \param str_set the error to set + * \return a pointer message to last error + */ +const char *XGBSetGetLastError_(const char *str_set) { + // use last_error to record last error + static XGB_TREAD_LOCAL std::string last_error; + if (str_set != NULL) { + last_error = str_set; + } + return last_error.c_str(); } -void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem) { - DMatrixSimple *p_mat = new DMatrixSimple(); + +/*! \brief return str message of the last error */ +const char *XGBGetLastError() { + return XGBSetGetLastError_(NULL); +} + +/*! + * \brief handle exception throwed out + * \param e the exception + * \return the return value of API after exception is handled + */ +int XGBHandleException(const std::exception &e) { + XGBSetGetLastError_(e.what()); + return -1; +} + +int XGDMatrixCreateFromFile(const char *fname, + int silent, + DMatrixHandle *out) { + API_BEGIN(); + *out = LoadDataMatrix(fname, silent != 0, false, false); + API_END(); +} + +int XGDMatrixCreateFromCSR(const bst_ulong *indptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem, + DMatrixHandle *out) { + DMatrixSimple *p_mat = NULL; + API_BEGIN(); + p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.row_ptr_.resize(nindptr); for (bst_ulong i = 0; i < nindptr; ++i) { @@ -123,20 +188,24 @@ void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, static_cast(indices[i]+1)); } mat.info.info.num_row = nindptr - 1; - return p_mat; + *out = p_mat; + API_END(delete p_mat); } -void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem) { + +int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, + const unsigned *indices, + const float *data, + bst_ulong nindptr, + bst_ulong nelem, + DMatrixHandle *out) { + DMatrixSimple *p_mat = NULL; + API_BEGIN(); int nthread; #pragma omp parallel { nthread = omp_get_num_threads(); } - - DMatrixSimple *p_mat = new DMatrixSimple(); + p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; utils::ParallelGroupBuilder builder(&mat.row_ptr_, &mat.row_data_); builder.InitBudget(0, nthread); @@ -160,14 +229,19 @@ void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, } mat.info.info.num_row = mat.row_ptr_.size() - 1; mat.info.info.num_col = static_cast(ncol); - return p_mat; + *out = p_mat; + API_END(delete p_mat); } -void* XGDMatrixCreateFromMat(const float *data, - bst_ulong nrow, - bst_ulong ncol, - float missing) { + +int XGDMatrixCreateFromMat(const float *data, + bst_ulong nrow, + bst_ulong ncol, + float missing, + DMatrixHandle *out) { + DMatrixSimple *p_mat = NULL; + API_BEGIN(); + p_mat = new DMatrixSimple(); bool nan_missing = utils::CheckNAN(missing); - DMatrixSimple *p_mat = new DMatrixSimple(); DMatrixSimple &mat = *p_mat; mat.info.info.num_row = nrow; mat.info.info.num_col = ncol; @@ -186,11 +260,16 @@ void* XGDMatrixCreateFromMat(const float *data, } mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem); } - return p_mat; + *out = p_mat; + API_END(delete p_mat); } -void* XGDMatrixSliceDMatrix(void *handle, - const int *idxset, - bst_ulong len) { + +int XGDMatrixSliceDMatrix(DMatrixHandle handle, + const int *idxset, + bst_ulong len, + DMatrixHandle *out) { + DMatrixSimple *p_ret = NULL; + API_BEGIN(); DMatrixSimple tmp; DataMatrix &dsrc = *static_cast(handle); 
if (dsrc.magic != DMatrixSimple::kMagic) { @@ -198,7 +277,7 @@ void* XGDMatrixSliceDMatrix(void *handle, } DataMatrix &src = (dsrc.magic == DMatrixSimple::kMagic ? *static_cast(handle): tmp); - DMatrixSimple *p_ret = new DMatrixSimple(); + p_ret = new DMatrixSimple(); DMatrixSimple &ret = *p_ret; utils::Check(src.info.group_ptr.size() == 0, @@ -232,82 +311,151 @@ void* XGDMatrixSliceDMatrix(void *handle, ret.info.info.fold_index.push_back(src.info.info.fold_index[ridx]); } } - return p_ret; + *out = p_ret; + API_END(delete p_ret); } -void XGDMatrixFree(void *handle) { + +int XGDMatrixFree(DMatrixHandle handle) { + API_BEGIN(); delete static_cast(handle); + API_END(); } -void XGDMatrixSaveBinary(void *handle, const char *fname, int silent) { + +int XGDMatrixSaveBinary(DMatrixHandle handle, + const char *fname, + int silent) { + API_BEGIN(); SaveDataMatrix(*static_cast(handle), fname, silent != 0); + API_END(); } -void XGDMatrixSetFloatInfo(void *handle, const char *field, const float *info, bst_ulong len) { + +int XGDMatrixSetFloatInfo(DMatrixHandle handle, + const char *field, + const float *info, + bst_ulong len) { + API_BEGIN(); std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); vec.resize(len); memcpy(BeginPtr(vec), info, sizeof(float) * len); + API_END(); } -void XGDMatrixSetUIntInfo(void *handle, const char *field, const unsigned *info, bst_ulong len) { + +int XGDMatrixSetUIntInfo(DMatrixHandle handle, + const char *field, + const unsigned *info, + bst_ulong len) { + API_BEGIN(); std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); vec.resize(len); memcpy(BeginPtr(vec), info, sizeof(unsigned) * len); + API_END(); } -void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len) { + +int XGDMatrixSetGroup(DMatrixHandle handle, + const unsigned *group, + bst_ulong len) { + API_BEGIN(); DataMatrix *pmat = static_cast(handle); pmat->info.group_ptr.resize(len + 1); pmat->info.group_ptr[0] = 0; for (uint64_t i = 0; i < len; ++i) { pmat->info.group_ptr[i+1] = pmat->info.group_ptr[i] + group[i]; } + API_END(); } -const float* XGDMatrixGetFloatInfo(const void *handle, const char *field, bst_ulong* len) { + +int XGDMatrixGetFloatInfo(const DMatrixHandle handle, + const char *field, + bst_ulong *out_len, + const float **out_dptr) { + API_BEGIN(); const std::vector &vec = static_cast(handle)->info.GetFloatInfo(field); - *len = static_cast(vec.size()); - return BeginPtr(vec); + *out_len = static_cast(vec.size()); + *out_dptr = BeginPtr(vec); + API_END(); } -const unsigned* XGDMatrixGetUIntInfo(const void *handle, const char *field, bst_ulong* len) { + +int XGDMatrixGetUIntInfo(const DMatrixHandle handle, + const char *field, + bst_ulong *out_len, + const unsigned **out_dptr) { + API_BEGIN(); const std::vector &vec = static_cast(handle)->info.GetUIntInfo(field); - *len = static_cast(vec.size()); - return BeginPtr(vec); + *out_len = static_cast(vec.size()); + *out_dptr = BeginPtr(vec); + API_END(); } -bst_ulong XGDMatrixNumRow(const void *handle) { - return static_cast(static_cast(handle)->info.num_row()); +int XGDMatrixNumRow(const DMatrixHandle handle, + bst_ulong *out) { + API_BEGIN(); + *out = static_cast(static_cast(handle)->info.num_row()); + API_END(); } // xgboost implementation -void *XGBoosterCreate(void *dmats[], bst_ulong len) { +int XGBoosterCreate(DMatrixHandle dmats[], + bst_ulong len, + BoosterHandle *out) { + API_BEGIN(); std::vector mats; for (bst_ulong i = 0; i < len; ++i) { DataMatrix *dtr = static_cast(dmats[i]); mats.push_back(dtr); } - 
return new Booster(mats); + *out = new Booster(mats); + API_END(); } -void XGBoosterFree(void *handle) { + +int XGBoosterFree(BoosterHandle handle) { + API_BEGIN(); delete static_cast(handle); + API_END(); } -void XGBoosterSetParam(void *handle, const char *name, const char *value) { + +int XGBoosterSetParam(BoosterHandle handle, + const char *name, const char *value) { + API_BEGIN(); static_cast(handle)->SetParam(name, value); + API_END(); } -void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain) { + +int XGBoosterUpdateOneIter(BoosterHandle handle, + int iter, + DMatrixHandle dtrain) { + API_BEGIN(); Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); bst->CheckInitModel(); bst->CheckInit(dtr); bst->UpdateOneIter(iter, *dtr); + API_END(); } -void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, bst_ulong len) { + +int XGBoosterBoostOneIter(BoosterHandle handle, + DMatrixHandle dtrain, + float *grad, + float *hess, + bst_ulong len) { + API_BEGIN(); Booster *bst = static_cast(handle); DataMatrix *dtr = static_cast(dtrain); bst->CheckInitModel(); bst->CheckInit(dtr); bst->BoostOneIter(*dtr, grad, hess, len); + API_END(); } -const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], bst_ulong len) { + +int XGBoosterEvalOneIter(BoosterHandle handle, + int iter, + DMatrixHandle dmats[], + const char *evnames[], + bst_ulong len, + const char **out_str) { + API_BEGIN(); Booster *bst = static_cast(handle); std::vector names; std::vector mats; @@ -317,32 +465,64 @@ const char* XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], } bst->CheckInitModel(); bst->eval_str = bst->EvalOneIter(iter, mats, names); - return bst->eval_str.c_str(); + *out_str = bst->eval_str.c_str(); + API_END(); } -const float *XGBoosterPredict(void *handle, void *dmat, int option_mask, - unsigned ntree_limit, bst_ulong *len) { - return static_cast(handle)->Pred(*static_cast(dmat), - option_mask, ntree_limit, len); + +int XGBoosterPredict(BoosterHandle handle, + DMatrixHandle dmat, + int option_mask, + unsigned ntree_limit, + bst_ulong *len, + const float **out_result) { + API_BEGIN(); + *out_result = static_cast(handle)-> + Pred(*static_cast(dmat), + option_mask, ntree_limit, len); + API_END(); } -void XGBoosterLoadModel(void *handle, const char *fname) { + +int XGBoosterLoadModel(BoosterHandle handle, const char *fname) { + API_BEGIN(); static_cast(handle)->LoadModel(fname); + API_END(); } -void XGBoosterSaveModel(void *handle, const char *fname) { + +int XGBoosterSaveModel(BoosterHandle handle, const char *fname) { + API_BEGIN(); Booster *bst = static_cast(handle); bst->CheckInitModel(); bst->SaveModel(fname, false); + API_END(); } -void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len) { + +int XGBoosterLoadModelFromBuffer(BoosterHandle handle, + const void *buf, + bst_ulong len) { + API_BEGIN(); static_cast(handle)->LoadModelFromBuffer(buf, len); + API_END(); } -const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len) { - return static_cast(handle)->GetModelRaw(out_len); + +int XGBoosterGetModelRaw(BoosterHandle handle, + bst_ulong *out_len, + const char **out_dptr) { + API_BEGIN(); + *out_dptr = static_cast(handle)->GetModelRaw(out_len); + API_END(); } -const char** XGBoosterDumpModel(void *handle, const char *fmap, - int with_stats, bst_ulong *len) { + +int XGBoosterDumpModel(BoosterHandle handle, + const char *fmap, + int with_stats, + bst_ulong *len, + const char 
***out_models) { + API_BEGIN(); utils::FeatMap featmap; if (strlen(fmap) != 0) { featmap.LoadText(fmap); } - return static_cast(handle)->GetModelDump(featmap, with_stats != 0, len); + *out_models = static_cast(handle)->GetModelDump( + featmap, with_stats != 0, len); + API_END(); } diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h index 3540a3be0..6d3a619fb 100644 --- a/wrapper/xgboost_wrapper.h +++ b/wrapper/xgboost_wrapper.h @@ -8,234 +8,320 @@ #ifndef XGBOOST_WRAPPER_H_ #define XGBOOST_WRAPPER_H_ +#ifdef __cplusplus +#define XGB_EXTERN_C extern "C" +#endif + #if defined(_MSC_VER) || defined(_WIN32) -#define XGB_DLL __declspec(dllexport) +#define XGB_DLL XGB_EXTERN_C __declspec(dllexport) #else -#define XGB_DLL +#define XGB_DLL XGB_EXTERN_C #endif // manually define unsign long typedef unsigned long bst_ulong; // NOLINT(*) -#ifdef __cplusplus -extern "C" { -#endif - /*! - * \brief load a data matrix - * \param fname the name of the file - * \param silent whether print messages during loading - * \return a loaded data matrix - */ - XGB_DLL void* XGDMatrixCreateFromFile(const char *fname, int silent); - /*! - * \brief create a matrix content from csr format - * \param indptr pointer to row headers - * \param indices findex - * \param data fvalue - * \param nindptr number of rows in the matix + 1 - * \param nelem number of nonzero elements in the matrix - * \return created dmatrix - */ - XGB_DLL void* XGDMatrixCreateFromCSR(const bst_ulong *indptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem); - /*! - * \brief create a matrix content from CSC format - * \param col_ptr pointer to col headers - * \param indices findex - * \param data fvalue - * \param nindptr number of rows in the matix + 1 - * \param nelem number of nonzero elements in the matrix - * \return created dmatrix - */ - XGB_DLL void* XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, - const unsigned *indices, - const float *data, - bst_ulong nindptr, - bst_ulong nelem); - /*! - * \brief create matrix content from dense matrix - * \param data pointer to the data space - * \param nrow number of rows - * \param ncol number columns - * \param missing which value to represent missing value - * \return created dmatrix - */ - XGB_DLL void* XGDMatrixCreateFromMat(const float *data, - bst_ulong nrow, - bst_ulong ncol, - float missing); - /*! - * \brief create a new dmatrix from sliced content of existing matrix - * \param handle instance of data matrix to be sliced - * \param idxset index set - * \param len length of index set - * \return a sliced new matrix - */ - XGB_DLL void* XGDMatrixSliceDMatrix(void *handle, - const int *idxset, - bst_ulong len); - /*! - * \brief free space in data matrix - */ - XGB_DLL void XGDMatrixFree(void *handle); - /*! - * \brief load a data matrix into binary file - * \param handle a instance of data matrix - * \param fname file name - * \param silent print statistics when saving - */ - XGB_DLL void XGDMatrixSaveBinary(void *handle, const char *fname, int silent); - /*! - * \brief set float vector to a content in info - * \param handle a instance of data matrix - * \param field field name, can be label, weight - * \param array pointer to float vector - * \param len length of array - */ - XGB_DLL void XGDMatrixSetFloatInfo(void *handle, const char *field, - const float *array, bst_ulong len); - /*! 
- * \brief set uint32 vector to a content in info - * \param handle a instance of data matrix - * \param field field name - * \param array pointer to float vector - * \param len length of array - */ - XGB_DLL void XGDMatrixSetUIntInfo(void *handle, const char *field, - const unsigned *array, bst_ulong len); - /*! - * \brief set label of the training matrix - * \param handle a instance of data matrix - * \param group pointer to group size - * \param len length of array - */ - XGB_DLL void XGDMatrixSetGroup(void *handle, const unsigned *group, bst_ulong len); - /*! - * \brief get float info vector from matrix - * \param handle a instance of data matrix - * \param field field name - * \param out_len used to set result length - * \return pointer to the result - */ - XGB_DLL const float* XGDMatrixGetFloatInfo(const void *handle, - const char *field, bst_ulong* out_len); - /*! - * \brief get uint32 info vector from matrix - * \param handle a instance of data matrix - * \param field field name - * \param out_len used to set result length - * \return pointer to the result - */ - XGB_DLL const unsigned* XGDMatrixGetUIntInfo(const void *handle, - const char *field, bst_ulong* out_len); - /*! - * \brief return number of rows - */ - XGB_DLL bst_ulong XGDMatrixNumRow(const void *handle); - // --- start XGBoost class - /*! - * \brief create xgboost learner - * \param dmats matrices that are set to be cached - * \param len length of dmats - */ - XGB_DLL void *XGBoosterCreate(void* dmats[], bst_ulong len); - /*! - * \brief free obj in handle - * \param handle handle to be freed - */ - XGB_DLL void XGBoosterFree(void* handle); - /*! - * \brief set parameters - * \param handle handle - * \param name parameter name - * \param val value of parameter - */ - XGB_DLL void XGBoosterSetParam(void *handle, const char *name, const char *value); - /*! - * \brief update the model in one round using dtrain - * \param handle handle - * \param iter current iteration rounds - * \param dtrain training data - */ - XGB_DLL void XGBoosterUpdateOneIter(void *handle, int iter, void *dtrain); - /*! - * \brief update the model, by directly specify gradient and second order gradient, - * this can be used to replace UpdateOneIter, to support customized loss function - * \param handle handle - * \param dtrain training data - * \param grad gradient statistics - * \param hess second order gradient statistics - * \param len length of grad/hess array - */ - XGB_DLL void XGBoosterBoostOneIter(void *handle, void *dtrain, - float *grad, float *hess, bst_ulong len); - /*! - * \brief get evaluation statistics for xgboost - * \param handle handle - * \param iter current iteration rounds - * \param dmats pointers to data to be evaluated - * \param evnames pointers to names of each data - * \param len length of dmats - * \return the string containing evaluation stati - */ - XGB_DLL const char *XGBoosterEvalOneIter(void *handle, int iter, void *dmats[], - const char *evnames[], bst_ulong len); - /*! 
- * \brief make prediction based on dmat
- * \param handle handle
- * \param dmat data matrix
- * \param option_mask bit-mask of options taken in prediction, possible values
- * 0:normal prediction
- * 1:output margin instead of transformed value
- * 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
- * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
- * when the parameter is set to 0, we will use all the trees
- * \param len used to store length of returning result
- */
- XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat,
- int option_mask,
- unsigned ntree_limit,
- bst_ulong *len);
- /*!
- * \brief load model from existing file
- * \param handle handle
- * \param fname file name
- */
- XGB_DLL void XGBoosterLoadModel(void *handle, const char *fname);
- /*!
- * \brief save model into existing file
- * \param handle handle
- * \param fname file name
- */
- XGB_DLL void XGBoosterSaveModel(void *handle, const char *fname);
- /*!
- * \brief load model from in memory buffer
- * \param handle handle
- * \param buf pointer to the buffer
- * \param len the length of the buffer
- */
- XGB_DLL void XGBoosterLoadModelFromBuffer(void *handle, const void *buf, bst_ulong len);
- /*!
- * \brief save model into binary raw bytes, return header of the array
- * user must copy the result out, before next xgboost call
- * \param handle handle
- * \param out_len the argument to hold the output length
- * \return the pointer to the beginning of binary buffer
- */
- XGB_DLL const char *XGBoosterGetModelRaw(void *handle, bst_ulong *out_len);
- /*!
- * \brief dump model, return array of strings representing model dump
- * \param handle handle
- * \param fmap name to fmap can be empty string
- * \param with_stats whether to dump with statistics
- * \param out_len length of output array
- * \return char *data[], representing dump of each model
- */
- XGB_DLL const char **XGBoosterDumpModel(void *handle, const char *fmap,
- int with_stats, bst_ulong *out_len);
-#ifdef __cplusplus
-}
-#endif
+/*! \brief handle to DMatrix */
+typedef void *DMatrixHandle;
+/*! \brief handle to Booster */
+typedef void *BoosterHandle;
+
+/*!
+ * \brief get string message of the last error
+ *
+ * all functions in this file will return 0 on success
+ * and -1 when an error occurs,
+ * XGBGetLastError can be called to retrieve the error
+ *
+ * this function is thread-safe and can be called from different threads
+ * \return const char* error information
+ */
+XGB_DLL const char *XGBGetLastError();
+
+/*!
+ * \brief load a data matrix
+ * \param fname the name of the file
+ * \param silent whether to print messages during loading
+ * \param out a loaded data matrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromFile(const char *fname,
+ int silent,
+ DMatrixHandle *out);
+
+/*!
+ * \brief create a matrix content from csr format
+ * \param indptr pointer to row headers
+ * \param indices findex
+ * \param data fvalue
+ * \param nindptr number of rows in the matrix + 1
+ * \param nelem number of nonzero elements in the matrix
+ * \param out created dmatrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
+ const unsigned *indices,
+ const float *data,
+ bst_ulong nindptr,
+ bst_ulong nelem,
+ DMatrixHandle *out);
+/*!
+ * \brief create a matrix content from CSC format
+ * \param col_ptr pointer to col headers
+ * \param indices findex
+ * \param data fvalue
+ * \param nindptr number of rows in the matrix + 1
+ * \param nelem number of nonzero elements in the matrix
+ * \param out created dmatrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
+ const unsigned *indices,
+ const float *data,
+ bst_ulong nindptr,
+ bst_ulong nelem,
+ DMatrixHandle *out);
+/*!
+ * \brief create matrix content from dense matrix
+ * \param data pointer to the data space
+ * \param nrow number of rows
+ * \param ncol number of columns
+ * \param missing which value to represent missing value
+ * \param out created dmatrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixCreateFromMat(const float *data,
+ bst_ulong nrow,
+ bst_ulong ncol,
+ float missing,
+ DMatrixHandle *out);
+/*!
+ * \brief create a new dmatrix from sliced content of existing matrix
+ * \param handle instance of data matrix to be sliced
+ * \param idxset index set
+ * \param len length of index set
+ * \param out a sliced new matrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
+ const int *idxset,
+ bst_ulong len,
+ DMatrixHandle *out);
+/*!
+ * \brief free space in data matrix
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixFree(void *handle);
+/*!
+ * \brief load a data matrix into binary file
+ * \param handle an instance of data matrix
+ * \param fname file name
+ * \param silent print statistics when saving
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle,
+ const char *fname, int silent);
+/*!
+ * \brief set float vector to a content in info
+ * \param handle an instance of data matrix
+ * \param field field name, can be label, weight
+ * \param array pointer to float vector
+ * \param len length of array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
+ const char *field,
+ const float *array,
+ bst_ulong len);
+/*!
+ * \brief set uint32 vector to a content in info
+ * \param handle an instance of data matrix
+ * \param field field name
+ * \param array pointer to unsigned int vector
+ * \param len length of array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
+ const char *field,
+ const unsigned *array,
+ bst_ulong len);
+/*!
+ * \brief set group sizes of the training matrix
+ * \param handle an instance of data matrix
+ * \param group pointer to group size
+ * \param len length of array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
+ const unsigned *group,
+ bst_ulong len);
+/*!
+ * \brief get float info vector from matrix
+ * \param handle an instance of data matrix
+ * \param field field name
+ * \param out_len used to set result length
+ * \param out_dptr pointer to the result
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
+ const char *field,
+ bst_ulong* out_len,
+ const float **out_dptr);
+/*!
+/*!
+ * \brief get uint32 info vector from matrix
+ * \param handle an instance of data matrix
+ * \param field field name
+ * \param out_len used to set result length
+ * \param out_dptr pointer to the result
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
+                                 const char *field,
+                                 bst_ulong* out_len,
+                                 const unsigned **out_dptr);
+/*!
+ * \brief get number of rows
+ * \param handle the handle to the DMatrix
+ * \param out the output number of rows
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle,
+                            bst_ulong *out);
+// --- start XGBoost class
+/*!
+ * \brief create xgboost learner
+ * \param dmats matrices that are set to be cached
+ * \param len length of dmats
+ * \param out handle to the result booster
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterCreate(void* dmats[],
+                            bst_ulong len,
+                            BoosterHandle *out);
+/*!
+ * \brief free obj in handle
+ * \param handle handle to be freed
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterFree(BoosterHandle handle);
+
+/*!
+ * \brief set parameters
+ * \param handle handle
+ * \param name parameter name
+ * \param value value of parameter
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
+                              const char *name,
+                              const char *value);
+/*!
+ * \brief update the model in one round using dtrain
+ * \param handle handle
+ * \param iter current iteration round
+ * \param dtrain training data
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
+                                   int iter,
+                                   DMatrixHandle dtrain);
+/*!
+ * \brief update the model by directly specifying the gradient and second order gradient;
+ * this can be used to replace UpdateOneIter, to support a customized loss function
+ * \param handle handle
+ * \param dtrain training data
+ * \param grad gradient statistics
+ * \param hess second order gradient statistics
+ * \param len length of grad/hess array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
+                                  DMatrixHandle dtrain,
+                                  float *grad,
+                                  float *hess,
+                                  bst_ulong len);
+/*!
+ * \brief get evaluation statistics for xgboost
+ * \param handle handle
+ * \param iter current iteration round
+ * \param dmats pointers to data to be evaluated
+ * \param evnames pointers to names of each data
+ * \param len length of dmats
+ * \param out_result the string containing evaluation statistics
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
+                                 int iter,
+                                 DMatrixHandle dmats[],
+                                 const char *evnames[],
+                                 bst_ulong len,
+                                 const char **out_result);
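Taken together, XGBoosterCreate / XGBoosterSetParam / XGBoosterUpdateOneIter / XGBoosterEvalOneIter form the training loop. A sketch through this series' Booster wrapper, under the assumption that the params type is Iterable<Map.Entry<String, Object>> (the parameter values and data path are illustrative, taken from the demos):

    import java.util.AbstractMap;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import org.dmlc.xgboost4j.Booster;
    import org.dmlc.xgboost4j.DMatrix;
    import org.dmlc.xgboost4j.util.XgboostError;

    public class TrainLoopSketch {
        public static void main(String[] args) throws XgboostError {
            DMatrix dtrain = new DMatrix("../../demo/data/agaricus.txt.train");
            List<Map.Entry<String, Object>> params = new ArrayList<>();
            params.add(new AbstractMap.SimpleEntry<String, Object>("eta", 0.3));
            params.add(new AbstractMap.SimpleEntry<String, Object>("objective", "binary:logistic"));
            Booster booster = new Booster(params, new DMatrix[]{dtrain});
            for (int iter = 0; iter < 3; iter++) {
                booster.update(dtrain, iter);  // XGBoosterUpdateOneIter underneath
                // XGBoosterEvalOneIter: one line of metrics per watched matrix
                System.out.println(booster.evalSet(new DMatrix[]{dtrain}, new String[]{"train"}, iter));
            }
        }
    }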
+/*!
+ * \brief make prediction based on dmat
+ * \param handle handle
+ * \param dmat data matrix
+ * \param option_mask bit-mask of options taken in prediction, possible values
+ *          0:normal prediction
+ *          1:output margin instead of transformed value
+ *          2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
+ * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
+ *    when the parameter is set to 0, we will use all the trees
+ * \param out_len used to store the length of the returned result
+ * \param out_result used to set the pointer to the result array
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterPredict(BoosterHandle handle,
+                             DMatrixHandle dmat,
+                             int option_mask,
+                             unsigned ntree_limit,
+                             bst_ulong *out_len,
+                             const float **out_result);
+/*!
+ * \brief load model from existing file
+ * \param handle handle
+ * \param fname file name
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
+                               const char *fname);
+/*!
+ * \brief save model into existing file
+ * \param handle handle
+ * \param fname file name
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
+                               const char *fname);
+/*!
+ * \brief load model from in memory buffer
+ * \param handle handle
+ * \param buf pointer to the buffer
+ * \param len the length of the buffer
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
+                                         const void *buf,
+                                         bst_ulong len);
+/*!
+ * \brief save model into binary raw bytes, return header of the array;
+ * the user must copy the result out before the next xgboost call
+ * \param handle handle
+ * \param out_len the argument to hold the output length
+ * \param out_dptr the argument to hold the output data pointer
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
+                                 bst_ulong *out_len,
+                                 const char **out_dptr);
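The option_mask/ntree_limit pair above surfaces unchanged in the JNI layer. A sketch of a raw prediction call, assuming the booster and data handles come from earlier create calls (checkCall-style error handling is shown inline):

    import org.dmlc.xgboost4j.wrapper.XgboostJNI;

    public class PredictSketch {
        // handles assumed to be obtained from XGBoosterCreate / XGDMatrixCreateFromFile
        static float[] predictAll(long boosterHandle, long dmatHandle) {
            float[][] out = new float[1][];
            // option_mask 0 = normal prediction (1 = margin, 2 = leaf indices);
            // ntree_limit 0 = use every tree
            int ret = XgboostJNI.XGBoosterPredict(boosterHandle, dmatHandle, 0, 0L, out);
            if (ret != 0) {
                throw new RuntimeException(XgboostJNI.XGBGetLastError());
            }
            return out[0];
        }
    }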
+/*!
+ * \brief dump model, return array of strings representing model dump
+ * \param handle handle
+ * \param fmap name of the feature map file, can be an empty string
+ * \param with_stats whether to dump with statistics
+ * \param out_len length of output array
+ * \param out_dump_array pointer to the array of strings representing the dump of each model
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGBoosterDumpModel(BoosterHandle handle,
+                               const char *fmap,
+                               int with_stats,
+                               bst_ulong *out_len,
+                               const char ***out_dump_array);
 #endif  // XGBOOST_WRAPPER_H_

From a735f8cb766e7c7e487cc26878f403844a413deb Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sat, 4 Jul 2015 18:29:42 -0700
Subject: [PATCH 39/59] quick patch threadlocal

---
 wrapper/xgboost_wrapper.cpp | 36 ++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp
index e1ce01119..18c1eae49 100644
--- a/wrapper/xgboost_wrapper.cpp
+++ b/wrapper/xgboost_wrapper.cpp
@@ -98,6 +98,31 @@ class Booster: public learner::BoostLearner {
  private:
  bool init_model;
 };
+
+// helper to support threadlocal
+struct ThreadLocalStore {
+  std::vector<std::string*> data;
+  // allocate a string
+  inline std::string *Alloc() {
+    mutex.Lock();
+    data.push_back(new std::string());
+    std::string *ret = data.back();
+    mutex.Unlock();
+    return ret;
+  }
+  ThreadLocalStore() {
+    mutex.Init();
+  }
+  ~ThreadLocalStore() {
+    for (size_t i = 0; i < data.size(); ++i) {
+      delete data[i];
+    }
+    mutex.Destroy();
+  }
+  utils::Mutex mutex;
+};
+
+static ThreadLocalStore thread_local_store;
 }  // namespace wrapper
 }  // namespace xgboost
@@ -137,11 +162,14 @@ using namespace xgboost::wrapper;
  */
 const char *XGBSetGetLastError_(const char *str_set) {
   // use last_error to record last error
-  static XGB_TREAD_LOCAL std::string last_error;
-  if (str_set != NULL) {
-    last_error = str_set;
+  static XGB_TREAD_LOCAL std::string *last_error = NULL;
+  if (last_error == NULL) {
+    last_error = thread_local_store.Alloc();
   }
-  return last_error.c_str();
+  if (str_set != NULL) {
+    *last_error = str_set;
+  }
+  return last_error->c_str();
 }
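The store above gives every thread a private error slot, so concurrent callers cannot clobber each other's last-error message. A contrived Java sketch of that guarantee, assuming the JNI bindings from the following patch (the bogus file name just forces a failure in each thread):

    import org.dmlc.xgboost4j.wrapper.XgboostJNI;

    public class PerThreadErrorSketch {
        public static void main(String[] args) {
            Runnable failAndReport = new Runnable() {
                @Override
                public void run() {
                    long[] out = new long[1];
                    if (XgboostJNI.XGDMatrixCreateFromFile("no-such-file", 1, out) != 0) {
                        // each thread sees the message for its own failed call
                        System.out.println(Thread.currentThread().getName()
                                + ": " + XgboostJNI.XGBGetLastError());
                    }
                }
            };
            new Thread(failAndReport, "worker-1").start();
            new Thread(failAndReport, "worker-2").start();
        }
    }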
\brief return str message of the last error */

From f73bcd427dcdd3602458f2b1cb873085f1fd768b Mon Sep 17 00:00:00 2001
From: yanqingmen
Date: Mon, 6 Jul 2015 02:32:58 -0700
Subject: [PATCH 40/59] update java wrapper for the new fault-handling API

---
 .../dmlc/xgboost4j/demo/BasicWalkThrough.java |   3 +-
 .../xgboost4j/demo/BoostFromPrediction.java   |   3 +-
 .../dmlc/xgboost4j/demo/CrossValidation.java  |   3 +-
 .../dmlc/xgboost4j/demo/CustomObjective.java  |  25 +-
 .../dmlc/xgboost4j/demo/ExternalMemory.java   |   3 +-
 .../demo/GeneralizedLinearModel.java          |   3 +-
 .../xgboost4j/demo/PredictFirstNtree.java     |   3 +-
 .../xgboost4j/demo/PredictLeafIndices.java    |   3 +-
 .../dmlc/xgboost4j/demo/util/CustomEval.java  |  12 +-
 .../dmlc/xgboost4j/demo/util/DataLoader.java  |   6 +-
 .../main/java/org/dmlc/xgboost4j/Booster.java | 112 +++---
 .../main/java/org/dmlc/xgboost4j/DMatrix.java |  86 +++--
 .../java/org/dmlc/xgboost4j/util/CVPack.java  |  15 +-
 .../org/dmlc/xgboost4j/util/ErrorHandle.java  |  50 +++
 .../java/org/dmlc/xgboost4j/util/Trainer.java |   6 +-
 .../org/dmlc/xgboost4j/util/XgboostError.java |  26 ++
 .../dmlc/xgboost4j/wrapper/XgboostJNI.java    |  52 +--
 java/xgboost4j_wrapper.cpp                    | 340 ++++++++++--------
 java/xgboost4j_wrapper.h                      | 136 +++----
 19 files changed, 558 insertions(+), 329 deletions(-)
 create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java
 create mode 100644 java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XgboostError.java

diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
index a0c7a3ae1..86ba49c48 100644
--- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java
@@ -31,6 +31,7 @@ import org.dmlc.xgboost4j.DMatrix;
 import org.dmlc.xgboost4j.demo.util.DataLoader;
 import org.dmlc.xgboost4j.demo.util.Params;
 import org.dmlc.xgboost4j.util.Trainer;
+import org.dmlc.xgboost4j.util.XgboostError;

 /**
  * a simple example of java wrapper for xgboost
@@ -52,7 +53,7 @@ public class BasicWalkThrough {
     }

-    public static void main(String[] args) throws UnsupportedEncodingException, IOException {
+    public static void main(String[] args) throws UnsupportedEncodingException, IOException, XgboostError {
         // load file from text file, also binary buffer generated by xgboost4j
         DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
         DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");

diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java
index 733c49503..1113eef68 100644
--- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java
@@ -23,13 +23,14 @@ import org.dmlc.xgboost4j.Booster;
 import org.dmlc.xgboost4j.DMatrix;
 import org.dmlc.xgboost4j.demo.util.Params;
 import org.dmlc.xgboost4j.util.Trainer;
+import org.dmlc.xgboost4j.util.XgboostError;

 /**
  * example for start from a initial base prediction
  * @author hzx
  */
 public class BoostFromPrediction {
-    public static void main(String[] args) {
+    public static void main(String[] args) throws XgboostError {
         System.out.println("start running example to start from a initial prediction");

         // load file from text file, also binary buffer generated by xgboost4j
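XgboostError is a checked exception, which is why the demo entry points above now declare it. A caller that prefers to handle failures locally can do so instead; a minimal sketch reusing a data path from the demos:

    import org.dmlc.xgboost4j.DMatrix;
    import org.dmlc.xgboost4j.util.XgboostError;

    public class CheckedExceptionSketch {
        public static void main(String[] args) {
            try {
                DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
                System.out.println("loaded " + trainMat.rowNum() + " rows");
            } catch (XgboostError ex) {
                // the message comes from XGBGetLastError() via ErrorHandle.checkCall
                System.err.println("native xgboost call failed: " + ex.getMessage());
            }
        }
    }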
diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java
index 0c470bf17..ec5716700 100644
--- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java
@@ -19,13 +19,14 @@ import java.io.IOException;
 import org.dmlc.xgboost4j.DMatrix;
 import org.dmlc.xgboost4j.util.Trainer;
 import org.dmlc.xgboost4j.demo.util.Params;
+import org.dmlc.xgboost4j.util.XgboostError;

 /**
  * an example of cross validation
  * @author hzx
  */
 public class CrossValidation {
-    public static void main(String[] args) throws IOException {
+    public static void main(String[] args) throws IOException, XgboostError {
         //load train mat
         DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");

diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java
index 03c9c4b52..4aaa053e0 100644
--- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java
+++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java
@@ -19,12 +19,15 @@ import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.dmlc.xgboost4j.Booster;
 import org.dmlc.xgboost4j.IEvaluation;
 import org.dmlc.xgboost4j.DMatrix;
 import org.dmlc.xgboost4j.IObjective;
 import org.dmlc.xgboost4j.demo.util.Params;
 import org.dmlc.xgboost4j.util.Trainer;
+import org.dmlc.xgboost4j.util.XgboostError;

 /**
  * an example user define objective and eval
@@ -40,6 +43,8 @@ public class CustomObjective {
      * loglikelihoode loss obj function
      */
    public static class LogRegObj implements IObjective {
+        private static final Log logger = LogFactory.getLog(LogRegObj.class);
+
        /**
         * simple sigmoid func
         * @param input
@@ -66,7 +71,13 @@ public class CustomObjective {
        public List<float[]> getGradient(float[][] predicts, DMatrix dtrain) {
            int nrow = predicts.length;
            List<float[]> gradients = new ArrayList<>();
-            float[] labels = dtrain.getLabel();
+            float[] labels;
+            try {
+                labels = dtrain.getLabel();
+            } catch (XgboostError ex) {
+                logger.error(ex);
+                return null;
+            }
            float[] grad = new float[nrow];
            float[] hess = new float[nrow];

@@ -93,6 +104,8 @@ public class CustomObjective {
      * Take this in mind when you use the customization, and maybe you need write customized evaluation function
      */
    public static class EvalError implements IEvaluation {
+        private static final Log logger = LogFactory.getLog(EvalError.class);
+
        String evalMetric = "custom_error";

        public EvalError() {
@@ -106,7 +119,13 @@ public class CustomObjective {
        @Override
        public float eval(float[][] predicts, DMatrix dmat) {
            float error = 0f;
-            float[] labels = dmat.getLabel();
+            float[] labels;
+            try {
+                labels = dmat.getLabel();
+            } catch (XgboostError ex) {
+                logger.error(ex);
+                return -1f;
+            }
            int nrow = predicts.length;
            for(int i=0; i<nrow; i++) {
                if(labels[i]==0f && predicts[i][0]>0) {
@@ -121,7 +140,7 @@
        }
    }

-    public static void main(String[] args) {
+    public static void main(String[] args) throws XgboostError {
        //load train mat (svmlight format)
        DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
        //load valid mat (svmlight format)

diff --git 
a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java index 6ac687289..e74e3e858 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java @@ -23,13 +23,14 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.XgboostError; /** * simple example for using external memory version * @author hzx */ public class ExternalMemory { - public static void main(String[] args) { + public static void main(String[] args) throws XgboostError { //this is the only difference, add a # followed by a cache prefix name //several cache file with the prefix will be generated //currently only support convert from libsvm file diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java index 2a20edbff..db3cd0e59 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java @@ -24,6 +24,7 @@ import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.CustomEval; import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; +import org.dmlc.xgboost4j.util.XgboostError; /** * this is an example of fit generalized linear model in xgboost @@ -31,7 +32,7 @@ import org.dmlc.xgboost4j.util.Trainer; * @author hzx */ public class GeneralizedLinearModel { - public static void main(String[] args) { + public static void main(String[] args) throws XgboostError { // load file from text file, also binary buffer generated by xgboost4j DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java index 8e3f3abfb..6bcf67f86 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java @@ -25,13 +25,14 @@ import org.dmlc.xgboost4j.util.Trainer; import org.dmlc.xgboost4j.demo.util.CustomEval; import org.dmlc.xgboost4j.demo.util.Params; +import org.dmlc.xgboost4j.util.XgboostError; /** * predict first ntree * @author hzx */ public class PredictFirstNtree { - public static void main(String[] args) { + public static void main(String[] args) throws XgboostError { // load file from text file, also binary buffer generated by xgboost4j DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java index 697f40379..61026a6b8 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java @@ -24,13 +24,14 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; 
import org.dmlc.xgboost4j.util.Trainer; import org.dmlc.xgboost4j.demo.util.Params; +import org.dmlc.xgboost4j.util.XgboostError; /** * predict leaf indices * @author hzx */ public class PredictLeafIndices { - public static void main(String[] args) { + public static void main(String[] args) throws XgboostError { // load file from text file, also binary buffer generated by xgboost4j DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java index ad3a9124b..116c06ddf 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java @@ -15,14 +15,18 @@ */ package org.dmlc.xgboost4j.demo.util; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.IEvaluation; +import org.dmlc.xgboost4j.util.XgboostError; /** * a util evaluation class for examples * @author hzx */ public class CustomEval implements IEvaluation { + private static final Log logger = LogFactory.getLog(CustomEval.class); String evalMetric = "custom_error"; @@ -34,7 +38,13 @@ public class CustomEval implements IEvaluation { @Override public float eval(float[][] predicts, DMatrix dmat) { float error = 0f; - float[] labels = dmat.getLabel(); + float[] labels; + try { + labels = dmat.getLabel(); + } catch (XgboostError ex) { + logger.error(ex); + return -1f; + } int nrow = predicts.length; for(int i=0; i0.5) { diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java index 0a020c761..9bad8b372 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/DataLoader.java @@ -77,10 +77,8 @@ public class DataLoader { reader.close(); in.close(); - Float[] flabels = (Float[]) tlabels.toArray(); - denseData.labels = ArrayUtils.toPrimitive(flabels); - Float[] fdata = (Float[]) tdata.toArray(); - denseData.data = ArrayUtils.toPrimitive(fdata); + denseData.labels = ArrayUtils.toPrimitive(tlabels.toArray(new Float[tlabels.size()])); + denseData.data = ArrayUtils.toPrimitive(tdata.toArray(new Float[tdata.size()])); return denseData; } diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java index c5d8b1006..0f296241b 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java @@ -30,6 +30,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.util.Initializer; +import org.dmlc.xgboost4j.util.ErrorHandle; +import org.dmlc.xgboost4j.util.XgboostError; import org.dmlc.xgboost4j.wrapper.XgboostJNI; @@ -57,8 +59,9 @@ public final class Booster { * init Booster from dMatrixs * @param params parameters * @param dMatrixs DMatrix array + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public Booster(Iterable> params, DMatrix[] dMatrixs) { + public Booster(Iterable> params, DMatrix[] dMatrixs) throws XgboostError { init(dMatrixs); setParam("seed","0"); setParams(params); @@ 
-70,9 +73,11 @@ public final class Booster { * load model from modelPath * @param params parameters * @param modelPath booster modelPath (model generated by booster.saveModel) + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public Booster(Iterable> params, String modelPath) { - handle = XgboostJNI.XGBoosterCreate(new long[] {}); + public Booster(Iterable> params, String modelPath) throws XgboostError { + long[] out = new long[1]; + init(null); loadModel(modelPath); setParam("seed","0"); setParams(params); @@ -81,28 +86,33 @@ public final class Booster { - private void init(DMatrix[] dMatrixs) { + private void init(DMatrix[] dMatrixs) throws XgboostError { long[] handles = null; if(dMatrixs != null) { handles = dMatrixs2handles(dMatrixs); } - handle = XgboostJNI.XGBoosterCreate(handles); + long[] out = new long[1]; + ErrorHandle.checkCall(XgboostJNI.XGBoosterCreate(handles, out)); + + handle = out[0]; } /** * set parameter * @param key param name * @param value param value + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public final void setParam(String key, String value) { - XgboostJNI.XGBoosterSetParam(handle, key, value); + public final void setParam(String key, String value) throws XgboostError { + ErrorHandle.checkCall(XgboostJNI.XGBoosterSetParam(handle, key, value)); } /** * set parameters * @param params parameters key-value map + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void setParams(Iterable> params) { + public void setParams(Iterable> params) throws XgboostError { if(params!=null) { for(Map.Entry entry : params) { setParam(entry.getKey(), entry.getValue().toString()); @@ -115,9 +125,10 @@ public final class Booster { * Update (one iteration) * @param dtrain training data * @param iter current iteration number + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void update(DMatrix dtrain, int iter) { - XgboostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle()); + public void update(DMatrix dtrain, int iter) throws XgboostError { + ErrorHandle.checkCall(XgboostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle())); } /** @@ -125,8 +136,9 @@ public final class Booster { * @param dtrain training data * @param iter current iteration number * @param obj customized objective class + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void update(DMatrix dtrain, int iter, IObjective obj) { + public void update(DMatrix dtrain, int iter, IObjective obj) throws XgboostError { float[][] predicts = predict(dtrain, true); List gradients = obj.getGradient(predicts, dtrain); boost(dtrain, gradients.get(0), gradients.get(1)); @@ -137,12 +149,13 @@ public final class Booster { * @param dtrain training data * @param grad first order of gradient * @param hess seconde order of gradient + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void boost(DMatrix dtrain, float[] grad, float[] hess) { + public void boost(DMatrix dtrain, float[] grad, float[] hess) throws XgboostError { if(grad.length != hess.length) { throw new AssertionError(String.format("grad/hess length mismatch %s / %s", grad.length, hess.length)); } - XgboostJNI.XGBoosterBoostOneIter(handle, dtrain.getHandle(), grad, hess); + ErrorHandle.checkCall(XgboostJNI.XGBoosterBoostOneIter(handle, dtrain.getHandle(), grad, hess)); } /** @@ -151,11 +164,13 @@ public final class Booster { * @param evalNames name for eval dmatrixs, used for check results * @param iter current eval iteration * @return eval information + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public 
String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) { + public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XgboostError { long[] handles = dMatrixs2handles(evalMatrixs); - String evalInfo = XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames); - return evalInfo; + String[] evalInfo = new String[1]; + ErrorHandle.checkCall(XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames, evalInfo)); + return evalInfo[0]; } /** @@ -165,8 +180,9 @@ public final class Booster { * @param iter * @param eval * @return eval information + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, IEvaluation eval) { + public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, IEvaluation eval) throws XgboostError { String evalInfo = ""; for(int i=0; i getFeatureScore() { + public Map getFeatureScore() throws XgboostError { String[] modelInfos = getDumpInfo(false); Map featureScore = new HashMap<>(); for(String tree : modelInfos) { @@ -400,8 +431,9 @@ public final class Booster { * get importance of each feature * @param featureMap file to save dumped model info * @return featureMap key: feature index, value: feature importance score + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public Map getFeatureScore(String featureMap) { + public Map getFeatureScore(String featureMap) throws XgboostError { String[] modelInfos = getDumpInfo(featureMap, false); Map featureScore = new HashMap<>(); for(String tree : modelInfos) { diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java index ebeb80a46..b056cad09 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java @@ -18,6 +18,8 @@ package org.dmlc.xgboost4j; import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dmlc.xgboost4j.util.ErrorHandle; +import org.dmlc.xgboost4j.util.XgboostError; import org.dmlc.xgboost4j.util.Initializer; import org.dmlc.xgboost4j.wrapper.XgboostJNI; @@ -50,9 +52,12 @@ public class DMatrix { /** * init DMatrix from file (svmlight format) * @param dataPath + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public DMatrix(String dataPath) { - handle = XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1); + public DMatrix(String dataPath) throws XgboostError { + long[] out = new long[1]; + ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1, out)); + handle = out[0]; } /** @@ -61,17 +66,20 @@ public class DMatrix { * @param indices Indices (colIndexs for CSR or rowIndexs for CSC) * @param data non zero values (sequence by row for CSR or by col for CSC) * @param st sparse matrix type (CSR or CSC) + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) { + public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) throws XgboostError { + long[] out = new long[1]; if(st == SparseType.CSR) { - handle = XgboostJNI.XGDMatrixCreateFromCSR(headers, indices, data); + ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSR(headers, indices, data, out)); } else if(st == SparseType.CSC) { - handle = XgboostJNI.XGDMatrixCreateFromCSC(headers, indices, data); + ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSC(headers, indices, data, out)); } else { throw new 
UnknownError("unknow sparsetype"); } + handle = out[0]; } /** @@ -79,10 +87,13 @@ public class DMatrix { * @param data data values * @param nrow number of rows * @param ncol number of columns + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public DMatrix(float[] data, int nrow, int ncol) { - handle = XgboostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, 0.0f); - } + public DMatrix(float[] data, int nrow, int ncol) throws XgboostError { + long[] out = new long[1]; + ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, 0.0f, out)); + handle = out[0]; + } /** * used for DMatrix slice @@ -98,33 +109,36 @@ public class DMatrix { * set label of dmatrix * @param labels */ - public void setLabel(float[] labels) { - XgboostJNI.XGDMatrixSetFloatInfo(handle, "label", labels); + public void setLabel(float[] labels) throws XgboostError { + ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "label", labels)); } /** * set weight of each instance * @param weights + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void setWeight(float[] weights) { - XgboostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights); + public void setWeight(float[] weights) throws XgboostError { + ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights)); } /** * if specified, xgboost will start from this init margin * can be used to specify initial prediction to boost from * @param baseMargin + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void setBaseMargin(float[] baseMargin) { - XgboostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin); + public void setBaseMargin(float[] baseMargin) throws XgboostError { + ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin)); } /** * if specified, xgboost will start from this init margin * can be used to specify initial prediction to boost from * @param baseMargin + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void setBaseMargin(float[][] baseMargin) { + public void setBaseMargin(float[][] baseMargin) throws XgboostError { float[] flattenMargin = flatten(baseMargin); setBaseMargin(flattenMargin); } @@ -132,42 +146,48 @@ public class DMatrix { /** * Set group sizes of DMatrix (used for ranking) * @param group + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void setGroup(int[] group) { - XgboostJNI.XGDMatrixSetGroup(handle, group); + public void setGroup(int[] group) throws XgboostError { + ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetGroup(handle, group)); } - private float[] getFloatInfo(String field) { - float[] infos = XgboostJNI.XGDMatrixGetFloatInfo(handle, field); - return infos; + private float[] getFloatInfo(String field) throws XgboostError { + float[][] infos = new float[1][]; + ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetFloatInfo(handle, field, infos)); + return infos[0]; } - private int[] getIntInfo(String field) { - int[] infos = XgboostJNI.XGDMatrixGetUIntInfo(handle, field); - return infos; + private int[] getIntInfo(String field) throws XgboostError { + int[][] infos = new int[1][]; + ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetUIntInfo(handle, field, infos)); + return infos[0]; } /** * get label values * @return label + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public float[] getLabel() { + public float[] getLabel() throws XgboostError { return getFloatInfo("label"); } /** * get weight of the DMatrix * @return weights + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public float[] getWeight() { + 
public float[] getWeight() throws XgboostError { return getFloatInfo("weight"); } /** * get base margin of the DMatrix * @return base margin + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public float[] getBaseMargin() { + public float[] getBaseMargin() throws XgboostError { return getFloatInfo("base_margin"); } @@ -175,9 +195,12 @@ public class DMatrix { * Slice the DMatrix and return a new DMatrix that only contains `rowIndex`. * @param rowIndex * @return sliced new DMatrix + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public DMatrix slice(int[] rowIndex) { - long sHandle = XgboostJNI.XGDMatrixSliceDMatrix(handle, rowIndex); + public DMatrix slice(int[] rowIndex) throws XgboostError { + long[] out = new long[1]; + ErrorHandle.checkCall(XgboostJNI.XGDMatrixSliceDMatrix(handle, rowIndex, out)); + long sHandle = out[0]; DMatrix sMatrix = new DMatrix(sHandle); return sMatrix; } @@ -185,9 +208,12 @@ public class DMatrix { /** * get the row number of DMatrix * @return number of rows + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public long rowNum() { - return XgboostJNI.XGDMatrixNumRow(handle); + public long rowNum() throws XgboostError { + long[] rowNum = new long[1]; + ErrorHandle.checkCall(XgboostJNI.XGDMatrixNumRow(handle,rowNum)); + return rowNum[0]; } /** diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java index 3e67dc669..33be48b53 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java @@ -37,8 +37,9 @@ public class CVPack { * @param dtrain train data * @param dtest test data * @param params parameters + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public CVPack(DMatrix dtrain, DMatrix dtest, Iterable> params) { + public CVPack(DMatrix dtrain, DMatrix dtest, Iterable> params) throws XgboostError { dmats = new DMatrix[] {dtrain, dtest}; booster = new Booster(params, dmats); names = new String[] {"train", "test"}; @@ -49,8 +50,9 @@ public class CVPack { /** * update one iteration * @param iter iteration num + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void update(int iter) { + public void update(int iter) throws XgboostError { booster.update(dtrain, iter); } @@ -58,8 +60,9 @@ public class CVPack { * update one iteration * @param iter iteration num * @param obj customized objective + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public void update(int iter, IObjective obj) { + public void update(int iter, IObjective obj) throws XgboostError { booster.update(dtrain, iter, obj); } @@ -67,8 +70,9 @@ public class CVPack { * evaluation * @param iter iteration num * @return + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public String eval(int iter) { + public String eval(int iter) throws XgboostError { return booster.evalSet(dmats, names, iter); } @@ -77,8 +81,9 @@ public class CVPack { * @param iter iteration num * @param eval customized eval * @return + * @throws org.dmlc.xgboost4j.util.XgboostError */ - public String eval(int iter, IEvaluation eval) { + public String eval(int iter, IEvaluation eval) throws XgboostError { return booster.evalSet(dmats, names, iter, eval); } } diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java new file mode 100644 index 000000000..5093eb1db --- /dev/null +++ 
b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.dmlc.xgboost4j.util;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.dmlc.xgboost4j.wrapper.XgboostJNI;
+
+/**
+ * error handle for Xgboost
+ * @author hzx
+ */
+public class ErrorHandle {
+    private static final Log logger = LogFactory.getLog(ErrorHandle.class);
+
+    //load native library
+    static {
+        try {
+            Initializer.InitXgboost();
+        } catch (IOException ex) {
+            logger.error("load native library failed.");
+            logger.error(ex);
+        }
+    }
+
+    /**
+     * check the return value of a C API call
+     * @param ret return value of an xgboostJNI C API call
+     * @throws org.dmlc.xgboost4j.util.XgboostError
+     */
+    public static void checkCall(int ret) throws XgboostError {
+        if(ret != 0) {
+            throw new XgboostError(XgboostJNI.XGBGetLastError());
+        }
+    }
+}

diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java
index 8a336b1a8..a81963da7 100644
--- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java
+++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java
@@ -47,7 +47,7 @@ public class Trainer {
      * @return trained booster
      */
    public static Booster train(Iterable<Map.Entry<String, Object>> params, DMatrix dtrain, int round,
-            Iterable<Map.Entry<String, DMatrix>> watchs, IObjective obj, IEvaluation eval) {
+            Iterable<Map.Entry<String, DMatrix>> watchs, IObjective obj, IEvaluation eval) throws XgboostError {

        //collect eval matrixs
        String[] evalNames;
@@ -112,7 +112,7 @@
      * @param eval customized evaluation (set to null if not used)
      * @return evaluation history
      */
-    public static String[] crossValiation(Iterable<Map.Entry<String, Object>> params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) {
+    public static String[] crossValiation(Iterable<Map.Entry<String, Object>> params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) throws XgboostError {
        CVPack[] cvPacks = makeNFold(data, nfold, params, metrics);
        String[] evalHist = new String[round];
        String[] results = new String[cvPacks.length];
@@ -149,7 +149,7 @@
      * @param evalMetrics Evaluation metrics
      * @return CV package array
      */
-    public static CVPack[] makeNFold(DMatrix data, int nfold, Iterable<Map.Entry<String, Object>> params, String[] evalMetrics) {
+    public static CVPack[] makeNFold(DMatrix data, int nfold, Iterable<Map.Entry<String, Object>> params, String[] evalMetrics) throws XgboostError {
        List<Integer> samples = genRandPermutationNums(0, (int) data.rowNum());
        int step = samples.size()/nfold;
        int[] testSlice = new int[step];
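ErrorHandle is the seam between the C return-code convention and Java exceptions; every wrapper call in this patch funnels through it. A minimal direct-usage sketch, reusing the demo data path for illustration:

    import org.dmlc.xgboost4j.util.ErrorHandle;
    import org.dmlc.xgboost4j.util.XgboostError;
    import org.dmlc.xgboost4j.wrapper.XgboostJNI;

    public class CheckCallSketch {
        public static void main(String[] args) throws XgboostError {
            long[] out = new long[1];
            // a failing native call raises XgboostError carrying XGBGetLastError()'s message
            ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromFile("../../demo/data/agaricus.txt.train", 1, out));
            System.out.println("DMatrix handle: " + out[0]);
            ErrorHandle.checkCall(XgboostJNI.XGDMatrixFree(out[0]));
        }
    }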
diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XgboostError.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XgboostError.java
new file mode 100644
index 000000000..8dabcee4b
--- /dev/null
+++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XgboostError.java
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2014 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.dmlc.xgboost4j.util;
+
+/**
+ * custom error class for xgboost
+ * @author hzx
+ */
+public class XgboostError extends Exception {
+    public XgboostError(String message) {
+        super(message);
+    }
+}

diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/wrapper/XgboostJNI.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/wrapper/XgboostJNI.java
index 96a429c07..fe181347a 100644
--- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/wrapper/XgboostJNI.java
+++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/wrapper/XgboostJNI.java
@@ -17,32 +17,34 @@ package org.dmlc.xgboost4j.wrapper;

 /**
  * xgboost jni wrapper functions for xgboost_wrapper.h
+ * change 2015-7-6: use a long[] (length=1) as the container for the handle, to get the output DMatrix or Booster
  * @author hzx
  */
 public class XgboostJNI {
-    public final static native long XGDMatrixCreateFromFile(String fname, int silent);
-    public final static native long XGDMatrixCreateFromCSR(long[] indptr, int[] indices, float[] data);
-    public final static native long XGDMatrixCreateFromCSC(long[] colptr, int[] indices, float[] data);
-    public final static native long XGDMatrixCreateFromMat(float[] data, int nrow, int ncol, float missing);
-    public final static native long XGDMatrixSliceDMatrix(long handle, int[] idxset);
-    public final static native void XGDMatrixFree(long handle);
-    public final static native void XGDMatrixSaveBinary(long handle, String fname, int silent);
-    public final static native void XGDMatrixSetFloatInfo(long handle, String field, float[] array);
-    public final static native void XGDMatrixSetUIntInfo(long handle, String field, int[] array);
-    public final static native void XGDMatrixSetGroup(long handle, int[] group);
-    public final static native float[] XGDMatrixGetFloatInfo(long handle, String field);
-    public final static native int[] XGDMatrixGetUIntInfo(long handle, String filed);
-    public final static native long XGDMatrixNumRow(long handle);
-    public final static native long XGBoosterCreate(long[] handles);
-    public final static native void XGBoosterFree(long handle);
-    public final static native void XGBoosterSetParam(long handle, String name, String value);
-    public final static native void XGBoosterUpdateOneIter(long handle, int iter, long dtrain);
-    public final static native void XGBoosterBoostOneIter(long handle, long dtrain, float[] grad, float[] hess);
-    public final static native String XGBoosterEvalOneIter(long handle, int iter, long[] dmats, String[] evnames);
-    public final static native float[] XGBoosterPredict(long handle, long dmat, int option_mask, long ntree_limit);
-    public final static native void XGBoosterLoadModel(long handle, String fname);
-    public final static native void XGBoosterSaveModel(long handle, String fname);
-    public final static native void XGBoosterLoadModelFromBuffer(long handle, long buf, long len);
-    public final static native String XGBoosterGetModelRaw(long handle);
-    public final static native String[] 
XGBoosterDumpModel(long handle, String fmap, int with_stats); + public final static native String XGBGetLastError(); + public final static native int XGDMatrixCreateFromFile(String fname, int silent, long[] out); + public final static native int XGDMatrixCreateFromCSR(long[] indptr, int[] indices, float[] data, long[] out); + public final static native int XGDMatrixCreateFromCSC(long[] colptr, int[] indices, float[] data, long[] out); + public final static native int XGDMatrixCreateFromMat(float[] data, int nrow, int ncol, float missing, long[] out); + public final static native int XGDMatrixSliceDMatrix(long handle, int[] idxset, long[] out); + public final static native int XGDMatrixFree(long handle); + public final static native int XGDMatrixSaveBinary(long handle, String fname, int silent); + public final static native int XGDMatrixSetFloatInfo(long handle, String field, float[] array); + public final static native int XGDMatrixSetUIntInfo(long handle, String field, int[] array); + public final static native int XGDMatrixSetGroup(long handle, int[] group); + public final static native int XGDMatrixGetFloatInfo(long handle, String field, float[][] info); + public final static native int XGDMatrixGetUIntInfo(long handle, String filed, int[][] info); + public final static native int XGDMatrixNumRow(long handle, long[] row); + public final static native int XGBoosterCreate(long[] handles, long[] out); + public final static native int XGBoosterFree(long handle); + public final static native int XGBoosterSetParam(long handle, String name, String value); + public final static native int XGBoosterUpdateOneIter(long handle, int iter, long dtrain); + public final static native int XGBoosterBoostOneIter(long handle, long dtrain, float[] grad, float[] hess); + public final static native int XGBoosterEvalOneIter(long handle, int iter, long[] dmats, String[] evnames, String[] eval_info); + public final static native int XGBoosterPredict(long handle, long dmat, int option_mask, long ntree_limit, float[][] predicts); + public final static native int XGBoosterLoadModel(long handle, String fname); + public final static native int XGBoosterSaveModel(long handle, String fname); + public final static native int XGBoosterLoadModelFromBuffer(long handle, long buf, long len); + public final static native int XGBoosterGetModelRaw(long handle, String[] out_string); + public final static native int XGBoosterDumpModel(long handle, String fmap, int with_stats, String[][] out_strings); } diff --git a/java/xgboost4j_wrapper.cpp b/java/xgboost4j_wrapper.cpp index 55dc31bc8..f1e749982 100644 --- a/java/xgboost4j_wrapper.cpp +++ b/java/xgboost4j_wrapper.cpp @@ -16,21 +16,34 @@ #include "../wrapper/xgboost_wrapper.h" #include "xgboost4j_wrapper.h" -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile - (JNIEnv *jenv, jclass jcls, jstring jfname, jint jsilent) { - jlong jresult = 0 ; +JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBGetLastError + (JNIEnv *jenv, jclass jcls) { + jstring jresult = 0 ; + char* result = 0; + result = (char *)XGBGetLastError(); + if (result) jresult = jenv->NewStringUTF((const char *)result); + return jresult; +} + +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile + (JNIEnv *jenv, jclass jcls, jstring jfname, jint jsilent, jlongArray jout) { + jint jresult = 0 ; char *fname = (char *) 0 ; int silent; - void *result = 0 ; - fname = 0; - if (jfname) { - fname = (char 
*)jenv->GetStringUTFChars(jfname, 0); - if (!fname) return 0; - } + void* result[1]; + unsigned long out[1]; + + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + silent = (int)jsilent; - result = (void *)XGDMatrixCreateFromFile((char const *)fname, silent); - *(void **)&jresult = result; + jresult = (jint) XGDMatrixCreateFromFile((char const *)fname, silent, result); + + + *(void **)&out[0] = *result; + if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); + + jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out); return jresult; } @@ -39,12 +52,13 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCrea * Method: XGDMatrixCreateFromCSR * Signature: ([J[J[F)J */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR - (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata) { - jlong jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR + (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) { + jint jresult = 0 ; bst_ulong nindptr ; bst_ulong nelem; - void *result = 0 ; + void *result[1]; + unsigned long out[1]; jlong* indptr = jenv->GetLongArrayElements(jindptr, 0); jint* indices = jenv->GetIntArrayElements(jindices, 0); @@ -52,8 +66,9 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCrea nindptr = (bst_ulong)jenv->GetArrayLength(jindptr); nelem = (bst_ulong)jenv->GetArrayLength(jdata); - result = (void *)XGDMatrixCreateFromCSR((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem); - *(void **)&jresult = result; + jresult = (jint) XGDMatrixCreateFromCSR((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem, result); + *(void **)&out[0] = *result; + jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out); //release jenv->ReleaseLongArrayElements(jindptr, indptr, 0); @@ -68,12 +83,13 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCrea * Method: XGDMatrixCreateFromCSC * Signature: ([J[J[F)J */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC - (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata) { - jlong jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC + (JNIEnv *jenv, jclass jcls, jlongArray jindptr, jintArray jindices, jfloatArray jdata, jlongArray jout) { + jint jresult = 0; bst_ulong nindptr ; bst_ulong nelem; - void *result = 0 ; + void *result[1]; + unsigned long out[1]; jlong* indptr = jenv->GetLongArrayElements(jindptr, NULL); jint* indices = jenv->GetIntArrayElements(jindices, 0); @@ -81,8 +97,9 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCrea nindptr = (bst_ulong)jenv->GetArrayLength(jindptr); nelem = (bst_ulong)jenv->GetArrayLength(jdata); - result = (void *)XGDMatrixCreateFromCSC((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem); - *(void **)&jresult = result; + jresult = (jint) XGDMatrixCreateFromCSC((unsigned long const *)indptr, (unsigned int const *)indices, (float const *)data, nindptr, nelem, result); + *(void **)&out[0] = *result; + jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out); //release jenv->ReleaseLongArrayElements(jindptr, indptr, 0); @@ -97,21 +114,24 @@ JNIEXPORT jlong JNICALL 
Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCrea * Method: XGDMatrixCreateFromMat * Signature: ([FIIF)J */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat - (JNIEnv *jenv, jclass jcls, jfloatArray jdata, jint jnrow, jint jncol, jfloat jmiss) { - jlong jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat + (JNIEnv *jenv, jclass jcls, jfloatArray jdata, jint jnrow, jint jncol, jfloat jmiss, jlongArray jout) { + jint jresult = 0 ; bst_ulong nrow ; bst_ulong ncol ; float miss ; - void *result = 0 ; + void *result[1]; + unsigned long out[1]; jfloat* data = jenv->GetFloatArrayElements(jdata, 0); nrow = (bst_ulong)jnrow; ncol = (bst_ulong)jncol; miss = (float)jmiss; - result = (void *)XGDMatrixCreateFromMat((float const *)data, nrow, ncol, miss); - *(void **)&jresult = result; + + jresult = (jint) XGDMatrixCreateFromMat((float const *)data, nrow, ncol, miss, result); + *(void **)&out[0] = *result; + jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out); //release jenv->ReleaseFloatArrayElements(jdata, data, 0); @@ -124,19 +144,21 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCrea * Method: XGDMatrixSliceDMatrix * Signature: (J[I)J */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix - (JNIEnv *jenv, jclass jcls, jlong jhandle, jintArray jindexset) { - jlong jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix + (JNIEnv *jenv, jclass jcls, jlong jhandle, jintArray jindexset, jlongArray jout) { + jint jresult = 0 ; void *handle = (void *) 0 ; bst_ulong len; - void *result = 0 ; + void *result[1]; + unsigned long out[1]; jint* indexset = jenv->GetIntArrayElements(jindexset, 0); handle = *(void **)&jhandle; len = (bst_ulong)jenv->GetArrayLength(jindexset); - result = (void *)XGDMatrixSliceDMatrix(handle, (int const *)indexset, len); - *(void **)&jresult = result; + jresult = (jint) XGDMatrixSliceDMatrix(handle, (int const *)indexset, len, result); + *(void **)&out[0] = *result; + jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out); //release jenv->ReleaseIntArrayElements(jindexset, indexset, 0); @@ -149,11 +171,13 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSlic * Method: XGDMatrixFree * Signature: (J)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree (JNIEnv *jenv, jclass jcls, jlong jhandle) { + jint jresult = 0; void *handle = (void *) 0 ; handle = *(void **)&jhandle; - XGDMatrixFree(handle); + jresult = (jint) XGDMatrixFree(handle); + return jresult; } /* @@ -161,20 +185,21 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree * Method: XGDMatrixSaveBinary * Signature: (JLjava/lang/String;I)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname, jint jsilent) { + jint jresult = 0; void *handle = (void *) 0 ; char *fname = (char *) 0 ; int silent ; handle = *(void **)&jhandle; fname = 0; - if (jfname) { - fname = (char *)jenv->GetStringUTFChars(jfname, 0); - if (!fname) return ; - } + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + silent = (int)jsilent; - XGDMatrixSaveBinary(handle, (char const *)fname, 
silent); + jresult = (jint) XGDMatrixSaveBinary(handle, (char const *)fname, silent); if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); + + return jresult; } /* @@ -182,27 +207,28 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveB * Method: XGDMatrixSetFloatInfo * Signature: (JLjava/lang/String;[F)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jfloatArray jarray) { + jint jresult = 0; void *handle = (void *) 0 ; char *field = (char *) 0 ; bst_ulong len; handle = *(void **)&jhandle; - field = 0; - if (jfield) { - field = (char *)jenv->GetStringUTFChars(jfield, 0); - if (!field) return ; - } + + field = (char *)jenv->GetStringUTFChars(jfield, 0); + jfloat* array = jenv->GetFloatArrayElements(jarray, NULL); len = (bst_ulong)jenv->GetArrayLength(jarray); - XGDMatrixSetFloatInfo(handle, (char const *)field, (float const *)array, len); + jresult = (jint) XGDMatrixSetFloatInfo(handle, (char const *)field, (float const *)array, len); //release if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); jenv->ReleaseFloatArrayElements(jarray, array, 0); + + return jresult; } /* @@ -210,25 +236,26 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFl * Method: XGDMatrixSetUIntInfo * Signature: (JLjava/lang/String;[I)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jintArray jarray) { + jint jresult = 0; void *handle = (void *) 0 ; char *field = (char *) 0 ; bst_ulong len ; handle = *(void **)&jhandle; field = 0; - if (jfield) { - field = (char *)jenv->GetStringUTFChars(jfield, 0); - if (!field) return ; - } + field = (char *)jenv->GetStringUTFChars(jfield, 0); + jint* array = jenv->GetIntArrayElements(jarray, NULL); len = (bst_ulong)jenv->GetArrayLength(jarray); - XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len); + jresult = (jint) XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len); //release if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); jenv->ReleaseIntArrayElements(jarray, array, 0); + + return jresult; } /* @@ -236,8 +263,9 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUI * Method: XGDMatrixSetGroup * Signature: (J[I)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup (JNIEnv * jenv, jclass jcls, jlong jhandle, jintArray jarray) { + jint jresult = 0; void *handle = (void *) 0 ; bst_ulong len ; @@ -245,11 +273,12 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGr jint* array = jenv->GetIntArrayElements(jarray, NULL); len = (bst_ulong)jenv->GetArrayLength(jarray); - XGDMatrixSetGroup(handle, (unsigned int const *)array, len); + jresult = (jint) XGDMatrixSetGroup(handle, (unsigned int const *)array, len); //release jenv->ReleaseIntArrayElements(jarray, array, 0); - + + return jresult; } /* @@ -257,28 +286,31 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGr * Method: XGDMatrixGetFloatInfo * Signature: 
(JLjava/lang/String;)[F */ -JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo - (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield) { +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) { + jint jresult = 0; void *handle = (void *) 0 ; char *field = (char *) 0 ; bst_ulong len[1]; *len = 0; - float *result = 0 ; + float *result[1]; - handle = *(void **)&jhandle; + handle = *(void **)&jhandle; field = 0; if (jfield) { field = (char *)jenv->GetStringUTFChars(jfield, 0); if (!field) return 0; } - result = (float *)XGDMatrixGetFloatInfo((void const *)handle, (char const *)field, len); + jresult = (jint) XGDMatrixGetFloatInfo(handle, (char const *)field, len, (const float **) result); if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); jsize jlen = (jsize)*len; - jfloatArray jresult = jenv->NewFloatArray(jlen); - jenv->SetFloatArrayRegion(jresult, 0, jlen, (jfloat *)result); + jfloatArray jarray = jenv->NewFloatArray(jlen); + jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) *result); + jenv->SetObjectArrayElement(jout, 0, (jobject) jarray); + return jresult; } @@ -287,28 +319,26 @@ JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatr * Method: XGDMatrixGetUIntInfo * Signature: (JLjava/lang/String;)[I */ -JNIEXPORT jintArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo - (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield) { +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfield, jobjectArray jout) { + jint jresult = 0; void *handle = (void *) 0 ; char *field = (char *) 0 ; bst_ulong len[1]; *len = 0; - unsigned int *result = 0 ; + unsigned int *result[1]; handle = *(void **)&jhandle; - field = 0; - if (jfield) { - field = (char *)jenv->GetStringUTFChars(jfield, 0); - if (!field) return 0; - } + field = (char *)jenv->GetStringUTFChars(jfield, 0); - result = (unsigned int *)XGDMatrixGetUIntInfo((void const *)handle, (char const *)field, len); + jresult = (jint) XGDMatrixGetUIntInfo(handle, (char const *)field, len, (const unsigned int **) result); if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field); jsize jlen = (jsize)*len; - jintArray jresult = jenv->NewIntArray(jlen); - jenv->SetIntArrayRegion(jresult, 0, jlen, (jint *)result); + jintArray jarray = jenv->NewIntArray(jlen); + jenv->SetIntArrayRegion(jarray, 0, jlen, (jint *) *result); + jenv->SetObjectArrayElement(jout, 0, jarray); return jresult; } @@ -317,14 +347,14 @@ JNIEXPORT jintArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrix * Method: XGDMatrixNumRow * Signature: (J)J */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow - (JNIEnv *jenv, jclass jcls, jlong jhandle) { - jlong jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow + (JNIEnv *jenv, jclass jcls, jlong jhandle, jlongArray jout) { + jint jresult = 0 ; void *handle = (void *) 0 ; - bst_ulong result; + bst_ulong result[1]; handle = *(void **)&jhandle; - result = (bst_ulong)XGDMatrixNumRow((void const *)handle); - jresult = (jlong)result; + jresult = (jint) XGDMatrixNumRow(handle, result); + jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) result); return jresult; } @@ -333,13 +363,14 @@ JNIEXPORT jlong JNICALL 
Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumR * Method: XGBoosterCreate * Signature: ([J)J */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate - (JNIEnv *jenv, jclass jcls, jlongArray jhandles) { - jlong jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate + (JNIEnv *jenv, jclass jcls, jlongArray jhandles, jlongArray jout) { + jint jresult = 0; void **handles = 0; bst_ulong len = 0; - void *result = 0 ; + void *result[1]; jlong* cjhandles = 0; + unsigned long out[1]; if(jhandles) { len = (bst_ulong)jenv->GetArrayLength(jhandles); @@ -351,7 +382,7 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCrea } } - result = (void *)XGBoosterCreate(handles, len); + jresult = (jint) XGBoosterCreate(handles, len, result); //release if(jhandles) { @@ -359,7 +390,9 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCrea jenv->ReleaseLongArrayElements(jhandles, cjhandles, 0); } - *(void **)&jresult = result; + *(void **)&out[0] = *result; + jenv->SetLongArrayRegion(jout, 0, 1, (const jlong *) out); + return jresult; } @@ -368,11 +401,11 @@ JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCrea * Method: XGBoosterFree * Signature: (J)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree (JNIEnv *jenv, jclass jcls, jlong jhandle) { void *handle = (void *) 0 ; handle = *(void **)&jhandle; - XGBoosterFree(handle); + return (jint) XGBoosterFree(handle); } @@ -381,27 +414,22 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree * Method: XGBoosterSetParam * Signature: (JLjava/lang/String;Ljava/lang/String;)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jname, jstring jvalue) { + jint jresult = -1; void *handle = (void *) 0 ; char *name = (char *) 0 ; char *value = (char *) 0 ; handle = *(void **)&jhandle; - name = 0; - if (jname) { - name = (char *)jenv->GetStringUTFChars(jname, 0); - if (!name) return ; - } - - value = 0; - if (jvalue) { - value = (char *)jenv->GetStringUTFChars(jvalue, 0); - if (!value) return ; - } - XGBoosterSetParam(handle, (char const *)name, (char const *)value); + name = (char *)jenv->GetStringUTFChars(jname, 0); + value = (char *)jenv->GetStringUTFChars(jvalue, 0); + + jresult = (jint) XGBoosterSetParam(handle, (char const *)name, (char const *)value); if (name) jenv->ReleaseStringUTFChars(jname, (const char *)name); if (value) jenv->ReleaseStringUTFChars(jvalue, (const char *)value); + + return jresult; } /* @@ -409,7 +437,7 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetPa * Method: XGBoosterUpdateOneIter * Signature: (JIJ)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlong jdtrain) { void *handle = (void *) 0 ; int iter ; @@ -417,7 +445,7 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdat handle = *(void **)&jhandle; iter = (int)jiter; dtrain = *(void **)&jdtrain; - XGBoosterUpdateOneIter(handle, iter, dtrain); + return (jint) 
XGBoosterUpdateOneIter(handle, iter, dtrain); } /* @@ -425,8 +453,9 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdat * Method: XGBoosterBoostOneIter * Signature: (JJ[F[F)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdtrain, jfloatArray jgrad, jfloatArray jhess) { + jint jresult = 0; void *handle = (void *) 0 ; void *dtrain = (void *) 0 ; bst_ulong len ; @@ -436,11 +465,13 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoost jfloat* grad = jenv->GetFloatArrayElements(jgrad, 0); jfloat* hess = jenv->GetFloatArrayElements(jhess, 0); len = (bst_ulong)jenv->GetArrayLength(jgrad); - XGBoosterBoostOneIter(handle, dtrain, grad, hess, len); + jresult = (jint) XGBoosterBoostOneIter(handle, dtrain, grad, hess, len); //release jenv->ReleaseFloatArrayElements(jgrad, grad, 0); jenv->ReleaseFloatArrayElements(jhess, hess, 0); + + return jresult; } /* @@ -448,15 +479,15 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoost * Method: XGBoosterEvalOneIter * Signature: (JI[J[Ljava/lang/String;)Ljava/lang/String; */ -JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter - (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlongArray jdmats, jobjectArray jevnames) { - jstring jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter + (JNIEnv *jenv, jclass jcls, jlong jhandle, jint jiter, jlongArray jdmats, jobjectArray jevnames, jobjectArray jout) { + jint jresult = 0 ; void *handle = (void *) 0 ; int iter ; void **dmats = 0; char **evnames = 0; bst_ulong len ; - char *result = 0 ; + char *result[1]; handle = *(void **)&jhandle; iter = (int)jiter; @@ -480,7 +511,7 @@ JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEv evnames[i] = (char *)jenv->GetStringUTFChars(jevname, 0); } - result = (char *)XGBoosterEvalOneIter(handle, iter, dmats, (char const *(*))evnames, len); + jresult = (jint) XGBoosterEvalOneIter(handle, iter, dmats, (char const *(*))evnames, len, (const char **) result); if(len > 0) { delete[] dmats; @@ -493,7 +524,9 @@ JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEv jenv->ReleaseLongArrayElements(jdmats, cjdmats, 0); } - if (result) jresult = jenv->NewStringUTF((const char *)result); + jstring jinfo = 0; + if (*result) jinfo = jenv->NewStringUTF((const char *) *result); + jenv->SetObjectArrayElement(jout, 0, jinfo); return jresult; } @@ -503,26 +536,29 @@ JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEv * Method: XGBoosterPredict * Signature: (JJIJ)[F */ -JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict - (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdmat, jint joption_mask, jlong jntree_limit) { +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict + (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jdmat, jint joption_mask, jlong jntree_limit, jobjectArray jout) { + jint jresult = 0; void *handle = (void *) 0 ; void *dmat = (void *) 0 ; int option_mask ; unsigned int ntree_limit ; bst_ulong len[1]; *len = 0; - float *result = 0 ; + float *result[1]; handle = *(void **)&jhandle; dmat = *(void **)&jdmat; option_mask = (int)joption_mask; ntree_limit = (unsigned 
int)jntree_limit; - result = (float *)XGBoosterPredict(handle, dmat, option_mask, ntree_limit, len); + jresult = (jint) XGBoosterPredict(handle, dmat, option_mask, ntree_limit, len, (const float **) result); jsize jlen = (jsize)*len; - jfloatArray jresult = jenv->NewFloatArray(jlen); - jenv->SetFloatArrayRegion(jresult, 0, jlen, (jfloat *)result); + jfloatArray jarray = jenv->NewFloatArray(jlen); + jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) *result); + jenv->SetObjectArrayElement(jout, 0, jarray); + return jresult; } @@ -531,18 +567,20 @@ JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoost * Method: XGBoosterLoadModel * Signature: (JLjava/lang/String;)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) { + jint jresult = 0; void *handle = (void *) 0 ; char *fname = (char *) 0 ; handle = *(void **)&jhandle; - fname = 0; - if (jfname) { - fname = (char *)jenv->GetStringUTFChars(jfname, 0); - if (!fname) return ; - } - XGBoosterLoadModel(handle,(char const *)fname); + + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + + + jresult = (jint) XGBoosterLoadModel(handle,(char const *)fname); if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); + + return jresult; } /* @@ -550,18 +588,19 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadM * Method: XGBoosterSaveModel * Signature: (JLjava/lang/String;)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfname) { + jint jresult = 0; void *handle = (void *) 0 ; char *fname = (char *) 0 ; handle = *(void **)&jhandle; fname = 0; - if (jfname) { - fname = (char *)jenv->GetStringUTFChars(jfname, 0); - if (!fname) return ; - } - XGBoosterSaveModel(handle, (char const *)fname); + fname = (char *)jenv->GetStringUTFChars(jfname, 0); + + jresult = (jint) XGBoosterSaveModel(handle, (char const *)fname); if (fname) jenv->ReleaseStringUTFChars(jfname, (const char *)fname); + + return jresult; } /* @@ -569,7 +608,7 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveM * Method: XGBoosterLoadModelFromBuffer * Signature: (JJJ)V */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer (JNIEnv *jenv, jclass jcls, jlong jhandle, jlong jbuf, jlong jlen) { void *handle = (void *) 0 ; void *buf = (void *) 0 ; @@ -577,7 +616,7 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadM handle = *(void **)&jhandle; buf = *(void **)&jbuf; len = (bst_ulong)jlen; - XGBoosterLoadModelFromBuffer(handle, (void const *)buf, len); + return (jint) XGBoosterLoadModelFromBuffer(handle, (void const *)buf, len); } /* @@ -585,17 +624,21 @@ JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadM * Method: XGBoosterGetModelRaw * Signature: (J)Ljava/lang/String; */ -JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw - (JNIEnv * jenv, jclass jcls, jlong jhandle) { - jstring jresult = 0 ; +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw + (JNIEnv * jenv, 
jclass jcls, jlong jhandle, jobjectArray jout) { + jint jresult = 0 ; + jstring jinfo = 0; void *handle = (void *) 0 ; bst_ulong len[1]; *len = 0; - char *result = 0 ; + char *result[1]; handle = *(void **)&jhandle; - result = (char *)XGBoosterGetModelRaw(handle, len); - if (result) jresult = jenv->NewStringUTF((const char *)result); + jresult = (jint)XGBoosterGetModelRaw(handle, len, (const char **) result); + if (*result){ + jinfo = jenv->NewStringUTF((const char *) *result); + jenv->SetObjectArrayElement(jout, 0, jinfo); + } return jresult; } @@ -604,15 +647,16 @@ JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGe * Method: XGBoosterDumpModel * Signature: (JLjava/lang/String;I)[Ljava/lang/String; */ -JNIEXPORT jobjectArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel - (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfmap, jint jwith_stats) { +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel + (JNIEnv *jenv, jclass jcls, jlong jhandle, jstring jfmap, jint jwith_stats, jobjectArray jout) { + jint jresult = 0; void *handle = (void *) 0 ; char *fmap = (char *) 0 ; int with_stats ; bst_ulong len[1]; *len = 0; - char **result = 0 ; + char **result[1]; handle = *(void **)&jhandle; fmap = 0; if (jfmap) { @@ -621,14 +665,16 @@ JNIEXPORT jobjectArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoos } with_stats = (int)jwith_stats; - result = (char **)XGBoosterDumpModel(handle, (char const *)fmap, with_stats, len); + jresult = (jint) XGBoosterDumpModel(handle, (const char *)fmap, with_stats, len, (const char ***) result); jsize jlen = (jsize)*len; - jobjectArray jresult = jenv->NewObjectArray(jlen, jenv->FindClass("java/lang/String"), jenv->NewStringUTF("")); + jobjectArray jinfos = jenv->NewObjectArray(jlen, jenv->FindClass("java/lang/String"), jenv->NewStringUTF("")); for(int i=0 ; i<jlen ; i++) { - jenv->SetObjectArrayElement(jresult, i, jenv->NewStringUTF((const char*)result[i])); + jenv->SetObjectArrayElement(jinfos, i, jenv->NewStringUTF((const char*) result[0][i])); } + jenv->SetObjectArrayElement(jout, 0, jinfos); if (fmap) jenv->ReleaseStringUTFChars(jfmap, (const char *)fmap); + return jresult; } \ No newline at end of file diff --git a/java/xgboost4j_wrapper.h b/java/xgboost4j_wrapper.h index d13b86f8c..93764ef53 100644 --- a/java/xgboost4j_wrapper.h +++ b/java/xgboost4j_wrapper.h @@ -9,203 +9,211 @@ extern "C" { #endif /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI - * Method: XGDMatrixCreateFromFile - * Signature: (Ljava/lang/String;I)J + * Method: XGBGetLastError + * Signature: ()Ljava/lang/String; */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile - (JNIEnv *, jclass, jstring, jint); +JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBGetLastError + (JNIEnv *, jclass); + +/* + * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI + * Method: XGDMatrixCreateFromFile + * Signature: (Ljava/lang/String;I[J)I + */ +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromFile + (JNIEnv *, jclass, jstring, jint, jlongArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixCreateFromCSR - * Signature: ([J[J[F)J + * Signature: ([J[I[F[J)I */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR - (JNIEnv *, jclass, jlongArray, jintArray, jfloatArray); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSR + (JNIEnv *, jclass,
jlongArray, jintArray, jfloatArray, jlongArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixCreateFromCSC - * Signature: ([J[J[F)J + * Signature: ([J[I[F[J)I */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC - (JNIEnv *, jclass, jlongArray, jintArray, jfloatArray); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromCSC + (JNIEnv *, jclass, jlongArray, jintArray, jfloatArray, jlongArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixCreateFromMat - * Signature: ([FIIF)J + * Signature: ([FIIF[J)I */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat - (JNIEnv *, jclass, jfloatArray, jint, jint, jfloat); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixCreateFromMat + (JNIEnv *, jclass, jfloatArray, jint, jint, jfloat, jlongArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixSliceDMatrix - * Signature: (J[I)J + * Signature: (J[I[J)I */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix - (JNIEnv *, jclass, jlong, jintArray); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSliceDMatrix + (JNIEnv *, jclass, jlong, jintArray, jlongArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixFree - * Signature: (J)V + * Signature: (J)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixFree (JNIEnv *, jclass, jlong); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixSaveBinary - * Signature: (JLjava/lang/String;I)V + * Signature: (JLjava/lang/String;I)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSaveBinary (JNIEnv *, jclass, jlong, jstring, jint); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixSetFloatInfo - * Signature: (JLjava/lang/String;[F)V + * Signature: (JLjava/lang/String;[F)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetFloatInfo (JNIEnv *, jclass, jlong, jstring, jfloatArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixSetUIntInfo - * Signature: (JLjava/lang/String;[I)V + * Signature: (JLjava/lang/String;[I)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetUIntInfo (JNIEnv *, jclass, jlong, jstring, jintArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixSetGroup - * Signature: (J[I)V + * Signature: (J[I)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixSetGroup (JNIEnv *, jclass, jlong, jintArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixGetFloatInfo - * Signature: (JLjava/lang/String;)[F + * Signature: (JLjava/lang/String;[[F)I */ -JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo - (JNIEnv *, jclass, jlong, jstring); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetFloatInfo + (JNIEnv *, jclass, jlong, 
jstring, jobjectArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixGetUIntInfo - * Signature: (JLjava/lang/String;)[I + * Signature: (JLjava/lang/String;[[I)I */ -JNIEXPORT jintArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo - (JNIEnv *, jclass, jlong, jstring); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixGetUIntInfo + (JNIEnv *, jclass, jlong, jstring, jobjectArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGDMatrixNumRow - * Signature: (J)J + * Signature: (J[J)I */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow - (JNIEnv *, jclass, jlong); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGDMatrixNumRow + (JNIEnv *, jclass, jlong, jlongArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterCreate - * Signature: ([J)J + * Signature: ([J[J)I */ -JNIEXPORT jlong JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate - (JNIEnv *, jclass, jlongArray); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterCreate + (JNIEnv *, jclass, jlongArray, jlongArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterFree - * Signature: (J)V + * Signature: (J)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterFree (JNIEnv *, jclass, jlong); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterSetParam - * Signature: (JLjava/lang/String;Ljava/lang/String;)V + * Signature: (JLjava/lang/String;Ljava/lang/String;)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSetParam (JNIEnv *, jclass, jlong, jstring, jstring); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterUpdateOneIter - * Signature: (JIJ)V + * Signature: (JIJ)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterUpdateOneIter (JNIEnv *, jclass, jlong, jint, jlong); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterBoostOneIter - * Signature: (JJ[F[F)V + * Signature: (JJ[F[F)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterBoostOneIter (JNIEnv *, jclass, jlong, jlong, jfloatArray, jfloatArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterEvalOneIter - * Signature: (JI[J[Ljava/lang/String;)Ljava/lang/String; + * Signature: (JI[J[Ljava/lang/String;[Ljava/lang/String;)I */ -JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter - (JNIEnv *, jclass, jlong, jint, jlongArray, jobjectArray); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterEvalOneIter + (JNIEnv *, jclass, jlong, jint, jlongArray, jobjectArray, jobjectArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterPredict - * Signature: (JJIJ)[F + * Signature: (JJIJ[[F)I */ -JNIEXPORT jfloatArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict - (JNIEnv *, jclass, jlong, jlong, jint, jlong); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterPredict + (JNIEnv *, jclass, jlong, jlong, jint, jlong, 
jobjectArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterLoadModel - * Signature: (JLjava/lang/String;)V + * Signature: (JLjava/lang/String;)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModel (JNIEnv *, jclass, jlong, jstring); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterSaveModel - * Signature: (JLjava/lang/String;)V + * Signature: (JLjava/lang/String;)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterSaveModel (JNIEnv *, jclass, jlong, jstring); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterLoadModelFromBuffer - * Signature: (JJJ)V + * Signature: (JJJ)I */ -JNIEXPORT void JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterLoadModelFromBuffer (JNIEnv *, jclass, jlong, jlong, jlong); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterGetModelRaw - * Signature: (J)Ljava/lang/String; + * Signature: (J[Ljava/lang/String;)I */ -JNIEXPORT jstring JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw - (JNIEnv *, jclass, jlong); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterGetModelRaw + (JNIEnv *, jclass, jlong, jobjectArray); /* * Class: org_dmlc_xgboost4j_wrapper_XgboostJNI * Method: XGBoosterDumpModel - * Signature: (JLjava/lang/String;I)[Ljava/lang/String; + * Signature: (JLjava/lang/String;I[[Ljava/lang/String;)I */ -JNIEXPORT jobjectArray JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel - (JNIEnv *, jclass, jlong, jstring, jint); +JNIEXPORT jint JNICALL Java_org_dmlc_xgboost4j_wrapper_XgboostJNI_XGBoosterDumpModel + (JNIEnv *, jclass, jlong, jstring, jint, jobjectArray); #ifdef __cplusplus } From e99ab0d1dd2b5c860d315b9ddf7a8a59567fd630 Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Mon, 6 Jul 2015 20:56:17 +0800 Subject: [PATCH 41/59] minor fix --- .../xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java | 6 ++++-- .../xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java index 0f296241b..4d34f43c6 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java @@ -76,14 +76,16 @@ public final class Booster { * @throws org.dmlc.xgboost4j.util.XgboostError */ public Booster(Iterable> params, String modelPath) throws XgboostError { - long[] out = new long[1]; init(null); + if(modelPath == null) { + throw new NullPointerException("modelPath : null"); + } loadModel(modelPath); setParam("seed","0"); setParams(params); } - + private void init(DMatrix[] dMatrixs) throws XgboostError { diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java index b056cad09..6beae9c90 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java @@ -55,6 +55,9 @@ public class DMatrix { * @throws org.dmlc.xgboost4j.util.XgboostError */ public DMatrix(String dataPath) throws 
XgboostError { + if(dataPath == null) { + throw new NullPointerException("dataPath: null"); + } long[] out = new long[1]; ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1, out)); handle = out[0]; From b1bcb7183bd7582a094ad1da31ca610c9108c6fd Mon Sep 17 00:00:00 2001 From: Ajinkya Kale Date: Mon, 6 Jul 2015 11:02:19 -0700 Subject: [PATCH 42/59] Adding some details on the nthread parameter I got this information about nthread='real cpu count' from https://github.com/dmlc/xgboost/blob/7cb449c4a75c2a16f6dfea5244ce959d998344b1/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java#L50 Please confirm that this note is still valid before merging this change! --- demo/binary_classification/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/demo/binary_classification/README.md b/demo/binary_classification/README.md index 8d1e5e2a5..482666ec4 100644 --- a/demo/binary_classification/README.md +++ b/demo/binary_classification/README.md @@ -162,7 +162,11 @@ If you want to continue boosting from an existing model, say 0002.model, use ``` xgboost will load from 0002.model and continue boosting for 2 rounds, then save the output to continue.model. However, beware that the training and evaluation data specified in mushroom.conf should not change when you use this function. #### Use Multi-Threading -When you are working with a large dataset, you may want to take advantage of parallelism. If your compiler supports OpenMP, xgboost is naturally multi-threaded, to set number of parallel running threads to 10, add ```nthread=10``` to your configuration. +When you are working with a large dataset, you may want to take advantage of parallelism. If your compiler supports OpenMP, xgboost is naturally multi-threaded; to set the number of parallel threads, add the ```nthread``` parameter to your configuration. +E.g. ```nthread=10``` + +Set nthread to the number of real CPU cores (on Unix, this can be found using ```lscpu```). +Some systems report ```Thread(s) per core = 2```; for example, a 4-core CPU exposes 8 hardware threads, and in such a case you should set ```nthread=4```, not 8. #### Additional Notes * What are ```agaricus.txt.test.buffer``` and ```agaricus.txt.train.buffer``` generated during runexp.sh? From 761ab7c83435db0be62ba25ceabe4b71505e18f2 Mon Sep 17 00:00:00 2001 From: Ajinkya Kale Date: Mon, 6 Jul 2015 14:52:38 -0700 Subject: [PATCH 43/59] Adding a workaround for installing the R-package I was facing this issue and this workaround worked for me. Maybe this should be moved to the known issues section. --- R-package/README.md | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index e974e3554..81dabb31c 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -1,6 +1,8 @@ -# R package for xgboost. +R package for xgboost +===================== -## Installation +Installation +------------ For the up-to-date version (which is recommended), please install from github. Windows users will need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first. ```r devtools::install_github('dmlc/xgboost',subdir='R-package') ``` - -## Examples +Examples +-------- * Please visit [walk through example](demo).
* See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including an [RMarkdown document](../demo/kaggle-otto/understandingXGBoostModel.Rmd). + +Notes +----- + +If you face an issue installing the package using ```devtools::install_github```, with an error like this (even after updating libxml and RCurl, as a lot of forums suggest) - + +``` +devtools::install_github('dmlc/xgboost',subdir='R-package') +Downloading github repo dmlc/xgboost@master +Error in function (type, msg, asError = TRUE) : + Peer certificate cannot be authenticated with given CA certificates +``` +To get around this you can build the package locally as mentioned [here](https://github.com/dmlc/xgboost/issues/347) - +``` +1. Clone the current repository and set your workspace to xgboost/R-package/ +2. Run R CMD INSTALL --build . in terminal to get the tarball. +3. Run install.packages('path_to_the_tarball',repo=NULL) in R to install. +``` From 364abdd6d1a196316390d1cfc8af09a32c903d17 Mon Sep 17 00:00:00 2001 From: Ajinkya Kale Date: Mon, 6 Jul 2015 16:45:30 -0700 Subject: [PATCH 44/59] Adding examples on xgb.importance, xgb.plot.importance and xgb.plot.tree --- R-package/vignettes/xgboostPresentation.Rmd | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index b7648340d..39ab819f7 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -337,6 +337,17 @@ err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label) print(paste("test-error=", err)) ``` +View feature importance/influence from the learnt model +------------------------------------------------------- + +Feature importance is similar to R gbm package's relative influence (rel.inf). + +``` +importance_matrix <- xgb.importance(model = bst) +print(importance_matrix) +xgb.plot.importance(importance_matrix) +``` + View the trees from a model --------------------------- You can dump the tree you learned using `xgb.dump` into a text file. xgb.dump(bst, with.stats = T) ``` +You can plot the trees from your model using ```xgb.plot.tree``` + +``` +xgb.plot.tree(model = bst) +``` + > if you provide a path to the `fname` parameter you can save the trees to your hard drive. Save and load models
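Before the rename patch that follows, it is worth spelling out the calling convention the wrapper refactor above establishes: every JNI method now returns a jint status code (0 on success, non-zero on failure) and hands its actual result back through a one-element out-array, with the error message retrievable via XGBGetLastError. Below is a minimal, hypothetical Java sketch of that convention, using the XgboostJNI, ErrorHandle, and XgboostError classes as they stand at this point in the series (XgboostError is renamed to XGBoostError in the next patch); the data path is illustrative.

```java
import org.dmlc.xgboost4j.util.ErrorHandle;
import org.dmlc.xgboost4j.util.XgboostError;
import org.dmlc.xgboost4j.wrapper.XgboostJNI;

// Hypothetical demo class; not part of the patches.
public class JniConventionSketch {
    public static void main(String[] args) throws XgboostError {
        // The handle comes back through a one-element out-array...
        long[] out = new long[1];
        // ...while the return value is a status code that checkCall turns into
        // an XgboostError carrying XgboostJNI.XGBGetLastError() when non-zero.
        ErrorHandle.checkCall(
            XgboostJNI.XGDMatrixCreateFromFile("agaricus.txt.train", 1, out));
        long handle = out[0];

        // Numeric getters follow the same out-array pattern.
        long[] nrow = new long[1];
        ErrorHandle.checkCall(XgboostJNI.XGDMatrixNumRow(handle, nrow));
        System.out.println("rows: " + nrow[0]);

        // Frees also report their status through the return code.
        ErrorHandle.checkCall(XgboostJNI.XGDMatrixFree(handle));
    }
}
```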
Save and load models From 4d382a8cc167b72d21b6a8eb70365a230f23299b Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Mon, 6 Jul 2015 17:55:13 -0700 Subject: [PATCH 45/59] rename xgboosterror --- .../dmlc/xgboost4j/demo/BasicWalkThrough.java | 4 +- .../xgboost4j/demo/BoostFromPrediction.java | 4 +- .../dmlc/xgboost4j/demo/CrossValidation.java | 4 +- .../dmlc/xgboost4j/demo/CustomObjective.java | 8 +- .../dmlc/xgboost4j/demo/ExternalMemory.java | 4 +- .../demo/GeneralizedLinearModel.java | 4 +- .../xgboost4j/demo/PredictFirstNtree.java | 4 +- .../xgboost4j/demo/PredictLeafIndices.java | 4 +- .../dmlc/xgboost4j/demo/util/CustomEval.java | 4 +- .../main/java/org/dmlc/xgboost4j/Booster.java | 90 +++++++++---------- .../main/java/org/dmlc/xgboost4j/DMatrix.java | 56 ++++++------ .../java/org/dmlc/xgboost4j/util/CVPack.java | 20 ++--- .../org/dmlc/xgboost4j/util/ErrorHandle.java | 6 +- .../java/org/dmlc/xgboost4j/util/Trainer.java | 6 +- .../{XgboostError.java => XGBoostError.java} | 4 +- 15 files changed, 111 insertions(+), 111 deletions(-) rename java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/{XgboostError.java => XGBoostError.java} (88%) diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java index 86ba49c48..0c6529d2c 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BasicWalkThrough.java @@ -31,7 +31,7 @@ import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.DataLoader; import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * a simple example of java wrapper for xgboost @@ -53,7 +53,7 @@ public class BasicWalkThrough { } - public static void main(String[] args) throws UnsupportedEncodingException, IOException, XgboostError { + public static void main(String[] args) throws UnsupportedEncodingException, IOException, XGBoostError { // load file from text file, also binary buffer generated by xgboost4j DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java index 1113eef68..a81da0c59 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/BoostFromPrediction.java @@ -23,14 +23,14 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * example for start from a initial base prediction * @author hzx */ public class BoostFromPrediction { - public static void main(String[] args) throws XgboostError { + public static void main(String[] args) throws XGBoostError { System.out.println("start running example to start from a initial prediction"); // load file from text file, also binary buffer generated by xgboost4j diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java 
index ec5716700..6dcf917da 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CrossValidation.java @@ -19,14 +19,14 @@ import java.io.IOException; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.util.Trainer; import org.dmlc.xgboost4j.demo.util.Params; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * an example of cross validation * @author hzx */ public class CrossValidation { - public static void main(String[] args) throws IOException, XgboostError { + public static void main(String[] args) throws IOException, XGBoostError { //load train mat DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java index 4aaa053e0..2b8c44ecd 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/CustomObjective.java @@ -27,7 +27,7 @@ import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.IObjective; import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * an example user define objective and eval @@ -74,7 +74,7 @@ public class CustomObjective { float[] labels; try { labels = dtrain.getLabel(); - } catch (XgboostError ex) { + } catch (XGBoostError ex) { logger.error(ex); return null; } @@ -122,7 +122,7 @@ public class CustomObjective { float[] labels; try { labels = dmat.getLabel(); - } catch (XgboostError ex) { + } catch (XGBoostError ex) { logger.error(ex); return -1f; } @@ -140,7 +140,7 @@ public class CustomObjective { } } - public static void main(String[] args) throws XgboostError { + public static void main(String[] args) throws XGBoostError { //load train mat (svmlight format) DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); //load valid mat (svmlight format) diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java index e74e3e858..b0a9d27dc 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/ExternalMemory.java @@ -23,14 +23,14 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * simple example for using external memory version * @author hzx */ public class ExternalMemory { - public static void main(String[] args) throws XgboostError { + public static void main(String[] args) throws XGBoostError { //this is the only difference, add a # followed by a cache prefix name //several cache file with the prefix will be generated //currently only support convert from libsvm file diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java index db3cd0e59..7d3d717bd 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java +++ 
b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/GeneralizedLinearModel.java @@ -24,7 +24,7 @@ import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.demo.util.CustomEval; import org.dmlc.xgboost4j.demo.util.Params; import org.dmlc.xgboost4j.util.Trainer; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * this is an example of fit generalized linear model in xgboost @@ -32,7 +32,7 @@ import org.dmlc.xgboost4j.util.XgboostError; * @author hzx */ public class GeneralizedLinearModel { - public static void main(String[] args) throws XgboostError { + public static void main(String[] args) throws XGBoostError { // load file from text file, also binary buffer generated by xgboost4j DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java index 6bcf67f86..2bbd1fd6c 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictFirstNtree.java @@ -25,14 +25,14 @@ import org.dmlc.xgboost4j.util.Trainer; import org.dmlc.xgboost4j.demo.util.CustomEval; import org.dmlc.xgboost4j.demo.util.Params; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * predict first ntree * @author hzx */ public class PredictFirstNtree { - public static void main(String[] args) throws XgboostError { + public static void main(String[] args) throws XGBoostError { // load file from text file, also binary buffer generated by xgboost4j DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java index 61026a6b8..ede103aeb 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/PredictLeafIndices.java @@ -24,14 +24,14 @@ import org.dmlc.xgboost4j.Booster; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.util.Trainer; import org.dmlc.xgboost4j.demo.util.Params; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * predict leaf indices * @author hzx */ public class PredictLeafIndices { - public static void main(String[] args) throws XgboostError { + public static void main(String[] args) throws XGBoostError { // load file from text file, also binary buffer generated by xgboost4j DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train"); DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test"); diff --git a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java index 116c06ddf..5f25278d5 100644 --- a/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java +++ b/java/xgboost4j-demo/src/main/java/org/dmlc/xgboost4j/demo/util/CustomEval.java @@ -19,7 +19,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.DMatrix; import org.dmlc.xgboost4j.IEvaluation; 
-import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; /** * a util evaluation class for examples @@ -41,7 +41,7 @@ public class CustomEval implements IEvaluation { float[] labels; try { labels = dmat.getLabel(); - } catch (XgboostError ex) { + } catch (XGBoostError ex) { logger.error(ex); return -1f; } diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java index 0f296241b..51fee441e 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/Booster.java @@ -31,7 +31,7 @@ import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.util.Initializer; import org.dmlc.xgboost4j.util.ErrorHandle; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; import org.dmlc.xgboost4j.wrapper.XgboostJNI; @@ -59,9 +59,9 @@ public final class Booster { * init Booster from dMatrixs * @param params parameters * @param dMatrixs DMatrix array - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public Booster(Iterable> params, DMatrix[] dMatrixs) throws XgboostError { + public Booster(Iterable> params, DMatrix[] dMatrixs) throws XGBoostError { init(dMatrixs); setParam("seed","0"); setParams(params); @@ -73,9 +73,9 @@ public final class Booster { * load model from modelPath * @param params parameters * @param modelPath booster modelPath (model generated by booster.saveModel) - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public Booster(Iterable> params, String modelPath) throws XgboostError { + public Booster(Iterable> params, String modelPath) throws XGBoostError { long[] out = new long[1]; init(null); loadModel(modelPath); @@ -86,7 +86,7 @@ public final class Booster { - private void init(DMatrix[] dMatrixs) throws XgboostError { + private void init(DMatrix[] dMatrixs) throws XGBoostError { long[] handles = null; if(dMatrixs != null) { handles = dMatrixs2handles(dMatrixs); @@ -101,18 +101,18 @@ public final class Booster { * set parameter * @param key param name * @param value param value - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public final void setParam(String key, String value) throws XgboostError { + public final void setParam(String key, String value) throws XGBoostError { ErrorHandle.checkCall(XgboostJNI.XGBoosterSetParam(handle, key, value)); } /** * set parameters * @param params parameters key-value map - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void setParams(Iterable> params) throws XgboostError { + public void setParams(Iterable> params) throws XGBoostError { if(params!=null) { for(Map.Entry entry : params) { setParam(entry.getKey(), entry.getValue().toString()); @@ -125,9 +125,9 @@ public final class Booster { * Update (one iteration) * @param dtrain training data * @param iter current iteration number - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void update(DMatrix dtrain, int iter) throws XgboostError { + public void update(DMatrix dtrain, int iter) throws XGBoostError { ErrorHandle.checkCall(XgboostJNI.XGBoosterUpdateOneIter(handle, iter, dtrain.getHandle())); } @@ -136,9 +136,9 @@ public final class Booster { * @param dtrain training data * @param iter current 
iteration number * @param obj customized objective class - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void update(DMatrix dtrain, int iter, IObjective obj) throws XgboostError { + public void update(DMatrix dtrain, int iter, IObjective obj) throws XGBoostError { float[][] predicts = predict(dtrain, true); List gradients = obj.getGradient(predicts, dtrain); boost(dtrain, gradients.get(0), gradients.get(1)); @@ -149,9 +149,9 @@ public final class Booster { * @param dtrain training data * @param grad first order of gradient * @param hess seconde order of gradient - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void boost(DMatrix dtrain, float[] grad, float[] hess) throws XgboostError { + public void boost(DMatrix dtrain, float[] grad, float[] hess) throws XGBoostError { if(grad.length != hess.length) { throw new AssertionError(String.format("grad/hess length mismatch %s / %s", grad.length, hess.length)); } @@ -164,9 +164,9 @@ public final class Booster { * @param evalNames name for eval dmatrixs, used for check results * @param iter current eval iteration * @return eval information - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XgboostError { + public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XGBoostError { long[] handles = dMatrixs2handles(evalMatrixs); String[] evalInfo = new String[1]; ErrorHandle.checkCall(XgboostJNI.XGBoosterEvalOneIter(handle, iter, handles, evalNames, evalInfo)); @@ -180,9 +180,9 @@ public final class Booster { * @param iter * @param eval * @return eval information - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, IEvaluation eval) throws XgboostError { + public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, IEvaluation eval) throws XGBoostError { String evalInfo = ""; for(int i=0; i getFeatureScore() throws XgboostError { + public Map getFeatureScore() throws XGBoostError { String[] modelInfos = getDumpInfo(false); Map featureScore = new HashMap<>(); for(String tree : modelInfos) { @@ -431,9 +431,9 @@ public final class Booster { * get importance of each feature * @param featureMap file to save dumped model info * @return featureMap key: feature index, value: feature importance score - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public Map getFeatureScore(String featureMap) throws XgboostError { + public Map getFeatureScore(String featureMap) throws XGBoostError { String[] modelInfos = getDumpInfo(featureMap, false); Map featureScore = new HashMap<>(); for(String tree : modelInfos) { diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java index b056cad09..61db98a6d 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/DMatrix.java @@ -19,7 +19,7 @@ import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dmlc.xgboost4j.util.ErrorHandle; -import org.dmlc.xgboost4j.util.XgboostError; +import org.dmlc.xgboost4j.util.XGBoostError; import 
org.dmlc.xgboost4j.util.Initializer; import org.dmlc.xgboost4j.wrapper.XgboostJNI; @@ -52,9 +52,9 @@ public class DMatrix { /** * init DMatrix from file (svmlight format) * @param dataPath - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public DMatrix(String dataPath) throws XgboostError { + public DMatrix(String dataPath) throws XGBoostError { long[] out = new long[1]; ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromFile(dataPath, 1, out)); handle = out[0]; @@ -66,9 +66,9 @@ public class DMatrix { * @param indices Indices (colIndexs for CSR or rowIndexs for CSC) * @param data non zero values (sequence by row for CSR or by col for CSC) * @param st sparse matrix type (CSR or CSC) - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) throws XgboostError { + public DMatrix(long[] headers, int[] indices, float[] data, SparseType st) throws XGBoostError { long[] out = new long[1]; if(st == SparseType.CSR) { ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromCSR(headers, indices, data, out)); @@ -87,9 +87,9 @@ public class DMatrix { * @param data data values * @param nrow number of rows * @param ncol number of columns - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public DMatrix(float[] data, int nrow, int ncol) throws XgboostError { + public DMatrix(float[] data, int nrow, int ncol) throws XGBoostError { long[] out = new long[1]; ErrorHandle.checkCall(XgboostJNI.XGDMatrixCreateFromMat(data, nrow, ncol, 0.0f, out)); handle = out[0]; @@ -109,16 +109,16 @@ public class DMatrix { * set label of dmatrix * @param labels */ - public void setLabel(float[] labels) throws XgboostError { + public void setLabel(float[] labels) throws XGBoostError { ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "label", labels)); } /** * set weight of each instance * @param weights - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void setWeight(float[] weights) throws XgboostError { + public void setWeight(float[] weights) throws XGBoostError { ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "weight", weights)); } @@ -126,9 +126,9 @@ public class DMatrix { * if specified, xgboost will start from this init margin * can be used to specify initial prediction to boost from * @param baseMargin - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void setBaseMargin(float[] baseMargin) throws XgboostError { + public void setBaseMargin(float[] baseMargin) throws XGBoostError { ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetFloatInfo(handle, "base_margin", baseMargin)); } @@ -136,9 +136,9 @@ public class DMatrix { * if specified, xgboost will start from this init margin * can be used to specify initial prediction to boost from * @param baseMargin - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void setBaseMargin(float[][] baseMargin) throws XgboostError { + public void setBaseMargin(float[][] baseMargin) throws XGBoostError { float[] flattenMargin = flatten(baseMargin); setBaseMargin(flattenMargin); } @@ -146,19 +146,19 @@ public class DMatrix { /** * Set group sizes of DMatrix (used for ranking) * @param group - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws 
org.dmlc.xgboost4j.util.XGBoostError */ - public void setGroup(int[] group) throws XgboostError { + public void setGroup(int[] group) throws XGBoostError { ErrorHandle.checkCall(XgboostJNI.XGDMatrixSetGroup(handle, group)); } - private float[] getFloatInfo(String field) throws XgboostError { + private float[] getFloatInfo(String field) throws XGBoostError { float[][] infos = new float[1][]; ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetFloatInfo(handle, field, infos)); return infos[0]; } - private int[] getIntInfo(String field) throws XgboostError { + private int[] getIntInfo(String field) throws XGBoostError { int[][] infos = new int[1][]; ErrorHandle.checkCall(XgboostJNI.XGDMatrixGetUIntInfo(handle, field, infos)); return infos[0]; @@ -167,27 +167,27 @@ public class DMatrix { /** * get label values * @return label - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public float[] getLabel() throws XgboostError { + public float[] getLabel() throws XGBoostError { return getFloatInfo("label"); } /** * get weight of the DMatrix * @return weights - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public float[] getWeight() throws XgboostError { + public float[] getWeight() throws XGBoostError { return getFloatInfo("weight"); } /** * get base margin of the DMatrix * @return base margin - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public float[] getBaseMargin() throws XgboostError { + public float[] getBaseMargin() throws XGBoostError { return getFloatInfo("base_margin"); } @@ -195,9 +195,9 @@ public class DMatrix { * Slice the DMatrix and return a new DMatrix that only contains `rowIndex`. * @param rowIndex * @return sliced new DMatrix - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public DMatrix slice(int[] rowIndex) throws XgboostError { + public DMatrix slice(int[] rowIndex) throws XGBoostError { long[] out = new long[1]; ErrorHandle.checkCall(XgboostJNI.XGDMatrixSliceDMatrix(handle, rowIndex, out)); long sHandle = out[0]; @@ -208,9 +208,9 @@ public class DMatrix { /** * get the row number of DMatrix * @return number of rows - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public long rowNum() throws XgboostError { + public long rowNum() throws XGBoostError { long[] rowNum = new long[1]; ErrorHandle.checkCall(XgboostJNI.XGDMatrixNumRow(handle,rowNum)); return rowNum[0]; diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java index 33be48b53..a9b932f0d 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/CVPack.java @@ -37,9 +37,9 @@ public class CVPack { * @param dtrain train data * @param dtest test data * @param params parameters - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public CVPack(DMatrix dtrain, DMatrix dtest, Iterable> params) throws XgboostError { + public CVPack(DMatrix dtrain, DMatrix dtest, Iterable> params) throws XGBoostError { dmats = new DMatrix[] {dtrain, dtest}; booster = new Booster(params, dmats); names = new String[] {"train", "test"}; @@ -50,9 +50,9 @@ public class CVPack { /** * update one iteration * @param iter iteration num - * @throws 
org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void update(int iter) throws XgboostError { + public void update(int iter) throws XGBoostError { booster.update(dtrain, iter); } @@ -60,9 +60,9 @@ public class CVPack { * update one iteration * @param iter iteration num * @param obj customized objective - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public void update(int iter, IObjective obj) throws XgboostError { + public void update(int iter, IObjective obj) throws XGBoostError { booster.update(dtrain, iter, obj); } @@ -70,9 +70,9 @@ public class CVPack { * evaluation * @param iter iteration num * @return - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public String eval(int iter) throws XgboostError { + public String eval(int iter) throws XGBoostError { return booster.evalSet(dmats, names, iter); } @@ -81,9 +81,9 @@ public class CVPack { * @param iter iteration num * @param eval customized eval * @return - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public String eval(int iter, IEvaluation eval) throws XgboostError { + public String eval(int iter, IEvaluation eval) throws XGBoostError { return booster.evalSet(dmats, names, iter, eval); } } diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java index 5093eb1db..688cd2719 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/ErrorHandle.java @@ -40,11 +40,11 @@ public class ErrorHandle { /** * check the return value of C API * @param ret return valud of xgboostJNI C API call - * @throws org.dmlc.xgboost4j.util.XgboostError + * @throws org.dmlc.xgboost4j.util.XGBoostError */ - public static void checkCall(int ret) throws XgboostError { + public static void checkCall(int ret) throws XGBoostError { if(ret != 0) { - throw new XgboostError(XgboostJNI.XGBGetLastError()); + throw new XGBoostError(XgboostJNI.XGBGetLastError()); } } } diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java index a81963da7..e5ac8502a 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/Trainer.java @@ -47,7 +47,7 @@ public class Trainer { * @return trained booster */ public static Booster train(Iterable> params, DMatrix dtrain, int round, - Iterable> watchs, IObjective obj, IEvaluation eval) throws XgboostError { + Iterable> watchs, IObjective obj, IEvaluation eval) throws XGBoostError { //collect eval matrixs String[] evalNames; @@ -112,7 +112,7 @@ public class Trainer { * @param eval customized evaluation (set to null if not used) * @return evaluation history */ - public static String[] crossValiation(Iterable> params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) throws XgboostError { + public static String[] crossValiation(Iterable> params, DMatrix data, int round, int nfold, String[] metrics, IObjective obj, IEvaluation eval) throws XGBoostError { CVPack[] cvPacks = makeNFold(data, nfold, params, metrics); String[] evalHist = new String[round]; String[] results = new String[cvPacks.length]; @@ -149,7 +149,7 @@ public class Trainer { * @param 
evalMetrics Evaluation metrics * @return CV package array */ - public static CVPack[] makeNFold(DMatrix data, int nfold, Iterable<Entry<String, Object>> params, String[] evalMetrics) throws XgboostError { + public static CVPack[] makeNFold(DMatrix data, int nfold, Iterable<Entry<String, Object>> params, String[] evalMetrics) throws XGBoostError { List<Integer> samples = genRandPermutationNums(0, (int) data.rowNum()); int step = samples.size()/nfold; int[] testSlice = new int[step]; diff --git a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XgboostError.java b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XGBoostError.java similarity index 88% rename from java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XgboostError.java rename to java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XGBoostError.java index 8dabcee4b..dc7a9a0b2 100644 --- a/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XgboostError.java +++ b/java/xgboost4j/src/main/java/org/dmlc/xgboost4j/util/XGBoostError.java @@ -19,8 +19,8 @@ package org.dmlc.xgboost4j.util; * custom error class for xgboost * @author hzx */ -public class XgboostError extends Exception{ - public XgboostError(String message) { +public class XGBoostError extends Exception{ + public XGBoostError(String message) { super(message); } } From 46342d4633552ec19181c834c23586985d45c309 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 6 Jul 2015 20:07:04 -0700 Subject: [PATCH 46/59] checkin --- src/utils/thread.h | 2 +- wrapper/xgboost_wrapper.cpp | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/utils/thread.h b/src/utils/thread.h index 78b488cff..a6e8e7fdc 100644 --- a/src/utils/thread.h +++ b/src/utils/thread.h @@ -11,7 +11,7 @@ #ifdef _MSC_VER #include <windows.h> #include <process.h> -#include "../xgboost/utils.h" +#include "./utils.h" namespace xgboost { namespace utils { /*! \brief simple semaphore used for synchronization */ diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp index 18c1eae49..fb33d0392 100644 --- a/wrapper/xgboost_wrapper.cpp +++ b/wrapper/xgboost_wrapper.cpp @@ -134,9 +134,11 @@ using namespace xgboost::wrapper; * \brief every function starts with API_BEGIN(); and finishes with API_END(); * \param Finalize optionally put in a finalizer */ -#define API_END(Finalize) } catch(std::exception &e) { \ Finalize; return XGBHandleException(e); \ } return 0; +#define API_END_FINALIZE(Finalize) } catch(std::exception &e) { \ Finalize; return XGBHandleException(e); \ } return 0; +/*!
\brief API End with no finalization */ +#define API_END() API_END_FINALIZE(;) // do not use threadlocal on OSX since it is not always available #ifndef DISABLE_THREAD_LOCAL @@ -217,7 +219,7 @@ int XGDMatrixCreateFromCSR(const bst_ulong *indptr, } mat.info.info.num_row = nindptr - 1; *out = p_mat; - API_END(delete p_mat); + API_END_FINALIZE(delete p_mat); } int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, @@ -258,7 +260,7 @@ int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr, mat.info.info.num_row = mat.row_ptr_.size() - 1; mat.info.info.num_col = static_cast<size_t>(ncol); *out = p_mat; - API_END(delete p_mat); + API_END_FINALIZE(delete p_mat); } int XGDMatrixCreateFromMat(const float *data, @@ -289,7 +291,7 @@ int XGDMatrixCreateFromMat(const float *data, mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem); } *out = p_mat; - API_END(delete p_mat); + API_END_FINALIZE(delete p_mat); } int XGDMatrixSliceDMatrix(DMatrixHandle handle, @@ -340,7 +342,7 @@ int XGDMatrixSliceDMatrix(DMatrixHandle handle, } } *out = p_ret; - API_END(delete p_ret); + API_END_FINALIZE(delete p_ret); } int XGDMatrixFree(DMatrixHandle handle) { From 9ec4c43dd2f6377c0e3875b109bed6f64a166792 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 6 Jul 2015 22:44:59 -0700 Subject: [PATCH 47/59] Update README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index cdd4c02f7..9b3e6f57a 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,12 @@ What's New - Check out the winning solution at [Highlight links](doc/README.md#highlight-links) * [External Memory Version](doc/external_memory.md) +Contributing to XGBoost +========= +XGBoost has been developed and used by a group of active community members. Everyone is more than welcome to contribute, which is a great way to make the project better and more accessible to more users. +* Check out the [Feature Wish List](https://github.com/dmlc/xgboost/labels/Wish-List) to see what can be improved, or open an issue if you want something. +* Contribute to the [documents and examples](https://github.com/dmlc/xgboost/blob/master/doc/) to share your experience to other users. +* Features ======== * Easily accessible in python, R, Julia, CLI From 28f8267563880fc99fa1c73990d3643f8c7a79b4 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 6 Jul 2015 22:45:27 -0700 Subject: [PATCH 48/59] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9b3e6f57a..226526891 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,8 @@ Contributing to XGBoost ========= XGBoost has been developed and used by a group of active community members. Everyone is more than welcome to contribute, which is a great way to make the project better and more accessible to more users. * Check out the [Feature Wish List](https://github.com/dmlc/xgboost/labels/Wish-List) to see what can be improved, or open an issue if you want something. -* Contribute to the [documents and examples](https://github.com/dmlc/xgboost/blob/master/doc/) to share your experience to other users. -* +* Contribute to the [documents and examples](https://github.com/dmlc/xgboost/blob/master/doc/) to share your experience with other users.
+ Features ======== * Easily accessible in python, R, Julia, CLI From fc75885e9e963ed28e62f5f80a81a709be551e4d Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Tue, 7 Jul 2015 19:22:51 +0800 Subject: [PATCH 49/59] add travis-ci script for java wrapper --- .travis.yml | 1 + scripts/travis_java_script.sh | 6 ++++++ scripts/travis_script.sh | 5 +++++ 3 files changed, 12 insertions(+) create mode 100755 scripts/travis_java_script.sh diff --git a/.travis.yml b/.travis.yml index 102c87353..ac4f58154 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ env: - TASK=lint LINT_LANG=python - TASK=R-package CXX=g++ - TASK=python-package CXX=g++ + - TASK=java-package CXX=g++ - TASK=build CXX=g++ - TASK=build-with-dmlc CXX=g++ diff --git a/scripts/travis_java_script.sh b/scripts/travis_java_script.sh new file mode 100755 index 000000000..c87dc2b46 --- /dev/null +++ b/scripts/travis_java_script.sh @@ -0,0 +1,6 @@ +# Test java package of xgboost +cd java +./create_wrap.sh +cd xgboost4j +mvn clean install -DskipTests=true +mvn test diff --git a/scripts/travis_script.sh b/scripts/travis_script.sh index b90f8d3ee..5702d35cd 100755 --- a/scripts/travis_script.sh +++ b/scripts/travis_script.sh @@ -26,3 +26,8 @@ if [ ${TASK} == "python-package" ]; then make all CXX=${CXX} || exit -1 nosetests tests/python || exit -1 fi + +if [ ${TASK} == "java-package" ]; then + make java CXX=${CXX} || exit -1 + scripts/travis_java_script.sh || exit -1 +fi From c489ce62b243b357e0fc58366b55d273925e2993 Mon Sep 17 00:00:00 2001 From: Ajinkya Kale Date: Tue, 7 Jul 2015 16:36:45 -0700 Subject: [PATCH 50/59] refs and formatting changes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 226526891..d7cf31a08 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ XGBoost: eXtreme Gradient Boosting [![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost) An optimized general purpose gradient boosting library. The library is parallelized, and also provides an optimized distributed version. -It implements machine learning algorithm under gradient boosting framework, including generalized linear model and gradient boosted regression tree (GBDT). XGBoost can also be distributed and scale to Terascale data +It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework, including [Generalized Linear Model](https://en.wikipedia.org/wiki/Generalized_linear_model) (GLM) and [Gradient Boosted Decision Trees](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) (GBDT). 
XGBoost can also be [distributed](#features) and scales to Terascale data. Contributors: https://github.com/dmlc/xgboost/graphs/contributors From 969ea57159a1d8fc7027dfa6c0f3af81a0fb1e19 Mon Sep 17 00:00:00 2001 From: yanqingmen Date: Tue, 7 Jul 2015 17:28:45 -0700 Subject: [PATCH 51/59] Update travis_java_script.sh add "set -e" --- scripts/travis_java_script.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/travis_java_script.sh b/scripts/travis_java_script.sh index c87dc2b46..e0583e1fb 100755 --- a/scripts/travis_java_script.sh +++ b/scripts/travis_java_script.sh @@ -1,4 +1,5 @@ # Test java package of xgboost +set -e cd java ./create_wrap.sh cd xgboost4j From 57e4f4d426390c192d2904f0d2585e05612d9a9b Mon Sep 17 00:00:00 2001 From: Ajinkya Kale Date: Tue, 7 Jul 2015 17:36:18 -0700 Subject: [PATCH 52/59] need to load vcd if it was freshly installed --- R-package/demo/create_sparse_matrix.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R index e3a536cfe..11de17a91 100644 --- a/R-package/demo/create_sparse_matrix.R +++ b/R-package/demo/create_sparse_matrix.R @@ -1,8 +1,10 @@ require(xgboost) require(Matrix) require(data.table) -if (!require(vcd)) install.packages('vcd') # Available on CRAN. Used for its dataset with categorical values. - +if (!require(vcd)) { + install.packages('vcd') # Available on CRAN. Used for its dataset with categorical values. + require(vcd) +} # According to its documentation, XGBoost works only on numbers. # Sometimes the dataset we have to work on contains categorical data. # A categorical variable is one which has a fixed number of values. For example, if for each observation a variable called "Colour" can have only "red", "blue" or "green" as value, it is a categorical variable.
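The R demo patched above one-hot encodes categorical columns into a sparse matrix before handing them to xgboost. For readers using the Python wrapper instead, here is a minimal sketch of the same idea (the toy data, column encoding, and parameter values are illustrative, not taken from the demo):

```python
import numpy as np
import scipy.sparse
import xgboost as xgb

# toy categorical feature: 0=red, 1=blue, 2=green
colour = np.array([0, 1, 2, 1, 0, 2])
label = np.array([1, 0, 0, 1, 1, 0], dtype=np.float32)

# one-hot encode by hand into a sparse CSR matrix, the Python
# analogue of the sparse model matrix built in the R demo
onehot = np.zeros((colour.size, 3), dtype=np.float32)
onehot[np.arange(colour.size), colour] = 1.0
X = scipy.sparse.csr_matrix(onehot)

# DMatrix accepts scipy sparse matrices directly
dtrain = xgb.DMatrix(X, label=label)
bst = xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=2)
```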
From dabb36c0066517082a0b4c6ecec1806f03789b10 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Fri, 10 Jul 2015 20:41:00 -0700 Subject: [PATCH 53/59] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d7cf31a08..e69ef1930 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -XGBoost: eXtreme Gradient Boosting +DMLC/XGBoost: eXtreme Gradient Boosting ================================== [![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost) @@ -8,7 +8,7 @@ It implements machine learning algorithms under the [Gradient Boosting](https:// Contributors: https://github.com/dmlc/xgboost/graphs/contributors -Documentation: [Documentation of xgboost](doc/README.md) +Documentation: [Documentation of dmlc/xgboost](doc/README.md) Issues Tracker: [https://github.com/dmlc/xgboost/issues](https://github.com/dmlc/xgboost/issues?q=is%3Aissue+label%3Aquestion) From e402d20876b8c19ee75990508b7c784f94b4af8e Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Fri, 10 Jul 2015 20:41:20 -0700 Subject: [PATCH 54/59] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e69ef1930..58283ed14 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -DMLC/XGBoost: eXtreme Gradient Boosting +DMLC/XGBoost ================================== [![Build Status](https://travis-ci.org/dmlc/xgboost.svg?branch=master)](https://travis-ci.org/dmlc/xgboost) From 35638f614663a0b47e1abae0bc65b636f39b139c Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 12 Jul 2015 10:27:58 -0700 Subject: [PATCH 55/59] Update README.md --- doc/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/README.md b/doc/README.md index 371e18f21..e8df7d57d 100644 --- a/doc/README.md +++ b/doc/README.md @@ -20,7 +20,8 @@ How to get started Highlight Links ==== This section is about blog posts, presentations and videos discussing how to use xgboost to solve your problem. If you think something belongs here, send a pull request.
-* [Kaggle Malware Prediction winning solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware) +* [Kaggle CrowdFlower winner's solution by Chenglong Chen](https://github.com/ChenglongChen/Kaggle_CrowdFlower) +* [Kaggle Malware Prediction winner's solution](https://github.com/xiaozhouwang/kaggle_Microsoft_Malware) * [Kaggle Tradeshift winning solution by daxiongshu](https://github.com/daxiongshu/kaggle-tradeshift-winning-solution) * [Feature Importance Analysis with XGBoost in Tax audit](http://fr.slideshare.net/MichaelBENESTY/feature-importance-analysis-with-xgboost-in-tax-audit) * Video tutorial: [Better Optimization with Repeated Cross Validation and the XGBoost model](https://www.youtube.com/watch?v=Og7CGAfSr_Y) From 44f839b896c883f9db574d54581da1f0565a9c37 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 12 Jul 2015 10:31:55 -0700 Subject: [PATCH 56/59] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 58283ed14..e6f5d69d1 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,10 @@ XGBoost is part of [Distributed Machine Learning Common](http://dmlc.github.io/) What's New ========== +* XGBoost helped Chenglong Chen win the [Kaggle CrowdFlower Competition](https://www.kaggle.com/c/crowdflower-search-relevance) + - Check out the winning solution at [Highlight links](doc/README.md#highlight-links) * XGBoost-0.4 release, see [CHANGES.md](CHANGES.md#xgboost-04) -* XGBoost wins [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing) +* XGBoost helped three champion teams win the [WWW2015 Microsoft Malware Classification Challenge (BIG 2015)](http://www.kaggle.com/c/malware-classification/forums/t/13490/say-no-to-overfitting-approaches-sharing) - Check out the winning solution at [Highlight links](doc/README.md#highlight-links) * [External Memory Version](doc/external_memory.md) From 4a746be43a30dd5dc0a72151b340421e740c8aa4 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 12 Jul 2015 10:36:16 -0700 Subject: [PATCH 57/59] Update build.md --- doc/build.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/build.md b/doc/build.md index f9a626603..7b8ee96aa 100644 --- a/doc/build.md +++ b/doc/build.md @@ -43,7 +43,7 @@ Here is the complete solution to use OpenMp-enabled compilers to install XGBoost export CXX = clang-omp++ ``` - Remember to change `header` if using clang-omp. + Remember to change `header` (mentioned in step 2) if using clang-omp. Then `cd xgboost` and run `bash build.sh` to compile XGBoost. Then go to the `wrapper` sub-folder to install the Python version.
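After `bash build.sh` succeeds and the Python module is installed from the `wrapper` sub-folder, a quick smoke test confirms the native library loads (a hypothetical check, not part of the build docs; the data and parameters are arbitrary):

```python
import numpy as np
import xgboost as xgb

# tiny random dataset; this only checks that the shared library
# loads and that a DMatrix can be built and trained on end to end
X = np.random.rand(20, 3)
y = np.random.randint(2, size=20)
dtrain = xgb.DMatrix(X, label=y)
bst = xgb.train({'objective': 'binary:logistic', 'silent': 1}, dtrain, num_boost_round=2)
print(bst.predict(dtrain)[:5])
```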
From b7f355fdd2e3867e634eae8724b178a9c57ca8ab Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 12 Jul 2015 11:00:52 -0700 Subject: [PATCH 58/59] Update travis_after_failure.sh --- scripts/travis_after_failure.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/travis_after_failure.sh b/scripts/travis_after_failure.sh index 230f3348c..15b74d87f 100755 --- a/scripts/travis_after_failure.sh +++ b/scripts/travis_after_failure.sh @@ -1,5 +1,5 @@ #!/bin/bash if [ ${TASK} == "R-package" ]; then - cat R-package/xgboost.Rcheck/00install.out -fi \ No newline at end of file + cat R-package/xgboost.Rcheck/*.log +fi From be95c80aa29233f7089f54befaa8727607c7d9b4 Mon Sep 17 00:00:00 2001 From: Joosep Date: Tue, 14 Jul 2015 11:38:38 +0200 Subject: [PATCH 59/59] fix wrapper dict --- wrapper/xgboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 96f6c2573..7a601424c 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -777,7 +777,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, bst = Booster(params, [dtrain] + [d[0] for d in evals]) if evals_result is not None: - if isinstance(evals_result, dict): + if not isinstance(evals_result, dict): raise TypeError('evals_result has to be a dictionary') else: evals_name = [d[1] for d in evals]
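The one-character fix in the last patch is easy to miss but matters in practice: before it, `train` raised the TypeError precisely when callers passed the dict they were supposed to. A minimal usage sketch (toy data; parameter values are illustrative):

```python
import numpy as np
import xgboost as xgb

X = np.random.rand(100, 5)
y = np.random.randint(2, size=100)
dtrain = xgb.DMatrix(X[:80], label=y[:80])
dtest = xgb.DMatrix(X[80:], label=y[80:])

evals_result = {}  # must be a dict; the corrected check now accepts it
bst = xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=5,
                evals=[(dtrain, 'train'), (dtest, 'eval')],
                evals_result=evals_result)
print(evals_result)  # per-eval-set metric history filled in during training
```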