[jvm-packages] Scala/Java interface for Fast Histogram Algorithm (#1966)

* add back train method but mark as deprecated

* fix scalastyle error

* first commit in scala binding for fast histo

* java test

* add missed scala tests

* spark training

* add back train method but mark as deprecated

* fix scalastyle error

* local change

* first commit in scala binding for fast histo

* local change

* fix DataFrame test
This commit is contained in:
Nan Zhu
2017-03-04 15:37:24 -08:00
committed by GitHub
parent ac30a0aff5
commit ab13fd72bd
10 changed files with 400 additions and 37 deletions

View File

@@ -26,7 +26,6 @@ import java.util.HashMap;
import java.util.Map;
import junit.framework.TestCase;
import ml.dmlc.xgboost4j.java.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Test;
@@ -151,6 +150,130 @@ public class BoosterImplTest {
TestCase.assertTrue("loadedPredictErr:" + loadedPredictError, loadedPredictError < 0.1f);
}
/**
 * Trains a booster with the given parameters and validates the evaluation metrics that were
 * handed to {@code XGBoost.train} through the {@code metrics} array.
 *
 * <p>The array is allocated as {@code [watches.size()][round]}; presumably the trainer fills
 * one row per watched matrix and one column per boosting round (confirm against
 * {@code XGBoost.train}). Two properties are asserted on it:
 * <ul>
 *   <li>within each row, a value is never smaller than the value before it;</li>
 *   <li>every value is at least {@code threshold}.</li>
 * </ul>
 *
 * <p>The booster is disposed in a {@code finally} block so that it is released even when one
 * of the assertions fails.
 *
 * @param trainingSet matrix the booster is trained on
 * @param watches     named matrices passed to the trainer for evaluation
 * @param round       number of boosting rounds
 * @param paramMap    training parameters forwarded unchanged to {@code XGBoost.train}
 * @param threshold   lower bound that every recorded metric value must reach
 * @throws XGBoostError if training fails
 */
private void testWithFastHisto(DMatrix trainingSet, Map<String, DMatrix> watches, int round,
    Map<String, Object> paramMap, float threshold) throws XGBoostError {
  float[][] metrics = new float[watches.size()][round];
  Booster booster = XGBoost.train(trainingSet, paramMap, round, watches,
      metrics, null, null);
  try {
    // First pass: the metric must be non-decreasing from one round to the next.
    for (int i = 0; i < metrics.length; i++) {
      for (int j = 1; j < metrics[i].length; j++) {
        TestCase.assertTrue(
            "metric decreased at watch " + i + ", round " + j + ": "
                + metrics[i][j - 1] + " -> " + metrics[i][j],
            metrics[i][j] >= metrics[i][j - 1]);
      }
    }
    // Second pass: every recorded value must reach the requested lower bound.
    for (int i = 0; i < metrics.length; i++) {
      for (int j = 0; j < metrics[i].length; j++) {
        TestCase.assertTrue(
            "metric below threshold " + threshold + " at watch " + i + ", round " + j + ": "
                + metrics[i][j],
            metrics[i][j] >= threshold);
      }
    }
  } finally {
    booster.dispose();
  }
}
/**
 * Runs the histogram tree method ({@code tree_method=hist}) with the {@code depthwise} grow
 * policy for 10 rounds, watching both the training and the test matrix, and checks the AUC
 * values through {@code testWithFastHisto} with a lower bound of {@code 0.0f}.
 *
 * @throws XGBoostError if loading the data or training fails
 */
@Test
public void testFastHistoDepthWise() throws XGBoostError {
  DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
  DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");

  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put("max_depth", 3);
  paramMap.put("silent", 1);
  paramMap.put("objective", "binary:logistic");
  paramMap.put("tree_method", "hist");
  paramMap.put("grow_policy", "depthwise");
  paramMap.put("eval_metric", "auc");

  Map<String, DMatrix> watches = new HashMap<>();
  watches.put("training", trainMat);
  watches.put("test", testMat);

  final int rounds = 10;
  final float minMetric = 0.0f;
  testWithFastHisto(trainMat, watches, rounds, paramMap, minMetric);
}
/**
 * Runs the histogram tree method ({@code tree_method=hist}) with the {@code lossguide} grow
 * policy ({@code max_depth=0}, {@code max_leaves=8}) for 10 rounds, watching both the training
 * and the test matrix, and checks the AUC values through {@code testWithFastHisto} with a
 * lower bound of {@code 0.0f}.
 *
 * @throws XGBoostError if loading the data or training fails
 */
@Test
public void testFastHistoLossGuide() throws XGBoostError {
  DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
  DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");

  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put("max_depth", 0);
  paramMap.put("silent", 1);
  paramMap.put("objective", "binary:logistic");
  paramMap.put("tree_method", "hist");
  paramMap.put("grow_policy", "lossguide");
  paramMap.put("max_leaves", 8);
  paramMap.put("eval_metric", "auc");

  Map<String, DMatrix> watches = new HashMap<>();
  watches.put("training", trainMat);
  watches.put("test", testMat);

  final int rounds = 10;
  final float minMetric = 0.0f;
  testWithFastHisto(trainMat, watches, rounds, paramMap, minMetric);
}
/**
 * Runs the histogram tree method ({@code tree_method=hist}) with the {@code lossguide} grow
 * policy ({@code max_depth=0}, {@code max_leaves=8}) and {@code max_bin=16} for 10 rounds,
 * watching only the training matrix, and checks the AUC values through
 * {@code testWithFastHisto} with a lower bound of {@code 0.0f}.
 *
 * <p>The bin-count parameter is spelled {@code max_bin}, matching the other tests in this
 * file; the previous key {@code max_bins} did not match that spelling, so the intended
 * setting was not the one being passed.
 *
 * @throws XGBoostError if loading the data or training fails
 */
@Test
public void testFastHistoLossGuideMaxBin() throws XGBoostError {
  // Only the training matrix is watched, so the test matrix is not loaded here.
  DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");

  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put("max_depth", 0);
  paramMap.put("silent", 1);
  paramMap.put("objective", "binary:logistic");
  paramMap.put("tree_method", "hist");
  paramMap.put("grow_policy", "lossguide");
  paramMap.put("max_leaves", 8);
  paramMap.put("max_bin", 16);
  paramMap.put("eval_metric", "auc");

  Map<String, DMatrix> watches = new HashMap<>();
  watches.put("training", trainMat);
  testWithFastHisto(trainMat, watches, 10, paramMap, 0.0f);
}
/**
 * Runs the histogram tree method ({@code tree_method=hist}) with the {@code depthwise} grow
 * policy and {@code max_depth=2} for 10 rounds, watching only the training matrix, and
 * requires every recorded AUC value to be at least {@code 0.85f}.
 *
 * <p>{@code max_depth} is set once. An earlier version put {@code 3} and then {@code 2} under
 * the same key, so {@code 2} was already the value in effect.
 *
 * @throws XGBoostError if loading the data or training fails
 */
@Test
public void testFastHistoDepthwiseMaxDepth() throws XGBoostError {
  // Only the training matrix is watched, so the test matrix is not loaded here.
  DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");

  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put("max_depth", 2);
  paramMap.put("silent", 1);
  paramMap.put("objective", "binary:logistic");
  paramMap.put("tree_method", "hist");
  paramMap.put("grow_policy", "depthwise");
  paramMap.put("eval_metric", "auc");

  Map<String, DMatrix> watches = new HashMap<>();
  watches.put("training", trainMat);
  testWithFastHisto(trainMat, watches, 10, paramMap, 0.85f);
}
/**
 * Runs the histogram tree method ({@code tree_method=hist}) with the {@code depthwise} grow
 * policy, {@code max_depth=2} and {@code max_bin=2} for 10 rounds, watching only the training
 * matrix, and requires every recorded AUC value to be at least {@code 0.85f}.
 *
 * <p>{@code max_depth} is set once. An earlier version put {@code 3} and then {@code 2} under
 * the same key, so {@code 2} was already the value in effect.
 *
 * @throws XGBoostError if loading the data or training fails
 */
@Test
public void testFastHistoDepthwiseMaxDepthMaxBin() throws XGBoostError {
  // Only the training matrix is watched, so the test matrix is not loaded here.
  DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");

  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put("max_depth", 2);
  paramMap.put("silent", 1);
  paramMap.put("objective", "binary:logistic");
  paramMap.put("tree_method", "hist");
  paramMap.put("max_bin", 2);
  paramMap.put("grow_policy", "depthwise");
  paramMap.put("eval_metric", "auc");

  Map<String, DMatrix> watches = new HashMap<>();
  watches.put("training", trainMat);
  testWithFastHisto(trainMat, watches, 10, paramMap, 0.85f);
}
/**
* test cross validation
*

View File

@@ -77,6 +77,23 @@ class ScalaBoosterImplSuite extends FunSuite {
XGBoost.train(trainMat, paramMap, round, watches, null, null)
}
/**
 * Trains a booster and validates the metrics array handed to `XGBoost.train`.
 *
 * The array has `watches.size` rows and `round` columns. For every row the values must be
 * non-decreasing, and every value must be at least `threshold`.
 *
 * @param trainMat  matrix the booster is trained on
 * @param watches   named matrices passed to the trainer for evaluation
 * @param round     number of boosting rounds
 * @param paramMap  training parameters forwarded unchanged to `XGBoost.train`
 * @param threshold lower bound that every recorded metric value must reach
 * @return the trained booster
 */
private def trainBoosterWithFastHisto(
    trainMat: DMatrix,
    watches: Map[String, DMatrix],
    round: Int,
    paramMap: Map[String, String],
    threshold: Float): Booster = {
  val metrics = Array.ofDim[Float](watches.size, round)
  val booster = XGBoost.train(trainMat, paramMap, round, watches, metrics, null, null)
  // First pass: each row must never decrease from one column to the next.
  metrics.foreach { perWatch =>
    (1 until perWatch.length).foreach { idx =>
      assert(perWatch(idx) >= perWatch(idx - 1))
    }
  }
  // Second pass: every value must reach the lower bound.
  metrics.foreach { perWatch =>
    perWatch.foreach { value =>
      assert(value >= threshold)
    }
  }
  booster
}
test("basic operation of booster") {
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
@@ -128,4 +145,57 @@ class ScalaBoosterImplSuite extends FunSuite {
val nfold = 5
XGBoost.crossValidation(trainMat, params, round, nfold, null, null, null)
}
// Histogram tree method with the depthwise grow policy: 10 rounds, AUC watched on both the
// training and the test matrix, lower bound 0.0f.
test("test with fast histo depthwise") {
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
  val paramMap = Map(
    "max_depth" -> "3",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "depthwise",
    "eval_metric" -> "auc")
  val watches = Map("training" -> trainMat, "test" -> testMat)
  trainBoosterWithFastHisto(trainMat, watches, round = 10, paramMap, 0.0f)
}
// Histogram tree method with the lossguide grow policy (max_depth 0, max_leaves 8):
// 10 rounds, AUC watched on both the training and the test matrix, lower bound 0.0f.
test("test with fast histo lossguide") {
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
  val paramMap = Map(
    "max_depth" -> "0",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "lossguide",
    "max_leaves" -> "8",
    "eval_metric" -> "auc")
  val watches = Map("training" -> trainMat, "test" -> testMat)
  trainBoosterWithFastHisto(trainMat, watches, round = 10, paramMap, 0.0f)
}
// Histogram tree method with the lossguide grow policy (max_depth 0, max_leaves 8) and
// max_bin 16: 10 rounds, AUC watched on the training matrix only, lower bound 0.0f.
test("test with fast histo lossguide with max bin") {
  // Only the training matrix is watched, so the test matrix is not loaded here.
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val paramMap = Map(
    "max_depth" -> "0",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "lossguide",
    "max_leaves" -> "8",
    "max_bin" -> "16",
    "eval_metric" -> "auc")
  trainBoosterWithFastHisto(trainMat, Map("training" -> trainMat),
    round = 10, paramMap, 0.0f)
}
// Histogram tree method with the depthwise grow policy and max_depth 2: 10 rounds, AUC
// watched on the training matrix only, every value must be at least 0.85f.
//
// "max_depth" is listed once. It used to appear twice ("0" then "2") in a List converted with
// toMap, where the later pair wins, so "2" was already the value in effect.
// NOTE(review): "depthwidth" in the test name looks like a typo for "depthwise"; the name is
// left untouched so the registered test name does not change.
test("test with fast histo depthwidth with max depth") {
  // Only the training matrix is watched, so the test matrix is not loaded here.
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val paramMap = Map(
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "depthwise",
    "max_leaves" -> "8",
    "max_depth" -> "2",
    "eval_metric" -> "auc")
  trainBoosterWithFastHisto(trainMat, Map("training" -> trainMat),
    round = 10, paramMap, 0.85f)
}
// Histogram tree method with the depthwise grow policy, max_depth 2 and max_bin 2: 10 rounds,
// AUC watched on the training matrix only, every value must be at least 0.85f.
//
// "max_depth" is listed once. It used to appear twice ("0" then "2") in a List converted with
// toMap, where the later pair wins, so "2" was already the value in effect.
// NOTE(review): "depthwidth" in the test name looks like a typo for "depthwise"; the name is
// left untouched so the registered test name does not change.
test("test with fast histo depthwidth with max depth and max bin") {
  // Only the training matrix is watched, so the test matrix is not loaded here.
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val paramMap = Map(
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "depthwise",
    "max_depth" -> "2",
    "max_bin" -> "2",
    "eval_metric" -> "auc")
  trainBoosterWithFastHisto(trainMat, Map("training" -> trainMat),
    round = 10, paramMap, 0.85f)
}
}