[jvm-packages] Scala/Java interface for Fast Histogram Algorithm (#1966)
* add back train method but mark as deprecated * fix scalastyle error * first commit in scala binding for fast histo * java test * add missed scala tests * spark training * add back train method but mark as deprecated * fix scalastyle error * local change * first commit in scala binding for fast histo * local change * fix df frame test
This commit is contained in:
@@ -26,7 +26,6 @@ import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import ml.dmlc.xgboost4j.java.*;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.junit.Test;
|
||||
@@ -151,6 +150,130 @@ public class BoosterImplTest {
|
||||
TestCase.assertTrue("loadedPredictErr:" + loadedPredictError, loadedPredictError < 0.1f);
|
||||
}
|
||||
|
||||
private void testWithFastHisto(DMatrix trainingSet, Map<String, DMatrix> watches, int round,
|
||||
Map<String, Object> paramMap, float threshold) throws XGBoostError {
|
||||
float[][] metrics = new float[watches.size()][round];
|
||||
Booster booster = XGBoost.train(trainingSet, paramMap, round, watches,
|
||||
metrics, null, null);
|
||||
for (int i = 0; i < metrics.length; i++)
|
||||
for (int j = 1; j < metrics[i].length; j++) {
|
||||
TestCase.assertTrue(metrics[i][j] >= metrics[i][j - 1]);
|
||||
}
|
||||
for (int i = 0; i < metrics.length; i++)
|
||||
for (int j = 0; j < metrics[i].length; j++) {
|
||||
TestCase.assertTrue(metrics[i][j] >= threshold);
|
||||
}
|
||||
booster.dispose();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFastHistoDepthWise() throws XGBoostError {
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
// testBoosterWithFastHistogram(trainMat, testMat);
|
||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||
{
|
||||
put("max_depth", 3);
|
||||
put("silent", 1);
|
||||
put("objective", "binary:logistic");
|
||||
put("tree_method", "hist");
|
||||
put("grow_policy", "depthwise");
|
||||
put("eval_metric", "auc");
|
||||
}
|
||||
};
|
||||
Map<String, DMatrix> watches = new HashMap<>();
|
||||
watches.put("training", trainMat);
|
||||
watches.put("test", testMat);
|
||||
testWithFastHisto(trainMat, watches, 10, paramMap, 0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFastHistoLossGuide() throws XGBoostError {
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
// testBoosterWithFastHistogram(trainMat, testMat);
|
||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||
{
|
||||
put("max_depth", 0);
|
||||
put("silent", 1);
|
||||
put("objective", "binary:logistic");
|
||||
put("tree_method", "hist");
|
||||
put("grow_policy", "lossguide");
|
||||
put("max_leaves", 8);
|
||||
put("eval_metric", "auc");
|
||||
}
|
||||
};
|
||||
Map<String, DMatrix> watches = new HashMap<>();
|
||||
watches.put("training", trainMat);
|
||||
watches.put("test", testMat);
|
||||
testWithFastHisto(trainMat, watches, 10, paramMap, 0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFastHistoLossGuideMaxBin() throws XGBoostError {
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
// testBoosterWithFastHistogram(trainMat, testMat);
|
||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||
{
|
||||
put("max_depth", 0);
|
||||
put("silent", 1);
|
||||
put("objective", "binary:logistic");
|
||||
put("tree_method", "hist");
|
||||
put("grow_policy", "lossguide");
|
||||
put("max_leaves", 8);
|
||||
put("max_bins", 16);
|
||||
put("eval_metric", "auc");
|
||||
}
|
||||
};
|
||||
Map<String, DMatrix> watches = new HashMap<>();
|
||||
watches.put("training", trainMat);
|
||||
testWithFastHisto(trainMat, watches, 10, paramMap, 0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFastHistoDepthwiseMaxDepth() throws XGBoostError {
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
// testBoosterWithFastHistogram(trainMat, testMat);
|
||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||
{
|
||||
put("max_depth", 3);
|
||||
put("silent", 1);
|
||||
put("objective", "binary:logistic");
|
||||
put("tree_method", "hist");
|
||||
put("max_depth", 2);
|
||||
put("grow_policy", "depthwise");
|
||||
put("eval_metric", "auc");
|
||||
}
|
||||
};
|
||||
Map<String, DMatrix> watches = new HashMap<>();
|
||||
watches.put("training", trainMat);
|
||||
testWithFastHisto(trainMat, watches, 10, paramMap, 0.85f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFastHistoDepthwiseMaxDepthMaxBin() throws XGBoostError {
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
// testBoosterWithFastHistogram(trainMat, testMat);
|
||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||
{
|
||||
put("max_depth", 3);
|
||||
put("silent", 1);
|
||||
put("objective", "binary:logistic");
|
||||
put("tree_method", "hist");
|
||||
put("max_depth", 2);
|
||||
put("max_bin", 2);
|
||||
put("grow_policy", "depthwise");
|
||||
put("eval_metric", "auc");
|
||||
}
|
||||
};
|
||||
Map<String, DMatrix> watches = new HashMap<>();
|
||||
watches.put("training", trainMat);
|
||||
testWithFastHisto(trainMat, watches, 10, paramMap, 0.85f);
|
||||
}
|
||||
|
||||
/**
|
||||
* test cross validation
|
||||
*
|
||||
|
||||
@@ -77,6 +77,23 @@ class ScalaBoosterImplSuite extends FunSuite {
|
||||
XGBoost.train(trainMat, paramMap, round, watches, null, null)
|
||||
}
|
||||
|
||||
/**
 * Trains a booster and checks the evaluation metrics collected in the array handed to
 * `XGBoost.train`: within each row the values must never decrease, and every value must
 * be at least `threshold`.
 *
 * @param trainMat matrix to train on
 * @param watches named matrices passed to training; one metrics row is allocated per entry
 * @param round number of boosting rounds; also the length of each metrics row
 * @param paramMap training parameters forwarded unchanged to `XGBoost.train`
 * @param threshold lower bound that every recorded metric must reach
 * @return the trained booster
 */
private def trainBoosterWithFastHisto(
    trainMat: DMatrix,
    watches: Map[String, DMatrix],
    round: Int,
    paramMap: Map[String, String],
    threshold: Float): Booster = {
  val evalHistory = Array.ofDim[Float](watches.size, round)
  val trained = XGBoost.train(trainMat, paramMap, round, watches, evalHistory, null, null)
  // Non-decreasing check between consecutive entries of each watch's row.
  for (watchIdx <- 0 until watches.size) {
    val row = evalHistory(watchIdx)
    for (iter <- 1 until row.length) {
      assert(row(iter) >= row(iter - 1))
    }
  }
  // Lower-bound check on every recorded value.
  evalHistory.foreach { row =>
    row.foreach { value =>
      assert(value >= threshold)
    }
  }
  trained
}
|
||||
|
||||
test("basic operation of booster") {
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
|
||||
@@ -128,4 +145,57 @@ class ScalaBoosterImplSuite extends FunSuite {
|
||||
val nfold = 5
|
||||
XGBoost.crossValidation(trainMat, params, round, nfold, null, null, null)
|
||||
}
|
||||
|
||||
// Fast histogram with depth-wise growth; both the training and the test matrix are watched.
test("test with fast histo depthwise") {
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
  val params = Map(
    "max_depth" -> "3",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "depthwise",
    "eval_metric" -> "auc")
  val watched = Map("training" -> trainMat, "test" -> testMat)
  trainBoosterWithFastHisto(trainMat, watched, round = 10, paramMap = params, threshold = 0.0f)
}
|
||||
|
||||
// Fast histogram with loss-guided growth (max_depth 0, max_leaves 8); both the training and
// the test matrix are watched.
test("test with fast histo lossguide") {
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
  val params = Map(
    "max_depth" -> "0",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "lossguide",
    "max_leaves" -> "8",
    "eval_metric" -> "auc")
  val watched = Map("training" -> trainMat, "test" -> testMat)
  trainBoosterWithFastHisto(trainMat, watched, round = 10, paramMap = params, threshold = 0.0f)
}
|
||||
|
||||
// Fast histogram with loss-guided growth (max_depth 0, max_leaves 8) and max_bin 16.
// Only the training matrix is watched, so the test matrix is no longer loaded here.
test("test with fast histo lossguide with max bin") {
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  val paramMap = Map(
    "max_depth" -> "0",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "lossguide",
    "max_leaves" -> "8",
    "max_bin" -> "16",
    "eval_metric" -> "auc")
  trainBoosterWithFastHisto(trainMat, Map("training" -> trainMat),
    round = 10, paramMap, 0.0f)
}
|
||||
|
||||
// Fast histogram with depth-wise growth and max_depth 2; every training metric must reach
// 0.85. Only the training matrix is watched, so the test matrix is no longer loaded here.
test("test with fast histo depthwise with max depth") {
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  // "max_depth" appears once: the former leading "max_depth" -> "0" entry was silently
  // overridden by the later "2" when the list was converted to a map.
  val paramMap = Map(
    "max_depth" -> "2",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "depthwise",
    "max_leaves" -> "8",
    "eval_metric" -> "auc")
  trainBoosterWithFastHisto(trainMat, Map("training" -> trainMat),
    round = 10, paramMap, 0.85f)
}
|
||||
|
||||
// Fast histogram with depth-wise growth, max_depth 2 and max_bin 2; every training metric
// must reach 0.85. Only the training matrix is watched, so the test matrix is no longer
// loaded here.
test("test with fast histo depthwise with max depth and max bin") {
  val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
  // "max_depth" appears once: the former leading "max_depth" -> "0" entry was silently
  // overridden by the later "2" when the list was converted to a map.
  val paramMap = Map(
    "max_depth" -> "2",
    "silent" -> "0",
    "objective" -> "binary:logistic",
    "tree_method" -> "hist",
    "grow_policy" -> "depthwise",
    "max_bin" -> "2",
    "eval_metric" -> "auc")
  trainBoosterWithFastHisto(trainMat, Map("training" -> trainMat),
    round = 10, paramMap, 0.85f)
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user