[jvm-packages] enable predictLeaf/predictContrib/treeLimit in 0.8 (#3532)

* add back train method but mark as deprecated

* add back train method but mark as deprecated

* fix scalastyle error

* fix scalastyle error

* partial finish

* no test

* add test cases

* add test cases

* address comments

* add test for regressor

* fix typo
This commit is contained in:
Nan Zhu
2018-08-07 14:01:18 -07:00
committed by GitHub
parent 246ec92163
commit 1c08b3b2ea
7 changed files with 349 additions and 32 deletions

View File

@@ -211,4 +211,77 @@ class XGBoostClassifierSuite extends FunSuite with PerTest {
assert(testObjectiveHistory.length === 5)
assert(model.summary.trainObjectiveHistory !== testObjectiveHistory)
}
test("test predictionLeaf") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "binary:logistic", "train_test_ratio" -> "0.5",
"num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Classification.train)
val test = buildDataFrame(Classification.test)
val groundTruth = test.count()
val xgb = new XGBoostClassifier(paramMap)
val model = xgb.fit(training)
model.setLeafPredictionCol("predictLeaf")
val resultDF = model.transform(test)
assert(resultDF.count == groundTruth)
assert(resultDF.columns.contains("predictLeaf"))
}
test("test predictionLeaf with empty column name") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "binary:logistic", "train_test_ratio" -> "0.5",
"num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Classification.train)
val test = buildDataFrame(Classification.test)
val xgb = new XGBoostClassifier(paramMap)
val model = xgb.fit(training)
model.setLeafPredictionCol("")
val resultDF = model.transform(test)
assert(!resultDF.columns.contains("predictLeaf"))
}
test("test predictionContrib") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "binary:logistic", "train_test_ratio" -> "0.5",
"num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Classification.train)
val test = buildDataFrame(Classification.test)
val groundTruth = test.count()
val xgb = new XGBoostClassifier(paramMap)
val model = xgb.fit(training)
model.setContribPredictionCol("predictContrib")
val resultDF = model.transform(buildDataFrame(Classification.test))
assert(resultDF.count == groundTruth)
assert(resultDF.columns.contains("predictContrib"))
}
test("test predictionContrib with empty column name") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "binary:logistic", "train_test_ratio" -> "0.5",
"num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Classification.train)
val test = buildDataFrame(Classification.test)
val xgb = new XGBoostClassifier(paramMap)
val model = xgb.fit(training)
model.setContribPredictionCol("")
val resultDF = model.transform(test)
assert(!resultDF.columns.contains("predictContrib"))
}
test("test predictionLeaf and predictionContrib") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "binary:logistic", "train_test_ratio" -> "0.5",
"num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Classification.train)
val test = buildDataFrame(Classification.test)
val groundTruth = test.count()
val xgb = new XGBoostClassifier(paramMap)
val model = xgb.fit(training)
model.setLeafPredictionCol("predictLeaf")
model.setContribPredictionCol("predictContrib")
val resultDF = model.transform(buildDataFrame(Classification.test))
assert(resultDF.count == groundTruth)
assert(resultDF.columns.contains("predictLeaf"))
assert(resultDF.columns.contains("predictContrib"))
}
}

View File

@@ -120,4 +120,72 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
val first = prediction.head.getAs[Double]("prediction")
prediction.foreach(x => assert(math.abs(x.getAs[Double]("prediction") - first) <= 0.01f))
}
test("test predictionLeaf") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Regression.train)
val testDF = buildDataFrame(Regression.test)
val groundTruth = testDF.count()
val xgb = new XGBoostRegressor(paramMap)
val model = xgb.fit(training)
model.setLeafPredictionCol("predictLeaf")
val resultDF = model.transform(testDF)
assert(resultDF.count === groundTruth)
assert(resultDF.columns.contains("predictLeaf"))
}
test("test predictionLeaf with empty column name") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Regression.train)
val testDF = buildDataFrame(Regression.test)
val xgb = new XGBoostRegressor(paramMap)
val model = xgb.fit(training)
model.setLeafPredictionCol("")
val resultDF = model.transform(testDF)
assert(!resultDF.columns.contains("predictLeaf"))
}
test("test predictionContrib") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Regression.train)
val testDF = buildDataFrame(Regression.test)
val groundTruth = testDF.count()
val xgb = new XGBoostRegressor(paramMap)
val model = xgb.fit(training)
model.setContribPredictionCol("predictContrib")
val resultDF = model.transform(testDF)
assert(resultDF.count === groundTruth)
assert(resultDF.columns.contains("predictContrib"))
}
test("test predictionContrib with empty column name") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Regression.train)
val testDF = buildDataFrame(Regression.test)
val xgb = new XGBoostRegressor(paramMap)
val model = xgb.fit(training)
model.setContribPredictionCol("")
val resultDF = model.transform(testDF)
assert(!resultDF.columns.contains("predictContrib"))
}
test("test predictionLeaf and predictionContrib") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Regression.train)
val testDF = buildDataFrame(Regression.test)
val groundTruth = testDF.count()
val xgb = new XGBoostRegressor(paramMap)
val model = xgb.fit(training)
model.setLeafPredictionCol("predictLeaf")
model.setContribPredictionCol("predictContrib")
val resultDF = model.transform(testDF)
assert(resultDF.count === groundTruth)
assert(resultDF.columns.contains("predictLeaf"))
assert(resultDF.columns.contains("predictContrib"))
}
}