[jvm-packages] Implemented early stopping (#2710)

* Allowed subsampling the test set from the training data frame/RDD

The implementation requires storing a (1 - trainTestRatio) fraction of
the points in memory to make the sampling work.

An alternative approach would be to construct the full DMatrix and then
slice it deterministically into train/test. The peak memory consumption
of such a scenario, however, is twice the dataset size.

* Removed duplication from 'XGBoost.train'

Scala callers can (and should) use names to supply a subset of
parameters. Method overloading is not required.

* Reuse XGBoost seed parameter to stabilize train/test splitting

* Added early stopping support to non-distributed XGBoost

Closes #1544

* Added early stopping to distributed XGBoost

* Moved construction of 'watches' into a separate method

This commit also fixes the handling of 'baseMargin', which previously
was not added to the validation matrix.

* Addressed review comments
This commit is contained in:
Sergei Lebedev
2017-09-29 21:06:22 +02:00
committed by Nan Zhu
parent 74db9757b3
commit 69c3b78a29
15 changed files with 191 additions and 91 deletions

View File

@@ -85,7 +85,7 @@ object BasicWalkThrough {
val watches2 = new mutable.HashMap[String, DMatrix]
watches2 += "train" -> trainMax2
watches2 += "test" -> testMax2
val booster3 = XGBoost.train(trainMax2, params.toMap, round, watches2.toMap, null, null)
val booster3 = XGBoost.train(trainMax2, params.toMap, round, watches2.toMap)
val predicts3 = booster3.predict(testMax2)
println(checkPredicts(predicts, predicts3))
}

View File

@@ -41,6 +41,6 @@ object CrossValidation {
val metrics: Array[String] = null
val evalHist: Array[String] =
XGBoost.crossValidation(trainMat, params.toMap, round, nfold, metrics, null, null)
XGBoost.crossValidation(trainMat, params.toMap, round, nfold, metrics)
}
}

View File

@@ -151,7 +151,8 @@ object CustomObjective {
val round = 2
// train a model
val booster = XGBoost.train(trainMat, params.toMap, round, watches.toMap)
XGBoost.train(trainMat, params.toMap, round, watches.toMap, new LogRegObj, new EvalError)
XGBoost.train(trainMat, params.toMap, round, watches.toMap,
obj = new LogRegObj, eval = new EvalError)
}
}

View File

@@ -54,6 +54,6 @@ object ExternalMemory {
testMat.setBaseMargin(testPred)
System.out.println("result of running from initial prediction")
val booster2 = XGBoost.train(trainMat, params.toMap, 1, watches.toMap, null, null)
val booster2 = XGBoost.train(trainMat, params.toMap, 1, watches.toMap)
}
}

View File

@@ -52,7 +52,7 @@ object GeneralizedLinearModel {
watches += "test" -> testMat
val round = 4
val booster = XGBoost.train(trainMat, params.toMap, 1, watches.toMap, null, null)
val booster = XGBoost.train(trainMat, params.toMap, 1, watches.toMap)
val predicts = booster.predict(testMat)
val eval = new CustomEval
println(s"error=${eval.eval(predicts, testMat)}")