[jvm-packages] Implemented early stopping (#2710)
* Allowed subsampling the test set from the training data frame/RDD The implementation requires storing 1 - trainTestRatio points in memory to make the sampling work. An alternative approach would be to construct the full DMatrix and then slice it deterministically into train/test. The peak memory consumption of such a scenario, however, is twice the dataset size. * Removed duplication from 'XGBoost.train' Scala callers can (and should) use names to supply a subset of parameters. Method overloading is not required. * Reuse XGBoost seed parameter to stabilize train/test splitting * Added early stopping support to non-distributed XGBoost Closes #1544 * Added early stopping to distributed XGBoost * Moved construction of 'watches' into a separate method This commit also fixes the handling of 'baseMargin' which previously was not added to the validation matrix. * Addressed review comments
This commit is contained in:
@@ -85,7 +85,7 @@ object BasicWalkThrough {
|
||||
val watches2 = new mutable.HashMap[String, DMatrix]
|
||||
watches2 += "train" -> trainMax2
|
||||
watches2 += "test" -> testMax2
|
||||
val booster3 = XGBoost.train(trainMax2, params.toMap, round, watches2.toMap, null, null)
|
||||
val booster3 = XGBoost.train(trainMax2, params.toMap, round, watches2.toMap)
|
||||
val predicts3 = booster3.predict(testMax2)
|
||||
println(checkPredicts(predicts, predicts3))
|
||||
}
|
||||
|
||||
@@ -41,6 +41,6 @@ object CrossValidation {
|
||||
val metrics: Array[String] = null
|
||||
|
||||
val evalHist: Array[String] =
|
||||
XGBoost.crossValidation(trainMat, params.toMap, round, nfold, metrics, null, null)
|
||||
XGBoost.crossValidation(trainMat, params.toMap, round, nfold, metrics)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -151,7 +151,8 @@ object CustomObjective {
|
||||
val round = 2
|
||||
// train a model
|
||||
val booster = XGBoost.train(trainMat, params.toMap, round, watches.toMap)
|
||||
XGBoost.train(trainMat, params.toMap, round, watches.toMap, new LogRegObj, new EvalError)
|
||||
XGBoost.train(trainMat, params.toMap, round, watches.toMap,
|
||||
obj = new LogRegObj, eval = new EvalError)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -54,6 +54,6 @@ object ExternalMemory {
|
||||
testMat.setBaseMargin(testPred)
|
||||
|
||||
System.out.println("result of running from initial prediction")
|
||||
val booster2 = XGBoost.train(trainMat, params.toMap, 1, watches.toMap, null, null)
|
||||
val booster2 = XGBoost.train(trainMat, params.toMap, 1, watches.toMap)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,7 +52,7 @@ object GeneralizedLinearModel {
|
||||
watches += "test" -> testMat
|
||||
|
||||
val round = 4
|
||||
val booster = XGBoost.train(trainMat, params.toMap, 1, watches.toMap, null, null)
|
||||
val booster = XGBoost.train(trainMat, params.toMap, 1, watches.toMap)
|
||||
val predicts = booster.predict(testMat)
|
||||
val eval = new CustomEval
|
||||
println(s"error=${eval.eval(predicts, testMat)}")
|
||||
|
||||
Reference in New Issue
Block a user