[jvm-packages] Repair spark model eval (#2841)
In the refactor to add base margins (#2532), all of the labels were lost when creating the DMatrix. This became obvious because metrics like ndcg always returned 1.0 regardless of the results. Change-Id: I88be047e1c108afba4784bd3d892bfc9edeabe55
This commit is contained in:
parent
a8f670d247
commit
78d0bd6c9d
@ -116,7 +116,7 @@ abstract class XGBoostModel(protected var _booster: Booster)
|
||||
null
|
||||
}
|
||||
}
|
||||
val dMatrix = new DMatrix(labeledPointsPartition.map(_.features.asXGB), cacheFileName)
|
||||
val dMatrix = new DMatrix(labeledPointsPartition.map(_.asXGB), cacheFileName)
|
||||
try {
|
||||
if (groupData != null) {
|
||||
dMatrix.setGroup(groupData(TaskContext.getPartitionId()).toArray)
|
||||
|
||||
@ -346,16 +346,18 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
|
||||
val trainGroupData: Seq[Seq[Int]] = Seq(Ranking.trainGroup0)
|
||||
val testRDD = sc.parallelize(Ranking.test, numSlices = 1).map(_.features)
|
||||
|
||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||
val paramMap = Map("eta" -> "1", "max_depth" -> "2", "silent" -> "1",
|
||||
"objective" -> "rank:pairwise", "eval_metric" -> "ndcg", "groupData" -> trainGroupData)
|
||||
|
||||
val xgBoostModel = XGBoost.trainWithRDD(trainingRDD, paramMap, 5, nWorkers = 1)
|
||||
val xgBoostModel = XGBoost.trainWithRDD(trainingRDD, paramMap, 2, nWorkers = 1)
|
||||
val predRDD = xgBoostModel.predict(testRDD)
|
||||
val predResult1: Array[Array[Float]] = predRDD.collect()
|
||||
assert(testRDD.count() === predResult1.length)
|
||||
|
||||
val avgMetric = xgBoostModel.eval(trainingRDD, "test", iter = 0, groupData = trainGroupData)
|
||||
assert(avgMetric contains "ndcg")
|
||||
// If the labels were lost, ndcg comes back as 1.0
|
||||
assert(avgMetric.split('=')(1).toFloat < 1F)
|
||||
}
|
||||
|
||||
test("test use nested groupData") {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user