[jvm-packages] Repair spark model eval (#2841)

In the refactor to add base margins (#2532), all of the labels were lost
when creating the DMatrix for model evaluation, because only the features
of each labeled point were converted. This became obvious as metrics like
ndcg always returned 1.0 regardless of the results.

Change-Id: I88be047e1c108afba4784bd3d892bfc9edeabe55
This commit is contained in:
ebernhardson 2017-11-04 15:28:47 -07:00 committed by Sergei Lebedev
parent a8f670d247
commit 78d0bd6c9d
2 changed files with 5 additions and 3 deletions

View File

@ -116,7 +116,7 @@ abstract class XGBoostModel(protected var _booster: Booster)
null
}
}
val dMatrix = new DMatrix(labeledPointsPartition.map(_.features.asXGB), cacheFileName)
val dMatrix = new DMatrix(labeledPointsPartition.map(_.asXGB), cacheFileName)
try {
if (groupData != null) {
dMatrix.setGroup(groupData(TaskContext.getPartitionId()).toArray)

View File

@ -346,16 +346,18 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
val trainGroupData: Seq[Seq[Int]] = Seq(Ranking.trainGroup0)
val testRDD = sc.parallelize(Ranking.test, numSlices = 1).map(_.features)
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap = Map("eta" -> "1", "max_depth" -> "2", "silent" -> "1",
"objective" -> "rank:pairwise", "eval_metric" -> "ndcg", "groupData" -> trainGroupData)
val xgBoostModel = XGBoost.trainWithRDD(trainingRDD, paramMap, 5, nWorkers = 1)
val xgBoostModel = XGBoost.trainWithRDD(trainingRDD, paramMap, 2, nWorkers = 1)
val predRDD = xgBoostModel.predict(testRDD)
val predResult1: Array[Array[Float]] = predRDD.collect()
assert(testRDD.count() === predResult1.length)
val avgMetric = xgBoostModel.eval(trainingRDD, "test", iter = 0, groupData = trainGroupData)
assert(avgMetric contains "ndcg")
// If the labels were lost ndcg comes back as 1.0
assert(avgMetric.split('=')(1).toFloat < 1F)
}
test("test use nested groupData") {