[jvm-packages] Add feature size for LabeledPoint and DataBatch (#5303)

* fix type error

* Validate number of features.

* resolve comments

* add feature size for LabeledPoint and DataBatch

* pass the feature size to native

* move feature-size validation tests into a separate suite

* resolve comments

Co-authored-by: fis <jm.yuan@outlook.com>
This commit is contained in:
Bobby Wang
2020-04-08 07:49:52 +08:00
committed by GitHub
parent 8bc595ea1e
commit ad826e913f
17 changed files with 193 additions and 75 deletions

View File

@@ -20,6 +20,7 @@ package ml.dmlc.xgboost4j
* Labeled training data point.
*
* @param label Label of this point.
* @param size Feature dimensionality
* @param indices Feature indices of this point or `null` if the data is dense.
* @param values Feature values of this point.
* @param weight Weight of this point.
@@ -28,6 +29,7 @@ package ml.dmlc.xgboost4j
*/
case class LabeledPoint(
label: Float,
size: Int,
indices: Array[Int],
values: Array[Float],
weight: Float = 1f,
@@ -36,8 +38,11 @@ case class LabeledPoint(
require(indices == null || indices.length == values.length,
"indices and values must have the same number of elements")
def this(label: Float, indices: Array[Int], values: Array[Float]) = {
require(indices == null || size >= indices.length,
"feature dimensionality must be greater equal than size of indices")
def this(label: Float, size: Int, indices: Array[Int], values: Array[Float]) = {
// [[weight]] default duplicated to disambiguate the constructor call.
this(label, indices, values, 1.0f)
this(label, size, indices, values, 1.0f)
}
}