[jvm-packages] Add feature size for LabeledPoint and DataBatch (#5303)

* fix type error

* Validate number of features.

* resolve comments

* add feature size for LabeledPoint and DataBatch

* pass the feature size to native

* move feature size validating tests into a separate suite

* resolve comments

Co-authored-by: fis <jm.yuan@outlook.com>
This commit is contained in:
Bobby Wang
2020-04-08 07:49:52 +08:00
committed by GitHub
parent 8bc595ea1e
commit ad826e913f
17 changed files with 193 additions and 75 deletions

View File

@@ -45,7 +45,7 @@ public class DMatrixTest {
java.util.List<LabeledPoint> blist = new java.util.LinkedList<LabeledPoint>();
for (int i = 0; i < nrep; ++i) {
LabeledPoint p = new LabeledPoint(
0.1f + i, new int[]{0, 2, 3}, new float[]{3, 4, 5});
0.1f + i, 4, new int[]{0, 2, 3}, new float[]{3, 4, 5});
blist.add(p);
labelall.add(p.label());
}
@@ -57,6 +57,33 @@ public class DMatrixTest {
}
}
/**
 * Building a DMatrix from an iterator whose rows declare inconsistent feature
 * sizes must be rejected with an {@link XGBoostError}.
 *
 * <p>Most rows declare a feature size of 4; every row whose index satisfies
 * {@code i % 10 == 1} declares a feature size of 5 instead.
 */
@Test
public void testCreateFromDataIteratorWithDiffFeatureSize() throws XGBoostError {
  final int nrep = 3000;
  final int expectedFeatureSize = 4;
  final int wrongFeatureSize = 5;

  java.util.List<LabeledPoint> blist = new java.util.LinkedList<LabeledPoint>();
  for (int i = 0; i < nrep; ++i) {
    // Choose the size per row so that only the selected rows are inconsistent;
    // a variable kept outside the loop would stay at the wrong size for every
    // row after the first mismatch.
    int featureSize = (i % 10 == 1) ? wrongFeatureSize : expectedFeatureSize;
    blist.add(new LabeledPoint(
        0.1f + i, featureSize, new int[]{0, 2, 3}, new float[]{3, 4, 5}));
  }

  boolean rejected = false;
  try {
    // Only the constructor's failure matters here; the instance is not needed.
    new DMatrix(blist.iterator(), null);
  } catch (XGBoostError expected) {
    // This is the outcome under test: mismatched feature sizes are refused.
    rejected = true;
  }
  TestCase.assertTrue(
      "DMatrix creation should fail when rows declare different feature sizes", rejected);
}
@Test
public void testCreateFromFile() throws XGBoostError {
//create DMatrix from file