[jvm-packages] remove xgboost4j-gpu and rework cudf column (#10630)

This commit is contained in:
Bobby Wang
2024-07-25 15:31:16 +08:00
committed by GitHub
parent fcae6301ec
commit d5834b68c3
26 changed files with 509 additions and 632 deletions

View File

@@ -0,0 +1,118 @@
/*
Copyright (c) 2021-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ml.dmlc.xgboost4j.java;
import java.io.File;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import ai.rapids.cudf.*;
import junit.framework.TestCase;
import org.junit.Test;
/**
* Tests the BoosterTest trained by DMatrix
*
* @throws XGBoostError
*/
public class BoosterTest {
@Test
public void testBooster() throws XGBoostError {
String trainingDataPath = getClass().getClassLoader()
.getResource("veterans_lung_cancer.csv").getPath();
Schema schema = Schema.builder()
.column(DType.FLOAT32, "A")
.column(DType.FLOAT32, "B")
.column(DType.FLOAT32, "C")
.column(DType.FLOAT32, "D")
.column(DType.FLOAT32, "E")
.column(DType.FLOAT32, "F")
.column(DType.FLOAT32, "G")
.column(DType.FLOAT32, "H")
.column(DType.FLOAT32, "I")
.column(DType.FLOAT32, "J")
.column(DType.FLOAT32, "K")
.column(DType.FLOAT32, "L")
.column(DType.FLOAT32, "label")
.build();
CSVOptions opts = CSVOptions.builder()
.hasHeader().build();
int maxBin = 16;
int round = 10;
//set params
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("max_depth", 2);
put("objective", "binary:logistic");
put("num_round", round);
put("num_workers", 1);
put("tree_method", "hist");
put("device", "cuda");
put("max_bin", maxBin);
}
};
try (Table tmpTable = Table.readCSV(schema, opts, new File(trainingDataPath))) {
ColumnVector[] df = new ColumnVector[10];
// exclude the first two columns, they are label bounds and contain inf.
for (int i = 2; i < 12; ++i) {
df[i - 2] = tmpTable.getColumn(i);
}
try (Table X = new Table(df);) {
ColumnVector[] labels = new ColumnVector[1];
labels[0] = tmpTable.getColumn(12);
try (Table y = new Table(labels);) {
CudfColumnBatch batch = new CudfColumnBatch(X, y, null, null, null);
CudfColumn labelColumn = CudfColumn.from(tmpTable.getColumn(12));
//set watchList
HashMap<String, DMatrix> watches = new HashMap<>();
DMatrix dMatrix1 = new DMatrix(batch, Float.NaN, 1);
dMatrix1.setLabel(labelColumn);
watches.put("train", dMatrix1);
Booster model1 = XGBoost.train(dMatrix1, paramMap, round, watches, null, null);
List<ColumnBatch> tables = new LinkedList<>();
tables.add(batch);
DMatrix incrementalDMatrix = new QuantileDMatrix(tables.iterator(), Float.NaN, maxBin, 1);
//set watchList
HashMap<String, DMatrix> watches1 = new HashMap<>();
watches1.put("train", incrementalDMatrix);
Booster model2 = XGBoost.train(incrementalDMatrix, paramMap, round, watches1, null, null);
float[][] predicat1 = model1.predict(dMatrix1);
float[][] predicat2 = model2.predict(dMatrix1);
for (int i = 0; i < tmpTable.getRowCount(); i++) {
TestCase.assertTrue(predicat1[i][0] - predicat2[i][0] < 1e-6);
}
}
}
}
}
}

View File

@@ -0,0 +1,154 @@
/*
Copyright (c) 2021-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ml.dmlc.xgboost4j.java;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import ai.rapids.cudf.Table;
import junit.framework.TestCase;
import org.junit.Test;
import static org.junit.Assert.assertArrayEquals;
/**
* Test suite for DMatrix based on GPU
*/
public class DMatrixTest {
@Test
public void testCreateFromArrayInterfaceColumns() {
Float[] labelFloats = new Float[]{2f, 4f, 6f, 8f, 10f};
Integer[] groups = new Integer[]{1, 1, 7, 7, 19, 26};
int[] expectedGroup = new int[]{0, 2, 4, 5, 6};
Throwable ex = null;
try (
Table X = new Table.TestBuilder().column(1.f, null, 5.f, 7.f, 9.f).build();
Table y = new Table.TestBuilder().column(labelFloats).build();
Table w = new Table.TestBuilder().column(labelFloats).build();
Table q = new Table.TestBuilder().column(groups).build();
Table margin = new Table.TestBuilder().column(labelFloats).build();) {
CudfColumnBatch cudfDataFrame = new CudfColumnBatch(X, y, w, null, null);
CudfColumn labelColumn = CudfColumn.from(y.getColumn(0));
CudfColumn weightColumn = CudfColumn.from(w.getColumn(0));
CudfColumn baseMarginColumn = CudfColumn.from(margin.getColumn(0));
CudfColumn qidColumn = CudfColumn.from(q.getColumn(0));
DMatrix dMatrix = new DMatrix(cudfDataFrame, 0, 1);
dMatrix.setLabel(labelColumn);
dMatrix.setWeight(weightColumn);
dMatrix.setBaseMargin(baseMarginColumn);
dMatrix.setQueryId(qidColumn);
String[] featureNames = new String[]{"f1"};
dMatrix.setFeatureNames(featureNames);
String[] retFeatureNames = dMatrix.getFeatureNames();
assertArrayEquals(featureNames, retFeatureNames);
String[] featureTypes = new String[]{"i"};
dMatrix.setFeatureTypes(featureTypes);
String[] retFeatureTypes = dMatrix.getFeatureTypes();
assertArrayEquals(featureTypes, retFeatureTypes);
float[] anchor = convertFloatTofloat(labelFloats);
float[] label = dMatrix.getLabel();
float[] weight = dMatrix.getWeight();
float[] baseMargin = dMatrix.getBaseMargin();
int[] group = dMatrix.getGroup();
TestCase.assertTrue(Arrays.equals(anchor, label));
TestCase.assertTrue(Arrays.equals(anchor, weight));
TestCase.assertTrue(Arrays.equals(anchor, baseMargin));
TestCase.assertTrue(Arrays.equals(expectedGroup, group));
} catch (Throwable e) {
ex = e;
e.printStackTrace();
}
TestCase.assertNull(ex);
}
@Test
public void testCreateFromColumnDataIterator() throws XGBoostError {
Float[] label1 = {25f, 21f, 22f, 20f, 24f};
Float[] weight1 = {1.3f, 2.31f, 0.32f, 3.3f, 1.34f};
Float[] baseMargin1 = {1.2f, 0.2f, 1.3f, 2.4f, 3.5f};
Integer[] groups1 = new Integer[]{1, 1, 7, 7, 19, 26};
Float[] label2 = {9f, 5f, 4f, 10f, 12f};
Float[] weight2 = {3.0f, 1.3f, 3.2f, 0.3f, 1.34f};
Float[] baseMargin2 = {0.2f, 2.5f, 3.1f, 4.4f, 2.2f};
Integer[] groups2 = new Integer[]{30, 30, 30, 40, 40};
int[] expectedGroup = new int[]{0, 2, 4, 5, 6, 9, 11};
try (
Table X_0 = new Table.TestBuilder()
.column(1.2f, null, 5.2f, 7.2f, 9.2f)
.column(0.2f, 0.4f, 0.6f, 2.6f, 0.10f)
.build();
Table y_0 = new Table.TestBuilder().column(label1).build();
Table w_0 = new Table.TestBuilder().column(weight1).build();
Table m_0 = new Table.TestBuilder().column(baseMargin1).build();
Table q_0 = new Table.TestBuilder().column(groups1).build();
Table X_1 = new Table.TestBuilder().column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f)
.column(1.2f, 1.4f, null, 12.6f, 10.10f).build();
Table y_1 = new Table.TestBuilder().column(label2).build();
Table w_1 = new Table.TestBuilder().column(weight2).build();
Table m_1 = new Table.TestBuilder().column(baseMargin2).build();) {
Table q_1 = new Table.TestBuilder().column(groups2).build();
List<ColumnBatch> tables = new LinkedList<>();
tables.add(new CudfColumnBatch(X_0, y_0, w_0, m_0, q_0));
tables.add(new CudfColumnBatch(X_1, y_1, w_1, m_1, q_1));
DMatrix dmat = new QuantileDMatrix(tables.iterator(), 0.0f, 256, 1);
float[] anchorLabel = convertFloatTofloat(label1, label2);
float[] anchorWeight = convertFloatTofloat(weight1, weight2);
float[] anchorBaseMargin = convertFloatTofloat(baseMargin1, baseMargin2);
TestCase.assertTrue(Arrays.equals(anchorLabel, dmat.getLabel()));
TestCase.assertTrue(Arrays.equals(anchorWeight, dmat.getWeight()));
TestCase.assertTrue(Arrays.equals(anchorBaseMargin, dmat.getBaseMargin()));
TestCase.assertTrue(Arrays.equals(expectedGroup, dmat.getGroup()));
}
}
private float[] convertFloatTofloat(Float[]... datas) {
int totalLength = 0;
for (Float[] data : datas) {
totalLength += data.length;
}
float[] floatArray = new float[totalLength];
int index = 0;
for (Float[] data : datas) {
for (int i = 0; i < data.length; i++) {
floatArray[i + index] = data[i];
}
index += data.length;
}
return floatArray;
}
}

View File

@@ -0,0 +1,85 @@
/*
Copyright (c) 2021-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ml.dmlc.xgboost4j.scala
import ai.rapids.cudf.Table
import ml.dmlc.xgboost4j.java.CudfColumnBatch
import org.scalatest.funsuite.AnyFunSuite
import scala.collection.mutable.ArrayBuffer
class QuantileDMatrixSuite extends AnyFunSuite {
test("QuantileDMatrix test") {
val label1 = Array[java.lang.Float](25f, 21f, 22f, 20f, 24f)
val weight1 = Array[java.lang.Float](1.3f, 2.31f, 0.32f, 3.3f, 1.34f)
val baseMargin1 = Array[java.lang.Float](1.2f, 0.2f, 1.3f, 2.4f, 3.5f)
val group1 = Array[java.lang.Integer](1, 1, 7, 7, 19, 26)
val label2 = Array[java.lang.Float](9f, 5f, 4f, 10f, 12f)
val weight2 = Array[java.lang.Float](3.0f, 1.3f, 3.2f, 0.3f, 1.34f)
val baseMargin2 = Array[java.lang.Float](0.2f, 2.5f, 3.1f, 4.4f, 2.2f)
val group2 = Array[java.lang.Integer](30, 30, 30, 40, 40)
val expectedGroup = Array(0, 2, 4, 5, 6, 9, 11)
withResource(new Table.TestBuilder()
.column(1.2f, null.asInstanceOf[java.lang.Float], 5.2f, 7.2f, 9.2f)
.column(0.2f, 0.4f, 0.6f, 2.6f, 0.10f.asInstanceOf[java.lang.Float])
.build) { X_0 =>
withResource(new Table.TestBuilder().column(label1: _*).build) { y_0 =>
withResource(new Table.TestBuilder().column(weight1: _*).build) { w_0 =>
withResource(new Table.TestBuilder().column(baseMargin1: _*).build) { m_0 =>
withResource(new Table.TestBuilder().column(group1: _*).build) { q_0 =>
withResource(new Table.TestBuilder()
.column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f.asInstanceOf[java.lang.Float])
.column(1.2f, 1.4f, null.asInstanceOf[java.lang.Float], 12.6f, 10.10f).build) {
X_1 =>
withResource(new Table.TestBuilder().column(label2: _*).build) { y_1 =>
withResource(new Table.TestBuilder().column(weight2: _*).build) { w_1 =>
withResource(new Table.TestBuilder().column(baseMargin2: _*).build) { m_1 =>
withResource(new Table.TestBuilder().column(group2: _*).build) { q_2 =>
val batches = new ArrayBuffer[CudfColumnBatch]()
batches += new CudfColumnBatch(X_0, y_0, w_0, m_0, q_0)
batches += new CudfColumnBatch(X_1, y_1, w_1, m_1, q_2)
val dmatrix = new QuantileDMatrix(batches.toIterator, 0.0f, 8, 1)
assert(dmatrix.getLabel.sameElements(label1 ++ label2))
assert(dmatrix.getWeight.sameElements(weight1 ++ weight2))
assert(dmatrix.getBaseMargin.sameElements(baseMargin1 ++ baseMargin2))
assert(dmatrix.getGroup().sameElements(expectedGroup))
}
}
}
}
}
}
}
}
}
}
}
/** Executes the provided code block and then closes the resource */
private def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = {
try {
block(r)
} finally {
r.close()
}
}
}