[jvm-packages] remove xgboost4j-gpu and rework cudf column (#10630)
This commit is contained in:
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
Copyright (c) 2021-2024 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package ml.dmlc.xgboost4j.java;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import ai.rapids.cudf.*;
|
||||
import junit.framework.TestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests the BoosterTest trained by DMatrix
|
||||
*
|
||||
* @throws XGBoostError
|
||||
*/
|
||||
public class BoosterTest {
|
||||
|
||||
@Test
|
||||
public void testBooster() throws XGBoostError {
|
||||
String trainingDataPath = getClass().getClassLoader()
|
||||
.getResource("veterans_lung_cancer.csv").getPath();
|
||||
Schema schema = Schema.builder()
|
||||
.column(DType.FLOAT32, "A")
|
||||
.column(DType.FLOAT32, "B")
|
||||
.column(DType.FLOAT32, "C")
|
||||
.column(DType.FLOAT32, "D")
|
||||
|
||||
.column(DType.FLOAT32, "E")
|
||||
.column(DType.FLOAT32, "F")
|
||||
.column(DType.FLOAT32, "G")
|
||||
.column(DType.FLOAT32, "H")
|
||||
|
||||
.column(DType.FLOAT32, "I")
|
||||
.column(DType.FLOAT32, "J")
|
||||
.column(DType.FLOAT32, "K")
|
||||
.column(DType.FLOAT32, "L")
|
||||
|
||||
.column(DType.FLOAT32, "label")
|
||||
.build();
|
||||
CSVOptions opts = CSVOptions.builder()
|
||||
.hasHeader().build();
|
||||
|
||||
int maxBin = 16;
|
||||
int round = 10;
|
||||
//set params
|
||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||
{
|
||||
put("max_depth", 2);
|
||||
put("objective", "binary:logistic");
|
||||
put("num_round", round);
|
||||
put("num_workers", 1);
|
||||
put("tree_method", "hist");
|
||||
put("device", "cuda");
|
||||
put("max_bin", maxBin);
|
||||
}
|
||||
};
|
||||
|
||||
try (Table tmpTable = Table.readCSV(schema, opts, new File(trainingDataPath))) {
|
||||
ColumnVector[] df = new ColumnVector[10];
|
||||
// exclude the first two columns, they are label bounds and contain inf.
|
||||
for (int i = 2; i < 12; ++i) {
|
||||
df[i - 2] = tmpTable.getColumn(i);
|
||||
}
|
||||
try (Table X = new Table(df);) {
|
||||
ColumnVector[] labels = new ColumnVector[1];
|
||||
labels[0] = tmpTable.getColumn(12);
|
||||
|
||||
try (Table y = new Table(labels);) {
|
||||
|
||||
CudfColumnBatch batch = new CudfColumnBatch(X, y, null, null, null);
|
||||
CudfColumn labelColumn = CudfColumn.from(tmpTable.getColumn(12));
|
||||
|
||||
//set watchList
|
||||
HashMap<String, DMatrix> watches = new HashMap<>();
|
||||
|
||||
DMatrix dMatrix1 = new DMatrix(batch, Float.NaN, 1);
|
||||
dMatrix1.setLabel(labelColumn);
|
||||
watches.put("train", dMatrix1);
|
||||
Booster model1 = XGBoost.train(dMatrix1, paramMap, round, watches, null, null);
|
||||
|
||||
List<ColumnBatch> tables = new LinkedList<>();
|
||||
tables.add(batch);
|
||||
DMatrix incrementalDMatrix = new QuantileDMatrix(tables.iterator(), Float.NaN, maxBin, 1);
|
||||
//set watchList
|
||||
HashMap<String, DMatrix> watches1 = new HashMap<>();
|
||||
watches1.put("train", incrementalDMatrix);
|
||||
Booster model2 = XGBoost.train(incrementalDMatrix, paramMap, round, watches1, null, null);
|
||||
|
||||
float[][] predicat1 = model1.predict(dMatrix1);
|
||||
float[][] predicat2 = model2.predict(dMatrix1);
|
||||
|
||||
for (int i = 0; i < tmpTable.getRowCount(); i++) {
|
||||
TestCase.assertTrue(predicat1[i][0] - predicat2[i][0] < 1e-6);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
Copyright (c) 2021-2024 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package ml.dmlc.xgboost4j.java;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import ai.rapids.cudf.Table;
|
||||
import junit.framework.TestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
|
||||
/**
|
||||
* Test suite for DMatrix based on GPU
|
||||
*/
|
||||
public class DMatrixTest {
|
||||
|
||||
@Test
|
||||
public void testCreateFromArrayInterfaceColumns() {
|
||||
Float[] labelFloats = new Float[]{2f, 4f, 6f, 8f, 10f};
|
||||
Integer[] groups = new Integer[]{1, 1, 7, 7, 19, 26};
|
||||
int[] expectedGroup = new int[]{0, 2, 4, 5, 6};
|
||||
|
||||
Throwable ex = null;
|
||||
try (
|
||||
Table X = new Table.TestBuilder().column(1.f, null, 5.f, 7.f, 9.f).build();
|
||||
Table y = new Table.TestBuilder().column(labelFloats).build();
|
||||
Table w = new Table.TestBuilder().column(labelFloats).build();
|
||||
Table q = new Table.TestBuilder().column(groups).build();
|
||||
Table margin = new Table.TestBuilder().column(labelFloats).build();) {
|
||||
|
||||
CudfColumnBatch cudfDataFrame = new CudfColumnBatch(X, y, w, null, null);
|
||||
|
||||
CudfColumn labelColumn = CudfColumn.from(y.getColumn(0));
|
||||
CudfColumn weightColumn = CudfColumn.from(w.getColumn(0));
|
||||
CudfColumn baseMarginColumn = CudfColumn.from(margin.getColumn(0));
|
||||
CudfColumn qidColumn = CudfColumn.from(q.getColumn(0));
|
||||
|
||||
DMatrix dMatrix = new DMatrix(cudfDataFrame, 0, 1);
|
||||
dMatrix.setLabel(labelColumn);
|
||||
dMatrix.setWeight(weightColumn);
|
||||
dMatrix.setBaseMargin(baseMarginColumn);
|
||||
dMatrix.setQueryId(qidColumn);
|
||||
|
||||
String[] featureNames = new String[]{"f1"};
|
||||
dMatrix.setFeatureNames(featureNames);
|
||||
String[] retFeatureNames = dMatrix.getFeatureNames();
|
||||
assertArrayEquals(featureNames, retFeatureNames);
|
||||
|
||||
String[] featureTypes = new String[]{"i"};
|
||||
dMatrix.setFeatureTypes(featureTypes);
|
||||
String[] retFeatureTypes = dMatrix.getFeatureTypes();
|
||||
assertArrayEquals(featureTypes, retFeatureTypes);
|
||||
|
||||
float[] anchor = convertFloatTofloat(labelFloats);
|
||||
float[] label = dMatrix.getLabel();
|
||||
float[] weight = dMatrix.getWeight();
|
||||
float[] baseMargin = dMatrix.getBaseMargin();
|
||||
int[] group = dMatrix.getGroup();
|
||||
|
||||
TestCase.assertTrue(Arrays.equals(anchor, label));
|
||||
TestCase.assertTrue(Arrays.equals(anchor, weight));
|
||||
TestCase.assertTrue(Arrays.equals(anchor, baseMargin));
|
||||
TestCase.assertTrue(Arrays.equals(expectedGroup, group));
|
||||
} catch (Throwable e) {
|
||||
ex = e;
|
||||
e.printStackTrace();
|
||||
}
|
||||
TestCase.assertNull(ex);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateFromColumnDataIterator() throws XGBoostError {
|
||||
|
||||
Float[] label1 = {25f, 21f, 22f, 20f, 24f};
|
||||
Float[] weight1 = {1.3f, 2.31f, 0.32f, 3.3f, 1.34f};
|
||||
Float[] baseMargin1 = {1.2f, 0.2f, 1.3f, 2.4f, 3.5f};
|
||||
Integer[] groups1 = new Integer[]{1, 1, 7, 7, 19, 26};
|
||||
|
||||
Float[] label2 = {9f, 5f, 4f, 10f, 12f};
|
||||
Float[] weight2 = {3.0f, 1.3f, 3.2f, 0.3f, 1.34f};
|
||||
Float[] baseMargin2 = {0.2f, 2.5f, 3.1f, 4.4f, 2.2f};
|
||||
Integer[] groups2 = new Integer[]{30, 30, 30, 40, 40};
|
||||
|
||||
int[] expectedGroup = new int[]{0, 2, 4, 5, 6, 9, 11};
|
||||
|
||||
try (
|
||||
Table X_0 = new Table.TestBuilder()
|
||||
.column(1.2f, null, 5.2f, 7.2f, 9.2f)
|
||||
.column(0.2f, 0.4f, 0.6f, 2.6f, 0.10f)
|
||||
.build();
|
||||
Table y_0 = new Table.TestBuilder().column(label1).build();
|
||||
Table w_0 = new Table.TestBuilder().column(weight1).build();
|
||||
Table m_0 = new Table.TestBuilder().column(baseMargin1).build();
|
||||
Table q_0 = new Table.TestBuilder().column(groups1).build();
|
||||
|
||||
Table X_1 = new Table.TestBuilder().column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f)
|
||||
.column(1.2f, 1.4f, null, 12.6f, 10.10f).build();
|
||||
Table y_1 = new Table.TestBuilder().column(label2).build();
|
||||
Table w_1 = new Table.TestBuilder().column(weight2).build();
|
||||
Table m_1 = new Table.TestBuilder().column(baseMargin2).build();) {
|
||||
Table q_1 = new Table.TestBuilder().column(groups2).build();
|
||||
|
||||
List<ColumnBatch> tables = new LinkedList<>();
|
||||
|
||||
tables.add(new CudfColumnBatch(X_0, y_0, w_0, m_0, q_0));
|
||||
tables.add(new CudfColumnBatch(X_1, y_1, w_1, m_1, q_1));
|
||||
|
||||
DMatrix dmat = new QuantileDMatrix(tables.iterator(), 0.0f, 256, 1);
|
||||
|
||||
float[] anchorLabel = convertFloatTofloat(label1, label2);
|
||||
float[] anchorWeight = convertFloatTofloat(weight1, weight2);
|
||||
float[] anchorBaseMargin = convertFloatTofloat(baseMargin1, baseMargin2);
|
||||
|
||||
TestCase.assertTrue(Arrays.equals(anchorLabel, dmat.getLabel()));
|
||||
TestCase.assertTrue(Arrays.equals(anchorWeight, dmat.getWeight()));
|
||||
TestCase.assertTrue(Arrays.equals(anchorBaseMargin, dmat.getBaseMargin()));
|
||||
TestCase.assertTrue(Arrays.equals(expectedGroup, dmat.getGroup()));
|
||||
}
|
||||
}
|
||||
|
||||
private float[] convertFloatTofloat(Float[]... datas) {
|
||||
int totalLength = 0;
|
||||
for (Float[] data : datas) {
|
||||
totalLength += data.length;
|
||||
}
|
||||
float[] floatArray = new float[totalLength];
|
||||
int index = 0;
|
||||
for (Float[] data : datas) {
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
floatArray[i + index] = data[i];
|
||||
}
|
||||
index += data.length;
|
||||
}
|
||||
return floatArray;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
Copyright (c) 2021-2024 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package ml.dmlc.xgboost4j.scala
|
||||
|
||||
import ai.rapids.cudf.Table
|
||||
import ml.dmlc.xgboost4j.java.CudfColumnBatch
|
||||
import org.scalatest.funsuite.AnyFunSuite
|
||||
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
/**
 * Checks that a QuantileDMatrix assembled from two cuDF column batches reports the
 * concatenated labels, weights and base margins of both batches, plus the expected
 * `getGroup` values.
 */
class QuantileDMatrixSuite extends AnyFunSuite {

  test("QuantileDMatrix test") {

    // Metadata of the first batch.
    val labelsA = Array[java.lang.Float](25f, 21f, 22f, 20f, 24f)
    val weightsA = Array[java.lang.Float](1.3f, 2.31f, 0.32f, 3.3f, 1.34f)
    val marginsA = Array[java.lang.Float](1.2f, 0.2f, 1.3f, 2.4f, 3.5f)
    val qidsA = Array[java.lang.Integer](1, 1, 7, 7, 19, 26)

    // Metadata of the second batch.
    val labelsB = Array[java.lang.Float](9f, 5f, 4f, 10f, 12f)
    val weightsB = Array[java.lang.Float](3.0f, 1.3f, 3.2f, 0.3f, 1.34f)
    val marginsB = Array[java.lang.Float](0.2f, 2.5f, 3.1f, 4.4f, 2.2f)
    val qidsB = Array[java.lang.Integer](30, 30, 30, 40, 40)

    // Values the assembled matrix is expected to report.
    val expectedLabels = labelsA ++ labelsB
    val expectedWeights = weightsA ++ weightsB
    val expectedMargins = marginsA ++ marginsB
    val expectedGroup = Array(0, 2, 4, 5, 6, 9, 11)

    // Each table is wrapped in its own withResource so it is closed once the nested
    // block finishes; the innermost block runs while all ten tables are still open.
    withResource(new Table.TestBuilder()
      .column(1.2f, null.asInstanceOf[java.lang.Float], 5.2f, 7.2f, 9.2f)
      .column(0.2f, 0.4f, 0.6f, 2.6f, 0.10f.asInstanceOf[java.lang.Float])
      .build) { featuresA =>
      withResource(new Table.TestBuilder().column(labelsA: _*).build) { labelTableA =>
        withResource(new Table.TestBuilder().column(weightsA: _*).build) { weightTableA =>
          withResource(new Table.TestBuilder().column(marginsA: _*).build) { marginTableA =>
            withResource(new Table.TestBuilder().column(qidsA: _*).build) { qidTableA =>
              withResource(new Table.TestBuilder()
                .column(11.2f, 11.2f, 15.2f, 17.2f, 19.2f.asInstanceOf[java.lang.Float])
                .column(1.2f, 1.4f, null.asInstanceOf[java.lang.Float], 12.6f, 10.10f).build) {
                featuresB =>
                  withResource(new Table.TestBuilder().column(labelsB: _*).build) { labelTableB =>
                    withResource(new Table.TestBuilder().column(weightsB: _*).build) {
                      weightTableB =>
                        withResource(new Table.TestBuilder().column(marginsB: _*).build) {
                          marginTableB =>
                            withResource(new Table.TestBuilder().column(qidsB: _*).build) {
                              qidTableB =>
                                val batchA = new CudfColumnBatch(
                                  featuresA, labelTableA, weightTableA, marginTableA, qidTableA)
                                val batchB = new CudfColumnBatch(
                                  featuresB, labelTableB, weightTableB, marginTableB, qidTableB)
                                val batches = ArrayBuffer[CudfColumnBatch](batchA, batchB)

                                val dmatrix = new QuantileDMatrix(batches.iterator, 0.0f, 8, 1)

                                assert(dmatrix.getLabel.sameElements(expectedLabels))
                                assert(dmatrix.getWeight.sameElements(expectedWeights))
                                assert(dmatrix.getBaseMargin.sameElements(expectedMargins))
                                assert(dmatrix.getGroup().sameElements(expectedGroup))
                            }
                        }
                    }
                  }
              }
            }
          }
        }
      }
    }
  }

  /** Runs `block` with the resource and closes the resource afterwards, even if `block` throws. */
  private def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = {
    try block(r)
    finally r.close()
  }
}
|
||||
Reference in New Issue
Block a user