[jvm-packages] remove xgboost4j-gpu and rework cudf column (#10630)

This commit is contained in:
Bobby Wang
2024-07-25 15:31:16 +08:00
committed by GitHub
parent fcae6301ec
commit d5834b68c3
26 changed files with 509 additions and 632 deletions

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 by Contributors
Copyright (c) 2021-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,24 +17,17 @@
package ml.dmlc.xgboost4j.java;
/**
* The abstracted XGBoost Column to get the cuda array interface which is used to
* set the information for DMatrix.
*
* This Column abstraction provides an array interface JSON string, which is
* used to reconstruct columnar data within the XGBoost library.
*/
public abstract class Column implements AutoCloseable {
/**
* Get the cuda array interface json string for the Column, which can represent a
* weight, label, or base margin column.
*
* This API will be called by
* {@link DMatrix#setLabel(Column)}
* {@link DMatrix#setWeight(Column)}
* {@link DMatrix#setBaseMargin(Column)}
* Return the array interface json string for this Column.
*/
public abstract String getArrayInterfaceJson();
public abstract String toJson();
@Override
public void close() throws Exception {}
public void close() throws Exception {
}
}

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 by Contributors
Copyright (c) 2021-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -16,78 +16,12 @@
package ml.dmlc.xgboost4j.java;
import java.util.Iterator;
/**
* The abstracted XGBoost ColumnBatch to get array interface from columnar data format.
* For example, the cuDF dataframe which employs apache arrow specification.
* This class wraps multiple Columns and provides the array interface json
* for all columns.
*/
public abstract class ColumnBatch implements AutoCloseable {
/**
* Get the cuda array interface json string for the whole ColumnBatch including
* the must-have feature, label columns and the optional weight, base margin columns.
*
* This function is called by native code during iteration and could be made a private
* method. We keep it public simply to silence the linter.
*/
public final String getArrayInterfaceJson() {
StringBuilder builder = new StringBuilder();
builder.append("{");
String featureStr = this.getFeatureArrayInterface();
if (featureStr == null || featureStr.isEmpty()) {
throw new RuntimeException("Feature array interface must not be empty");
} else {
builder.append("\"features_str\":" + featureStr);
}
String labelStr = this.getLabelsArrayInterface();
if (labelStr == null || labelStr.isEmpty()) {
throw new RuntimeException("Label array interface must not be empty");
} else {
builder.append(",\"label_str\":" + labelStr);
}
String weightStr = getWeightsArrayInterface();
if (weightStr != null && ! weightStr.isEmpty()) {
builder.append(",\"weight_str\":" + weightStr);
}
String baseMarginStr = getBaseMarginsArrayInterface();
if (baseMarginStr != null && ! baseMarginStr.isEmpty()) {
builder.append(",\"basemargin_str\":" + baseMarginStr);
}
builder.append("}");
return builder.toString();
}
/**
* Get the cuda array interface of the feature columns.
* The returned value must not be null or empty
*/
public abstract String getFeatureArrayInterface();
/**
* Get the cuda array interface of the label columns.
* The returned value must not be null or empty if we're creating
* {@link QuantileDMatrix#QuantileDMatrix(Iterator, float, int, int)}
*/
public abstract String getLabelsArrayInterface();
/**
* Get the cuda array interface of the weight columns.
* The returned value can be null or empty
*/
public abstract String getWeightsArrayInterface();
/**
* Get the cuda array interface of the base margin columns.
* The returned value can be null or empty
*/
public abstract String getBaseMarginsArrayInterface();
@Override
public void close() throws Exception {}
public abstract class ColumnBatch extends Column {
/** Get features cuda array interface json string */
public abstract String toFeaturesJson();
}

View File

@@ -195,7 +195,7 @@ public class DMatrix {
*/
public DMatrix(ColumnBatch columnBatch, float missing, int nthread) throws XGBoostError {
long[] out = new long[1];
String json = columnBatch.getFeatureArrayInterface();
String json = columnBatch.toFeaturesJson();
if (json == null || json.isEmpty()) {
throw new XGBoostError("Expecting non-empty feature columns' array interface");
}
@@ -228,7 +228,7 @@ public class DMatrix {
* @throws XGBoostError native error
*/
public void setQueryId(Column column) throws XGBoostError {
setXGBDMatrixInfo("qid", column.getArrayInterfaceJson());
setXGBDMatrixInfo("qid", column.toJson());
}
private void setXGBDMatrixInfo(String type, String json) throws XGBoostError {
@@ -362,7 +362,7 @@ public class DMatrix {
* @throws XGBoostError native error
*/
public void setLabel(Column column) throws XGBoostError {
setXGBDMatrixInfo("label", column.getArrayInterfaceJson());
setXGBDMatrixInfo("label", column.toJson());
}
/**
@@ -393,7 +393,7 @@ public class DMatrix {
* @throws XGBoostError native error
*/
public void setWeight(Column column) throws XGBoostError {
setXGBDMatrixInfo("weight", column.getArrayInterfaceJson());
setXGBDMatrixInfo("weight", column.toJson());
}
/**
@@ -421,7 +421,7 @@ public class DMatrix {
* @throws XGBoostError native error
*/
public void setBaseMargin(Column column) throws XGBoostError {
setXGBDMatrixInfo("base_margin", column.getArrayInterfaceJson());
setXGBDMatrixInfo("base_margin", column.toJson());
}
/**