diff --git a/dev/change_scala_version.py b/dev/change_scala_version.py
index d9438f76a..c8a9b54cc 100644
--- a/dev/change_scala_version.py
+++ b/dev/change_scala_version.py
@@ -33,7 +33,6 @@ def main(args):
for artifact in [
"xgboost-jvm",
"xgboost4j",
- "xgboost4j-gpu",
"xgboost4j-spark",
"xgboost4j-spark-gpu",
"xgboost4j-flink",
diff --git a/jvm-packages/CMakeLists.txt b/jvm-packages/CMakeLists.txt
index 83f17f1a8..c5488e69d 100644
--- a/jvm-packages/CMakeLists.txt
+++ b/jvm-packages/CMakeLists.txt
@@ -2,11 +2,11 @@ find_package(JNI REQUIRED)
list(APPEND JVM_SOURCES
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j.cpp
- ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cpp)
+ ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cpp)
if(USE_CUDA)
list(APPEND JVM_SOURCES
- ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu)
+ ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu)
endif()
add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})
diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py
index a4d64e148..81b13c398 100755
--- a/jvm-packages/create_jni.py
+++ b/jvm-packages/create_jni.py
@@ -131,14 +131,6 @@ def native_build(args):
run("cmake .. " + " ".join(args))
run("cmake --build . --config Release" + maybe_parallel_build)
- with cd("demo/CLI/regression"):
- run(f'"{sys.executable}" mapfeat.py')
- run(f'"{sys.executable}" mknfold.py machine.txt 1')
-
- xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" else "xgboost4j"
- xgboost4j_spark = (
- "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" else "xgboost4j-spark"
- )
print("copying native library")
library_name, os_folder = {
@@ -155,26 +147,34 @@ def native_build(args):
"arm64": "aarch64", # on macOS & Windows ARM 64-bit
"aarch64": "aarch64",
}[platform.machine().lower()]
- output_folder = "{}/src/main/resources/lib/{}/{}".format(
- xgboost4j, os_folder, arch_folder
+ output_folder = "xgboost4j/src/main/resources/lib/{}/{}".format(
+ os_folder, arch_folder
)
maybe_makedirs(output_folder)
cp("../lib/" + library_name, output_folder)
print("copying train/test files")
- maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
+
+ # for xgboost4j
+ maybe_makedirs("xgboost4j/src/test/resources")
+ for file in glob.glob("../demo/data/agaricus.*"):
+ cp(file, "xgboost4j/src/test/resources")
+
+ # for xgboost4j-spark
+ maybe_makedirs("xgboost4j-spark/src/test/resources")
with cd("../demo/CLI/regression"):
run(f'"{sys.executable}" mapfeat.py')
run(f'"{sys.executable}" mknfold.py machine.txt 1')
-
for file in glob.glob("../demo/CLI/regression/machine.txt.t*"):
- cp(file, "{}/src/test/resources".format(xgboost4j_spark))
+ cp(file, "xgboost4j-spark/src/test/resources")
for file in glob.glob("../demo/data/agaricus.*"):
- cp(file, "{}/src/test/resources".format(xgboost4j_spark))
+ cp(file, "xgboost4j-spark/src/test/resources")
- maybe_makedirs("{}/src/test/resources".format(xgboost4j))
- for file in glob.glob("../demo/data/agaricus.*"):
- cp(file, "{}/src/test/resources".format(xgboost4j))
+ # for xgboost4j-spark-gpu
+ if cli_args.use_cuda == "ON":
+ maybe_makedirs("xgboost4j-spark-gpu/src/test/resources")
+ for file in glob.glob("../demo/data/veterans_lung_cancer.csv"):
+ cp(file, "xgboost4j-spark-gpu/src/test/resources")
if __name__ == "__main__":
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index a154f2d48..318d89cad 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -37,7 +37,7 @@
4.13.2
3.5.1
3.5.1
- 2.17.2
+ 2.15.0
2.12.18
2.12
3.4.0
@@ -105,7 +105,7 @@
ON
- xgboost4j-gpu
+ xgboost4j
xgboost4j-spark-gpu
@@ -117,7 +117,6 @@
xgboost4j-example
xgboost4j-spark
xgboost4j-flink
- xgboost4j-gpu
xgboost4j-spark-gpu
@@ -243,7 +242,6 @@
xgboost4j-example
xgboost4j-spark
xgboost4j-flink
- xgboost4j-gpu
xgboost4j-spark-gpu
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
deleted file mode 100644
index 600c5ffeb..000000000
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ /dev/null
@@ -1,140 +0,0 @@
-
-
- 4.0.0
-
- ml.dmlc
- xgboost-jvm_2.12
- 2.2.0-SNAPSHOT
-
- xgboost4j-gpu_2.12
- xgboost4j-gpu
- 2.2.0-SNAPSHOT
- jar
-
-
-
- org.scala-lang
- scala-compiler
- ${scala.version}
-
-
- org.scala-lang
- scala-library
- ${scala.version}
-
-
- org.scala-lang.modules
- scala-collection-compat_${scala.binary.version}
- ${scala-collection-compat.version}
-
-
- ai.rapids
- cudf
- ${cudf.version}
- ${cudf.classifier}
- provided
-
-
- org.apache.hadoop
- hadoop-hdfs
- ${hadoop.version}
- provided
-
-
- org.apache.hadoop
- hadoop-common
- ${hadoop.version}
- provided
-
-
- junit
- junit
- ${junit.version}
- test
-
-
- org.scalatest
- scalatest_${scala.binary.version}
- ${scalatest.version}
- provided
-
-
- org.apache.commons
- commons-lang3
- 3.14.0
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
- 3.7.0
-
- protected
- true
-
-
-
- org.apache.maven.plugins
- maven-assembly-plugin
-
- false
-
-
-
- exec-maven-plugin
- org.codehaus.mojo
- 3.3.0
-
-
- native
- generate-sources
-
- exec
-
-
- python
-
- create_jni.py
- --log-capi-invocation
- ${log.capi.invocation}
- --use-cuda
- ${use.cuda}
-
- ${user.dir}
- ${skip.native.build}
-
-
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
- 3.4.1
-
-
-
- test-jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-resources-plugin
- 3.3.1
-
-
- dll
- dylib
- so
-
-
-
-
-
-
diff --git a/jvm-packages/xgboost4j-gpu/src/main/java/ml/dmlc/xgboost4j/gpu/java/CudfColumn.java b/jvm-packages/xgboost4j-gpu/src/main/java/ml/dmlc/xgboost4j/gpu/java/CudfColumn.java
deleted file mode 100644
index ebbd802e4..000000000
--- a/jvm-packages/xgboost4j-gpu/src/main/java/ml/dmlc/xgboost4j/gpu/java/CudfColumn.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- Copyright (c) 2021 by Contributors
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
-
-package ml.dmlc.xgboost4j.gpu.java;
-
-import ai.rapids.cudf.BaseDeviceMemoryBuffer;
-import ai.rapids.cudf.BufferType;
-import ai.rapids.cudf.ColumnVector;
-import ai.rapids.cudf.DType;
-
-import ml.dmlc.xgboost4j.java.Column;
-
-/**
- * This class is composing of base data with Apache Arrow format from Cudf ColumnVector.
- * It will be used to generate the cuda array interface.
- */
-public class CudfColumn extends Column {
-
- private final long dataPtr; // gpu data buffer address
- private final long shape; // row count
- private final long validPtr; // gpu valid buffer address
- private final int typeSize; // type size in bytes
- private final String typeStr; // follow array interface spec
- private final long nullCount; // null count
-
- private String arrayInterface = null; // the cuda array interface
-
- public static CudfColumn from(ColumnVector cv) {
- BaseDeviceMemoryBuffer dataBuffer = cv.getDeviceBufferFor(BufferType.DATA);
- BaseDeviceMemoryBuffer validBuffer = cv.getDeviceBufferFor(BufferType.VALIDITY);
- long validPtr = 0;
- if (validBuffer != null) {
- validPtr = validBuffer.getAddress();
- }
- DType dType = cv.getType();
- String typeStr = "";
- if (dType == DType.FLOAT32 || dType == DType.FLOAT64 ||
- dType == DType.TIMESTAMP_DAYS || dType == DType.TIMESTAMP_MICROSECONDS ||
- dType == DType.TIMESTAMP_MILLISECONDS || dType == DType.TIMESTAMP_NANOSECONDS ||
- dType == DType.TIMESTAMP_SECONDS) {
- typeStr = " table.getColumn(i))
- .map(CudfColumn::from)
- .toArray(CudfColumn[]::new);
- }
-
-}
diff --git a/jvm-packages/xgboost4j-gpu/src/main/java/ml/dmlc/xgboost4j/gpu/java/CudfUtils.java b/jvm-packages/xgboost4j-gpu/src/main/java/ml/dmlc/xgboost4j/gpu/java/CudfUtils.java
deleted file mode 100644
index f7071dcd5..000000000
--- a/jvm-packages/xgboost4j-gpu/src/main/java/ml/dmlc/xgboost4j/gpu/java/CudfUtils.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- Copyright (c) 2021-2022 by Contributors
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
-
-package ml.dmlc.xgboost4j.gpu.java;
-
-import java.util.ArrayList;
-
-/**
- * Cudf utilities to build cuda array interface against {@link CudfColumn}
- */
-class CudfUtils {
-
- /**
- * Build the cuda array interface based on CudfColumn(s)
- * @param cudfColumns the CudfColumn(s) to be built
- * @return the json format of cuda array interface
- */
- public static String buildArrayInterface(CudfColumn... cudfColumns) {
- return new Builder().add(cudfColumns).build();
- }
-
- // Helper class to build array interface string
- private static class Builder {
- private ArrayList colArrayInterfaces = new ArrayList();
-
- private Builder add(CudfColumn... columns) {
- if (columns == null || columns.length <= 0) {
- throw new IllegalArgumentException("At least one ColumnData is required.");
- }
- for (CudfColumn cd : columns) {
- colArrayInterfaces.add(buildColumnObject(cd));
- }
- return this;
- }
-
- private String build() {
- StringBuilder builder = new StringBuilder();
- builder.append("[");
- for (int i = 0; i < colArrayInterfaces.size(); i++) {
- builder.append(colArrayInterfaces.get(i));
- if (i != colArrayInterfaces.size() - 1) {
- builder.append(",");
- }
- }
- builder.append("]");
- return builder.toString();
- }
-
- /** build the whole column information including data and valid info */
- private String buildColumnObject(CudfColumn column) {
- if (column.getDataPtr() == 0) {
- throw new IllegalArgumentException("Empty column data is NOT accepted!");
- }
- if (column.getTypeStr() == null || column.getTypeStr().isEmpty()) {
- throw new IllegalArgumentException("Empty type string is NOT accepted!");
- }
-
- StringBuilder builder = new StringBuilder();
- String colData = buildMetaObject(column.getDataPtr(), column.getShape(),
- column.getTypeStr());
- builder.append("{");
- builder.append(colData);
- if (column.getValidPtr() != 0 && column.getNullCount() != 0) {
- String validString = buildMetaObject(column.getValidPtr(), column.getShape(), "
ml.dmlc
- xgboost4j-gpu_2.12
+ xgboost4j_2.12
${project.version}
@@ -51,5 +51,17 @@
${spark.rapids.version}
provided
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ ${fasterxml.jackson.version}
+ provided
+
+
+ junit
+ junit
+ ${junit.version}
+ test
+
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumn.java b/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumn.java
new file mode 100644
index 000000000..683ad024b
--- /dev/null
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/CudfColumn.java
@@ -0,0 +1,117 @@
+/*
+ Copyright (c) 2021-2024 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+package ml.dmlc.xgboost4j.java;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import ai.rapids.cudf.BaseDeviceMemoryBuffer;
+import ai.rapids.cudf.ColumnVector;
+import ai.rapids.cudf.DType;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * CudfColumn represents a CUDF column and provides its cuda array interface
+ */
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class CudfColumn extends Column {
+ private List shape = new ArrayList<>(); // row count
+ private List