[jvm-packages] refactor xgboost read/write (#7956)
1. Removed the duplicated Default XGBoost read/write which is copied from spark 2.3.x 2. Put some utils into util package
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 by Contributors
|
||||
Copyright (c) 2014-2022 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -18,7 +18,8 @@ package ml.dmlc.xgboost4j.scala.spark
|
||||
|
||||
import org.apache.spark.ml.linalg.Vectors
|
||||
import org.scalatest.FunSuite
|
||||
import ml.dmlc.xgboost4j.scala.spark.DataUtils.PackedParams
|
||||
import ml.dmlc.xgboost4j.scala.spark.util.DataUtils
|
||||
import ml.dmlc.xgboost4j.scala.spark.util.DataUtils.PackedParams
|
||||
|
||||
import org.apache.spark.sql.functions._
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ class FeatureSizeValidatingSuite extends FunSuite with PerTest {
|
||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||
"objective" -> "binary:logistic",
|
||||
"num_round" -> 5, "num_workers" -> 2, "use_external_memory" -> true, "missing" -> 0)
|
||||
import DataUtils._
|
||||
import ml.dmlc.xgboost4j.scala.spark.util.DataUtils._
|
||||
val sparkSession = ss
|
||||
import sparkSession.implicits._
|
||||
val repartitioned = sc.parallelize(Synthetic.trainWithDiffFeatureSize, 2)
|
||||
|
||||
@@ -73,7 +73,7 @@ trait PerTest extends BeforeAndAfterEach { self: FunSuite =>
|
||||
protected def buildDataFrame(
|
||||
labeledPoints: Seq[XGBLabeledPoint],
|
||||
numPartitions: Int = numWorkers): DataFrame = {
|
||||
import DataUtils._
|
||||
import ml.dmlc.xgboost4j.scala.spark.util.DataUtils._
|
||||
val it = labeledPoints.iterator.zipWithIndex
|
||||
.map { case (labeledPoint: XGBLabeledPoint, id: Int) =>
|
||||
(id, labeledPoint.label, labeledPoint.features)
|
||||
@@ -98,7 +98,7 @@ trait PerTest extends BeforeAndAfterEach { self: FunSuite =>
|
||||
protected def buildDataFrameWithGroup(
|
||||
labeledPoints: Seq[XGBLabeledPoint],
|
||||
numPartitions: Int = numWorkers): DataFrame = {
|
||||
import DataUtils._
|
||||
import ml.dmlc.xgboost4j.scala.spark.util.DataUtils._
|
||||
val it = labeledPoints.iterator.zipWithIndex
|
||||
.map { case (labeledPoint: XGBLabeledPoint, id: Int) =>
|
||||
(id, labeledPoint.label, labeledPoint.features, labeledPoint.group)
|
||||
|
||||
@@ -310,7 +310,7 @@ class XGBoostClassifierSuite extends FunSuite with PerTest with TmpFolderPerSuit
|
||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||
"objective" -> "binary:logistic",
|
||||
"num_round" -> 5, "num_workers" -> 2, "missing" -> 0)
|
||||
import DataUtils._
|
||||
import ml.dmlc.xgboost4j.scala.spark.util.DataUtils._
|
||||
val sparkSession = SparkSession.builder().getOrCreate()
|
||||
import sparkSession.implicits._
|
||||
val repartitioned = sc.parallelize(Synthetic.train, 3).map(lp => (lp.label, lp)).partitionBy(
|
||||
@@ -331,7 +331,7 @@ class XGBoostClassifierSuite extends FunSuite with PerTest with TmpFolderPerSuit
|
||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||
"objective" -> "binary:logistic",
|
||||
"num_round" -> 5, "num_workers" -> 2, "use_external_memory" -> true, "missing" -> 0)
|
||||
import DataUtils._
|
||||
import ml.dmlc.xgboost4j.scala.spark.util.DataUtils._
|
||||
val sparkSession = SparkSession.builder().getOrCreate()
|
||||
import sparkSession.implicits._
|
||||
val repartitioned = sc.parallelize(Synthetic.train, 3).map(lp => (lp.label, lp)).partitionBy(
|
||||
|
||||
Reference in New Issue
Block a user