[jvm-packages] update scala style configuration (#10836)

This commit is contained in:
Bobby Wang 2024-09-24 17:39:44 +08:00 committed by GitHub
parent 2a03685bff
commit f3df0d0eb4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 112 additions and 37 deletions

View File

@ -82,19 +82,27 @@ This file is divided into 3 sections:
</check> </check>
<check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
<parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters> <parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
<parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters> <parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
<parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters> <parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>
</parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
<parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters> <parameters>
<parameter name="maxParameters"><![CDATA[10]]></parameter>
</parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="false"></check> <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="false"></check>
@ -121,14 +129,16 @@ This file is divided into 3 sections:
<check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check> <check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
<parameters> <parameters>
<parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter> <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
</parameters> </parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
<parameters> <parameters>
<parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter> <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY,
LARROW, RARROW
</parameter>
</parameters> </parameters>
</check> </check>
@ -136,14 +146,18 @@ This file is divided into 3 sections:
<check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check> <check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
<check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">@VisibleForTesting</parameter></parameters> <parameters>
<parameter name="regex">@VisibleForTesting</parameter>
</parameters>
<customMessage><![CDATA[ <customMessage><![CDATA[
@VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615). @VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
]]></customMessage> ]]></customMessage>
</check> </check>
<check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters> <parameters>
<parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter>
</parameters>
<customMessage><![CDATA[ <customMessage><![CDATA[
Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
ShutdownHookManager.addShutdownHook instead. ShutdownHookManager.addShutdownHook instead.
@ -155,7 +169,9 @@ This file is divided into 3 sections:
</check> </check>
<check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters> <parameters>
<parameter name="regex">mutable\.SynchronizedBuffer</parameter>
</parameters>
<customMessage><![CDATA[ <customMessage><![CDATA[
Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
java.util.concurrent.ConcurrentLinkedQueue instead. java.util.concurrent.ConcurrentLinkedQueue instead.
@ -167,7 +183,9 @@ This file is divided into 3 sections:
</check> </check>
<check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">Class\.forName</parameter></parameters> <parameters>
<parameter name="regex">Class\.forName</parameter>
</parameters>
<customMessage><![CDATA[ <customMessage><![CDATA[
Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead. Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
If you must use Class.forName, wrap the code block with If you must use Class.forName, wrap the code block with
@ -179,17 +197,20 @@ This file is divided into 3 sections:
<!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters --> <!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
<check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true"> <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
<parameters><parameter name="regex">JavaConversions</parameter></parameters> <parameters>
<parameter name="regex">JavaConversions</parameter>
</parameters>
<customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage> scala.collection.JavaConverters._ and use .asScala / .asJava methods
</customMessage>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
<parameters> <parameters>
<parameter name="groups">java,scala,3rdParty,spark</parameter> <parameter name="groups">java,scala,3rdParty,dmlc</parameter>
<parameter name="group.java">javax?\..*</parameter> <parameter name="group.java">javax?\..*</parameter>
<parameter name="group.scala">scala\..*</parameter> <parameter name="group.scala">scala\..*</parameter>
<parameter name="group.3rdParty">(?!ml\.dmlc\.xgboost4j\.).*</parameter> <parameter name="group.3rdParty">(?!ml\.dmlc\.xgboost4j).*</parameter>
<parameter name="group.dmlc">ml.dmlc.xgboost4j.*</parameter> <parameter name="group.dmlc">ml.dmlc.xgboost4j.*</parameter>
</parameters> </parameters>
</check> </check>
@ -213,7 +234,7 @@ This file is divided into 3 sections:
<!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. --> <!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
<check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false"> <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
<parameters> <parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter> <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
</parameters> </parameters>
</check> </check>
@ -225,7 +246,9 @@ This file is divided into 3 sections:
<!-- ================================================================================ --> <!-- ================================================================================ -->
<check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false"> <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
<parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters> <parameters>
<parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter>
</parameters>
</check> </check>
<!-- We want the opposite of this: NewLineAtEofChecker --> <!-- We want the opposite of this: NewLineAtEofChecker -->
@ -245,33 +268,81 @@ This file is divided into 3 sections:
<!-- Doesn't seem super big deal here ... --> <!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false"> <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
<parameters><parameter name="maxFileLength">800></parameter></parameters> <parameters>
<parameter name="maxFileLength">800</parameter>
</parameters>
</check> </check>
<!-- Doesn't seem super big deal here ... --> <!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false"> <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
<parameters><parameter name="maxTypes">30</parameter></parameters> <parameters>
<parameter name="maxTypes">30</parameter>
</parameters>
</check> </check>
<!-- Doesn't seem super big deal here ... --> <!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false"> <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
<parameters><parameter name="maximum">10</parameter></parameters> <parameters>
<parameter name="maximum">10</parameter>
</parameters>
</check> </check>
<!-- Doesn't seem super big deal here ... --> <!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false"> <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
<parameters><parameter name="maxLength">50</parameter></parameters> <parameters>
<parameter name="maxLength">50</parameter>
</parameters>
</check> </check>
<!-- Not exactly feasible to enforce this right now. --> <!-- Not exactly feasible to enforce this right now. -->
<!-- It is also infrequent that somebody introduces a new class with a lot of methods. --> <!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
<check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false"> <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
<parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters> <parameters>
<parameter name="maxMethods"><![CDATA[30]]></parameter>
</parameters>
</check> </check>
<!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... --> <!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
<check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false"> <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
<parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters> <parameters>
<parameter name="ignore">-1,0,1,2,3</parameter>
</parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true">
<parameters>
<parameter name="illegalImports"><![CDATA[scala.collection.Seq,scala.collection.IndexedSeq]]></parameter>
</parameters>
<customMessage><![CDATA[
Don't import scala.collection.Seq or scala.collection.IndexedSeq, as doing so may cause issues when cross-building between Scala 2.12 and 2.13.
Please refer to the page below for details of the changes around Seq / IndexedSeq.
https://docs.scala-lang.org/overviews/core/collections-migration-213.html
If you really need to use scala.collection.Seq or scala.collection.IndexedSeq, please use the fully-qualified name instead.
]]></customMessage>
</check>
<check level="error" class="org.scalastyle.scalariform.ProcedureDeclarationChecker" enabled="true">
<customMessage>procedure syntax is deprecated in Scala 2.13: add return type `: Unit` and `=`</customMessage>
</check>
<check level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters>
<parameter name="regex">ArrayBuilder.make\[(.+)\]\(\)</parameter>
<parameter name="line">false</parameter>
</parameters>
<customMessage>ArrayBuilder.make does not accept parens anymore in Scala 2.13</customMessage>
</check>
<check level="warning" class="org.scalastyle.file.RegexChecker" enabled="false">
<parameters>
<parameter name="regex">(: |\[)(Indexed)?Seq\[[A-Za-z0-9_]+\]</parameter>
<parameter name="line">false</parameter>
</parameters>
<customMessage><![CDATA[NOTE: Scala 2.12 defaults scala.(Indexed)Seq to scala.collection.(Indexed)Seq while Scala 2.13 defaults
scala.(Indexed)Seq to scala.collection.immutable.(Indexed)Seq.
Please refer to the page below for details of the changes around Seq / IndexedSeq.
https://docs.scala-lang.org/overviews/core/collections-migration-213.html
]]></customMessage>
</check>
</scalastyle> </scalastyle>

View File

@ -16,10 +16,10 @@
package ml.dmlc.xgboost4j.scala package ml.dmlc.xgboost4j.scala
import ml.dmlc.xgboost4j.java.{Column, ColumnBatch, XGBoostError, QuantileDMatrix => JQuantileDMatrix}
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import ml.dmlc.xgboost4j.java.{Column, ColumnBatch, QuantileDMatrix => JQuantileDMatrix, XGBoostError}
class QuantileDMatrix private[scala]( class QuantileDMatrix private[scala](
private[scala] override val jDMatrix: JQuantileDMatrix) extends DMatrix(jDMatrix) { private[scala] override val jDMatrix: JQuantileDMatrix) extends DMatrix(jDMatrix) {

View File

@ -0,0 +1 @@
log4j.logger.org.apache.spark=INFO

View File

@ -17,13 +17,12 @@
package ml.dmlc.xgboost4j.scala.rapids.spark package ml.dmlc.xgboost4j.scala.rapids.spark
import java.nio.file.{Files, Path} import java.nio.file.{Files, Path}
import java.sql.{Date, Timestamp}
import java.util.{Locale, TimeZone} import java.util.{Locale, TimeZone}
import org.apache.spark.{GpuTestUtils, SparkConf} import org.apache.spark.{GpuTestUtils, SparkConf}
import org.apache.spark.internal.Logging import org.apache.spark.internal.Logging
import org.apache.spark.network.util.JavaUtils import org.apache.spark.network.util.JavaUtils
import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.SparkSession
import org.scalatest.BeforeAndAfterAll import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite import org.scalatest.funsuite.AnyFunSuite

View File

@ -16,18 +16,20 @@
package ml.dmlc.xgboost4j.scala.spark package ml.dmlc.xgboost4j.scala.spark
import java.io.File
import scala.collection.mutable.ArrayBuffer
import ai.rapids.cudf.{OrderByArg, Table} import ai.rapids.cudf.{OrderByArg, Table}
import org.apache.spark.SparkConf
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import ml.dmlc.xgboost4j.java.CudfColumnBatch import ml.dmlc.xgboost4j.java.CudfColumnBatch
import ml.dmlc.xgboost4j.scala.{DMatrix, QuantileDMatrix, XGBoost => ScalaXGBoost} import ml.dmlc.xgboost4j.scala.{DMatrix, QuantileDMatrix, XGBoost => ScalaXGBoost}
import ml.dmlc.xgboost4j.scala.rapids.spark.GpuTestSuite import ml.dmlc.xgboost4j.scala.rapids.spark.GpuTestSuite
import ml.dmlc.xgboost4j.scala.rapids.spark.SparkSessionHolder.withSparkSession import ml.dmlc.xgboost4j.scala.rapids.spark.SparkSessionHolder.withSparkSession
import ml.dmlc.xgboost4j.scala.spark.Utils.withResource import ml.dmlc.xgboost4j.scala.spark.Utils.withResource
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.SparkConf
import java.io.File
import scala.collection.mutable.ArrayBuffer
class GpuXGBoostPluginSuite extends GpuTestSuite { class GpuXGBoostPluginSuite extends GpuTestSuite {

View File

@ -22,11 +22,12 @@ import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable, MLReadable, MLReader} import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable, MLReadable, MLReader}
import org.apache.spark.ml.xgboost.SparkUtils import org.apache.spark.ml.xgboost.SparkUtils
import org.apache.spark.sql.Dataset import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
import ml.dmlc.xgboost4j.scala.Booster import ml.dmlc.xgboost4j.scala.Booster
import ml.dmlc.xgboost4j.scala.spark.XGBoostRanker._uid import ml.dmlc.xgboost4j.scala.spark.XGBoostRanker._uid
import ml.dmlc.xgboost4j.scala.spark.params.HasGroupCol import ml.dmlc.xgboost4j.scala.spark.params.HasGroupCol
import ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.RANKER_OBJS import ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.RANKER_OBJS
import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
class XGBoostRanker(override val uid: String, class XGBoostRanker(override val uid: String,
private val xgboostParams: Map[String, Any]) private val xgboostParams: Map[String, Any])

View File

@ -22,11 +22,11 @@ import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable, MLReadable, MLReader} import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable, MLReadable, MLReader}
import org.apache.spark.ml.xgboost.SparkUtils import org.apache.spark.ml.xgboost.SparkUtils
import org.apache.spark.sql.Dataset import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
import ml.dmlc.xgboost4j.scala.Booster import ml.dmlc.xgboost4j.scala.Booster
import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor._uid import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor._uid
import ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.REGRESSION_OBJS import ml.dmlc.xgboost4j.scala.spark.params.LearningTaskParams.REGRESSION_OBJS
import org.apache.spark.sql.types.{DataType, DoubleType, StructType}
class XGBoostRegressor(override val uid: String, class XGBoostRegressor(override val uid: String,
private val xgboostParams: Map[String, Any]) private val xgboostParams: Map[String, Any])

View File

@ -18,10 +18,11 @@ package ml.dmlc.xgboost4j.scala.spark
import java.io.File import java.io.File
import java.util.Arrays import java.util.Arrays
import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.ArrayBuffer
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors}
import org.apache.spark.SparkException import org.apache.spark.SparkException
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors}
import org.json4s.{DefaultFormats, Formats} import org.json4s.{DefaultFormats, Formats}
import org.json4s.jackson.parseJson import org.json4s.jackson.parseJson
import org.scalatest.funsuite.AnyFunSuite import org.scalatest.funsuite.AnyFunSuite