[jvm-packages] Integration with Spark Dataframe/Dataset (#1559)

* bump up to Scala 2.11 * framework of DataFrame integration * test consistency between RDD and DataFrame * order preservation * test order preservation * example code and fix Makefile * improve type checking * improve APIs * user docs * work around Travis CI's limitation on log length * adjust test structure * integrate with Spark 1.x * Spark 2.x integration * remove Spark 1.x implementation but provide instructions on how to downgrade
2016-09-11 15:02:58 -04:00
parent 7ff742ebf7
commit fb02797e2a
15 changed files with 625 additions and 139 deletions
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -14,8 +14,6 @@
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <maven.version>3.3.9</maven.version>
-        <scala.version>2.10.5</scala.version>
-        <scala.binary.version>2.10</scala.binary.version>
    </properties>
    <modules>
        <module>xgboost4j</module>
@@ -25,13 +23,15 @@
    </modules>
    <profiles>
        <profile>
-            <id>spark-1.x</id>
+            <id>spark-2.x</id>
            <activation>
                <activeByDefault>true</activeByDefault>
            </activation>
            <properties>
-                <spark.version>1.6.1</spark.version>
-                <scala.binary.version>2.10</scala.binary.version>
+                <spark.version>2.0.0</spark.version>
+                <flink.suffix>_2.11</flink.suffix>
+                <scala.version>2.11.8</scala.version>
+                <scala.binary.version>2.11</scala.binary.version>
            </properties>
        </profile>
    </profiles>