[jvm-packages] Integration with Spark Dataframe/Dataset (#1559)

* bump up to scala 2.11

* framework of data frame integration

* test consistency between RDD and DataFrame

* order preservation

* test order preservation

* example code and fix makefile

* improve type checking

* improve APIs

* user docs

* work around travis CI's limitation on log length

* adjust test structure

* integrate with Spark 1.x

* spark 2.x integration

* remove spark 1.x implementation but provide instructions on how to downgrade
This commit is contained in:
Nan Zhu
2016-09-11 15:02:58 -04:00
committed by GitHub
parent 7ff742ebf7
commit fb02797e2a
15 changed files with 625 additions and 139 deletions

View File

@@ -14,8 +14,6 @@
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
<maven.version>3.3.9</maven.version>
-<scala.version>2.10.5</scala.version>
-<scala.binary.version>2.10</scala.binary.version>
</properties>
<modules>
<module>xgboost4j</module>
@@ -25,13 +23,15 @@
</modules>
<profiles>
<profile>
-<id>spark-1.x</id>
+<id>spark-2.x</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
-<spark.version>1.6.1</spark.version>
-<scala.binary.version>2.10</scala.binary.version>
+<spark.version>2.0.0</spark.version>
+<flink.suffix>_2.11</flink.suffix>
+<scala.version>2.11.8</scala.version>
+<scala.binary.version>2.11</scala.binary.version>
</properties>
</profile>
</profiles>