[jvm-packages] xgboost4j-spark external memory (#1219)

* implement external memory support for XGBoost4J

* remove extra space

* enable external memory for prediction

* update doc
This commit is contained in:
Nan Zhu
2016-05-22 14:01:28 -04:00
parent 587999755f
commit c85b9012c6
8 changed files with 62 additions and 16 deletions

View File

@@ -127,7 +127,7 @@ class XGBoostSuite extends FunSuite with BeforeAndAfter {
List("eta" -> "1", "max_depth" -> "2", "silent" -> "0",
"objective" -> "binary:logistic").toMap,
new scala.collection.mutable.HashMap[String, String],
numWorkers = 2, round = 5, null, null)
numWorkers = 2, round = 5, null, null, false)
val boosterCount = boosterRDD.count()
assert(boosterCount === 2)
val boosters = boosterRDD.collect()
@@ -210,4 +210,26 @@ class XGBoostSuite extends FunSuite with BeforeAndAfter {
println(xgBoostModel.predict(testRDD))
}
// Trains a model with useExternalMemory = true on a dedicated SparkContext and
// checks that the evaluation error on the agaricus test set stays below 0.1.
// The context shutdown and the cache-file cleanup run in `finally`, so a failed
// training run or assertion does not leak a running SparkContext or leave
// cache files behind in the working directory.
test("training with external memory cache") {
  // Swap the suite-level context for one created just for this test.
  // NOTE(review): `sc` is nulled presumably so the suite teardown does not try
  // to stop it a second time -- confirm against the suite's `after` hook.
  sc.stop()
  sc = null
  val sparkConf = new SparkConf().setMaster("local[*]").setAppName("XGBoostSuite")
  val customSparkContext = new SparkContext(sparkConf)
  try {
    val eval = new EvalError()
    val trainingRDD = buildTrainingRDD(Some(customSparkContext))
    val testSet = readFile(getClass.getResource("/agaricus.txt.test").getFile).iterator
    import DataUtils._
    val testSetDMatrix = new DMatrix(new JDMatrix(testSet, null))
    val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "0",
      "objective" -> "binary:logistic").toMap
    val xgBoostModel = XGBoost.train(trainingRDD, paramMap, 5, numWorkers,
      useExternalMemory = true)
    assert(eval.eval(xgBoostModel.predict(testSetDMatrix), testSetDMatrix) < 0.1)
  } finally {
    try {
      customSparkContext.stop()
    } finally {
      // File.listFiles() returns null when the directory cannot be listed;
      // treat that as "nothing to clean" instead of throwing a NullPointerException.
      val workingDirFiles = Option(new File(".").listFiles()).getOrElse(Array.empty[File])
      workingDirFiles
        .filter(_.getName.startsWith("XGBoostSuite-dtrain_cache"))
        .foreach(_.delete())
    }
  }
}
}