[jvm-packages] xgboost4j-spark external memory (#1219)

* implement external memory support for XGBoost4J

* remove extra space

* enable external memory for prediction

* update doc
This commit is contained in:
Nan Zhu
2016-05-22 14:01:28 -04:00
parent 587999755f
commit c85b9012c6
8 changed files with 62 additions and 16 deletions

View File

@@ -32,8 +32,8 @@ public class ExternalMemory {
//this is the only difference, add a # followed by a cache prefix name
//several cache file with the prefix will be generated
//currently only support convert from libsvm file
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache");
DMatrix trainMat = new DMatrix("../demo/data/agaricus.txt.train#dtrain.cache");
DMatrix testMat = new DMatrix("../demo/data/agaricus.txt.test#dtest.cache");
//specify parameters
HashMap<String, Object> params = new HashMap<String, Object>();

View File

@@ -28,7 +28,7 @@ object DistTrainWithSpark {
"usage: program num_of_rounds num_workers training_path test_path model_path")
sys.exit(1)
}
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("XGBoost-spark-example")
val sparkConf = new SparkConf().setAppName("XGBoost-spark-example")
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
sparkConf.registerKryoClasses(Array(classOf[Booster]))
val sc = new SparkContext(sparkConf)
@@ -45,7 +45,8 @@ object DistTrainWithSpark {
"eta" -> 0.1f,
"max_depth" -> 2,
"objective" -> "binary:logistic").toMap
val xgboostModel = XGBoost.train(trainRDD, paramMap, numRound, nWorkers = args(1).toInt)
val xgboostModel = XGBoost.train(trainRDD, paramMap, numRound, nWorkers = args(1).toInt,
useExternalMemory = true)
xgboostModel.predict(new DMatrix(testSet))
// save model to HDFS path
xgboostModel.saveModelAsHadoopFile(outputModelPath)