[jvm-packages] xgboost4j-spark external memory (#1219)
* implement external memory support for XGBoost4J * remove extra space * enable external memory for prediction * update doc
This commit is contained in:
@@ -32,8 +32,8 @@ public class ExternalMemory {
|
||||
//this is the only difference, add a # followed by a cache prefix name
|
||||
//several cache file with the prefix will be generated
|
||||
//currently only support convert from libsvm file
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache");
|
||||
DMatrix trainMat = new DMatrix("../demo/data/agaricus.txt.train#dtrain.cache");
|
||||
DMatrix testMat = new DMatrix("../demo/data/agaricus.txt.test#dtest.cache");
|
||||
|
||||
//specify parameters
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
|
||||
@@ -28,7 +28,7 @@ object DistTrainWithSpark {
|
||||
"usage: program num_of_rounds num_workers training_path test_path model_path")
|
||||
sys.exit(1)
|
||||
}
|
||||
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("XGBoost-spark-example")
|
||||
val sparkConf = new SparkConf().setAppName("XGBoost-spark-example")
|
||||
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
|
||||
sparkConf.registerKryoClasses(Array(classOf[Booster]))
|
||||
val sc = new SparkContext(sparkConf)
|
||||
@@ -45,7 +45,8 @@ object DistTrainWithSpark {
|
||||
"eta" -> 0.1f,
|
||||
"max_depth" -> 2,
|
||||
"objective" -> "binary:logistic").toMap
|
||||
val xgboostModel = XGBoost.train(trainRDD, paramMap, numRound, nWorkers = args(1).toInt)
|
||||
val xgboostModel = XGBoost.train(trainRDD, paramMap, numRound, nWorkers = args(1).toInt,
|
||||
useExternalMemory = true)
|
||||
xgboostModel.predict(new DMatrix(testSet))
|
||||
// save model to HDFS path
|
||||
xgboostModel.saveModelAsHadoopFile(outputModelPath)
|
||||
|
||||
Reference in New Issue
Block a user