From 8b2f4e2d394a5fc6d76a8b90f5c0bdde054c958d Mon Sep 17 00:00:00 2001
From: Yun Ni
Date: Sat, 20 Jan 2018 21:13:25 -0800
Subject: [PATCH] [jvm-packages] Move cache files to TempDirectory and delete this directory after XGBoost job finishes (#3022)

* [jvm-packages] Move cache files to tmp dir and delete on exit

* Delete the cache dir when watches are deleted
---
 .../scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
index 736dfd060..8e4426552 100644
--- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
@@ -25,6 +25,7 @@ import ml.dmlc.xgboost4j.java.{IRabitTracker, Rabit, XGBoostError, RabitTracker
 import ml.dmlc.xgboost4j.scala.rabit.RabitTracker
 import ml.dmlc.xgboost4j.scala.{XGBoost => SXGBoost, _}
 import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint}
+import org.apache.commons.io.FileUtils
 import org.apache.commons.logging.LogFactory
 import org.apache.hadoop.fs.{FSDataInputStream, Path}
 import org.apache.spark.rdd.RDD
@@ -122,7 +123,6 @@ object XGBoost extends Serializable {
       val taskId = TaskContext.getPartitionId().toString
       val cacheDirName = if (useExternalMemory) {
         val dir = Files.createTempDirectory(s"${TaskContext.get().stageId()}-cache-$taskId")
-        new File(dir.toUri).deleteOnExit()
         Some(dir.toAbsolutePath.toString)
       } else {
         None
@@ -480,11 +480,7 @@ private class Watches private(
   def delete(): Unit = {
     toMap.values.foreach(_.delete())
     cacheDirName.foreach { name =>
-      for (cacheFile <- new File(name).listFiles()) {
-        if (!cacheFile.delete()) {
-          throw new IllegalStateException(s"failed to delete $cacheFile")
-        }
-      }
+      FileUtils.deleteDirectory(new File(name))
     }
   }
 