Merge branch 'master' into master
This commit is contained in:
commit
e27977d416
@ -64,8 +64,8 @@ raw = xgb.save.raw(bst)
|
|||||||
# load binary model to R
|
# load binary model to R
|
||||||
bst3 <- xgb.load(raw)
|
bst3 <- xgb.load(raw)
|
||||||
pred3 <- predict(bst3, test$data)
|
pred3 <- predict(bst3, test$data)
|
||||||
# pred2 should be identical to pred
|
# pred3 should be identical to pred
|
||||||
print(paste("sum(abs(pred3-pred))=", sum(abs(pred2-pred))))
|
print(paste("sum(abs(pred3-pred))=", sum(abs(pred3-pred))))
|
||||||
|
|
||||||
#----------------Advanced features --------------
|
#----------------Advanced features --------------
|
||||||
# to use advanced features, we need to put data in xgb.DMatrix
|
# to use advanced features, we need to put data in xgb.DMatrix
|
||||||
|
|||||||
@ -32,8 +32,12 @@ class XGBoostModel(_booster: Booster)(implicit val sc: SparkContext) extends Ser
|
|||||||
import DataUtils._
|
import DataUtils._
|
||||||
val broadcastBooster = testSet.sparkContext.broadcast(_booster)
|
val broadcastBooster = testSet.sparkContext.broadcast(_booster)
|
||||||
testSet.mapPartitions { testSamples =>
|
testSet.mapPartitions { testSamples =>
|
||||||
|
if (testSamples.hasNext) {
|
||||||
val dMatrix = new DMatrix(new JDMatrix(testSamples, null))
|
val dMatrix = new DMatrix(new JDMatrix(testSamples, null))
|
||||||
Iterator(broadcastBooster.value.predict(dMatrix))
|
Iterator(broadcastBooster.value.predict(dMatrix))
|
||||||
|
} else {
|
||||||
|
Iterator()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -23,7 +23,7 @@ import scala.collection.mutable.ListBuffer
|
|||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
import org.apache.commons.logging.LogFactory
|
import org.apache.commons.logging.LogFactory
|
||||||
import org.apache.spark.mllib.linalg.DenseVector
|
import org.apache.spark.mllib.linalg.{Vector => SparkVector, Vectors, DenseVector}
|
||||||
import org.apache.spark.mllib.regression.LabeledPoint
|
import org.apache.spark.mllib.regression.LabeledPoint
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.{SparkConf, SparkContext}
|
import org.apache.spark.{SparkConf, SparkContext}
|
||||||
@ -190,4 +190,24 @@ class XGBoostSuite extends FunSuite with BeforeAndAfter {
|
|||||||
assert(eval.eval(xgBoostModel.predict(testSetDMatrix), testSetDMatrix) < 0.1)
|
assert(eval.eval(xgBoostModel.predict(testSetDMatrix), testSetDMatrix) < 0.1)
|
||||||
customSparkContext.stop()
|
customSparkContext.stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("test with empty partition") {
|
||||||
|
|
||||||
|
def buildEmptyRDD(sparkContext: Option[SparkContext] = None): RDD[SparkVector] = {
|
||||||
|
val sampleList = new ListBuffer[SparkVector]
|
||||||
|
sparkContext.getOrElse(sc).parallelize(sampleList, numWorkers)
|
||||||
|
}
|
||||||
|
|
||||||
|
val eval = new EvalError()
|
||||||
|
val trainingRDD = buildTrainingRDD()
|
||||||
|
val testRDD = buildEmptyRDD()
|
||||||
|
import DataUtils._
|
||||||
|
val tempDir = Files.createTempDirectory("xgboosttest-")
|
||||||
|
val tempFile = Files.createTempFile(tempDir, "", "")
|
||||||
|
val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "0",
|
||||||
|
"objective" -> "binary:logistic").toMap
|
||||||
|
val xgBoostModel = XGBoost.train(trainingRDD, paramMap, 5, numWorkers)
|
||||||
|
|
||||||
|
println(xgBoostModel.predict(testRDD))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -144,7 +144,8 @@ def _maybe_pandas_data(data, feature_names, feature_types):
|
|||||||
|
|
||||||
data_dtypes = data.dtypes
|
data_dtypes = data.dtypes
|
||||||
if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in data_dtypes):
|
if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in data_dtypes):
|
||||||
raise ValueError('DataFrame.dtypes for data must be int, float or bool')
|
bad_fields = [data.columns[i] for i, dtype in enumerate(data_dtypes) if dtype.name not in PANDAS_DTYPE_MAPPER ]
|
||||||
|
raise ValueError('DataFrame.dtypes for data must be int, float or bool.\nDid not expect the data types in fields '+', '.join(bad_fields))
|
||||||
|
|
||||||
if feature_names is None:
|
if feature_names is None:
|
||||||
feature_names = data.columns.format()
|
feature_names = data.columns.format()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user