[Breaking] Require format to be specified in input URI. (#9077)
Previously, we used `libsvm` as the default when the format was not specified. However, the dmlc data parser is not particularly robust against errors, and the most common type of error is an undefined format. In addition, we recommend that users use other data loaders instead. We will continue to maintain the parsers, as they are currently used for many internal tests, including federated learning.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014-2021 by Contributors
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -62,8 +62,8 @@ public class BasicWalkThrough {
|
||||
|
||||
public static void main(String[] args) throws IOException, XGBoostError {
|
||||
// load file from text file, also binary buffer generated by xgboost4j
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm");
|
||||
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
params.put("eta", 1.0);
|
||||
@@ -112,7 +112,8 @@ public class BasicWalkThrough {
|
||||
|
||||
System.out.println("start build dmatrix from csr sparse data ...");
|
||||
//build dmatrix from CSR Sparse Matrix
|
||||
DataLoader.CSRSparseData spData = DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train");
|
||||
DataLoader.CSRSparseData spData =
|
||||
DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
|
||||
DMatrix trainMat2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data,
|
||||
DMatrix.SparseType.CSR, 127);
|
||||
|
||||
@@ -32,8 +32,8 @@ public class BoostFromPrediction {
|
||||
System.out.println("start running example to start from a initial prediction");
|
||||
|
||||
// load file from text file, also binary buffer generated by xgboost4j
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm");
|
||||
|
||||
//specify parameters
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
|
||||
@@ -30,7 +30,7 @@ import ml.dmlc.xgboost4j.java.XGBoostError;
|
||||
public class CrossValidation {
|
||||
public static void main(String[] args) throws IOException, XGBoostError {
|
||||
//load train mat
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
|
||||
//set params
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
|
||||
@@ -139,9 +139,9 @@ public class CustomObjective {
|
||||
|
||||
public static void main(String[] args) throws XGBoostError {
|
||||
//load train mat (svmlight format)
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
//load valid mat (svmlight format)
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm");
|
||||
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
params.put("eta", 1.0);
|
||||
|
||||
@@ -29,9 +29,9 @@ import ml.dmlc.xgboost4j.java.example.util.DataLoader;
|
||||
public class EarlyStopping {
|
||||
public static void main(String[] args) throws IOException, XGBoostError {
|
||||
DataLoader.CSRSparseData trainCSR =
|
||||
DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train");
|
||||
DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
DataLoader.CSRSparseData testCSR =
|
||||
DataLoader.loadSVMFile("../../demo/data/agaricus.txt.test");
|
||||
DataLoader.loadSVMFile("../../demo/data/agaricus.txt.test?format=libsvm");
|
||||
|
||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||
{
|
||||
|
||||
@@ -32,8 +32,8 @@ public class ExternalMemory {
|
||||
//this is the only difference, add a # followed by a cache prefix name
|
||||
//several cache file with the prefix will be generated
|
||||
//currently only support convert from libsvm file
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm#dtrain.cache");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm#dtest.cache");
|
||||
|
||||
//specify parameters
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
|
||||
@@ -32,8 +32,8 @@ import ml.dmlc.xgboost4j.java.example.util.CustomEval;
|
||||
public class GeneralizedLinearModel {
|
||||
public static void main(String[] args) throws XGBoostError {
|
||||
// load file from text file, also binary buffer generated by xgboost4j
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm");
|
||||
|
||||
//specify parameters
|
||||
//change booster to gblinear, so that we are fitting a linear model
|
||||
|
||||
@@ -31,8 +31,8 @@ import ml.dmlc.xgboost4j.java.example.util.CustomEval;
|
||||
public class PredictFirstNtree {
|
||||
public static void main(String[] args) throws XGBoostError {
|
||||
// load file from text file, also binary buffer generated by xgboost4j
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm");
|
||||
|
||||
//specify parameters
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
|
||||
@@ -31,8 +31,8 @@ import ml.dmlc.xgboost4j.java.XGBoostError;
|
||||
public class PredictLeafIndices {
|
||||
public static void main(String[] args) throws XGBoostError {
|
||||
// load file from text file, also binary buffer generated by xgboost4j
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm");
|
||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm");
|
||||
|
||||
//specify parameters
|
||||
HashMap<String, Object> params = new HashMap<String, Object>();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2014 by Contributors
|
||||
Copyright (c) 2014-2023 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -36,8 +36,8 @@ object BasicWalkThrough {
|
||||
}
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val trainMax = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val testMax = new DMatrix("../../demo/data/agaricus.txt.test")
|
||||
val trainMax = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
val testMax = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
|
||||
|
||||
val params = new mutable.HashMap[String, Any]()
|
||||
params += "eta" -> 1.0
|
||||
@@ -76,7 +76,7 @@ object BasicWalkThrough {
|
||||
|
||||
// build dmatrix from CSR Sparse Matrix
|
||||
println("start build dmatrix from csr sparse data ...")
|
||||
val spData = DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train")
|
||||
val spData = DataLoader.loadSVMFile("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
val trainMax2 = new DMatrix(spData.rowHeaders, spData.colIndex, spData.data,
|
||||
JDMatrix.SparseType.CSR)
|
||||
trainMax2.setLabel(spData.labels)
|
||||
|
||||
@@ -24,8 +24,8 @@ object BoostFromPrediction {
|
||||
def main(args: Array[String]): Unit = {
|
||||
println("start running example to start from a initial prediction")
|
||||
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
|
||||
|
||||
val params = new mutable.HashMap[String, Any]()
|
||||
params += "eta" -> 1.0
|
||||
|
||||
@@ -21,7 +21,7 @@ import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix}
|
||||
|
||||
object CrossValidation {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val trainMat: DMatrix = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val trainMat: DMatrix = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
|
||||
// set params
|
||||
val params = new mutable.HashMap[String, Any]
|
||||
|
||||
@@ -138,8 +138,8 @@ object CustomObjective {
|
||||
}
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
|
||||
val params = new mutable.HashMap[String, Any]()
|
||||
params += "eta" -> 1.0
|
||||
params += "max_depth" -> 2
|
||||
|
||||
@@ -25,8 +25,8 @@ object ExternalMemory {
|
||||
// this is the only difference, add a # followed by a cache prefix name
|
||||
// several cache file with the prefix will be generated
|
||||
// currently only support convert from libsvm file
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train#dtrain.cache")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test#dtest.cache")
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm#dtrain.cache")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm#dtest.cache")
|
||||
|
||||
val params = new mutable.HashMap[String, Any]()
|
||||
params += "eta" -> 1.0
|
||||
|
||||
@@ -27,8 +27,8 @@ import ml.dmlc.xgboost4j.scala.example.util.CustomEval
|
||||
*/
|
||||
object GeneralizedLinearModel {
|
||||
def main(args: Array[String]): Unit = {
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
|
||||
|
||||
// specify parameters
|
||||
// change booster to gblinear, so that we are fitting a linear model
|
||||
|
||||
@@ -23,8 +23,8 @@ import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix}
|
||||
object PredictFirstNTree {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
|
||||
|
||||
val params = new mutable.HashMap[String, Any]()
|
||||
params += "eta" -> 1.0
|
||||
|
||||
@@ -25,8 +25,8 @@ import ml.dmlc.xgboost4j.scala.{XGBoost, DMatrix}
|
||||
object PredictLeafIndices {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test")
|
||||
val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm")
|
||||
val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm")
|
||||
|
||||
val params = new mutable.HashMap[String, Any]()
|
||||
params += "eta" -> 1.0
|
||||
|
||||
Reference in New Issue
Block a user