Revert "[jvm-packages] Add XGBOOST_RABIT_TRACKER_IP_FOR_TEST to set rabit tracker IP. (#6869)" (#6886)
This reverts commit 2828da3c4c951baa45d1bb6f85c7b3a6657cd607.
This commit is contained in:
parent
c8cc3eacc9
commit
74b41637de
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@ -210,8 +210,6 @@ jobs:
|
|||||||
if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
|
if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
|
||||||
env:
|
env:
|
||||||
RABIT_MOCK: ON
|
RABIT_MOCK: ON
|
||||||
SPARK_LOCAL_IP: 127.0.0.1
|
|
||||||
XGBOOST_RABIT_TRACKER_IP_FOR_TEST: 127.0.0.1
|
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
@ -162,17 +162,17 @@ Example of setting a missing value (e.g. -999) to the "missing" parameter in XGB
|
|||||||
doing this with missing values encoded as NaN, you will want to set ``setHandleInvalid = "keep"`` on VectorAssembler
|
doing this with missing values encoded as NaN, you will want to set ``setHandleInvalid = "keep"`` on VectorAssembler
|
||||||
in order to keep the NaN values in the dataset. You would then set the "missing" parameter to whatever you want to be
|
in order to keep the NaN values in the dataset. You would then set the "missing" parameter to whatever you want to be
|
||||||
treated as missing. However this may cause a large amount of memory use if your dataset is very sparse. For example:
|
treated as missing. However this may cause a large amount of memory use if your dataset is very sparse. For example:
|
||||||
|
|
||||||
.. code-block:: scala
|
.. code-block:: scala
|
||||||
|
|
||||||
val assembler = new VectorAssembler().setInputCols(feature_names.toArray).setOutputCol("features").setHandleInvalid("keep")
|
val assembler = new VectorAssembler().setInputCols(feature_names.toArray).setOutputCol("features").setHandleInvalid("keep")
|
||||||
|
|
||||||
// conversion to dense vector using Array()
|
// conversion to dense vector using Array()
|
||||||
|
|
||||||
val featurePipeline = new Pipeline().setStages(Array(assembler))
|
val featurePipeline = new Pipeline().setStages(Array(assembler))
|
||||||
val featureModel = featurePipeline.fit(df_training)
|
val featureModel = featurePipeline.fit(df_training)
|
||||||
val featureDf = featureModel.transform(df_training)
|
val featureDf = featureModel.transform(df_training)
|
||||||
|
|
||||||
val xgbParam = Map("eta" -> 0.1f,
|
val xgbParam = Map("eta" -> 0.1f,
|
||||||
"max_depth" -> 2,
|
"max_depth" -> 2,
|
||||||
"objective" -> "multi:softprob",
|
"objective" -> "multi:softprob",
|
||||||
@ -181,10 +181,10 @@ Example of setting a missing value (e.g. -999) to the "missing" parameter in XGB
|
|||||||
"num_workers" -> 2,
|
"num_workers" -> 2,
|
||||||
"allow_non_zero_for_missing" -> "true",
|
"allow_non_zero_for_missing" -> "true",
|
||||||
"missing" -> -999)
|
"missing" -> -999)
|
||||||
|
|
||||||
val xgb = new XGBoostClassifier(xgbParam)
|
val xgb = new XGBoostClassifier(xgbParam)
|
||||||
val xgbclassifier = xgb.fit(featureDf)
|
val xgbclassifier = xgb.fit(featureDf)
|
||||||
|
|
||||||
|
|
||||||
2. Before calling VectorAssembler you can transform the values you want to represent missing into an irregular value
|
2. Before calling VectorAssembler you can transform the values you want to represent missing into an irregular value
|
||||||
that is not 0, NaN, or Null and set the "missing" parameter to 0. The irregular value should ideally be chosen to be
|
that is not 0, NaN, or Null and set the "missing" parameter to 0. The irregular value should ideally be chosen to be
|
||||||
@ -586,11 +586,3 @@ An equivalent way is to pass in parameters in XGBoostClassifier's constructor:
|
|||||||
setLabelCol("classIndex")
|
setLabelCol("classIndex")
|
||||||
|
|
||||||
If training fails during these 100 rounds, the next training run starts by reading the latest checkpoint file in ``/checkpoints_path`` and resumes from the iteration at which that checkpoint was built, continuing until the next failure or until the specified 100 rounds are completed.
|
If training fails during these 100 rounds, the next training run starts by reading the latest checkpoint file in ``/checkpoints_path`` and resumes from the iteration at which that checkpoint was built, continuing until the next failure or until the specified 100 rounds are completed.
|
||||||
|
|
||||||
|
|
||||||
Developer Notes
|
|
||||||
===============
|
|
||||||
|
|
||||||
There's an environment variable called ``XGBOOST_RABIT_TRACKER_IP_FOR_TEST`` used to
|
|
||||||
specify the tracker IP, which can be used in combination with ``SPARK_LOCAL_IP``. It's
|
|
||||||
only used for testing and is not maintained as a part of the interface.
|
|
||||||
|
|||||||
@ -51,8 +51,6 @@ public class TrackerProperties {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String getHostIp(){
|
public String getHostIp(){
|
||||||
// mostly for testing
|
return this.properties.getProperty(HOST_IP);
|
||||||
String hostIp = System.getenv("XGBOOST_RABIT_TRACKER_IP_FOR_TEST");
|
|
||||||
return hostIp != null ? hostIp : this.properties.getProperty(HOST_IP);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user