Drop single point model recovery (#6262)
* Pass rabit params in JVM package.
* Implement timeout using poll timeout parameter.
* Remove OOB data check.
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
"""Distributed GPU tests."""
|
||||
import sys
|
||||
import time
|
||||
import xgboost as xgb
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
|
||||
def run_test(name, params_fun):
|
||||
@@ -28,7 +28,7 @@ def run_test(name, params_fun):
|
||||
# Have each worker save its model
|
||||
model_name = "test.model.%s.%d" % (name, rank)
|
||||
bst.dump_model(model_name, with_stats=True)
|
||||
time.sleep(2)
|
||||
xgb.rabit.allreduce(np.ones((1, 1)), xgb.rabit.Op.MAX) # sync
|
||||
xgb.rabit.tracker_print("Finished training\n")
|
||||
|
||||
if (rank == 0):
|
||||
@@ -49,9 +49,6 @@ def run_test(name, params_fun):
|
||||
|
||||
xgb.rabit.finalize()
|
||||
|
||||
if os.path.exists(model_name):
|
||||
os.remove(model_name)
|
||||
|
||||
|
||||
base_params = {
|
||||
'tree_method': 'gpu_hist',
|
||||
|
||||
@@ -7,6 +7,8 @@ submit="timeout 30 python ../../dmlc-core/tracker/dmlc-submit"
|
||||
|
||||
echo -e "\n ====== 1. Basic distributed-gpu test with Python: 4 workers; 1 GPU per worker ====== \n"
|
||||
$submit --num-workers=$(nvidia-smi -L | wc -l) python distributed_gpu.py basic_1x4 || exit 1
|
||||
rm test.model.*
|
||||
|
||||
echo -e "\n ====== 2. RF distributed-gpu test with Python: 4 workers; 1 GPU per worker ====== \n"
|
||||
$submit --num-workers=$(nvidia-smi -L | wc -l) python distributed_gpu.py rf_1x4 || exit 1
|
||||
rm test.model.*
|
||||
|
||||
Reference in New Issue
Block a user