Fix dask ip resolution. (#6475)

This adopts the solution used in dask/dask-xgboost#40, which uses the `get_host_ip` function from the dmlc-core tracker.
This commit is contained in:
Jiaming Yuan 2020-12-08 08:36:23 +08:00 committed by Hyunsu Cho
parent c39f6b25f0
commit 1bf3899983
2 changed files with 24 additions and 3 deletions

View File

@ -33,7 +33,7 @@ from .compat import lazy_isinstance
from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter
from .core import _deprecate_positional_args from .core import _deprecate_positional_args
from .training import train as worker_train from .training import train as worker_train
from .tracker import RabitTracker from .tracker import RabitTracker, get_host_ip
from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase
from .sklearn import xgboost_model_doc from .sklearn import xgboost_model_doc
@ -70,8 +70,7 @@ LOGGER = logging.getLogger('[xgboost.dask]')
def _start_tracker(n_workers): def _start_tracker(n_workers):
"""Start Rabit tracker """ """Start Rabit tracker """
env = {'DMLC_NUM_WORKER': n_workers} env = {'DMLC_NUM_WORKER': n_workers}
import socket host = get_host_ip('auto')
host = socket.gethostbyname(socket.gethostname())
rabit_context = RabitTracker(hostIP=host, nslave=n_workers) rabit_context = RabitTracker(hostIP=host, nslave=n_workers)
env.update(rabit_context.slave_envs()) env.update(rabit_context.slave_envs())

View File

@ -52,6 +52,28 @@ def get_some_ip(host):
return socket.getaddrinfo(host, None)[0][4][0] return socket.getaddrinfo(host, None)[0][4][0]
def get_host_ip(hostIP=None):
    """Resolve the address that should be advertised for the current host.

    Parameters
    ----------
    hostIP : str, optional
        Selects how the address is determined:

        * ``None`` or ``'auto'`` -- handled exactly like ``'ip'``.
        * ``'dns'`` -- return ``socket.getfqdn()``.
        * ``'ip'`` -- resolve ``socket.getfqdn()`` with
          ``socket.gethostbyname``; if that raises ``socket.gaierror``,
          log a warning and resolve ``socket.gethostname()`` instead.  If
          the resulting address starts with ``"127."``, a UDP socket is
          connected towards ``10.255.255.255`` and the local address
          reported by ``getsockname()`` is used in its place.
        * any other value -- returned unchanged.

    Returns
    -------
    str
        The host name or IP address selected by the rules above.

    Notes
    -----
    Errors raised by the fallback ``gethostname`` lookup or by the probe
    socket are not caught here and propagate to the caller.
    """
    if hostIP is None or hostIP == 'auto':
        hostIP = 'ip'

    if hostIP == 'dns':
        hostIP = socket.getfqdn()
    elif hostIP == 'ip':
        try:
            hostIP = socket.gethostbyname(socket.getfqdn())
        except socket.gaierror:
            logging.warning(
                'gethostbyname(socket.getfqdn()) failed... trying on hostname()')
            hostIP = socket.gethostbyname(socket.gethostname())
        if hostIP.startswith("127."):
            # The lookup only produced a loopback address.  Connect a UDP
            # socket and read back the local address chosen for it.  The
            # ``with`` block closes the socket afterwards; previously the
            # descriptor was left open.
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
                # doesn't have to be reachable
                probe.connect(('10.255.255.255', 1))
                hostIP = probe.getsockname()[0]
    return hostIP
def get_family(addr): def get_family(addr):
return socket.getaddrinfo(addr, None)[0][0] return socket.getaddrinfo(addr, None)[0][0]