Fix typo in dask interface. (#6240)

This commit is contained in:
Jiaming Yuan 2020-10-15 15:26:29 +08:00 committed by GitHub
parent 06e453ddf4
commit 3da5a69dc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 17 deletions

View File

@@ -987,7 +987,7 @@ def inplace_predict(client, model, data,
async def _evaluation_matrices(client, validation_set, async def _evaluation_matrices(client, validation_set,
sample_weights, missing): sample_weight, missing):
''' '''
Parameters Parameters
---------- ----------
@@ -1010,8 +1010,8 @@ async def _evaluation_matrices(client, validation_set,
if validation_set is not None: if validation_set is not None:
assert isinstance(validation_set, list) assert isinstance(validation_set, list)
for i, e in enumerate(validation_set): for i, e in enumerate(validation_set):
w = (sample_weights[i] w = (sample_weight[i]
if sample_weights is not None else None) if sample_weight is not None else None)
dmat = await DaskDMatrix(client=client, data=e[0], label=e[1], dmat = await DaskDMatrix(client=client, data=e[0], label=e[1],
weight=w, missing=missing) weight=w, missing=missing)
evals.append((dmat, 'validation_{}'.format(i))) evals.append((dmat, 'validation_{}'.format(i)))
@@ -1027,7 +1027,7 @@ class DaskScikitLearnBase(XGBModel):
# pylint: disable=arguments-differ # pylint: disable=arguments-differ
def fit(self, X, y, def fit(self, X, y,
sample_weights=None, sample_weight=None,
base_margin=None, base_margin=None,
eval_set=None, eval_set=None,
sample_weight_eval_set=None, sample_weight_eval_set=None,
@@ -1086,13 +1086,13 @@ class DaskScikitLearnBase(XGBModel):
['estimators', 'model']) ['estimators', 'model'])
class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase): class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
# pylint: disable=missing-class-docstring # pylint: disable=missing-class-docstring
async def _fit_async(self, X, y, sample_weights, base_margin, eval_set, async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
sample_weight_eval_set, early_stopping_rounds, sample_weight_eval_set, early_stopping_rounds,
verbose): verbose):
dtrain = await DaskDMatrix(client=self.client, dtrain = await DaskDMatrix(client=self.client,
data=X, data=X,
label=y, label=y,
weight=sample_weights, weight=sample_weight,
base_margin=base_margin, base_margin=base_margin,
missing=self.missing) missing=self.missing)
params = self.get_xgb_params() params = self.get_xgb_params()
@@ -1115,7 +1115,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
def fit(self, def fit(self,
X, X,
y, y,
sample_weights=None, sample_weight=None,
base_margin=None, base_margin=None,
eval_set=None, eval_set=None,
sample_weight_eval_set=None, sample_weight_eval_set=None,
@@ -1125,7 +1125,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
return self.client.sync(self._fit_async, return self.client.sync(self._fit_async,
X=X, X=X,
y=y, y=y,
sample_weights=sample_weights, sample_weight=sample_weight,
base_margin=base_margin, base_margin=base_margin,
eval_set=eval_set, eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set, sample_weight_eval_set=sample_weight_eval_set,
@@ -1150,17 +1150,18 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
output_margin=output_margin, output_margin=output_margin,
base_margin=base_margin) base_margin=base_margin)
@xgboost_model_doc( @xgboost_model_doc(
'Implementation of the scikit-learn API for XGBoost classification.', 'Implementation of the scikit-learn API for XGBoost classification.',
['estimators', 'model']) ['estimators', 'model'])
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
async def _fit_async(self, X, y, sample_weights, base_margin, eval_set, async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
sample_weight_eval_set, early_stopping_rounds, sample_weight_eval_set, early_stopping_rounds,
verbose): verbose):
dtrain = await DaskDMatrix(client=self.client, dtrain = await DaskDMatrix(client=self.client,
data=X, data=X,
label=y, label=y,
weight=sample_weights, weight=sample_weight,
base_margin=base_margin, base_margin=base_margin,
missing=self.missing) missing=self.missing)
params = self.get_xgb_params() params = self.get_xgb_params()
@@ -1196,7 +1197,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
def fit(self, def fit(self,
X, X,
y, y,
sample_weights=None, sample_weight=None,
base_margin=None, base_margin=None,
eval_set=None, eval_set=None,
sample_weight_eval_set=None, sample_weight_eval_set=None,
@@ -1206,7 +1207,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
return self.client.sync(self._fit_async, return self.client.sync(self._fit_async,
X=X, X=X,
y=y, y=y,
sample_weights=sample_weights, sample_weight=sample_weight,
base_margin=base_margin, base_margin=base_margin,
eval_set=eval_set, eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set, sample_weight_eval_set=sample_weight_eval_set,

View File

@@ -40,10 +40,13 @@ kCols = 10
kWorkers = 5 kWorkers = 5
def generate_array(): def generate_array(with_weights=False):
partition_size = 20 partition_size = 20
X = da.random.random((kRows, kCols), partition_size) X = da.random.random((kRows, kCols), partition_size)
y = da.random.random(kRows, partition_size) y = da.random.random(kRows, partition_size)
if with_weights:
w = da.random.random(kRows, partition_size)
return X, y, w
return X, y return X, y
@@ -252,11 +255,11 @@ def test_dask_missing_value_cls():
def test_dask_regressor(): def test_dask_regressor():
with LocalCluster(n_workers=kWorkers) as cluster: with LocalCluster(n_workers=kWorkers) as cluster:
with Client(cluster) as client: with Client(cluster) as client:
X, y = generate_array() X, y, w = generate_array(with_weights=True)
regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2) regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
regressor.set_params(tree_method='hist') regressor.set_params(tree_method='hist')
regressor.client = client regressor.client = client
regressor.fit(X, y, eval_set=[(X, y)]) regressor.fit(X, y, sample_weight=w, eval_set=[(X, y)])
prediction = regressor.predict(X) prediction = regressor.predict(X)
assert prediction.ndim == 1 assert prediction.ndim == 1
@@ -274,12 +277,12 @@ def test_dask_regressor():
def test_dask_classifier(): def test_dask_classifier():
with LocalCluster(n_workers=kWorkers) as cluster: with LocalCluster(n_workers=kWorkers) as cluster:
with Client(cluster) as client: with Client(cluster) as client:
X, y = generate_array() X, y, w = generate_array(with_weights=True)
y = (y * 10).astype(np.int32) y = (y * 10).astype(np.int32)
classifier = xgb.dask.DaskXGBClassifier( classifier = xgb.dask.DaskXGBClassifier(
verbosity=1, n_estimators=2, eval_metric='merror') verbosity=1, n_estimators=2, eval_metric='merror')
classifier.client = client classifier.client = client
classifier.fit(X, y, eval_set=[(X, y)]) classifier.fit(X, y, sample_weight=w, eval_set=[(X, y)])
prediction = classifier.predict(X) prediction = classifier.predict(X)
assert prediction.ndim == 1 assert prediction.ndim == 1