Fix GPU RF (#6755)

* Fix sampling.
This commit is contained in:
Jiaming Yuan 2021-03-17 06:23:35 +08:00 committed by GitHub
parent 1a73a28511
commit 4f75f514ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 9 deletions

View File

@ -169,6 +169,7 @@ struct GPUHistMakerDevice {
std::unique_ptr<RowPartitioner> row_partitioner; std::unique_ptr<RowPartitioner> row_partitioner;
DeviceHistogram<GradientSumT> hist{}; DeviceHistogram<GradientSumT> hist{};
dh::caching_device_vector<GradientPair> d_gpair; // storage for gpair;
common::Span<GradientPair> gpair; common::Span<GradientPair> gpair;
dh::caching_device_vector<int> monotone_constraints; dh::caching_device_vector<int> monotone_constraints;
@ -269,7 +270,13 @@ struct GPUHistMakerDevice {
std::fill(node_sum_gradients.begin(), node_sum_gradients.end(), std::fill(node_sum_gradients.begin(), node_sum_gradients.end(),
GradientPair()); GradientPair());
auto sample = sampler->Sample(dh_gpair->DeviceSpan(), dmat); if (d_gpair.size() != dh_gpair->Size()) {
d_gpair.resize(dh_gpair->Size());
}
thrust::copy(thrust::device, dh_gpair->ConstDevicePointer(),
dh_gpair->ConstDevicePointer() + dh_gpair->Size(),
d_gpair.begin());
auto sample = sampler->Sample(dh::ToSpan(d_gpair), dmat);
page = sample.page; page = sample.page;
gpair = sample.gpair; gpair = sample.gpair;

View File

@ -503,12 +503,15 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
auto gpair = GenerateRandomGradients(kRows); auto gpair = GenerateRandomGradients(kRows);
// Build a tree using the in-memory DMatrix. // Build a tree using the in-memory DMatrix.
auto rng = common::GlobalRandom();
RegTree tree; RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, 0); HostDeviceVector<bst_float> preds(kRows, 0.0, 0);
UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod, UpdateTree(&gpair, dmat.get(), 0, &tree, &preds, kSubsample, kSamplingMethod,
kRows); kRows);
// Build another tree using multiple ELLPACK pages. // Build another tree using multiple ELLPACK pages.
common::GlobalRandom() = rng;
RegTree tree_ext; RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0); HostDeviceVector<bst_float> preds_ext(kRows, 0.0, 0);
UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext, UpdateTree(&gpair, dmat_ext.get(), kPageSize, &tree_ext, &preds_ext,
@ -518,7 +521,7 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
auto preds_h = preds.ConstHostVector(); auto preds_h = preds.ConstHostVector();
auto preds_ext_h = preds_ext.ConstHostVector(); auto preds_ext_h = preds_ext.ConstHostVector();
for (int i = 0; i < kRows; i++) { for (int i = 0; i < kRows; i++) {
EXPECT_NEAR(preds_h[i], preds_ext_h[i], 2e-3); EXPECT_NEAR(preds_h[i], preds_ext_h[i], 1e-3);
} }
} }

View File

@ -33,4 +33,8 @@ def test_gpu_binary_classification():
def test_boost_from_prediction_gpu_hist(): def test_boost_from_prediction_gpu_hist():
cpu_test = twskl.run_boost_from_prediction('gpu_hist') twskl.run_boost_from_prediction('gpu_hist')
def test_num_parallel_tree():
twskl.run_boston_housing_rf_regression("gpu_hist")

View File

@ -357,23 +357,26 @@ def test_boston_housing_regression():
assert mean_squared_error(preds4, labels) < 350 assert mean_squared_error(preds4, labels) < 350
def test_boston_housing_rf_regression(): def run_boston_housing_rf_regression(tree_method):
from sklearn.metrics import mean_squared_error from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston from sklearn.datasets import load_boston
from sklearn.model_selection import KFold from sklearn.model_selection import KFold
boston = load_boston() X, y = load_boston(return_X_y=True)
y = boston['target']
X = boston['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng) kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y): for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBRFRegressor(random_state=42).fit( xgb_model = xgb.XGBRFRegressor(random_state=42, tree_method=tree_method).fit(
X[train_index], y[train_index]) X[train_index], y[train_index]
)
preds = xgb_model.predict(X[test_index]) preds = xgb_model.predict(X[test_index])
labels = y[test_index] labels = y[test_index]
assert mean_squared_error(preds, labels) < 35 assert mean_squared_error(preds, labels) < 35
def test_boston_housing_rf_regression():
run_boston_housing_rf_regression("hist")
def test_parameter_tuning(): def test_parameter_tuning():
from sklearn.model_selection import GridSearchCV from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston from sklearn.datasets import load_boston