Enable ROCm support for v2.0.1

This commit is contained in:
Hui Liu
2023-10-27 18:50:28 -07:00
447 changed files with 13518 additions and 8719 deletions

View File

@@ -289,8 +289,6 @@ TEST(GpuHist, PartitionTwoNodes) {
dh::ToSpan(feature_histogram_b)};
thrust::device_vector<GPUExpandEntry> results(2);
evaluator.EvaluateSplits({0, 1}, 1, dh::ToSpan(inputs), shared_inputs, dh::ToSpan(results));
GPUExpandEntry result_a = results[0];
GPUExpandEntry result_b = results[1];
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),
std::bitset<32>("10000000000000000000000000000000"));
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),

View File

@@ -43,7 +43,8 @@ void VerifySampling(size_t page_size,
EXPECT_NE(page->n_rows, kRows);
}
GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
GradientBasedSampler sampler(&ctx, kRows, param, subsample, sampling_method,
!fixed_size_sampling);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
if (fixed_size_sampling) {
@@ -97,7 +98,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_NE(page->n_rows, kRows);
GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
auto sampled_page = sample.page;
EXPECT_EQ(sample.sample_rows, kRows);
@@ -145,7 +146,8 @@ TEST(GradientBasedSampler, GradientBasedSampling) {
constexpr size_t kPageSize = 0;
constexpr float kSubsample = 0.8;
constexpr int kSamplingMethod = TrainParam::kGradientBased;
VerifySampling(kPageSize, kSubsample, kSamplingMethod);
constexpr bool kFixedSizeSampling = true;
VerifySampling(kPageSize, kSubsample, kSamplingMethod, kFixedSizeSampling);
}
TEST(GradientBasedSampler, GradientBasedSamplingExternalMemory) {

View File

@@ -50,15 +50,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
quantiser);
std::vector<GradientPairInt64> histogram_h(num_bins);
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemcpy(histogram_h.data(), d_histogram.data(),
num_bins * sizeof(GradientPairInt64),
cudaMemcpyDeviceToHost));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemcpy(histogram_h.data(), d_histogram.data(),
num_bins * sizeof(GradientPairInt64),
hipMemcpyDeviceToHost));
#endif
for (size_t i = 0; i < kRounds; ++i) {
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
@@ -70,15 +64,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
d_new_histogram, quantiser);
std::vector<GradientPairInt64> new_histogram_h(num_bins);
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemcpy(new_histogram_h.data(), d_new_histogram.data(),
num_bins * sizeof(GradientPairInt64),
cudaMemcpyDeviceToHost));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemcpy(new_histogram_h.data(), d_new_histogram.data(),
num_bins * sizeof(GradientPairInt64),
hipMemcpyDeviceToHost));
#endif
for (size_t j = 0; j < new_histogram_h.size(); ++j) {
ASSERT_EQ(new_histogram_h[j].GetQuantisedGrad(), histogram_h[j].GetQuantisedGrad());
ASSERT_EQ(new_histogram_h[j].GetQuantisedHess(), histogram_h[j].GetQuantisedHess());
@@ -98,15 +86,9 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
dh::ToSpan(baseline), quantiser);
std::vector<GradientPairInt64> baseline_h(num_bins);
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemcpy(baseline_h.data(), baseline.data().get(),
num_bins * sizeof(GradientPairInt64),
cudaMemcpyDeviceToHost));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemcpy(baseline_h.data(), baseline.data().get(),
num_bins * sizeof(GradientPairInt64),
hipMemcpyDeviceToHost));
#endif
for (size_t i = 0; i < baseline.size(); ++i) {
EXPECT_NEAR(baseline_h[i].GetQuantisedGrad(), histogram_h[i].GetQuantisedGrad(),

View File

@@ -72,19 +72,13 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
total_rows += segments.at(i).Size();
}
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),
h_batch_info.size() * sizeof(PerNodeData<int>), cudaMemcpyDefault,
nullptr));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemcpyAsync(d_batch_info.data().get(), h_batch_info.data(),
h_batch_info.size() * sizeof(PerNodeData<int>), hipMemcpyDefault,
nullptr));
#endif
dh::device_vector<int8_t> tmp;
SortPositionBatch<uint32_t, decltype(op), int>(dh::ToSpan(d_batch_info), dh::ToSpan(ridx),
dh::ToSpan(ridx_tmp), dh::ToSpan(counts),
total_rows, op, &tmp, nullptr);
total_rows, op, &tmp);
auto op_without_data = [=] __device__(auto ridx) { return ridx % 2 == 0; };
for (size_t i = 0; i < segments.size(); i++) {