xgboost/src/objective/regression_obj_gpu.cu
Rory Mitchell ccf80703ef
Clang-tidy static analysis (#3222)
* Clang-tidy static analysis

* Modernise checks

* Google coding standard checks

* Identifier renaming according to Google style
2018-04-19 18:57:13 +12:00

219 lines
7.3 KiB
Plaintext

/*!
* Copyright 2017 XGBoost contributors
*/
// GPU implementation of objective function.
// Necessary to avoid extra copying of data to CPU.
#include <dmlc/omp.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <xgboost/logging.h>
#include <xgboost/objective.h>
#include <cmath>
#include <memory>
#include <vector>
#include "../common/device_helpers.cuh"
#include "../common/host_device_vector.h"
#include "./regression_loss.h"
namespace xgboost {
namespace obj {
// Bring dh::DVec into this namespace; DeviceData below declares its buffers with it.
using dh::DVec;
// NOTE(review): presumably tags this translation unit so the registrations at
// the bottom of the file can be force-linked from elsewhere -- confirm against
// the dmlc registry documentation.
DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
// Parameters understood by the GPU regression objectives in this file.
struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
  // factor multiplied into the weight of examples whose label is 1
  float scale_pos_weight;
  // requested number of GPUs; the objective rejects 0 in Configure()
  int n_gpus;
  // ordinal of the device on which the kernels are launched
  int gpu_id;

  // field declarations: defaults, bounds and help strings
  DMLC_DECLARE_PARAMETER(GPURegLossParam) {
    DMLC_DECLARE_FIELD(scale_pos_weight)
        .set_default(1.0f)
        .set_lower_bound(0.0f)
        .describe("Scale the weight of positive examples by this factor");
    DMLC_DECLARE_FIELD(n_gpus)
        .set_default(1)
        .set_lower_bound(-1)
        .describe("Number of GPUs to use for multi-gpu algorithms (NOT IMPLEMENTED)");
    DMLC_DECLARE_FIELD(gpu_id)
        .set_default(0)
        .set_lower_bound(0)
        .describe("gpu to use for objective function evaluation");
  }
};
// GPU kernel for gradient computation.
// Each thread handles at most one element; threads whose flat index is >= n
// exit immediately, so the launch only has to supply at least n threads in a
// 1-D grid.
//   out_gpair         [out] gradient pair written for every element
//   label_correct     [in/out] single flag; set to 0 (atomically) as soon as
//                     one label fails Loss::CheckLabel
//   preds             predictions; passed through Loss::PredTransform before
//                     the gradients are evaluated
//   labels            labels
//   weights           per-element weights, or nullptr to use 1.0f everywhere
//   n                 number of elements
//   scale_pos_weight  extra factor applied to the weight of elements whose
//                     label is exactly 1.0f
template<typename Loss>
__global__ void get_gradient_k
(GradientPair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct,
 const float * __restrict__ preds, const float * __restrict__ labels,
 const float * __restrict__ weights, size_t n, float scale_pos_weight) {
  // The host passes a size_t element count, so both the count and the index are
  // kept in size_t: an int count would be truncated and a signed 32-bit index
  // could wrap negative in the last blocks and slip past the bounds check.
  const size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (i >= n)
    return;
  float p = Loss::PredTransform(preds[i]);
  float w = weights == nullptr ? 1.0f : weights[i];
  float label = labels[i];
  if (label == 1.0f)
    w *= scale_pos_weight;
  // the gradient is still written for a bad label; the host inspects the flag
  // after the kernel has finished
  if (!Loss::CheckLabel(label))
    atomicAnd(label_correct, 0);
  out_gpair[i] = GradientPair
    (Loss::FirstOrderGradient(p, label) * w, Loss::SecondOrderGradient(p, label) * w);
}
// GPU kernel for prediction transformation.
// Applies Loss::PredTransform in place to each of the n entries of preds.
// Each thread handles at most one element; threads whose flat index is >= n
// exit immediately, so the launch only has to supply at least n threads in a
// 1-D grid.
template<typename Loss>
__global__ void pred_transform_k(float * __restrict__ preds, size_t n) {
  // size_t count and index: the host passes a size_t, and a signed 32-bit index
  // could wrap negative in the last blocks and slip past the bounds check.
  const size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (i >= n)
    return;
  preds[i] = Loss::PredTransform(preds[i]);
}
// Regression loss objective whose gradient computation and prediction
// transform run as CUDA kernels on the device selected by the gpu_id parameter.
// Loss is a policy type; this class uses its static members PredTransform,
// CheckLabel, FirstOrderGradient, SecondOrderGradient, LabelErrorMsg,
// DefaultEvalMetric and ProbToMargin.
template<typename Loss>
class GPURegLossObj : public ObjFunction {
 protected:
  // Device buffers handed out by a BulkAllocator: labels, weights and a
  // one-element flag that the gradient kernel clears when it meets a bad label.
  struct DeviceData {
    DVec<float> labels, weights;
    DVec<unsigned int> label_correct;
    // Request n labels, n weights and the single flag from ba on device_idx.
    // NOTE(review): the literal `false` is presumably the allocator's "silent"
    // switch -- confirm against dh::BulkAllocator::Allocate.
    DeviceData(dh::BulkAllocator<dh::MemoryType::kDevice>* ba, int device_idx, size_t n) {
      ba->Allocate(device_idx, false,
                   &labels, n,
                   &weights, n,
                   &label_correct, 1);
    }
    // Size of the label buffer as reported by DVec::Size().
    size_t Size() const { return labels.Size(); }
  };

  // true once labels (and weights, if any) were uploaded into *data_
  bool copied_;
  std::unique_ptr<dh::BulkAllocator<dh::MemoryType::kDevice>> ba_;
  std::unique_ptr<DeviceData> data_;
  // NOTE(review): these two vectors are resized in LazyResize() but not read
  // anywhere inside this class; kept because they are protected members.
  HostDeviceVector<bst_float> preds_d_;
  HostDeviceVector<GradientPair> out_gpair_d_;

  // Allocate device data for n elements; do nothing if enough memory is
  // allocated already. A reallocation clears copied_ so that the next gradient
  // call uploads labels and weights again.
  void LazyResize(size_t n) {
    if (data_.get() != nullptr && data_->Size() >= n)
      return;
    copied_ = false;
    // free the old data and allocate the new data
    ba_.reset(new dh::BulkAllocator<dh::MemoryType::kDevice>());
    // The buffers must live on the device the kernels are launched on
    // (GetGradientDevice selects param_.gpu_id); allocating on a fixed device 0
    // would hand the kernel pointers into another device's memory.
    data_.reset(new DeviceData(ba_.get(), param_.gpu_id, n));
    preds_d_.Resize(n, 0.0f, param_.gpu_id);
    out_gpair_d_.Resize(n, GradientPair(), param_.gpu_id);
  }

 public:
  GPURegLossObj() : copied_(false), preds_d_(0, -1), out_gpair_d_({}, -1) {}

  // Read the parameters from args (unknown keys are tolerated) and reject
  // n_gpus == 0.
  void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
    param_.InitAllowUnknown(args);
    CHECK(param_.n_gpus != 0) << "Must have at least one device";
  }

  // Compute one gradient pair per prediction into out_gpair.
  // Fails (via CHECK) when there are no labels, when the number of predictions
  // differs from the number of labels, or when weights are given but their
  // count differs from the number of predictions.
  void GetGradient(HostDeviceVector<float>* preds,
                   const MetaInfo &info,
                   int iter,
                   HostDeviceVector<GradientPair>* out_gpair) override {
    CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
    CHECK_EQ(preds->Size(), info.labels_.size())
      << "labels are not correctly provided"
      << "preds.size=" << preds->Size() << ", label.size=" << info.labels_.size();
    size_t ndata = preds->Size();
    // ndata weights are copied to the device and indexed by the kernel, so a
    // shorter weight vector would be read past its end.
    if (info.weights_.size() > 0) {
      CHECK_EQ(info.weights_.size(), ndata)
        << "Number of weights should be equal to number of data points.";
    }
    out_gpair->Resize(ndata, GradientPair(), param_.gpu_id);
    LazyResize(ndata);
    GetGradientDevice(preds->DevicePointer(param_.gpu_id), info, iter,
                      out_gpair->DevicePointer(param_.gpu_id), ndata);
  }

 private:
  // Launch the gradient kernel over n elements. preds and out_gpair are device
  // pointers; iter is not used here.
  void GetGradientDevice(float* preds,
                         const MetaInfo &info,
                         int iter,
                         GradientPair* out_gpair, size_t n) {
    dh::safe_cuda(cudaSetDevice(param_.gpu_id));
    DeviceData& d = *data_;
    // the kernel clears this flag when a label fails Loss::CheckLabel
    d.label_correct.Fill(1);
    // only copy the labels and weights once, similar to how the data is copied;
    // later calls reuse the device copy until LazyResize() reallocates
    if (!copied_) {
      thrust::copy(info.labels_.begin(), info.labels_.begin() + n,
                   d.labels.tbegin());
      if (info.weights_.size() > 0) {
        thrust::copy(info.weights_.begin(), info.weights_.begin() + n,
                     d.weights.tbegin());
      }
      copied_ = true;
    }
    // run the kernel; a null weight pointer makes it use 1.0f for every element
    const int block = 256;
    get_gradient_k<Loss><<<dh::DivRoundUp(n, block), block>>>
      (out_gpair, d.label_correct.Data(), preds,
       d.labels.Data(), info.weights_.size() > 0 ? d.weights.Data() : nullptr,
       n, param_.scale_pos_weight);
    dh::safe_cuda(cudaGetLastError());
    // copy the label flag back from the GPU
    unsigned int label_correct_h;
    thrust::copy_n(d.label_correct.tbegin(), 1, &label_correct_h);
    bool label_correct = label_correct_h != 0;
    if (!label_correct) {
      LOG(FATAL) << Loss::LabelErrorMsg();
    }
  }

 public:
  const char* DefaultEvalMetric() const override {
    return Loss::DefaultEvalMetric();
  }

  // Apply Loss::PredTransform in place to every prediction.
  void PredTransform(HostDeviceVector<float> *io_preds) override {
    PredTransformDevice(io_preds->DevicePointer(param_.gpu_id), io_preds->Size());
  }

  // Apply Loss::PredTransform in place to the n values at device pointer preds
  // and wait for the kernel to finish.
  void PredTransformDevice(float* preds, size_t n) {
    // Nothing to transform; a launch with zero blocks would be rejected as an
    // invalid configuration and abort through safe_cuda.
    if (n == 0)
      return;
    dh::safe_cuda(cudaSetDevice(param_.gpu_id));
    const int block = 256;
    pred_transform_k<Loss><<<dh::DivRoundUp(n, block), block>>>(preds, n);
    dh::safe_cuda(cudaGetLastError());
    dh::safe_cuda(cudaDeviceSynchronize());
  }

  float ProbToMargin(float base_score) const override {
    return Loss::ProbToMargin(base_score);
  }

 protected:
  GPURegLossParam param_;
};
// Register the parameter struct, then one objective per loss policy. Every
// "gpu:*" name creates a GPURegLossObj instantiated with the matching loss.
DMLC_REGISTER_PARAMETER(GPURegLossParam);

XGBOOST_REGISTER_OBJECTIVE(GPULinearRegression, "gpu:reg:linear")
    .describe("Linear regression (computed on GPU).")
    .set_body([]() -> ObjFunction* {
      return new GPURegLossObj<LinearSquareLoss>();
    });

XGBOOST_REGISTER_OBJECTIVE(GPULogisticRegression, "gpu:reg:logistic")
    .describe("Logistic regression for probability regression task (computed on GPU).")
    .set_body([]() -> ObjFunction* {
      return new GPURegLossObj<LogisticRegression>();
    });

XGBOOST_REGISTER_OBJECTIVE(GPULogisticClassification, "gpu:binary:logistic")
    .describe("Logistic regression for binary classification task (computed on GPU).")
    .set_body([]() -> ObjFunction* {
      return new GPURegLossObj<LogisticClassification>();
    });

XGBOOST_REGISTER_OBJECTIVE(GPULogisticRaw, "gpu:binary:logitraw")
    .describe("Logistic regression for classification, "
              "output score before logistic transformation (computed on GPU)")
    .set_body([]() -> ObjFunction* {
      return new GPURegLossObj<LogisticRaw>();
    });
} // namespace obj
} // namespace xgboost