merge changes Jun092023
This commit is contained in:
@@ -499,9 +499,13 @@ class QuantileError : public MetricNoCache {
|
||||
|
||||
const char* Name() const override { return "quantile"; }
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& name = get<String const>(in["name"]);
|
||||
CHECK_EQ(name, "quantile");
|
||||
FromJson(in["quantile_loss_param"], &param_);
|
||||
auto const& obj = get<Object const>(in);
|
||||
auto it = obj.find("quantile_loss_param");
|
||||
if (it != obj.cend()) {
|
||||
FromJson(it->second, &param_);
|
||||
auto const& name = get<String const>(in["name"]);
|
||||
CHECK_EQ(name, "quantile");
|
||||
}
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
|
||||
@@ -152,7 +152,7 @@ void PredictByAllTrees(gbm::GBTreeModel const &model, std::uint32_t const tree_b
|
||||
} else {
|
||||
for (std::size_t i = 0; i < block_size; ++i) {
|
||||
out_predt(predict_offset + i, gid) +=
|
||||
scalar::PredValueByOneTree<true>(thread_temp[offset + i], tree, cats);
|
||||
scalar::PredValueByOneTree<false>(thread_temp[offset + i], tree, cats);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -430,8 +430,7 @@ class ColumnSplitHelper {
|
||||
<< "column-split prediction is only supported for distributed training";
|
||||
|
||||
for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
CHECK_EQ(out_preds->size(),
|
||||
p_fmat->Info().num_row_ * model_.learner_model_param->num_output_group);
|
||||
CHECK_EQ(out_preds->size(), p_fmat->Info().num_row_ * (tree_end_ - tree_begin_));
|
||||
PredictBatchKernel<SparsePageView, kBlockOfRowsSize, true>(SparsePageView{&batch}, out_preds);
|
||||
}
|
||||
}
|
||||
@@ -543,8 +542,12 @@ class ColumnSplitHelper {
|
||||
for (size_t tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
|
||||
auto const gid = model_.tree_info[tree_id];
|
||||
for (size_t i = 0; i < block_size; ++i) {
|
||||
preds[(predict_offset + i) * num_group + gid] +=
|
||||
PredictOneTree<predict_leaf>(tree_id, batch_offset + i);
|
||||
auto const result = PredictOneTree<predict_leaf>(tree_id, batch_offset + i);
|
||||
if constexpr (predict_leaf) {
|
||||
preds[(predict_offset + i) * (tree_end_ - tree_begin_) + tree_id] = result;
|
||||
} else {
|
||||
preds[(predict_offset + i) * num_group + gid] += result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -645,6 +648,9 @@ class CPUPredictor : public Predictor {
|
||||
void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
|
||||
gbm::GBTreeModel const &model, int32_t tree_begin, int32_t tree_end) const {
|
||||
if (p_fmat->Info().IsColumnSplit()) {
|
||||
CHECK(!model.learner_model_param->IsVectorLeaf())
|
||||
<< "Predict DMatrix with column split" << MTNotImplemented();
|
||||
|
||||
ColumnSplitHelper helper(this->ctx_->Threads(), model, tree_begin, tree_end);
|
||||
helper.PredictDMatrix(p_fmat, out_preds);
|
||||
return;
|
||||
@@ -743,6 +749,8 @@ class CPUPredictor : public Predictor {
|
||||
unsigned tree_end) const override {
|
||||
auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
|
||||
CHECK(proxy)<< "Inplace predict accepts only DMatrixProxy as input.";
|
||||
CHECK(!p_m->Info().IsColumnSplit())
|
||||
<< "Inplace predict support for column-wise data split is not yet implemented.";
|
||||
auto x = proxy->Adapter();
|
||||
if (x.type() == typeid(std::shared_ptr<data::DenseAdapter>)) {
|
||||
this->DispatchedInplacePredict<data::DenseAdapter, kBlockOfRowsSize>(
|
||||
@@ -773,6 +781,9 @@ class CPUPredictor : public Predictor {
|
||||
out_preds->resize(model.learner_model_param->num_output_group);
|
||||
|
||||
if (is_column_split) {
|
||||
CHECK(!model.learner_model_param->IsVectorLeaf())
|
||||
<< "Predict instance with column split" << MTNotImplemented();
|
||||
|
||||
ColumnSplitHelper helper(this->ctx_->Threads(), model, 0, ntree_limit);
|
||||
helper.PredictInstance(inst, out_preds);
|
||||
return;
|
||||
@@ -802,6 +813,9 @@ class CPUPredictor : public Predictor {
|
||||
preds.resize(info.num_row_ * ntree_limit);
|
||||
|
||||
if (p_fmat->Info().IsColumnSplit()) {
|
||||
CHECK(!model.learner_model_param->IsVectorLeaf())
|
||||
<< "Predict leaf with column split" << MTNotImplemented();
|
||||
|
||||
ColumnSplitHelper helper(n_threads, model, 0, ntree_limit);
|
||||
helper.PredictLeaf(p_fmat, &preds);
|
||||
return;
|
||||
|
||||
@@ -302,7 +302,7 @@ struct GPUHistMakerDevice {
|
||||
matrix.feature_segments,
|
||||
matrix.gidx_fvalue_map,
|
||||
matrix.min_fvalue,
|
||||
matrix.is_dense
|
||||
matrix.is_dense && !collective::IsDistributed()
|
||||
};
|
||||
auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs);
|
||||
return split;
|
||||
@@ -316,11 +316,11 @@ struct GPUHistMakerDevice {
|
||||
std::vector<bst_node_t> nidx(2 * candidates.size());
|
||||
auto h_node_inputs = pinned2.GetSpan<EvaluateSplitInputs>(2 * candidates.size());
|
||||
auto matrix = page->GetDeviceAccessor(ctx_->gpu_id);
|
||||
EvaluateSplitSharedInputs shared_inputs{
|
||||
GPUTrainingParam{param}, *quantiser, feature_types, matrix.feature_segments,
|
||||
matrix.gidx_fvalue_map, matrix.min_fvalue,
|
||||
matrix.is_dense
|
||||
};
|
||||
EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param}, *quantiser, feature_types,
|
||||
matrix.feature_segments, matrix.gidx_fvalue_map,
|
||||
matrix.min_fvalue,
|
||||
// is_dense represents the local data
|
||||
matrix.is_dense && !collective::IsDistributed()};
|
||||
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
|
||||
// Store the feature set ptrs so they dont go out of scope before the kernel is called
|
||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
|
||||
|
||||
@@ -419,6 +419,7 @@ class HistBuilder {
|
||||
|
||||
CPUExpandEntry InitRoot(DMatrix *p_fmat, linalg::MatrixView<GradientPair const> gpair,
|
||||
RegTree *p_tree) {
|
||||
monitor_->Start(__func__);
|
||||
CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0));
|
||||
|
||||
std::size_t page_id = 0;
|
||||
@@ -434,7 +435,7 @@ class HistBuilder {
|
||||
|
||||
{
|
||||
GradientPairPrecise grad_stat;
|
||||
if (p_fmat->IsDense()) {
|
||||
if (p_fmat->IsDense() && !collective::IsDistributed()) {
|
||||
/**
|
||||
* Specialized code for dense data: For dense data (with no missing value), the sum
|
||||
* of gradient histogram is equal to snode[nid]
|
||||
@@ -475,12 +476,14 @@ class HistBuilder {
|
||||
node = entries.front();
|
||||
}
|
||||
|
||||
monitor_->Stop(__func__);
|
||||
return node;
|
||||
}
|
||||
|
||||
void BuildHistogram(DMatrix *p_fmat, RegTree *p_tree,
|
||||
std::vector<CPUExpandEntry> const &valid_candidates,
|
||||
linalg::MatrixView<GradientPair const> gpair) {
|
||||
monitor_->Start(__func__);
|
||||
std::vector<CPUExpandEntry> nodes_to_build(valid_candidates.size());
|
||||
std::vector<CPUExpandEntry> nodes_to_sub(valid_candidates.size());
|
||||
|
||||
@@ -508,6 +511,7 @@ class HistBuilder {
|
||||
nodes_to_sub, gpair.Values());
|
||||
++page_id;
|
||||
}
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
void UpdatePosition(DMatrix *p_fmat, RegTree const *p_tree,
|
||||
@@ -525,6 +529,7 @@ class HistBuilder {
|
||||
std::vector<bst_node_t> *p_out_position) {
|
||||
monitor_->Start(__func__);
|
||||
if (!task_->UpdateTreeLeaf()) {
|
||||
monitor_->Stop(__func__);
|
||||
return;
|
||||
}
|
||||
for (auto const &part : partitioner_) {
|
||||
|
||||
Reference in New Issue
Block a user