Fix feature weights with multiple column sampling. (#8100)
This commit is contained in:
@@ -7,8 +7,7 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
std::shared_ptr<HostDeviceVector<bst_feature_t>> ColumnSampler::ColSample(
|
||||
std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features,
|
||||
float colsample) {
|
||||
std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features, float colsample) {
|
||||
if (colsample == 1.0f) {
|
||||
return p_features;
|
||||
}
|
||||
@@ -20,19 +19,21 @@ std::shared_ptr<HostDeviceVector<bst_feature_t>> ColumnSampler::ColSample(
|
||||
auto &new_features = *p_new_features;
|
||||
|
||||
if (feature_weights_.size() != 0) {
|
||||
new_features.HostVector() = WeightedSamplingWithoutReplacement(
|
||||
p_features->HostVector(), feature_weights_, n);
|
||||
auto const &h_features = p_features->HostVector();
|
||||
std::vector<float> weights(h_features.size());
|
||||
for (size_t i = 0; i < h_features.size(); ++i) {
|
||||
weights[i] = feature_weights_[h_features[i]];
|
||||
}
|
||||
new_features.HostVector() =
|
||||
WeightedSamplingWithoutReplacement(p_features->HostVector(), weights, n);
|
||||
} else {
|
||||
new_features.Resize(features.size());
|
||||
std::copy(features.begin(), features.end(),
|
||||
new_features.HostVector().begin());
|
||||
std::shuffle(new_features.HostVector().begin(),
|
||||
new_features.HostVector().end(), rng_);
|
||||
std::copy(features.begin(), features.end(), new_features.HostVector().begin());
|
||||
std::shuffle(new_features.HostVector().begin(), new_features.HostVector().end(), rng_);
|
||||
new_features.Resize(n);
|
||||
}
|
||||
std::sort(new_features.HostVector().begin(), new_features.HostVector().end());
|
||||
return p_new_features;
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user