Fix spelling in documents (#6948)

* Update roxygen2 doc.

Co-authored-by: fis <jm.yuan@outlook.com>
This commit is contained in:
Andrew Ziem
2021-05-11 06:44:36 -06:00
committed by GitHub
parent 2a9979e256
commit 3e7e426b36
100 changed files with 284 additions and 284 deletions

View File

@@ -638,10 +638,10 @@ XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle,
bst_float const **out_result) {
API_BEGIN();
if (handle == nullptr) {
LOG(FATAL) << "Booster has not been intialized or has already been disposed.";
LOG(FATAL) << "Booster has not been initialized or has already been disposed.";
}
if (dmat == nullptr) {
LOG(FATAL) << "DMatrix has not been intialized or has already been disposed.";
LOG(FATAL) << "DMatrix has not been initialized or has already been disposed.";
}
auto config = Json::Load(StringView{c_json_config});

View File

@@ -36,7 +36,7 @@
return 0; // NOLINT(*)
#define CHECK_HANDLE() if (handle == nullptr) \
LOG(FATAL) << "DMatrix/Booster has not been intialized or has already been disposed.";
LOG(FATAL) << "DMatrix/Booster has not been initialized or has already been disposed.";
/*!
* \brief Set the last error message needed by C API

View File

@@ -1,7 +1,7 @@
/*!
* Copyright 2020 by XGBoost Contributors
*
* \brief An implemenation of Ryu algorithm:
* \brief An implementation of Ryu algorithm:
*
* https://dl.acm.org/citation.cfm?id=3192369
*
@@ -686,7 +686,7 @@ int32_t ToCharsFloatImpl(float f, char * const result) {
// This is an implementation for base 10 inspired by the one in libstdc++v3. The general
// scheme is by decomposing the value into multiple combination of base (which is 10) by
// mod, until the value is lesser than 10, then last char is just char '0' (ascii 48) plus
// mod, until the value is lesser than 10, then last char is just char '0' (ASCII 48) plus
// that value. Other popular implementations can be found in RapidJson and libc++ (in
// llvm-project), which uses the same general work flow with the same look up table, but
// probably with better performance as they are more complicated.

View File

@@ -55,7 +55,7 @@ namespace xgboost {
namespace common {
/*!
* \brief Split a string by delimiter
* \param s String to be splitted.
* \param s String to be split.
* \param delim The delimiter.
*/
inline std::vector<std::string> Split(const std::string& s, char delim) {

View File

@@ -253,7 +253,7 @@ __global__ void LaunchNKernel(int device_idx, size_t begin, size_t end,
* function as argument. Hence functions like `LaunchN` cannot use this wrapper.
*
* - With c++ initialization list `{}` syntax, you are forced to comply with the CUDA type
* spcification.
* specification.
*/
class LaunchKernel {
size_t shmem_size_;
@@ -930,7 +930,7 @@ class SegmentSorter {
// Items sorted within the group
caching_device_vector<T> ditems_;
// Original position of the items before they are sorted descendingly within its groups
// Original position of the items before they are sorted descending within their groups
caching_device_vector<uint32_t> doriginal_pos_;
// Segments within the original list that delineates the different groups

View File

@@ -81,7 +81,7 @@ class HistogramCuts {
}
// Getters. Cuts should be of no use after building histogram indices, but currently
// it's deeply linked with quantile_hist, gpu sketcher and gpu_hist. So we preserve
// they are deeply linked with quantile_hist, gpu sketcher and gpu_hist, so we preserve
// these for now.
std::vector<uint32_t> const& Ptrs() const { return cut_ptrs_.ConstHostVector(); }
std::vector<float> const& Values() const { return cut_values_.ConstHostVector(); }
@@ -247,7 +247,7 @@ struct GHistIndexMatrix {
// Create a global histogram matrix, given cut
void Init(DMatrix* p_fmat, int max_num_bins);
// specific method for sparse data as no posibility to reduce allocated memory
// specific method for sparse data as no possibility to reduce allocated memory
template <typename BinIdxType, typename GetOffset>
void SetIndexData(common::Span<BinIdxType> index_data_span,
size_t batch_threads, const SparsePage &batch,

View File

@@ -394,7 +394,7 @@ Json JsonReader::Parse() {
return ParseArray();
} else if ( c == '-' || std::isdigit(c) ||
c == 'N' || c == 'I') {
// For now we only accept `NaN`, not `nan` as the later violiates LR(1) with `null`.
// For now we only accept `NaN`, not `nan` as the latter violates LR(1) with `null`.
return ParseNumber();
} else if ( c == '\"' ) {
return ParseString();

View File

@@ -77,7 +77,7 @@ XGBOOST_DEVICE inline void Softmax(Iterator start, Iterator end) {
/*!
* \brief Find the maximum iterator within the iterators
* \param begin The begining iterator.
* \param begin The beginning iterator.
* \param end The end iterator.
* \return the iterator point to the maximum value.
* \tparam Iterator The type of the iterator.
@@ -107,7 +107,7 @@ inline float LogSum(float x, float y) {
/*!
* \brief perform numerically safe logsum
* \param begin The begining iterator.
* \param begin The beginning iterator.
* \param end The end iterator.
* \return the iterator point to the maximum value.
* \tparam Iterator The type of the iterator.
@@ -135,7 +135,7 @@ inline static bool CmpSecond(const std::pair<float, unsigned> &a,
return a.second > b.second;
}
// Redefined here to workaround a VC bug that doesn't support overloadng for integer
// Redefined here to workaround a VC bug that doesn't support overloading for integer
// types.
template <typename T>
XGBOOST_DEVICE typename std::enable_if<

View File

@@ -55,7 +55,7 @@ HostSketchContainer::CalcColumnSize(SparsePage const &batch,
std::vector<bst_feature_t> HostSketchContainer::LoadBalance(
SparsePage const &batch, bst_feature_t n_columns, size_t const nthreads) {
/* Some sparse datasets have their mass concentrating on small number of features. To
* avoid wating for a few threads running forever, we here distirbute different number
 * avoid waiting for a few threads running forever, we here distribute different numbers
* of columns to different threads according to number of entries.
*/
auto page = batch.GetView();

View File

@@ -184,9 +184,9 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
});
// Compute the index for both x and y (which of the element in a and b are used in each
// comparison) by scaning the binary merge path. Take output [(x_0, y_0), (x_0, y_1),
// comparison) by scanning the binary merge path. Take output [(x_0, y_0), (x_0, y_1),
// ...] as an example, the comparison between (x_0, y_0) adds 1 step in the merge path.
// Asumming y_0 is less than x_0 so this step is torward the end of y. After the
// Assuming y_0 is less than x_0 so this step is toward the end of y. After the
// comparison, index of y is incremented by 1 from y_0 to y_1, and at the same time, y_0
// is landed into output as the first element in merge result. The scan result is the
// subscript of x and y.
@@ -367,7 +367,7 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
size_t SketchContainer::ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_columns_ptr_in) {
/* There are 2 types of duplication. First is duplicated feature values, which comes
* from user input data. Second is duplicated sketching entries, which is generated by
* prunning or merging. We preserve the first type and remove the second type.
* pruning or merging. We preserve the first type and remove the second type.
*/
timer_.Start(__func__);
dh::safe_cuda(cudaSetDevice(device_));

View File

@@ -44,7 +44,7 @@ constexpr double kMaxGradient = 15.0;
constexpr double kMinHessian = 1e-16; // Ensure that no data point gets zero hessian
constexpr double kMaxHessian = 15.0;
constexpr double kEps = 1e-12; // A denomitor in a fraction should not be too small
constexpr double kEps = 1e-12; // A denominator in a fraction should not be too small
// Clip (limit) x to fit range [x_min, x_max].
// If x < x_min, return x_min; if x > x_max, return x_max; if x_min <= x <= x_max, return x.

View File

@@ -52,7 +52,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range,
*
* If you use it in a function that can be compiled by both nvcc and host
* compiler, the behaviour is un-defined! Because your function is NOT
* duplicated by `CompiledWithCuda`. At link time, cuda compiler resolution
* duplicated by `CompiledWithCuda`. At link time, CUDA compiler resolution
* will merge functions with same signature.
*/
template <bool CompiledWithCuda = WITH_CUDA()>
@@ -155,7 +155,7 @@ class Transform {
_func, shard_range, UnpackHDVOnDevice(_vectors)...);
}
#else
/*! \brief Dummy funtion defined when compiling for CPU. */
/*! \brief Dummy function defined when compiling for CPU. */
template <typename std::enable_if<!CompiledWithCuda>::type* = nullptr,
typename... HDV>
void LaunchCUDA(Functor _func, HDV*...) const {

View File

@@ -36,7 +36,7 @@ Version::TripletT Version::Load(Json const& in) {
Version::TripletT Version::Load(dmlc::Stream* fi) {
XGBoostVersionT major{0}, minor{0}, patch{0};
// This is only used in DMatrix serialization, so doesn't break model compability.
// This is only used in DMatrix serialization, so doesn't break model compatibility.
std::string msg { "Incorrect version format found in binary file. "
"Binary file from XGBoost < 1.0.0 is no longer supported. "
"Please generate it again." };

View File

@@ -17,7 +17,7 @@ struct Version {
using TripletT = std::tuple<XGBoostVersionT, XGBoostVersionT, XGBoostVersionT>;
static const TripletT kInvalid;
// Save/Load version info to Json document
// Save/Load version info to JSON document
static TripletT Load(Json const& in);
static void Save(Json* out);

View File

@@ -42,7 +42,7 @@ namespace data {
* This abstraction allows us to read through different sparse matrix formats
* using the same interface. In particular we can write a DMatrix constructor
* that uses the same code to construct itself from a CSR matrix, CSC matrix,
* dense matrix, csv, libsvm file, or potentially other formats. To see why this
* dense matrix, CSV, LIBSVM file, or potentially other formats. To see why this
* is necessary, imagine we have 5 external matrix formats and 5 internal
* DMatrix types where each DMatrix needs a custom constructor for each possible
* input. The number of constructors is 5*5=25. Using an abstraction over the
@@ -736,7 +736,7 @@ class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
size_t columns_;
size_t row_offset_;
// at the beinning.
// at the beginning.
bool at_first_;
// handle to the iterator,
DataIterHandle data_handle_;

View File

@@ -187,7 +187,7 @@ template <typename AdapterBatchT>
void CopyDataToEllpack(const AdapterBatchT& batch, EllpackPageImpl* dst,
int device_idx, float missing) {
// Some witchcraft happens here
// The goal is to copy valid elements out of the input to an ellpack matrix
// The goal is to copy valid elements out of the input to an ELLPACK matrix
// with a given row stride, using no extra working memory Standard stream
// compaction needs to be modified to do this, so we manually define a
// segmented stream compaction via operators on an inclusive scan. The output

View File

@@ -13,13 +13,13 @@
#include <thrust/binary_search.h>
namespace xgboost {
/** \brief Struct for accessing and manipulating an ellpack matrix on the
/** \brief Struct for accessing and manipulating an ELLPACK matrix on the
* device. Does not own underlying memory and may be trivially copied into
* kernels.*/
struct EllpackDeviceAccessor {
/*! \brief Whether or not if the matrix is dense. */
bool is_dense;
/*! \brief Row length for ELLPack, equal to number of features. */
/*! \brief Row length for ELLPACK, equal to number of features. */
size_t row_stride;
size_t base_rowid{};
size_t n_rows{};
@@ -197,11 +197,11 @@ class EllpackPageImpl {
public:
/*! \brief Whether or not if the matrix is dense. */
bool is_dense;
/*! \brief Row length for ELLPack. */
/*! \brief Row length for ELLPACK. */
size_t row_stride;
size_t base_rowid{0};
size_t n_rows{};
/*! \brief global index of histogram, which is stored in ELLPack format. */
/*! \brief global index of histogram, which is stored in ELLPACK format. */
HostDeviceVector<common::CompressedByteT> gidx_buffer;
private:

View File

@@ -563,7 +563,7 @@ GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
// GPU_Hist by default has prediction cache calculated from quantile values,
// so GPU Predictor is not used for training dataset. But when XGBoost
// performs continue training with an existing model, the prediction cache is
// not availbale and number of trees doesn't equal zero, the whole training
// not available and number of trees doesn't equal zero, the whole training
// dataset got copied into GPU for precise prediction. This condition tries
// to avoid such copy by calling CPU Predictor instead.
if ((out_pred && out_pred->Size() == 0) && (model_.param.num_trees != 0) &&
@@ -831,7 +831,7 @@ class Dart : public GBTree {
#pragma omp parallel for
for (omp_ulong ridx = 0; ridx < n_rows; ++ridx) {
const size_t offset = ridx * n_groups + group;
// Need to remove the base margin from indiviual tree.
// Need to remove the base margin from individual tree.
h_out_predts[offset] +=
(h_predts[offset] - model_.learner_model_param->base_score) * w;
}

View File

@@ -5,9 +5,9 @@
// possible for a valid device ordinal to be present for non GPU builds. However, it is possible
// for an invalid device ordinal to be specified in GPU builds - to train/predict and/or compute
// the metrics on CPU. To accommodate these scenarios, the following is done for the metrics
// accelarated on the GPU.
// accelerated on the GPU.
// - An internal GPU registry holds all the GPU metric types (defined in the .cu file)
// - An instance of the appropriate gpu metric type is created when a device ordinal is present
// - An instance of the appropriate GPU metric type is created when a device ordinal is present
// - If the creation is successful, the metric computation is done on the device
// - else, it falls back on the CPU
// - The GPU metric types are *only* registered when xgboost is built for GPUs
@@ -561,7 +561,7 @@ XGBOOST_REGISTER_METRIC(MAP, "map")
.set_body([](const char* param) { return new EvalMAP("map", param); });
XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
.describe("Negative log partial likelihood of Cox proportioanl hazards model.")
.describe("Negative log partial likelihood of Cox proportional hazards model.")
.set_body([](const char*) { return new EvalCox(); });
} // namespace metric
} // namespace xgboost

View File

@@ -271,7 +271,7 @@ class PoissonRegression : public ObjFunction {
DMLC_REGISTER_PARAMETER(PoissonRegressionParam);
XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson")
.describe("Possion regression for count data.")
.describe("Poisson regression for count data.")
.set_body([]() { return new PoissonRegression(); });

View File

@@ -17,7 +17,7 @@ namespace xgboost {
/*!
* \brief Feature interaction constraint implementation for CPU tree updaters.
*
* The interface is similiar to the one for GPU Hist.
* The interface is similar to the one for GPU Hist.
*/
class FeatureInteractionConstraintHost {
protected:

View File

@@ -125,7 +125,7 @@ struct UpdateNumeric {
EvaluateSplitInputs<GradientSumT> const &inputs,
DeviceSplitCandidate *best_split) {
// Use pointer from cut to indicate begin and end of bins for each feature.
uint32_t gidx_begin = inputs.feature_segments[fidx]; // begining bin
uint32_t gidx_begin = inputs.feature_segments[fidx]; // beginning bin
int split_gidx = (scan_begin + threadIdx.x) - 1;
float fvalue;
if (split_gidx < static_cast<int>(gidx_begin)) {
@@ -152,7 +152,7 @@ __device__ void EvaluateFeature(
TempStorageT* temp_storage // temp memory for cub operations
) {
// Use pointer from cut to indicate begin and end of bins for each feature.
uint32_t gidx_begin = inputs.feature_segments[fidx]; // begining bin
uint32_t gidx_begin = inputs.feature_segments[fidx]; // beginning bin
uint32_t gidx_end =
inputs.feature_segments[fidx + 1]; // end bin for i^th feature
auto feature_hist = inputs.gradient_histogram.subspan(gidx_begin, gidx_end - gidx_begin);

View File

@@ -124,7 +124,7 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
* Processing Systems (pp. 3146-3154).
* \see Zhu, R. (2016). Gradient-based sampling: An adaptive importance sampling for least-squares.
* In Advances in Neural Information Processing Systems (pp. 406-414).
* \see Ohlsson, E. (1998). Sequential poisson sampling. Journal of official Statistics, 14(2), 149.
 * \see Ohlsson, E. (1998). Sequential Poisson sampling. Journal of Official Statistics, 14(2), 149.
*/
class GradientBasedSampler {
public:

View File

@@ -17,7 +17,7 @@
namespace xgboost {
namespace tree {
// Following 2 functions are slightly modifed version of fbcuda.
// Following 2 functions are slightly modified version of fbcuda.
/* \brief Constructs a rounding factor used to truncate elements in a sum such that the
sum of the truncated elements is the same no matter what the order of the sum is.

View File

@@ -76,7 +76,7 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
// the criteria to use for ranking splits
std::string split_evaluator;
// ------ From cpu quantile histogram -------.
// ------ From CPU quantile histogram -------.
// percentage threshold for treating a feature as sparse
// e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse
double sparse_threshold;
@@ -316,7 +316,7 @@ XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, StatT stat) {
return CalcGain(p, stat.GetGrad(), stat.GetHess());
}
// Used in gpu code where GradientPair is used for gradient sum, not GradStats.
// Used in GPU code where GradientPair is used for gradient sum, not GradStats.
template <typename TrainingParams, typename GpairT>
XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) {
return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess());
@@ -484,7 +484,7 @@ using SplitEntry = SplitEntryContainer<GradStats>;
/*
* \brief Parse the interaction constraints from string.
* \param constraint_str String storing the interfaction constraints:
* \param constraint_str String storing the interaction constraints:
*
* Example input string:
*

View File

@@ -157,7 +157,7 @@ TreeGenerator* TreeGenerator::Create(std::string const& attrs, FeatureMap const&
if (pos != std::string::npos) {
name = attrs.substr(0, pos);
params = attrs.substr(pos+1, attrs.length() - pos - 1);
// Eliminate all occurances of single quote string.
// Eliminate all occurrences of single quote string.
size_t pos = std::string::npos;
while ((pos = params.find('\'')) != std::string::npos) {
params.replace(pos, 1, "\"");
@@ -1069,7 +1069,7 @@ void RegTree::CalculateContributionsApprox(const RegTree::FVec &feat,
// Used by TreeShap
// data we keep about our decision path
// note that pweight is included for convenience and is not tied with the other attributes
// the pweight of the i'th path element is the permuation weight of paths with i-1 ones in them
// the pweight of the i'th path element is the permutation weight of paths with i-1 ones in them
struct PathElement {
int feature_index;
bst_float zero_fraction;
@@ -1123,7 +1123,7 @@ void UnwindPath(PathElement *unique_path, unsigned unique_depth,
}
}
// determine what the total permuation weight would be if
// determine what the total permutation weight would be if
// we unwound a previous extension in the decision path
bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_depth,
unsigned path_index) {

View File

@@ -196,8 +196,8 @@ class BaseMaker: public TreeUpdater {
}
}
/*!
* \brief this is helper function uses column based data structure,
* reset the positions to the lastest one
* \brief This is a helper function that uses a column based data structure
 * and resets the positions to the latest one
* \param nodes the set of nodes that contains the split to be used
* \param p_fmat feature matrix needed for tree construction
* \param tree the regression tree structure

View File

@@ -549,7 +549,7 @@ struct GPUHistMakerDevice {
bst_float weight = evaluator.CalcWeight(
pos, param_d, GradStats{d_node_sum_gradients[pos]});
static_assert(!std::is_const<decltype(out_preds_d)>::value, "");
auto v_predt = out_preds_d; // for some reaon out_preds_d is const by both nvcc and clang.
auto v_predt = out_preds_d; // for some reason out_preds_d is const by both nvcc and clang.
v_predt[d_ridx[local_idx]] += weight * param_d.learning_rate;
});
row_partitioner.reset();

View File

@@ -401,7 +401,7 @@ class CQHistMaker: public HistMaker {
for (auto& sketch : sketchs_) {
sketch.Init(info.num_row_, this->param_.sketch_eps);
}
// intitialize the summary array
// initialize the summary array
summary_array_.resize(sketchs_.size());
// setup maximum size
unsigned max_size = this->param_.MaxSketchSize();
@@ -409,7 +409,7 @@ class CQHistMaker: public HistMaker {
summary_array_[i].Reserve(max_size);
}
{
// get smmary
// get summary
thread_sketch_.resize(omp_get_max_threads());
// TWOPASS: use the real set + split set in the column iteration.

View File

@@ -441,7 +441,7 @@ class QuantileHistMaker: public TreeUpdater {
std::unique_ptr<ExpandQueue> qexpand_loss_guided_;
std::vector<ExpandEntry> qexpand_depth_wise_;
// key is the node id which should be calculated by Subtraction Trick, value is the node which
// provides the evidence for substracts
// provides the evidence for subtraction
std::vector<ExpandEntry> nodes_for_subtraction_trick_;
// list of nodes whose histograms would be built explicitly.
std::vector<ExpandEntry> nodes_for_explicit_hist_build_;

View File

@@ -123,7 +123,7 @@ class TreeRefresher: public TreeUpdater {
// start from groups that belongs to current data
auto pid = 0;
gstats[pid].Add(gpair[ridx]);
// tranverse tree
// traverse tree
while (!tree[pid].IsLeaf()) {
unsigned split_index = tree[pid].SplitIndex();
pid = tree.GetNext(pid, feat.GetFvalue(split_index), feat.IsMissing(split_index));