Fix typos (#9731)

parent 6755179e77
commit 2cfc90e8db

doc/faq.rst
@@ -10,14 +10,14 @@ How to tune parameters
 See :doc:`Parameter Tuning Guide </tutorials/param_tuning>`.
 
 ************************
-Description on the model
+Description of the model
 ************************
 See :doc:`Introduction to Boosted Trees </tutorials/model>`.
 
 ********************
 I have a big dataset
 ********************
-XGBoost is designed to be memory efficient. Usually it can handle problems as long as the data fit into your memory.
+XGBoost is designed to be memory efficient. Usually it can handle problems as long as the data fits into your memory.
 This usually means millions of instances.
 
 If you are running out of memory, checkout the tutorial page for using :doc:`distributed training </tutorials/index>` with one of the many frameworks, or the :doc:`external memory version </tutorials/external_memory>` for using external memory.
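As a side note on the external-memory option mentioned above: the Python package exposes it through ``xgboost.DataIter``, which streams data batch by batch instead of loading everything at once. A minimal sketch, assuming a handful of in-memory batches standing in for on-disk shards::

    import os
    import numpy as np
    import xgboost

    # Hypothetical data: four small batches standing in for on-disk shards.
    rng = np.random.default_rng(0)
    batches = [(rng.normal(size=(256, 8)), rng.integers(0, 2, size=256)) for _ in range(4)]

    class Batches(xgboost.DataIter):
        def __init__(self):
            self._it = 0
            # cache_prefix tells XGBoost where to page batches on disk.
            super().__init__(cache_prefix=os.path.join(".", "cache"))

        def next(self, input_data):
            if self._it == len(batches):
                return 0  # no more batches
            X, y = batches[self._it]
            input_data(data=X, label=y)
            self._it += 1
            return 1

        def reset(self):
            self._it = 0

    # The DMatrix is built incrementally from the iterator's batches.
    dtrain = xgboost.DMatrix(Batches())
    booster = xgboost.train({"tree_method": "hist", "objective": "binary:logistic"}, dtrain)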
@@ -26,7 +26,7 @@ If you are running out of memory, checkout the tutorial page for using :doc:`dis
 **********************************
 How to handle categorical feature?
 **********************************
-Visit :doc:`this tutorial </tutorials/categorical>` for a walk through of categorical data handling and some worked examples.
+Visit :doc:`this tutorial </tutorials/categorical>` for a walkthrough of categorical data handling and some worked examples.
 
 ******************************************************************
 Why not implement distributed XGBoost on top of X (Spark, Hadoop)?
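For reference, the categorical handling that tutorial covers boils down to marking columns as categorical and passing ``enable_categorical``. A minimal sketch with a hypothetical two-column frame::

    import numpy as np
    import pandas as pd
    import xgboost

    # The pandas category dtype is what marks a column as categorical.
    df = pd.DataFrame({
        "color": pd.Categorical(["red", "green", "blue", "green"]),
        "size": [1.0, 2.0, 0.5, 3.0],
    })
    y = np.array([0, 1, 0, 1])

    # enable_categorical lets XGBoost consume the category columns natively.
    dtrain = xgboost.DMatrix(df, label=y, enable_categorical=True)
    booster = xgboost.train({"tree_method": "hist", "objective": "binary:logistic"}, dtrain)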
@@ -37,14 +37,14 @@ The ultimate question will still come back to how to push the limit of each comp
 and use less resources to complete the task (thus with less communication and chance of failure).
 
 To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
-The demand of communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
+The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
 Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
 Most importantly, it pushes the limit of the computation resources we can use.
 
 ****************************************
 How can I port a model to my own system?
 ****************************************
-The model and data format of XGBoost is exchangeable,
+The model and data format of XGBoost are exchangeable,
 which means the model trained by one language can be loaded in another.
 This means you can train the model using R, while running prediction using
 Java or C++, which are more common in production systems.
 
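The portability claim in this hunk is exercised by saving to the JSON model format from one binding and loading it from another; a minimal round trip in Python (synthetic data, both ends in one process for brevity)::

    import numpy as np
    import xgboost

    X = np.random.rand(100, 4)
    y = np.random.randint(0, 2, size=100)
    dtrain = xgboost.DMatrix(X, label=y)
    booster = xgboost.train({"objective": "binary:logistic"}, dtrain)

    # JSON is the portable, binding-independent format; the same file can be
    # loaded from the R, JVM, or C++ bindings.
    booster.save_model("model.json")

    restored = xgboost.Booster()
    restored.load_model("model.json")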
@@ -73,7 +73,7 @@ Parameters for Tree Booster
 ===========================
 * ``eta`` [default=0.3, alias: ``learning_rate``]
 
-  - Step size shrinkage used in update to prevents overfitting. After each boosting step, we can directly get the weights of new features, and ``eta`` shrinks the feature weights to make the boosting process more conservative.
+  - Step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features, and ``eta`` shrinks the feature weights to make the boosting process more conservative.
   - range: [0,1]
 
 * ``gamma`` [default=0, alias: ``min_split_loss``]
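To make the corrected ``eta`` description concrete: a smaller step size shrinks each new tree's contribution, which typically calls for more boosting rounds to reach the same training loss. A quick sketch with synthetic data::

    import numpy as np
    import xgboost

    X, y = np.random.rand(200, 5), np.random.rand(200)
    dtrain = xgboost.DMatrix(X, label=y)

    # eta=0.1 scales each tree's leaf weights by 0.1 before they are added,
    # so we compensate with a larger num_boost_round.
    params = {"eta": 0.1, "objective": "reg:squarederror"}
    booster = xgboost.train(params, dtrain, num_boost_round=100)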
@@ -17,7 +17,7 @@ class HasArbitraryParamsDict(Params):
         Params._dummy(),
         "arbitrary_params_dict",
         "arbitrary_params_dict This parameter holds all of the additional parameters which are "
-        "not exposed as the the XGBoost Spark estimator params but can be recognized by "
+        "not exposed as the XGBoost Spark estimator params but can be recognized by "
         "underlying XGBoost library. It is stored as a dictionary.",
     )
 
 
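In practice, ``arbitrary_params_dict`` is how constructor keyword arguments that are not Spark estimator params reach the core library. A sketch, assuming a running SparkSession and a DataFrame with ``features``/``label`` columns (``max_bin`` here is just an example of a pass-through parameter)::

    from xgboost.spark import SparkXGBClassifier

    # Keyword arguments that are not Spark estimator params are collected
    # into arbitrary_params_dict and handed to the underlying XGBoost library.
    clf = SparkXGBClassifier(
        features_col="features",
        label_col="label",
        max_bin=64,  # not a Spark param; routed through arbitrary_params_dict
    )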
@@ -106,7 +106,7 @@ bool AllreduceBase::Init(int argc, char* argv[]) {
     }
   }
   if (dmlc_role != "worker") {
-    LOG(FATAL) << "Rabit Module currently only work with dmlc worker";
+    LOG(FATAL) << "Rabit Module currently only works with dmlc worker";
   }
 
   // clear the setting before start reconnection
@@ -273,7 +273,7 @@ void AllreduceBase::SetParam(const char *name, const char *val) {
   return xgboost::collective::Success();
 }
 /*!
- * \brief connect to the tracker to fix the the missing links
+ * \brief connect to the tracker to fix the missing links
  * this function is also used when the engine start up
  */
 [[nodiscard]] xgboost::collective::Result AllreduceBase::ReConnectLinks(const char *cmd) {
@@ -89,7 +89,7 @@ class AllreduceBase : public IEngine {
   }
 
   /*!
-   * \brief internal Allgather function, each node have a segment of data in the ring of sendrecvbuf,
+   * \brief internal Allgather function, each node has a segment of data in the ring of sendrecvbuf,
    * the data provided by current node k is [slice_begin, slice_end),
    * the next node's segment must start with slice_end
    * after the call of Allgather, sendrecvbuf_ contains all the contents including all segments
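The ring layout described in that comment is easy to simulate; this toy Python sketch (not the rabit API) has each node forward the segment it most recently received to its ring successor until everyone holds all segments::

    # Ring allgather over n "nodes": node k starts with segment k; at step s
    # it sends segment (k - s) mod n to node (k + 1) mod n.
    n = 4
    data = [f"seg{k}" for k in range(n)]      # node k's own slice
    bufs = [{k: data[k]} for k in range(n)]   # per-node view of the ring buffer

    for step in range(n - 1):
        sends = [(k, (k - step) % n) for k in range(n)]  # (sender, segment id)
        for k, seg in sends:
            nxt = (k + 1) % n                 # ring successor
            bufs[nxt][seg] = bufs[k][seg]

    # After n - 1 steps every node holds every segment.
    assert all(len(b) == n for b in bufs)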
@@ -281,7 +281,7 @@ class AllreduceBase : public IEngine {
    * this function can not be used together with ReadToRingBuffer
    * a link can either read into the ring buffer, or existing array
    * \param max_size maximum size of array
-   * \return true if it is an successful read, false if there is some error happens, check errno
+   * \return true if it is a successful read, false if there is some error happens, check errno
    */
   inline ReturnType ReadToArray(void *recvbuf_, size_t max_size) {
     if (max_size == size_read) return kSuccess;
@@ -299,7 +299,7 @@ class AllreduceBase : public IEngine {
    * \brief write data in array to sock
    * \param sendbuf_ head of array
    * \param max_size maximum size of array
-   * \return true if it is an successful write, false if there is some error happens, check errno
+   * \return true if it is a successful write, false if there is some error happens, check errno
    */
   inline ReturnType WriteFromArray(const void *sendbuf_, size_t max_size) {
     const char *p = static_cast<const char*>(sendbuf_);
@@ -333,7 +333,7 @@ class AllreduceBase : public IEngine {
    */
   [[nodiscard]] xgboost::collective::Result ConnectTracker(xgboost::collective::TCPSocket *out) const;
   /*!
-   * \brief connect to the tracker to fix the the missing links
+   * \brief connect to the tracker to fix the missing links
    * this function is also used when the engine start up
    * \param cmd possible command to sent to tracker
    */
@@ -358,7 +358,7 @@ class AllreduceBase : public IEngine {
                              size_t count,
                              ReduceFunction reducer);
   /*!
-   * \brief broadcast data from root to all nodes, this function can fail,and will return the cause of failure
+   * \brief broadcast data from root to all nodes, this function can fail, and will return the cause of failure
    * \param sendrecvbuf_ buffer for both sending and receiving data
    * \param size the size of the data to be broadcasted
    * \param root the root worker id to broadcast the data