diff --git a/doc/conf.py b/doc/conf.py index 4a54ababb..e5e53e059 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -78,6 +78,8 @@ master_doc = 'index' # Usually you set "language" from the command line for these cases. language = None +autoclass_content = 'both' + # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' diff --git a/doc/python/python_intro.md b/doc/python/python_intro.md index 2dd389c41..e3d5d511d 100644 --- a/doc/python/python_intro.md +++ b/doc/python/python_intro.md @@ -25,7 +25,9 @@ Data Interface -------------- The XGBoost python module is able to load data from: - libsvm txt format file -- Numpy 2D array, and +- comma-separated values (CSV) file +- Numpy 2D array +- Scipy 2D sparse array, and - xgboost binary buffer file. The data is stored in a ```DMatrix``` object. @@ -35,6 +37,16 @@ The data is stored in a ```DMatrix``` object. dtrain = xgb.DMatrix('train.svm.txt') dtest = xgb.DMatrix('test.svm.buffer') ``` +* To load a CSV file into ```DMatrix```: +```python +# label_column specifies the index of the column containing the true label +dtrain = xgb.DMatrix('train.csv?format=csv&label_column=0') +dtest = xgb.DMatrix('test.csv?format=csv&label_column=0') +``` +(Note that XGBoost does not support categorical features; if your data contains +categorical features, load it as a numpy array first and then perform +[one-hot encoding](http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html).) + * To load a numpy array into ```DMatrix```: ```python data = np.random.rand(5, 10) # 5 entities, each contains 10 features diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 769df0e53..9b29df695 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -235,8 +235,6 @@ class DMatrix(object): feature_names=None, feature_types=None, nthread=None): """ - Data matrix used in XGBoost. 
- Parameters ---------- data : string/numpy array/scipy.sparse/pd.DataFrame @@ -706,7 +704,7 @@ class DMatrix(object): class Booster(object): - """"A Booster of of XGBoost. + """A Booster of XGBoost. Booster is the model of xgboost, that contains low level routines for training, prediction and evaluation. @@ -716,8 +714,7 @@ class Booster(object): def __init__(self, params=None, cache=(), model_file=None): # pylint: disable=invalid-name - """Initialize the Booster. - + """ Parameters ---------- params : dict