diff --git a/multi-node/col-split/mushroom-col.conf b/multi-node/col-split/mushroom-col.conf
new file mode 100644
index 000000000..2c779a44d
--- /dev/null
+++ b/multi-node/col-split/mushroom-col.conf
@@ -0,0 +1,35 @@
+# General parameters; each setting is explained by the comment above it
+# booster to use, can be gbtree or gblinear
+booster = gbtree
+# use the logistic regression loss function for binary classification
+objective = binary:logistic
+
+# Tree booster parameters
+# step size shrinkage
+eta = 1.0
+# minimum loss reduction required to make a further partition
+gamma = 1.0
+# minimum sum of instance weight (hessian) needed in a child
+min_child_weight = 1
+# maximum depth of a tree
+max_depth = 3
+
+# Task parameters
+# the number of boosting rounds to run
+num_round = 2
+# 0 means do not save any model except the final-round model
+save_period = 0
+use_buffer = 0
+
+# Path of the training data; %d is the wildcard for the rank of the data.
+# The idea is that each process takes a feature matrix with a subset of the columns.
+#
+data = "train.col%d"
+
+# Path of the validation data, used to monitor the training process; here [test] sets the name of the validation set
+eval[test] = "../../demo/data/agaricus.txt.test"
+# also evaluate on the training data each round
+eval_train = 1
+
+# Path of the test data. The full test data must be used here; try not to use it, or keep a subsampled version
+test:data = "../../demo/data/agaricus.txt.test"