Group CLI demo into subdirectory. (#6258)
The CLI is not the most developed interface. Grouping its demos into a dedicated directory helps new users avoid it, since most use cases are served by a language binding.
This commit is contained in:
9
demo/CLI/yearpredMSD/README.md
Normal file
9
demo/CLI/yearpredMSD/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
Demonstrating how to use XGBoost on [Year Prediction task of Million Song Dataset](https://archive.ics.uci.edu/ml/datasets/YearPredictionMSD)
|
||||
|
||||
1. Run runexp.sh
|
||||
```bash
|
||||
./runexp.sh
|
||||
```
|
||||
|
||||
You can also use the script to prepare LIBSVM format, and run the [Distributed Version](../../multi-node).
|
||||
Note that you normally only need a single machine for a dataset at this scale; use the distributed version for larger-scale datasets.
|
||||
9
demo/CLI/yearpredMSD/csv2libsvm.py
Executable file
9
demo/CLI/yearpredMSD/csv2libsvm.py
Executable file
@@ -0,0 +1,9 @@
|
||||
import sys
|
||||
def csv_to_libsvm(csv_path, libsvm_path):
    """Convert a comma-separated text file into LIBSVM text format.

    Each input line is split on ','. The first field is written as the label
    and every following field is written as ``index:value``, with indices
    starting at 0 in column order.

    Args:
        csv_path: Path of the comma-separated input file.
        libsvm_path: Path of the output file; it is created or overwritten.
    """
    # Both handles are managed by `with`, so they are closed even when a read
    # or write fails. The input is opened first so that a missing input file
    # does not leave behind an empty output file.
    with open(csv_path) as src, open(libsvm_path, 'w') as dst:
        for line in src:
            fields = line.split(',')
            # The last field still carries the line's trailing newline, which
            # is what terminates each output record.
            dst.write(fields[0] + ''.join(
                ' %d:%s' % (idx, value)
                for idx, value in enumerate(fields[1:])))


if __name__ == '__main__':
    # Usage: csv2libsvm.py <input.csv> <output.libsvm>
    csv_to_libsvm(sys.argv[1], sys.argv[2])
|
||||
17
demo/CLI/yearpredMSD/runexp.sh
Executable file
17
demo/CLI/yearpredMSD/runexp.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
# Fetch the YearPredictionMSD dataset if it is not already present, convert it
# to LIBSVM format, split it into train/test files, and train with the xgboost
# CLI using yearpredMSD.conf.

# Stop at the first failing step so that a failed download, unzip or
# conversion does not lead to training on missing or partial data.
set -e

if [ -f YearPredictionMSD.txt ]
then
    echo "use existing data to run experiment"
else
    echo "getting data from uci, make sure you are connected to internet"
    wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip
    unzip YearPredictionMSD.txt.zip
fi
echo "start making data.."
# convert the CSV (label first, then feature columns) into LIBSVM format
python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm
# the first 463715 rows become the training file, the last 51630 rows the test file
head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train
tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test
echo "finish making the data"
../../../xgboost yearpredMSD.conf
|
||||
29
demo/CLI/yearpredMSD/yearpredMSD.conf
Normal file
29
demo/CLI/yearpredMSD/yearpredMSD.conf
Normal file
@@ -0,0 +1,29 @@
|
||||
# General Parameters, see comment for each definition
|
||||
# choose the tree booster, can also change to gblinear
|
||||
booster = gbtree
|
||||
# this is the only difference from classification: use reg:squarederror to do regression with squared error loss
|
||||
# when labels are in [0,1] we can also use reg:logistic
|
||||
objective = reg:squarederror
|
||||
|
||||
# Tree Booster Parameters
|
||||
# step size shrinkage
|
||||
eta = 1.0
|
||||
# minimum loss reduction required to make a further partition
|
||||
gamma = 1.0
|
||||
# minimum sum of instance weight(hessian) needed in a child
|
||||
min_child_weight = 1
|
||||
# maximum depth of a tree
|
||||
max_depth = 5
|
||||
|
||||
base_score = 2001
|
||||
# Task parameters
|
||||
# the number of boosting rounds
|
||||
num_round = 100
|
||||
# 0 means do not save any model except the final round model
|
||||
save_period = 0
|
||||
# The path of training data
|
||||
data = "yearpredMSD.libsvm.train"
|
||||
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
|
||||
eval[test] = "yearpredMSD.libsvm.test"
|
||||
# The path of test data
|
||||
#test:data = "yearpredMSD.libsvm.test"
|
||||
Reference in New Issue
Block a user