diff --git a/README.md b/README.md index 64cbf5159..c775c9776 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,10 @@ xgboost: eXtreme Gradient Boosting ======= An optimized general purpose gradient boosting (tree) library. -Authors: +Contributors: * Tianqi Chen, project creater * Kailong Chen, contributes regression module +* Bing Xu, contributes python interface, higgs example Turorial and Documentation: https://github.com/tqchen/xgboost/wiki diff --git a/demo/kaggle-higgs/README.md b/demo/kaggle-higgs/README.md index a3c208002..b3db23266 100644 --- a/demo/kaggle-higgs/README.md +++ b/demo/kaggle-higgs/README.md @@ -16,5 +16,6 @@ make - +Speed +===== speedtest.py compares xgboost's speed on this dataset with sklearn.GBM diff --git a/demo/rank/runexp.sh b/demo/rank/runexp.sh index d948ca5a0..c17ebee05 100755 --- a/demo/rank/runexp.sh +++ b/demo/rank/runexp.sh @@ -1,7 +1,17 @@ -python trans_data.py train.txt mq2008.train mq2008.train.group -python trans_data.py test.txt mq2008.test mq2008.test.group -python trans_data.py vali.txt mq2008.vali mq2008.vali.group +# Download the dataset from the web site +wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar + +# Please install the unrar package first +unrar x MQ2008 + +python trans_data.py MQ2008/Fold1/train.txt mq2008.train mq2008.train.group + +python trans_data.py MQ2008/Fold1/test.txt mq2008.test mq2008.test.group + +python trans_data.py MQ2008/Fold1/vali.txt mq2008.vali mq2008.vali.group ../../xgboost mq2008.conf + ../../xgboost mq2008.conf task=pred model_in=0004.model + diff --git a/python/example/demo.py b/python/example/demo.py index f5e0aa2a7..73935efab 100755 --- a/python/example/demo.py +++ b/python/example/demo.py @@ -12,7 +12,7 @@ dtrain = xgb.DMatrix('agaricus.txt.train') dtest = xgb.DMatrix('agaricus.txt.test') # specify parameters via map, definition are same as c++ version -param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'loss_type':2 } +param = 
{'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' } # specify validations set to watch performance evallist = [(dtest,'eval'), (dtrain,'train')] @@ -29,11 +29,6 @@ bst.dump_model('dump.raw.txt') # dump model with feature map bst.dump_model('dump.raw.txt','featmap.txt') -# beta: interact mode -bst.set_param('bst:interact:expand',4) -bst.update_interact( dtrain, 'update', 0) -bst.dump_model('dump.raw2.txt') - ### # build dmatrix in python iteratively # diff --git a/python/xgboost.py b/python/xgboost.py index 5c3555770..d7cf9f63e 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -1,3 +1,4 @@ +# Author: Tianqi Chen, Bing Xu # module for xgboost import ctypes import os diff --git a/python/xgboost_python.cpp b/python/xgboost_python.cpp index 8dd210c52..7c63fc6ac 100644 --- a/python/xgboost_python.cpp +++ b/python/xgboost_python.cpp @@ -1,3 +1,4 @@ +// implementations in ctypes #include "xgboost_python.h" #include "../regrank/xgboost_regrank.h" #include "../regrank/xgboost_regrank_data.h" diff --git a/python/xgboost_python.h b/python/xgboost_python.h index ac3ca94ac..6c113a108 100644 --- a/python/xgboost_python.h +++ b/python/xgboost_python.h @@ -1,7 +1,8 @@ #ifndef XGBOOST_PYTHON_H #define XGBOOST_PYTHON_H /*! - * \file xgboost_regrank_data.h + * \file xgboost_python.h + * \author Tianqi Chen * \brief python wrapper for xgboost, using ctypes, * hides everything behind functions * use c style interface