Merge branch 'master' of ssh://github.com/tqchen/xgboost
Conflicts: regression/xgboost_reg_data.h
This commit is contained in:
commit
c3592dc06c
2
Makefile
2
Makefile
@ -12,6 +12,8 @@ export LDFLAGS= -pthread -lm
|
|||||||
|
|
||||||
xgboost: regression/xgboost_reg_main.cpp regression/*.h booster/*.h booster/*/*.hpp booster/*.hpp
|
xgboost: regression/xgboost_reg_main.cpp regression/*.h booster/*.h booster/*/*.hpp booster/*.hpp
|
||||||
|
|
||||||
|
#xgboost: rank/xgboost_rank_main.cpp base/*.h rank/*.h booster/*.h booster/*/*.hpp booster/*.hpp
|
||||||
|
|
||||||
$(BIN) :
|
$(BIN) :
|
||||||
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
||||||
|
|
||||||
|
|||||||
@ -288,7 +288,8 @@ namespace xgboost{
|
|||||||
booster_info.push_back(0);
|
booster_info.push_back(0);
|
||||||
this->ConfigBooster(boosters.back());
|
this->ConfigBooster(boosters.back());
|
||||||
boosters.back()->InitModel();
|
boosters.back()->InitModel();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
this->ConfigBooster(boosters.back());
|
this->ConfigBooster(boosters.back());
|
||||||
}
|
}
|
||||||
return boosters.back();
|
return boosters.back();
|
||||||
|
|||||||
13
demo/rank/README
Normal file
13
demo/rank/README
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
Demonstrating how to use XGBoost to accomplish regression tasks on the Computer Hardware dataset: https://archive.ics.uci.edu/ml/datasets/Computer+Hardware
|
||||||
|
|
||||||
|
Run: ./runexp.sh
|
||||||
|
|
||||||
|
Format of input: LIBSVM format
|
||||||
|
|
||||||
|
Format of `featmap.txt`: `<featureid> <featurename> <q or i or int>\n`:
|
||||||
|
- Feature id must be from 0 to number of features, in sorted order.
|
||||||
|
- i means this feature is a binary indicator feature
|
||||||
|
- q means this feature is a quantitative value, such as age or time; it can be missing
|
||||||
|
- int means this feature is an integer value (when int is hinted, the decision boundary will be an integer)
|
||||||
|
|
||||||
|
Explanations: https://github.com/tqchen/xgboost/wiki/Regression
|
||||||
16
demo/rank/runexp.sh
Normal file
16
demo/rank/runexp.sh
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# map the data to features. For convenience we only use 7 original attributes and encode them as features in a trivial way
|
||||||
|
python mapfeat.py
|
||||||
|
# split train and test
|
||||||
|
python mknfold.py machine.txt 1
|
||||||
|
# train and output the models
|
||||||
|
../../xgboost machine.conf
|
||||||
|
# output predictions of test data
|
||||||
|
../../xgboost machine.conf task=pred model_in=0002.model
|
||||||
|
# print the boosters of 0002.model in dump.raw.txt
|
||||||
|
../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
|
||||||
|
# print the boosters of 0002.model in dump.nice.txt with feature map
|
||||||
|
../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||||
|
|
||||||
|
# cat the result
|
||||||
|
cat dump.nice.txt
|
||||||
5
demo/rank/toy.eval
Normal file
5
demo/rank/toy.eval
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
1 0:2 1:3 2:2
|
||||||
|
0 0:2 1:3 2:2
|
||||||
|
0 0:2 1:3 2:2
|
||||||
|
0 0:2 1:3 2:2
|
||||||
|
1 0:2 1:3 2:2
|
||||||
2
demo/rank/toy.eval.group
Normal file
2
demo/rank/toy.eval.group
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
2
|
||||||
|
3
|
||||||
5
demo/rank/toy.test
Normal file
5
demo/rank/toy.test
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
1 0:2 1:3 2:2
|
||||||
|
0 0:2 1:3 2:2
|
||||||
|
0 0:2 1:3 2:2
|
||||||
|
0 0:2 1:3 2:2
|
||||||
|
1 0:2 1:3 2:2
|
||||||
2
demo/rank/toy.test.group
Normal file
2
demo/rank/toy.test.group
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
2
|
||||||
|
3
|
||||||
11
demo/rank/toy.train
Normal file
11
demo/rank/toy.train
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
1 0:1.2 1:3 2:5.6
|
||||||
|
0 0:2.0 1:2.3 2:5.1
|
||||||
|
0 0:3.9 1:3 2:3.1
|
||||||
|
0 0:2 1:3.2 2:3.4
|
||||||
|
1 0:2.1 1:4.5 2:4.2
|
||||||
|
0 0:1.9 1:2.8 2:3.1
|
||||||
|
1 0:3.0 1:2.0 2:1.1
|
||||||
|
0 0:1.9 1:1.8 2:2.1
|
||||||
|
0 0:1.1 1:2.2 2:1.4
|
||||||
|
1 0:2.1 1:4.1 2:4.0
|
||||||
|
0 0:1.9 1:2.2 2:1.1
|
||||||
2
demo/rank/toy.train.group
Normal file
2
demo/rank/toy.train.group
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
6
|
||||||
|
5
|
||||||
0
demo/rank/train
Normal file
0
demo/rank/train
Normal file
@ -20,6 +20,7 @@ namespace xgboost{
|
|||||||
class BoostTask{
|
class BoostTask{
|
||||||
public:
|
public:
|
||||||
inline int Run(int argc, char *argv[]){
|
inline int Run(int argc, char *argv[]){
|
||||||
|
|
||||||
if (argc < 2){
|
if (argc < 2){
|
||||||
printf("Usage: <config>\n");
|
printf("Usage: <config>\n");
|
||||||
return 0;
|
return 0;
|
||||||
@ -34,6 +35,7 @@ namespace xgboost{
|
|||||||
this->SetParam(name, val);
|
this->SetParam(name, val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this->InitData();
|
this->InitData();
|
||||||
this->InitLearner();
|
this->InitLearner();
|
||||||
if (task == "dump"){
|
if (task == "dump"){
|
||||||
@ -128,6 +130,7 @@ namespace xgboost{
|
|||||||
|
|
||||||
|
|
||||||
inline void InitData(void){
|
inline void InitData(void){
|
||||||
|
|
||||||
if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
|
if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
|
||||||
if (task == "dump") return;
|
if (task == "dump") return;
|
||||||
if (learning_task == RANKING){
|
if (learning_task == RANKING){
|
||||||
@ -140,6 +143,7 @@ namespace xgboost{
|
|||||||
// training
|
// training
|
||||||
sscanf(train_path.c_str(), "%[^;];%s", instance_path, group_path);
|
sscanf(train_path.c_str(), "%[^;];%s", instance_path, group_path);
|
||||||
data.CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
|
data.CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
|
||||||
|
|
||||||
utils::Assert(eval_data_names.size() == eval_data_paths.size());
|
utils::Assert(eval_data_names.size() == eval_data_paths.size());
|
||||||
for (size_t i = 0; i < eval_data_names.size(); ++i){
|
for (size_t i = 0; i < eval_data_names.size(); ++i){
|
||||||
deval.push_back(new DMatrix());
|
deval.push_back(new DMatrix());
|
||||||
@ -147,8 +151,6 @@ namespace xgboost{
|
|||||||
deval.back()->CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
|
deval.back()->CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
if (task == "pred" || task == "dumppath"){
|
if (task == "pred" || task == "dumppath"){
|
||||||
@ -166,7 +168,9 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
|
|
||||||
learner_->SetData(&data, deval, eval_data_names);
|
learner_->SetData(&data, deval, eval_data_names);
|
||||||
|
if(!silent) printf("BoostTask:Data Initiation Done!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void InitLearner(void){
|
inline void InitLearner(void){
|
||||||
cfg.BeforeFirst();
|
cfg.BeforeFirst();
|
||||||
while (cfg.Next()){
|
while (cfg.Next()){
|
||||||
@ -182,6 +186,7 @@ namespace xgboost{
|
|||||||
learner_->InitModel();
|
learner_->InitModel();
|
||||||
}
|
}
|
||||||
learner_->InitTrainer();
|
learner_->InitTrainer();
|
||||||
|
if(!silent) printf("BoostTask:InitLearner Done!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void TaskTrain(void){
|
inline void TaskTrain(void){
|
||||||
|
|||||||
@ -70,17 +70,27 @@ namespace xgboost{
|
|||||||
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
||||||
}
|
}
|
||||||
fclose(file);
|
fclose(file);
|
||||||
|
LoadGroup(fgroup,silent);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void LoadGroup(const char* fgroup, bool silent = false){
|
||||||
//if exists group data load it in
|
//if exists group data load it in
|
||||||
FILE *file_group = fopen64(fgroup, "r");
|
FILE *file_group = fopen64(fgroup, "r");
|
||||||
|
|
||||||
if (file_group != NULL){
|
if (file_group != NULL){
|
||||||
group_index.push_back(0);
|
group_index.push_back(0);
|
||||||
int tmp = 0, acc = 0;
|
int tmp = 0, acc = 0,cnt = 0;
|
||||||
while (fscanf(file_group, "%d", tmp) == 1){
|
while (fscanf(file_group, "%d", &tmp) == 1){
|
||||||
acc += tmp;
|
acc += tmp;
|
||||||
group_index.push_back(acc);
|
group_index.push_back(acc);
|
||||||
|
cnt++;
|
||||||
}
|
}
|
||||||
|
if(!silent) printf("%d groups are loaded from %s\n",cnt,fgroup);
|
||||||
|
fclose(file_group);
|
||||||
|
}else{
|
||||||
|
if(!silent) printf("There is no group file\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief load from binary file
|
* \brief load from binary file
|
||||||
@ -100,26 +110,14 @@ namespace xgboost{
|
|||||||
data.InitData();
|
data.InitData();
|
||||||
|
|
||||||
if (!silent){
|
if (!silent){
|
||||||
printf("%ux%u matrix with %lu entries is loaded from %s\n",
|
printf("%ux%u matrix with %lu entries is loaded from %s as binary\n",
|
||||||
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
||||||
}
|
}
|
||||||
|
|
||||||
//if group data exists load it in
|
LoadGroupBinary(fgroup,silent);
|
||||||
FILE *file_group = fopen64(fgroup, "r");
|
|
||||||
if (file_group != NULL){
|
|
||||||
int group_index_size = 0;
|
|
||||||
utils::FileStream group_stream(file_group);
|
|
||||||
utils::Assert(group_stream.Read(&group_index_size, sizeof(int)) != 0, "Load group indice size");
|
|
||||||
group_index.resize(group_index_size);
|
|
||||||
utils::Assert(group_stream.Read(&group_index, sizeof(int)* group_index_size) != 0, "Load group indice");
|
|
||||||
|
|
||||||
if (!silent){
|
|
||||||
printf("the group index of %d groups is loaded from %s\n",
|
|
||||||
group_index_size - 1, fgroup);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief save to binary file
|
* \brief save to binary file
|
||||||
* \param fname name of binary data
|
* \param fname name of binary data
|
||||||
@ -134,19 +132,45 @@ namespace xgboost{
|
|||||||
fs.Write(&labels[0], sizeof(float)* data.NumRow());
|
fs.Write(&labels[0], sizeof(float)* data.NumRow());
|
||||||
fs.Close();
|
fs.Close();
|
||||||
if (!silent){
|
if (!silent){
|
||||||
printf("%ux%u matrix with %lu entries is saved to %s\n",
|
printf("%ux%u matrix with %lu entries is saved to %s as binary\n",
|
||||||
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SaveGroupBinary(fgroup,silent);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void SaveGroupBinary(const char* fgroup, bool silent = false){
|
||||||
//save group data
|
//save group data
|
||||||
if (group_index.size() > 0){
|
if (group_index.size() > 0){
|
||||||
utils::FileStream file_group(utils::FopenCheck(fgroup, "wb"));
|
utils::FileStream file_group(utils::FopenCheck(fgroup, "wb"));
|
||||||
int group_index_size = group_index.size();
|
int group_index_size = group_index.size();
|
||||||
file_group.Write(&(group_index_size), sizeof(int));
|
file_group.Write(&(group_index_size), sizeof(int));
|
||||||
file_group.Write(&group_index[0], sizeof(int) * group_index_size);
|
file_group.Write(&group_index[0], sizeof(int) * group_index_size);
|
||||||
|
file_group.Close();
|
||||||
|
if(!silent){printf("Index info of %d groups is saved to %s as binary\n",group_index_size-1,fgroup);}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void LoadGroupBinary(const char* fgroup, bool silent = false){
|
||||||
|
//if group data exists load it in
|
||||||
|
FILE *file_group = fopen64(fgroup, "r");
|
||||||
|
if (file_group != NULL){
|
||||||
|
int group_index_size = 0;
|
||||||
|
utils::FileStream group_stream(file_group);
|
||||||
|
utils::Assert(group_stream.Read(&group_index_size, sizeof(int)) != 0, "Load group indice size");
|
||||||
|
group_index.resize(group_index_size);
|
||||||
|
utils::Assert(group_stream.Read(&group_index[0], sizeof(int) * group_index_size) != 0, "Load group indice");
|
||||||
|
|
||||||
|
if (!silent){
|
||||||
|
printf("Index info of %d groups is loaded from %s as binary\n",
|
||||||
|
group_index.size() - 1, fgroup);
|
||||||
}
|
}
|
||||||
|
fclose(file_group);
|
||||||
|
}else{
|
||||||
|
if(!silent){printf("The binary file of group info not exists");}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief cache load data given a file name, if filename ends with .buffer, direct load binary
|
* \brief cache load data given a file name, if filename ends with .buffer, direct load binary
|
||||||
* otherwise the function will first check if fname + '.buffer' exists,
|
* otherwise the function will first check if fname + '.buffer' exists,
|
||||||
@ -161,11 +185,13 @@ namespace xgboost{
|
|||||||
if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
|
if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
|
||||||
this->LoadBinary(fname, fgroup, silent); return;
|
this->LoadBinary(fname, fgroup, silent); return;
|
||||||
}
|
}
|
||||||
char bname[1024];
|
char bname[1024],bgroup[1024];
|
||||||
sprintf(bname, "%s.buffer", fname);
|
sprintf(bname, "%s.buffer", fname);
|
||||||
if (!this->LoadBinary(bname, fgroup, silent)){
|
sprintf(bgroup, "%s.buffer", fgroup);
|
||||||
|
if (!this->LoadBinary(bname, bgroup, silent))
|
||||||
|
{
|
||||||
this->LoadText(fname, fgroup, silent);
|
this->LoadText(fname, fgroup, silent);
|
||||||
if (savebuffer) this->SaveBinary(bname, fgroup, silent);
|
if (savebuffer) this->SaveBinary(bname, bgroup, silent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
@ -182,9 +208,6 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -96,6 +96,7 @@ namespace xgboost {
|
|||||||
*/
|
*/
|
||||||
inline void InitModel(void) {
|
inline void InitModel(void) {
|
||||||
base_gbm.InitModel();
|
base_gbm.InitModel();
|
||||||
|
if(!silent) printf("BoostLearner:InitModel Done!\n");
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief load model from stream
|
* \brief load model from stream
|
||||||
@ -143,16 +144,23 @@ namespace xgboost {
|
|||||||
this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_);
|
this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_);
|
||||||
std::vector<unsigned> root_index;
|
std::vector<unsigned> root_index;
|
||||||
base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
|
base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
|
||||||
|
|
||||||
|
// printf("xgboost_learner.h:UpdateOneIter\n");
|
||||||
|
// const unsigned ndata = static_cast<unsigned>(train_->Size());
|
||||||
|
// #pragma omp parallel for schedule( static )
|
||||||
|
// for (unsigned j = 0; j < ndata; ++j) {
|
||||||
|
// printf("haha:%d %f\n",j,base_gbm.Predict(train_->data, j, j));
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! \brief get untransformed prediction, without buffering */
|
/*! \brief get untransformed prediction, without buffering */
|
||||||
inline void Predict(std::vector<float> &preds, const DMatrix &data) {
|
inline void Predict(std::vector<float> &preds, const DMatrix &data) {
|
||||||
preds.resize(data.Size());
|
preds.resize(data.Size());
|
||||||
|
|
||||||
const unsigned ndata = static_cast<unsigned>(data.Size());
|
const unsigned ndata = static_cast<unsigned>(data.Size());
|
||||||
#pragma omp parallel for schedule( static )
|
#pragma omp parallel for schedule( static )
|
||||||
for (unsigned j = 0; j < ndata; ++j) {
|
for (unsigned j = 0; j < ndata; ++j) {
|
||||||
preds[j] = base_gbm.Predict(data.data, j, -1);
|
preds[j] = base_gbm.Predict(data.data, j, -1);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,8 +218,8 @@ namespace xgboost {
|
|||||||
/*! \brief get untransformed predictions, given data */
|
/*! \brief get untransformed predictions, given data */
|
||||||
virtual inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) {
|
virtual inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) {
|
||||||
preds.resize(data.Size());
|
preds.resize(data.Size());
|
||||||
|
|
||||||
const unsigned ndata = static_cast<unsigned>(data.Size());
|
const unsigned ndata = static_cast<unsigned>(data.Size());
|
||||||
|
|
||||||
#pragma omp parallel for schedule( static )
|
#pragma omp parallel for schedule( static )
|
||||||
for (unsigned j = 0; j < ndata; ++j) {
|
for (unsigned j = 0; j < ndata; ++j) {
|
||||||
preds[j] = base_gbm.Predict(data.data, j, buffer_offset + j);
|
preds[j] = base_gbm.Predict(data.data, j, buffer_offset + j);
|
||||||
|
|||||||
@ -7,7 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <vector>
|
||||||
#include "xgboost_sample.h"
|
#include "xgboost_sample.h"
|
||||||
#include "xgboost_rank_eval.h"
|
#include "xgboost_rank_eval.h"
|
||||||
#include "../base/xgboost_data_instance.h"
|
#include "../base/xgboost_data_instance.h"
|
||||||
@ -71,31 +71,128 @@ namespace xgboost {
|
|||||||
fprintf(fo, "\n");
|
fprintf(fo, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void SetParam(const char *name, const char *val){
|
virtual inline void SetParam(const char *name, const char *val){
|
||||||
|
BoostLearner::SetParam(name,val);
|
||||||
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
|
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
|
||||||
if (!strcmp(name, "rank:sampler")) sampler.AssignSampler(atoi(val));
|
if (!strcmp(name, "rank:sampler")) sampler.AssignSampler(atoi(val));
|
||||||
}
|
}
|
||||||
/*! \brief get the first order and second order gradient, given the transformed predictions and labels */
|
|
||||||
inline void GetGradient(const std::vector<float> &preds,
|
private:
|
||||||
|
inline std::vector< Triple<float,float,int> > GetSortedTuple(const std::vector<float> &preds,
|
||||||
|
const std::vector<float> &labels,
|
||||||
|
const std::vector<int> &group_index,
|
||||||
|
int group){
|
||||||
|
std::vector< Triple<float,float,int> > sorted_triple;
|
||||||
|
for(int j = group_index[group]; j < group_index[group+1]; j++){
|
||||||
|
sorted_triple.push_back(Triple<float,float,int>(preds[j],labels[j],j));
|
||||||
|
}
|
||||||
|
std::sort(sorted_triple.begin(),sorted_triple.end(),Triplef1Comparer);
|
||||||
|
return sorted_triple;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::vector<int> GetIndexMap(std::vector< Triple<float,float,int> > sorted_triple,int start){
|
||||||
|
std::vector<int> index_remap;
|
||||||
|
index_remap.resize(sorted_triple.size());
|
||||||
|
for(int i = 0; i < sorted_triple.size(); i++){
|
||||||
|
index_remap[sorted_triple[i].f3_-start] = i;
|
||||||
|
}
|
||||||
|
return index_remap;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float GetLambdaMAP(const std::vector< Triple<float,float,int> > sorted_triple,
|
||||||
|
int index1,int index2,
|
||||||
|
std::vector< Quadruple<float,float,float,float> > map_acc){
|
||||||
|
if(index1 > index2) std::swap(index1,index2);
|
||||||
|
float original = map_acc[index2].f1_;
|
||||||
|
if(index1 != 0) original -= map_acc[index1 - 1].f1_;
|
||||||
|
float changed = 0;
|
||||||
|
if(sorted_triple[index1].f2_ < sorted_triple[index2].f2_){
|
||||||
|
changed += map_acc[index2 - 1].f3_ - map_acc[index1].f3_;
|
||||||
|
changed += (map_acc[index1].f4_ + 1.0f)/(index1 + 1);
|
||||||
|
}else{
|
||||||
|
changed += map_acc[index2 - 1].f2_ - map_acc[index1].f2_;
|
||||||
|
changed += map_acc[index2].f4_/(index2 + 1);
|
||||||
|
}
|
||||||
|
float ans = (changed - original)/(map_acc[map_acc.size() - 1].f4_);
|
||||||
|
if(ans < 0) ans = -ans;
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float GetLambdaNDCG(const std::vector< Triple<float,float,int> > sorted_triple,
|
||||||
|
int index1,
|
||||||
|
int index2,float IDCG){
|
||||||
|
float original = pow(2,sorted_triple[index1].f2_)/log(index1+2)
|
||||||
|
+ pow(2,sorted_triple[index2].f2_)/log(index2+2);
|
||||||
|
float changed = pow(2,sorted_triple[index2].f2_)/log(index1+2)
|
||||||
|
+ pow(2,sorted_triple[index1].f2_)/log(index2+2);
|
||||||
|
float ans = (original - changed)/IDCG;
|
||||||
|
if(ans < 0) ans = -ans;
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline float GetIDCG(const std::vector< Triple<float,float,int> > sorted_triple){
|
||||||
|
std::vector<float> labels;
|
||||||
|
for(int i = 0; i < sorted_triple.size(); i++){
|
||||||
|
labels.push_back(sorted_triple[i].f2_);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(labels.begin(),labels.end(),std::greater<float>());
|
||||||
|
return EvalNDCG::DCG(labels);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::vector< Quadruple<float,float,float,float> > GetMAPAcc(const std::vector< Triple<float,float,int> > sorted_triple){
|
||||||
|
std::vector< Quadruple<float,float,float,float> > map_acc;
|
||||||
|
float hit = 0,acc1 = 0,acc2 = 0,acc3 = 0;
|
||||||
|
for(int i = 0; i < sorted_triple.size(); i++){
|
||||||
|
if(sorted_triple[i].f2_ == 1) {
|
||||||
|
hit++;
|
||||||
|
acc1 += hit /( i + 1 );
|
||||||
|
acc2 += (hit - 1)/(i+1);
|
||||||
|
acc3 += (hit + 1)/(i+1);
|
||||||
|
}
|
||||||
|
map_acc.push_back(Quadruple<float,float,float,float>(acc1,acc2,acc3,hit));
|
||||||
|
}
|
||||||
|
return map_acc;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void GetGroupGradient(const std::vector<float> &preds,
|
||||||
const std::vector<float> &labels,
|
const std::vector<float> &labels,
|
||||||
const std::vector<int> &group_index,
|
const std::vector<int> &group_index,
|
||||||
std::vector<float> &grad,
|
std::vector<float> &grad,
|
||||||
std::vector<float> &hess) {
|
std::vector<float> &hess,
|
||||||
grad.resize(preds.size());
|
const std::vector< Triple<float,float,int> > sorted_triple,
|
||||||
hess.resize(preds.size());
|
const std::vector<int> index_remap,
|
||||||
|
const sample::Pairs& pairs,
|
||||||
|
int group){
|
||||||
bool j_better;
|
bool j_better;
|
||||||
float pred_diff, pred_diff_exp, first_order_gradient, second_order_gradient;
|
float IDCG, pred_diff, pred_diff_exp, delta;
|
||||||
for (int i = 0; i < group_index.size() - 1; i++){
|
float first_order_gradient, second_order_gradient;
|
||||||
sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]);
|
std::vector< Quadruple<float,float,float,float> > map_acc;
|
||||||
for (int j = group_index[i]; j < group_index[i + 1]; j++){
|
|
||||||
|
if(mparam.loss_type == NDCG){
|
||||||
|
IDCG = GetIDCG(sorted_triple);
|
||||||
|
}else if(mparam.loss_type == MAP){
|
||||||
|
map_acc = GetMAPAcc(sorted_triple);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = group_index[group]; j < group_index[group + 1]; j++){
|
||||||
std::vector<int> pair_instance = pairs.GetPairs(j);
|
std::vector<int> pair_instance = pairs.GetPairs(j);
|
||||||
for (int k = 0; k < pair_instance.size(); k++){
|
for (int k = 0; k < pair_instance.size(); k++){
|
||||||
j_better = labels[j] > labels[pair_instance[k]];
|
j_better = labels[j] > labels[pair_instance[k]];
|
||||||
if (j_better){
|
if (j_better){
|
||||||
|
switch(mparam.loss_type){
|
||||||
|
case PAIRWISE: delta = 1.0;break;
|
||||||
|
case MAP: delta = GetLambdaMAP(sorted_triple,index_remap[j - group_index[group]],index_remap[pair_instance[k]-group_index[group]],map_acc);break;
|
||||||
|
case NDCG: delta = GetLambdaNDCG(sorted_triple,index_remap[j - group_index[group]],index_remap[pair_instance[k]-group_index[group]],IDCG);break;
|
||||||
|
default: utils::Error("Cannot find the specified loss type");
|
||||||
|
}
|
||||||
|
|
||||||
pred_diff = preds[preds[j] - pair_instance[k]];
|
pred_diff = preds[preds[j] - pair_instance[k]];
|
||||||
pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff);
|
pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff);
|
||||||
first_order_gradient = FirstOrderGradient(pred_diff_exp);
|
first_order_gradient = delta * FirstOrderGradient(pred_diff_exp);
|
||||||
second_order_gradient = 2 * SecondOrderGradient(pred_diff_exp);
|
second_order_gradient = 2 * delta * SecondOrderGradient(pred_diff_exp);
|
||||||
hess[j] += second_order_gradient;
|
hess[j] += second_order_gradient;
|
||||||
grad[j] += first_order_gradient;
|
grad[j] += first_order_gradient;
|
||||||
hess[pair_instance[k]] += second_order_gradient;
|
hess[pair_instance[k]] += second_order_gradient;
|
||||||
@ -104,11 +201,54 @@ namespace xgboost {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
public:
|
||||||
|
/*! \brief get the first order and second order gradient, given the
|
||||||
|
* untransformed predictions and labels */
|
||||||
|
inline void GetGradient(const std::vector<float> &preds,
|
||||||
|
const std::vector<float> &labels,
|
||||||
|
const std::vector<int> &group_index,
|
||||||
|
std::vector<float> &grad,
|
||||||
|
std::vector<float> &hess) {
|
||||||
|
grad.resize(preds.size());
|
||||||
|
hess.resize(preds.size());
|
||||||
|
for (int i = 0; i < group_index.size() - 1; i++){
|
||||||
|
sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]);
|
||||||
|
//pairs.GetPairs()
|
||||||
|
std::vector< Triple<float,float,int> > sorted_triple = GetSortedTuple(preds,labels,group_index,i);
|
||||||
|
std::vector<int> index_remap = GetIndexMap(sorted_triple,group_index[i]);
|
||||||
|
GetGroupGradient(preds,labels,group_index,
|
||||||
|
grad,hess,sorted_triple,index_remap,pairs,i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void UpdateInteract(std::string action) {
|
inline void UpdateInteract(std::string action) {
|
||||||
|
this->InteractPredict(preds_, *train_, 0);
|
||||||
|
|
||||||
|
int buffer_offset = static_cast<int>(train_->Size());
|
||||||
|
for (size_t i = 0; i < evals_.size(); ++i){
|
||||||
|
std::vector<float> &preds = this->eval_preds_[i];
|
||||||
|
this->InteractPredict(preds, *evals_[i], buffer_offset);
|
||||||
|
buffer_offset += static_cast<int>(evals_[i]->Size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (action == "remove"){
|
||||||
|
base_gbm.DelteBooster(); return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->GetGradient(preds_, train_->labels,train_->group_index, grad_, hess_);
|
||||||
|
std::vector<unsigned> root_index;
|
||||||
|
base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
|
||||||
|
|
||||||
|
this->InteractRePredict(*train_, 0);
|
||||||
|
buffer_offset = static_cast<int>(train_->Size());
|
||||||
|
for (size_t i = 0; i < evals_.size(); ++i){
|
||||||
|
this->InteractRePredict(*evals_[i], buffer_offset);
|
||||||
|
buffer_offset += static_cast<int>(evals_[i]->Size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum LossType {
|
enum LossType {
|
||||||
PAIRWISE = 0,
|
PAIRWISE = 0,
|
||||||
|
|||||||
@ -34,9 +34,7 @@ namespace xgboost {
|
|||||||
float key_;
|
float key_;
|
||||||
float value_;
|
float value_;
|
||||||
|
|
||||||
Pair(float key, float value){
|
Pair(float key, float value):key_(key),value_(value){
|
||||||
key_ = key;
|
|
||||||
value_ = value_;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -48,6 +46,32 @@ namespace xgboost {
|
|||||||
return a.value_ < b.value_;
|
return a.value_ < b.value_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T1,typename T2,typename T3>
|
||||||
|
class Triple{
|
||||||
|
public:
|
||||||
|
T1 f1_;
|
||||||
|
T2 f2_;
|
||||||
|
T3 f3_;
|
||||||
|
Triple(T1 f1,T2 f2,T3 f3):f1_(f1),f2_(f2),f3_(f3){
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T1,typename T2,typename T3,typename T4>
|
||||||
|
class Quadruple{
|
||||||
|
public:
|
||||||
|
T1 f1_;
|
||||||
|
T2 f2_;
|
||||||
|
T3 f3_;
|
||||||
|
T4 f4_;
|
||||||
|
Quadruple(T1 f1,T2 f2,T3 f3,T4 f4):f1_(f1),f2_(f2),f3_(f3),f4_(f4){
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
bool Triplef1Comparer(const Triple<float,float,int> &a, const Triple<float,float,int> &b){
|
||||||
|
return a.f1_< b.f1_;
|
||||||
|
}
|
||||||
|
|
||||||
/*! \brief Mean Average Precision */
|
/*! \brief Mean Average Precision */
|
||||||
class EvalMAP : public IRankEvaluator {
|
class EvalMAP : public IRankEvaluator {
|
||||||
@ -55,6 +79,7 @@ namespace xgboost {
|
|||||||
float Eval(const std::vector<float> &preds,
|
float Eval(const std::vector<float> &preds,
|
||||||
const std::vector<float> &labels,
|
const std::vector<float> &labels,
|
||||||
const std::vector<int> &group_index) const {
|
const std::vector<int> &group_index) const {
|
||||||
|
if (group_index.size() <= 1) return 0;
|
||||||
float acc = 0;
|
float acc = 0;
|
||||||
std::vector<Pair> pairs_sort;
|
std::vector<Pair> pairs_sort;
|
||||||
for (int i = 0; i < group_index.size() - 1; i++){
|
for (int i = 0; i < group_index.size() - 1; i++){
|
||||||
@ -68,10 +93,11 @@ namespace xgboost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
virtual const char *Name(void) const {
|
virtual const char *Name(void) const {
|
||||||
return "MAP";
|
return "MAP";
|
||||||
}
|
}
|
||||||
|
private:
|
||||||
float average_precision(std::vector<Pair> pairs_sort) const{
|
float average_precision(std::vector<Pair> pairs_sort) const{
|
||||||
|
|
||||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
||||||
@ -94,12 +120,31 @@ namespace xgboost {
|
|||||||
float Eval(const std::vector<float> &preds,
|
float Eval(const std::vector<float> &preds,
|
||||||
const std::vector<float> &labels,
|
const std::vector<float> &labels,
|
||||||
const std::vector<int> &group_index) const {
|
const std::vector<int> &group_index) const {
|
||||||
return 0;
|
if (group_index.size() <= 1) return 0;
|
||||||
|
float acc = 0;
|
||||||
|
for (int i = 0; i < group_index.size() - 1; i++){
|
||||||
|
acc += Count_Inversion(preds,labels,
|
||||||
|
group_index[i],group_index[i+1]);
|
||||||
|
}
|
||||||
|
return acc / (group_index.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *Name(void) const {
|
const char *Name(void) const {
|
||||||
return "PAIR";
|
return "PAIR";
|
||||||
}
|
}
|
||||||
|
private:
|
||||||
|
float Count_Inversion(const std::vector<float> &preds,
|
||||||
|
const std::vector<float> &labels,int begin,int end
|
||||||
|
) const{
|
||||||
|
float ans = 0;
|
||||||
|
for(int i = begin; i < end; i++){
|
||||||
|
for(int j = i + 1; j < end; j++){
|
||||||
|
if(preds[i] > preds[j] && labels[i] < labels[j])
|
||||||
|
ans++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief Normalized DCG */
|
/*! \brief Normalized DCG */
|
||||||
@ -121,6 +166,19 @@ namespace xgboost {
|
|||||||
return acc / (group_index.size() - 1);
|
return acc / (group_index.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static float DCG(const std::vector<float> &labels){
|
||||||
|
float ans = 0.0;
|
||||||
|
for (int i = 0; i < labels.size(); i++){
|
||||||
|
ans += (pow(2,labels[i]) - 1 ) / log(i + 2);
|
||||||
|
}
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual const char *Name(void) const {
|
||||||
|
return "NDCG";
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
float NDCG(std::vector<Pair> pairs_sort) const{
|
float NDCG(std::vector<Pair> pairs_sort) const{
|
||||||
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
|
||||||
float dcg = DCG(pairs_sort);
|
float dcg = DCG(pairs_sort);
|
||||||
@ -131,17 +189,14 @@ namespace xgboost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
float DCG(std::vector<Pair> pairs_sort) const{
|
float DCG(std::vector<Pair> pairs_sort) const{
|
||||||
float ans = 0.0;
|
std::vector<float> labels;
|
||||||
ans += pairs_sort[0].value_;
|
|
||||||
for (int i = 1; i < pairs_sort.size(); i++){
|
for (int i = 1; i < pairs_sort.size(); i++){
|
||||||
ans += pairs_sort[i].value_ / log(i + 1);
|
labels.push_back(pairs_sort[i].value_);
|
||||||
}
|
}
|
||||||
return ans;
|
return DCG(labels);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual const char *Name(void) const {
|
|
||||||
return "NDCG";
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -11,20 +11,12 @@
|
|||||||
#include "../base/xgboost_boost_task.h"
|
#include "../base/xgboost_boost_task.h"
|
||||||
#include "xgboost_rank.h"
|
#include "xgboost_rank.h"
|
||||||
#include "../regression/xgboost_reg.h"
|
#include "../regression/xgboost_reg.h"
|
||||||
|
#include "../regression/xgboost_reg_main.cpp"
|
||||||
|
#include "../base/xgboost_data_instance.h"
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
xgboost::random::Seed(0);
|
xgboost::random::Seed(0);
|
||||||
xgboost::base::BoostTask tsk;
|
xgboost::base::BoostTask rank_tsk;
|
||||||
xgboost::utils::ConfigIterator itr(argv[1]);
|
rank_tsk.SetLearner(new xgboost::rank::RankBoostLearner);
|
||||||
/* int learner_index = 0;
|
return rank_tsk.Run(argc, argv);
|
||||||
while (itr.Next()){
|
|
||||||
if (!strcmp(itr.name(), "learning_task")){
|
|
||||||
learner_index = atoi(itr.val());
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
xgboost::rank::RankBoostLearner* rank_learner = new xgboost::rank::RankBoostLearner;
|
|
||||||
xgboost::base::BoostLearner *parent = static_cast<xgboost::base::BoostLearner*>(rank_learner);
|
|
||||||
tsk.SetLearner(parent);
|
|
||||||
return tsk.Run(argc, argv);
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,7 +19,7 @@ namespace xgboost {
|
|||||||
* \param start the begin index of the group
|
* \param start the begin index of the group
|
||||||
* \param end the end index of the group
|
* \param end the end index of the group
|
||||||
*/
|
*/
|
||||||
Pairs(int start,int end):start_(start),end_(end_){
|
Pairs(int start, int end) :start_(start), end_(end){
|
||||||
for (int i = start; i < end; i++){
|
for (int i = start; i < end; i++){
|
||||||
std::vector<int> v;
|
std::vector<int> v;
|
||||||
pairs_.push_back(v);
|
pairs_.push_back(v);
|
||||||
@ -30,7 +30,7 @@ namespace xgboost {
|
|||||||
* \param index, the index of retrieved instance
|
* \param index, the index of retrieved instance
|
||||||
* \return the index of instances paired
|
* \return the index of instances paired
|
||||||
*/
|
*/
|
||||||
std::vector<int> GetPairs(int index) {
|
std::vector<int> GetPairs(int index) const{
|
||||||
utils::Assert(index >= start_ && index < end_, "The query index out of sampling bound");
|
utils::Assert(index >= start_ && index < end_, "The query index out of sampling bound");
|
||||||
return pairs_[index - start_];
|
return pairs_[index - start_];
|
||||||
}
|
}
|
||||||
@ -115,6 +115,7 @@ namespace xgboost {
|
|||||||
Pairs GenPairs(const std::vector<float> &preds,
    const std::vector<float> &labels,
    int start, int end){
    // a sampler has to be configured before any pairs can be generated
    const bool has_sampler = (sampler_ != NULL);
    utils::Assert(has_sampler, "Not config the sampler yet. Add rank:sampler in the config file\n");
    // the actual sampling is delegated to the configured sampler
    return sampler_->GenPairs(preds, labels, start, end);
}
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -94,7 +94,8 @@ namespace xgboost{
|
|||||||
base_gbm.InitTrainer();
|
base_gbm.InitTrainer();
|
||||||
if (mparam.loss_type == kLogisticClassify){
|
if (mparam.loss_type == kLogisticClassify){
|
||||||
evaluator_.AddEval("error");
|
evaluator_.AddEval("error");
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
evaluator_.AddEval("rmse");
|
evaluator_.AddEval("rmse");
|
||||||
}
|
}
|
||||||
evaluator_.Init();
|
evaluator_.Init();
|
||||||
|
|||||||
@ -50,7 +50,8 @@ namespace xgboost{
|
|||||||
unsigned index; float value;
|
unsigned index; float value;
|
||||||
if (sscanf(tmp, "%u:%f", &index, &value) == 2){
|
if (sscanf(tmp, "%u:%f", &index, &value) == 2){
|
||||||
findex.push_back(index); fvalue.push_back(value);
|
findex.push_back(index); fvalue.push_back(value);
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
if (!init){
|
if (!init){
|
||||||
labels.push_back(label);
|
labels.push_back(label);
|
||||||
data.AddRow(findex, fvalue);
|
data.AddRow(findex, fvalue);
|
||||||
|
|||||||
@ -55,7 +55,8 @@ namespace xgboost{
|
|||||||
for (unsigned i = 0; i < ndata; ++i){
|
for (unsigned i = 0; i < ndata; ++i){
|
||||||
if (preds[i] > 0.5f){
|
if (preds[i] > 0.5f){
|
||||||
if (labels[i] < 0.5f) nerr += 1;
|
if (labels[i] < 0.5f) nerr += 1;
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
if (labels[i] > 0.5f) nerr += 1;
|
if (labels[i] > 0.5f) nerr += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -50,7 +50,8 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
if (task == "pred"){
|
if (task == "pred"){
|
||||||
this->TaskPred();
|
this->TaskPred();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
this->TaskTrain();
|
this->TaskTrain();
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
@ -113,7 +114,8 @@ namespace xgboost{
|
|||||||
if (task == "dump") return;
|
if (task == "dump") return;
|
||||||
if (task == "pred" || task == "dumppath"){
|
if (task == "pred" || task == "dumppath"){
|
||||||
data.CacheLoad(test_path.c_str(), silent != 0, use_buffer != 0);
|
data.CacheLoad(test_path.c_str(), silent != 0, use_buffer != 0);
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
// training
|
// training
|
||||||
data.CacheLoad(train_path.c_str(), silent != 0, use_buffer != 0);
|
data.CacheLoad(train_path.c_str(), silent != 0, use_buffer != 0);
|
||||||
utils::Assert(eval_data_names.size() == eval_data_paths.size());
|
utils::Assert(eval_data_names.size() == eval_data_paths.size());
|
||||||
@ -133,7 +135,8 @@ namespace xgboost{
|
|||||||
utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
|
utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
|
||||||
learner.LoadModel(fi);
|
learner.LoadModel(fi);
|
||||||
fi.Close();
|
fi.Close();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
utils::Assert(task == "train", "model_in not specified");
|
utils::Assert(task == "train", "model_in not specified");
|
||||||
learner.InitModel();
|
learner.InitModel();
|
||||||
}
|
}
|
||||||
@ -156,7 +159,8 @@ namespace xgboost{
|
|||||||
if (save_period == 0 || num_round % save_period != 0){
|
if (save_period == 0 || num_round % save_period != 0){
|
||||||
if (model_out == "NULL"){
|
if (model_out == "NULL"){
|
||||||
this->SaveModel(num_round - 1);
|
this->SaveModel(num_round - 1);
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
this->SaveModel(model_out.c_str());
|
this->SaveModel(model_out.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -177,7 +181,8 @@ namespace xgboost{
|
|||||||
if (!strcmp(cfg_batch.name(), "run")){
|
if (!strcmp(cfg_batch.name(), "run")){
|
||||||
learner.UpdateInteract(interact_action);
|
learner.UpdateInteract(interact_action);
|
||||||
batch_action += 1;
|
batch_action += 1;
|
||||||
} else{
|
}
|
||||||
|
else{
|
||||||
learner.SetParam(cfg_batch.name(), cfg_batch.val());
|
learner.SetParam(cfg_batch.name(), cfg_batch.val());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -94,7 +94,8 @@ namespace xgboost{
|
|||||||
case '\"':
|
case '\"':
|
||||||
if (i == 0){
|
if (i == 0){
|
||||||
ParseStr(tok); ch_buf = fgetc(fi); return new_line;
|
ParseStr(tok); ch_buf = fgetc(fi); return new_line;
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
Error("token followed directly by string");
|
Error("token followed directly by string");
|
||||||
}
|
}
|
||||||
case '=':
|
case '=':
|
||||||
@ -102,7 +103,8 @@ namespace xgboost{
|
|||||||
ch_buf = fgetc(fi);
|
ch_buf = fgetc(fi);
|
||||||
tok[0] = '=';
|
tok[0] = '=';
|
||||||
tok[1] = '\0';
|
tok[1] = '\0';
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
tok[i] = '\0';
|
tok[i] = '\0';
|
||||||
}
|
}
|
||||||
return new_line;
|
return new_line;
|
||||||
@ -155,7 +157,8 @@ namespace xgboost{
|
|||||||
if (priority == 0){
|
if (priority == 0){
|
||||||
names.push_back(std::string(name));
|
names.push_back(std::string(name));
|
||||||
values.push_back(std::string(val));
|
values.push_back(std::string(val));
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
names_high.push_back(std::string(name));
|
names_high.push_back(std::string(name));
|
||||||
values_high.push_back(std::string(val));
|
values_high.push_back(std::string(val));
|
||||||
}
|
}
|
||||||
@ -184,7 +187,8 @@ namespace xgboost{
|
|||||||
size_t i = idx - 1;
|
size_t i = idx - 1;
|
||||||
if (i >= names.size()){
|
if (i >= names.size()){
|
||||||
return names_high[i - names.size()].c_str();
|
return names_high[i - names.size()].c_str();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
return names[i].c_str();
|
return names[i].c_str();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -197,7 +201,8 @@ namespace xgboost{
|
|||||||
size_t i = idx - 1;
|
size_t i = idx - 1;
|
||||||
if (i >= values.size()){
|
if (i >= values.size()){
|
||||||
return values_high[i - values.size()].c_str();
|
return values_high[i - values.size()].c_str();
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
return values[i].c_str();
|
return values[i].c_str();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -50,7 +50,8 @@ namespace xgboost{
|
|||||||
if (!UseAcList){
|
if (!UseAcList){
|
||||||
rptr.clear();
|
rptr.clear();
|
||||||
rptr.resize(nrows + 1, 0);
|
rptr.resize(nrows + 1, 0);
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
|
Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
|
||||||
this->Cleanup();
|
this->Cleanup();
|
||||||
}
|
}
|
||||||
@ -79,7 +80,8 @@ namespace xgboost{
|
|||||||
rptr[i] = start;
|
rptr[i] = start;
|
||||||
start += rlen;
|
start += rlen;
|
||||||
}
|
}
|
||||||
}else{
|
}
|
||||||
|
else{
|
||||||
// case with active list
|
// case with active list
|
||||||
std::sort(aclist.begin(), aclist.end());
|
std::sort(aclist.begin(), aclist.end());
|
||||||
|
|
||||||
|
|||||||
@ -10,7 +10,7 @@
|
|||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#else
|
#else
|
||||||
//#warning "OpenMP is not available, compile to single thread code"
|
#warning "OpenMP is not available, compile to single thread code"
|
||||||
inline int omp_get_thread_num() { return 0; }
|
inline int omp_get_thread_num() { return 0; }
|
||||||
inline int omp_get_num_threads() { return 1; }
|
inline int omp_get_num_threads() { return 1; }
|
||||||
inline void omp_set_num_threads(int nthread) {}
|
inline void omp_set_num_threads(int nthread) {}
|
||||||
|
|||||||
@ -88,7 +88,8 @@ namespace xgboost{
|
|||||||
u = NextDouble();
|
u = NextDouble();
|
||||||
} while (u == 0.0);
|
} while (u == 0.0);
|
||||||
return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
|
return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
double d, c, x, v, u;
|
double d, c, x, v, u;
|
||||||
d = alpha - 1.0 / 3.0;
|
d = alpha - 1.0 / 3.0;
|
||||||
c = 1.0 / sqrt(9.0 * d);
|
c = 1.0 / sqrt(9.0 * d);
|
||||||
|
|||||||
@ -62,16 +62,6 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! \brief replace fopen, */
|
|
||||||
inline FILE *FopenTry( const char *fname , const char *flag ){
|
|
||||||
FILE *fp = fopen64( fname , flag );
|
|
||||||
if( fp == NULL ){
|
|
||||||
fprintf( stderr, "can not open file \"%s\"\n",fname );
|
|
||||||
exit( -1 );
|
|
||||||
}
|
|
||||||
return fp;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user