add base_margin
This commit is contained in:
@@ -233,7 +233,7 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
pred_counter[bid] = static_cast<unsigned>(trees.size());
|
||||
pred_buffer[bid] = psum;
|
||||
}
|
||||
return psum + mparam.base_score;
|
||||
return psum;
|
||||
}
|
||||
// initialize thread local space for prediction
|
||||
inline void InitThreadTemp(int nthread) {
|
||||
@@ -296,8 +296,6 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
};
|
||||
/*! \brief model parameters */
|
||||
struct ModelParam {
|
||||
/*! \brief base prediction score of everything */
|
||||
float base_score;
|
||||
/*! \brief number of trees */
|
||||
int num_trees;
|
||||
/*! \brief number of roots: default 0, which means a single tree */
|
||||
@@ -316,7 +314,6 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
int reserved[32];
|
||||
/*! \brief constructor */
|
||||
ModelParam(void) {
|
||||
base_score = 0.0f;
|
||||
num_trees = 0;
|
||||
num_roots = num_feature = 0;
|
||||
num_pbuffer = 0;
|
||||
@@ -329,7 +326,6 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
inline void SetParam(const char *name, const char *val) {
|
||||
if (!strcmp("base_score", name)) base_score = static_cast<float>(atof(val));
|
||||
if (!strcmp("num_pbuffer", name)) num_pbuffer = atol(val);
|
||||
if (!strcmp("num_output_group", name)) num_output_group = atol(val);
|
||||
if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
|
||||
|
||||
@@ -110,10 +110,13 @@ class DMatrixSimple : public DataMatrix {
|
||||
"DMatrix: group data does not match the number of rows in features");
|
||||
}
|
||||
std::string wname = name + ".weight";
|
||||
if (info.TryLoadWeight(wname.c_str(), silent)) {
|
||||
if (info.TryLoadFloatInfo("weight", wname.c_str(), silent)) {
|
||||
utils::Check(info.weights.size() == info.num_row,
|
||||
"DMatrix: weight data does not match the number of rows in features");
|
||||
}
|
||||
std::string mname = name + ".base_margin";
|
||||
if (info.TryLoadFloatInfo("base_margin", mname.c_str(), silent)) {
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief load from binary file
|
||||
|
||||
@@ -33,6 +33,15 @@ struct MetaInfo {
|
||||
* can be used for multi task setting
|
||||
*/
|
||||
std::vector<unsigned> root_index;
|
||||
/*!
|
||||
* \brief initialized margins,
|
||||
* if specified, xgboost will start from this init margin
|
||||
* can be used to specify initial prediction to boost from
|
||||
*/
|
||||
std::vector<float> base_margin;
|
||||
/*! \brief version flag, used to check version of this info */
|
||||
static const int kVersion = 0;
|
||||
// constructor
|
||||
MetaInfo(void) : num_row(0), num_col(0) {}
|
||||
/*! \brief clear all the information */
|
||||
inline void Clear(void) {
|
||||
@@ -40,6 +49,7 @@ struct MetaInfo {
|
||||
group_ptr.clear();
|
||||
weights.clear();
|
||||
root_index.clear();
|
||||
base_margin.clear();
|
||||
num_row = num_col = 0;
|
||||
}
|
||||
/*! \brief get weight of each instances */
|
||||
@@ -59,20 +69,26 @@ struct MetaInfo {
|
||||
}
|
||||
}
|
||||
/*!
 * \brief write the meta information to a binary stream
 * \param fo output stream that receives the data
 *
 * Fields are written in this order: version flag, num_row, num_col,
 * labels, group_ptr, weights, root_index, base_margin.
 * LoadBinary reads them back in the same order.
 */
inline void SaveBinary(utils::IStream &fo) const {
  // header: version flag followed by the matrix dimensions
  int format_version = kVersion;
  fo.Write(&format_version, sizeof(format_version));
  fo.Write(&num_row, sizeof(num_row));
  fo.Write(&num_col, sizeof(num_col));

  // per-instance / per-group vectors
  fo.Write(labels);
  fo.Write(group_ptr);
  fo.Write(weights);
  fo.Write(root_index);
  fo.Write(base_margin);
}
|
||||
/*!
 * \brief read the meta information from a binary stream
 * \param fi input stream holding data in the layout written by SaveBinary
 *
 * Fields are read in this order: version flag, num_row, num_col,
 * labels, group_ptr, weights, root_index, base_margin.
 * Every failed read, as well as a version flag that differs from
 * kVersion, is reported through utils::Check.
 */
inline void LoadBinary(utils::IStream &fi) {
  // initialized so a short read never leaves an indeterminate value behind
  int version = 0;
  utils::Check(fi.Read(&version, sizeof(version)), "MetaInfo: invalid format");
  // the flag exists to detect incompatible layouts; reject anything that
  // was not written with the version this code understands
  utils::Check(version == kVersion, "MetaInfo: version mismatch");
  utils::Check(fi.Read(&num_row, sizeof(num_row)), "MetaInfo: invalid format");
  utils::Check(fi.Read(&num_col, sizeof(num_col)), "MetaInfo: invalid format");
  utils::Check(fi.Read(&labels), "MetaInfo: invalid format");
  utils::Check(fi.Read(&group_ptr), "MetaInfo: invalid format");
  utils::Check(fi.Read(&weights), "MetaInfo: invalid format");
  utils::Check(fi.Read(&root_index), "MetaInfo: invalid format");
  utils::Check(fi.Read(&base_margin), "MetaInfo: invalid format");
}
|
||||
// try to load group information from file, if exists
|
||||
inline bool TryLoadGroup(const char* fname, bool silent = false) {
|
||||
@@ -89,8 +105,19 @@ struct MetaInfo {
|
||||
fclose(fi);
|
||||
return true;
|
||||
}
|
||||
inline std::vector<float>& GetInfo(const char *field) {
|
||||
if (!strcmp(field, "label")) return labels;
|
||||
if (!strcmp(field, "weight")) return weights;
|
||||
if (!strcmp(field, "base_margin")) return base_margin;
|
||||
utils::Error("unknown field %s", field);
|
||||
return labels;
|
||||
}
|
||||
inline const std::vector<float>& GetInfo(const char *field) const {
|
||||
return ((MetaInfo*)this)->GetInfo(field);
|
||||
}
|
||||
// try to load weight information from file, if exists
|
||||
inline bool TryLoadWeight(const char* fname, bool silent = false) {
|
||||
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
|
||||
std::vector<float> &weights = this->GetInfo(field);
|
||||
FILE *fi = fopen64(fname, "r");
|
||||
if (fi == NULL) return false;
|
||||
float wt;
|
||||
@@ -98,7 +125,7 @@ struct MetaInfo {
|
||||
weights.push_back(wt);
|
||||
}
|
||||
if (!silent) {
|
||||
printf("loading weight from %s\n", fname);
|
||||
printf("loading %s from %s\n", field, fname);
|
||||
}
|
||||
fclose(fi);
|
||||
return true;
|
||||
|
||||
@@ -97,9 +97,6 @@ class BoostLearner {
|
||||
this->InitObjGBM();
|
||||
// reset the base score
|
||||
mparam.base_score = obj_->ProbToMargin(mparam.base_score);
|
||||
char tmp[32];
|
||||
snprintf(tmp, sizeof(tmp), "%g", mparam.base_score);
|
||||
this->SetParam("base_score", tmp);
|
||||
// initialize GBM model
|
||||
gbm_->InitModel();
|
||||
}
|
||||
@@ -199,12 +196,16 @@ class BoostLearner {
|
||||
/*!
|
||||
* \brief get prediction
|
||||
* \param data input data
|
||||
* \param output_margin whether to only predict margin value instead of transformed prediction
|
||||
* \param out_preds output vector that stores the prediction
|
||||
*/
|
||||
inline void Predict(const DMatrix<FMatrix> &data,
|
||||
bool output_margin,
|
||||
std::vector<float> *out_preds) const {
|
||||
this->PredictRaw(data, out_preds);
|
||||
obj_->PredTransform(out_preds);
|
||||
if (!output_margin) {
|
||||
obj_->PredTransform(out_preds);
|
||||
}
|
||||
}
|
||||
/*! \brief dump model out */
|
||||
inline std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
|
||||
@@ -236,6 +237,22 @@ class BoostLearner {
|
||||
std::vector<float> *out_preds) const {
|
||||
gbm_->Predict(data.fmat, this->FindBufferOffset(data),
|
||||
data.info.root_index, out_preds);
|
||||
// add base margin
|
||||
std::vector<float> &preds = *out_preds;
|
||||
const unsigned ndata = static_cast<unsigned>(preds.size());
|
||||
if (data.info.base_margin.size() != 0) {
|
||||
utils::Check(preds.size() == data.info.base_margin.size(),
|
||||
"base_margin.size does not match with prediction size");
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (unsigned j = 0; j < ndata; ++j) {
|
||||
preds[j] += data.info.base_margin[j];
|
||||
}
|
||||
} else {
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (unsigned j = 0; j < ndata; ++j) {
|
||||
preds[j] += mparam.base_score;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! \brief training parameter for regression */
|
||||
|
||||
@@ -49,6 +49,7 @@ class BoostLearnTask{
|
||||
if (!strcmp("silent", name)) silent = atoi(val);
|
||||
if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
|
||||
if (!strcmp("num_round", name)) num_round = atoi(val);
|
||||
if (!strcmp("pred_margin", name)) pred_margin = atoi(val);
|
||||
if (!strcmp("save_period", name)) save_period = atoi(val);
|
||||
if (!strcmp("eval_train", name)) eval_train = atoi(val);
|
||||
if (!strcmp("task", name)) task = val;
|
||||
@@ -77,6 +78,7 @@ class BoostLearnTask{
|
||||
num_round = 10;
|
||||
save_period = 0;
|
||||
eval_train = 0;
|
||||
pred_margin = 0;
|
||||
dump_model_stats = 0;
|
||||
task = "train";
|
||||
model_in = "NULL";
|
||||
@@ -184,7 +186,7 @@ class BoostLearnTask{
|
||||
inline void TaskPred(void) {
|
||||
std::vector<float> preds;
|
||||
if (!silent) printf("start prediction...\n");
|
||||
learner.Predict(*data, &preds);
|
||||
learner.Predict(*data, pred_margin != 0, &preds);
|
||||
if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
|
||||
FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
|
||||
for (size_t i = 0; i < preds.size(); i++) {
|
||||
@@ -193,37 +195,39 @@ class BoostLearnTask{
|
||||
fclose(fo);
|
||||
}
|
||||
private:
|
||||
/* \brief whether silent */
|
||||
/*! \brief whether silent */
|
||||
int silent;
|
||||
/* \brief whether use auto binary buffer */
|
||||
/*! \brief whether use auto binary buffer */
|
||||
int use_buffer;
|
||||
/* \brief whether evaluate training statistics */
|
||||
/*! \brief whether evaluate training statistics */
|
||||
int eval_train;
|
||||
/* \brief number of boosting iterations */
|
||||
/*! \brief number of boosting iterations */
|
||||
int num_round;
|
||||
/* \brief the period to save the model, 0 means only save the final round model */
|
||||
/*! \brief the period to save the model, 0 means only save the final round model */
|
||||
int save_period;
|
||||
/* \brief the path of training/test data set */
|
||||
/*! \brief the path of training/test data set */
|
||||
std::string train_path, test_path;
|
||||
/* \brief the path of test model file, or file to restart training */
|
||||
/*! \brief the path of test model file, or file to restart training */
|
||||
std::string model_in;
|
||||
/* \brief the path of final model file, to be saved */
|
||||
/*! \brief the path of final model file, to be saved */
|
||||
std::string model_out;
|
||||
/* \brief the path of directory containing the saved models */
|
||||
/*! \brief the path of directory containing the saved models */
|
||||
std::string model_dir_path;
|
||||
/* \brief task to perform */
|
||||
/*! \brief task to perform */
|
||||
std::string task;
|
||||
/* \brief name of predict file */
|
||||
/*! \brief name of predict file */
|
||||
std::string name_pred;
|
||||
/* \brief whether dump statistics along with model */
|
||||
/*! \brief whether to directly output margin value */
|
||||
int pred_margin;
|
||||
/*! \brief whether dump statistics along with model */
|
||||
int dump_model_stats;
|
||||
/* \brief name of feature map */
|
||||
/*! \brief name of feature map */
|
||||
std::string name_fmap;
|
||||
/* \brief name of dump file */
|
||||
/*! \brief name of dump file */
|
||||
std::string name_dump;
|
||||
/* \brief the paths of validation data sets */
|
||||
/*! \brief the paths of validation data sets */
|
||||
std::vector<std::string> eval_data_paths;
|
||||
/* \brief the names of the evaluation data used in output log */
|
||||
/*! \brief the names of the evaluation data used in output log */
|
||||
std::vector<std::string> eval_data_names;
|
||||
private:
|
||||
io::DataMatrix* data;
|
||||
|
||||
Reference in New Issue
Block a user