initial correction for vec tree

This commit is contained in:
tqchen 2014-08-24 18:48:19 -07:00
parent 7874c2559b
commit c640485f1d
2 changed files with 40 additions and 23 deletions

View File

@ -117,17 +117,13 @@ class GBTree : public IGradBooster<FMatrix> {
}
std::vector<float> &preds = *out_preds;
preds.resize(0);
const size_t stride = info.num_row * mparam.num_output_group;
preds.resize(stride * (mparam.size_leaf_vector+1));
// start collecting the prediction
utils::IIterator<SparseBatch> *iter = fmat.RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const SparseBatch &batch = iter->Value();
utils::Assert(batch.base_rowid * mparam.num_output_group == preds.size(),
"base_rowid is not set correctly");
// output convention: nrow * k, where nrow is number of rows
// k is number of group
preds.resize(preds.size() + batch.size * mparam.num_output_group);
// parallel over local batch
const unsigned nsize = static_cast<unsigned>(batch.size);
#pragma omp parallel for schedule(static)
@ -135,13 +131,13 @@ class GBTree : public IGradBooster<FMatrix> {
const int tid = omp_get_thread_num();
tree::RegTree::FVec &feats = thread_temp[tid];
const size_t ridx = batch.base_rowid + i;
const unsigned root_idx = info.GetRoot(ridx);
utils::Assert(ridx < info.num_row, "data row index exceed bound");
// loop over output groups
for (int gid = 0; gid < mparam.num_output_group; ++gid) {
preds[ridx * mparam.num_output_group + gid] =
this->Pred(batch[i],
buffer_offset < 0 ? -1 : buffer_offset+ridx,
gid, root_idx, &feats);
this->Pred(batch[i],
buffer_offset < 0 ? -1 : buffer_offset + ridx,
gid, info.GetRoot(ridx), &feats,
&preds[ridx * mparam.num_output_group + gid], stride);
}
}
}
@ -211,24 +207,34 @@ class GBTree : public IGradBooster<FMatrix> {
mparam.num_trees += tparam.num_parallel_tree;
}
// make a prediction for a single instance
inline float Pred(const SparseBatch::Inst &inst,
int64_t buffer_index,
int bst_group,
unsigned root_index,
tree::RegTree::FVec *p_feats) {
inline void Pred(const SparseBatch::Inst &inst,
int64_t buffer_index,
int bst_group,
unsigned root_index,
tree::RegTree::FVec *p_feats,
float *out_pred, size_t stride) {
size_t itop = 0;
float psum = 0.0f;
// sum of leaf vector
std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
const int bid = mparam.BufferOffset(buffer_index, bst_group);
// load buffered results if any
if (bid >= 0) {
itop = pred_counter[bid];
psum = pred_buffer[bid];
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
vec_psum[i] = pred_buffer[bid + i + 1];
}
}
if (itop != trees.size()) {
p_feats->Fill(inst);
for (size_t i = itop; i < trees.size(); ++i) {
if (tree_info[i] == bst_group) {
psum += trees[i]->Predict(*p_feats, root_index);
int tid = trees[i]->GetLeafIndex(*p_feats, root_index);
psum += (*trees[i])[tid].leaf_value();
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
vec_psum[j] += trees[i]->leafvec(tid)[j];
}
}
}
p_feats->Drop(inst);
@ -237,8 +243,14 @@ class GBTree : public IGradBooster<FMatrix> {
if (bid >= 0) {
pred_counter[bid] = static_cast<unsigned>(trees.size());
pred_buffer[bid] = psum;
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
pred_buffer[bid + i + 1] = vec_psum[i];
}
}
out_pred[0] = psum;
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
out_pred[stride * (i + 1)] = vec_psum[i];
}
return psum;
}
// --- data structure ---
/*! \brief training parameters */
@ -291,14 +303,17 @@ class GBTree : public IGradBooster<FMatrix> {
* suppose we have n instance and k group, output will be k*n
*/
int num_output_group;
/*! \brief size of leaf vector needed in tree */
int size_leaf_vector;
/*! \brief reserved parameters */
int reserved[32];
int reserved[31];
/*! \brief constructor */
ModelParam(void) {
num_trees = 0;
num_roots = num_feature = 0;
num_pbuffer = 0;
num_output_group = 1;
size_leaf_vector = 0;
memset(reserved, 0, sizeof(reserved));
}
/*!
@ -311,10 +326,11 @@ class GBTree : public IGradBooster<FMatrix> {
if (!strcmp("num_output_group", name)) num_output_group = atol(val);
if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
if (!strcmp("bst:size_leaf_vector", name)) size_leaf_vector = atoi(val);
}
/*! \return size of prediction buffer actually needed */
inline size_t PredBufferSize(void) const {
return num_output_group * num_pbuffer;
return num_output_group * num_pbuffer * (size_leaf_vector + 1);
}
/*!
* \brief get the buffer offset given a buffer index and group id
@ -323,7 +339,7 @@ class GBTree : public IGradBooster<FMatrix> {
inline size_t BufferOffset(int64_t buffer_index, int bst_group) const {
if (buffer_index < 0) return -1;
utils::Check(buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer");
return buffer_index + num_pbuffer * bst_group;
return (buffer_index + num_pbuffer * bst_group) * (size_leaf_vector + 1);
}
};
// training parameter

View File

@ -24,9 +24,10 @@ template<typename Derived>
struct EvalEWiseBase : public IEvaluator {
virtual float Eval(const std::vector<float> &preds,
const MetaInfo &info) const {
utils::Check(preds.size() == info.labels.size(),
utils::Check(info.labels.size() != 0, "label set cannot be empty");
utils::Check(preds.size() % info.labels.size() == 0,
"label and prediction size not match");
const unsigned ndata = static_cast<unsigned>(preds.size());
const unsigned ndata = static_cast<unsigned>(info.labels.size());
float sum = 0.0, wsum = 0.0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (unsigned i = 0; i < ndata; ++i) {