initial correction for vec tree
This commit is contained in:
parent
7874c2559b
commit
c640485f1d
@ -117,17 +117,13 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
}
|
||||
|
||||
std::vector<float> &preds = *out_preds;
|
||||
preds.resize(0);
|
||||
const size_t stride = info.num_row * mparam.num_output_group;
|
||||
preds.resize(stride * (mparam.size_leaf_vector+1));
|
||||
// start collecting the prediction
|
||||
utils::IIterator<SparseBatch> *iter = fmat.RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const SparseBatch &batch = iter->Value();
|
||||
utils::Assert(batch.base_rowid * mparam.num_output_group == preds.size(),
|
||||
"base_rowid is not set correctly");
|
||||
// output convention: nrow * k, where nrow is number of rows
|
||||
// k is number of group
|
||||
preds.resize(preds.size() + batch.size * mparam.num_output_group);
|
||||
// parallel over local batch
|
||||
const unsigned nsize = static_cast<unsigned>(batch.size);
|
||||
#pragma omp parallel for schedule(static)
|
||||
@ -135,13 +131,13 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
const int tid = omp_get_thread_num();
|
||||
tree::RegTree::FVec &feats = thread_temp[tid];
|
||||
const size_t ridx = batch.base_rowid + i;
|
||||
const unsigned root_idx = info.GetRoot(ridx);
|
||||
utils::Assert(ridx < info.num_row, "data row index exceed bound");
|
||||
// loop over output groups
|
||||
for (int gid = 0; gid < mparam.num_output_group; ++gid) {
|
||||
preds[ridx * mparam.num_output_group + gid] =
|
||||
this->Pred(batch[i],
|
||||
buffer_offset < 0 ? -1 : buffer_offset+ridx,
|
||||
gid, root_idx, &feats);
|
||||
this->Pred(batch[i],
|
||||
buffer_offset < 0 ? -1 : buffer_offset + ridx,
|
||||
gid, info.GetRoot(ridx), &feats,
|
||||
&preds[ridx * mparam.num_output_group + gid], stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -211,24 +207,34 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
mparam.num_trees += tparam.num_parallel_tree;
|
||||
}
|
||||
// make a prediction for a single instance
|
||||
inline float Pred(const SparseBatch::Inst &inst,
|
||||
int64_t buffer_index,
|
||||
int bst_group,
|
||||
unsigned root_index,
|
||||
tree::RegTree::FVec *p_feats) {
|
||||
inline void Pred(const SparseBatch::Inst &inst,
|
||||
int64_t buffer_index,
|
||||
int bst_group,
|
||||
unsigned root_index,
|
||||
tree::RegTree::FVec *p_feats,
|
||||
float *out_pred, size_t stride) {
|
||||
size_t itop = 0;
|
||||
float psum = 0.0f;
|
||||
// sum of leaf vector
|
||||
std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
|
||||
const int bid = mparam.BufferOffset(buffer_index, bst_group);
|
||||
// load buffered results if any
|
||||
if (bid >= 0) {
|
||||
itop = pred_counter[bid];
|
||||
psum = pred_buffer[bid];
|
||||
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
||||
vec_psum[i] = pred_buffer[bid + i + 1];
|
||||
}
|
||||
}
|
||||
if (itop != trees.size()) {
|
||||
p_feats->Fill(inst);
|
||||
for (size_t i = itop; i < trees.size(); ++i) {
|
||||
if (tree_info[i] == bst_group) {
|
||||
psum += trees[i]->Predict(*p_feats, root_index);
|
||||
int tid = trees[i]->GetLeafIndex(*p_feats, root_index);
|
||||
psum += (*trees[i])[tid].leaf_value();
|
||||
for (int j = 0; j < mparam.size_leaf_vector; ++j) {
|
||||
vec_psum[j] += trees[i]->leafvec(tid)[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
p_feats->Drop(inst);
|
||||
@ -237,8 +243,14 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
if (bid >= 0) {
|
||||
pred_counter[bid] = static_cast<unsigned>(trees.size());
|
||||
pred_buffer[bid] = psum;
|
||||
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
||||
pred_buffer[bid + i + 1] = vec_psum[i];
|
||||
}
|
||||
}
|
||||
out_pred[0] = psum;
|
||||
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
|
||||
out_pred[stride * (i + 1)] = vec_psum[i];
|
||||
}
|
||||
return psum;
|
||||
}
|
||||
// --- data structure ---
|
||||
/*! \brief training parameters */
|
||||
@ -291,14 +303,17 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
* suppose we have n instance and k group, output will be k*n
|
||||
*/
|
||||
int num_output_group;
|
||||
/*! \brief size of leaf vector needed in tree */
|
||||
int size_leaf_vector;
|
||||
/*! \brief reserved parameters */
|
||||
int reserved[32];
|
||||
int reserved[31];
|
||||
/*! \brief constructor */
|
||||
ModelParam(void) {
|
||||
num_trees = 0;
|
||||
num_roots = num_feature = 0;
|
||||
num_pbuffer = 0;
|
||||
num_output_group = 1;
|
||||
size_leaf_vector = 0;
|
||||
memset(reserved, 0, sizeof(reserved));
|
||||
}
|
||||
/*!
|
||||
@ -311,10 +326,11 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
if (!strcmp("num_output_group", name)) num_output_group = atol(val);
|
||||
if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
|
||||
if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
|
||||
if (!strcmp("bst:size_leaf_vector", name)) size_leaf_vector = atoi(val);
|
||||
}
|
||||
/*! \return size of prediction buffer actually needed */
|
||||
inline size_t PredBufferSize(void) const {
|
||||
return num_output_group * num_pbuffer;
|
||||
return num_output_group * num_pbuffer * (size_leaf_vector + 1);
|
||||
}
|
||||
/*!
|
||||
* \brief get the buffer offset given a buffer index and group id
|
||||
@ -323,7 +339,7 @@ class GBTree : public IGradBooster<FMatrix> {
|
||||
inline size_t BufferOffset(int64_t buffer_index, int bst_group) const {
|
||||
if (buffer_index < 0) return -1;
|
||||
utils::Check(buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer");
|
||||
return buffer_index + num_pbuffer * bst_group;
|
||||
return (buffer_index + num_pbuffer * bst_group) * (size_leaf_vector + 1);
|
||||
}
|
||||
};
|
||||
// training parameter
|
||||
|
||||
@ -24,9 +24,10 @@ template<typename Derived>
|
||||
struct EvalEWiseBase : public IEvaluator {
|
||||
virtual float Eval(const std::vector<float> &preds,
|
||||
const MetaInfo &info) const {
|
||||
utils::Check(preds.size() == info.labels.size(),
|
||||
utils::Check(info.labels.size() != 0, "label set cannot be empty");
|
||||
utils::Check(preds.size() % info.labels.size() == 0,
|
||||
"label and prediction size not match");
|
||||
const unsigned ndata = static_cast<unsigned>(preds.size());
|
||||
const unsigned ndata = static_cast<unsigned>(info.labels.size());
|
||||
float sum = 0.0, wsum = 0.0;
|
||||
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
|
||||
for (unsigned i = 0; i < ndata; ++i) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user