make xgcombine buffer work

This commit is contained in:
tqchen 2014-08-17 22:49:36 -07:00
parent 4ed4b08146
commit 0d9a8c042c
5 changed files with 205 additions and 207 deletions

View File

@ -1,6 +1,6 @@
export CC = clang export CC = gcc
export CXX = clang++ export CXX = g++
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
# specify tensor path # specify tensor path
BIN = xgboost BIN = xgboost

View File

@ -140,7 +140,7 @@ extern "C"{
for (size_t i = 0; i < len; ++i) { for (size_t i = 0; i < len; ++i) {
const int ridx = idxset[i]; const int ridx = idxset[i];
SparseBatch::Inst inst = batch[ridx]; SparseBatch::Inst inst = batch[ridx];
utils::Check(ridx < batch.size, "slice index exceed number of rows"); utils::Check(static_cast<size_t>(ridx) < batch.size, "slice index exceed number of rows");
ret.row_data_.resize(ret.row_data_.size() + inst.length); ret.row_data_.resize(ret.row_data_.size() + inst.length);
memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data, memcpy(&ret.row_data_[ret.row_ptr_.back()], inst.data,
sizeof(SparseBatch::Entry) * inst.length); sizeof(SparseBatch::Entry) * inst.length);

View File

@ -171,7 +171,7 @@ class SoftmaxMultiClassObj : public IObjFunction {
rec[k] = preds[j * nclass + k]; rec[k] = preds[j * nclass + k];
} }
Softmax(&rec); Softmax(&rec);
unsigned label = static_cast<unsigned>(info.labels[j]); int label = static_cast<int>(info.labels[j]);
utils::Check(label < nclass, "SoftmaxMultiClassObj: label exceed num_class"); utils::Check(label < nclass, "SoftmaxMultiClassObj: label exceed num_class");
const float wt = info.GetWeight(j); const float wt = info.GetWeight(j);
for (int k = 0; k < nclass; ++k) { for (int k = 0; k < nclass; ++k) {

View File

@ -12,7 +12,6 @@ export LDFLAGS= -pthread -lm
xgcombine_buffer : xgcombine_buffer.cpp xgcombine_buffer : xgcombine_buffer.cpp
$(BIN) : $(BIN) :
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

View File

@ -10,12 +10,11 @@
#include <cstring> #include <cstring>
#include <ctime> #include <ctime>
#include <cmath> #include <cmath>
#include "../regrank/xgboost_regrank_data.h" #include "../src/io/simple_dmatrix-inl.hpp"
#include "../utils/xgboost_utils.h" #include "../src/utils/utils.h"
using namespace xgboost; using namespace xgboost;
using namespace xgboost::booster; using namespace xgboost::io;
using namespace xgboost::regrank;
// header in dataset // header in dataset
struct Header{ struct Header{
@ -69,14 +68,14 @@ inline int readnum( std::vector<Header> &vec ){
inline void vskip( std::vector<Header> &vec ){ inline void vskip( std::vector<Header> &vec ){
for( size_t i = 0; i < vec.size(); i ++ ){ for( size_t i = 0; i < vec.size(); i ++ ){
if( !vec[i].is_dense ){ if( !vec[i].is_dense ){
utils::Assert( fscanf( vec[i].fi, "%*d%*[^\n]\n" ) >= 0 ); utils::Assert( fscanf( vec[i].fi, "%*d%*[^\n]\n" ) >= 0, "sparse" );
}else{ }else{
utils::Assert( fscanf( vec[i].fi, "%*f\n" ) >= 0 ); utils::Assert( fscanf( vec[i].fi, "%*f\n" ) >= 0, "dense" );
} }
} }
} }
class DataLoader: public DMatrix{ class DataLoader: public DMatrixSimple {
public: public:
// whether to do node and edge feature renormalization // whether to do node and edge feature renormalization
int rescale; int rescale;
@ -84,44 +83,43 @@ public:
public: public:
FILE *fp, *fwlist, *fgroup, *fweight; FILE *fp, *fwlist, *fgroup, *fweight;
std::vector<Header> fheader; std::vector<Header> fheader;
std::vector<FMatrixS::REntry> entry;
DataLoader( void ){ DataLoader( void ){
rescale = 0; rescale = 0;
linelimit = -1; linelimit = -1;
fp = NULL; fwlist = NULL; fgroup = NULL; fweight = NULL; fp = NULL; fwlist = NULL; fgroup = NULL; fweight = NULL;
} }
private: private:
inline void Load( std::vector<unsigned> &findex, std::vector<float> &fvalue, std::vector<Header> &vec ){ inline void Load( std::vector<SparseBatch::Entry> &feats, std::vector<Header> &vec ){
unsigned fidx; float fv; SparseBatch::Entry e;
for( size_t i = 0; i < vec.size(); i ++ ){ for( size_t i = 0; i < vec.size(); i ++ ){
if( !vec[i].is_dense ) { if( !vec[i].is_dense ) {
for( int j = 0; j < vec[i].tmp_num; j ++ ){ for( int j = 0; j < vec[i].tmp_num; j ++ ){
utils::Assert( fscanf ( vec[i].fi, "%u:%f", &fidx, &fv ) == 2, "Error when load feat" ); utils::Assert( fscanf ( vec[i].fi, "%u:%f", &e.findex, &e.fvalue ) == 2, "Error when load feat" );
vec[i].CheckBase( fidx ); vec[i].CheckBase( e.findex );
fidx += vec[i].base; e.findex += vec[i].base;
findex.push_back( fidx ); fvalue.push_back( fv ); feats.push_back(e);
} }
}else{ }else{
utils::Assert( fscanf ( vec[i].fi, "%f", &fv ) == 1, "load feat" ); utils::Assert( fscanf ( vec[i].fi, "%f", &e.fvalue ) == 1, "load feat" );
fidx = vec[i].base; e.findex = vec[i].base;
findex.push_back( fidx ); fvalue.push_back( fv ); feats.push_back(e);
} }
} }
} }
inline void DoRescale( std::vector<float> &vec ){ inline void DoRescale( std::vector<SparseBatch::Entry> &vec ){
double sum = 0.0; double sum = 0.0;
for( size_t i = 0; i < vec.size(); i ++ ){ for( size_t i = 0; i < vec.size(); i ++ ){
sum += vec[i] * vec[i]; sum += vec[i].fvalue * vec[i].fvalue;
} }
sum = sqrt( sum ); sum = sqrt( sum );
for( size_t i = 0; i < vec.size(); i ++ ){ for( size_t i = 0; i < vec.size(); i ++ ){
vec[i] /= sum; vec[i].fvalue /= sum;
} }
} }
public: public:
// basically we are loading all the data inside // basically we are loading all the data inside
inline void Load( void ){ inline void Load( void ){
this->data.Clear(); this->Clear();
float label, weight = 0.0f; float label, weight = 0.0f;
unsigned ngleft = 0, ngacc = 0; unsigned ngleft = 0, ngacc = 0;
@ -132,39 +130,40 @@ public:
while( fscanf( fp, "%f", &label ) == 1 ){ while( fscanf( fp, "%f", &label ) == 1 ){
if( ngleft == 0 && fgroup != NULL ){ if( ngleft == 0 && fgroup != NULL ){
utils::Assert( fscanf( fgroup, "%u", &ngleft ) == 1 ); utils::Assert( fscanf( fgroup, "%u", &ngleft ) == 1, "group" );
} }
if( fweight != NULL ){ if( fweight != NULL ){
utils::Assert( fscanf( fweight, "%f", &weight ) == 1 ); utils::Assert( fscanf( fweight, "%f", &weight ) == 1, "weight" );
} }
ngleft -= 1; ngacc += 1; ngleft -= 1; ngacc += 1;
int pass = 1; int pass = 1;
if( fwlist != NULL ){ if( fwlist != NULL ){
utils::Assert( fscanf( fwlist, "%u", &pass ) ==1 ); utils::Assert( fscanf( fwlist, "%u", &pass ) ==1, "pass" );
} }
if( pass == 0 ){ if( pass == 0 ){
vskip( fheader ); ngacc -= 1; vskip( fheader ); ngacc -= 1;
}else{ }else{
const int nfeat = readnum( fheader ); const int nfeat = readnum( fheader );
std::vector<unsigned> findex;
std::vector<float> fvalue; std::vector<SparseBatch::Entry> feats;
// pairs // pairs
this->Load( findex, fvalue, fheader ); this->Load( feats, fheader );
utils::Assert( findex.size() == (unsigned)nfeat ); utils::Assert( feats.size() == (unsigned)nfeat, "nfeat" );
if( rescale != 0 ) this->DoRescale( fvalue ); if( rescale != 0 ) this->DoRescale( feats );
// push back data :) // push back data :)
this->info.labels.push_back( label ); this->info.labels.push_back( label );
// push back weight if any // push back weight if any
if( fweight != NULL ){ if( fweight != NULL ){
this->info.weights.push_back( weight ); this->info.weights.push_back( weight );
} }
this->data.AddRow( findex, fvalue ); this->AddRow( feats );
} }
if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){ if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){
info.group_ptr.push_back( info.group_ptr.back() + ngacc ); info.group_ptr.push_back( info.group_ptr.back() + ngacc );
utils::Assert( info.group_ptr.back() == data.NumRow(), "group size must match num rows" ); utils::Assert( info.group_ptr.back() == info.num_row, "group size must match num rows" );
ngacc = 0; ngacc = 0;
} }
// linelimit // linelimit
@ -174,10 +173,10 @@ public:
} }
if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){ if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){
info.group_ptr.push_back( info.group_ptr.back() + ngacc ); info.group_ptr.push_back( info.group_ptr.back() + ngacc );
utils::Assert( info.group_ptr.back() == data.NumRow(), "group size must match num rows" ); utils::Assert( info.group_ptr.back() == info.num_row, "group size must match num rows" );
} }
this->data.InitData();
} }
}; };
const char *folder = "features"; const char *folder = "features";