From c356a0acc2489c644b67ddefc366fb10818ee7f1 Mon Sep 17 00:00:00 2001 From: Ted Fujimoto Date: Tue, 25 Nov 2014 21:27:50 -0500 Subject: [PATCH] Remove tools folder --- tools/Makefile | 25 ---- tools/xgcombine_buffer.cpp | 247 ------------------------------------- 2 files changed, 272 deletions(-) delete mode 100644 tools/Makefile delete mode 100644 tools/xgcombine_buffer.cpp diff --git a/tools/Makefile b/tools/Makefile deleted file mode 100644 index b35277aff..000000000 --- a/tools/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -export CC = gcc -export CXX = g++ -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp - -# specify tensor path -BIN = xgcombine_buffer -OBJ = -.PHONY: clean all - -all: $(BIN) $(OBJ) -export LDFLAGS= -pthread -lm - -xgcombine_buffer : xgcombine_buffer.cpp - -$(BIN) : - $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) - -$(OBJ) : - $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) - -install: - cp -f -r $(BIN) $(INSTALL_PATH) - -clean: - $(RM) $(OBJ) $(BIN) *~ diff --git a/tools/xgcombine_buffer.cpp b/tools/xgcombine_buffer.cpp deleted file mode 100644 index 84bc996a4..000000000 --- a/tools/xgcombine_buffer.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/*! - * a tool to combine different set of features into binary buffer - * not well organized code, but does it's job - * \author Tianqi Chen: tianqi.tchen@gmail.com - */ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE - -#include -#include -#include -#include -#include "../src/io/simple_dmatrix-inl.hpp" -#include "../src/utils/utils.h" - -using namespace xgboost; -using namespace xgboost::io; - -// header in dataset -struct Header{ - FILE *fi; - int tmp_num; - int base; - int num_feat; - // whether it's dense format - bool is_dense; - bool warned; - - Header( void ){ this->warned = false; this->is_dense = false; } - - inline void CheckBase( unsigned findex ){ - if( findex >= (unsigned)num_feat && ! warned ) { - fprintf( stderr, "warning:some feature exceed bound, num_feat=%d\n", num_feat ); - warned = true; - } - } -}; - - -inline int norm( std::vector
&vec, int base = 0 ){ - int n = base; - for( size_t i = 0; i < vec.size(); i ++ ){ - if( vec[i].is_dense ) vec[i].num_feat = 1; - vec[i].base = n; n += vec[i].num_feat; - } - return n; -} - -inline void vclose( std::vector
&vec ){ - for( size_t i = 0; i < vec.size(); i ++ ){ - fclose( vec[i].fi ); - } -} - -inline int readnum( std::vector
&vec ){ - int n = 0; - for( size_t i = 0; i < vec.size(); i ++ ){ - if( !vec[i].is_dense ){ - utils::Assert( fscanf( vec[i].fi, "%d", &vec[i].tmp_num ) == 1, "load num" ); - n += vec[i].tmp_num; - }else{ - n ++; - } - } - return n; -} - -inline void vskip( std::vector
&vec ){ - for( size_t i = 0; i < vec.size(); i ++ ){ - if( !vec[i].is_dense ){ - utils::Assert( fscanf( vec[i].fi, "%*d%*[^\n]\n" ) >= 0, "sparse" ); - }else{ - utils::Assert( fscanf( vec[i].fi, "%*f\n" ) >= 0, "dense" ); - } - } -} - -class DataLoader: public DMatrixSimple { - public: - // whether to do node and edge feature renormalization - int rescale; - int linelimit; - public: - FILE *fp, *fwlist, *fgroup, *fweight; - std::vector
fheader; - DataLoader( void ){ - rescale = 0; - linelimit = -1; - fp = NULL; fwlist = NULL; fgroup = NULL; fweight = NULL; - } - private: - inline void Load( std::vector &feats, std::vector
&vec ){ - SparseBatch::Entry e; - for( size_t i = 0; i < vec.size(); i ++ ){ - if( !vec[i].is_dense ) { - for( int j = 0; j < vec[i].tmp_num; j ++ ){ - utils::Assert( fscanf ( vec[i].fi, "%u:%f", &e.findex, &e.fvalue ) == 2, "Error when load feat" ); - vec[i].CheckBase( e.findex ); - e.findex += vec[i].base; - feats.push_back(e); - } - }else{ - utils::Assert( fscanf ( vec[i].fi, "%f", &e.fvalue ) == 1, "load feat" ); - e.findex = vec[i].base; - feats.push_back(e); - } - } - } - inline void DoRescale( std::vector &vec ){ - double sum = 0.0; - for( size_t i = 0; i < vec.size(); i ++ ){ - sum += vec[i].fvalue * vec[i].fvalue; - } - sum = sqrt( sum ); - for( size_t i = 0; i < vec.size(); i ++ ){ - vec[i].fvalue /= sum; - } - } - public: - // basically we are loading all the data inside - inline void Load( void ){ - this->Clear(); - float label, weight = 0.0f; - - unsigned ngleft = 0, ngacc = 0; - if( fgroup != NULL ){ - info.group_ptr.clear(); - info.group_ptr.push_back(0); - } - - while( fscanf( fp, "%f", &label ) == 1 ){ - if( ngleft == 0 && fgroup != NULL ){ - utils::Assert( fscanf( fgroup, "%u", &ngleft ) == 1, "group" ); - } - if( fweight != NULL ){ - utils::Assert( fscanf( fweight, "%f", &weight ) == 1, "weight" ); - } - - ngleft -= 1; ngacc += 1; - - int pass = 1; - if( fwlist != NULL ){ - utils::Assert( fscanf( fwlist, "%u", &pass ) ==1, "pass" ); - } - if( pass == 0 ){ - vskip( fheader ); ngacc -= 1; - }else{ - const int nfeat = readnum( fheader ); - - std::vector feats; - - // pairs - this->Load( feats, fheader ); - utils::Assert( feats.size() == (unsigned)nfeat, "nfeat" ); - if( rescale != 0 ) this->DoRescale( feats ); - // push back data :) - this->info.labels.push_back( label ); - // push back weight if any - if( fweight != NULL ){ - this->info.weights.push_back( weight ); - } - this->AddRow( feats ); - } - if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){ - info.group_ptr.push_back( info.group_ptr.back() + ngacc ); - utils::Assert( info.group_ptr.back() == info.num_row, "group size must match num rows" ); - ngacc = 0; - } - // linelimit - if( linelimit >= 0 ) { - if( -- linelimit <= 0 ) break; - } - } - if( ngleft == 0 && fgroup != NULL && ngacc != 0 ){ - info.group_ptr.push_back( info.group_ptr.back() + ngacc ); - utils::Assert( info.group_ptr.back() == info.num_row, "group size must match num rows" ); - } - } - -}; - -const char *folder = "features"; - -int main( int argc, char *argv[] ){ - if( argc < 3 ){ - printf("Usage:xgcombine_buffer [options] -f [features] -fd [densefeatures]\n" \ - "options: -rescale -linelimit -fgroup -wlist \n"); - return 0; - } - - DataLoader loader; - time_t start = time( NULL ); - - int mode = 0; - for( int i = 3; i < argc; i ++ ){ - if( !strcmp( argv[i], "-f") ){ - mode = 0; continue; - } - if( !strcmp( argv[i], "-fd") ){ - mode = 2; continue; - } - if( !strcmp( argv[i], "-rescale") ){ - loader.rescale = 1; continue; - } - if( !strcmp( argv[i], "-wlist") ){ - loader.fwlist = utils::FopenCheck( argv[ ++i ], "r" ); continue; - } - if( !strcmp( argv[i], "-fgroup") ){ - loader.fgroup = utils::FopenCheck( argv[ ++i ], "r" ); continue; - } - if( !strcmp( argv[i], "-fweight") ){ - loader.fweight = utils::FopenCheck( argv[ ++i ], "r" ); continue; - } - if( !strcmp( argv[i], "-linelimit") ){ - loader.linelimit = atoi( argv[ ++i ] ); continue; - } - - char name[ 256 ]; - sprintf( name, "%s/%s.%s", folder, argv[1], argv[i] ); - Header h; - h.fi = utils::FopenCheck( name, "r" ); - - if( mode == 2 ){ - h.is_dense = true; h.num_feat = 1; - loader.fheader.push_back( h ); - }else{ - utils::Assert( fscanf( h.fi, "%d", &h.num_feat ) == 1, "num feat" ); - switch( mode ){ - case 0: loader.fheader.push_back( h ); break; - default: ; - } - } - } - loader.fp = utils::FopenCheck( argv[1], "r" ); - - printf("num_features=%d\n", norm( loader.fheader ) ); - printf("start creating buffer...\n"); - loader.Load(); - loader.SaveBinary( argv[2] ); - // close files - fclose( loader.fp ); - if( loader.fwlist != NULL ) fclose( loader.fwlist ); - if( loader.fgroup != NULL ) fclose( loader.fgroup ); - vclose( loader.fheader ); - printf("all generation end, %lu sec used\n", (unsigned long)(time(NULL) - start) ); - return 0; -}