Merge branch 'master' of ssh://github.com/tqchen/rabit

Conflicts:
	tracker/rabit_tracker.py
This commit is contained in:
tqchen
2015-03-11 13:35:35 -07:00
13 changed files with 440 additions and 64 deletions

2
rabit-learn/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
config.mk
*.log

View File

@@ -19,6 +19,7 @@ class FileStream : public utils::ISeekStream {
public:
explicit FileStream(const char *fname, const char *mode)
: use_stdio(false) {
using namespace std;
#ifndef RABIT_STRICT_CXX98_
if (!strcmp(fname, "stdin")) {
use_stdio = true; fp = stdin;
@@ -51,7 +52,7 @@ class FileStream : public utils::ISeekStream {
return std::ftell(fp);
}
virtual bool AtEnd(void) const {
return feof(fp) != 0;
return std::feof(fp) != 0;
}
inline void Close(void) {
if (fp != NULL && !use_stdio) {
@@ -60,7 +61,7 @@ class FileStream : public utils::ISeekStream {
}
private:
FILE *fp;
std::FILE *fp;
bool use_stdio;
};
@@ -71,7 +72,7 @@ class FileSplit : public LineSplitBase {
LineSplitBase::SplitNames(&fnames_, uri, "#");
std::vector<size_t> fsize;
for (size_t i = 0; i < fnames_.size(); ++i) {
if (!strncmp(fnames_[i].c_str(), "file://", 7)) {
if (!std::strncmp(fnames_[i].c_str(), "file://", 7)) {
std::string tmp = fnames_[i].c_str() + 7;
fnames_[i] = tmp;
}
@@ -88,11 +89,11 @@ class FileSplit : public LineSplitBase {
}
// get file size
inline static size_t GetFileSize(const char *fname) {
FILE *fp = utils::FopenCheck(fname, "rb");
std::FILE *fp = utils::FopenCheck(fname, "rb");
// NOTE: fseek may not be good, but serves as ok solution
fseek(fp, 0, SEEK_END);
size_t fsize = static_cast<size_t>(ftell(fp));
fclose(fp);
std::fseek(fp, 0, SEEK_END);
size_t fsize = static_cast<size_t>(std::ftell(fp));
std::fclose(fp);
return fsize;
}

View File

@@ -25,6 +25,7 @@ namespace io {
inline InputSplit *CreateInputSplit(const char *uri,
unsigned part,
unsigned nsplit) {
using namespace std;
if (!strcmp(uri, "stdin")) {
return new SingleFileSplit(uri);
}
@@ -48,6 +49,7 @@ inline InputSplit *CreateInputSplit(const char *uri,
* \param mode can be 'w' or 'r' for read or write
*/
inline IStream *CreateStream(const char *uri, const char *mode) {
using namespace std;
if (!strncmp(uri, "file://", 7)) {
return new FileStream(uri + 7, mode);
}

View File

@@ -1,7 +1,7 @@
#ifndef RABIT_LEARN_IO_LINE_SPLIT_INL_H_
#define RABIT_LEARN_IO_LINE_SPLIT_INL_H_
/*!
* \file line_split-inl.h
* \file line_split-inl.h
* \brief base implementation of line-splitter
* \author Tianqi Chen
*/
@@ -30,7 +30,7 @@ class LineSplitBase : public InputSplit {
if (out_data->length() != 0) return true;
file_ptr_ += 1;
if (offset_curr_ != file_offset_[file_ptr_]) {
utils::Error("warning:file size not calculated correctly\n");
utils::Error("warning:std::FILE size not calculated correctly\n");
offset_curr_ = file_offset_[file_ptr_];
}
if (offset_curr_ >= offset_end_) return false;
@@ -59,7 +59,7 @@ class LineSplitBase : public InputSplit {
}
/*!
* \brief initialize the line splitter,
* \param file_size, size of each files
* \param file_size, sizes of the individual files
* \param rank the current rank of the data
* \param nsplit number of splits the data is divided into
*/
@@ -96,31 +96,31 @@ class LineSplitBase : public InputSplit {
}
/*!
* \brief get the seek stream of given file_index
* \return the corresponding seek stream at head of file
* \return the corresponding seek stream at head of the file
*/
virtual utils::ISeekStream *GetFile(size_t file_index) = 0;
/*!
* \brief split names given
* \param out_fname output file names
* \param uri_ the iput uri file
* \param out_fname output file names
* \param uri_ the input uri string
* \param dlm delimiter
*/
inline static void SplitNames(std::vector<std::string> *out_fname,
const char *uri_,
const char *dlm) {
std::string uri = uri_;
char *p = strtok(BeginPtr(uri), dlm);
char *p = std::strtok(BeginPtr(uri), dlm);
while (p != NULL) {
out_fname->push_back(std::string(p));
p = strtok(NULL, dlm);
p = std::strtok(NULL, dlm);
}
}
private:
/*! \brief current input stream */
utils::ISeekStream *fs_;
/*! \brief file pointer of which file to read on */
/*! \brief index of the file currently being read */
size_t file_ptr_;
/*! \brief file pointer where the end of file lies */
/*! \brief file pointer where the end of the file lies */
size_t file_ptr_end_;
/*! \brief get the current offset */
size_t offset_curr_;
@@ -128,7 +128,7 @@ class LineSplitBase : public InputSplit {
size_t offset_begin_;
/*! \brief end of the offset */
size_t offset_end_;
/*! \brief byte-offset of each file */
/*! \brief byte-offset of each file */
std::vector<size_t> file_offset_;
/*! \brief buffer reader */
StreamBufferReader reader_;
@@ -136,11 +136,11 @@ class LineSplitBase : public InputSplit {
const static size_t kBufferSize = 256;
};
/*! \brief line split from single file */
/*! \brief line split from a single file */
class SingleFileSplit : public InputSplit {
public:
explicit SingleFileSplit(const char *fname) {
if (!strcmp(fname, "stdin")) {
if (!std::strcmp(fname, "stdin")) {
#ifndef RABIT_STRICT_CXX98_
use_stdin_ = true; fp_ = stdin;
#endif
@@ -151,13 +151,13 @@ class SingleFileSplit : public InputSplit {
end_of_file_ = false;
}
virtual ~SingleFileSplit(void) {
if (!use_stdin_) fclose(fp_);
if (!use_stdin_) std::fclose(fp_);
}
virtual bool NextLine(std::string *out_data) {
if (end_of_file_) return false;
out_data->clear();
while (true) {
char c = fgetc(fp_);
char c = std::fgetc(fp_);
if (c == EOF) {
end_of_file_ = true;
}
@@ -172,7 +172,7 @@ class SingleFileSplit : public InputSplit {
}
private:
FILE *fp_;
std::FILE *fp_;
bool use_stdin_;
bool end_of_file_;
};

View File

@@ -145,8 +145,9 @@ class LBFGSSolver {
if (silent == 0 && rabit::GetRank() == 0) {
rabit::TrackerPrintf
("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu\n",
gstate.num_dim, gstate.init_objval, gstate.size_memory);
("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu, RAM-approx=%lu\n",
gstate.num_dim, gstate.init_objval, gstate.size_memory,
gstate.MemCost() + hist.MemCost());
}
}
}
@@ -496,6 +497,10 @@ class LBFGSSolver {
data.resize(n * n, 0.0);
this->AllocSpace();
}
// approximate memory cost in bytes, reported as part of the
// "RAM-approx" figure printed when the solver starts:
// sizeof(DType) multiplied by 3 * num_dim
// NOTE(review): the factor 3 presumably counts three vectors of
// length num_dim kept by this state - confirm against the members
inline size_t MemCost(void) const {
return sizeof(DType) * 3 * num_dim;
}
inline double &DotBuf(size_t i, size_t j) {
if (i > j) std::swap(i, j);
return data[MapIndex(i, offset_, size_memory) * (size_memory * 2 + 1) +
@@ -565,6 +570,10 @@ class LBFGSSolver {
size_t n = size_memory * 2 + 1;
dptr_ = new DType[n * stride_];
}
// memory cost in bytes of the rolling array buffer:
// (size_memory_ * 2 + 1) rows of stride_ elements of DType,
// mirroring the (size_memory * 2 + 1) * stride_ element count
// used when dptr_ is allocated
inline size_t MemCost(void) const {
return sizeof(DType) * (size_memory_ * 2 + 1) * stride_;
}
// fetch element from rolling array
inline const DType *operator[](size_t i) const {
return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;

View File

@@ -77,11 +77,15 @@ struct SparseMat {
feat_dim += 1;
utils::Check(feat_dim < std::numeric_limits<index_t>::max(),
"feature dimension exceed limit of index_t"\
"consider change the index_t to unsigned long");
"consider change the index_t to unsigned long");
}
inline size_t NumRow(void) const {
return row_ptr.size() - 1;
}
// memory cost in bytes of the entry storage only:
// number of elements in data times sizeof(Entry);
// the row_ptr index vector is not included in this figure
inline size_t MemCost(void) const {
return data.size() * sizeof(Entry);
}
// maximum feature dimension
size_t feat_dim;
std::vector<size_t> row_ptr;