Merge commit '75bf97b57539e5572e7ae8eba72bac6562c63c07'
Conflicts: subtree/rabit/rabit-learn/io/line_split-inl.h subtree/rabit/yarn/build.sh
This commit is contained in:
2
subtree/rabit/rabit-learn/.gitignore
vendored
Normal file
2
subtree/rabit/rabit-learn/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
config.mk
|
||||
*.log
|
||||
@@ -38,6 +38,7 @@ class StreamBufferReader {
|
||||
}
|
||||
}
|
||||
}
|
||||
/*! \brief whether we are reaching the end of file */
|
||||
inline bool AtEnd(void) const {
|
||||
return read_len_ == 0;
|
||||
}
|
||||
|
||||
@@ -66,27 +66,36 @@ class FileStream : public utils::ISeekStream {
|
||||
};
|
||||
|
||||
/*! \brief line split from normal file system */
|
||||
class FileSplit : public LineSplitBase {
|
||||
class FileProvider : public LineSplitter::IFileProvider {
|
||||
public:
|
||||
explicit FileSplit(const char *uri, unsigned rank, unsigned nsplit) {
|
||||
LineSplitBase::SplitNames(&fnames_, uri, "#");
|
||||
explicit FileProvider(const char *uri) {
|
||||
LineSplitter::SplitNames(&fnames_, uri, "#");
|
||||
std::vector<size_t> fsize;
|
||||
for (size_t i = 0; i < fnames_.size(); ++i) {
|
||||
if (!std::strncmp(fnames_[i].c_str(), "file://", 7)) {
|
||||
std::string tmp = fnames_[i].c_str() + 7;
|
||||
fnames_[i] = tmp;
|
||||
}
|
||||
fsize.push_back(GetFileSize(fnames_[i].c_str()));
|
||||
size_t fz = GetFileSize(fnames_[i].c_str());
|
||||
if (fz != 0) {
|
||||
fsize_.push_back(fz);
|
||||
}
|
||||
}
|
||||
LineSplitBase::Init(fsize, rank, nsplit);
|
||||
}
|
||||
virtual ~FileSplit(void) {}
|
||||
|
||||
protected:
|
||||
virtual utils::ISeekStream *GetFile(size_t file_index) {
|
||||
// destructor
|
||||
virtual ~FileProvider(void) {}
|
||||
virtual utils::ISeekStream *Open(size_t file_index) {
|
||||
utils::Assert(file_index < fnames_.size(), "file index exceed bound");
|
||||
return new FileStream(fnames_[file_index].c_str(), "rb");
|
||||
}
|
||||
virtual const std::vector<size_t> &FileSize(void) const {
|
||||
return fsize_;
|
||||
}
|
||||
private:
|
||||
// file sizes
|
||||
std::vector<size_t> fsize_;
|
||||
// file names
|
||||
std::vector<std::string> fnames_;
|
||||
// get file size
|
||||
inline static size_t GetFileSize(const char *fname) {
|
||||
std::FILE *fp = utils::FopenCheck(fname, "rb");
|
||||
@@ -96,10 +105,6 @@ class FileSplit : public LineSplitBase {
|
||||
std::fclose(fp);
|
||||
return fsize;
|
||||
}
|
||||
|
||||
private:
|
||||
// file names
|
||||
std::vector<std::string> fnames_;
|
||||
};
|
||||
} // namespace io
|
||||
} // namespace rabit
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include <hdfs.h>
|
||||
#include <errno.h>
|
||||
@@ -15,11 +16,15 @@
|
||||
/*! \brief io interface */
|
||||
namespace rabit {
|
||||
namespace io {
|
||||
class HDFSStream : public utils::ISeekStream {
|
||||
class HDFSStream : public ISeekStream {
|
||||
public:
|
||||
HDFSStream(hdfsFS fs, const char *fname, const char *mode)
|
||||
: fs_(fs), at_end_(false) {
|
||||
int flag;
|
||||
HDFSStream(hdfsFS fs,
|
||||
const char *fname,
|
||||
const char *mode,
|
||||
bool disconnect_when_done)
|
||||
: fs_(fs), at_end_(false),
|
||||
disconnect_when_done_(disconnect_when_done) {
|
||||
int flag = 0;
|
||||
if (!strcmp(mode, "r")) {
|
||||
flag = O_RDONLY;
|
||||
} else if (!strcmp(mode, "w")) {
|
||||
@@ -35,6 +40,9 @@ class HDFSStream : public utils::ISeekStream {
|
||||
}
|
||||
virtual ~HDFSStream(void) {
|
||||
this->Close();
|
||||
if (disconnect_when_done_) {
|
||||
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
|
||||
}
|
||||
}
|
||||
virtual size_t Read(void *ptr, size_t size) {
|
||||
tSize nread = hdfsRead(fs_, fp_, ptr, size);
|
||||
@@ -86,52 +94,69 @@ class HDFSStream : public utils::ISeekStream {
|
||||
}
|
||||
}
|
||||
|
||||
inline static std::string GetNameNode(void) {
|
||||
const char *nn = getenv("rabit_hdfs_namenode");
|
||||
if (nn == NULL) {
|
||||
return std::string("default");
|
||||
} else {
|
||||
return std::string(nn);
|
||||
}
|
||||
}
|
||||
private:
|
||||
hdfsFS fs_;
|
||||
hdfsFile fp_;
|
||||
bool at_end_;
|
||||
bool disconnect_when_done_;
|
||||
};
|
||||
|
||||
/*! \brief line split from HDFS */
|
||||
class HDFSSplit : public LineSplitBase {
|
||||
class HDFSProvider : public LineSplitter::IFileProvider {
|
||||
public:
|
||||
explicit HDFSSplit(const char *uri, unsigned rank, unsigned nsplit) {
|
||||
fs_ = hdfsConnect("default", 0);
|
||||
explicit HDFSProvider(const char *uri) {
|
||||
fs_ = hdfsConnect(HDFSStream::GetNameNode().c_str(), 0);
|
||||
utils::Check(fs_ != NULL, "error when connecting to default HDFS");
|
||||
std::vector<std::string> paths;
|
||||
LineSplitBase::SplitNames(&paths, uri, "#");
|
||||
LineSplitter::SplitNames(&paths, uri, "#");
|
||||
// get the files
|
||||
std::vector<size_t> fsize;
|
||||
for (size_t i = 0; i < paths.size(); ++i) {
|
||||
hdfsFileInfo *info = hdfsGetPathInfo(fs_, paths[i].c_str());
|
||||
utils::Check(info != NULL, "path %s do not exist", paths[i].c_str());
|
||||
if (info->mKind == 'D') {
|
||||
int nentry;
|
||||
hdfsFileInfo *files = hdfsListDirectory(fs_, info->mName, &nentry);
|
||||
utils::Check(files != NULL, "error when ListDirectory %s", info->mName);
|
||||
for (int i = 0; i < nentry; ++i) {
|
||||
if (files[i].mKind == 'F') {
|
||||
fsize.push_back(files[i].mSize);
|
||||
if (files[i].mKind == 'F' && files[i].mSize != 0) {
|
||||
fsize_.push_back(files[i].mSize);
|
||||
fnames_.push_back(std::string(files[i].mName));
|
||||
}
|
||||
}
|
||||
hdfsFreeFileInfo(files, nentry);
|
||||
} else {
|
||||
fsize.push_back(info->mSize);
|
||||
fnames_.push_back(std::string(info->mName));
|
||||
if (info->mSize != 0) {
|
||||
fsize_.push_back(info->mSize);
|
||||
fnames_.push_back(std::string(info->mName));
|
||||
}
|
||||
}
|
||||
hdfsFreeFileInfo(info, 1);
|
||||
}
|
||||
LineSplitBase::Init(fsize, rank, nsplit);
|
||||
}
|
||||
virtual ~HDFSSplit(void) {}
|
||||
|
||||
protected:
|
||||
virtual utils::ISeekStream *GetFile(size_t file_index) {
|
||||
virtual ~HDFSProvider(void) {
|
||||
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
|
||||
}
|
||||
virtual const std::vector<size_t> &FileSize(void) const {
|
||||
return fsize_;
|
||||
}
|
||||
virtual ISeekStream *Open(size_t file_index) {
|
||||
utils::Assert(file_index < fnames_.size(), "file index exceed bound");
|
||||
return new HDFSStream(fs_, fnames_[file_index].c_str(), "r");
|
||||
return new HDFSStream(fs_, fnames_[file_index].c_str(), "r", false);
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
// hdfs handle
|
||||
hdfsFS fs_;
|
||||
// file sizes
|
||||
std::vector<size_t> fsize_;
|
||||
// file names
|
||||
std::vector<std::string> fnames_;
|
||||
};
|
||||
|
||||
@@ -30,16 +30,16 @@ inline InputSplit *CreateInputSplit(const char *uri,
|
||||
return new SingleFileSplit(uri);
|
||||
}
|
||||
if (!strncmp(uri, "file://", 7)) {
|
||||
return new FileSplit(uri, part, nsplit);
|
||||
return new LineSplitter(new FileProvider(uri), part, nsplit);
|
||||
}
|
||||
if (!strncmp(uri, "hdfs://", 7)) {
|
||||
#if RABIT_USE_HDFS
|
||||
return new HDFSSplit(uri, part, nsplit);
|
||||
return new LineSplitter(new HDFSProvider(uri), part, nsplit);
|
||||
#else
|
||||
utils::Error("Please compile with RABIT_USE_HDFS=1");
|
||||
#endif
|
||||
}
|
||||
return new FileSplit(uri, part, nsplit);
|
||||
return new LineSplitter(new FileProvider(uri), part, nsplit);
|
||||
}
|
||||
/*!
|
||||
* \brief create a stream, the stream must be able to close
|
||||
@@ -55,7 +55,8 @@ inline IStream *CreateStream(const char *uri, const char *mode) {
|
||||
}
|
||||
if (!strncmp(uri, "hdfs://", 7)) {
|
||||
#if RABIT_USE_HDFS
|
||||
return new HDFSStream(hdfsConnect("default", 0), uri, mode);
|
||||
return new HDFSStream(hdfsConnect(HDFSStream::GetNameNode().c_str(), 0),
|
||||
uri, mode, true);
|
||||
#else
|
||||
utils::Error("Please compile with RABIT_USE_HDFS=1");
|
||||
#endif
|
||||
|
||||
@@ -19,6 +19,7 @@ namespace rabit {
|
||||
* \brief namespace to handle input split and filesystem interfacing
|
||||
*/
|
||||
namespace io {
|
||||
/*! \brief reused ISeekStream's definition */
|
||||
typedef utils::ISeekStream ISeekStream;
|
||||
/*!
|
||||
* \brief user facing input split helper,
|
||||
|
||||
@@ -15,11 +15,42 @@
|
||||
|
||||
namespace rabit {
|
||||
namespace io {
|
||||
class LineSplitBase : public InputSplit {
|
||||
|
||||
/*! \brief class that split the files by line */
|
||||
class LineSplitter : public InputSplit {
|
||||
public:
|
||||
virtual ~LineSplitBase() {
|
||||
if (fs_ != NULL) delete fs_;
|
||||
class IFileProvider {
|
||||
public:
|
||||
/*!
|
||||
* \brief get the seek stream of given file_index
|
||||
* \return the corresponding seek stream at head of the stream
|
||||
* the seek stream's resource can be freed by calling delete
|
||||
*/
|
||||
virtual ISeekStream *Open(size_t file_index) = 0;
|
||||
/*!
|
||||
* \return const reference to the size of each file
|
||||
*/
|
||||
virtual const std::vector<size_t> &FileSize(void) const = 0;
|
||||
// virtual destructor
|
||||
virtual ~IFileProvider() {}
|
||||
};
|
||||
// constructor
|
||||
explicit LineSplitter(IFileProvider *provider,
|
||||
unsigned rank,
|
||||
unsigned nsplit)
|
||||
: provider_(provider), fs_(NULL),
|
||||
reader_(kBufferSize) {
|
||||
this->Init(provider_->FileSize(), rank, nsplit);
|
||||
}
|
||||
// destructor
|
||||
virtual ~LineSplitter() {
|
||||
if (fs_ != NULL) {
|
||||
delete fs_; fs_ = NULL;
|
||||
}
|
||||
// delete provider after destructing the streams
|
||||
delete provider_;
|
||||
}
|
||||
// get next line
|
||||
virtual bool NextLine(std::string *out_data) {
|
||||
if (file_ptr_ >= file_ptr_end_ &&
|
||||
offset_curr_ >= offset_end_) return false;
|
||||
@@ -29,15 +60,15 @@ class LineSplitBase : public InputSplit {
|
||||
if (reader_.AtEnd()) {
|
||||
if (out_data->length() != 0) return true;
|
||||
file_ptr_ += 1;
|
||||
if (offset_curr_ >= offset_end_) return false;
|
||||
if (offset_curr_ != file_offset_[file_ptr_]) {
|
||||
utils::Error("warning:std::FILE size not calculated correctly\n");
|
||||
utils::Error("warning: FILE size not calculated correctly\n");
|
||||
offset_curr_ = file_offset_[file_ptr_];
|
||||
}
|
||||
if (offset_curr_ >= offset_end_) return false;
|
||||
utils::Assert(file_ptr_ + 1 < file_offset_.size(),
|
||||
"boundary check");
|
||||
delete fs_;
|
||||
fs_ = this->GetFile(file_ptr_);
|
||||
fs_ = provider_->Open(file_ptr_);
|
||||
reader_.set_stream(fs_);
|
||||
} else {
|
||||
++offset_curr_;
|
||||
@@ -51,15 +82,27 @@ class LineSplitBase : public InputSplit {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// constructor
|
||||
LineSplitBase(void)
|
||||
: fs_(NULL), reader_(kBufferSize) {
|
||||
/*!
|
||||
* \brief split names given
|
||||
* \param out_fname output file names
|
||||
* \param uri_ the input uri of the files
|
||||
* \param dlm delimiter
|
||||
*/
|
||||
inline static void SplitNames(std::vector<std::string> *out_fname,
|
||||
const char *uri_,
|
||||
const char *dlm) {
|
||||
std::string uri = uri_;
|
||||
char *p = std::strtok(BeginPtr(uri), dlm);
|
||||
while (p != NULL) {
|
||||
out_fname->push_back(std::string(p));
|
||||
p = std::strtok(NULL, dlm);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
/*!
|
||||
* \brief initialize the line splitter
|
||||
* \param file_size, size of each std::FILEs
|
||||
* \param file_size size of each file
|
||||
* \param rank the current rank of the data
|
||||
* \param nsplit number of split we will divide the data into
|
||||
*/
|
||||
@@ -82,7 +125,7 @@ class LineSplitBase : public InputSplit {
|
||||
file_ptr_end_ = std::upper_bound(file_offset_.begin(),
|
||||
file_offset_.end(),
|
||||
offset_end_) - file_offset_.begin() - 1;
|
||||
fs_ = GetFile(file_ptr_);
|
||||
fs_ = provider_->Open(file_ptr_);
|
||||
reader_.set_stream(fs_);
|
||||
// try to set the starting position correctly
|
||||
if (file_offset_[file_ptr_] != offset_begin_) {
|
||||
@@ -94,33 +137,15 @@ class LineSplitBase : public InputSplit {
|
||||
}
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief get the seek stream of given file_index
|
||||
* \return the corresponding seek stream at head of std::FILE
|
||||
*/
|
||||
virtual utils::ISeekStream *GetFile(size_t file_index) = 0;
|
||||
/*!
|
||||
* \brief split names given
|
||||
* \param out_fname output std::FILE names
|
||||
* \param uri_ the iput uri std::FILE
|
||||
* \param dlm deliminetr
|
||||
*/
|
||||
inline static void SplitNames(std::vector<std::string> *out_fname,
|
||||
const char *uri_,
|
||||
const char *dlm) {
|
||||
std::string uri = uri_;
|
||||
char *p = std::strtok(BeginPtr(uri), dlm);
|
||||
while (p != NULL) {
|
||||
out_fname->push_back(std::string(p));
|
||||
p = std::strtok(NULL, dlm);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
/*! \brief FileProvider */
|
||||
IFileProvider *provider_;
|
||||
/*! \brief current input stream */
|
||||
utils::ISeekStream *fs_;
|
||||
/*! \brief std::FILE pointer of which std::FILE to read on */
|
||||
/*! \brief file pointer of which file to read on */
|
||||
size_t file_ptr_;
|
||||
/*! \brief std::FILE pointer where the end of std::FILE lies */
|
||||
/*! \brief file pointer where the end of file lies */
|
||||
size_t file_ptr_end_;
|
||||
/*! \brief get the current offset */
|
||||
size_t offset_curr_;
|
||||
@@ -128,7 +153,7 @@ class LineSplitBase : public InputSplit {
|
||||
size_t offset_begin_;
|
||||
/*! \brief end of the offset */
|
||||
size_t offset_end_;
|
||||
/*! \brief byte-offset of each std::FILE */
|
||||
/*! \brief byte-offset of each file */
|
||||
std::vector<size_t> file_offset_;
|
||||
/*! \brief buffer reader */
|
||||
StreamBufferReader reader_;
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
# specify tensor path
|
||||
ifneq ("$(wildcard ../config.mk)","")
|
||||
config = ../config.mk
|
||||
else
|
||||
config = ../make/config.mk
|
||||
endif
|
||||
include $(config)
|
||||
|
||||
BIN = linear.rabit
|
||||
MOCKBIN= linear.mock
|
||||
MPIBIN =
|
||||
@@ -6,10 +12,10 @@ MPIBIN =
|
||||
OBJ = linear.o
|
||||
|
||||
# common build script for programs
|
||||
include ../make/config.mk
|
||||
include ../make/common.mk
|
||||
CFLAGS+=-fopenmp
|
||||
linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h
|
||||
# dependencies here
|
||||
linear.rabit: linear.o lib
|
||||
linear.mock: linear.o lib
|
||||
|
||||
|
||||
@@ -206,21 +206,22 @@ int main(int argc, char *argv[]) {
|
||||
rabit::Finalize();
|
||||
return 0;
|
||||
}
|
||||
rabit::linear::LinearObjFunction linear;
|
||||
rabit::linear::LinearObjFunction *linear = new rabit::linear::LinearObjFunction();
|
||||
if (!strcmp(argv[1], "stdin")) {
|
||||
linear.LoadData(argv[1]);
|
||||
linear->LoadData(argv[1]);
|
||||
rabit::Init(argc, argv);
|
||||
} else {
|
||||
rabit::Init(argc, argv);
|
||||
linear.LoadData(argv[1]);
|
||||
linear->LoadData(argv[1]);
|
||||
}
|
||||
for (int i = 2; i < argc; ++i) {
|
||||
char name[256], val[256];
|
||||
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
|
||||
linear.SetParam(name, val);
|
||||
linear->SetParam(name, val);
|
||||
}
|
||||
}
|
||||
linear.Run();
|
||||
linear->Run();
|
||||
delete linear;
|
||||
rabit::Finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -26,10 +26,11 @@ struct LinearModel {
|
||||
int reserved[16];
|
||||
// constructor
|
||||
ModelParam(void) {
|
||||
memset(this, 0, sizeof(ModelParam));
|
||||
base_score = 0.5f;
|
||||
num_feature = 0;
|
||||
loss_type = 1;
|
||||
std::memset(reserved, 0, sizeof(reserved));
|
||||
num_feature = 0;
|
||||
}
|
||||
// initialize base score
|
||||
inline void InitBaseScore(void) {
|
||||
@@ -119,7 +120,7 @@ struct LinearModel {
|
||||
}
|
||||
fi.Read(weight, sizeof(float) * (param.num_feature + 1));
|
||||
}
|
||||
inline void Save(rabit::IStream &fo, const float *wptr = NULL) const {
|
||||
inline void Save(rabit::IStream &fo, const float *wptr = NULL) {
|
||||
fo.Write(¶m, sizeof(param));
|
||||
if (wptr == NULL) wptr = weight;
|
||||
fo.Write(wptr, sizeof(float) * (param.num_feature + 1));
|
||||
|
||||
@@ -6,12 +6,13 @@ then
|
||||
fi
|
||||
|
||||
# put the local training file to HDFS
|
||||
hadoop fs -rm -r -f $2/data
|
||||
hadoop fs -rm -r -f $2/mushroom.linear.model
|
||||
|
||||
hadoop fs -mkdir $2/data
|
||||
hadoop fs -put ../data/agaricus.txt.train $2/data
|
||||
|
||||
# submit to hadoop
|
||||
../../tracker/rabit_yarn.py -n $1 --vcores 1 linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}"
|
||||
../../tracker/rabit_yarn.py -n $1 --vcores 1 ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}"
|
||||
|
||||
# get the final model file
|
||||
hadoop fs -get $2/mushroom.linear.model ./linear.model
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#
|
||||
# - copy this file to the root of rabit-learn folder
|
||||
# - modify the configuration you want
|
||||
# - type make or make -j n for parallel build
|
||||
# - type make or make -j n in each of the folders
|
||||
#----------------------------------------------------
|
||||
|
||||
# choice of compiler
|
||||
|
||||
@@ -145,8 +145,9 @@ class LBFGSSolver {
|
||||
|
||||
if (silent == 0 && rabit::GetRank() == 0) {
|
||||
rabit::TrackerPrintf
|
||||
("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu\n",
|
||||
gstate.num_dim, gstate.init_objval, gstate.size_memory);
|
||||
("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu, RAM-approx=%lu\n",
|
||||
gstate.num_dim, gstate.init_objval, gstate.size_memory,
|
||||
gstate.MemCost() + hist.MemCost());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -176,7 +177,7 @@ class LBFGSSolver {
|
||||
// swap new weight
|
||||
std::swap(g.weight, g.grad);
|
||||
// check stop condition
|
||||
if (gstate.num_iteration > min_lbfgs_iter) {
|
||||
if (gstate.num_iteration > static_cast<size_t>(min_lbfgs_iter)) {
|
||||
if (g.old_objval - g.new_objval < lbfgs_stop_tol * g.init_objval) {
|
||||
return true;
|
||||
}
|
||||
@@ -195,7 +196,7 @@ class LBFGSSolver {
|
||||
/*! \brief run optimization */
|
||||
virtual void Run(void) {
|
||||
this->Init();
|
||||
while (gstate.num_iteration < max_lbfgs_iter) {
|
||||
while (gstate.num_iteration < static_cast<size_t>(max_lbfgs_iter)) {
|
||||
if (this->UpdateOneIter()) break;
|
||||
}
|
||||
if (silent == 0 && rabit::GetRank() == 0) {
|
||||
@@ -225,7 +226,7 @@ class LBFGSSolver {
|
||||
const size_t num_dim = gstate.num_dim;
|
||||
const DType *gsub = grad + range_begin_;
|
||||
const size_t nsub = range_end_ - range_begin_;
|
||||
double vdot;
|
||||
double vdot = 0.0;
|
||||
if (n != 0) {
|
||||
// hist[m + n - 1] stores old gradient
|
||||
Minus(hist[m + n - 1], gsub, hist[m + n - 1], nsub);
|
||||
@@ -241,15 +242,19 @@ class LBFGSSolver {
|
||||
idxset.push_back(std::make_pair(m + j, 2 * m));
|
||||
idxset.push_back(std::make_pair(m + j, m + n - 1));
|
||||
}
|
||||
|
||||
// calculate dot products
|
||||
std::vector<double> tmp(idxset.size());
|
||||
for (size_t i = 0; i < tmp.size(); ++i) {
|
||||
tmp[i] = hist.CalcDot(idxset[i].first, idxset[i].second);
|
||||
}
|
||||
|
||||
rabit::Allreduce<rabit::op::Sum>(BeginPtr(tmp), tmp.size());
|
||||
|
||||
for (size_t i = 0; i < tmp.size(); ++i) {
|
||||
gstate.DotBuf(idxset[i].first, idxset[i].second) = tmp[i];
|
||||
}
|
||||
|
||||
// BFGS steps, use vector-free update
|
||||
// parameterize vector using basis in hist
|
||||
std::vector<double> alpha(n);
|
||||
@@ -263,7 +268,7 @@ class LBFGSSolver {
|
||||
}
|
||||
alpha[j] = vsum / gstate.DotBuf(j, m + j);
|
||||
delta[m + j] = delta[m + j] - alpha[j];
|
||||
}
|
||||
}
|
||||
// scale
|
||||
double scale = gstate.DotBuf(n - 1, m + n - 1) /
|
||||
gstate.DotBuf(m + n - 1, m + n - 1);
|
||||
@@ -279,6 +284,7 @@ class LBFGSSolver {
|
||||
double beta = vsum / gstate.DotBuf(j, m + j);
|
||||
delta[j] = delta[j] + (alpha[j] - beta);
|
||||
}
|
||||
|
||||
// set all to zero
|
||||
std::fill(dir, dir + num_dim, 0.0f);
|
||||
DType *dirsub = dir + range_begin_;
|
||||
@@ -291,10 +297,11 @@ class LBFGSSolver {
|
||||
}
|
||||
FixDirL1Sign(dirsub, hist[2 * m], nsub);
|
||||
vdot = -Dot(dirsub, hist[2 * m], nsub);
|
||||
|
||||
// allreduce to get full direction
|
||||
rabit::Allreduce<rabit::op::Sum>(dir, num_dim);
|
||||
rabit::Allreduce<rabit::op::Sum>(&vdot, 1);
|
||||
} else {
|
||||
} else {
|
||||
SetL1Dir(dir, grad, weight, num_dim);
|
||||
vdot = -Dot(dir, dir, num_dim);
|
||||
}
|
||||
@@ -482,6 +489,7 @@ class LBFGSSolver {
|
||||
num_iteration = 0;
|
||||
num_dim = 0;
|
||||
old_objval = 0.0;
|
||||
offset_ = 0;
|
||||
}
|
||||
~GlobalState(void) {
|
||||
if (grad != NULL) {
|
||||
@@ -496,6 +504,10 @@ class LBFGSSolver {
|
||||
data.resize(n * n, 0.0);
|
||||
this->AllocSpace();
|
||||
}
|
||||
// memory cost
|
||||
inline size_t MemCost(void) const {
|
||||
return sizeof(DType) * 3 * num_dim;
|
||||
}
|
||||
inline double &DotBuf(size_t i, size_t j) {
|
||||
if (i > j) std::swap(i, j);
|
||||
return data[MapIndex(i, offset_, size_memory) * (size_memory * 2 + 1) +
|
||||
@@ -565,6 +577,10 @@ class LBFGSSolver {
|
||||
size_t n = size_memory * 2 + 1;
|
||||
dptr_ = new DType[n * stride_];
|
||||
}
|
||||
// memory cost
|
||||
inline size_t MemCost(void) const {
|
||||
return sizeof(DType) * (size_memory_ * 2 + 1) * stride_;
|
||||
}
|
||||
// fetch element from rolling array
|
||||
inline const DType *operator[](size_t i) const {
|
||||
return dptr_ + MapIndex(i, offset_, size_memory_) * stride_;
|
||||
|
||||
@@ -77,11 +77,15 @@ struct SparseMat {
|
||||
feat_dim += 1;
|
||||
utils::Check(feat_dim < std::numeric_limits<index_t>::max(),
|
||||
"feature dimension exceed limit of index_t"\
|
||||
"consider change the index_t to unsigned long");
|
||||
"consider change the index_t to unsigned long");
|
||||
}
|
||||
inline size_t NumRow(void) const {
|
||||
return row_ptr.size() - 1;
|
||||
}
|
||||
// memory cost
|
||||
inline size_t MemCost(void) const {
|
||||
return data.size() * sizeof(Entry);
|
||||
}
|
||||
// maximum feature dimension
|
||||
size_t feat_dim;
|
||||
std::vector<size_t> row_ptr;
|
||||
|
||||
Reference in New Issue
Block a user