This commit is contained in:
tqchen 2015-03-13 22:59:04 -07:00
parent 8cc650847a
commit f165ffbc95
4 changed files with 28 additions and 7 deletions

View File

@ -17,8 +17,12 @@ namespace rabit {
namespace io { namespace io {
class HDFSStream : public utils::ISeekStream { class HDFSStream : public utils::ISeekStream {
public: public:
HDFSStream(hdfsFS fs, const char *fname, const char *mode) HDFSStream(hdfsFS fs,
: fs_(fs), at_end_(false) { const char *fname,
const char *mode,
bool disconnect_when_done)
: fs_(fs), at_end_(false),
disconnect_when_done_(disconnect_when_done) {
int flag; int flag;
if (!strcmp(mode, "r")) { if (!strcmp(mode, "r")) {
flag = O_RDONLY; flag = O_RDONLY;
@ -35,6 +39,9 @@ class HDFSStream : public utils::ISeekStream {
} }
virtual ~HDFSStream(void) { virtual ~HDFSStream(void) {
this->Close(); this->Close();
if (disconnect_when_done_) {
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
}
} }
virtual size_t Read(void *ptr, size_t size) { virtual size_t Read(void *ptr, size_t size) {
tSize nread = hdfsRead(fs_, fp_, ptr, size); tSize nread = hdfsRead(fs_, fp_, ptr, size);
@ -90,6 +97,7 @@ class HDFSStream : public utils::ISeekStream {
hdfsFS fs_; hdfsFS fs_;
hdfsFile fp_; hdfsFile fp_;
bool at_end_; bool at_end_;
bool disconnect_when_done_;
}; };
/*! \brief line split from normal file system */ /*! \brief line split from normal file system */
@ -124,12 +132,15 @@ class HDFSSplit : public LineSplitBase {
} }
LineSplitBase::Init(fsize, rank, nsplit); LineSplitBase::Init(fsize, rank, nsplit);
} }
virtual ~HDFSSplit(void) {} virtual ~HDFSSplit(void) {
LineSplitBase::Destroy();
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
}
protected: protected:
virtual utils::ISeekStream *GetFile(size_t file_index) { virtual utils::ISeekStream *GetFile(size_t file_index) {
utils::Assert(file_index < fnames_.size(), "file index exceed bound"); utils::Assert(file_index < fnames_.size(), "file index exceed bound");
return new HDFSStream(fs_, fnames_[file_index].c_str(), "r"); return new HDFSStream(fs_, fnames_[file_index].c_str(), "r", false);
} }
private: private:

View File

@ -55,7 +55,7 @@ inline IStream *CreateStream(const char *uri, const char *mode) {
} }
if (!strncmp(uri, "hdfs://", 7)) { if (!strncmp(uri, "hdfs://", 7)) {
#if RABIT_USE_HDFS #if RABIT_USE_HDFS
return new HDFSStream(hdfsConnect("default", 0), uri, mode); return new HDFSStream(hdfsConnect("default", 0), uri, mode, true);
#else #else
utils::Error("Please compile with RABIT_USE_HDFS=1"); utils::Error("Please compile with RABIT_USE_HDFS=1");
#endif #endif

View File

@ -18,7 +18,7 @@ namespace io {
class LineSplitBase : public InputSplit { class LineSplitBase : public InputSplit {
public: public:
virtual ~LineSplitBase() { virtual ~LineSplitBase() {
if (fs_ != NULL) delete fs_; this->Destroy();
} }
virtual bool NextLine(std::string *out_data) { virtual bool NextLine(std::string *out_data) {
if (file_ptr_ >= file_ptr_end_ && if (file_ptr_ >= file_ptr_end_ &&
@ -57,6 +57,15 @@ class LineSplitBase : public InputSplit {
LineSplitBase(void) LineSplitBase(void)
: fs_(NULL), reader_(kBufferSize) { : fs_(NULL), reader_(kBufferSize) {
} }
/*!
* \brief destroy all the filesystem resources owned
* can be called by child destructor
*/
inline void Destroy(void) {
if (fs_ != NULL) {
delete fs_; fs_ = NULL;
}
}
/*! /*!
* \brief initialize the line spliter, * \brief initialize the line spliter,
* \param file_size, size of each std::FILEs * \param file_size, size of each std::FILEs

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
""" """
this script helps setup classpath env for HDFS, before running the program this script helps setup classpath env for HDFS, before running program
that links with libhdfs
""" """
import glob import glob
import sys import sys