fix hdfs
This commit is contained in:
parent
8cc650847a
commit
f165ffbc95
@ -17,8 +17,12 @@ namespace rabit {
|
|||||||
namespace io {
|
namespace io {
|
||||||
class HDFSStream : public utils::ISeekStream {
|
class HDFSStream : public utils::ISeekStream {
|
||||||
public:
|
public:
|
||||||
HDFSStream(hdfsFS fs, const char *fname, const char *mode)
|
HDFSStream(hdfsFS fs,
|
||||||
: fs_(fs), at_end_(false) {
|
const char *fname,
|
||||||
|
const char *mode,
|
||||||
|
bool disconnect_when_done)
|
||||||
|
: fs_(fs), at_end_(false),
|
||||||
|
disconnect_when_done_(disconnect_when_done) {
|
||||||
int flag;
|
int flag;
|
||||||
if (!strcmp(mode, "r")) {
|
if (!strcmp(mode, "r")) {
|
||||||
flag = O_RDONLY;
|
flag = O_RDONLY;
|
||||||
@ -35,6 +39,9 @@ class HDFSStream : public utils::ISeekStream {
|
|||||||
}
|
}
|
||||||
virtual ~HDFSStream(void) {
|
virtual ~HDFSStream(void) {
|
||||||
this->Close();
|
this->Close();
|
||||||
|
if (disconnect_when_done_) {
|
||||||
|
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
virtual size_t Read(void *ptr, size_t size) {
|
virtual size_t Read(void *ptr, size_t size) {
|
||||||
tSize nread = hdfsRead(fs_, fp_, ptr, size);
|
tSize nread = hdfsRead(fs_, fp_, ptr, size);
|
||||||
@ -90,6 +97,7 @@ class HDFSStream : public utils::ISeekStream {
|
|||||||
hdfsFS fs_;
|
hdfsFS fs_;
|
||||||
hdfsFile fp_;
|
hdfsFile fp_;
|
||||||
bool at_end_;
|
bool at_end_;
|
||||||
|
bool disconnect_when_done_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief line split from normal file system */
|
/*! \brief line split from normal file system */
|
||||||
@ -124,12 +132,15 @@ class HDFSSplit : public LineSplitBase {
|
|||||||
}
|
}
|
||||||
LineSplitBase::Init(fsize, rank, nsplit);
|
LineSplitBase::Init(fsize, rank, nsplit);
|
||||||
}
|
}
|
||||||
virtual ~HDFSSplit(void) {}
|
virtual ~HDFSSplit(void) {
|
||||||
|
LineSplitBase::Destroy();
|
||||||
|
utils::Check(hdfsDisconnect(fs_) == 0, "hdfsDisconnect error");
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual utils::ISeekStream *GetFile(size_t file_index) {
|
virtual utils::ISeekStream *GetFile(size_t file_index) {
|
||||||
utils::Assert(file_index < fnames_.size(), "file index exceed bound");
|
utils::Assert(file_index < fnames_.size(), "file index exceed bound");
|
||||||
return new HDFSStream(fs_, fnames_[file_index].c_str(), "r");
|
return new HDFSStream(fs_, fnames_[file_index].c_str(), "r", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -55,7 +55,7 @@ inline IStream *CreateStream(const char *uri, const char *mode) {
|
|||||||
}
|
}
|
||||||
if (!strncmp(uri, "hdfs://", 7)) {
|
if (!strncmp(uri, "hdfs://", 7)) {
|
||||||
#if RABIT_USE_HDFS
|
#if RABIT_USE_HDFS
|
||||||
return new HDFSStream(hdfsConnect("default", 0), uri, mode);
|
return new HDFSStream(hdfsConnect("default", 0), uri, mode, true);
|
||||||
#else
|
#else
|
||||||
utils::Error("Please compile with RABIT_USE_HDFS=1");
|
utils::Error("Please compile with RABIT_USE_HDFS=1");
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -18,7 +18,7 @@ namespace io {
|
|||||||
class LineSplitBase : public InputSplit {
|
class LineSplitBase : public InputSplit {
|
||||||
public:
|
public:
|
||||||
virtual ~LineSplitBase() {
|
virtual ~LineSplitBase() {
|
||||||
if (fs_ != NULL) delete fs_;
|
this->Destroy();
|
||||||
}
|
}
|
||||||
virtual bool NextLine(std::string *out_data) {
|
virtual bool NextLine(std::string *out_data) {
|
||||||
if (file_ptr_ >= file_ptr_end_ &&
|
if (file_ptr_ >= file_ptr_end_ &&
|
||||||
@ -57,6 +57,15 @@ class LineSplitBase : public InputSplit {
|
|||||||
LineSplitBase(void)
|
LineSplitBase(void)
|
||||||
: fs_(NULL), reader_(kBufferSize) {
|
: fs_(NULL), reader_(kBufferSize) {
|
||||||
}
|
}
|
||||||
|
/*!
|
||||||
|
* \brief destroy all the filesystem resources owned
|
||||||
|
* can be called by child destructor
|
||||||
|
*/
|
||||||
|
inline void Destroy(void) {
|
||||||
|
if (fs_ != NULL) {
|
||||||
|
delete fs_; fs_ = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief initialize the line spliter,
|
* \brief initialize the line spliter,
|
||||||
* \param file_size, size of each std::FILEs
|
* \param file_size, size of each std::FILEs
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
this script helps setup classpath env for HDFS, before running the program
|
this script helps setup classpath env for HDFS, before running program
|
||||||
|
that links with libhdfs
|
||||||
"""
|
"""
|
||||||
import glob
|
import glob
|
||||||
import sys
|
import sys
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user