Small cleanup to jvm iter adapter. (#9616)
- Remove header dependency on c_api - Remove remaining code for arrow.
This commit is contained in:
parent
417c3ba47e
commit
d95be1c38d
@ -62,6 +62,7 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/gbm/gbtree_model.o \
|
||||
$(PKGROOT)/src/gbm/gblinear.o \
|
||||
$(PKGROOT)/src/gbm/gblinear_model.o \
|
||||
$(PKGROOT)/src/data/adapter.o \
|
||||
$(PKGROOT)/src/data/simple_dmatrix.o \
|
||||
$(PKGROOT)/src/data/data.o \
|
||||
$(PKGROOT)/src/data/sparse_page_raw_format.o \
|
||||
|
||||
@ -62,6 +62,7 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/gbm/gbtree_model.o \
|
||||
$(PKGROOT)/src/gbm/gblinear.o \
|
||||
$(PKGROOT)/src/gbm/gblinear_model.o \
|
||||
$(PKGROOT)/src/data/adapter.o \
|
||||
$(PKGROOT)/src/data/simple_dmatrix.o \
|
||||
$(PKGROOT)/src/data/data.o \
|
||||
$(PKGROOT)/src/data/sparse_page_raw_format.o \
|
||||
|
||||
@ -269,8 +269,8 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
|
||||
if (cache_info != nullptr) {
|
||||
scache = cache_info;
|
||||
}
|
||||
xgboost::data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext,
|
||||
XGBoostBatchCSR> adapter(data_handle, callback);
|
||||
xgboost::data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR> adapter(
|
||||
data_handle, callback);
|
||||
xgboost_CHECK_C_ARG_PTR(out);
|
||||
*out = new std::shared_ptr<DMatrix> {
|
||||
DMatrix::Create(
|
||||
|
||||
28
src/data/adapter.cc
Normal file
28
src/data/adapter.cc
Normal file
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include "adapter.h"
|
||||
|
||||
#include "../c_api/c_api_error.h" // for API_BEGIN, API_END
|
||||
#include "xgboost/c_api.h"
|
||||
|
||||
namespace xgboost::data {
|
||||
template <typename DataIterHandle, typename XGBCallbackDataIterNext, typename XGBoostBatchCSR>
|
||||
bool IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>::Next() {
|
||||
if ((*next_callback_)(
|
||||
data_handle_,
|
||||
[](void *handle, XGBoostBatchCSR batch) -> int {
|
||||
API_BEGIN();
|
||||
static_cast<IteratorAdapter *>(handle)->SetData(batch);
|
||||
API_END();
|
||||
},
|
||||
this) != 0) {
|
||||
at_first_ = false;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template class IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
|
||||
} // namespace xgboost::data
|
||||
@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright (c) 2019~2021 by Contributors
|
||||
/**
|
||||
* Copyright 2019-2023, XGBoost Contributors
|
||||
* \file adapter.h
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_ADAPTER_H_
|
||||
@ -16,7 +16,6 @@
|
||||
#include <utility> // std::move
|
||||
#include <vector>
|
||||
|
||||
#include "../c_api/c_api_error.h"
|
||||
#include "../common/error_msg.h" // for MaxFeatureSize
|
||||
#include "../common/math.h"
|
||||
#include "array_interface.h"
|
||||
@ -742,8 +741,10 @@ class FileAdapter : dmlc::DataIter<FileAdapterBatch> {
|
||||
dmlc::Parser<uint32_t>* parser_;
|
||||
};
|
||||
|
||||
/*! \brief Data iterator that takes callback to return data, used in JVM package for
|
||||
* accepting data iterator. */
|
||||
/**
|
||||
* @brief Data iterator that takes callback to return data, used in JVM package for accepting data
|
||||
* iterator.
|
||||
*/
|
||||
template <typename DataIterHandle, typename XGBCallbackDataIterNext, typename XGBoostBatchCSR>
|
||||
class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
|
||||
public:
|
||||
@ -757,23 +758,9 @@ class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
|
||||
CHECK(at_first_) << "Cannot reset IteratorAdapter";
|
||||
}
|
||||
|
||||
bool Next() override {
|
||||
if ((*next_callback_)(
|
||||
data_handle_,
|
||||
[](void *handle, XGBoostBatchCSR batch) -> int {
|
||||
API_BEGIN();
|
||||
static_cast<IteratorAdapter *>(handle)->SetData(batch);
|
||||
API_END();
|
||||
},
|
||||
this) != 0) {
|
||||
at_first_ = false;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
[[nodiscard]] bool Next() override;
|
||||
|
||||
FileAdapterBatch const& Value() const override {
|
||||
[[nodiscard]] FileAdapterBatch const& Value() const override {
|
||||
return *batch_.get();
|
||||
}
|
||||
|
||||
@ -821,12 +808,12 @@ class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
|
||||
block_.index = dmlc::BeginPtr(index_);
|
||||
block_.value = dmlc::BeginPtr(value_);
|
||||
|
||||
batch_.reset(new FileAdapterBatch(&block_, row_offset_));
|
||||
batch_ = std::make_unique<FileAdapterBatch>(&block_, row_offset_);
|
||||
row_offset_ += offset_.size() - 1;
|
||||
}
|
||||
|
||||
size_t NumColumns() const { return columns_; }
|
||||
size_t NumRows() const { return kAdapterUnknownSize; }
|
||||
[[nodiscard]] std::size_t NumColumns() const { return columns_; }
|
||||
[[nodiscard]] std::size_t NumRows() const { return kAdapterUnknownSize; }
|
||||
|
||||
private:
|
||||
std::vector<size_t> offset_;
|
||||
@ -848,56 +835,6 @@ class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
|
||||
std::unique_ptr<FileAdapterBatch> batch_;
|
||||
};
|
||||
|
||||
enum ColumnDType : uint8_t {
|
||||
kUnknown,
|
||||
kInt8,
|
||||
kUInt8,
|
||||
kInt16,
|
||||
kUInt16,
|
||||
kInt32,
|
||||
kUInt32,
|
||||
kInt64,
|
||||
kUInt64,
|
||||
kFloat,
|
||||
kDouble
|
||||
};
|
||||
|
||||
class Column {
|
||||
public:
|
||||
Column() = default;
|
||||
|
||||
Column(size_t col_idx, size_t length, size_t null_count, const uint8_t* bitmap)
|
||||
: col_idx_{col_idx}, length_{length}, null_count_{null_count}, bitmap_{bitmap} {}
|
||||
|
||||
virtual ~Column() = default;
|
||||
|
||||
Column(const Column&) = delete;
|
||||
Column& operator=(const Column&) = delete;
|
||||
Column(Column&&) = delete;
|
||||
Column& operator=(Column&&) = delete;
|
||||
|
||||
// whether the valid bit is set for this element
|
||||
bool IsValid(size_t row_idx) const {
|
||||
return (!bitmap_ || (bitmap_[row_idx/8] & (1 << (row_idx%8))));
|
||||
}
|
||||
|
||||
virtual COOTuple GetElement(size_t row_idx) const = 0;
|
||||
|
||||
virtual bool IsValidElement(size_t row_idx) const = 0;
|
||||
|
||||
virtual std::vector<float> AsFloatVector() const = 0;
|
||||
|
||||
virtual std::vector<uint64_t> AsUint64Vector() const = 0;
|
||||
|
||||
size_t Length() const { return length_; }
|
||||
|
||||
protected:
|
||||
size_t col_idx_;
|
||||
size_t length_;
|
||||
size_t null_count_;
|
||||
const uint8_t* bitmap_;
|
||||
};
|
||||
|
||||
class SparsePageAdapterBatch {
|
||||
HostSparsePageView page_;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user