Complete cudf support. (#4850)
* Handle missing values. * Accept all floating-point and integer types. * Move to the cudf 9.0 API. * Remove the requirement on `null_count`. * Support arbitrary column types.
This commit is contained in:
@@ -7,11 +7,13 @@
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(MetaInfo, FromInterface) {
|
||||
cudaSetDevice(0);
|
||||
constexpr size_t kRows = 16;
|
||||
|
||||
thrust::device_vector<float> d_data(kRows);
|
||||
template <typename T>
|
||||
std::string PrepareData(std::string typestr, thrust::device_vector<T>* out) {
|
||||
constexpr size_t kRows = 16;
|
||||
out->resize(kRows);
|
||||
auto& d_data = *out;
|
||||
|
||||
for (size_t i = 0; i < d_data.size(); ++i) {
|
||||
d_data[i] = i * 2.0;
|
||||
}
|
||||
@@ -22,7 +24,7 @@ TEST(MetaInfo, FromInterface) {
|
||||
column["shape"] = Array(j_shape);
|
||||
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(4)))});
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["typestr"] = String("<f4");
|
||||
column["typestr"] = String(typestr);
|
||||
|
||||
auto p_d_data = dh::Raw(d_data);
|
||||
std::vector<Json> j_data {
|
||||
@@ -34,6 +36,15 @@ TEST(MetaInfo, FromInterface) {
|
||||
Json::Dump(column, &ss);
|
||||
std::string str = ss.str();
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
TEST(MetaInfo, FromInterface) {
|
||||
cudaSetDevice(0);
|
||||
thrust::device_vector<float> d_data;
|
||||
|
||||
std::string str = PrepareData<float>("<f4", &d_data);
|
||||
|
||||
MetaInfo info;
|
||||
info.SetInfo("label", str.c_str());
|
||||
|
||||
@@ -53,5 +64,22 @@ TEST(MetaInfo, FromInterface) {
|
||||
for (size_t i = 0; i < d_data.size(); ++i) {
|
||||
ASSERT_EQ(h_base_margin[i], d_data[i]);
|
||||
}
|
||||
|
||||
EXPECT_ANY_THROW({info.SetInfo("group", str.c_str());});
|
||||
}
|
||||
|
||||
// Checks that group sizes passed to MetaInfo::SetInfo("group", ...) through a
// device array-interface string are stored as a running sum in
// `info.group_ptr_`.
TEST(MetaInfo, Group) {
  // Do not ignore the CUDA return code: if device 0 cannot be selected the
  // test should stop here with a clear failure instead of failing later on.
  ASSERT_EQ(cudaSetDevice(0), cudaSuccess);

  // PrepareData resizes and fills `d_data` and returns the serialized JSON
  // description of that buffer, typed here as little-endian uint32 ("<u4").
  thrust::device_vector<uint32_t> d_data;
  std::string str = PrepareData<uint32_t>("<u4", &d_data);

  MetaInfo info;

  info.SetInfo("group", str.c_str());
  auto const& h_group = info.group_ptr_;

  // The group pointer holds one more entry than there are group sizes.
  ASSERT_EQ(h_group.size(), d_data.size() + 1);

  // Each entry must equal the previous entry plus the matching group size.
  for (size_t i = 1; i < h_group.size(); ++i) {
    ASSERT_EQ(h_group[i], d_data[i-1] + h_group[i-1]) << "i: " << i;
  }
}
|
||||
} // namespace xgboost
|
||||
@@ -8,17 +8,48 @@
|
||||
#include "../../../src/common/bitfield.h"
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "../../../src/data/simple_csr_source.h"
|
||||
#include "../../../src/data/columnar.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TEST(SimpleCSRSource, FromColumnarDense) {
|
||||
constexpr size_t kRows = 16;
|
||||
// Builds an array-interface column field by field and checks that
// ArrayInterfaceHandler::ExtractArray<float> throws dmlc::Error until every
// required field is present and the data pointer is non-null.
TEST(ArrayInterfaceHandler, Error) {
  constexpr size_t kRows {16};

  Json column { Object() };
  column["shape"] = Array(
      std::vector<Json>{Json(Integer(static_cast<Integer::Int>(kRows)))});

  // Data entry whose pointer is null; attached to the column further below.
  std::vector<Json> null_data {
    Json(Integer(reinterpret_cast<Integer::Int>(nullptr))),
    Json(Boolean(false))};

  // Reference to the underlying object, so later writes through `column`
  // are seen by every extraction attempt.
  auto const& column_obj = get<Object>(column);
  auto extract = [&column_obj]() {
    ArrayInterfaceHandler::ExtractArray<float>(column_obj);
  };

  // missing version
  EXPECT_THROW(extract(), dmlc::Error);

  // missing data
  column["version"] = Integer(static_cast<Integer::Int>(1));
  EXPECT_THROW(extract(), dmlc::Error);

  // missing typestr
  column["data"] = null_data;
  EXPECT_THROW(extract(), dmlc::Error);

  // nullptr is not valid
  column["typestr"] = String("<f4");
  EXPECT_THROW(extract(), dmlc::Error);

  // With a real device buffer behind the pointer, extraction succeeds.
  thrust::device_vector<float> d_data(kRows);
  std::vector<Json> valid_data {
    Json(Integer(reinterpret_cast<Integer::Int>(d_data.data().get()))),
    Json(Boolean(false))};
  column["data"] = valid_data;
  EXPECT_NO_THROW(extract());
}
|
||||
|
||||
template <typename T>
|
||||
Json GenerateDenseColumn(std::string const& typestr, size_t kRows,
|
||||
thrust::device_vector<T>* out_d_data) {
|
||||
auto& d_data = *out_d_data;
|
||||
Json column { Object() };
|
||||
std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
|
||||
column["shape"] = Array(j_shape);
|
||||
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(4)))});
|
||||
|
||||
thrust::device_vector<float> d_data(kRows);
|
||||
d_data.resize(kRows);
|
||||
for (size_t i = 0; i < d_data.size(); ++i) {
|
||||
d_data[i] = i * 2.0;
|
||||
}
|
||||
@@ -26,39 +57,91 @@ TEST(SimpleCSRSource, FromColumnarDense) {
|
||||
auto p_d_data = dh::Raw(d_data);
|
||||
|
||||
std::vector<Json> j_data {
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
|
||||
Json(Boolean(false))};
|
||||
column["data"] = j_data;
|
||||
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["typestr"] = String("<f4");
|
||||
Json column_arr {Array{std::vector<Json>{column}}};
|
||||
column["typestr"] = String(typestr);
|
||||
return column;
|
||||
}
|
||||
|
||||
TEST(SimpleCSRSource, FromColumnarDense) {
|
||||
constexpr size_t kRows {16};
|
||||
constexpr size_t kCols {2};
|
||||
std::vector<Json> columns;
|
||||
thrust::device_vector<float> d_data_0(kRows);
|
||||
thrust::device_vector<int32_t> d_data_1(kRows);
|
||||
columns.emplace_back(GenerateDenseColumn<float>("<f4", kRows, &d_data_0));
|
||||
columns.emplace_back(GenerateDenseColumn<int32_t>("<i4", kRows, &d_data_1));
|
||||
|
||||
Json column_arr {columns};
|
||||
|
||||
std::stringstream ss;
|
||||
Json::Dump(column_arr, &ss);
|
||||
std::string str = ss.str();
|
||||
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str());
|
||||
// no missing value
|
||||
{
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str(), false);
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
auto e = data[i];
|
||||
ASSERT_NEAR(e.fvalue, i * 2.0, kRtEps);
|
||||
ASSERT_EQ(e.index, 0); // feature 0
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
for (size_t i = 0; i < kRows; i++) {
|
||||
auto const idx = i * kCols;
|
||||
auto const e_0 = data.at(idx);
|
||||
ASSERT_NEAR(e_0.fvalue, i * 2.0, kRtEps) << "idx: " << idx;
|
||||
ASSERT_EQ(e_0.index, 0); // feature 0
|
||||
|
||||
auto e_1 = data.at(idx+1);
|
||||
ASSERT_NEAR(e_1.fvalue, i * 2.0, kRtEps);
|
||||
ASSERT_EQ(e_1.index, 1); // feature 1
|
||||
}
|
||||
ASSERT_EQ(offset.back(), kRows * kCols);
|
||||
for (size_t i = 0; i < kRows + 1; ++i) {
|
||||
ASSERT_EQ(offset[i], i * kCols);
|
||||
}
|
||||
ASSERT_EQ(source->info.num_row_, kRows);
|
||||
ASSERT_EQ(source->info.num_col_, kCols);
|
||||
}
|
||||
ASSERT_EQ(offset.back(), 16);
|
||||
for (size_t i = 0; i < kRows + 1; ++i) {
|
||||
ASSERT_EQ(offset[i], i);
|
||||
|
||||
// with missing value specified
|
||||
{
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str(), true, 4.0);
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
ASSERT_EQ(data.size(), kRows * kCols - 2);
|
||||
ASSERT_NEAR(data[4].fvalue, 6.0, kRtEps); // kCols * 2
|
||||
ASSERT_EQ(offset.back(), 30);
|
||||
for (size_t i = 3; i < kRows + 1; ++i) {
|
||||
ASSERT_EQ(offset[i], (i - 1) * 2);
|
||||
}
|
||||
ASSERT_EQ(source->info.num_row_, kRows);
|
||||
ASSERT_EQ(source->info.num_col_, kCols);
|
||||
}
|
||||
|
||||
{
|
||||
// no missing value, but has NaN
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
d_data_0[3] = std::numeric_limits<float>::quiet_NaN();
|
||||
ASSERT_TRUE(std::isnan(d_data_0[3])); // removes 6.0
|
||||
source->CopyFrom(str.c_str(), false);
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
ASSERT_EQ(data.size(), kRows * kCols - 1);
|
||||
ASSERT_NEAR(data[7].fvalue, 8.0, kRtEps);
|
||||
ASSERT_EQ(source->info.num_row_, kRows);
|
||||
ASSERT_EQ(source->info.num_col_, kCols);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SimpleCSRSource, FromColumnarWithEmptyRows) {
|
||||
// In this test we construct a data storage similar to cudf
|
||||
constexpr size_t kRows = 102;
|
||||
constexpr size_t kCols = 24;
|
||||
constexpr size_t kMissingRows = 3;
|
||||
|
||||
std::vector<Json> v_columns (kCols);
|
||||
std::vector<dh::device_vector<float>> columns_data(kCols);
|
||||
@@ -90,6 +173,7 @@ TEST(SimpleCSRSource, FromColumnarWithEmptyRows) {
|
||||
// Construct the mask object.
|
||||
col["mask"] = Object();
|
||||
auto& j_mask = col["mask"];
|
||||
j_mask["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
auto& mask_storage = column_bitfields[i];
|
||||
mask_storage.resize(16); // 16 bytes
|
||||
|
||||
@@ -111,7 +195,6 @@ TEST(SimpleCSRSource, FromColumnarWithEmptyRows) {
|
||||
Json(Boolean(false))};
|
||||
j_mask["shape"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(16)))});
|
||||
j_mask["typestr"] = String("|i1");
|
||||
j_mask["null_count"] = Json(Integer(static_cast<Integer::Int>(kMissingRows)));
|
||||
}
|
||||
|
||||
Json column_arr {Array(v_columns)};
|
||||
@@ -119,7 +202,7 @@ TEST(SimpleCSRSource, FromColumnarWithEmptyRows) {
|
||||
Json::Dump(column_arr, &ss);
|
||||
std::string str = ss.str();
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str());
|
||||
source->CopyFrom(str.c_str(), false);
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
@@ -131,6 +214,7 @@ TEST(SimpleCSRSource, FromColumnarWithEmptyRows) {
|
||||
ASSERT_NEAR(data[j].fvalue, i - 1, kRtEps);
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(source->info.num_row_, kRows);
|
||||
}
|
||||
|
||||
TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
@@ -149,6 +233,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
for (size_t j = 0; j < mask.size(); ++j) {
|
||||
mask[j] = ~0;
|
||||
}
|
||||
// the 2^th entry of first column is invalid
|
||||
// [0 0 0 0 0 1 0 0]
|
||||
mask[0] = ~(kUCOne << 2);
|
||||
}
|
||||
{
|
||||
@@ -159,6 +245,8 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
for (size_t j = 0; j < mask.size(); ++j) {
|
||||
mask[j] = ~0;
|
||||
}
|
||||
// the 19^th entry of second column is invalid
|
||||
// [~0~], [~0~], [0 0 0 0 1 0 0 0]
|
||||
mask[2] = ~(kUCOne << 3);
|
||||
}
|
||||
|
||||
@@ -186,12 +274,12 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
|
||||
column["mask"] = Object();
|
||||
auto& j_mask = column["mask"];
|
||||
j_mask["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
j_mask["data"] = std::vector<Json>{
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(column_bitfields[c].data().get()))),
|
||||
Json(Boolean(false))};
|
||||
j_mask["shape"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(8)))});
|
||||
j_mask["typestr"] = String("|i1");
|
||||
j_mask["null_count"] = Json(Integer(static_cast<Integer::Int>(1)));
|
||||
}
|
||||
|
||||
Json column_arr {Array(j_columns)};
|
||||
@@ -200,17 +288,64 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
Json::Dump(column_arr, &ss);
|
||||
std::string str = ss.str();
|
||||
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str());
|
||||
{
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str(), false);
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
|
||||
ASSERT_EQ(offset.size(), kRows + 1);
|
||||
ASSERT_EQ(data[4].index, 1);
|
||||
ASSERT_EQ(data[4].fvalue, 2);
|
||||
ASSERT_EQ(data[37].index, 0);
|
||||
ASSERT_EQ(data[37].fvalue, 19);
|
||||
ASSERT_EQ(offset.size(), kRows + 1);
|
||||
ASSERT_EQ(data[4].index, 1);
|
||||
ASSERT_EQ(data[4].fvalue, 2);
|
||||
ASSERT_EQ(data[37].index, 0);
|
||||
ASSERT_EQ(data[37].fvalue, 19);
|
||||
}
|
||||
|
||||
{
|
||||
// with missing value
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str(), true, /*missing=*/2.0);
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
ASSERT_NE(data[4].fvalue, 2.0);
|
||||
}
|
||||
|
||||
{
|
||||
// no missing value, but has NaN
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
columns_data[0][4] = std::numeric_limits<float>::quiet_NaN(); // 0^th column 4^th row
|
||||
ASSERT_TRUE(std::isnan(columns_data[0][4]));
|
||||
source->CopyFrom(str.c_str(), false);
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
auto const& offset = source->page_.offset.HostVector();
|
||||
// Two invalid entries and one NaN, in CSC
|
||||
// 0^th column: 0, 1, 4, 5, 6, ..., kRows
|
||||
// 1^th column: 0, 1, 2, 3, ..., 19, 21, ..., kRows
|
||||
// Turning it into CSR:
|
||||
// | 0, 0 | 1, 1 | 2 | 3, 3 | 4 | ...
|
||||
ASSERT_EQ(data.size(), kRows * kCols - 3);
|
||||
ASSERT_EQ(data[4].index, 1); // from 1^th column
|
||||
ASSERT_EQ(data[5].fvalue, 3.0);
|
||||
ASSERT_EQ(data[7].index, 1); // from 1^th column
|
||||
ASSERT_EQ(data[7].fvalue, 4.0);
|
||||
|
||||
ASSERT_EQ(data[offset[2]].fvalue, 2.0);
|
||||
ASSERT_EQ(data[offset[4]].fvalue, 4.0);
|
||||
}
|
||||
|
||||
{
|
||||
// with NaN as missing value
|
||||
// NaN is already set up by above test
|
||||
std::unique_ptr<data::SimpleCSRSource> source (new data::SimpleCSRSource());
|
||||
source->CopyFrom(str.c_str(), true,
|
||||
/*missing=*/std::numeric_limits<float>::quiet_NaN());
|
||||
|
||||
auto const& data = source->page_.data.HostVector();
|
||||
ASSERT_EQ(data.size(), kRows * kCols - 1);
|
||||
ASSERT_EQ(data[8].fvalue, 4.0);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
|
||||
xgb.rabit.init()
|
||||
|
||||
@@ -59,6 +60,7 @@ X = [
|
||||
4415.50,22731.62,1.00,55.00,0.00,499.94,22.00,0.58,67.00,0.21,341.72,16.00,0.00,965.07,
|
||||
17.00,138.41,0.00,0.00,1.00,0.14,1.00,0.02,0.35,1.69,369.00,1300.00,25.00,0.00,0.01,
|
||||
0.00,0.00,0.00,0.00,52.00,8.00]]
|
||||
X = np.array(X)
|
||||
y = [1, 0]
|
||||
|
||||
dtrain = xgb.DMatrix(X, label=y)
|
||||
|
||||
@@ -6,6 +6,35 @@ sys.path.append("tests/python")
|
||||
import testing as tm
|
||||
|
||||
|
||||
def dmatrix_from_cudf(input_type, missing=np.nan):
    '''Test constructing DMatrix from cudf.

    Builds a random 80x3 frame, marks two entries as missing, converts the
    features and a random label to cudf DataFrames, and checks the shape of
    the resulting DMatrix.

    Parameters
    ----------
    input_type :
        NumPy dtype used to cast the first two feature columns' values and
        the label.
    missing :
        Value written into the two "missing" cells and forwarded to
        ``xgb.DMatrix`` as ``missing``. Defaults to NaN.
    '''
    # `np.nan` is used for the default because the `np.NAN` alias was removed
    # in NumPy 2.0.
    import cudf
    import pandas as pd

    kRows = 80
    kCols = 3

    na = np.random.randn(kRows, kCols)
    # NOTE(review): `na` keeps its own dtype, so this assignment only rounds
    # the values through `input_type`; the columns handed to pandas below are
    # not of dtype `input_type` themselves -- confirm this is intended.
    na[:, 0:2] = na[:, 0:2].astype(input_type)

    # One missing entry in each of the first two columns.
    na[5, 0] = missing
    na[3, 1] = missing

    pa = pd.DataFrame({'0': na[:, 0],
                       '1': na[:, 1],
                       '2': na[:, 2].astype(np.int32)})

    np_label = np.random.randn(kRows).astype(input_type)
    pa_label = pd.DataFrame(np_label)

    cd: cudf.DataFrame = cudf.from_pandas(pa)
    cd_label: cudf.DataFrame = cudf.from_pandas(pa_label)

    dtrain = xgb.DMatrix(cd, missing=missing, label=cd_label)
    assert dtrain.num_col() == kCols
    assert dtrain.num_row() == kRows
|
||||
|
||||
|
||||
class TestFromColumnar:
|
||||
'''Tests for constructing DMatrix from data structure conforming Apache
|
||||
Arrow specification.'''
|
||||
@@ -13,30 +42,13 @@ Arrow specification.'''
|
||||
@pytest.mark.skipif(**tm.no_cudf())
|
||||
def test_from_cudf(self):
|
||||
'''Test constructing DMatrix from cudf'''
|
||||
import cudf
|
||||
import pandas as pd
|
||||
dmatrix_from_cudf(np.float32, np.NAN)
|
||||
dmatrix_from_cudf(np.float64, np.NAN)
|
||||
|
||||
kRows = 80
|
||||
kCols = 2
|
||||
dmatrix_from_cudf(np.uint8, 2)
|
||||
dmatrix_from_cudf(np.uint32, 3)
|
||||
dmatrix_from_cudf(np.uint64, 4)
|
||||
|
||||
na = np.random.randn(kRows, kCols).astype(np.float32)
|
||||
na[3, 1] = np.NAN
|
||||
na[5, 0] = np.NAN
|
||||
|
||||
pa = pd.DataFrame(na)
|
||||
|
||||
np_label = np.random.randn(kRows).astype(np.float32)
|
||||
pa_label = pd.DataFrame(np_label)
|
||||
|
||||
names = []
|
||||
|
||||
for i in range(0, kCols):
|
||||
names.append(str(i))
|
||||
pa.columns = names
|
||||
|
||||
cd: cudf.DataFrame = cudf.from_pandas(pa)
|
||||
cd_label: cudf.DataFrame = cudf.from_pandas(pa_label)
|
||||
|
||||
dtrain = xgb.DMatrix(cd, label=cd_label)
|
||||
assert dtrain.num_col() == kCols
|
||||
assert dtrain.num_row() == kRows
|
||||
dmatrix_from_cudf(np.int8, 2)
|
||||
dmatrix_from_cudf(np.int32, -2)
|
||||
dmatrix_from_cudf(np.int64, -3)
|
||||
|
||||
@@ -67,17 +67,17 @@ class TestBasic(unittest.TestCase):
|
||||
def test_np_view(self):
|
||||
# Sliced Float32 array
|
||||
y = np.array([12, 34, 56], np.float32)[::2]
|
||||
from_view = xgb.DMatrix([], label=y).get_label()
|
||||
from_array = xgb.DMatrix([], label=y + 0).get_label()
|
||||
from_view = xgb.DMatrix(np.array([[]]), label=y).get_label()
|
||||
from_array = xgb.DMatrix(np.array([[]]), label=y + 0).get_label()
|
||||
assert (from_view.shape == from_array.shape)
|
||||
assert (from_view == from_array).all()
|
||||
|
||||
# Sliced UInt array
|
||||
z = np.array([12, 34, 56], np.uint32)[::2]
|
||||
dmat = xgb.DMatrix([])
|
||||
dmat = xgb.DMatrix(np.array([[]]))
|
||||
dmat.set_uint_info('root_index', z)
|
||||
from_view = dmat.get_uint_info('root_index')
|
||||
dmat = xgb.DMatrix([])
|
||||
dmat = xgb.DMatrix(np.array([[]]))
|
||||
dmat.set_uint_info('root_index', z + 0)
|
||||
from_array = dmat.get_uint_info('root_index')
|
||||
assert (from_view.shape == from_array.shape)
|
||||
@@ -256,7 +256,7 @@ class TestBasic(unittest.TestCase):
|
||||
assert dm.num_row() == 5
|
||||
assert dm.num_col() == 5
|
||||
|
||||
data = np.matrix([[1, 2], [3, 4]])
|
||||
data = np.array([[1, 2], [3, 4]])
|
||||
dm = xgb.DMatrix(data)
|
||||
assert dm.num_row() == 2
|
||||
assert dm.num_col() == 2
|
||||
@@ -430,4 +430,3 @@ class TestBasicPathLike(unittest.TestCase):
|
||||
|
||||
# invalid values raise Type error
|
||||
self.assertRaises(TypeError, xgb.compat.os_fspath, 123)
|
||||
|
||||
|
||||
@@ -69,8 +69,8 @@ class TestUpdaters(unittest.TestCase):
|
||||
nan = np.nan
|
||||
param = {'missing': nan, 'tree_method': 'hist'}
|
||||
model = xgb.XGBRegressor(**param)
|
||||
X = [[6.18827160e+05, 1.73000000e+02], [6.37345679e+05, nan],
|
||||
[6.38888889e+05, nan], [6.28086420e+05, nan]]
|
||||
X = np.array([[6.18827160e+05, 1.73000000e+02], [6.37345679e+05, nan],
|
||||
[6.38888889e+05, nan], [6.28086420e+05, nan]])
|
||||
y = [1000000., 0., 0., 500000.]
|
||||
w = [0, 0, 1, 0]
|
||||
model.fit(X, y, sample_weight=w)
|
||||
|
||||
@@ -19,7 +19,7 @@ pytestmark = pytest.mark.skipif(**tm.no_dask())
|
||||
|
||||
def run_train():
|
||||
# Contains one label equal to rank
|
||||
dmat = xgb.DMatrix([[0]], label=[xgb.rabit.get_rank()])
|
||||
dmat = xgb.DMatrix(np.array([[0]]), label=[xgb.rabit.get_rank()])
|
||||
bst = xgb.train({"eta": 1.0, "lambda": 0.0}, dmat, 1)
|
||||
pred = bst.predict(dmat)
|
||||
expected_result = np.average(range(xgb.rabit.get_world_size()))
|
||||
@@ -78,7 +78,7 @@ def test_get_local_data(client):
|
||||
|
||||
def run_sklearn():
|
||||
# Contains one label equal to rank
|
||||
X = [[0]]
|
||||
X = np.array([[0]])
|
||||
y = [xgb.rabit.get_rank()]
|
||||
model = xgb.XGBRegressor(learning_rate=1.0)
|
||||
model.fit(X, y)
|
||||
|
||||
@@ -393,7 +393,8 @@ def test_sklearn_nfolds_cv():
|
||||
nfolds = 5
|
||||
skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed)
|
||||
|
||||
cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed, as_pandas=True)
|
||||
cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds,
|
||||
seed=seed, as_pandas=True)
|
||||
cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds,
|
||||
folds=skf, seed=seed, as_pandas=True)
|
||||
cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds,
|
||||
|
||||
Reference in New Issue
Block a user