Support column major array. (#6765)

This commit is contained in:
Jiaming Yuan
2021-03-20 05:19:46 +08:00
committed by GitHub
parent f6fe15d11f
commit 4ee8340e79
9 changed files with 181 additions and 151 deletions

View File

@@ -138,5 +138,4 @@ TEST(Adapter, IteratorAdapter) {
}
ASSERT_EQ(num_batch, 1);
}
} // namespace xgboost

View File

@@ -38,17 +38,28 @@ TEST(ArrayInterface, Error) {
Json(Boolean(false))};
auto const& column_obj = get<Object>(column);
std::pair<size_t, size_t> shape{kRows, kCols};
std::string typestr{"<f4"};
// missing version
EXPECT_THROW(ArrayInterfaceHandler::ExtractData<float>(column_obj), dmlc::Error);
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj,
StringView{typestr}, shape),
dmlc::Error);
column["version"] = Integer(static_cast<Integer::Int>(1));
// missing data
EXPECT_THROW(ArrayInterfaceHandler::ExtractData<float>(column_obj), dmlc::Error);
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj,
StringView{typestr}, shape),
dmlc::Error);
column["data"] = j_data;
// missing typestr
EXPECT_THROW(ArrayInterfaceHandler::ExtractData<float>(column_obj), dmlc::Error);
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj,
StringView{typestr}, shape),
dmlc::Error);
column["typestr"] = String("<f4");
// nullptr is not valid
EXPECT_THROW(ArrayInterfaceHandler::ExtractData<float>(column_obj), dmlc::Error);
EXPECT_THROW(ArrayInterfaceHandler::ExtractData(column_obj,
StringView{typestr}, shape),
dmlc::Error);
HostDeviceVector<float> storage;
auto array = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);
@@ -56,7 +67,23 @@ TEST(ArrayInterface, Error) {
Json(Integer(reinterpret_cast<Integer::Int>(storage.ConstHostPointer()))),
Json(Boolean(false))};
column["data"] = j_data;
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData<float>(column_obj));
EXPECT_NO_THROW(ArrayInterfaceHandler::ExtractData(
column_obj, StringView{typestr}, shape));
}
// Checks that ArrayInterface::GetElement(row, col) yields the same value as the
// backing host storage read at offset row * kCols + col.
TEST(ArrayInterface, GetElement) {
  size_t const kRows = 4;
  size_t const kCols = 2;

  HostDeviceVector<float> storage;
  auto interface_str =
      RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);
  ArrayInterface array_interface{interface_str};

  auto const& h_storage = storage.ConstHostVector();
  // Walk the elements in flat order; row and column are recovered from the
  // flat offset so the visiting order matches a nested row/column loop.
  for (size_t offset = 0; offset < kRows * kCols; ++offset) {
    size_t const row = offset / kCols;
    size_t const col = offset % kCols;
    float from_interface = array_interface.GetElement(row, col);
    float from_storage = h_storage.at(row * kCols + col);
    ASSERT_EQ(from_interface, from_storage);
  }
}
} // namespace xgboost

View File

@@ -210,9 +210,13 @@ class TestGPUPredict:
cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)
def predict_df(x):
# Compares in-place prediction with prediction through an `xgb.DMatrix` built
# from `x`: first using `x.values` (checked with an assert), then using `x`
# itself (the comparison reduced by `cp.all` is returned to the caller).
# NOTE(review): the value assigned on the next line is overwritten by the
# following assignment before it is read -- confirm whether it is still needed.
inplace_predt = booster.inplace_predict(x)
# column major array
inplace_predt = booster.inplace_predict(x.values)
# Reference prediction obtained through the DMatrix path.
d = xgb.DMatrix(x)
copied_predt = cp.array(booster.predict(d))
assert cp.all(copied_predt == inplace_predt)
# Repeat the in-place prediction with `x` passed directly.
inplace_predt = booster.inplace_predict(x)
return cp.all(copied_predt == inplace_predt)
for i in range(10):

View File

@@ -2,7 +2,10 @@
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from scipy import sparse
import pytest
import pandas as pd
import testing as tm
import xgboost as xgb
@@ -147,6 +150,19 @@ class TestInplacePredict:
for i in range(10):
run_threaded_predict(X, self.rows, predict_csr)
@pytest.mark.skipif(**tm.no_pandas())
def test_predict_pd(self):
    """In-place prediction on a DataFrame must agree with the ndarray and DMatrix paths."""
    features = self.X
    model = self.booster

    # Assemble the frame one column at a time so that it is constructed in
    # column major style.
    columns = {}
    for col in range(features.shape[1]):
        columns[str(col)] = features[:, col]
    frame = pd.DataFrame(columns)

    from_frame = model.inplace_predict(frame)
    from_array = model.inplace_predict(features)
    from_dmatrix = model.predict(xgb.DMatrix(features))

    np.testing.assert_allclose(from_dmatrix, from_array)
    np.testing.assert_allclose(from_frame, from_array)
def test_base_margin(self):
booster = self.booster