* Fix CPU hist init for sparse dataset. * Implement sparse histogram cut. * Allow empty features. * Fix windows build, don't use sparse in distributed environment. * Comments. * Smaller threshold. * Fix windows omp. * Fix msvc lambda capture. * Fix MSVC macro. * Fix MSVC initialization list. * Fix MSVC initialization list x2. * Preserve categorical feature behavior. * Rename matrix to sparse cuts. * Reuse UseGroup. * Check for categorical data when adding cut. Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu> * Sanity check. * Fix comments. * Fix comment.
56 lines
1.7 KiB
C++
56 lines
1.7 KiB
C++
#include "../../../src/common/compressed_iterator.h"
|
|
#include "gtest/gtest.h"
|
|
#include <algorithm>
|
|
|
|
namespace xgboost {
|
|
namespace common {
|
|
TEST(CompressedIterator, Test) {
|
|
ASSERT_TRUE(detail::SymbolBits(256) == 8);
|
|
ASSERT_TRUE(detail::SymbolBits(150) == 8);
|
|
std::vector<int> test_cases = {1, 3, 426, 21, 64, 256, 100000, INT32_MAX};
|
|
int num_elements = 1000;
|
|
int repetitions = 1000;
|
|
srand(9);
|
|
|
|
for (auto alphabet_size : test_cases) {
|
|
for (int i = 0; i < repetitions; i++) {
|
|
std::vector<int> input(num_elements);
|
|
std::generate(input.begin(), input.end(),
|
|
[=]() { return rand() % alphabet_size; });
|
|
CompressedBufferWriter cbw(alphabet_size);
|
|
|
|
// Test write entire array
|
|
std::vector<unsigned char> buffer(
|
|
CompressedBufferWriter::CalculateBufferSize(input.size(),
|
|
alphabet_size));
|
|
|
|
cbw.Write(buffer.data(), input.begin(), input.end());
|
|
|
|
CompressedIterator<int> ci(buffer.data(), alphabet_size);
|
|
std::vector<int> output(input.size());
|
|
for (size_t i = 0; i < input.size(); i++) {
|
|
output[i] = ci[i];
|
|
}
|
|
|
|
ASSERT_TRUE(input == output);
|
|
|
|
// Test write Symbol
|
|
std::vector<unsigned char> buffer2(
|
|
CompressedBufferWriter::CalculateBufferSize(input.size(),
|
|
alphabet_size));
|
|
for (size_t i = 0; i < input.size(); i++) {
|
|
cbw.WriteSymbol(buffer2.data(), input[i], i);
|
|
}
|
|
CompressedIterator<int> ci2(buffer.data(), alphabet_size);
|
|
std::vector<int> output2(input.size());
|
|
for (size_t i = 0; i < input.size(); i++) {
|
|
output2[i] = ci2[i];
|
|
}
|
|
ASSERT_TRUE(input == output2);
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace common
|
|
} // namespace xgboost
|