Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
522b8977c2 | ||
|
|
8147d78b12 | ||
|
|
651c4ac03b | ||
|
|
1fb75d2460 | ||
|
|
6609211517 | ||
|
|
b78ad1e623 | ||
|
|
96f8843694 | ||
|
|
a6d1fbf8d1 |
5
.github/workflows/main.yml
vendored
5
.github/workflows/main.yml
vendored
@@ -31,7 +31,10 @@ jobs:
|
||||
- name: Run gtest binary
|
||||
run: |
|
||||
cd build
|
||||
ctest --exclude-regex AllTestsInDMLCUnitTests --extra-verbose
|
||||
# libomp internal error:
|
||||
# OMP: Error #131: Thread identifier invalid.
|
||||
./testxgboost --gtest_filter="-HistIndexCreationWithExternalMemory.Test"
|
||||
ctest -R TestXGBoostCLI --extra-verbose
|
||||
|
||||
gtest-cpu-nonomp:
|
||||
name: Test Google C++ unittest (CPU Non-OMP)
|
||||
|
||||
12
.travis.yml
12
.travis.yml
@@ -19,18 +19,10 @@ jobs:
|
||||
env: TASK=java_test
|
||||
|
||||
# dependent brew packages
|
||||
# the dependencies from homebrew is installed manually from setup script due to outdated image from travis.
|
||||
addons:
|
||||
homebrew:
|
||||
packages:
|
||||
- cmake
|
||||
- libomp
|
||||
- graphviz
|
||||
- openssl
|
||||
- libgit2
|
||||
- lz4
|
||||
- wget
|
||||
- r
|
||||
update: true
|
||||
update: false
|
||||
apt:
|
||||
packages:
|
||||
- snapd
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.4.1)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.4.2)
|
||||
include(cmake/Utils.cmake)
|
||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||
cmake_policy(SET CMP0022 NEW)
|
||||
|
||||
5
Jenkinsfile
vendored
5
Jenkinsfile
vendored
@@ -179,10 +179,10 @@ def BuildCPUARM64() {
|
||||
${dockerRun} ${container_type} ${docker_binary} bash -c "cd build && ctest --extra-verbose"
|
||||
${dockerRun} ${container_type} ${docker_binary} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
|
||||
${dockerRun} ${container_type} ${docker_binary} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} ${wheel_tag}
|
||||
${dockerRun} ${container_type} ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
|
||||
${dockerRun} ${container_type} ${docker_binary} bash -c "auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl && python tests/ci_build/rename_whl.py wheelhouse/*.whl ${commit_id} ${wheel_tag}"
|
||||
mv -v wheelhouse/*.whl python-package/dist/
|
||||
# Make sure that libgomp.so is vendored in the wheel
|
||||
${dockerRun} ${container_type} ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
|
||||
${dockerRun} ${container_type} ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
|
||||
"""
|
||||
echo 'Stashing Python wheel...'
|
||||
stash name: "xgboost_whl_arm64_cpu", includes: 'python-package/dist/*.whl'
|
||||
@@ -231,6 +231,7 @@ def BuildCUDA(args) {
|
||||
if (args.cuda_version == ref_cuda_ver) {
|
||||
sh """
|
||||
${dockerRun} auditwheel_x86_64 ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py wheelhouse/*.whl ${commit_id} ${wheel_tag}
|
||||
mv -v wheelhouse/*.whl python-package/dist/
|
||||
# Make sure that libgomp.so is vendored in the wheel
|
||||
${dockerRun} auditwheel_x86_64 ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: Extreme Gradient Boosting
|
||||
Version: 1.4.1.1
|
||||
Version: 1.4.2.1
|
||||
Date: 2020-08-28
|
||||
Authors@R: c(
|
||||
person("Tianqi", "Chen", role = c("aut"),
|
||||
|
||||
@@ -49,7 +49,7 @@ def download_wheels(
|
||||
dir_URL: str,
|
||||
src_filename_prefix: str,
|
||||
target_filename_prefix: str,
|
||||
) -> None:
|
||||
) -> List[str]:
|
||||
"""Download all binary wheels. dir_URL is the URL for remote directory storing the release
|
||||
wheels
|
||||
|
||||
@@ -72,6 +72,8 @@ def download_wheels(
|
||||
assert stderr.find("warning") == -1, "Unresolved warnings:\n" + stderr
|
||||
assert stdout.find("warning") == -1, "Unresolved warnings:\n" + stdout
|
||||
|
||||
return filenames
|
||||
|
||||
|
||||
def check_path():
|
||||
root = os.path.abspath(os.path.curdir)
|
||||
|
||||
@@ -6,6 +6,6 @@
|
||||
|
||||
#define XGBOOST_VER_MAJOR 1
|
||||
#define XGBOOST_VER_MINOR 4
|
||||
#define XGBOOST_VER_PATCH 1
|
||||
#define XGBOOST_VER_PATCH 2
|
||||
|
||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||
|
||||
@@ -84,8 +84,9 @@ if __name__ == "__main__":
|
||||
|
||||
print("building Java wrapper")
|
||||
with cd(".."):
|
||||
maybe_makedirs("build")
|
||||
with cd("build"):
|
||||
build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build'
|
||||
maybe_makedirs(build_dir)
|
||||
with cd(build_dir):
|
||||
if sys.platform == "win32":
|
||||
# Force x64 build on Windows.
|
||||
maybe_generator = ' -A x64'
|
||||
@@ -114,6 +115,9 @@ if __name__ == "__main__":
|
||||
if gpu_arch_flag is not None:
|
||||
args.append("%s" % gpu_arch_flag)
|
||||
|
||||
lib_dir = os.path.join(os.pardir, 'lib')
|
||||
if os.path.exists(lib_dir):
|
||||
shutil.rmtree(lib_dir)
|
||||
run("cmake .. " + " ".join(args) + maybe_generator)
|
||||
run("cmake --build . --config Release" + maybe_parallel_build)
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>XGBoost JVM Package</name>
|
||||
<description>JVM Package for XGBoost</description>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
<packaging>jar</packaging>
|
||||
<build>
|
||||
<plugins>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
@@ -37,7 +37,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../xgboost4j/src/
|
||||
1
jvm-packages/xgboost4j-gpu/src/main/java
Symbolic link
1
jvm-packages/xgboost4j-gpu/src/main/java
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../xgboost4j/src/main/java/
|
||||
@@ -0,0 +1 @@
|
||||
../../../../xgboost4j/src/main/resources/xgboost4j-version.properties
|
||||
1
jvm-packages/xgboost4j-gpu/src/main/scala
Symbolic link
1
jvm-packages/xgboost4j-gpu/src/main/scala
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../xgboost4j/src/main/scala/
|
||||
1
jvm-packages/xgboost4j-gpu/src/native
Symbolic link
1
jvm-packages/xgboost4j-gpu/src/native
Symbolic link
@@ -0,0 +1 @@
|
||||
../../xgboost4j/src/native
|
||||
1
jvm-packages/xgboost4j-gpu/src/test
Symbolic link
1
jvm-packages/xgboost4j-gpu/src/test
Symbolic link
@@ -0,0 +1 @@
|
||||
../../xgboost4j/src/test
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../xgboost4j-spark/src/
|
||||
1
jvm-packages/xgboost4j-spark-gpu/src/main/scala
Symbolic link
1
jvm-packages/xgboost4j-spark-gpu/src/main/scala
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../xgboost4j-spark/src/main/scala
|
||||
1
jvm-packages/xgboost4j-spark-gpu/src/test
Symbolic link
1
jvm-packages/xgboost4j-spark-gpu/src/test
Symbolic link
@@ -0,0 +1 @@
|
||||
../../xgboost4j-spark/src/test
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j_2.12</artifactId>
|
||||
<version>1.4.1</version>
|
||||
<version>1.4.2</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -1 +1 @@
|
||||
1.4.1
|
||||
1.4.2
|
||||
|
||||
@@ -229,6 +229,9 @@ def _numpy2ctypes_type(dtype):
|
||||
|
||||
|
||||
def _array_interface(data: np.ndarray) -> bytes:
|
||||
assert (
|
||||
data.dtype.hasobject is False
|
||||
), "Input data contains `object` dtype. Expecting numeric data."
|
||||
interface = data.__array_interface__
|
||||
if "mask" in interface:
|
||||
interface["mask"] = interface["mask"].__array_interface__
|
||||
@@ -1841,8 +1844,8 @@ class Booster(object):
|
||||
)
|
||||
|
||||
if isinstance(data, np.ndarray):
|
||||
from .data import _maybe_np_slice
|
||||
data = _maybe_np_slice(data, data.dtype)
|
||||
from .data import _ensure_np_dtype
|
||||
data, _ = _ensure_np_dtype(data, data.dtype)
|
||||
_check_call(
|
||||
_LIB.XGBoosterPredictFromDense(
|
||||
self.handle,
|
||||
@@ -1872,7 +1875,9 @@ class Booster(object):
|
||||
)
|
||||
)
|
||||
return _prediction_output(shape, dims, preds, False)
|
||||
if lazy_isinstance(data, "cupy.core.core", "ndarray"):
|
||||
if lazy_isinstance(data, "cupy.core.core", "ndarray") or lazy_isinstance(
|
||||
data, "cupy._core.core", "ndarray"
|
||||
):
|
||||
from .data import _transform_cupy_array
|
||||
data = _transform_cupy_array(data)
|
||||
interface = data.__cuda_array_interface__
|
||||
@@ -2027,7 +2032,7 @@ class Booster(object):
|
||||
"""
|
||||
if isinstance(fout, (STRING_TYPES, os.PathLike)):
|
||||
fout = os.fspath(os.path.expanduser(fout))
|
||||
fout = open(fout, 'w')
|
||||
fout = open(fout, 'w') # pylint: disable=consider-using-with
|
||||
need_close = True
|
||||
else:
|
||||
need_close = False
|
||||
|
||||
@@ -104,6 +104,13 @@ def _is_numpy_array(data):
|
||||
return isinstance(data, (np.ndarray, np.matrix))
|
||||
|
||||
|
||||
def _ensure_np_dtype(data, dtype):
|
||||
if data.dtype.hasobject:
|
||||
data = data.astype(np.float32, copy=False)
|
||||
dtype = np.float32
|
||||
return data, dtype
|
||||
|
||||
|
||||
def _maybe_np_slice(data, dtype):
|
||||
'''Handle numpy slice. This can be removed if we use __array_interface__.
|
||||
'''
|
||||
@@ -118,6 +125,7 @@ def _maybe_np_slice(data, dtype):
|
||||
data = np.array(data, copy=False, dtype=dtype)
|
||||
except AttributeError:
|
||||
data = np.array(data, copy=False, dtype=dtype)
|
||||
data, dtype = _ensure_np_dtype(data, dtype)
|
||||
return data
|
||||
|
||||
|
||||
|
||||
@@ -734,8 +734,9 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
std::shared_ptr<xgboost::data::CSRArrayAdapter> x{
|
||||
new xgboost::data::CSRArrayAdapter{
|
||||
StringView{indptr}, StringView{indices}, StringView{data}, cols}};
|
||||
new xgboost::data::CSRArrayAdapter{StringView{indptr},
|
||||
StringView{indices}, StringView{data},
|
||||
static_cast<size_t>(cols)}};
|
||||
std::shared_ptr<DMatrix> p_m {nullptr};
|
||||
if (m) {
|
||||
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
|
||||
|
||||
@@ -98,6 +98,10 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
|
||||
forest = std::max(static_cast<decltype(forest)>(1), forest);
|
||||
shape[3] = forest;
|
||||
*out_dim = shape.size();
|
||||
} else if (chunksize == 1) {
|
||||
*out_dim = 1;
|
||||
shape.resize(*out_dim);
|
||||
shape.front() = rows;
|
||||
} else {
|
||||
*out_dim = 2;
|
||||
shape.resize(*out_dim);
|
||||
|
||||
@@ -229,8 +229,11 @@ class ArrayInterfaceHandler {
|
||||
}
|
||||
strides[1] = n;
|
||||
}
|
||||
auto valid = (rows - 1) * strides[0] + (cols - 1) * strides[1] == (rows * cols) - 1;
|
||||
CHECK(valid) << "Invalid strides in array.";
|
||||
|
||||
auto valid = rows * strides[0] + cols * strides[1] >= (rows * cols);
|
||||
CHECK(valid) << "Invalid strides in array."
|
||||
<< " strides: (" << strides[0] << "," << strides[1]
|
||||
<< "), shape: (" << rows << ", " << cols << ")";
|
||||
}
|
||||
|
||||
static void* ExtractData(std::map<std::string, Json> const &column,
|
||||
|
||||
@@ -87,8 +87,7 @@ std::tuple<float, float, float> BinaryAUC(std::vector<float> const &predts,
|
||||
* - Kleiman, Ross and Page, David. $AUC_{\mu}$: A Performance Metric for Multi-Class
|
||||
* Machine Learning Models
|
||||
*/
|
||||
float MultiClassOVR(std::vector<float> const& predts, MetaInfo const& info) {
|
||||
auto n_classes = predts.size() / info.labels_.Size();
|
||||
float MultiClassOVR(std::vector<float> const& predts, MetaInfo const& info, size_t n_classes) {
|
||||
CHECK_NE(n_classes, 0);
|
||||
auto const& labels = info.labels_.ConstHostVector();
|
||||
|
||||
@@ -230,6 +229,10 @@ class EvalAUC : public Metric {
|
||||
info.labels_.SetDevice(tparam_->gpu_id);
|
||||
info.weights_.SetDevice(tparam_->gpu_id);
|
||||
}
|
||||
// We use the global size to handle empty dataset.
|
||||
std::array<size_t, 2> meta{info.labels_.Size(), preds.Size()};
|
||||
rabit::Allreduce<rabit::op::Max>(meta.data(), meta.size());
|
||||
|
||||
if (!info.group_ptr_.empty()) {
|
||||
/**
|
||||
* learning to rank
|
||||
@@ -261,16 +264,17 @@ class EvalAUC : public Metric {
|
||||
CHECK_LE(auc, 1) << "Total AUC across groups: " << auc * valid_groups
|
||||
<< ", valid groups: " << valid_groups;
|
||||
}
|
||||
} else if (info.labels_.Size() != preds.Size() &&
|
||||
preds.Size() % info.labels_.Size() == 0) {
|
||||
} else if (meta[0] != meta[1] && meta[1] % meta[0] == 0) {
|
||||
/**
|
||||
* multi class
|
||||
*/
|
||||
size_t n_classes = meta[1] / meta[0];
|
||||
CHECK_NE(n_classes, 0);
|
||||
if (tparam_->gpu_id == GenericParameter::kCpuId) {
|
||||
auc = MultiClassOVR(preds.ConstHostVector(), info);
|
||||
auc = MultiClassOVR(preds.ConstHostVector(), info, n_classes);
|
||||
} else {
|
||||
auc = GPUMultiClassAUCOVR(preds.ConstDeviceSpan(), info, tparam_->gpu_id,
|
||||
&this->d_cache_);
|
||||
&this->d_cache_, n_classes);
|
||||
}
|
||||
} else {
|
||||
/**
|
||||
@@ -323,7 +327,8 @@ GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
|
||||
}
|
||||
|
||||
float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info,
|
||||
int32_t device, std::shared_ptr<DeviceAUCCache>* cache) {
|
||||
int32_t device, std::shared_ptr<DeviceAUCCache>* cache,
|
||||
size_t n_classes) {
|
||||
common::AssertGPUSupport();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -61,10 +61,12 @@ struct DeviceAUCCache {
|
||||
neg_pos.resize(sorted_idx.size());
|
||||
if (is_multi) {
|
||||
predts_t.resize(sorted_idx.size());
|
||||
reducer.reset(new dh::AllReducer);
|
||||
reducer->Init(rabit::GetRank());
|
||||
}
|
||||
}
|
||||
if (is_multi && !reducer) {
|
||||
reducer.reset(new dh::AllReducer);
|
||||
reducer->Init(device);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -197,12 +199,48 @@ XGBOOST_DEVICE size_t LastOf(size_t group, common::Span<Idx> indptr) {
|
||||
return indptr[group + 1] - 1;
|
||||
}
|
||||
|
||||
|
||||
float ScaleClasses(common::Span<float> results, common::Span<float> local_area,
|
||||
common::Span<float> fp, common::Span<float> tp,
|
||||
common::Span<float> auc, std::shared_ptr<DeviceAUCCache> cache,
|
||||
size_t n_classes) {
|
||||
dh::XGBDeviceAllocator<char> alloc;
|
||||
if (rabit::IsDistributed()) {
|
||||
CHECK_EQ(dh::CudaGetPointerDevice(results.data()), dh::CurrentDevice());
|
||||
cache->reducer->AllReduceSum(results.data(), results.data(), results.size());
|
||||
}
|
||||
auto reduce_in = dh::MakeTransformIterator<thrust::pair<float, float>>(
|
||||
thrust::make_counting_iterator(0), [=] __device__(size_t i) {
|
||||
if (local_area[i] > 0) {
|
||||
return thrust::make_pair(auc[i] / local_area[i] * tp[i], tp[i]);
|
||||
}
|
||||
return thrust::make_pair(std::numeric_limits<float>::quiet_NaN(), 0.0f);
|
||||
});
|
||||
|
||||
float tp_sum;
|
||||
float auc_sum;
|
||||
thrust::tie(auc_sum, tp_sum) = thrust::reduce(
|
||||
thrust::cuda::par(alloc), reduce_in, reduce_in + n_classes,
|
||||
thrust::make_pair(0.0f, 0.0f),
|
||||
[=] __device__(auto const &l, auto const &r) {
|
||||
return thrust::make_pair(l.first + r.first, l.second + r.second);
|
||||
});
|
||||
if (tp_sum != 0 && !std::isnan(auc_sum)) {
|
||||
auc_sum /= tp_sum;
|
||||
} else {
|
||||
return std::numeric_limits<float>::quiet_NaN();
|
||||
}
|
||||
return auc_sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* MultiClass implementation is similar to binary classification, except we need to split
|
||||
* up each class in all kernels.
|
||||
*/
|
||||
float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info,
|
||||
int32_t device, std::shared_ptr<DeviceAUCCache>* p_cache) {
|
||||
int32_t device, std::shared_ptr<DeviceAUCCache>* p_cache,
|
||||
size_t n_classes) {
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
auto& cache = *p_cache;
|
||||
if (!cache) {
|
||||
cache.reset(new DeviceAUCCache);
|
||||
@@ -213,8 +251,19 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
|
||||
auto weights = info.weights_.ConstDeviceSpan();
|
||||
|
||||
size_t n_samples = labels.size();
|
||||
size_t n_classes = predts.size() / labels.size();
|
||||
CHECK_NE(n_classes, 0);
|
||||
|
||||
if (n_samples == 0) {
|
||||
dh::TemporaryArray<float> resutls(n_classes * 4, 0.0f);
|
||||
auto d_results = dh::ToSpan(resutls);
|
||||
dh::LaunchN(device, n_classes * 4, [=]__device__(size_t i) {
|
||||
d_results[i] = 0.0f;
|
||||
});
|
||||
auto local_area = d_results.subspan(0, n_classes);
|
||||
auto fp = d_results.subspan(n_classes, n_classes);
|
||||
auto tp = d_results.subspan(2 * n_classes, n_classes);
|
||||
auto auc = d_results.subspan(3 * n_classes, n_classes);
|
||||
return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create sorted index for each class
|
||||
@@ -377,32 +426,7 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
|
||||
tp[c] = last.second;
|
||||
local_area[c] = last.first * last.second;
|
||||
});
|
||||
if (rabit::IsDistributed()) {
|
||||
cache->reducer->AllReduceSum(resutls.data().get(), resutls.data().get(),
|
||||
resutls.size());
|
||||
}
|
||||
auto reduce_in = dh::MakeTransformIterator<thrust::pair<float, float>>(
|
||||
thrust::make_counting_iterator(0), [=] __device__(size_t i) {
|
||||
if (local_area[i] > 0) {
|
||||
return thrust::make_pair(auc[i] / local_area[i] * tp[i], tp[i]);
|
||||
}
|
||||
return thrust::make_pair(std::numeric_limits<float>::quiet_NaN(), 0.0f);
|
||||
});
|
||||
|
||||
float tp_sum;
|
||||
float auc_sum;
|
||||
thrust::tie(auc_sum, tp_sum) = thrust::reduce(
|
||||
thrust::cuda::par(alloc), reduce_in, reduce_in + n_classes,
|
||||
thrust::make_pair(0.0f, 0.0f),
|
||||
[=] __device__(auto const &l, auto const &r) {
|
||||
return thrust::make_pair(l.first + r.first, l.second + r.second);
|
||||
});
|
||||
if (tp_sum != 0 && !std::isnan(auc_sum)) {
|
||||
auc_sum /= tp_sum;
|
||||
} else {
|
||||
return std::numeric_limits<float>::quiet_NaN();
|
||||
}
|
||||
return auc_sum;
|
||||
return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -26,7 +26,8 @@ GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
|
||||
int32_t device, std::shared_ptr<DeviceAUCCache> *p_cache);
|
||||
|
||||
float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info,
|
||||
int32_t device, std::shared_ptr<DeviceAUCCache>* cache);
|
||||
int32_t device, std::shared_ptr<DeviceAUCCache>* cache,
|
||||
size_t n_classes);
|
||||
|
||||
std::pair<float, uint32_t>
|
||||
GPURankingAUC(common::Span<float const> predts, MetaInfo const &info,
|
||||
|
||||
@@ -15,6 +15,7 @@ dependencies:
|
||||
- matplotlib
|
||||
- dask
|
||||
- distributed
|
||||
- graphviz
|
||||
- python-graphviz
|
||||
- hypothesis
|
||||
- astroid
|
||||
|
||||
@@ -11,7 +11,7 @@ vcomp140_path = 'C:\\Windows\\System32\\vcomp140.dll'
|
||||
|
||||
for wheel_path in sorted(glob.glob(sys.argv[1])):
|
||||
m = re.search(r'xgboost-(.*)-py3', wheel_path)
|
||||
assert m
|
||||
assert m, f'wheel_path = {wheel_path}'
|
||||
version = m.group(1)
|
||||
|
||||
with zipfile.ZipFile(wheel_path, 'a') as f:
|
||||
|
||||
@@ -26,8 +26,9 @@ dirname, basename = os.path.dirname(whl_path), os.path.basename(whl_path)
|
||||
with cd(dirname):
|
||||
tokens = basename.split('-')
|
||||
assert len(tokens) == 5
|
||||
version = tokens[1].split('+')[0]
|
||||
keywords = {'pkg_name': tokens[0],
|
||||
'version': tokens[1],
|
||||
'version': version,
|
||||
'commit_id': commit_id,
|
||||
'platform_tag': platform_tag}
|
||||
new_name = '{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl'.format(**keywords)
|
||||
|
||||
@@ -277,7 +277,7 @@ class TestDistributedGPU:
|
||||
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
|
||||
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
|
||||
w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
|
||||
run_dask_classifier(X, y, w, model, client, 10)
|
||||
run_dask_classifier(X, y, w, model, "gpu_hist", client, 10)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_dask())
|
||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||
|
||||
@@ -57,15 +57,13 @@ class TestLinear:
|
||||
param['updater'] = 'shotgun'
|
||||
param = dataset.set_params(param)
|
||||
result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
|
||||
# shotgun is non-deterministic, so we relax the test by sampling
|
||||
# result.
|
||||
# shotgun is non-deterministic, so we relax the test by only using first and last
|
||||
# iteration.
|
||||
if len(result) > 2:
|
||||
sampled_result = [score for i, score in enumerate(result)
|
||||
if i % 2 == 0]
|
||||
sampled_result[-1] = result[-1] # make sure the last one is used
|
||||
sampled_result = (result[0], result[-1])
|
||||
else:
|
||||
sampled_result = result
|
||||
assert tm.non_increasing(sampled_result, 1e-3)
|
||||
assert tm.non_increasing(sampled_result)
|
||||
|
||||
@given(parameter_strategy, strategies.integers(10, 50),
|
||||
tm.dataset_strategy, strategies.floats(1e-5, 2.0),
|
||||
|
||||
@@ -75,6 +75,11 @@ def run_predict_leaf(predictor):
|
||||
first = sliced[0, ...]
|
||||
|
||||
assert np.prod(first.shape) == classes * num_parallel_tree * ntree_limit
|
||||
|
||||
# When there's only 1 tree, the output is a 1 dim vector
|
||||
booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
|
||||
assert booster.predict(m, pred_leaf=True).shape == (rows, )
|
||||
|
||||
return leaf
|
||||
|
||||
|
||||
@@ -150,6 +155,14 @@ class TestInplacePredict:
|
||||
predt_from_array = booster.inplace_predict(X[:10, ...], missing=self.missing)
|
||||
predt_from_dmatrix = booster.predict(test)
|
||||
|
||||
X_obj = X.copy().astype(object)
|
||||
|
||||
assert X_obj.dtype.hasobject is True
|
||||
assert X.dtype.hasobject is False
|
||||
np.testing.assert_allclose(
|
||||
booster.inplace_predict(X_obj), booster.inplace_predict(X)
|
||||
)
|
||||
|
||||
np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)
|
||||
|
||||
predt_from_array = booster.inplace_predict(
|
||||
@@ -187,8 +200,13 @@ class TestInplacePredict:
|
||||
arr_predt = booster.inplace_predict(X)
|
||||
dmat_predt = booster.predict(xgb.DMatrix(X))
|
||||
|
||||
X = df.values
|
||||
X = np.asfortranarray(X)
|
||||
fort_predt = booster.inplace_predict(X)
|
||||
|
||||
np.testing.assert_allclose(dmat_predt, arr_predt)
|
||||
np.testing.assert_allclose(df_predt, arr_predt)
|
||||
np.testing.assert_allclose(fort_predt, arr_predt)
|
||||
|
||||
def test_base_margin(self):
|
||||
booster = self.booster
|
||||
|
||||
@@ -317,6 +317,7 @@ def run_dask_classifier(
|
||||
y: xgb.dask._DaskCollection,
|
||||
w: xgb.dask._DaskCollection,
|
||||
model: str,
|
||||
tree_method: Optional[str],
|
||||
client: "Client",
|
||||
n_classes,
|
||||
) -> None:
|
||||
@@ -324,11 +325,11 @@ def run_dask_classifier(
|
||||
|
||||
if model == "boosting":
|
||||
classifier = xgb.dask.DaskXGBClassifier(
|
||||
verbosity=1, n_estimators=2, eval_metric=metric
|
||||
verbosity=1, n_estimators=2, eval_metric=metric, tree_method=tree_method
|
||||
)
|
||||
else:
|
||||
classifier = xgb.dask.DaskXGBRFClassifier(
|
||||
verbosity=1, n_estimators=2, eval_metric=metric
|
||||
verbosity=1, n_estimators=2, eval_metric=metric, tree_method=tree_method
|
||||
)
|
||||
|
||||
assert classifier._estimator_type == "classifier"
|
||||
@@ -397,12 +398,12 @@ def run_dask_classifier(
|
||||
def test_dask_classifier(model: str, client: "Client") -> None:
|
||||
X, y, w = generate_array(with_weights=True)
|
||||
y = (y * 10).astype(np.int32)
|
||||
run_dask_classifier(X, y, w, model, client, 10)
|
||||
run_dask_classifier(X, y, w, model, None, client, 10)
|
||||
|
||||
y_bin = y.copy()
|
||||
y_bin[y > 5] = 1.0
|
||||
y_bin[y <= 5] = 0.0
|
||||
run_dask_classifier(X, y_bin, w, model, client, 2)
|
||||
run_dask_classifier(X, y_bin, w, model, None, client, 2)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
@@ -568,22 +569,26 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
|
||||
# multiclass
|
||||
X_, y_ = make_classification(
|
||||
n_samples=n_samples,
|
||||
n_classes=10,
|
||||
n_classes=n_workers,
|
||||
n_informative=n_features,
|
||||
n_redundant=0,
|
||||
n_repeated=0
|
||||
)
|
||||
for i in range(y_.shape[0]):
|
||||
y_[i] = i % n_workers
|
||||
X = dd.from_array(X_, chunksize=10)
|
||||
y = dd.from_array(y_, chunksize=10)
|
||||
|
||||
n_samples = n_workers - 1
|
||||
valid_X_, valid_y_ = make_classification(
|
||||
n_samples=n_samples,
|
||||
n_classes=10,
|
||||
n_classes=n_workers,
|
||||
n_informative=n_features,
|
||||
n_redundant=0,
|
||||
n_repeated=0
|
||||
)
|
||||
for i in range(valid_y_.shape[0]):
|
||||
valid_y_[i] = i % n_workers
|
||||
valid_X = dd.from_array(valid_X_, chunksize=n_samples)
|
||||
valid_y = dd.from_array(valid_y_, chunksize=n_samples)
|
||||
|
||||
@@ -594,9 +599,9 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
|
||||
|
||||
|
||||
def test_empty_dmatrix_auc() -> None:
|
||||
with LocalCluster(n_workers=2) as cluster:
|
||||
with LocalCluster(n_workers=8) as cluster:
|
||||
with Client(cluster) as client:
|
||||
run_empty_dmatrix_auc(client, "hist", 2)
|
||||
run_empty_dmatrix_auc(client, "hist", 8)
|
||||
|
||||
|
||||
def run_auc(client: "Client", tree_method: str) -> None:
|
||||
@@ -1023,7 +1028,17 @@ class TestWithDask:
|
||||
evals=[(m, 'train')])['history']
|
||||
note(history)
|
||||
history = history['train'][dataset.metric]
|
||||
assert tm.non_increasing(history)
|
||||
|
||||
def is_stump():
|
||||
return params["max_depth"] == 1 or params["max_leaves"] == 1
|
||||
|
||||
def minimum_bin():
|
||||
return "max_bin" in params and params["max_bin"] == 2
|
||||
|
||||
if minimum_bin() and is_stump():
|
||||
assert tm.non_increasing(history, tolerance=1e-3)
|
||||
else:
|
||||
assert tm.non_increasing(history)
|
||||
# Make sure that it's decreasing
|
||||
assert history[-1] < history[0]
|
||||
|
||||
|
||||
@@ -272,6 +272,8 @@ def eval_error_metric(predt, dtrain: xgb.DMatrix):
|
||||
label = dtrain.get_label()
|
||||
r = np.zeros(predt.shape)
|
||||
gt = predt > 0.5
|
||||
if predt.size == 0:
|
||||
return "CustomErr", 0
|
||||
r[gt] = 1 - label[gt]
|
||||
le = predt <= 0.5
|
||||
r[le] = label[le]
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
make -f dmlc-core/scripts/packages.mk lz4
|
||||
|
||||
source $HOME/miniconda/bin/activate
|
||||
|
||||
if [ ${TASK} == "python_sdist_test" ]; then
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
# https://travis-ci.community/t/macos-build-fails-because-of-homebrew-bundle-unknown-command/7296/27
|
||||
brew install cmake libomp lz4
|
||||
|
||||
|
||||
if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_sdist_test" ]; then
|
||||
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
|
||||
wget --no-verbose -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
||||
|
||||
Reference in New Issue
Block a user