Compare commits

...

217 Commits

Author SHA1 Message Date
Hui Liu
1ce5029a96 fix memory type 2024-01-26 15:45:04 -08:00
Hui Liu
420f8d6fde merge v2.0.3 from upstream 2024-01-25 07:40:06 -08:00
Hui Liu
dc7ee041cc use __HIPCC__ for device code 2024-01-24 12:32:51 -08:00
Hui Liu
7dc152450e workaround memoryType and change rccl config 2024-01-11 13:13:14 -08:00
Philip Hyunsu Cho
82d846bbeb
Update change_scala_version.py to also change scala.version property (#9897) 2023-12-18 23:49:41 -08:00
Philip Hyunsu Cho
71d330afdc
Bump version to 2.0.3 (#9895) 2023-12-14 17:54:05 -08:00
Philip Hyunsu Cho
3acbd8692b
[jvm-packages] Fix POM for xgboost-jvm metapackage (#9893)
* [jvm-packages] Fix POM for xgboost-jvm metapackage

* Add script for updating the Scala version
2023-12-14 16:50:34 -08:00
Philip Hyunsu Cho
ad524f76ab
[backport] [CI] Upload libxgboost4j.dylib (M1) to S3 bucket (#9887)
* [CI] Set up CI for Mac M1 (#9699)

* [CI] Improve CI for Mac M1 (#9748)

* [CI] Build libxgboost4j.dylib with CMAKE_OSX_DEPLOYMENT_TARGET (#9749)

* [CI] Upload libxgboost4j.dylib (M1) to S3 bucket (#9886)
2023-12-13 16:05:40 -08:00
Jiaming Yuan
d2d1751c03
[backport][py] Use the first found native library. (#9860) (#9879) 2023-12-13 14:20:30 +08:00
Jiaming Yuan
e4ee4e79dc
[backport][sklearn] Fix loading model attributes. (#9808) (#9880) 2023-12-13 14:20:04 +08:00
Philip Hyunsu Cho
41ce8f28b2
[jvm-packages] Add Scala version suffix to xgboost-jvm package (#9776)
* Update JVM script (#9714)

* Bump version to 2.0.2; revamp pom.xml

* Update instructions in prepare_jvm_release.py

* Fix formatting
2023-11-08 10:17:26 -08:00
Jiaming Yuan
0ffc52e05c
[backport] Fix using categorical data with the ranker. (#9753) (#9778) 2023-11-09 01:20:52 +08:00
Hui Liu
82d81bca94 rm hip.h files 2023-10-30 21:54:00 -07:00
Hui Liu
6ec5cf26fc enable 3 more tests 2023-10-30 15:27:02 -07:00
Hui Liu
1ec57fd1a3 enable ROCm support, rm un-necessary code 2023-10-30 12:39:30 -07:00
Hui Liu
d0774a78e4 add hip to config 2023-10-30 12:01:24 -07:00
Hui Liu
8d160a206e add jvm rocm support 2023-10-30 11:49:47 -07:00
Hui Liu
a41bc0975c rocm enable for v2.0.1, rm setup.py 2023-10-27 18:53:16 -07:00
Hui Liu
782b73f2bb rocm enable for v2.0.1 2023-10-27 18:50:28 -07:00
Philip Hyunsu Cho
a408254c2f
Use sys.base_prefix instead of sys.prefix (#9711)
* Use sys.base_prefix instead of sys.prefix

* Update libpath.py too
2023-10-23 23:31:40 -07:00
Philip Hyunsu Cho
22e891dafa
[jvm-packages] Remove hard dependency on libjvm (#9698) (#9705) 2023-10-23 21:21:14 -07:00
Philip Hyunsu Cho
89530c80a7 [CI] Build libxgboost4j.dylib for Intel Mac (#9704) 2023-10-23 20:45:01 -07:00
Philip Hyunsu Cho
946ab53b57
Fix libpath logic for Windows (#9687) 2023-10-19 10:42:46 -07:00
Philip Hyunsu Cho
afd03a6934
Fix build for AppleClang 11 (#9684) 2023-10-18 09:35:59 -07:00
Jiaming Yuan
f7da938458
[backport][pyspark] Support stage-level scheduling (#9519) (#9686)
Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-10-18 14:05:08 +08:00
Philip Hyunsu Cho
6ab6577511
Fix build for GCC 8.x (#9670) 2023-10-12 23:36:41 -07:00
Philip Hyunsu Cho
8c57558d74
[backport] [CI] Pull CentOS 7 images from NGC (#9666) (#9668) 2023-10-13 14:09:54 +08:00
Jiaming Yuan
58aa98a796
Bump version to 2.0.1. (#9660) 2023-10-13 08:47:32 +08:00
Jiaming Yuan
92273b39d8
[backport] Add support for cgroupv2. (#9651) (#9656) 2023-10-12 11:39:27 +08:00
Jiaming Yuan
e824b18bf6
[backport] Support pandas 2.1.0. (#9557) (#9655) 2023-10-12 11:29:59 +08:00
Jiaming Yuan
66ee89d8b4
[backport] Workaround Apple clang issue. (#9615) (#9636) 2023-10-08 15:42:15 +08:00
Jiaming Yuan
54d1d72d01
[backport] Use array interface for testing numpy arrays. (#9602) (#9635) 2023-10-08 11:45:49 +08:00
Jiaming Yuan
032bcc57f9
[backport][R] Fix method name. (#9577) (#9592) 2023-09-19 02:08:46 +08:00
Jiaming Yuan
ace7713201
[backport] Fix default metric configuration. (#9575) (#9590) 2023-09-18 23:40:43 +08:00
Jiaming Yuan
096047c547
Make 2.0 release. (#9567) 2023-09-12 00:20:49 +08:00
Jiaming Yuan
e75dd75bb2
[backport] [pyspark] support gpu transform (#9542) (#9559)
---------

Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-09-07 17:21:09 +08:00
Jiaming Yuan
4d387cbfbf
[backport] [pyspark] rework transform to reuse same code (#9292) (#9558)
Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-09-07 15:26:24 +08:00
Jiaming Yuan
3fde9361d7
[backport] Fix inplace predict with fallback when base margin is used. (#9536) (#9548)
- Copy meta info from proxy DMatrix.
- Use `std::call_once` to emit less warnings.
2023-09-05 23:38:06 +08:00
Jiaming Yuan
b67c2ed96d
[backport] [CI] bump setup-r action version. (#9544) (#9551) 2023-09-05 22:10:30 +08:00
Jiaming Yuan
177fd79864
[backport] Fix read the doc configuration. [skip ci] (#9549) 2023-09-05 17:32:00 +08:00
Jiaming Yuan
06487d3896
[backport] Fix GPU categorical split memory allocation. (#9529) (#9535) 2023-08-29 21:14:43 +08:00
Jiaming Yuan
e50ccc4d3c
[R] Fix integer inputs with NA. (#9522) (#9534) 2023-08-29 19:52:13 +08:00
Jiaming Yuan
add57f8880
[backport] Delay the check for vector leaf. (#9509) (#9533) 2023-08-29 18:25:59 +08:00
Jiaming Yuan
a0d3573c74
[backport] Fix device dispatch for linear updater. (#9507) (#9532) 2023-08-29 15:10:43 +08:00
Jiaming Yuan
4301558a57
Make 2.0.0 RC1. (#9492) 2023-08-17 16:16:51 +08:00
amdsc21
2e7e9d3b2d update rocgputreeshap branch 2023-06-23 19:50:08 +02:00
amdsc21
3e0c7d1dee new url for rocgputreeshap 2023-06-23 19:46:45 +02:00
amdsc21
2f47a1ebe6 rm warp-primitives 2023-06-22 21:43:00 +02:00
amdsc21
5ca7daaa13 merge latest changes 2023-06-15 21:39:14 +02:00
amdsc21
5f78360949 merge changes Jun092023 2023-06-09 22:41:33 +02:00
amdsc21
35cde3b1b2 remove some hip.h 2023-06-07 04:48:09 +02:00
amdsc21
ce345c30a8 remove some hip.h 2023-06-07 03:39:01 +02:00
amdsc21
af8845405a sync Jun 5 2023-06-07 02:43:21 +02:00
amdsc21
9ee1852d4e restore device helper 2023-06-02 02:55:13 +02:00
Your Name
6ecd7903f2 Merge branch 'master' into sync-condition-2023Jun01 2023-06-01 15:58:31 -07:00
Your Name
42867a4805 sync Jun 1 2023-06-01 15:55:06 -07:00
amdsc21
c5b575e00e fix host __assert_fail 2023-05-24 19:40:24 +02:00
amdsc21
1354138b7d Merge branch 'master' into sync-condition-2023May15 2023-05-24 17:44:16 +02:00
amdsc21
b994a38b28 Merge branch 'master' into sync-condition-2023May15 2023-05-23 01:07:50 +02:00
amdsc21
3a834c4992 change workflow 2023-05-20 07:04:06 +02:00
amdsc21
b22644fc10 add hip.h 2023-05-20 01:25:33 +02:00
amdsc21
7663d47383 Merge branch 'master' into sync-condition-2023May15 2023-05-19 20:30:35 +02:00
amdsc21
88fc8badfa Merge branch 'master' into sync-condition-2023May15 2023-05-17 19:55:50 +02:00
amdsc21
8cad8c693c sync up May15 2023 2023-05-15 18:59:18 +02:00
amdsc21
b066accad6 fix lambdarank_obj 2023-05-02 21:06:22 +02:00
amdsc21
b324d51f14 fix array_interface.h half type 2023-05-02 20:50:50 +02:00
amdsc21
65097212b3 fix IterativeDeviceDMatrix, support HIP 2023-05-02 20:20:11 +02:00
amdsc21
4a24ca2f95 fix helpers.h, enable HIP 2023-05-02 20:04:23 +02:00
amdsc21
83e6fceb5c fix lambdarank_obj.cc, support HIP 2023-05-02 19:03:18 +02:00
amdsc21
e4538cb13c fix, to support hip 2023-05-02 17:43:11 +02:00
amdsc21
5446c501af merge 23Mar01 2023-05-02 00:05:58 +02:00
amdsc21
313a74b582 add Shap Magic to check if use cat 2023-05-01 21:55:14 +02:00
amdsc21
65d83e288f fix device query 2023-04-19 19:53:26 +02:00
amdsc21
f645cf51c1 Merge branch 'master' into sync-condition-2023Apr11 2023-04-17 18:33:00 +02:00
amdsc21
db8420225b fix RCCL 2023-04-12 01:09:14 +02:00
amdsc21
843fdde61b sync Apr 11 2023 2023-04-11 20:03:25 +02:00
amdsc21
08bc4b0c0f Merge branch 'master' into sync-condition-2023Apr11 2023-04-11 19:38:38 +02:00
amdsc21
6825d986fd move Dockerfile to ci 2023-04-11 19:34:23 +02:00
paklui
d155ec77f9 building docker for xgboost-amd-condition 2023-03-30 13:36:39 -07:00
amdsc21
991738690f Merge branch 'sync-condition-2023Mar27' into amd-condition 2023-03-30 05:16:36 +02:00
amdsc21
aeb3fd1c95 Merge branch 'master' into sync-condition-2023Mar27 2023-03-30 05:15:55 +02:00
amdsc21
141a062e00 Merge branch 'sync-condition-2023Mar27' into amd-condition 2023-03-30 00:47:16 +02:00
amdsc21
acad01afc9 sync Mar 29 2023-03-30 00:46:50 +02:00
amdsc21
f289e5001d Merge branch 'sync-condition-2023Mar27' into amd-condition 2023-03-28 00:24:12 +02:00
amdsc21
06d9b998ce fix CAPI BuildInfo 2023-03-28 00:14:18 +02:00
amdsc21
c50cc424bc sync Mar 27 2023 2023-03-27 18:54:41 +02:00
amdsc21
8c77e936d1 tune grid size 2023-03-26 17:45:19 +02:00
amdsc21
18034a4291 tune histogram 2023-03-26 01:42:51 +01:00
amdsc21
7ee4734d3a rm device_helpers.hip.h from cu 2023-03-26 00:24:11 +01:00
amdsc21
ee582f03c3 rm device_helpers.hip.h from cuh 2023-03-25 23:35:57 +01:00
amdsc21
f3286bac04 rm warp header 2023-03-25 23:01:44 +01:00
amdsc21
3ee3bea683 fix warp header 2023-03-25 22:37:37 +01:00
amdsc21
5098735698 Merge branch 'condition-sync-Mar24-23' into hui-condition 2023-03-25 05:28:40 +01:00
amdsc21
e74b3bbf3c fix macro 2023-03-25 05:17:39 +01:00
amdsc21
22525c002a fix macro 2023-03-25 05:08:30 +01:00
amdsc21
80961039d7 fix macro 2023-03-25 05:00:55 +01:00
amdsc21
1474789787 add new file 2023-03-25 04:54:02 +01:00
amdsc21
1dc138404a initial merge, fix linalg.h 2023-03-25 04:48:47 +01:00
amdsc21
e1d050f64e initial merge, fix linalg.h 2023-03-25 04:37:43 +01:00
amdsc21
7fbc561e17 initial merge 2023-03-25 04:31:55 +01:00
amdsc21
d97be6f396 enable last 3 tests 2023-03-25 04:05:05 +01:00
amdsc21
f1211cffca enable last 3 tests 2023-03-25 00:45:52 +01:00
amdsc21
e0716afabf fix objective/objective.cc, CMakeFile and setup.py 2023-03-23 20:22:34 +01:00
amdsc21
595cd81251 add max shared mem workaround 2023-03-19 20:08:42 +01:00
amdsc21
0325ce0bed update gputreeshap 2023-03-19 20:07:36 +01:00
amdsc21
a79a35c22c add warp size 2023-03-15 22:00:26 +01:00
amdsc21
4484c7f073 disable Optin Shared Mem 2023-03-15 02:10:16 +01:00
amdsc21
8207015e48 fix ../tests/cpp/common/test_span.h 2023-03-14 22:19:06 +01:00
amdsc21
364df7db0f fix ../tree/gpu_hist/evaluate_splits.hip bugs, size 64 2023-03-14 06:17:21 +01:00
amdsc21
a2bab03205 fix aft_obj.hip 2023-03-13 23:19:59 +01:00
amdsc21
b71c1b50de fix macro, no ! 2023-03-12 23:02:28 +01:00
amdsc21
fa2336fcfd sort bug fix 2023-03-12 07:09:10 +01:00
amdsc21
7d96758382 macro format 2023-03-11 06:57:24 +01:00
amdsc21
b0dacc5a80 fix bug 2023-03-11 03:47:23 +01:00
amdsc21
f64152bf97 add helpers.hip 2023-03-11 02:56:50 +01:00
amdsc21
b4dbe7a649 fix isnan 2023-03-11 02:39:58 +01:00
amdsc21
e5b6219a84 typo 2023-03-11 02:30:27 +01:00
amdsc21
3a07b1edf8 complete test porting 2023-03-11 02:17:05 +01:00
amdsc21
9bf16a2ca6 testing porting 2023-03-11 01:38:54 +01:00
amdsc21
332f6a89a9 more tests 2023-03-11 01:33:48 +01:00
amdsc21
204d0c9a53 add hip tests 2023-03-11 00:38:16 +01:00
amdsc21
e961016e71 rm HIPCUB 2023-03-10 22:21:37 +01:00
amdsc21
f0b8c02f15 merge latest changes 2023-03-10 22:10:20 +01:00
amdsc21
5e8b1842b9 fix Pointer Attr 2023-03-10 19:06:02 +01:00
amdsc21
9f072b50ba fix __popc 2023-03-10 17:14:31 +01:00
amdsc21
e1ddb5ae58 fix macro XGBOOST_USE_HIP 2023-03-10 07:11:05 +01:00
amdsc21
643e2a7b39 fix macro XGBOOST_USE_HIP 2023-03-10 07:09:41 +01:00
amdsc21
bde3107c3e fix macro XGBOOST_USE_HIP 2023-03-10 07:01:25 +01:00
amdsc21
5edfc1e2e9 finish ellpack_page.cc 2023-03-10 06:41:25 +01:00
amdsc21
c073417d0c finish aft_obj.cu 2023-03-10 06:39:03 +01:00
amdsc21
9bbbeb3f03 finish multiclass_obj.cu 2023-03-10 06:35:46 +01:00
amdsc21
4bde2e3412 finish multiclass_obj.cu 2023-03-10 06:35:21 +01:00
amdsc21
58a9fe07b6 finish multiclass_obj.cu 2023-03-10 06:35:06 +01:00
amdsc21
41407850d5 finish rank_obj.cu 2023-03-10 06:29:08 +01:00
amdsc21
968a1db4c0 finish regression_obj.cu 2023-03-10 06:07:53 +01:00
amdsc21
ad710e4888 finish hinge.cu 2023-03-10 06:04:59 +01:00
amdsc21
4e3c699814 finish adaptive.cu 2023-03-10 06:02:48 +01:00
amdsc21
757de84398 finish quantile.cu 2023-03-10 05:55:51 +01:00
amdsc21
d27f9dfdce finish host_device_vector.cu 2023-03-10 05:45:38 +01:00
amdsc21
14cc438a64 finish stats.cu 2023-03-10 05:38:16 +01:00
amdsc21
911a5d8a60 finish hist_util.cu 2023-03-10 05:32:38 +01:00
amdsc21
54b076b40f finish common.cu 2023-03-10 05:20:29 +01:00
amdsc21
91a5ef762e finish common.cu 2023-03-10 05:19:41 +01:00
amdsc21
8fd2af1c8b finish numeric.cu 2023-03-10 05:16:23 +01:00
amdsc21
bb6adda8a3 finish c_api.cu 2023-03-10 05:12:51 +01:00
amdsc21
a76ccff390 finish c_api.cu 2023-03-10 05:11:20 +01:00
amdsc21
61c0b19331 finish ellpack_page_source.cu 2023-03-10 05:06:36 +01:00
amdsc21
fa9f69dd85 finish sparse_page_dmatrix.cu 2023-03-10 05:04:57 +01:00
amdsc21
080fc35c4b finish ellpack_page_raw_format.cu 2023-03-10 05:02:35 +01:00
amdsc21
ccce4cf7e1 finish data.cu 2023-03-10 05:00:57 +01:00
amdsc21
713ab9e1a0 finish sparse_page_source.cu 2023-03-10 04:42:56 +01:00
amdsc21
134cbfddbe finish gradient_index.cu 2023-03-10 04:40:33 +01:00
amdsc21
6e2c5be83e finish array_interface.cu 2023-03-10 04:36:04 +01:00
amdsc21
185dbce21f finish ellpack_page.cu 2023-03-10 04:26:09 +01:00
amdsc21
49732359ef finish iterative_dmatrix.cu 2023-03-10 03:47:00 +01:00
amdsc21
ec9f500a49 finish proxy_dmatrix.cu 2023-03-10 03:40:07 +01:00
amdsc21
53244bef6f finish simple_dmatrix.cu 2023-03-10 03:38:09 +01:00
amdsc21
f0febfbcac finish gpu_predictor.cu 2023-03-10 01:29:54 +01:00
amdsc21
1c58ff61d1 finish fit_stump.cu 2023-03-10 00:46:29 +01:00
amdsc21
1530c03f7d finish constraints.cu 2023-03-09 22:43:51 +01:00
amdsc21
309268de02 finish updater_gpu_hist.cu 2023-03-09 22:40:44 +01:00
amdsc21
500428cc0f finish row_partitioner.cu 2023-03-09 22:31:11 +01:00
amdsc21
495816f694 finished gradient_based_sampler.cu 2023-03-09 22:26:08 +01:00
amdsc21
df42dd2c53 finished evaluator.cu 2023-03-09 22:22:05 +01:00
amdsc21
f55243fda0 finish evaluate_splits.cu 2023-03-09 22:15:10 +01:00
amdsc21
1e09c21456 finished feature_groups.cu 2023-03-09 21:31:00 +01:00
amdsc21
0ed5d3c849 finished histogram.cu 2023-03-09 21:28:37 +01:00
amdsc21
f67e7de7ef finished communicator.cu 2023-03-09 21:02:48 +01:00
amdsc21
5044713388 finished updater_gpu_coordinate.cu 2023-03-09 20:53:54 +01:00
amdsc21
c875f0425f finished rank_metric.cu 2023-03-09 20:48:31 +01:00
amdsc21
4fd08b6c32 finished survival_metric.cu 2023-03-09 20:41:52 +01:00
amdsc21
b9d86d44d6 finish multiclass_metric.cu 2023-03-09 20:37:16 +01:00
amdsc21
a56055225a fix auc.cu 2023-03-09 20:29:38 +01:00
amdsc21
6eba0a56ec fix CMakeLists.txt 2023-03-09 18:57:14 +01:00
amdsc21
00c24a58b1 finish elementwise_metric.cu 2023-03-08 22:50:07 +01:00
amdsc21
6fa248b75f try elementwise_metric.cu 2023-03-08 22:42:48 +01:00
amdsc21
946f9e9802 fix gbtree.cc 2023-03-08 21:44:20 +01:00
amdsc21
4c4e5af29c port elementwise_metric.cu 2023-03-08 21:39:56 +01:00
amdsc21
7e1b06417b finish gbtree.cu porting 2023-03-08 21:09:56 +01:00
amdsc21
cdd7794641 add unused option 2023-03-08 20:37:53 +01:00
amdsc21
cd743a1ae9 fix DispatchRadixSort 2023-03-08 20:31:23 +01:00
amdsc21
a45005863b fix DispatchScan 2023-03-08 20:15:33 +01:00
amdsc21
bdcb036592 add context.hip 2023-03-08 07:34:19 +01:00
amdsc21
7a3a9b682a add device_helpers.hip.h 2023-03-08 07:18:33 +01:00
amdsc21
0a711662c3 add device_helpers.hip.h 2023-03-08 07:10:32 +01:00
amdsc21
312e58ec99 enable rocm, fix common.h 2023-03-08 06:45:03 +01:00
amdsc21
ca8f4e7993 enable rocm, fix stats.cuh 2023-03-08 06:43:06 +01:00
amdsc21
60795f22de enable rocm, fix linalg_op.cuh 2023-03-08 06:42:20 +01:00
amdsc21
05fdca893f enable rocm, fix cuda_pinned_allocator.h 2023-03-08 06:39:40 +01:00
amdsc21
d8cc93f3f2 enable rocm, fix algorithm.cuh 2023-03-08 06:38:35 +01:00
amdsc21
62c4efac51 enable rocm, fix transform.h 2023-03-08 06:37:34 +01:00
amdsc21
ba9e00d911 enable rocm, fix hist_util.cuh 2023-03-08 06:36:15 +01:00
amdsc21
d3be67ad8e enable rocm, fix quantile.cuh 2023-03-08 06:32:09 +01:00
amdsc21
2eb0b6aae4 enable rocm, fix threading_utils.cuh 2023-03-08 06:30:52 +01:00
amdsc21
327f1494f1 enable rocm, fix cuda_context.cuh 2023-03-08 06:29:45 +01:00
amdsc21
fa92aa56ee enable rocm, fix device_adapter.cuh 2023-03-08 06:26:31 +01:00
amdsc21
427f6c2a1a enable rocm, fix simple_dmatrix.cuh 2023-03-08 06:24:34 +01:00
amdsc21
270c7b4802 enable rocm, fix row_partitioner.cuh 2023-03-08 06:22:25 +01:00
amdsc21
0fc1f640a9 enable rocm, fix nccl_device_communicator.cuh 2023-03-08 06:18:13 +01:00
amdsc21
762fd9028d enable rocm, fix device_communicator_adapter.cuh 2023-03-08 06:13:29 +01:00
amdsc21
f2009533e1 rm hip.h 2023-03-08 06:04:01 +01:00
amdsc21
53b5cd73f2 add hip flags 2023-03-08 03:42:51 +01:00
amdsc21
52b05d934e add hip 2023-03-08 03:32:19 +01:00
amdsc21
840f15209c add HIP flags, common 2023-03-08 03:11:49 +01:00
amdsc21
1e1c7fd8d5 add HIP flags, c_api 2023-03-08 01:34:37 +01:00
amdsc21
f5f800c80d add HIP flags 2023-03-08 01:33:38 +01:00
amdsc21
6b7be96373 add HIP flags 2023-03-08 01:22:25 +01:00
amdsc21
75712b9c3c enable HIP flags 2023-03-08 01:10:07 +01:00
amdsc21
ed45aa2816 Merge branch 'master' into dev-hui 2023-03-08 00:39:33 +01:00
amdsc21
f286ae5bfa add hip rocthrust hipcub 2023-03-07 06:35:00 +01:00
amdsc21
f13a7f8d91 add submodules 2023-03-07 05:44:24 +01:00
amdsc21
c51a1c9aae rename hip.cc to hip 2023-03-07 05:39:53 +01:00
amdsc21
30de728631 fix hip.cc 2023-03-07 05:11:42 +01:00
amdsc21
75fa15b36d add hip support 2023-03-07 04:02:49 +01:00
amdsc21
eb30cb6293 add hip support 2023-03-07 03:49:52 +01:00
amdsc21
cafbfce51f add hip.h 2023-03-07 03:46:26 +01:00
amdsc21
6039a71e6c add hip structure 2023-03-07 02:17:19 +01:00
317 changed files with 4079 additions and 908 deletions

View File

@ -51,14 +51,14 @@ jobs:
id: extract_branch
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'windows-latest'
(matrix.os == 'windows-latest' || matrix.os == 'macos-11')
- name: Publish artifact xgboost4j.dll to S3
run: |
cd lib/
Rename-Item -Path xgboost4j.dll -NewName xgboost4j_${{ github.sha }}.dll
dir
python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/ --acl public-read
python -m awscli s3 cp xgboost4j_${{ github.sha }}.dll s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'windows-latest'
@ -66,6 +66,19 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
- name: Publish artifact libxgboost4j.dylib to S3
run: |
cd lib/
mv -v libxgboost4j.dylib libxgboost4j_${{ github.sha }}.dylib
ls
python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'macos-11'
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
- name: Test XGBoost4J (Core, Spark, Examples)
run: |

View File

@ -25,7 +25,7 @@ jobs:
with:
submodules: 'true'
- uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
- uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
with:
r-version: ${{ matrix.config.r }}
@ -64,7 +64,7 @@ jobs:
with:
submodules: 'true'
- uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
- uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
with:
r-version: ${{ matrix.config.r }}

3
.gitmodules vendored
View File

@ -5,3 +5,6 @@
[submodule "gputreeshap"]
path = gputreeshap
url = https://github.com/rapidsai/gputreeshap.git
[submodule "rocgputreeshap"]
path = rocgputreeshap
url = https://github.com/ROCmSoftwarePlatform/rocgputreeshap

View File

@ -32,4 +32,3 @@ formats:
python:
install:
- requirements: doc/requirements.txt
system_packages: true

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 2.0.0)
project(xgboost LANGUAGES CXX C VERSION 2.0.3)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
@ -58,7 +58,7 @@ option(ENABLE_ALL_WARNINGS "Enable all compiler warnings. Only effective for GCC
option(LOG_CAPI_INVOCATION "Log all C API invocations for debugging" OFF)
option(GOOGLE_TEST "Build google tests" OFF)
option(USE_DMLC_GTEST "Use google tests bundled with dmlc-core submodule" OFF)
option(USE_DEVICE_DEBUG "Generate CUDA device debug info." OFF)
option(USE_DEVICE_DEBUG "Generate CUDA/HIP device debug info." OFF)
option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF)
set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header")
option(RABIT_MOCK "Build rabit with mock" OFF)
@ -71,6 +71,10 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
## HIP
option(USE_HIP "Build with GPU acceleration" OFF)
option(USE_RCCL "Build with RCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_RCCL "Build with shared RCCL library." OFF)
## Copied From dmlc
option(USE_HDFS "Build with HDFS support" OFF)
option(USE_AZURE "Build with AZURE support" OFF)
@ -93,6 +97,7 @@ option(ADD_PKGCONFIG "Add xgboost.pc into system." ON)
if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
message(SEND_ERROR "Do not enable `USE_DEBUG_OUTPUT' with release build.")
endif (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
if (USE_NCCL AND NOT (USE_CUDA))
message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.")
endif (USE_NCCL AND NOT (USE_CUDA))
@ -102,6 +107,17 @@ endif (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
if (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
message(SEND_ERROR "Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.")
endif (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
if (USE_RCCL AND NOT (USE_HIP))
message(SEND_ERROR "`USE_RCCL` must be enabled with `USE_HIP` flag.")
endif (USE_RCCL AND NOT (USE_HIP))
if (USE_DEVICE_DEBUG AND NOT (USE_HIP))
message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_HIP` flag.")
endif (USE_DEVICE_DEBUG AND NOT (USE_HIP))
if (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
message(SEND_ERROR "Build XGBoost with -DUSE_RCCL=ON to enable BUILD_WITH_SHARED_RCCL.")
endif (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
if (JVM_BINDINGS AND R_LIB)
message(SEND_ERROR "`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.")
endif (JVM_BINDINGS AND R_LIB)
@ -115,9 +131,15 @@ endif (USE_AVX)
if (PLUGIN_LZ4)
message(SEND_ERROR "The option 'PLUGIN_LZ4' is removed from XGBoost.")
endif (PLUGIN_LZ4)
if (PLUGIN_RMM AND NOT (USE_CUDA))
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
endif (PLUGIN_RMM AND NOT (USE_CUDA))
if (PLUGIN_RMM AND NOT (USE_HIP))
message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_HIP` flag.")
endif (PLUGIN_RMM AND NOT (USE_HIP))
if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
@ -170,6 +192,24 @@ if (USE_CUDA)
find_package(CUDAToolkit REQUIRED)
endif (USE_CUDA)
if (USE_HIP)
set(USE_OPENMP ON CACHE BOOL "HIP requires OpenMP" FORCE)
# `export CXX=' is ignored by CMake HIP.
set(CMAKE_HIP_HOST_COMPILER ${CMAKE_CXX_COMPILER})
message(STATUS "Configured HIP host compiler: ${CMAKE_HIP_HOST_COMPILER}")
enable_language(HIP)
find_package(hip REQUIRED)
find_package(rocthrust REQUIRED)
find_package(hipcub REQUIRED)
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -I${HIP_INCLUDE_DIRS} -I${HIP_INCLUDE_DIRS}/hip")
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -Wunused-result -w")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_INCLUDE_DIRS}")
add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
endif (USE_HIP)
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
@ -209,6 +249,10 @@ if (USE_NCCL)
find_package(Nccl REQUIRED)
endif (USE_NCCL)
if (USE_RCCL)
find_package(rccl REQUIRED)
endif (USE_RCCL)
# dmlc-core
msvc_use_static_runtime()
if (FORCE_SHARED_CRT)
@ -233,6 +277,11 @@ endif (RABIT_BUILD_MPI)
add_subdirectory(${xgboost_SOURCE_DIR}/src)
target_link_libraries(objxgboost PUBLIC dmlc)
# Link -lstdc++fs for GCC 8.x
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
target_link_libraries(objxgboost PUBLIC stdc++fs)
endif()
# Exports some R specific definitions and objects
if (R_LIB)
add_subdirectory(${xgboost_SOURCE_DIR}/R-package)

View File

@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 2.0.0.1
Date: 2022-10-18
Version: 2.0.3.1
Date: 2023-12-14
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),

View File

@ -70,7 +70,7 @@ cb.print.evaluation <- function(period = 1, showsd = TRUE) {
i == env$begin_iteration ||
i == env$end_iteration) {
stdev <- if (showsd) env$bst_evaluation_err else NULL
msg <- format.eval.string(i, env$bst_evaluation, stdev)
msg <- .format_eval_string(i, env$bst_evaluation, stdev)
cat(msg, '\n')
}
}
@ -380,7 +380,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
if ((maximize && score > best_score) ||
(!maximize && score < best_score)) {
best_msg <<- format.eval.string(i, env$bst_evaluation, env$bst_evaluation_err)
best_msg <<- .format_eval_string(
i, env$bst_evaluation, env$bst_evaluation_err
)
best_score <<- score
best_iteration <<- i
best_ntreelimit <<- best_iteration * env$num_parallel_tree
@ -754,7 +756,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
#
# Format the evaluation metric string
format.eval.string <- function(iter, eval_res, eval_err = NULL) {
.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
if (length(eval_res) == 0)
stop('no evaluation results')
enames <- names(eval_res)

18
R-package/configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for xgboost 2.0.0.
# Generated by GNU Autoconf 2.71 for xgboost 2.0.3.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='2.0.0'
PACKAGE_STRING='xgboost 2.0.0'
PACKAGE_VERSION='2.0.3'
PACKAGE_STRING='xgboost 2.0.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures xgboost 2.0.0 to adapt to many kinds of systems.
\`configure' configures xgboost 2.0.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1287,7 +1287,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 2.0.0:";;
short | recursive ) echo "Configuration of xgboost 2.0.3:";;
esac
cat <<\_ACEOF
@ -1367,7 +1367,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
xgboost configure 2.0.0
xgboost configure 2.0.3
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 2.0.0, which was
It was created by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by xgboost $as_me 2.0.0, which was
This file was extended by xgboost $as_me 2.0.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
xgboost config.status 2.0.0
xgboost config.status 2.0.3
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

View File

@ -2,7 +2,7 @@
AC_PREREQ(2.69)
AC_INIT([xgboost],[2.0.0],[],[xgboost],[])
AC_INIT([xgboost],[2.0.3],[],[xgboost],[])
: ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then

View File

@ -120,11 +120,25 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
ctx.nthread = asInteger(n_threads);
std::int32_t threads = ctx.Threads();
if (is_int) {
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol + j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j];
auto v = iin[i + nrow * j];
if (v == NA_INTEGER) {
data[i * ncol + j] = std::numeric_limits<float>::quiet_NaN();
} else {
data[i * ncol + j] = static_cast<float>(v);
}
}
});
} else {
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol + j] = din[i + nrow * j];
}
});
}
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol,
asReal(missing), &handle, threads));

View File

@ -32,7 +32,7 @@ namespace common {
bool CheckNAN(double v) {
return ISNAN(v);
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
double LogGamma(double v) {
return lgammafn(v);
}

View File

@ -56,6 +56,42 @@ test_that("xgb.DMatrix: basic construction", {
expect_equal(raw_fd, raw_dgc)
})
test_that("xgb.DMatrix: NA", {
n_samples <- 3
x <- cbind(
x1 = sample(x = 4, size = n_samples, replace = TRUE),
x2 = sample(x = 4, size = n_samples, replace = TRUE)
)
x[1, "x1"] <- NA
m <- xgb.DMatrix(x)
xgb.DMatrix.save(m, "int.dmatrix")
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
colnames(x) <- c("x1", "x2")
m <- xgb.DMatrix(x)
xgb.DMatrix.save(m, "float.dmatrix")
iconn <- file("int.dmatrix", "rb")
fconn <- file("float.dmatrix", "rb")
expect_equal(file.size("int.dmatrix"), file.size("float.dmatrix"))
bytes <- file.size("int.dmatrix")
idmatrix <- readBin(iconn, "raw", n = bytes)
fdmatrix <- readBin(fconn, "raw", n = bytes)
expect_equal(length(idmatrix), length(fdmatrix))
expect_equal(idmatrix, fdmatrix)
close(iconn)
close(fconn)
file.remove("int.dmatrix")
file.remove("float.dmatrix")
})
test_that("xgb.DMatrix: saving, loading", {
# save to a local file
dtest1 <- xgb.DMatrix(test_data, label = test_label)

View File

@ -181,6 +181,23 @@ function(xgboost_set_cuda_flags target)
CUDA_RUNTIME_LIBRARY Static)
endfunction(xgboost_set_cuda_flags)
# Set HIP related flags to target.
function(xgboost_set_hip_flags target)
if (USE_DEVICE_DEBUG)
target_compile_options(${target} PRIVATE
$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:HIP>>:-G>)
endif (USE_DEVICE_DEBUG)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_HIP=1)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/rocgputreeshap)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/warp-primitives/include)
set_target_properties(${target} PROPERTIES
HIP_STANDARD 17
HIP_STANDARD_REQUIRED ON
HIP_SEPARABLE_COMPILATION OFF)
endfunction(xgboost_set_hip_flags)
macro(xgboost_link_nccl target)
if (BUILD_STATIC_LIB)
target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR})
@ -193,6 +210,20 @@ macro(xgboost_link_nccl target)
endif (BUILD_STATIC_LIB)
endmacro(xgboost_link_nccl)
macro(xgboost_link_rccl target)
if(BUILD_STATIC_LIB)
target_include_directories(${target} PUBLIC ${RCCL_INCLUDE_DIR}/rccl)
target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_RCCL=1)
target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
target_link_libraries(${target} PUBLIC ${RCCL_LIBRARY})
else()
target_include_directories(${target} PRIVATE ${RCCL_INCLUDE_DIR}/rccl)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_RCCL=1)
target_link_directories(${target} PUBLIC ${HIP_LIB_INSTALL_DIR})
target_link_libraries(${target} PRIVATE ${RCCL_LIBRARY})
endif()
endmacro()
# compile options
macro(xgboost_target_properties target)
set_target_properties(${target} PROPERTIES
@ -215,6 +246,10 @@ macro(xgboost_target_properties target)
-Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
-Wall -Wextra -Wno-expansion-to-defined>
)
target_compile_options(${target} PUBLIC
$<IF:$<COMPILE_LANGUAGE:HIP>,
-Wall -Wextra >
)
endif(ENABLE_ALL_WARNINGS)
target_compile_options(${target}
@ -283,6 +318,10 @@ macro(xgboost_target_link_libraries target)
target_link_libraries(${target} PUBLIC CUDA::cudart_static)
endif (USE_CUDA)
if (USE_HIP)
xgboost_set_hip_flags(${target})
endif (USE_HIP)
if (PLUGIN_RMM)
target_link_libraries(${target} PRIVATE rmm::rmm)
endif (PLUGIN_RMM)
@ -291,6 +330,10 @@ macro(xgboost_target_link_libraries target)
xgboost_link_nccl(${target})
endif (USE_NCCL)
if(USE_RCCL)
xgboost_link_rccl(${target})
endif()
if (USE_NVTX)
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
endif (USE_NVTX)

View File

@ -3,6 +3,8 @@
set(USE_OPENMP @USE_OPENMP@)
set(USE_CUDA @USE_CUDA@)
set(USE_NCCL @USE_NCCL@)
set(USE_HIP @USE_HIP@)
set(USE_RCCL @USE_RCCL@)
set(XGBOOST_BUILD_STATIC_LIB @BUILD_STATIC_LIB@)
include(CMakeFindDependencyMacro)
@ -15,6 +17,9 @@ if (XGBOOST_BUILD_STATIC_LIB)
if(USE_CUDA)
find_dependency(CUDA)
endif()
if(USE_HIP)
find_dependency(HIP)
endif()
# nccl should be linked statically if xgboost is built as static library.
endif (XGBOOST_BUILD_STATIC_LIB)

View File

@ -4,13 +4,13 @@ python mapfeat.py
# split train and test
python mknfold.py machine.txt 1
# training and output the models
../../xgboost machine.conf
../../../xgboost machine.conf
# output predictions of test data
../../xgboost machine.conf task=pred model_in=0002.model
../../../xgboost machine.conf task=pred model_in=0002.model
# print the boosters of 0002.model in dump.raw.txt
../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
../../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
# print the boosters of 0002.model in dump.nice.txt with feature map
../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
../../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
# cat the result
cat dump.nice.txt

View File

@ -0,0 +1,79 @@
import argparse
import pathlib
import re
import shutil
def main(args):
if args.scala_version == "2.12":
scala_ver = "2.12"
scala_patchver = "2.12.18"
elif args.scala_version == "2.13":
scala_ver = "2.13"
scala_patchver = "2.13.11"
else:
raise ValueError(f"Unsupported Scala version: {args.scala_version}")
# Clean artifacts
if args.purge_artifacts:
for target in pathlib.Path("jvm-packages/").glob("**/target"):
if target.is_dir():
print(f"Removing {target}...")
shutil.rmtree(target)
# Update pom.xml
for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"):
print(f"Updating {pom}...")
with open(pom, "r", encoding="utf-8") as f:
lines = f.readlines()
with open(pom, "w", encoding="utf-8") as f:
replaced_scalaver = False
replaced_scala_binver = False
for line in lines:
for artifact in [
"xgboost-jvm",
"xgboost4j",
"xgboost4j-gpu",
"xgboost4j-spark",
"xgboost4j-spark-gpu",
"xgboost4j-flink",
"xgboost4j-example",
]:
line = re.sub(
f"<artifactId>{artifact}_[0-9\\.]*",
f"<artifactId>{artifact}_{scala_ver}",
line,
)
# Only replace the first occurrence of scala.version
if not replaced_scalaver:
line, nsubs = re.subn(
r"<scala.version>[0-9\.]*",
f"<scala.version>{scala_patchver}",
line,
)
if nsubs > 0:
replaced_scalaver = True
# Only replace the first occurrence of scala.binary.version
if not replaced_scala_binver:
line, nsubs = re.subn(
r"<scala.binary.version>[0-9\.]*",
f"<scala.binary.version>{scala_ver}",
line,
)
if nsubs > 0:
replaced_scala_binver = True
f.write(line)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--purge-artifacts", action="store_true")
parser.add_argument(
"--scala-version",
type=str,
required=True,
help="Version of Scala to use in the JVM packages",
choices=["2.12", "2.13"],
)
parsed_args = parser.parse_args()
main(parsed_args)

View File

@ -2,7 +2,6 @@ import argparse
import errno
import glob
import os
import platform
import re
import shutil
import subprocess
@ -21,12 +20,14 @@ def normpath(path):
else:
return normalized
def cp(source, target):
source = normpath(source)
target = normpath(target)
print("cp {0} {1}".format(source, target))
shutil.copy(source, target)
def maybe_makedirs(path):
path = normpath(path)
print("mkdir -p " + path)
@ -36,6 +37,7 @@ def maybe_makedirs(path):
if e.errno != errno.EEXIST:
raise
@contextmanager
def cd(path):
path = normpath(path)
@ -47,18 +49,22 @@ def cd(path):
finally:
os.chdir(cwd)
def run(command, **kwargs):
print(command)
subprocess.check_call(command, shell=True, **kwargs)
def get_current_git_tag():
out = subprocess.check_output(["git", "tag", "--points-at", "HEAD"])
return out.decode().split("\n")[0]
def get_current_commit_hash():
out = subprocess.check_output(["git", "rev-parse", "HEAD"])
return out.decode().split("\n")[0]
def get_current_git_branch():
out = subprocess.check_output(["git", "log", "-n", "1", "--pretty=%d", "HEAD"])
m = re.search(r"release_[0-9\.]+", out.decode())
@ -66,38 +72,49 @@ def get_current_git_branch():
raise ValueError("Expected branch name of form release_xxx")
return m.group(0)
def retrieve(url, filename=None):
print(f"{url} -> {filename}")
return urlretrieve(url, filename)
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--release-version", type=str, required=True,
help="Version of the release being prepared")
parser.add_argument(
"--release-version",
type=str,
required=True,
help="Version of the release being prepared",
)
args = parser.parse_args()
if sys.platform != "darwin" or platform.machine() != "x86_64":
raise NotImplementedError("Please run this script using an Intel Mac")
version = args.release_version
expected_git_tag = "v" + version
current_git_tag = get_current_git_tag()
if current_git_tag != expected_git_tag:
if not current_git_tag:
raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
f"Run: git checkout {expected_git_tag}")
raise ValueError(f"Expected git tag {expected_git_tag} but current HEAD is at tag "
f"{current_git_tag}. Run: git checkout {expected_git_tag}")
raise ValueError(
f"Expected git tag {expected_git_tag} but current HEAD has no tag. "
f"Run: git checkout {expected_git_tag}"
)
raise ValueError(
f"Expected git tag {expected_git_tag} but current HEAD is at tag "
f"{current_git_tag}. Run: git checkout {expected_git_tag}"
)
commit_hash = get_current_commit_hash()
git_branch = get_current_git_branch()
print(f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}")
print(
f"Using commit {commit_hash} of branch {git_branch}, git tag {current_git_tag}"
)
with cd("jvm-packages/"):
print("====copying pure-Python tracker====")
for use_cuda in [True, False]:
xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j"
cp("../python-package/xgboost/tracker.py", f"{xgboost4j}/src/main/resources")
cp(
"../python-package/xgboost/tracker.py",
f"{xgboost4j}/src/main/resources",
)
print("====copying resources for testing====")
with cd("../demo/CLI/regression"):
@ -115,7 +132,12 @@ def main():
cp(file, f"{xgboost4j_spark}/src/test/resources")
print("====Creating directories to hold native binaries====")
for os_ident, arch in [("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64")]:
for os_ident, arch in [
("linux", "x86_64"),
("windows", "x86_64"),
("macos", "x86_64"),
("macos", "aarch64"),
]:
output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}"
maybe_makedirs(output_dir)
for os_ident, arch in [("linux", "x86_64")]:
@ -123,52 +145,98 @@ def main():
maybe_makedirs(output_dir)
print("====Downloading native binaries from CI====")
nightly_bucket_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
maven_repo_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
nightly_bucket_prefix = (
"https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds"
)
maven_repo_prefix = (
"https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc"
)
retrieve(url=f"{nightly_bucket_prefix}/{git_branch}/xgboost4j_{commit_hash}.dll",
filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll")
retrieve(
url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/xgboost4j_{commit_hash}.dll",
filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll",
)
retrieve(
url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib",
filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib",
)
retrieve(
url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib",
filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib",
)
with tempfile.TemporaryDirectory() as tempdir:
# libxgboost4j.so for Linux x86_64, CPU only
zip_path = os.path.join(tempdir, "xgboost4j_2.12.jar")
extract_dir = os.path.join(tempdir, "xgboost4j")
retrieve(url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
retrieve(
url=f"{maven_repo_prefix}/xgboost4j_2.12/{version}/"
f"xgboost4j_2.12-{version}.jar",
filename=zip_path)
filename=zip_path,
)
os.mkdir(extract_dir)
with zipfile.ZipFile(zip_path, "r") as t:
t.extractall(extract_dir)
cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
"xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
cp(
os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
"xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
)
# libxgboost4j.so for Linux x86_64, GPU support
zip_path = os.path.join(tempdir, "xgboost4j-gpu_2.12.jar")
extract_dir = os.path.join(tempdir, "xgboost4j-gpu")
retrieve(url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
retrieve(
url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/"
f"xgboost4j-gpu_2.12-{version}.jar",
filename=zip_path)
filename=zip_path,
)
os.mkdir(extract_dir)
with zipfile.ZipFile(zip_path, "r") as t:
t.extractall(extract_dir)
cp(os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
"xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so")
cp(
os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"),
"xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so",
)
print("====Next Steps====")
print("1. Gain upload right to Maven Central repo.")
print("1-1. Sign up for a JIRA account at Sonatype: ")
print("1-2. File a JIRA ticket: "
print(
"1-2. File a JIRA ticket: "
"https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134. Example: "
"https://issues.sonatype.org/browse/OSSRH-67724")
print("2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
"https://central.sonatype.org/publish/publish-maven/")
print("3. Now on a Mac machine, run:")
print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests")
print("4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
"Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-1085 "
"https://issues.sonatype.org/browse/OSSRH-67724"
)
print(
"2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in "
"https://central.sonatype.org/publish/publish-maven/"
)
print(
"3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. "
"Make sure to use an Internet connection with fast upload speed:"
)
print(
" # Skip native build, since we have all needed native binaries from CI\n"
" export MAVEN_SKIP_NATIVE_BUILD=1\n"
" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests"
)
print(
"4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging "
"Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx "
"to inspect the staged JAR files. Finally, press Release button to publish the "
"artifacts to the Maven Central repository.")
"artifacts to the Maven Central repository. The top-level metapackage should be "
"named xgboost-jvm_2.12."
)
print(
"5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n"
" export MAVEN_SKIP_NATIVE_BUILD=1\n"
" python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n"
" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests"
)
print(
"6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. "
"The top-level metapackage should be named xgboost-jvm_2.13."
)
if __name__ == "__main__":
main()

View File

@ -329,7 +329,7 @@ Parameters for Linear Booster (``booster=gblinear``)
- Choice of algorithm to fit linear model
- ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.
- ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution.
- ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the ``device`` parameter is set to ``cuda`` or ``gpu``, a GPU variant would be used.
* ``feature_selector`` [default= ``cyclic``]

View File

@ -58,19 +58,19 @@
/*!
* \brief Tag function as usable by device
*/
#if defined (__CUDA__) || defined(__NVCC__)
#if defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#define XGBOOST_DEVICE __host__ __device__
#else
#define XGBOOST_DEVICE
#endif // defined (__CUDA__) || defined(__NVCC__)
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#if defined(__CUDA__) || defined(__CUDACC__)
#if defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)
#define XGBOOST_HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
#define XGBOOST_DEV_INLINE __device__ __forceinline__
#else
#define XGBOOST_HOST_DEV_INLINE
#define XGBOOST_DEV_INLINE
#endif // defined(__CUDA__) || defined(__CUDACC__)
#endif // defined(__CUDA__) || defined(__CUDACC__) || defined(__HIPCC__)
// These check are for Makefile.
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
@ -274,8 +274,8 @@ class GradientPairInt64 {
GradientPairInt64(GradientPairInt64 const &g) = default;
GradientPairInt64 &operator=(GradientPairInt64 const &g) = default;
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedGrad() const { return grad_; }
XGBOOST_DEVICE [[nodiscard]] T GetQuantisedHess() const { return hess_; }
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedGrad() const { return grad_; }
[[nodiscard]] XGBOOST_DEVICE T GetQuantisedHess() const { return hess_; }
XGBOOST_DEVICE GradientPairInt64 &operator+=(const GradientPairInt64 &rhs) {
grad_ += rhs.grad_;

View File

@ -1,5 +1,5 @@
/*!
* Copyright 2020 by Contributors
/**
* Copyright 2020-2023, XGBoost Contributors
* \file global_config.h
* \brief Global configuration for XGBoost
* \author Hyunsu Cho
@ -7,24 +7,22 @@
#ifndef XGBOOST_GLOBAL_CONFIG_H_
#define XGBOOST_GLOBAL_CONFIG_H_
#include <xgboost/parameter.h>
#include <vector>
#include <string>
#include <dmlc/thread_local.h> // for ThreadLocalStore
#include <xgboost/parameter.h> // for XGBoostParameter
#include <cstdint> // for int32_t
namespace xgboost {
class Json;
struct GlobalConfiguration : public XGBoostParameter<GlobalConfiguration> {
int verbosity { 1 };
bool use_rmm { false };
std::int32_t verbosity{1};
bool use_rmm{false};
DMLC_DECLARE_PARAMETER(GlobalConfiguration) {
DMLC_DECLARE_FIELD(verbosity)
.set_range(0, 3)
.set_default(1) // shows only warning
.describe("Flag to print out detailed breakdown of runtime.");
DMLC_DECLARE_FIELD(use_rmm)
.set_default(false)
.describe("Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
DMLC_DECLARE_FIELD(use_rmm).set_default(false).describe(
"Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
}
};

View File

@ -58,11 +58,11 @@
namespace xgboost {
#ifdef __CUDACC__
#if defined(__CUDACC__) || defined(__HIPCC__)
// Sets a function to call instead of cudaSetDevice();
// only added for testing
void SetCudaSetDeviceHandler(void (*handler)(int));
#endif // __CUDACC__
#endif // __CUDACC__ || __HIPCC__
template <typename T> struct HostDeviceVectorImpl;

View File

@ -30,11 +30,11 @@
// decouple it from xgboost.
#ifndef LINALG_HD
#if defined(__CUDA__) || defined(__NVCC__)
#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#define LINALG_HD __host__ __device__
#else
#define LINALG_HD
#endif // defined (__CUDA__) || defined(__NVCC__)
#endif // defined (__CUDA__) || defined(__NVCC__) || defined(__HIPCC__)
#endif // LINALG_HD
namespace xgboost::linalg {
@ -118,7 +118,7 @@ using IndexToTag = std::conditional_t<std::is_integral<RemoveCRType<S>>::value,
template <int32_t n, typename Fn>
LINALG_HD constexpr auto UnrollLoop(Fn fn) {
#if defined __CUDA_ARCH__
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
#pragma unroll n
#endif // defined __CUDA_ARCH__
for (int32_t i = 0; i < n; ++i) {
@ -136,7 +136,7 @@ int32_t NativePopc(T v) {
inline LINALG_HD int Popc(uint32_t v) {
#if defined(__CUDA_ARCH__)
return __popc(v);
#elif defined(__GNUC__) || defined(__clang__)
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
return __builtin_popcount(v);
#elif defined(_MSC_VER)
return __popcnt(v);
@ -148,7 +148,7 @@ inline LINALG_HD int Popc(uint32_t v) {
inline LINALG_HD int Popc(uint64_t v) {
#if defined(__CUDA_ARCH__)
return __popcll(v);
#elif defined(__GNUC__) || defined(__clang__)
#elif defined(__GNUC__) || defined(__clang__) || defined(__HIPCC__)
return __builtin_popcountll(v);
#elif defined(_MSC_VER) && defined(_M_X64)
return __popcnt64(v);

View File

@ -41,7 +41,9 @@
#if defined(__CUDACC__)
#include <cuda_runtime.h>
#endif // defined(__CUDACC__)
#elif defined(__HIPCC__)
#include <hip/hip_runtime.h>
#endif
/*!
* The version number 1910 is picked up from GSL.
@ -104,7 +106,42 @@ namespace common {
#define SPAN_CHECK KERNEL_CHECK
#else // ------------------------------ not CUDA ----------------------------
#elif defined(__HIPCC__)
// Usual logging facility is not available inside device code.
#if defined(_MSC_VER)
// Windows HIP doesn't have __assert_fail.
#define HIP_KERNEL_CHECK(cond) \
do { \
if (XGBOOST_EXPECT(!(cond), false)) { \
__builtin_trap(); \
} \
} while (0)
#else // defined(_MSC_VER)
#define __ASSERT_STR_HELPER(x) #x
#if 0
#define HIP_KERNEL_CHECK(cond) \
(XGBOOST_EXPECT((cond), true) \
? static_cast<void>(0) \
: __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__))
#else
#define HIP_KERNEL_CHECK(cond) \
(XGBOOST_EXPECT((cond), true) \
? static_cast<void>(0) \
: __builtin_trap())
#endif
#endif // defined(_MSC_VER)
#define KERNEL_CHECK HIP_KERNEL_CHECK
#define SPAN_CHECK KERNEL_CHECK
#else // ------------------------------ not CUDA or HIP ----------------------------
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
@ -120,7 +157,7 @@ namespace common {
#endif // defined(XGBOOST_STRICT_R_MODE)
#endif // __CUDA_ARCH__
#endif // __CUDA_ARCH__ || __HIPCC__
#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs))
@ -317,7 +354,7 @@ struct IsSpanOracle<Span<T, Extent>> : std::true_type {};
template <class T>
struct IsSpan : public IsSpanOracle<typename std::remove_cv<T>::type> {};
// Re-implement std algorithms here to adopt CUDA.
// Re-implement std algorithms here to adopt CUDA/HIP
template <typename T>
struct Less {
XGBOOST_DEVICE constexpr bool operator()(const T& _x, const T& _y) const {

View File

@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 2 /* NOLINT */
#define XGBOOST_VER_MINOR 0 /* NOLINT */
#define XGBOOST_VER_PATCH 0 /* NOLINT */
#define XGBOOST_VER_PATCH 3 /* NOLINT */
#endif // XGBOOST_VERSION_CONFIG_H_

View File

@ -9,6 +9,11 @@ if (USE_CUDA)
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu)
endif (USE_CUDA)
if (USE_HIP)
list(APPEND JVM_SOURCES
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.hip)
endif (USE_HIP)
add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})
if (ENABLE_ALL_WARNINGS)
@ -25,4 +30,3 @@ target_include_directories(xgboost4j
${PROJECT_SOURCE_DIR}/rabit/include)
set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
target_link_libraries(xgboost4j PRIVATE ${JAVA_JVM_LIBRARY})

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python
import errno
import argparse
import errno
import glob
import os
import platform
@ -19,11 +19,12 @@ CONFIG = {
"USE_HDFS": "OFF",
"USE_AZURE": "OFF",
"USE_S3": "OFF",
"USE_CUDA": "OFF",
"USE_NCCL": "OFF",
"USE_HIP": "OFF",
"USE_RCCL": "OFF",
"JVM_BINDINGS": "ON",
"LOG_CAPI_INVOCATION": "OFF"
"LOG_CAPI_INVOCATION": "OFF",
}
@ -70,26 +71,22 @@ def normpath(path):
return normalized
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF')
parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF')
cli_args = parser.parse_args()
def native_build(args):
if sys.platform == "darwin":
# Enable of your compiler supports OpenMP.
CONFIG["USE_OPENMP"] = "OFF"
os.environ["JAVA_HOME"] = subprocess.check_output(
"/usr/libexec/java_home").strip().decode()
os.environ["JAVA_HOME"] = (
subprocess.check_output("/usr/libexec/java_home").strip().decode()
)
print("building Java wrapper")
with cd(".."):
build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build'
build_dir = "build-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "build"
maybe_makedirs(build_dir)
with cd(build_dir):
if sys.platform == "win32":
# Force x64 build on Windows.
maybe_generator = ' -A x64'
maybe_generator = " -A x64"
else:
maybe_generator = ""
if sys.platform == "linux":
@ -97,12 +94,15 @@ if __name__ == "__main__":
else:
maybe_parallel_build = ""
if cli_args.log_capi_invocation == 'ON':
CONFIG['LOG_CAPI_INVOCATION'] = 'ON'
if cli_args.log_capi_invocation == "ON":
CONFIG["LOG_CAPI_INVOCATION"] = "ON"
if cli_args.use_cuda == 'ON':
CONFIG['USE_CUDA'] = 'ON'
CONFIG['USE_NCCL'] = 'ON'
if cli_args.use_cuda == "ON":
CONFIG["USE_CUDA"] = "ON"
CONFIG["USE_NCCL"] = "ON"
elif cli_args.use_hip == "ON":
CONFIG["USE_HIP"] = "ON"
CONFIG["USE_RCCL"] = "ON"
args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
@ -115,7 +115,7 @@ if __name__ == "__main__":
if gpu_arch_flag is not None:
args.append("%s" % gpu_arch_flag)
lib_dir = os.path.join(os.pardir, 'lib')
lib_dir = os.path.join(os.pardir, "lib")
if os.path.exists(lib_dir):
shutil.rmtree(lib_dir)
run("cmake .. " + " ".join(args) + maybe_generator)
@ -125,8 +125,10 @@ if __name__ == "__main__":
run(f'"{sys.executable}" mapfeat.py')
run(f'"{sys.executable}" mknfold.py machine.txt 1')
xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j'
xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j-spark'
xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "xgboost4j"
xgboost4j_spark = (
"xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" or cli_args.use_hip == "ON" else "xgboost4j-spark"
)
print("copying native library")
library_name, os_folder = {
@ -141,14 +143,19 @@ if __name__ == "__main__":
"i86pc": "x86_64", # on Solaris x86_64
"sun4v": "sparc", # on Solaris sparc
"arm64": "aarch64", # on macOS & Windows ARM 64-bit
"aarch64": "aarch64"
"aarch64": "aarch64",
}[platform.machine().lower()]
output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder)
output_folder = "{}/src/main/resources/lib/{}/{}".format(
xgboost4j, os_folder, arch_folder
)
maybe_makedirs(output_folder)
cp("../lib/" + library_name, output_folder)
print("copying pure-Python tracker")
cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j))
cp(
"../python-package/xgboost/tracker.py",
"{}/src/main/resources".format(xgboost4j),
)
print("copying train/test files")
maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
@ -164,3 +171,18 @@ if __name__ == "__main__":
maybe_makedirs("{}/src/test/resources".format(xgboost4j))
for file in glob.glob("../demo/data/agaricus.*"):
cp(file, "{}/src/test/resources".format(xgboost4j))
if __name__ == "__main__":
if "MAVEN_SKIP_NATIVE_BUILD" in os.environ:
print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...")
else:
parser = argparse.ArgumentParser()
parser.add_argument(
"--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF"
)
parser.add_argument(
"--use-cuda", type=str, choices=["ON", "OFF"], default="OFF"
)
cli_args = parser.parse_args()
native_build(cli_args)

View File

@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>
@ -43,6 +43,7 @@
<maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
<log.capi.invocation>OFF</log.capi.invocation>
<use.cuda>OFF</use.cuda>
<use.hip>OFF</use.hip>
<cudf.version>23.08.0</cudf.version>
<spark.rapids.version>23.08.0</spark.rapids.version>
<cudf.classifier>cuda11</cudf.classifier>
@ -189,6 +190,93 @@
</plugins>
</build>
</profile>
<profile>
<id>release-cpu-only</id>
<modules>
<module>xgboost4j</module>
<module>xgboost4j-example</module>
<module>xgboost4j-spark</module>
<module>xgboost4j-flink</module>
</modules>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<id>empty-javadoc-jar</id>
<phase>package</phase>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>javadoc</classifier>
<classesDirectory>${basedir}/javadoc</classesDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId>
<version>3.0.1</version>
<configuration>
<autoVersionSubmodules>true</autoVersionSubmodules>
<useReleaseProfile>false</useReleaseProfile>
<releaseProfiles>release</releaseProfiles>
<goals>deploy</goals>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.13</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
<autoReleaseAfterClose>false</autoReleaseAfterClose>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>assembly</id>
<build>

View File

@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-example</name>
<artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost4j-example_2.12</artifactId>
<version>2.0.3</version>
<packaging>jar</packaging>
<build>
<plugins>
@ -26,7 +26,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-spark_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
@ -37,7 +37,7 @@
</dependency>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>

View File

@ -5,13 +5,13 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-flink</name>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>2.0.3</version>
<properties>
<flink-ml.version>2.2.0</flink-ml.version>
</properties>
@ -30,7 +30,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<artifactId>xgboost4j_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<name>xgboost4j-gpu</name>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.3</version>
<packaging>jar</packaging>
<dependencies>
@ -104,6 +104,8 @@
<argument>${log.capi.invocation}</argument>
<argument>--use-cuda</argument>
<argument>${use.cuda}</argument>
<argument>--use-hip</argument>
<argument>${use.hip}</argument>
</arguments>
<workingDirectory>${user.dir}</workingDirectory>
</configuration>

View File

@ -2,7 +2,7 @@
// Created by bobwang on 2021/9/8.
//
#ifndef XGBOOST_USE_CUDA
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
#include <jni.h>

View File

@ -1,6 +1,10 @@
#include <jni.h>
#if defined(XGBOOST_USE_CUDA)
#include "../../../../src/common/device_helpers.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../../../../src/common/device_helpers.hip.h"
#endif
#include "../../../../src/common/cuda_pinned_allocator.h"
#include "../../../../src/data/array_interface.h"
#include "jvm_utils.h"

View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "xgboost4j-gpu.cu"
#endif

View File

@ -5,11 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-spark-gpu</name>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build>
<plugins>
<plugin>
@ -24,7 +24,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@ -5,11 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j-spark</name>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<artifactId>xgboost4j-spark_2.12</artifactId>
<build>
<plugins>
<plugin>
@ -24,7 +24,7 @@
<dependencies>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<artifactId>xgboost4j_2.12</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@ -5,12 +5,12 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>2.0.3</version>
</parent>
<name>xgboost4j</name>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<artifactId>xgboost4j_2.12</artifactId>
<version>2.0.3</version>
<packaging>jar</packaging>
<dependencies>

View File

@ -15,6 +15,10 @@ class BuildConfiguration: # pylint: disable=R0902
use_cuda: bool = False
# Whether to enable NCCL
use_nccl: bool = False
# Whether to enablea HIP
use_hip: bool = False
# Whether to enable RCCL
use_rccl: bool = False
# Whether to enable HDFS
use_hdfs: bool = False
# Whether to enable Azure Storage

View File

@ -132,17 +132,29 @@ def locate_or_build_libxgboost(
if build_config.use_system_libxgboost:
# Find libxgboost from system prefix
sys_base_prefix = pathlib.Path(sys.base_prefix).absolute().resolve()
libxgboost_sys = sys_base_prefix / "lib" / _lib_name()
if not libxgboost_sys.exists():
sys_prefix = pathlib.Path(sys.base_prefix)
sys_prefix_candidates = [
sys_prefix / "lib",
# Paths possibly used on Windows
sys_prefix / "bin",
sys_prefix / "Library",
sys_prefix / "Library" / "bin",
sys_prefix / "Library" / "lib",
]
sys_prefix_candidates = [
p.expanduser().resolve() for p in sys_prefix_candidates
]
for candidate_dir in sys_prefix_candidates:
libtreelite_sys = candidate_dir / _lib_name()
if libtreelite_sys.exists():
logger.info("Using system XGBoost: %s", str(libtreelite_sys))
return libtreelite_sys
raise RuntimeError(
f"use_system_libxgboost was specified but {_lib_name()} is "
f"not found in {libxgboost_sys.parent}"
f"not found. Paths searched (in order): \n"
+ "\n".join([f"* {str(p)}" for p in sys_prefix_candidates])
)
logger.info("Using system XGBoost: %s", str(libxgboost_sys))
return libxgboost_sys
libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger)
if libxgboost is not None:
return libxgboost

View File

@ -7,7 +7,7 @@ build-backend = "packager.pep517"
[project]
name = "xgboost"
version = "2.0.0-dev"
version = "2.0.3"
authors = [
{ name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{ name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }

View File

@ -1 +1 @@
2.0.0-dev
2.0.3

View File

@ -88,6 +88,18 @@ def is_cudf_available() -> bool:
return False
def is_cupy_available() -> bool:
"""Check cupy package available or not"""
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return True
except ImportError:
return False
try:
import scipy.sparse as scipy_sparse
from scipy.sparse import csr_matrix as scipy_csr

View File

@ -206,6 +206,7 @@ def _load_lib() -> ctypes.CDLL:
lib = ctypes.cdll.LoadLibrary(lib_path)
setattr(lib, "path", os.path.normpath(lib_path))
lib_success = True
break
except OSError as e:
os_error_list.append(str(e))
continue
@ -2399,6 +2400,7 @@ class Booster:
_is_cudf_df,
_is_cupy_array,
_is_list,
_is_np_array_like,
_is_pandas_df,
_is_pandas_series,
_is_tuple,
@ -2428,7 +2430,7 @@ class Booster:
f"got {data.shape[1]}"
)
if isinstance(data, np.ndarray):
if _is_np_array_like(data):
from .data import _ensure_np_dtype
data, _ = _ensure_np_dtype(data, data.dtype)

View File

@ -78,7 +78,6 @@ from .data import _is_cudf_ser, _is_cupy_array
from .sklearn import (
XGBClassifier,
XGBClassifierBase,
XGBClassifierMixIn,
XGBModel,
XGBRanker,
XGBRankerMixIn,
@ -1854,7 +1853,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
"Implementation of the scikit-learn API for XGBoost classification.",
["estimators", "model"],
)
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase):
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
# pylint: disable=missing-class-docstring
async def _fit_async(
self,
@ -2036,10 +2035,6 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBa
preds = da.map_blocks(_argmax, pred_probs, drop_axis=1)
return preds
def load_model(self, fname: ModelIn) -> None:
super().load_model(fname)
self._load_model_attributes(self.get_booster())
@xgboost_model_doc(
"""Implementation of the Scikit-Learn API for XGBoost Ranking.

View File

@ -164,8 +164,8 @@ def _is_scipy_coo(data: DataType) -> bool:
return isinstance(data, scipy.sparse.coo_matrix)
def _is_numpy_array(data: DataType) -> bool:
return isinstance(data, (np.ndarray, np.matrix))
def _is_np_array_like(data: DataType) -> bool:
return hasattr(data, "__array_interface__")
def _ensure_np_dtype(
@ -317,7 +317,6 @@ def pandas_feature_info(
) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]:
"""Handle feature info for pandas dataframe."""
import pandas as pd
from pandas.api.types import is_categorical_dtype, is_sparse
# handle feature names
if feature_names is None and meta is None:
@ -332,10 +331,10 @@ def pandas_feature_info(
if feature_types is None and meta is None:
feature_types = []
for dtype in data.dtypes:
if is_sparse(dtype):
if is_pd_sparse_dtype(dtype):
feature_types.append(_pandas_dtype_mapper[dtype.subtype.name])
elif (
is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
is_pd_cat_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
) and enable_categorical:
feature_types.append(CAT_T)
else:
@ -345,18 +344,13 @@ def pandas_feature_info(
def is_nullable_dtype(dtype: PandasDType) -> bool:
"""Whether dtype is a pandas nullable type."""
from pandas.api.types import (
is_bool_dtype,
is_categorical_dtype,
is_float_dtype,
is_integer_dtype,
)
from pandas.api.types import is_bool_dtype, is_float_dtype, is_integer_dtype
is_int = is_integer_dtype(dtype) and dtype.name in pandas_nullable_mapper
# np.bool has alias `bool`, while pd.BooleanDtype has `boolean`.
is_bool = is_bool_dtype(dtype) and dtype.name == "boolean"
is_float = is_float_dtype(dtype) and dtype.name in pandas_nullable_mapper
return is_int or is_bool or is_float or is_categorical_dtype(dtype)
return is_int or is_bool or is_float or is_pd_cat_dtype(dtype)
def is_pa_ext_dtype(dtype: Any) -> bool:
@ -371,17 +365,48 @@ def is_pa_ext_categorical_dtype(dtype: Any) -> bool:
)
def is_pd_cat_dtype(dtype: PandasDType) -> bool:
"""Wrapper for testing pandas category type."""
import pandas as pd
if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"):
Version = pd.util.version.Version
if Version(pd.__version__) >= Version("2.1.0"):
from pandas import CategoricalDtype
return isinstance(dtype, CategoricalDtype)
from pandas.api.types import is_categorical_dtype
return is_categorical_dtype(dtype)
def is_pd_sparse_dtype(dtype: PandasDType) -> bool:
"""Wrapper for testing pandas sparse type."""
import pandas as pd
if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"):
Version = pd.util.version.Version
if Version(pd.__version__) >= Version("2.1.0"):
from pandas import SparseDtype
return isinstance(dtype, SparseDtype)
from pandas.api.types import is_sparse
return is_sparse(dtype)
def pandas_cat_null(data: DataFrame) -> DataFrame:
"""Handle categorical dtype and nullable extension types from pandas."""
import pandas as pd
from pandas.api.types import is_categorical_dtype
# handle category codes and nullable.
cat_columns = []
nul_columns = []
# avoid an unnecessary conversion if possible
for col, dtype in zip(data.columns, data.dtypes):
if is_categorical_dtype(dtype):
if is_pd_cat_dtype(dtype):
cat_columns.append(col)
elif is_pa_ext_categorical_dtype(dtype):
raise ValueError(
@ -398,7 +423,7 @@ def pandas_cat_null(data: DataFrame) -> DataFrame:
transformed = data
def cat_codes(ser: pd.Series) -> pd.Series:
if is_categorical_dtype(ser.dtype):
if is_pd_cat_dtype(ser.dtype):
return ser.cat.codes
assert is_pa_ext_categorical_dtype(ser.dtype)
# Not yet supported, the index is not ordered for some reason. Alternately:
@ -454,14 +479,12 @@ def _transform_pandas_df(
meta: Optional[str] = None,
meta_type: Optional[NumpyDType] = None,
) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]:
from pandas.api.types import is_categorical_dtype, is_sparse
pyarrow_extension = False
for dtype in data.dtypes:
if not (
(dtype.name in _pandas_dtype_mapper)
or is_sparse(dtype)
or (is_categorical_dtype(dtype) and enable_categorical)
or is_pd_sparse_dtype(dtype)
or (is_pd_cat_dtype(dtype) and enable_categorical)
or is_pa_ext_dtype(dtype)
):
_invalid_dataframe_dtype(data)
@ -515,9 +538,8 @@ def _meta_from_pandas_series(
) -> None:
"""Help transform pandas series for meta data like labels"""
data = data.values.astype("float")
from pandas.api.types import is_sparse
if is_sparse(data):
if is_pd_sparse_dtype(getattr(data, "dtype", data)):
data = data.to_dense() # type: ignore
assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
_meta_from_numpy(data, name, dtype, handle)
@ -539,13 +561,11 @@ def _from_pandas_series(
feature_names: Optional[FeatureNames],
feature_types: Optional[FeatureTypes],
) -> DispatchedDataBackendReturnType:
from pandas.api.types import is_categorical_dtype
if (data.dtype.name not in _pandas_dtype_mapper) and not (
is_categorical_dtype(data.dtype) and enable_categorical
is_pd_cat_dtype(data.dtype) and enable_categorical
):
_invalid_dataframe_dtype(data)
if enable_categorical and is_categorical_dtype(data.dtype):
if enable_categorical and is_pd_cat_dtype(data.dtype):
data = data.cat.codes
return _from_numpy_array(
data.values.reshape(data.shape[0], 1).astype("float"),
@ -1051,7 +1071,7 @@ def dispatch_data_backend(
return _from_scipy_csr(
data.tocsr(), missing, threads, feature_names, feature_types
)
if _is_numpy_array(data):
if _is_np_array_like(data):
return _from_numpy_array(
data, missing, threads, feature_names, feature_types, data_split_mode
)
@ -1194,7 +1214,7 @@ def dispatch_meta_backend(
if _is_tuple(data):
_meta_from_tuple(data, name, dtype, handle)
return
if _is_numpy_array(data):
if _is_np_array_like(data):
_meta_from_numpy(data, name, dtype, handle)
return
if _is_pandas_df(data):
@ -1281,7 +1301,7 @@ def _proxy_transform(
return _transform_dlpack(data), None, feature_names, feature_types
if _is_list(data) or _is_tuple(data):
data = np.array(data)
if _is_numpy_array(data):
if _is_np_array_like(data):
data, _ = _ensure_np_dtype(data, data.dtype)
return data, None, feature_names, feature_types
if _is_scipy_csr(data):
@ -1331,7 +1351,7 @@ def dispatch_proxy_set_data(
if not allow_host:
raise err
if _is_numpy_array(data):
if _is_np_array_like(data):
_check_data_shape(data)
proxy._set_data_from_array(data) # pylint: disable=W0212
return

View File

@ -31,16 +31,15 @@ def find_lib_path() -> List[str]:
]
if sys.platform == "win32":
if platform.architecture()[0] == "64bit":
dll_path.append(os.path.join(curr_path, "../../windows/x64/Release/"))
# hack for pip installation when copy all parent source
# directory here
dll_path.append(os.path.join(curr_path, "./windows/x64/Release/"))
else:
dll_path.append(os.path.join(curr_path, "../../windows/Release/"))
# hack for pip installation when copy all parent source
# directory here
dll_path.append(os.path.join(curr_path, "./windows/Release/"))
# On Windows, Conda may install libs in different paths
dll_path.extend(
[
os.path.join(sys.base_prefix, "bin"),
os.path.join(sys.base_prefix, "Library"),
os.path.join(sys.base_prefix, "Library", "bin"),
os.path.join(sys.base_prefix, "Library", "lib"),
]
)
dll_path = [os.path.join(p, "xgboost.dll") for p in dll_path]
elif sys.platform.startswith(("linux", "freebsd", "emscripten")):
dll_path = [os.path.join(p, "libxgboost.so") for p in dll_path]

View File

@ -43,19 +43,6 @@ from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
from .training import train
class XGBClassifierMixIn: # pylint: disable=too-few-public-methods
"""MixIn for classification."""
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
def _load_model_attributes(self, booster: Booster) -> None:
config = json.loads(booster.save_config())
self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
# binary classification is treated as regression in XGBoost.
self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
class XGBRankerMixIn: # pylint: disable=too-few-public-methods
"""MixIn for ranking, defines the _estimator_type usually defined in scikit-learn
base classes.
@ -845,9 +832,7 @@ class XGBModel(XGBModelBase):
self.get_booster().load_model(fname)
meta_str = self.get_booster().attr("scikit_learn")
if meta_str is None:
return
if meta_str is not None:
meta = json.loads(meta_str)
t = meta.get("_estimator_type", None)
if t is not None and t != self._get_type():
@ -855,11 +840,30 @@ class XGBModel(XGBModelBase):
"Loading an estimator with different type. Expecting: "
f"{self._get_type()}, got: {t}"
)
self.feature_types = self.get_booster().feature_types
self.get_booster().set_attr(scikit_learn=None)
config = json.loads(self.get_booster().save_config())
self._load_model_attributes(config)
load_model.__doc__ = f"""{Booster.load_model.__doc__}"""
def _load_model_attributes(self, config: dict) -> None:
"""Load model attributes without hyper-parameters."""
from sklearn.base import is_classifier
booster = self.get_booster()
self.objective = config["learner"]["objective"]["name"]
self.booster = config["learner"]["gradient_booster"]["name"]
self.base_score = config["learner"]["learner_model_param"]["base_score"]
self.feature_types = booster.feature_types
if is_classifier(self):
self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"])
# binary classification is treated as regression in XGBoost.
self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_
# pylint: disable=too-many-branches
def _configure_fit(
self,
@ -1409,7 +1413,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) ->
Number of boosting rounds.
""",
)
class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
@_deprecate_positional_args
def __init__(
@ -1637,10 +1641,6 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
def classes_(self) -> np.ndarray:
return np.arange(self.n_classes_)
def load_model(self, fname: ModelIn) -> None:
super().load_model(fname)
self._load_model_attributes(self.get_booster())
@xgboost_model_doc(
"scikit-learn API for XGBoost random forest classification.",
@ -2093,7 +2093,17 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
"""
X, qid = _get_qid(X, None)
Xyq = DMatrix(X, y, qid=qid)
# fixme(jiamingy): base margin and group weight is not yet supported. We might
# need to make extra special fields in the dataframe.
Xyq = DMatrix(
X,
y,
qid=qid,
missing=self.missing,
enable_categorical=self.enable_categorical,
nthread=self.n_jobs,
feature_types=self.feature_types,
)
if callable(self.eval_metric):
metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric)

View File

@ -22,7 +22,7 @@ from typing import (
import numpy as np
import pandas as pd
from pyspark import SparkContext, cloudpickle
from pyspark import RDD, SparkContext, cloudpickle
from pyspark.ml import Estimator, Model
from pyspark.ml.functions import array_to_vector, vector_to_array
from pyspark.ml.linalg import VectorUDT
@ -44,6 +44,7 @@ from pyspark.ml.util import (
MLWritable,
MLWriter,
)
from pyspark.resource import ResourceProfileBuilder, TaskResourceRequests
from pyspark.sql import Column, DataFrame
from pyspark.sql.functions import col, countDistinct, pandas_udf, rand, struct
from pyspark.sql.types import (
@ -59,11 +60,12 @@ from scipy.special import expit, softmax # pylint: disable=no-name-in-module
import xgboost
from xgboost import XGBClassifier
from xgboost.compat import is_cudf_available
from xgboost.compat import is_cudf_available, is_cupy_available
from xgboost.core import Booster, _check_distributed_params
from xgboost.sklearn import DEFAULT_N_ESTIMATORS, XGBModel, _can_use_qdm
from xgboost.training import train as worker_train
from .._typing import ArrayLike
from .data import (
_read_csr_matrix_from_unwrapped_spark_vec,
alias,
@ -87,6 +89,7 @@ from .utils import (
_get_rabit_args,
_get_spark_session,
_is_local,
_is_standalone_or_localcluster,
deserialize_booster,
deserialize_xgb_model,
get_class_name,
@ -241,6 +244,13 @@ class _SparkXGBParams(
TypeConverters.toList,
)
def set_device(self, value: str) -> "_SparkXGBParams":
"""Set device, optional value: cpu, cuda, gpu"""
_check_distributed_params({"device": value})
assert value in ("cpu", "cuda", "gpu")
self.set(self.device, value)
return self
@classmethod
def _xgb_cls(cls) -> Type[XGBModel]:
"""
@ -334,6 +344,54 @@ class _SparkXGBParams(
predict_params[param.name] = self.getOrDefault(param)
return predict_params
def _validate_gpu_params(self) -> None:
"""Validate the gpu parameters and gpu configurations"""
if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
ss = _get_spark_session()
sc = ss.sparkContext
if _is_local(sc):
# Support GPU training in Spark local mode is just for debugging
# purposes, so it's okay for printing the below warning instead of
# checking the real gpu numbers and raising the exception.
get_logger(self.__class__.__name__).warning(
"You have enabled GPU in spark local mode. Please make sure your"
" local node has at least %d GPUs",
self.getOrDefault(self.num_workers),
)
else:
executor_gpus = sc.getConf().get("spark.executor.resource.gpu.amount")
if executor_gpus is None:
raise ValueError(
"The `spark.executor.resource.gpu.amount` is required for training"
" on GPU."
)
if not (ss.version >= "3.4.0" and _is_standalone_or_localcluster(sc)):
# We will enable stage-level scheduling in spark 3.4.0+ which doesn't
# require spark.task.resource.gpu.amount to be set explicitly
gpu_per_task = sc.getConf().get("spark.task.resource.gpu.amount")
if gpu_per_task is not None:
if float(gpu_per_task) < 1.0:
raise ValueError(
"XGBoost doesn't support GPU fractional configurations. "
"Please set `spark.task.resource.gpu.amount=spark.executor"
".resource.gpu.amount`"
)
if float(gpu_per_task) > 1.0:
get_logger(self.__class__.__name__).warning(
"%s GPUs for each Spark task is configured, but each "
"XGBoost training task uses only 1 GPU.",
gpu_per_task,
)
else:
raise ValueError(
"The `spark.task.resource.gpu.amount` is required for training"
" on GPU."
)
def _validate_params(self) -> None:
# pylint: disable=too-many-branches
init_model = self.getOrDefault("xgb_model")
@ -413,53 +471,7 @@ class _SparkXGBParams(
"`pyspark.ml.linalg.Vector` type."
)
if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
gpu_per_task = (
_get_spark_session()
.sparkContext.getConf()
.get("spark.task.resource.gpu.amount")
)
is_local = _is_local(_get_spark_session().sparkContext)
if is_local:
# checking spark local mode.
if gpu_per_task is not None:
raise RuntimeError(
"The spark local mode does not support gpu configuration."
"Please remove spark.executor.resource.gpu.amount and "
"spark.task.resource.gpu.amount"
)
# Support GPU training in Spark local mode is just for debugging
# purposes, so it's okay for printing the below warning instead of
# checking the real gpu numbers and raising the exception.
get_logger(self.__class__.__name__).warning(
"You have enabled GPU in spark local mode. Please make sure your"
" local node has at least %d GPUs",
self.getOrDefault(self.num_workers),
)
else:
# checking spark non-local mode.
if gpu_per_task is not None:
if float(gpu_per_task) < 1.0:
raise ValueError(
"XGBoost doesn't support GPU fractional configurations. "
"Please set `spark.task.resource.gpu.amount=spark.executor"
".resource.gpu.amount`"
)
if float(gpu_per_task) > 1.0:
get_logger(self.__class__.__name__).warning(
"%s GPUs for each Spark task is configured, but each "
"XGBoost training task uses only 1 GPU.",
gpu_per_task,
)
else:
raise ValueError(
"The `spark.task.resource.gpu.amount` is required for training"
" on GPU."
)
self._validate_gpu_params()
def _validate_and_convert_feature_col_as_float_col_list(
@ -584,6 +596,8 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
arbitrary_params_dict={},
)
self.logger = get_logger(self.__class__.__name__)
def setParams(self, **kwargs: Any) -> None: # pylint: disable=invalid-name
"""
Set params for the estimator.
@ -886,6 +900,116 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
return booster_params, train_call_kwargs_params, dmatrix_kwargs
def _skip_stage_level_scheduling(self) -> bool:
# pylint: disable=too-many-return-statements
"""Check if stage-level scheduling is not needed,
return true to skip stage-level scheduling"""
if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
ss = _get_spark_session()
sc = ss.sparkContext
if ss.version < "3.4.0":
self.logger.info(
"Stage-level scheduling in xgboost requires spark version 3.4.0+"
)
return True
if not _is_standalone_or_localcluster(sc):
self.logger.info(
"Stage-level scheduling in xgboost requires spark standalone or "
"local-cluster mode"
)
return True
executor_cores = sc.getConf().get("spark.executor.cores")
executor_gpus = sc.getConf().get("spark.executor.resource.gpu.amount")
if executor_cores is None or executor_gpus is None:
self.logger.info(
"Stage-level scheduling in xgboost requires spark.executor.cores, "
"spark.executor.resource.gpu.amount to be set."
)
return True
if int(executor_cores) == 1:
# there will be only 1 task running at any time.
self.logger.info(
"Stage-level scheduling in xgboost requires spark.executor.cores > 1 "
)
return True
if int(executor_gpus) > 1:
# For spark.executor.resource.gpu.amount > 1, we suppose user knows how to configure
# to make xgboost run successfully.
#
self.logger.info(
"Stage-level scheduling in xgboost will not work "
"when spark.executor.resource.gpu.amount>1"
)
return True
task_gpu_amount = sc.getConf().get("spark.task.resource.gpu.amount")
if task_gpu_amount is None:
# The ETL tasks will not grab a gpu when spark.task.resource.gpu.amount is not set,
# but with stage-level scheduling, we can make training task grab the gpu.
return False
if float(task_gpu_amount) == float(executor_gpus):
# spark.executor.resource.gpu.amount=spark.task.resource.gpu.amount "
# results in only 1 task running at a time, which may cause perf issue.
return True
# We can enable stage-level scheduling
return False
# CPU training doesn't require stage-level scheduling
return True
def _try_stage_level_scheduling(self, rdd: RDD) -> RDD:
"""Try to enable stage-level scheduling"""
if self._skip_stage_level_scheduling():
return rdd
ss = _get_spark_session()
# executor_cores will not be None
executor_cores = ss.sparkContext.getConf().get("spark.executor.cores")
assert executor_cores is not None
# Spark-rapids is a project to leverage GPUs to accelerate spark SQL.
# If spark-rapids is enabled, to avoid GPU OOM, we don't allow other
# ETL gpu tasks running alongside training tasks.
spark_plugins = ss.conf.get("spark.plugins", " ")
assert spark_plugins is not None
spark_rapids_sql_enabled = ss.conf.get("spark.rapids.sql.enabled", "true")
assert spark_rapids_sql_enabled is not None
task_cores = (
int(executor_cores)
if "com.nvidia.spark.SQLPlugin" in spark_plugins
and "true" == spark_rapids_sql_enabled.lower()
else (int(executor_cores) // 2) + 1
)
# Each training task requires cpu cores > total executor cores//2 + 1 which can
# make sure the tasks be sent to different executors.
#
# Please note that we can't use GPU to limit the concurrent tasks because of
# https://issues.apache.org/jira/browse/SPARK-45527.
task_gpus = 1.0
treqs = TaskResourceRequests().cpus(task_cores).resource("gpu", task_gpus)
rp = ResourceProfileBuilder().require(treqs).build
self.logger.info(
"XGBoost training tasks require the resource(cores=%s, gpu=%s).",
task_cores,
task_gpus,
)
return rdd.withResources(rp)
def _fit(self, dataset: DataFrame) -> "_SparkXGBModel":
# pylint: disable=too-many-statements, too-many-locals
self._validate_params()
@ -986,14 +1110,16 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
)
def _run_job() -> Tuple[str, str]:
ret = (
rdd = (
dataset.mapInPandas(
_train_booster, schema="config string, booster string" # type: ignore
_train_booster, # type: ignore
schema="config string, booster string",
)
.rdd.barrier()
.mapPartitions(lambda x: x)
.collect()[0]
)
rdd_with_resource = self._try_stage_level_scheduling(rdd)
ret = rdd_with_resource.collect()[0]
return ret[0], ret[1]
get_logger("XGBoost-PySpark").info(
@ -1117,12 +1243,111 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
)
return features_col, feature_col_names
def _get_pred_contrib_col_name(self) -> Optional[str]:
"""Return the pred_contrib_col col name"""
pred_contrib_col_name = None
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
return pred_contrib_col_name
def _out_schema(self) -> Tuple[bool, str]:
"""Return the bool to indicate if it's a single prediction, true is single prediction,
and the returned type of the user-defined function. The value must
be a DDL-formatted type string."""
if self._get_pred_contrib_col_name() is not None:
return False, f"{pred.prediction} double, {pred.pred_contrib} array<double>"
return True, "double"
def _get_predict_func(self) -> Callable:
"""Return the true prediction function which will be running on the executor side"""
predict_params = self._gen_predict_params_dict()
pred_contrib_col_name = self._get_pred_contrib_col_name()
def _predict(
model: XGBModel, X: ArrayLike, base_margin: Optional[ArrayLike]
) -> Union[pd.DataFrame, pd.Series]:
data = {}
preds = model.predict(
X,
base_margin=base_margin,
validate_features=False,
**predict_params,
)
data[pred.prediction] = pd.Series(preds)
if pred_contrib_col_name is not None:
contribs = pred_contribs(model, X, base_margin)
data[pred.pred_contrib] = pd.Series(list(contribs))
return pd.DataFrame(data=data)
return data[pred.prediction]
return _predict
def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
"""Post process of transform"""
prediction_col_name = self.getOrDefault(self.predictionCol)
single_pred, _ = self._out_schema()
if single_pred:
if prediction_col_name:
dataset = dataset.withColumn(prediction_col_name, pred_col)
else:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_col)
if prediction_col_name:
dataset = dataset.withColumn(
prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
)
pred_contrib_col_name = self._get_pred_contrib_col_name()
if pred_contrib_col_name is not None:
dataset = dataset.withColumn(
pred_contrib_col_name,
array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
)
dataset = dataset.drop(pred_struct_col)
return dataset
def _gpu_transform(self) -> bool:
"""If gpu is used to do the prediction, true to gpu prediction"""
if _is_local(_get_spark_session().sparkContext):
# if it's local model, we just use the internal "device"
return use_cuda(self.getOrDefault(self.device))
gpu_per_task = (
_get_spark_session()
.sparkContext.getConf()
.get("spark.task.resource.gpu.amount")
)
# User don't set gpu configurations, just use cpu
if gpu_per_task is None:
if use_cuda(self.getOrDefault(self.device)):
get_logger("XGBoost-PySpark").warning(
"Do the prediction on the CPUs since "
"no gpu configurations are set"
)
return False
# User already sets the gpu configurations, we just use the internal "device".
return use_cuda(self.getOrDefault(self.device))
def _transform(self, dataset: DataFrame) -> DataFrame:
# pylint: disable=too-many-statements, too-many-locals
# Save xgb_sklearn_model and predict_params to be local variable
# to avoid the `self` object to be pickled to remote.
xgb_sklearn_model = self._xgb_sklearn_model
predict_params = self._gen_predict_params_dict()
has_base_margin = False
if (
@ -1137,79 +1362,92 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
features_col, feature_col_names = self._get_feature_col(dataset)
enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)
pred_contrib_col_name = None
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
predict_func = self._get_predict_func()
single_pred = True
schema = "double"
if pred_contrib_col_name:
single_pred = False
schema = f"{pred.prediction} double, {pred.pred_contrib} array<double>"
_, schema = self._out_schema()
is_local = _is_local(_get_spark_session().sparkContext)
run_on_gpu = self._gpu_transform()
@pandas_udf(schema) # type: ignore
def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:
assert xgb_sklearn_model is not None
model = xgb_sklearn_model
from pyspark import TaskContext
context = TaskContext.get()
assert context is not None
dev_ordinal = -1
if is_cudf_available():
if is_local:
if run_on_gpu and is_cupy_available():
import cupy as cp # pylint: disable=import-error
total_gpus = cp.cuda.runtime.getDeviceCount()
if total_gpus > 0:
partition_id = context.partitionId()
# For transform local mode, default the dev_ordinal to
# (partition id) % gpus.
dev_ordinal = partition_id % total_gpus
elif run_on_gpu:
dev_ordinal = _get_gpu_id(context)
if dev_ordinal >= 0:
device = "cuda:" + str(dev_ordinal)
get_logger("XGBoost-PySpark").info(
"Do the inference with device: %s", device
)
model.set_params(device=device)
else:
get_logger("XGBoost-PySpark").info("Do the inference on the CPUs")
else:
msg = (
"CUDF is unavailable, fallback the inference on the CPUs"
if run_on_gpu
else "Do the inference on the CPUs"
)
get_logger("XGBoost-PySpark").info(msg)
def to_gpu_if_possible(data: ArrayLike) -> ArrayLike:
"""Move the data to gpu if possible"""
if dev_ordinal >= 0:
import cudf # pylint: disable=import-error
import cupy as cp # pylint: disable=import-error
# We must set the device after import cudf, which will change the device id to 0
# See https://github.com/rapidsai/cudf/issues/11386
cp.cuda.runtime.setDevice(dev_ordinal) # pylint: disable=I1101
df = cudf.DataFrame(data)
del data
return df
return data
for data in iterator:
if enable_sparse_data_optim:
X = _read_csr_matrix_from_unwrapped_spark_vec(data)
else:
if feature_col_names is not None:
X = data[feature_col_names]
tmp = data[feature_col_names]
else:
X = stack_series(data[alias.data])
tmp = stack_series(data[alias.data])
X = to_gpu_if_possible(tmp)
if has_base_margin:
base_margin = data[alias.margin].to_numpy()
base_margin = to_gpu_if_possible(data[alias.margin])
else:
base_margin = None
data = {}
preds = model.predict(
X,
base_margin=base_margin,
validate_features=False,
**predict_params,
)
data[pred.prediction] = pd.Series(preds)
if pred_contrib_col_name:
contribs = pred_contribs(model, X, base_margin)
data[pred.pred_contrib] = pd.Series(list(contribs))
yield pd.DataFrame(data=data)
else:
yield data[pred.prediction]
yield predict_func(model, X, base_margin)
if has_base_margin:
pred_col = predict_udf(struct(*features_col, base_margin_col))
else:
pred_col = predict_udf(struct(*features_col))
prediction_col_name = self.getOrDefault(self.predictionCol)
if single_pred:
dataset = dataset.withColumn(prediction_col_name, pred_col)
else:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_col)
dataset = dataset.withColumn(
prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
)
if pred_contrib_col_name:
dataset = dataset.withColumn(
pred_contrib_col_name,
array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
)
dataset = dataset.drop(pred_struct_col)
return dataset
return self._post_transform(dataset, pred_col)
class _ClassificationModel( # pylint: disable=abstract-method
@ -1221,22 +1459,21 @@ class _ClassificationModel( # pylint: disable=abstract-method
.. Note:: This API is experimental.
"""
def _transform(self, dataset: DataFrame) -> DataFrame:
# pylint: disable=too-many-statements, too-many-locals
# Save xgb_sklearn_model and predict_params to be local variable
# to avoid the `self` object to be pickled to remote.
xgb_sklearn_model = self._xgb_sklearn_model
predict_params = self._gen_predict_params_dict()
has_base_margin = False
if (
self.isDefined(self.base_margin_col)
and self.getOrDefault(self.base_margin_col) != ""
):
has_base_margin = True
base_margin_col = col(self.getOrDefault(self.base_margin_col)).alias(
alias.margin
def _out_schema(self) -> Tuple[bool, str]:
schema = (
f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
f" {pred.probability} array<double>"
)
if self._get_pred_contrib_col_name() is not None:
# We will force setting strict_shape to True when predicting contribs,
# So, it will also output 3-D shape result.
schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
return False, schema
def _get_predict_func(self) -> Callable:
predict_params = self._gen_predict_params_dict()
pred_contrib_col_name = self._get_pred_contrib_col_name()
def transform_margin(margins: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
if margins.ndim == 1:
@ -1251,45 +1488,9 @@ class _ClassificationModel( # pylint: disable=abstract-method
class_probs = softmax(raw_preds, axis=1)
return raw_preds, class_probs
features_col, feature_col_names = self._get_feature_col(dataset)
enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)
pred_contrib_col_name = None
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
schema = (
f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
f" {pred.probability} array<double>"
)
if pred_contrib_col_name:
# We will force setting strict_shape to True when predicting contribs,
# So, it will also output 3-D shape result.
schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
@pandas_udf(schema) # type: ignore
def predict_udf(
iterator: Iterator[Tuple[pd.Series, ...]]
) -> Iterator[pd.DataFrame]:
assert xgb_sklearn_model is not None
model = xgb_sklearn_model
for data in iterator:
if enable_sparse_data_optim:
X = _read_csr_matrix_from_unwrapped_spark_vec(data)
else:
if feature_col_names is not None:
X = data[feature_col_names] # type: ignore
else:
X = stack_series(data[alias.data])
if has_base_margin:
base_margin = stack_series(data[alias.margin])
else:
base_margin = None
def _predict(
model: XGBModel, X: ArrayLike, base_margin: Optional[np.ndarray]
) -> Union[pd.DataFrame, pd.Series]:
margins = model.predict(
X,
base_margin=base_margin,
@ -1308,19 +1509,17 @@ class _ClassificationModel( # pylint: disable=abstract-method
pred.probability: pd.Series(list(class_probs)),
}
if pred_contrib_col_name:
if pred_contrib_col_name is not None:
contribs = pred_contribs(model, X, base_margin, strict_shape=True)
result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))
yield pd.DataFrame(data=result)
return pd.DataFrame(data=result)
if has_base_margin:
pred_struct = predict_udf(struct(*features_col, base_margin_col))
else:
pred_struct = predict_udf(struct(*features_col))
return _predict
def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_struct)
dataset = dataset.withColumn(pred_struct_col, pred_col)
raw_prediction_col_name = self.getOrDefault(self.rawPredictionCol)
if raw_prediction_col_name:
@ -1342,7 +1541,8 @@ class _ClassificationModel( # pylint: disable=abstract-method
array_to_vector(getattr(col(pred_struct_col), pred.probability)),
)
if pred_contrib_col_name:
pred_contrib_col_name = self._get_pred_contrib_col_name()
if pred_contrib_col_name is not None:
dataset = dataset.withColumn(
pred_contrib_col_name,
getattr(col(pred_struct_col), pred.pred_contrib),

View File

@ -10,7 +10,7 @@ from threading import Thread
from typing import Any, Callable, Dict, Optional, Set, Type
import pyspark
from pyspark import BarrierTaskContext, SparkContext, SparkFiles
from pyspark import BarrierTaskContext, SparkContext, SparkFiles, TaskContext
from pyspark.sql.session import SparkSession
from xgboost import Booster, XGBModel, collective
@ -129,7 +129,14 @@ def _is_local(spark_context: SparkContext) -> bool:
return spark_context._jsc.sc().isLocal()
def _get_gpu_id(task_context: BarrierTaskContext) -> int:
def _is_standalone_or_localcluster(spark_context: SparkContext) -> bool:
master = spark_context.getConf().get("spark.master")
return master is not None and (
master.startswith("spark://") or master.startswith("local-cluster")
)
def _get_gpu_id(task_context: TaskContext) -> int:
"""Get the gpu id from the task resources"""
if task_context is None:
# This is a safety check.

View File

@ -75,3 +75,28 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
with pytest.raises(ValueError, match="Either `group` or `qid`."):
ranker.fit(df, y, eval_set=[(X, y)])
def run_ranking_categorical(device: str) -> None:
"""Test LTR with categorical features."""
from sklearn.model_selection import cross_val_score
X, y = tm.make_categorical(
n_samples=512, n_features=10, n_categories=3, onehot=False
)
rng = np.random.default_rng(1994)
qid = rng.choice(3, size=y.shape[0])
qid = np.sort(qid)
X["qid"] = qid
ltr = xgb.XGBRanker(enable_categorical=True, device=device)
ltr.fit(X, y)
score = ltr.score(X, y)
assert score > 0.9
ltr = xgb.XGBRanker(enable_categorical=True, device=device)
# test using the score function inside sklearn.
scores = cross_val_score(ltr, X, y)
for s in scores:
assert s > 0.7

1
rocgputreeshap Submodule

@ -0,0 +1 @@
Subproject commit 2fea6734e83cf147c1bbe580ac4713cd50abcad5

View File

@ -16,6 +16,11 @@ if (USE_CUDA)
target_sources(objxgboost PRIVATE ${CUDA_SOURCES})
endif (USE_CUDA)
if (USE_HIP)
file(GLOB_RECURSE HIP_SOURCES *.hip *.hip.h)
target_sources(objxgboost PRIVATE ${HIP_SOURCES})
endif (USE_HIP)
target_include_directories(objxgboost
PRIVATE
${xgboost_SOURCE_DIR}/include
@ -33,6 +38,7 @@ msvc_use_static_runtime()
# This grouping organises source files nicely in visual studio
auto_source_group("${CUDA_SOURCES}")
auto_source_group("${HIP_SOURCES}")
auto_source_group("${CPU_SOURCES}")
#-- End object library

View File

@ -70,12 +70,14 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
using GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
namespace xgboost {
void XGBBuildInfoDevice(Json *p_info) {
auto &info = *p_info;
info["USE_CUDA"] = Boolean{false};
info["USE_NCCL"] = Boolean{false};
info["USE_HIP"] = Boolean{false};
info["USE_RCCL"] = Boolean{false};
info["USE_RMM"] = Boolean{false};
}
} // namespace xgboost
@ -279,7 +281,7 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
API_END();
}
#ifndef XGBOOST_USE_CUDA
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *, char const *, DMatrixHandle *) {
API_BEGIN();
common::AssertGPUSupport();
@ -1189,7 +1191,7 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
API_END();
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
DMatrixHandle, xgboost::bst_ulong const **,
xgboost::bst_ulong *, const float **) {

View File

@ -17,7 +17,11 @@ namespace xgboost {
void XGBBuildInfoDevice(Json *p_info) {
auto &info = *p_info;
#if defined(XGBOOST_USE_CUDA)
info["USE_CUDA"] = true;
#elif defined(XGBOOST_USE_HIP)
info["USE_HIP"] = true;
#endif
std::vector<Json> v{Json{Integer{THRUST_MAJOR_VERSION}}, Json{Integer{THRUST_MINOR_VERSION}},
Json{Integer{THRUST_SUBMINOR_VERSION}}};
@ -30,8 +34,13 @@ void XGBBuildInfoDevice(Json *p_info) {
info["USE_NCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
info["NCCL_VERSION"] = v;
#elif defined(XGBOOST_USE_RCCL)
info["USE_RCCL"] = Boolean{true};
v = {Json{Integer{NCCL_MAJOR}}, Json{Integer{NCCL_MINOR}}, Json{Integer{NCCL_PATCH}}};
info["RCCL_VERSION"] = v;
#else
info["USE_NCCL"] = Boolean{false};
info["USE_RCCL"] = Boolean{false};
#endif
#if defined(XGBOOST_USE_RMM)

4
src/c_api/c_api.hip Normal file
View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "c_api.cu"
#endif

View File

@ -175,7 +175,7 @@ inline float GetMissing(Json const &config) {
// Safe guard some global variables from being changed by XGBoost.
class XGBoostAPIGuard {
#if defined(XGBOOST_USE_CUDA)
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
int32_t device_id_ {0};
void SetGPUAttribute();

View File

@ -51,7 +51,7 @@ void Communicator::Init(Json const& config) {
}
}
#ifndef XGBOOST_USE_CUDA
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
void Communicator::Finalize() {
communicator_->Shutdown();
communicator_.reset(new NoOpCommunicator());

View File

@ -5,7 +5,7 @@
#include "device_communicator.cuh"
#include "device_communicator_adapter.cuh"
#include "noop_communicator.h"
#ifdef XGBOOST_USE_NCCL
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
#include "nccl_device_communicator.cuh"
#endif
@ -28,7 +28,7 @@ DeviceCommunicator* Communicator::GetDevice(int device_ordinal) {
communicator_->GetWorldSize() != old_world_size) {
old_device_ordinal = device_ordinal;
old_world_size = communicator_->GetWorldSize();
#ifdef XGBOOST_USE_NCCL
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
switch (type_) {
case CommunicatorType::kRabit:
device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, false));

View File

@ -98,7 +98,7 @@ class Communicator {
/** @brief Get the communicator instance. */
static Communicator *Get() { return communicator_.get(); }
#if defined(XGBOOST_USE_CUDA)
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
/**
* @brief Get the device communicator.
*
@ -230,7 +230,7 @@ class Communicator {
static thread_local std::unique_ptr<Communicator> communicator_;
static thread_local CommunicatorType type_;
#if defined(XGBOOST_USE_CUDA)
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
static thread_local std::unique_ptr<DeviceCommunicator> device_communicator_;
#endif

View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "communicator.cu"
#endif

View File

@ -41,7 +41,6 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
}
dh::safe_cuda(cudaSetDevice(device_ordinal_));
segments->clear();
segments->resize(world_size_, 0);
segments->at(rank_) = length_bytes;
@ -60,6 +59,7 @@ class DeviceCommunicatorAdapter : public DeviceCommunicator {
Broadcast(host_buffer_.data() + offset, as_bytes, i);
offset += as_bytes;
}
dh::safe_cuda(cudaMemcpy(receive_buffer->data().get(), host_buffer_.data(), total_bytes,
cudaMemcpyDefault));
}

View File

@ -1,7 +1,7 @@
/*!
* Copyright 2023 XGBoost contributors
*/
#if defined(XGBOOST_USE_NCCL)
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
#include "nccl_device_communicator.cuh"
namespace xgboost {

View File

@ -35,12 +35,22 @@ class NcclDeviceCommunicator : public DeviceCommunicator {
private:
static constexpr std::size_t kUuidLength =
#if defined(XGBOOST_USE_CUDA)
sizeof(std::declval<cudaDeviceProp>().uuid) / sizeof(uint64_t);
#elif defined(XGBOOST_USE_HIP)
sizeof(hipUUID) / sizeof(uint64_t);
#endif
void GetCudaUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) const {
#if defined(XGBOOST_USE_CUDA)
cudaDeviceProp prob{};
dh::safe_cuda(cudaGetDeviceProperties(&prob, device_ordinal_));
std::memcpy(uuid.data(), static_cast<void *>(&(prob.uuid)), sizeof(prob.uuid));
#elif defined(XGBOOST_USE_HIP)
hipUUID id;
hipDeviceGetUuid(&id, device_ordinal_);
std::memcpy(uuid.data(), static_cast<void *>(&id), sizeof(id));
#endif
}
static std::string PrintUUID(xgboost::common::Span<uint64_t, kUuidLength> const &uuid) {

View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "nccl_device_communicator.cu"
#endif

View File

@ -10,7 +10,13 @@
#include <cstddef> // size_t
#include <cstdint> // int32_t
#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
#elif defined(XGBOOST_USE_CUDA)
#include <cub/cub.cuh> // DispatchSegmentedRadixSort,NullType,DoubleBuffer
#endif
#include <iterator> // distance
#include <limits> // numeric_limits
#include <type_traits> // conditional_t,remove_const_t
@ -39,6 +45,7 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
using OffsetT = int;
// Null value type
#if defined(XGBOOST_USE_CUDA)
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
cub::DoubleBuffer<cub::NullType> d_values;
@ -47,6 +54,20 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st
OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items,
num_segments, d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, ctx->Stream(), debug_synchronous)));
#elif defined(XGBOOST_USE_HIP)
if (IS_DESCENDING) {
rocprim::segmented_radix_sort_pairs_desc<KeyT, hipcub::NullType, BeginOffsetIteratorT>(d_temp_storage,
temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, ctx->Stream(), debug_synchronous);
}
else {
rocprim::segmented_radix_sort_pairs<KeyT, hipcub::NullType, BeginOffsetIteratorT>(d_temp_storage,
temp_storage_bytes, d_keys_in, d_keys_out, nullptr, nullptr, num_items,
num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, ctx->Stream(), debug_synchronous);
}
#endif
}
// Wrapper around cub sort for easier `descending` sort.
@ -60,14 +81,18 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets, dh::CUDAStreamView stream,
int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) {
#if defined(XGBOOST_USE_CUDA)
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT *>(d_keys_in), d_keys_out);
cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT *>(d_values_in), d_values_out);
#endif
// In old version of cub, num_items in dispatch is also int32_t, no way to change.
using OffsetT = std::conditional_t<dh::BuildWithCUDACub() && dh::HasThrustMinorVer<13>(),
std::size_t, std::int32_t>;
CHECK_LE(num_items, std::numeric_limits<OffsetT>::max());
// For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation
#if defined(XGBOOST_USE_CUDA)
#if THRUST_MAJOR_VERSION >= 2
dh::safe_cuda((cub::DispatchSegmentedRadixSort<
descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT,
@ -88,6 +113,18 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage,
d_begin_offsets, d_end_offsets, begin_bit,
end_bit, false, stream, false)));
#endif
#elif defined(XGBOOST_USE_HIP)
if (descending) {
rocprim::segmented_radix_sort_pairs_desc(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
d_values_in, d_values_out, num_items, num_segments,
d_begin_offsets, d_end_offsets, begin_bit, end_bit, stream, false);
}
else {
rocprim::segmented_radix_sort_pairs(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out,
d_values_in, d_values_out, num_items, num_segments, d_begin_offsets, d_end_offsets,
begin_bit, end_bit, stream, false);
}
#endif
}
} // namespace detail

View File

@ -17,14 +17,18 @@
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include "device_helpers.cuh"
#endif // defined(__CUDACC__)
#elif defined(__HIPCC__)
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include "device_helpers.hip.h"
#endif // defined(__CUDACC__) || defined(__HIPCC__)
#include "xgboost/span.h"
#include "common.h"
namespace xgboost {
#if defined(__CUDACC__)
#if defined(__CUDACC__) || defined(__HIPCC__)
using BitFieldAtomicType = unsigned long long; // NOLINT
__forceinline__ __device__ BitFieldAtomicType AtomicOr(BitFieldAtomicType* address,
@ -48,7 +52,7 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
return old;
}
#endif // defined(__CUDACC__)
#endif // defined(__CUDACC__) || defined(__HIPCC__)
/**
* @brief A non-owning type with auxiliary methods defined for manipulating bits.
@ -106,7 +110,7 @@ struct BitFieldContainer {
XGBOOST_DEVICE static size_t ComputeStorageSize(index_type size) {
return common::DivRoundUp(size, kValueSize);
}
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
__device__ BitFieldContainer& operator|=(BitFieldContainer const& rhs) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
size_t min_size = min(NumValues(), rhs.NumValues());
@ -123,9 +127,9 @@ struct BitFieldContainer {
}
return *this;
}
#endif // #if defined(__CUDA_ARCH__)
#endif // #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
__device__ BitFieldContainer& operator&=(BitFieldContainer const& rhs) {
size_t min_size = min(NumValues(), rhs.NumValues());
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
@ -144,7 +148,7 @@ struct BitFieldContainer {
}
#endif // defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
__device__ auto Set(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = Data()[pos_v.int_pos];
@ -159,6 +163,14 @@ struct BitFieldContainer {
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
}
#ifdef __HIPCC__
void Clear(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos));
value_type& value = Data()[pos_v.int_pos];
value_type clear_bit = ~(kOne << pos_v.bit_pos);
value &= clear_bit;
}
#endif
#else
void Set(index_type pos) noexcept(true) {
Pos pos_v = Direction::Shift(ToBitPos(pos));
@ -172,7 +184,7 @@ struct BitFieldContainer {
value_type clear_bit = ~(kOne << pos_v.bit_pos);
value &= clear_bit;
}
#endif // defined(__CUDA_ARCH__)
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
XGBOOST_DEVICE bool Check(Pos pos_v) const noexcept(true) {
pos_v = Direction::Shift(pos_v);

View File

@ -52,7 +52,7 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
*
* Go to left if it's NOT the matching category, which matches one-hot encoding.
*/
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
inline XGBOOST_DEVICE bool Decision(common::Span<CatBitField::value_type const> cats, float cat) {
KCatBitField const s_cats(cats);
if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
return true;

View File

@ -55,7 +55,7 @@ void EscapeU8(std::string const &string, std::string *p_buffer) {
}
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
int AllVisibleGPUs() { return 0; }
#endif // !defined(XGBOOST_USE_CUDA)

View File

@ -2,6 +2,7 @@
* Copyright 2018-2022 XGBoost contributors
*/
#include "common.h"
#include "cuda_to_hip.h"
namespace xgboost {
namespace common {

View File

@ -26,6 +26,12 @@
#define WITH_CUDA() true
#elif defined(__HIPCC__)
#include <thrust/system/hip/error.h>
#include <thrust/system_error.h>
#define WITH_CUDA() true
#else
#define WITH_CUDA() false
@ -39,8 +45,8 @@ namespace dh {
*/
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
int line) {
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line)
{
if (code != cudaSuccess) {
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
std::string{file} + ": " + // NOLINT
@ -48,7 +54,23 @@ inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
}
return code;
}
#endif // defined(__CUDACC__)
#elif defined(__HIPCC__)
/*
* Error handling functions
*/
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline hipError_t ThrowOnCudaError(hipError_t code, const char *file, int line)
{
if (code != hipSuccess) {
LOG(FATAL) << thrust::system_error(code, thrust::hip_category(),
std::string{file} + ": " + // NOLINT
std::to_string(line)).what();
}
return code;
}
#endif
} // namespace dh
namespace xgboost::common {
@ -159,9 +181,9 @@ class Range {
int AllVisibleGPUs();
inline void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA
#endif // XGBOOST_USE_CUDA && XGBOOST_USE_HIP
}
inline void AssertOneAPISupport() {
@ -172,7 +194,7 @@ inline void AssertOneAPISupport() {
void SetDevice(std::int32_t device);
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
inline void SetDevice(std::int32_t device) {
if (device >= 0) {
AssertGPUSupport();

4
src/common/common.hip Normal file
View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "common.cu"
#endif

View File

@ -11,9 +11,11 @@
#include "common.h"
#ifdef __CUDACC__
#if defined(__CUDACC__)
#include "device_helpers.cuh"
#endif // __CUDACC__
#elif defined(__HIPCC__)
#include "device_helpers.hip.h"
#endif // __CUDACC__ || __HIPCC__
namespace xgboost {
namespace common {
@ -105,7 +107,7 @@ class CompressedBufferWriter {
}
}
#ifdef __CUDACC__
#if defined(__CUDACC__) || defined(__HIPCC__)
__device__ void AtomicWriteSymbol
(CompressedByteT* buffer, uint64_t symbol, size_t offset) {
size_t ibit_start = offset * symbol_bits_;
@ -119,7 +121,7 @@ class CompressedBufferWriter {
symbol >>= 8;
}
}
#endif // __CUDACC__
#endif // __CUDACC__ || __HIPCC__
template <typename IterT>
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {

View File

@ -4,7 +4,6 @@
#ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#define XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#include <thrust/execution_policy.h>
#include "device_helpers.cuh"
namespace xgboost {

View File

@ -72,11 +72,23 @@ class pinned_allocator {
if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if
pointer result(nullptr);
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipHostMalloc(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
#else
dh::safe_cuda(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
#endif
return result;
}
inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT
inline void deallocate(pointer p, size_type) {
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipHostFree(p));
#else
dh::safe_cuda(cudaFreeHost(p));
#endif
} // NOLINT
inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); } // NOLINT

71
src/common/cuda_to_hip.h Normal file
View File

@ -0,0 +1,71 @@
/**
* Copyright 2017-2023 XGBoost contributors
*/
#pragma once
#if defined(XGBOOST_USE_HIP)
#define cudaSuccess hipSuccess
#define cudaGetLastError hipGetLastError
#define cudaStream_t hipStream_t
#define cudaStreamCreate hipStreamCreate
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
#define cudaStreamDestroy hipStreamDestroy
#define cudaStreamWaitEvent hipStreamWaitEvent
#define cudaStreamSynchronize hipStreamSynchronize
#define cudaStreamPerThread hipStreamPerThread
#define cudaStreamLegacy hipStreamLegacy
#define cudaEvent_t hipEvent_t
#define cudaEventCreate hipEventCreate
#define cudaEventCreateWithFlags hipEventCreateWithFlags
#define cudaEventDestroy hipEventDestroy
#define cudaGetDevice hipGetDevice
#define cudaSetDevice hipSetDevice
#define cudaGetDeviceCount hipGetDeviceCount
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaGetDeviceProperties hipGetDeviceProperties
#define cudaDeviceGetAttribute hipDeviceGetAttribute
#define cudaMallocHost hipMallocHost
#define cudaFreeHost hipFreeHost
#define cudaMalloc hipMalloc
#define cudaFree hipFree
#define cudaMemcpy hipMemcpy
#define cudaMemcpyAsync hipMemcpyAsync
#define cudaMemcpyDefault hipMemcpyDefault
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyHostToHost hipMemcpyHostToHost
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaMemsetAsync hipMemsetAsync
#define cudaMemset hipMemset
#define cudaPointerAttributes hipPointerAttribute_t
#define cudaPointerGetAttributes hipPointerGetAttributes
#define cudaMemGetInfo hipMemGetInfo
#define cudaFuncSetAttribute hipFuncSetAttribute
#define cudaDevAttrMultiProcessorCount hipDeviceAttributeMultiprocessorCount
#define cudaOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessor
namespace thrust {
namespace hip {
}
}
namespace thrust {
namespace cuda = thrust::hip;
}
namespace hipcub {
}
namespace cub = hipcub;
#endif

View File

@ -2,6 +2,8 @@
* Copyright 2017-2023 XGBoost contributors
*/
#pragma once
#if defined(XGBOOST_USE_CUDA)
#include <thrust/binary_search.h> // thrust::upper_bound
#include <thrust/device_malloc_allocator.h>
#include <thrust/device_ptr.h>
@ -1218,3 +1220,7 @@ class LDGIterator {
}
};
} // namespace dh
#elif defined(XGBOOST_USE_HIP)
#include "device_helpers.hip.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -3,9 +3,11 @@
*/
#include "error_msg.h"
#include <mutex> // for call_once, once_flag
#include <sstream> // for stringstream
#include "../collective/communicator-inl.h" // for GetRank
#include "xgboost/context.h" // for Context
#include "xgboost/logging.h"
namespace xgboost::error {
@ -26,34 +28,43 @@ void WarnDeprecatedGPUHist() {
}
void WarnManualUpdater() {
bool static thread_local logged{false};
if (logged) {
return;
}
static std::once_flag flag;
std::call_once(flag, [] {
LOG(WARNING)
<< "You have manually specified the `updater` parameter. The `tree_method` parameter "
"will be ignored. Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using `tree_method` parameter instead.";
logged = true;
});
}
void WarnDeprecatedGPUId() {
static thread_local bool logged{false};
if (logged) {
return;
}
static std::once_flag flag;
std::call_once(flag, [] {
auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
msg += " E.g. device=cpu/cuda/cuda:0";
LOG(WARNING) << msg;
logged = true;
});
}
void WarnEmptyDataset() {
static thread_local bool logged{false};
if (logged) {
return;
}
LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
logged = true;
static std::once_flag flag;
std::call_once(flag,
[] { LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank(); });
}
void MismatchedDevices(Context const* booster, Context const* data) {
static std::once_flag flag;
std::call_once(flag, [&] {
LOG(WARNING)
<< "Falling back to prediction using DMatrix due to mismatched devices. This might "
"lead to higher memory usage and slower performance. XGBoost is running on: "
<< booster->DeviceName() << ", while the input data is on: " << data->DeviceName() << ".\n"
<< R"(Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.
This warning will only be shown once.
)";
});
}
} // namespace xgboost::error

View File

@ -11,6 +11,7 @@
#include <string> // for string
#include "xgboost/base.h" // for bst_feature_t
#include "xgboost/context.h" // for Context
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // for StringView
@ -94,5 +95,7 @@ constexpr StringView InvalidCUDAOrdinal() {
return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "
"available for using GPU.";
}
void MismatchedDevices(Context const* booster, Context const* data);
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_

View File

@ -84,10 +84,19 @@ __global__ void GetColumnSizeSharedMemKernel(IterSpan<BatchIt> batch_iter,
template <std::uint32_t kBlockThreads, typename Kernel>
std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) {
int n_mps = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device));
#endif
int n_blocks_per_mp = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#endif
std::uint32_t grid_size = n_blocks_per_mp * n_mps;
return grid_size;
}
@ -170,6 +179,7 @@ void GetColumnSizesScan(int device, size_t num_columns, std::size_t num_cuts_per
column_sizes_scan->begin(), [=] __device__(size_t column_size) {
return thrust::min(num_cuts_per_feature, column_size);
});
thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer());
thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(),
@ -294,6 +304,7 @@ void ProcessSlidingWindow(AdapterBatch const &batch, MetaInfo const &info,
&column_sizes_scan,
&sorted_entries);
dh::XGBDeviceAllocator<char> alloc;
thrust::sort(thrust::cuda::par(alloc), sorted_entries.begin(),
sorted_entries.end(), detail::EntryCompareOp());
@ -353,11 +364,13 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx);
return weights[group_idx];
});
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
weight_iter + begin, weight_iter + end,
batch_iter + begin,
d_temp_weights.data(), // output
is_valid);
CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
} else {
CHECK_EQ(batch.NumRows(), weights.size());
@ -366,11 +379,13 @@ void ProcessWeightedSlidingWindow(Batch batch, MetaInfo const& info,
[=]__device__(size_t idx) -> float {
return weights[batch.GetElement(idx).row_idx];
});
auto retit = thrust::copy_if(thrust::cuda::par(alloc),
weight_iter + begin, weight_iter + end,
batch_iter + begin,
d_temp_weights.data(), // output
is_valid);
CHECK_EQ(retit - d_temp_weights.data(), d_temp_weights.size());
}

4
src/common/hist_util.hip Normal file
View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "hist_util.cu"
#endif

View File

@ -1,7 +1,7 @@
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#ifndef XGBOOST_USE_CUDA
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
// dummy implementation of HostDeviceVector in case CUDA is not used
@ -199,4 +199,4 @@ template class HostDeviceVector<std::size_t>;
} // namespace xgboost
#endif // XGBOOST_USE_CUDA
#endif // XGBOOST_USE_CUDA && !defined(XGBOOST_USE_HIP)

View File

@ -139,6 +139,7 @@ class HostDeviceVectorImpl {
auto ptr = other->ConstDevicePointer();
SetDevice();
CHECK_EQ(this->DeviceIdx(), other->DeviceIdx());
dh::safe_cuda(cudaMemcpyAsync(this->DevicePointer() + ori_size,
ptr,
other->Size() * sizeof(T),
@ -195,6 +196,7 @@ class HostDeviceVectorImpl {
gpu_access_ = access;
if (data_h_.size() != data_d_->size()) { data_h_.resize(data_d_->size()); }
SetDevice();
dh::safe_cuda(cudaMemcpy(data_h_.data(),
data_d_->data().get(),
data_d_->size() * sizeof(T),
@ -211,6 +213,7 @@ class HostDeviceVectorImpl {
// data is on the host
LazyResizeDevice(data_h_.size());
SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(),
data_h_.data(),
data_d_->size() * sizeof(T),
@ -239,6 +242,7 @@ class HostDeviceVectorImpl {
LazyResizeDevice(Size());
gpu_access_ = GPUAccess::kWrite;
SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), other->data_d_->data().get(),
data_d_->size() * sizeof(T), cudaMemcpyDefault));
}
@ -248,6 +252,7 @@ class HostDeviceVectorImpl {
LazyResizeDevice(Size());
gpu_access_ = GPUAccess::kWrite;
SetDevice();
dh::safe_cuda(cudaMemcpyAsync(data_d_->data().get(), begin,
data_d_->size() * sizeof(T), cudaMemcpyDefault));
}

View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "host_device_vector.cu"
#endif

View File

@ -384,7 +384,8 @@ class PrivateMmapConstStream : public AlignedResourceReadStream {
* @param length See the `length` parameter of `mmap` for details.
*/
explicit PrivateMmapConstStream(std::string path, std::size_t offset, std::size_t length)
: AlignedResourceReadStream{std::make_shared<MmapResource>(path, offset, length)} {}
: AlignedResourceReadStream{std::shared_ptr<MmapResource>{ // NOLINT
new MmapResource{std::move(path), offset, length}}} {}
~PrivateMmapConstStream() noexcept(false) override;
};

View File

@ -12,7 +12,8 @@
namespace xgboost {
namespace linalg {
template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
{
dh::safe_cuda(cudaSetDevice(t.DeviceIdx()));
static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
"For function with return, use transform instead.");
@ -28,7 +29,8 @@ void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s
}
template <typename T, int32_t D, typename Fn>
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr)
{
if (t.Contiguous()) {
auto ptr = t.Values().data();
dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); });

View File

@ -42,7 +42,7 @@ void ElementWiseKernelHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&& f
}
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
common::AssertGPUSupport();
@ -60,7 +60,7 @@ void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn)
}
ElementWiseKernelHost(t, ctx->Threads(), fn);
}
#endif // !defined(XGBOOST_USE_CUDA)
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
template <typename T, std::int32_t kDim>
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT

View File

@ -143,7 +143,7 @@ CheckNAN(T) {
return false;
}
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
#if XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__) && !defined(__HIPCC__)
bool CheckNAN(double v);
@ -152,23 +152,27 @@ bool CheckNAN(double v);
XGBOOST_DEVICE bool inline CheckNAN(float x) {
#if defined(__CUDA_ARCH__)
return isnan(x);
#elif defined(__HIPCC__)
return __builtin_isnan(x);
#else
return std::isnan(x);
#endif // defined(__CUDA_ARCH__)
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
}
XGBOOST_DEVICE bool inline CheckNAN(double x) {
#if defined(__CUDA_ARCH__)
return isnan(x);
#elif defined(__HIPCC__)
return __builtin_isnan(x);
#else
return std::isnan(x);
#endif // defined(__CUDA_ARCH__)
#endif // defined(__CUDA_ARCH__) || defined(__HIPCC__)
}
#endif // XGBOOST_STRICT_R_MODE && !defined(__CUDA_ARCH__)
// GPU version is not uploaded in CRAN anyway.
// Specialize only when using R with CPU.
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
#if XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
double LogGamma(double v);
#else // Not R or R with GPU.
@ -191,7 +195,7 @@ XGBOOST_DEVICE inline T LogGamma(T v) {
#endif // _MSC_VER
}
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA)
#endif // XGBOOST_STRICT_R_MODE && !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
} // namespace common
} // namespace xgboost

View File

@ -15,6 +15,7 @@ double Reduce(Context const* ctx, HostDeviceVector<float> const& values) {
values.SetDevice(ctx->gpu_id);
auto const d_values = values.ConstDeviceSpan();
dh::XGBCachingDeviceAllocator<char> alloc;
return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0,
thrust::plus<float>{});
}

View File

@ -99,12 +99,12 @@ void PartialSum(int32_t n_threads, InIt begin, InIt end, T init, OutIt out_it) {
namespace cuda_impl {
double Reduce(Context const* ctx, HostDeviceVector<float> const& values);
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
inline double Reduce(Context const*, HostDeviceVector<float> const&) {
AssertGPUSupport();
return 0;
}
#endif // !defined(XGBOOST_USE_CUDA)
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
} // namespace cuda_impl
/**

4
src/common/numeric.hip Normal file
View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "numeric.cu"
#endif

View File

@ -109,6 +109,7 @@ template <typename T, typename U>
void CopyTo(Span<T> out, Span<U> src) {
CHECK_EQ(out.size(), src.size());
static_assert(std::is_same<std::remove_cv_t<T>, std::remove_cv_t<T>>::value);
dh::safe_cuda(cudaMemcpyAsync(out.data(), src.data(),
out.size_bytes(),
cudaMemcpyDefault));
@ -162,6 +163,7 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
// Compute output ptr
auto transform_it =
thrust::make_zip_iterator(thrust::make_tuple(x_ptr.data(), y_ptr.data()));
thrust::transform(
thrust::cuda::par(alloc), transform_it, transform_it + x_ptr.size(),
out_ptr.data(),
@ -211,6 +213,7 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
Span<bst_row_t const> const &x_ptr, Span<SketchEntry const> const &d_y,
Span<bst_row_t const> const &y_ptr, Span<SketchEntry> out, Span<bst_row_t> out_ptr) {
dh::safe_cuda(cudaSetDevice(device));
CHECK_EQ(d_x.size() + d_y.size(), out.size());
CHECK_EQ(x_ptr.size(), out_ptr.size());
CHECK_EQ(y_ptr.size(), out_ptr.size());
@ -309,6 +312,7 @@ void SketchContainer::Push(Span<Entry const> entries, Span<size_t> columns_ptr,
common::Span<OffsetT> cuts_ptr,
size_t total_cuts, Span<float> weights) {
dh::safe_cuda(cudaSetDevice(device_));
Span<SketchEntry> out;
dh::device_vector<SketchEntry> cuts;
bool first_window = this->Current().empty();
@ -378,6 +382,7 @@ size_t SketchContainer::ScanInput(Span<SketchEntry> entries, Span<OffsetT> d_col
});
// Reverse scan to accumulate weights into first duplicated element on left.
auto val_it = thrust::make_reverse_iterator(dh::tend(entries));
thrust::inclusive_scan_by_key(
thrust::cuda::par(alloc), key_it, key_it + entries.size(),
val_it, val_it,
@ -443,6 +448,7 @@ void SketchContainer::Prune(size_t to) {
void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
Span<SketchEntry const> that) {
dh::safe_cuda(cudaSetDevice(device_));
timer_.Start(__func__);
if (this->Current().size() == 0) {
CHECK_EQ(this->columns_ptr_.HostVector().back(), 0);
@ -478,6 +484,7 @@ void SketchContainer::Merge(Span<OffsetT const> d_that_columns_ptr,
void SketchContainer::FixError() {
dh::safe_cuda(cudaSetDevice(device_));
auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan();
auto in = dh::ToSpan(this->Current());
dh::LaunchN(in.size(), [=] __device__(size_t idx) {
@ -635,10 +642,12 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts, bool is_column_split) {
CHECK_EQ(num_columns_, d_in_columns_ptr.size() - 1);
max_values.resize(d_in_columns_ptr.size() - 1);
dh::caching_device_vector<SketchEntry> d_max_values(d_in_columns_ptr.size() - 1);
thrust::reduce_by_key(thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it,
thrust::make_discard_iterator(), d_max_values.begin(),
thrust::equal_to<bst_feature_t>{},
[] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values));
auto max_it = MakeIndexTransformIter([&](auto i) {
if (IsCat(h_feature_types, i)) {

View File

@ -175,6 +175,7 @@ class SketchContainer {
template <typename KeyComp = thrust::equal_to<size_t>>
size_t Unique(KeyComp key_comp = thrust::equal_to<size_t>{}) {
timer_.Start(__func__);
dh::safe_cuda(cudaSetDevice(device_));
this->columns_ptr_.SetDevice(device_);
Span<OffsetT> d_column_scan = this->columns_ptr_.DeviceSpan();
@ -186,11 +187,13 @@ class SketchContainer {
dh::XGBCachingDeviceAllocator<char> alloc;
d_column_scan = this->columns_ptr_.DeviceSpan();
size_t n_uniques = dh::SegmentedUnique(
thrust::cuda::par(alloc), d_column_scan.data(),
d_column_scan.data() + d_column_scan.size(), entries.data(),
entries.data() + entries.size(), scan_out.DevicePointer(),
entries.data(), detail::SketchUnique{}, key_comp);
this->columns_ptr_.Copy(scan_out);
CHECK(!this->columns_ptr_.HostCanRead());

4
src/common/quantile.hip Normal file
View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "quantile.cu"
#endif

View File

@ -62,7 +62,7 @@ common::Span<std::size_t const> RankingCache::MakeRankOnCPU(Context const* ctx,
return rank;
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
void RankingCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const*,
common::Span<float const>) {
@ -108,9 +108,9 @@ void NDCGCache::InitOnCPU(Context const* ctx, MetaInfo const& info) {
});
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
void NDCGCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
DMLC_REGISTER_PARAMETER(LambdaRankParam);
@ -120,7 +120,7 @@ void PreCache::InitOnCPU(Context const*, MetaInfo const& info) {
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
void PreCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
@ -130,9 +130,9 @@ void MAPCache::InitOnCPU(Context const*, MetaInfo const& info) {
[](auto beg, auto end, auto op) { return std::all_of(beg, end, op); });
}
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
void MAPCache::InitOnCUDA(Context const*, MetaInfo const&) { common::AssertGPUSupport(); }
#endif // !defined(XGBOOST_USE_CUDA)
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus) {
std::string out_name;

View File

@ -23,6 +23,10 @@
#include "xgboost/logging.h" // for CHECK
#include "xgboost/span.h" // for Span
#if defined(XGBOOST_USE_HIP)
#include <hipcub/hipcub.hpp>
#endif
namespace xgboost::ltr {
namespace cuda_impl {
void CalcQueriesDCG(Context const* ctx, linalg::VectorView<float const> d_labels,

View File

@ -0,0 +1,4 @@
#if defined(XGBOOST_USE_HIP)
#include "ranking_utils.cu"
#endif

View File

@ -76,7 +76,7 @@ class RefResourceView {
[[nodiscard]] size_type size() const { return size_; } // NOLINT
[[nodiscard]] size_type size_bytes() const { // NOLINT
return Span{data(), size()}.size_bytes();
return Span<const value_type>{data(), size()}.size_bytes();
}
[[nodiscard]] value_type* data() { return ptr_; }; // NOLINT
[[nodiscard]] value_type const* data() const { return ptr_; }; // NOLINT

View File

@ -18,6 +18,7 @@
namespace xgboost {
namespace common {
namespace cuda_impl {
void Median(Context const* ctx, linalg::TensorView<float const, 2> t,
common::OptionalWeights weights, linalg::Tensor<float, 1>* out) {
CHECK_GE(t.Shape(1), 1);

View File

@ -216,6 +216,7 @@ void SegmentedWeightedQuantile(Context const* ctx, AlphaIt alpha_it, SegIt seg_b
detail::SegOp<SegIt>{seg_beg, seg_end});
auto scan_val = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
detail::WeightOp<WIter>{w_begin, d_sorted_idx});
thrust::inclusive_scan_by_key(thrust::cuda::par(caching), scan_key, scan_key + n_weights,
scan_val, weights_cdf.begin());

View File

@ -112,7 +112,7 @@ void Median(Context const* ctx, linalg::TensorView<float const, 2> t, OptionalWe
void Mean(Context const* ctx, linalg::VectorView<float const> v, linalg::VectorView<float> out);
#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalWeights,
linalg::Tensor<float, 1>*) {
common::AssertGPUSupport();
@ -120,7 +120,7 @@ inline void Median(Context const*, linalg::TensorView<float const, 2>, OptionalW
inline void Mean(Context const*, linalg::VectorView<float const>, linalg::VectorView<float>) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
} // namespace cuda_impl
/**

Some files were not shown because too many files have changed in this diff Show More