/**
 * Copyright 2021-2023 by XGBoost Contributors
 */
#ifndef XGBOOST_C_API_C_API_UTILS_H_
#define XGBOOST_C_API_C_API_UTILS_H_

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>   // for shared_ptr
#include <numeric>
#include <string>   // for string
#include <tuple>    // for make_tuple
#include <utility>  // for move
#include <vector>

#include "xgboost/c_api.h"
#include "xgboost/data.h"         // DMatrix
#include "xgboost/feature_map.h"  // for FeatureMap
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h"  // ArrayInterfaceHandler, MakeTensorView, ArrayInterfaceStr
#include "xgboost/logging.h"
#include "xgboost/string_view.h"  // StringView

namespace xgboost {
/* \brief Determine the output shape of prediction.
 *
 * \param strict_shape Whether we should reshape the output with consideration of groups
 *                     and forest.
 * \param type         Prediction type
 * \param rows         Input samples
 * \param cols         Input features
 * \param chunksize    Total elements of output / rows
 * \param groups       Number of output groups from Learner
 * \param rounds       end_iteration - beg_iteration
 * \param out_shape    Output shape
 * \param out_dim      Output dimension
 */
inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows, size_t cols,
                             size_t chunksize, size_t groups, size_t rounds,
                             std::vector<bst_ulong> *out_shape, xgboost::bst_ulong *out_dim) {
  auto &shape = *out_shape;
  if (type == PredictionType::kMargin && rows != 0) {
    // When kValue is used, softmax can change the chunksize.
    CHECK_EQ(chunksize, groups);
  }

  switch (type) {
    case PredictionType::kValue:
    case PredictionType::kMargin: {
      if (chunksize == 1 && !strict_shape) {
        *out_dim = 1;
        shape.resize(*out_dim);
        shape.front() = rows;
      } else {
        *out_dim = 2;
        shape.resize(*out_dim);
        shape.front() = rows;
        // chunksize can be 1 if it's softmax
        shape.back() = std::min(groups, chunksize);
      }
      break;
    }
    case PredictionType::kApproxContribution:
    case PredictionType::kContribution: {
      if (groups == 1 && !strict_shape) {
        *out_dim = 2;
        shape.resize(*out_dim);
        shape.front() = rows;
        shape.back() = cols + 1;
      } else {
        *out_dim = 3;
        shape.resize(*out_dim);
        shape[0] = rows;
        shape[1] = groups;
        shape[2] = cols + 1;
      }
      break;
    }
    case PredictionType::kApproxInteraction:
    case PredictionType::kInteraction: {
      if (groups == 1 && !strict_shape) {
        *out_dim = 3;
        shape.resize(*out_dim);
        shape[0] = rows;
        shape[1] = cols + 1;
        shape[2] = cols + 1;
      } else {
        *out_dim = 4;
        shape.resize(*out_dim);
        shape[0] = rows;
        shape[1] = groups;
        shape[2] = cols + 1;
        shape[3] = cols + 1;
      }
      break;
    }
    case PredictionType::kLeaf: {
      if (strict_shape) {
        shape.resize(4);
        shape[0] = rows;
        shape[1] = rounds;
        shape[2] = groups;
        auto forest = chunksize / (shape[1] * shape[2]);
        forest = std::max(static_cast<decltype(forest)>(1), forest);
        shape[3] = forest;
        *out_dim = shape.size();
      } else if (chunksize == 1) {
        *out_dim = 1;
        shape.resize(*out_dim);
        shape.front() = rows;
      } else {
        *out_dim = 2;
        shape.resize(*out_dim);
        shape.front() = rows;
        shape.back() = chunksize;
      }
      break;
    }
    default: {
      LOG(FATAL) << "Unknown prediction type:" << static_cast<int>(type);
    }
  }
  CHECK_EQ(std::accumulate(shape.cbegin(), shape.cend(), static_cast<std::size_t>(1),
                           std::multiplies<>{}),
           chunksize * rows);
}
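
// A minimal usage sketch (illustration only, not part of the C API surface):
// recovering the output shape for a hypothetical 3-class softprob prediction.
// All concrete numbers below are assumptions chosen for demonstration.
inline void ExampleCalcPredictShape() {
  std::vector<bst_ulong> shape;
  bst_ulong dim{0};
  // 100 rows, 4 features; the predictor returned 300 values in total, so the
  // per-row chunk is 300 / 100 = 3 (one value per class).
  CalcPredictShape(/*strict_shape=*/false, PredictionType::kValue, /*rows=*/100, /*cols=*/4,
                   /*chunksize=*/3, /*groups=*/3, /*rounds=*/10, &shape, &dim);
  // dim == 2 and shape == {100, 3}: a row-major [n_samples, n_classes] matrix.
}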

// Reverse the ntree_limit in old prediction API.
inline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner) {
  // On Python and R, `best_ntree_limit` is set to `best_iteration * num_parallel_tree`.
  // To reverse it we just divide it by `num_parallel_tree`.
  if (ntree_limit != 0) {
    learner->Configure();
    uint32_t num_parallel_tree = 0;

    Json config{Object()};
    learner->SaveConfig(&config);
    auto const &booster = get<String const>(config["learner"]["gradient_booster"]["name"]);
    if (booster == "gblinear") {
      num_parallel_tree = 0;
    } else if (booster == "dart") {
      num_parallel_tree =
          std::stoi(get<String const>(config["learner"]["gradient_booster"]["gbtree"]
                                            ["gbtree_model_param"]["num_parallel_tree"]));
    } else if (booster == "gbtree") {
      num_parallel_tree = std::stoi(get<String const>(
          config["learner"]["gradient_booster"]["gbtree_model_param"]["num_parallel_tree"]));
    } else {
      LOG(FATAL) << "Unknown booster:" << booster;
    }
    ntree_limit /= std::max(num_parallel_tree, 1u);
  }
  return ntree_limit;
}

inline float GetMissing(Json const &config) {
  float missing;
  auto const &obj = get<Object const>(config);
  auto it = obj.find("missing");
  if (it == obj.cend()) {
    LOG(FATAL) << "Argument `missing` is required.";
  }
  auto const &j_missing = it->second;
  if (IsA<Number const>(j_missing)) {
    missing = get<Number const>(j_missing);
  } else if (IsA<Integer const>(j_missing)) {
    missing = get<Integer const>(j_missing);
  } else {
    missing = nan("");
    TypeCheck<Number, Integer>(j_missing, "missing");
  }
  return missing;
}

// Safeguard some global variables from being changed by XGBoost.
class XGBoostAPIGuard {
#if defined(XGBOOST_USE_CUDA)
  int32_t device_id_{0};

  void SetGPUAttribute();
  void RestoreGPUAttribute();
#else
  void SetGPUAttribute() {}
  void RestoreGPUAttribute() {}
#endif

 public:
  XGBoostAPIGuard() { SetGPUAttribute(); }
  ~XGBoostAPIGuard() { RestoreGPUAttribute(); }
};

inline FeatureMap LoadFeatureMap(std::string const &uri) {
  FeatureMap feat;
  if (uri.size() != 0) {
    std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(uri.c_str(), "r"));
    dmlc::istream is(fs.get());
    feat.LoadText(is);
  }
  return feat;
}
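
// A minimal usage sketch (illustration only, not part of the C API surface):
// parsing the `missing` argument out of a JSON configuration string, the way
// a C API entry point would. The config literal is an assumption chosen for
// demonstration.
inline float ExampleGetMissing() {
  std::string args{R"({"missing": 0.0, "nthread": 16})"};
  Json config{Json::Load(StringView{args.c_str(), args.size()})};
  // `missing` is a JSON number here, so this returns 0.0f; omitting the key
  // entirely would abort with LOG(FATAL).
  return GetMissing(config);
}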

inline void GenerateFeatureMap(Learner const *learner,
                               std::vector<Json> const &custom_feature_names, size_t n_features,
                               FeatureMap *out_feature_map) {
  auto &feature_map = *out_feature_map;
  auto maybe = [&](std::vector<std::string> const &values, size_t i, std::string const &dft) {
    return values.empty() ? dft : values[i];
  };
  if (feature_map.Size() == 0) {
    // Use the feature names and types from booster.
    std::vector<std::string> feature_names;
    // priority:
    // 1. feature map.
    // 2. customized feature name.
    // 3. from booster
    // 4. default feature name.
    if (!custom_feature_names.empty()) {
      CHECK_EQ(custom_feature_names.size(), n_features) << "Incorrect number of feature names.";
      feature_names.resize(custom_feature_names.size());
      std::transform(custom_feature_names.begin(), custom_feature_names.end(),
                     feature_names.begin(),
                     [](Json const &name) { return get<String const>(name); });
    } else {
      learner->GetFeatureNames(&feature_names);
    }
    if (!feature_names.empty()) {
      CHECK_EQ(feature_names.size(), n_features) << "Incorrect number of feature names.";
    }

    std::vector<std::string> feature_types;
    learner->GetFeatureTypes(&feature_types);
    if (!feature_types.empty()) {
      CHECK_EQ(feature_types.size(), n_features) << "Incorrect number of feature types.";
    }

    for (size_t i = 0; i < n_features; ++i) {
      feature_map.PushBack(i, maybe(feature_names, i, "f" + std::to_string(i)).data(),
                           maybe(feature_types, i, "q").data());
    }
  }
  CHECK_EQ(feature_map.Size(), n_features);
}

void XGBBuildInfoDevice(Json *p_info);

template <typename JT>
auto const &RequiredArg(Json const &in, StringView key, StringView func) {
  auto const &obj = get<Object const>(in);
  auto it = obj.find(key);
  if (it == obj.cend() || IsA<Null>(it->second)) {
    LOG(FATAL) << "Argument `" << key << "` is required for `" << func << "`.";
  }
  TypeCheck<JT>(it->second, StringView{key});
  return get<JT const>(it->second);
}

template <typename JT, typename T>
auto const &OptionalArg(Json const &in, StringView key, T const &dft) {
  auto const &obj = get<Object const>(in);
  auto it = obj.find(key);
  if (it != obj.cend() && !IsA<Null>(it->second)) {
    TypeCheck<JT>(it->second, key);
    return get<JT const>(it->second);
  }
  return dft;
}

/**
 * \brief Get shared ptr from DMatrix C handle with additional checks.
 */
inline std::shared_ptr<DMatrix> CastDMatrixHandle(DMatrixHandle const handle) {
  auto pp_m = static_cast<std::shared_ptr<DMatrix> *>(handle);
  StringView msg{"Invalid DMatrix handle"};
  CHECK(pp_m) << msg;
  auto p_m = *pp_m;
  CHECK(p_m) << msg;
  return p_m;
}
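
// A minimal usage sketch (illustration only, not part of the C API surface):
// pulling typed arguments out of a call configuration with RequiredArg and
// OptionalArg. The keys mirror the style of the prediction entry points; the
// config is assumed to be supplied by the caller.
inline void ExampleArgExtraction(Json const &config) {
  // Aborts with LOG(FATAL) if `type` is absent or null; TypeCheck fails if it
  // is present but not an integer.
  auto type = RequiredArg<Integer>(config, "type", __func__);
  // Falls back to the supplied default when `training` is absent or null.
  auto training = OptionalArg<Boolean>(config, "training", false);
  (void)type;
  (void)training;
}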

namespace detail {
inline void EmptyHandle() {
  LOG(FATAL) << "DMatrix/Booster has not been initialized or has already been disposed.";
}

inline xgboost::Context const *BoosterCtx(BoosterHandle handle) {
  if (handle == nullptr) {
    EmptyHandle();
  }
  auto *learner = static_cast<Learner *>(handle);
  CHECK(learner);
  return learner->Ctx();
}

template <typename PtrT, typename I, typename T>
void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data,
                       std::size_t nindptr, std::string *indptr_str, std::string *indices_str,
                       std::string *data_str) {
  auto ndata = static_cast<std::size_t>(p_indptr[nindptr - 1]);
  // Construct array interfaces
  Json jindptr{Object{}};
  Json jindices{Object{}};
  Json jdata{Object{}};

  CHECK(p_indptr);
  jindptr["data"] =
      Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indptr)}, Json{true}}};
  jindptr["shape"] = std::vector<Json>{Json{nindptr}};
  jindptr["version"] = Integer{3};

  CHECK(p_indices);
  jindices["data"] =
      Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indices)}, Json{true}}};
  jindices["shape"] = std::vector<Json>{Json{ndata}};
  jindices["version"] = Integer{3};

  CHECK(p_data);
  jdata["data"] =
      Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_data)}, Json{true}}};
  jdata["shape"] = std::vector<Json>{Json{ndata}};
  jdata["version"] = Integer{3};

  std::string pindptr_typestr =
      linalg::detail::ArrayInterfaceHandler::TypeChar<PtrT>() + std::to_string(sizeof(PtrT));
  std::string ind_typestr =
      linalg::detail::ArrayInterfaceHandler::TypeChar<I>() + std::to_string(sizeof(I));
  std::string data_typestr =
      linalg::detail::ArrayInterfaceHandler::TypeChar<T>() + std::to_string(sizeof(T));
  if (DMLC_LITTLE_ENDIAN) {
    jindptr["typestr"] = String{"<" + pindptr_typestr};
    jindices["typestr"] = String{"<" + ind_typestr};
    jdata["typestr"] = String{"<" + data_typestr};
  } else {
    jindptr["typestr"] = String{">" + pindptr_typestr};
    jindices["typestr"] = String{">" + ind_typestr};
    jdata["typestr"] = String{">" + data_typestr};
  }
  Json::Dump(jindptr, indptr_str);
  Json::Dump(jindices, indices_str);
  Json::Dump(jdata, data_str);
}

/**
 * @brief Make array interface for other language bindings.
 */
template <typename G, typename H>
auto MakeGradientInterface(Context const *ctx, G const *grad, H const *hess,
                           std::size_t n_samples, std::size_t n_targets) {
  auto t_grad = linalg::MakeTensorView(ctx, common::Span{grad, n_samples * n_targets}, n_samples,
                                       n_targets);
  auto t_hess = linalg::MakeTensorView(ctx, common::Span{hess, n_samples * n_targets}, n_samples,
                                       n_targets);
  auto s_grad = linalg::ArrayInterfaceStr(t_grad);
  auto s_hess = linalg::ArrayInterfaceStr(t_hess);
  return std::make_tuple(s_grad, s_hess);
}

template <typename G, typename H>
struct CustomGradHessOp {
  linalg::MatrixView<G const> t_grad;
  linalg::MatrixView<H const> t_hess;
  linalg::MatrixView<GradientPair> d_gpair;

  CustomGradHessOp(linalg::MatrixView<G const> t_grad, linalg::MatrixView<H const> t_hess,
                   linalg::MatrixView<GradientPair> d_gpair)
      : t_grad{std::move(t_grad)}, t_hess{std::move(t_hess)}, d_gpair{std::move(d_gpair)} {}

  XGBOOST_DEVICE void operator()(std::size_t i) {
    auto [m, n] = linalg::UnravelIndex(i, t_grad.Shape(0), t_grad.Shape(1));
    auto g = t_grad(m, n);
    auto h = t_hess(m, n);
    // from struct of arrays to array of structs.
    d_gpair(m, n) = GradientPair{static_cast<float>(g), static_cast<float>(h)};
  }
};
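
// A minimal usage sketch (illustration only, not part of the C API surface):
// how the custom-objective path might combine the helpers above. It exposes
// raw gradient/hessian buffers as array-interface JSON, then packs them into
// the GradientPair layout consumed by XGBoost. The buffer extents and the
// sequential CPU loop are assumptions for demonstration.
inline void ExampleGradientRoundTrip(Context const *ctx, float const *grad, float const *hess,
                                     linalg::MatrixView<GradientPair> d_gpair,
                                     std::size_t n_samples, std::size_t n_targets) {
  // Two JSON strings a language binding can hand out as `__array_interface__`.
  auto [s_grad, s_hess] = MakeGradientInterface(ctx, grad, hess, n_samples, n_targets);
  (void)s_grad;
  (void)s_hess;

  // Convert struct-of-arrays (separate grad/hess buffers) into the
  // array-of-structs GradientPair matrix, one element per linear index.
  auto t_grad = linalg::MakeTensorView(ctx, common::Span{grad, n_samples * n_targets}, n_samples,
                                       n_targets);
  auto t_hess = linalg::MakeTensorView(ctx, common::Span{hess, n_samples * n_targets}, n_samples,
                                       n_targets);
  CustomGradHessOp<float, float> op{t_grad, t_hess, d_gpair};
  for (std::size_t i = 0; i < n_samples * n_targets; ++i) {
    op(i);
  }
}
}  // namespace detail
}  // namespace xgboost
#endif  // XGBOOST_C_API_C_API_UTILS_H_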