Implement feature score for linear model. (#7048)
* Add feature score support for linear model. * Port R interface to the new implementation. * Add linear model support in Python. Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
parent
b2d300e727
commit
663136aa08
@ -96,41 +96,44 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
|||||||
if (!(is.null(feature_names) || is.character(feature_names)))
|
if (!(is.null(feature_names) || is.character(feature_names)))
|
||||||
stop("feature_names: Has to be a character vector")
|
stop("feature_names: Has to be a character vector")
|
||||||
|
|
||||||
model_text_dump <- xgb.dump(model = model, with_stats = TRUE)
|
model <- xgb.Booster.complete(model)
|
||||||
|
config <- jsonlite::fromJSON(xgb.config(model))
|
||||||
# linear model
|
if (config$learner$gradient_booster$name == "gblinear") {
|
||||||
if (model_text_dump[2] == "bias:"){
|
args <- list(importance_type = "weight", feature_names = feature_names)
|
||||||
weight_index <- which(model_text_dump == "weight:") + 1
|
results <- .Call(
|
||||||
weights <- as.numeric(
|
XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
||||||
model_text_dump[weight_index:length(model_text_dump)]
|
|
||||||
)
|
)
|
||||||
|
names(results) <- c("features", "shape", "weight")
|
||||||
num_class <- NVL(model$params$num_class, 1)
|
n_classes <- if (length(results$shape) == 2) { results$shape[2] } else { 0 }
|
||||||
if (is.null(feature_names))
|
importance <- if (n_classes == 0) {
|
||||||
feature_names <- seq(to = length(weights) / num_class) - 1
|
data.table(Feature = results$features, Weight = results$weight)[order(-abs(Weight))]
|
||||||
if (length(feature_names) * num_class != length(weights))
|
|
||||||
stop("feature_names length does not match the number of features used in the model")
|
|
||||||
|
|
||||||
result <- if (num_class == 1) {
|
|
||||||
data.table(Feature = feature_names, Weight = weights)[order(-abs(Weight))]
|
|
||||||
} else {
|
} else {
|
||||||
data.table(Feature = rep(feature_names, each = num_class),
|
data.table(
|
||||||
Weight = weights,
|
Feature = rep(results$features, each = n_classes), Weight = results$weight, Class = seq_len(n_classes) - 1
|
||||||
Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
|
)[order(Class, -abs(Weight))]
|
||||||
}
|
}
|
||||||
} else { # tree model
|
} else {
|
||||||
result <- xgb.model.dt.tree(feature_names = feature_names,
|
concatenated <- list()
|
||||||
text = model_text_dump,
|
output_names <- vector()
|
||||||
trees = trees)[
|
for (importance_type in c("weight", "gain", "cover")) {
|
||||||
Feature != "Leaf", .(Gain = sum(Quality),
|
args <- list(importance_type = importance_type, feature_names = feature_names)
|
||||||
Cover = sum(Cover),
|
results <- .Call(
|
||||||
Frequency = .N), by = Feature][
|
XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
||||||
, `:=`(Gain = Gain / sum(Gain),
|
)
|
||||||
Cover = Cover / sum(Cover),
|
names(results) <- c("features", "shape", importance_type)
|
||||||
Frequency = Frequency / sum(Frequency))][
|
concatenated[
|
||||||
order(Gain, decreasing = TRUE)]
|
switch(importance_type, "weight" = "Frequency", "gain" = "Gain", "cover" = "Cover")
|
||||||
|
] <- results[importance_type]
|
||||||
|
output_names <- results$features
|
||||||
|
}
|
||||||
|
importance <- data.table(
|
||||||
|
Feature = output_names,
|
||||||
|
Gain = concatenated$Gain / sum(concatenated$Gain),
|
||||||
|
Cover = concatenated$Cover / sum(concatenated$Cover),
|
||||||
|
Frequency = concatenated$Frequency / sum(concatenated$Frequency)
|
||||||
|
)[order(Gain, decreasing = TRUE)]
|
||||||
}
|
}
|
||||||
result
|
importance
|
||||||
}
|
}
|
||||||
|
|
||||||
# Avoid error messages during CRAN check.
|
# Avoid error messages during CRAN check.
|
||||||
|
|||||||
@ -47,6 +47,7 @@ extern SEXP XGDMatrixSetInfo_R(SEXP, SEXP, SEXP);
|
|||||||
extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
|
extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
|
||||||
extern SEXP XGBSetGlobalConfig_R(SEXP);
|
extern SEXP XGBSetGlobalConfig_R(SEXP);
|
||||||
extern SEXP XGBGetGlobalConfig_R();
|
extern SEXP XGBGetGlobalConfig_R();
|
||||||
|
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
|
||||||
|
|
||||||
static const R_CallMethodDef CallEntries[] = {
|
static const R_CallMethodDef CallEntries[] = {
|
||||||
{"XGBoosterBoostOneIter_R", (DL_FUNC) &XGBoosterBoostOneIter_R, 4},
|
{"XGBoosterBoostOneIter_R", (DL_FUNC) &XGBoosterBoostOneIter_R, 4},
|
||||||
@ -81,6 +82,7 @@ static const R_CallMethodDef CallEntries[] = {
|
|||||||
{"XGDMatrixSliceDMatrix_R", (DL_FUNC) &XGDMatrixSliceDMatrix_R, 2},
|
{"XGDMatrixSliceDMatrix_R", (DL_FUNC) &XGDMatrixSliceDMatrix_R, 2},
|
||||||
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
|
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
|
||||||
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
|
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
|
||||||
|
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
|
||||||
{NULL, NULL, 0}
|
{NULL, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -38,11 +38,11 @@
|
|||||||
|
|
||||||
using namespace dmlc;
|
using namespace dmlc;
|
||||||
|
|
||||||
SEXP XGCheckNullPtr_R(SEXP handle) {
|
XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {
|
||||||
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
|
return ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void _DMatrixFinalizer(SEXP ext) {
|
XGB_DLL void _DMatrixFinalizer(SEXP ext) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||||
CHECK_CALL(XGDMatrixFree(R_ExternalPtrAddr(ext)));
|
CHECK_CALL(XGDMatrixFree(R_ExternalPtrAddr(ext)));
|
||||||
@ -50,14 +50,14 @@ void _DMatrixFinalizer(SEXP ext) {
|
|||||||
R_API_END();
|
R_API_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBSetGlobalConfig_R(SEXP json_str) {
|
XGB_DLL SEXP XGBSetGlobalConfig_R(SEXP json_str) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBSetGlobalConfig(CHAR(asChar(json_str))));
|
CHECK_CALL(XGBSetGlobalConfig(CHAR(asChar(json_str))));
|
||||||
R_API_END();
|
R_API_END();
|
||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBGetGlobalConfig_R() {
|
XGB_DLL SEXP XGBGetGlobalConfig_R() {
|
||||||
const char* json_str;
|
const char* json_str;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBGetGlobalConfig(&json_str));
|
CHECK_CALL(XGBGetGlobalConfig(&json_str));
|
||||||
@ -65,7 +65,7 @@ SEXP XGBGetGlobalConfig_R() {
|
|||||||
return mkString(json_str);
|
return mkString(json_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
DMatrixHandle handle;
|
DMatrixHandle handle;
|
||||||
@ -77,8 +77,7 @@ SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing) {
|
||||||
SEXP missing) {
|
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
SEXP dim = getAttrib(mat, R_DimSymbol);
|
SEXP dim = getAttrib(mat, R_DimSymbol);
|
||||||
@ -112,10 +111,8 @@ SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
|
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data,
|
||||||
SEXP indices,
|
SEXP num_row) {
|
||||||
SEXP data,
|
|
||||||
SEXP num_row) {
|
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
const int *p_indptr = INTEGER(indptr);
|
const int *p_indptr = INTEGER(indptr);
|
||||||
@ -151,7 +148,7 @@ SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
|
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
int len = length(idxset);
|
int len = length(idxset);
|
||||||
@ -171,7 +168,7 @@ SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
XGB_DLL SEXP XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
CHECK_CALL(XGDMatrixSaveBinary(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(fname)),
|
CHAR(asChar(fname)),
|
||||||
@ -180,7 +177,7 @@ SEXP XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
|
XGB_DLL SEXP XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
int len = length(array);
|
int len = length(array);
|
||||||
const char *name = CHAR(asChar(field));
|
const char *name = CHAR(asChar(field));
|
||||||
@ -214,7 +211,7 @@ SEXP XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
|
XGB_DLL SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
bst_ulong olen;
|
bst_ulong olen;
|
||||||
@ -232,7 +229,7 @@ SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixNumRow_R(SEXP handle) {
|
XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle) {
|
||||||
bst_ulong nrow;
|
bst_ulong nrow;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
|
CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrow));
|
||||||
@ -240,7 +237,7 @@ SEXP XGDMatrixNumRow_R(SEXP handle) {
|
|||||||
return ScalarInteger(static_cast<int>(nrow));
|
return ScalarInteger(static_cast<int>(nrow));
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGDMatrixNumCol_R(SEXP handle) {
|
XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle) {
|
||||||
bst_ulong ncol;
|
bst_ulong ncol;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncol));
|
CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncol));
|
||||||
@ -255,7 +252,7 @@ void _BoosterFinalizer(SEXP ext) {
|
|||||||
R_ClearExternalPtr(ext);
|
R_ClearExternalPtr(ext);
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterCreate_R(SEXP dmats) {
|
XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats) {
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
int len = length(dmats);
|
int len = length(dmats);
|
||||||
@ -272,7 +269,7 @@ SEXP XGBoosterCreate_R(SEXP dmats) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterCreateInEmptyObj_R(SEXP dmats, SEXP R_handle) {
|
XGB_DLL SEXP XGBoosterCreateInEmptyObj_R(SEXP dmats, SEXP R_handle) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
int len = length(dmats);
|
int len = length(dmats);
|
||||||
std::vector<void*> dvec;
|
std::vector<void*> dvec;
|
||||||
@ -287,7 +284,7 @@ SEXP XGBoosterCreateInEmptyObj_R(SEXP dmats, SEXP R_handle) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
XGB_DLL SEXP XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
CHECK_CALL(XGBoosterSetParam(R_ExternalPtrAddr(handle),
|
||||||
CHAR(asChar(name)),
|
CHAR(asChar(name)),
|
||||||
@ -296,7 +293,7 @@ SEXP XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
XGB_DLL SEXP XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
CHECK_CALL(XGBoosterUpdateOneIter(R_ExternalPtrAddr(handle),
|
||||||
asInteger(iter),
|
asInteger(iter),
|
||||||
@ -305,7 +302,7 @@ SEXP XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
XGB_DLL SEXP XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_EQ(length(grad), length(hess))
|
CHECK_EQ(length(grad), length(hess))
|
||||||
<< "gradient and hess must have same length";
|
<< "gradient and hess must have same length";
|
||||||
@ -328,7 +325,7 @@ SEXP XGBoosterBoostOneIter_R(SEXP handle, SEXP dtrain, SEXP grad, SEXP hess) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
||||||
const char *ret;
|
const char *ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_EQ(length(dmats), length(evnames))
|
CHECK_EQ(length(dmats), length(evnames))
|
||||||
@ -353,8 +350,8 @@ SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
|
|||||||
return mkString(ret);
|
return mkString(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
|
XGB_DLL SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
|
||||||
SEXP ntree_limit, SEXP training) {
|
SEXP ntree_limit, SEXP training) {
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
bst_ulong olen;
|
bst_ulong olen;
|
||||||
@ -374,7 +371,7 @@ SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config) {
|
XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config) {
|
||||||
SEXP r_out_shape;
|
SEXP r_out_shape;
|
||||||
SEXP r_out_result;
|
SEXP r_out_result;
|
||||||
SEXP r_out;
|
SEXP r_out;
|
||||||
@ -413,21 +410,21 @@ SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config) {
|
|||||||
return r_out;
|
return r_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
XGB_DLL SEXP XGBoosterLoadModel_R(SEXP handle, SEXP fname) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
|
CHECK_CALL(XGBoosterLoadModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
|
||||||
R_API_END();
|
R_API_END();
|
||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
XGB_DLL SEXP XGBoosterSaveModel_R(SEXP handle, SEXP fname) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
|
CHECK_CALL(XGBoosterSaveModel(R_ExternalPtrAddr(handle), CHAR(asChar(fname))));
|
||||||
R_API_END();
|
R_API_END();
|
||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterModelToRaw_R(SEXP handle) {
|
XGB_DLL SEXP XGBoosterModelToRaw_R(SEXP handle) {
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
bst_ulong olen;
|
bst_ulong olen;
|
||||||
@ -442,7 +439,7 @@ SEXP XGBoosterModelToRaw_R(SEXP handle) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
|
XGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
|
CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
|
||||||
RAW(raw),
|
RAW(raw),
|
||||||
@ -451,7 +448,7 @@ SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterSaveJsonConfig_R(SEXP handle) {
|
XGB_DLL SEXP XGBoosterSaveJsonConfig_R(SEXP handle) {
|
||||||
const char* ret;
|
const char* ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
bst_ulong len {0};
|
bst_ulong len {0};
|
||||||
@ -462,14 +459,14 @@ SEXP XGBoosterSaveJsonConfig_R(SEXP handle) {
|
|||||||
return mkString(ret);
|
return mkString(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value) {
|
XGB_DLL SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBoosterLoadJsonConfig(R_ExternalPtrAddr(handle), CHAR(asChar(value))));
|
CHECK_CALL(XGBoosterLoadJsonConfig(R_ExternalPtrAddr(handle), CHAR(asChar(value))));
|
||||||
R_API_END();
|
R_API_END();
|
||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterSerializeToBuffer_R(SEXP handle) {
|
XGB_DLL SEXP XGBoosterSerializeToBuffer_R(SEXP handle) {
|
||||||
SEXP ret;
|
SEXP ret;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
bst_ulong out_len;
|
bst_ulong out_len;
|
||||||
@ -484,7 +481,7 @@ SEXP XGBoosterSerializeToBuffer_R(SEXP handle) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw) {
|
XGB_DLL SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
CHECK_CALL(XGBoosterUnserializeFromBuffer(R_ExternalPtrAddr(handle),
|
CHECK_CALL(XGBoosterUnserializeFromBuffer(R_ExternalPtrAddr(handle),
|
||||||
RAW(raw),
|
RAW(raw),
|
||||||
@ -493,7 +490,7 @@ SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_format) {
|
XGB_DLL SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_format) {
|
||||||
SEXP out;
|
SEXP out;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
bst_ulong olen;
|
bst_ulong olen;
|
||||||
@ -530,7 +527,7 @@ SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_for
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) {
|
XGB_DLL SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) {
|
||||||
SEXP out;
|
SEXP out;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
int success;
|
int success;
|
||||||
@ -550,7 +547,7 @@ SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) {
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) {
|
XGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) {
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
const char *v = isNull(val) ? nullptr : CHAR(asChar(val));
|
const char *v = isNull(val) ? nullptr : CHAR(asChar(val));
|
||||||
CHECK_CALL(XGBoosterSetAttr(R_ExternalPtrAddr(handle),
|
CHECK_CALL(XGBoosterSetAttr(R_ExternalPtrAddr(handle),
|
||||||
@ -559,7 +556,7 @@ SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) {
|
|||||||
return R_NilValue;
|
return R_NilValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP XGBoosterGetAttrNames_R(SEXP handle) {
|
XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle) {
|
||||||
SEXP out;
|
SEXP out;
|
||||||
R_API_BEGIN();
|
R_API_BEGIN();
|
||||||
bst_ulong len;
|
bst_ulong len;
|
||||||
@ -578,3 +575,51 @@ SEXP XGBoosterGetAttrNames_R(SEXP handle) {
|
|||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
|
||||||
|
SEXP out_features_sexp;
|
||||||
|
SEXP out_scores_sexp;
|
||||||
|
SEXP out_shape_sexp;
|
||||||
|
SEXP r_out;
|
||||||
|
|
||||||
|
R_API_BEGIN();
|
||||||
|
char const *c_json_config = CHAR(asChar(json_config));
|
||||||
|
bst_ulong out_n_features;
|
||||||
|
char const **out_features;
|
||||||
|
|
||||||
|
bst_ulong out_dim;
|
||||||
|
bst_ulong const *out_shape;
|
||||||
|
float const *out_scores;
|
||||||
|
|
||||||
|
CHECK_CALL(XGBoosterFeatureScore(R_ExternalPtrAddr(handle), c_json_config,
|
||||||
|
&out_n_features, &out_features,
|
||||||
|
&out_dim, &out_shape, &out_scores));
|
||||||
|
|
||||||
|
out_shape_sexp = PROTECT(allocVector(INTSXP, out_dim));
|
||||||
|
size_t len = 1;
|
||||||
|
for (size_t i = 0; i < out_dim; ++i) {
|
||||||
|
INTEGER(out_shape_sexp)[i] = out_shape[i];
|
||||||
|
len *= out_shape[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
out_scores_sexp = PROTECT(allocVector(REALSXP, len));
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (omp_ulong i = 0; i < len; ++i) {
|
||||||
|
REAL(out_scores_sexp)[i] = out_scores[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
out_features_sexp = PROTECT(allocVector(STRSXP, out_n_features));
|
||||||
|
for (size_t i = 0; i < out_n_features; ++i) {
|
||||||
|
SET_STRING_ELT(out_features_sexp, i, mkChar(out_features[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
r_out = PROTECT(allocVector(VECSXP, 3));
|
||||||
|
SET_VECTOR_ELT(r_out, 0, out_features_sexp);
|
||||||
|
SET_VECTOR_ELT(r_out, 1, out_shape_sexp);
|
||||||
|
SET_VECTOR_ELT(r_out, 2, out_scores_sexp);
|
||||||
|
|
||||||
|
R_API_END();
|
||||||
|
UNPROTECT(4);
|
||||||
|
|
||||||
|
return r_out;
|
||||||
|
}
|
||||||
|
|||||||
@ -275,4 +275,12 @@ XGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val);
|
|||||||
*/
|
*/
|
||||||
XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
|
XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Get feature scores from the model.
|
||||||
|
* \param json_config See `XGBoosterFeatureScore` in xgboost c_api.h
|
||||||
|
* \return A vector with the first element as feature names, second element as shape of
|
||||||
|
* feature scores and third element as feature scores.
|
||||||
|
*/
|
||||||
|
XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
|
||||||
|
|
||||||
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
|
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
|
||||||
|
|||||||
@ -11,8 +11,8 @@ DEMO_DIR = os.path.join(XGBOOST_ROOT_DIR, 'demo')
|
|||||||
|
|
||||||
# simple example
|
# simple example
|
||||||
# load file from text file, also binary buffer generated by xgboost
|
# load file from text file, also binary buffer generated by xgboost
|
||||||
dtrain = xgb.DMatrix(os.path.join(DEMO_DIR, 'data', 'agaricus.txt.train'))
|
dtrain = xgb.DMatrix(os.path.join(DEMO_DIR, 'data', 'agaricus.txt.train?indexing_mode=1'))
|
||||||
dtest = xgb.DMatrix(os.path.join(DEMO_DIR, 'data', 'agaricus.txt.test'))
|
dtest = xgb.DMatrix(os.path.join(DEMO_DIR, 'data', 'agaricus.txt.test?indexing_mode=1'))
|
||||||
|
|
||||||
# specify parameters via map, definition are same as c++ version
|
# specify parameters via map, definition are same as c++ version
|
||||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||||
|
|||||||
@ -1195,10 +1195,13 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
|
|||||||
const char ***out_features);
|
const char ***out_features);
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief Calculate feature scores for tree models.
|
* \brief Calculate feature scores for tree models. When used on linear model, only the
|
||||||
|
* `weight` importance type is defined, and the output scores are a row-major matrix with shape
|
||||||
|
* [n_features, n_classes] for multi-class model. For tree model, out_n_features is always
|
||||||
|
* equal to the number of output scores, and multiple importance types are defined.
|
||||||
*
|
*
|
||||||
* \param handle An instance of Booster
|
* \param handle An instance of Booster
|
||||||
* \param json_config Parameters for computing scores. Accepted JSON keys are:
|
* \param json_config Parameters for computing scores. Accepted JSON keys are:
|
||||||
* - importance_type: A JSON string with following possible values:
|
* - importance_type: A JSON string with following possible values:
|
||||||
* * 'weight': the number of times a feature is used to split the data across all trees.
|
* * 'weight': the number of times a feature is used to split the data across all trees.
|
||||||
* * 'gain': the average gain across all splits the feature is used in.
|
* * 'gain': the average gain across all splits the feature is used in.
|
||||||
@ -1206,15 +1209,20 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
|
|||||||
* * 'total_gain': the total gain across all splits the feature is used in.
|
* * 'total_gain': the total gain across all splits the feature is used in.
|
||||||
* * 'total_cover': the total coverage across all splits the feature is used in.
|
* * 'total_cover': the total coverage across all splits the feature is used in.
|
||||||
* - feature_map: An optional JSON string with URI or path to the feature map file.
|
* - feature_map: An optional JSON string with URI or path to the feature map file.
|
||||||
|
* - feature_names: An optional JSON array with string names for each feature.
|
||||||
*
|
*
|
||||||
* \param out_length Length of output arrays.
|
* \param out_n_features Length of output feature names.
|
||||||
* \param out_features An array of string as feature names, ordered the same as output scores.
|
* \param out_features An array of string as feature names, ordered the same as output scores.
|
||||||
* \param out_scores An array of floating point as feature scores.
|
* \param out_dim Dimension of output feature scores.
|
||||||
|
* \param out_shape Shape of output feature scores with length of `out_dim`.
|
||||||
|
* \param out_scores An array of floating point as feature scores with shape of `out_shape`.
|
||||||
*
|
*
|
||||||
* \return 0 when success, -1 when failure happens
|
* \return 0 when success, -1 when failure happens
|
||||||
*/
|
*/
|
||||||
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
|
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *json_config,
|
||||||
bst_ulong *out_length,
|
bst_ulong *out_n_features,
|
||||||
const char ***out_features,
|
char const ***out_features,
|
||||||
float **out_scores);
|
bst_ulong *out_dim,
|
||||||
|
bst_ulong const **out_shape,
|
||||||
|
float const **out_scores);
|
||||||
#endif // XGBOOST_C_API_H_
|
#endif // XGBOOST_C_API_H_
|
||||||
|
|||||||
@ -184,9 +184,7 @@ class GradientBooster : public Model, public Configurable {
|
|||||||
|
|
||||||
virtual void FeatureScore(std::string const &importance_type,
|
virtual void FeatureScore(std::string const &importance_type,
|
||||||
std::vector<bst_feature_t> *features,
|
std::vector<bst_feature_t> *features,
|
||||||
std::vector<float> *scores) const {
|
std::vector<float> *scores) const = 0;
|
||||||
LOG(FATAL) << "`feature_score` is not implemented for current booster.";
|
|
||||||
}
|
|
||||||
/*!
|
/*!
|
||||||
* \brief Whether the current booster uses GPU.
|
* \brief Whether the current booster uses GPU.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -13,6 +13,7 @@
|
|||||||
#include <array>
|
#include <array>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
/*!
|
/*!
|
||||||
@ -59,6 +60,13 @@ template <typename T> class MatrixView {
|
|||||||
strides_[0] = shape[1];
|
strides_[0] = shape[1];
|
||||||
strides_[1] = 1;
|
strides_[1] = 1;
|
||||||
}
|
}
|
||||||
|
MatrixView(std::vector<T> *vec, std::array<size_t, 2> shape)
|
||||||
|
: device_{GenericParameter::kCpuId}, values_{*vec} {
|
||||||
|
CHECK_EQ(vec->size(), shape[0] * shape[1]);
|
||||||
|
std::copy(shape.cbegin(), shape.cend(), shape_);
|
||||||
|
strides_[0] = shape[1];
|
||||||
|
strides_[1] = 1;
|
||||||
|
}
|
||||||
MatrixView(HostDeviceVector<std::remove_const_t<T>> const *vec,
|
MatrixView(HostDeviceVector<std::remove_const_t<T>> const *vec,
|
||||||
std::array<size_t, 2> shape, int32_t device)
|
std::array<size_t, 2> shape, int32_t device)
|
||||||
: device_{device}, values_{InferValues(vec, device)} {
|
: device_{device}, values_{InferValues(vec, device)} {
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2014 by Contributors
|
Copyright (c) 2014-2021 by Contributors
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
You may obtain a copy of the License at
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
Unless required by applicable law or agreed to in writing, software
|
||||||
@ -32,6 +32,9 @@ import org.junit.Test;
|
|||||||
* @author hzx
|
* @author hzx
|
||||||
*/
|
*/
|
||||||
public class BoosterImplTest {
|
public class BoosterImplTest {
|
||||||
|
private String train_uri = "../../demo/data/agaricus.txt.train?indexing_mode=1";
|
||||||
|
private String test_uri = "../../demo/data/agaricus.txt.test?indexing_mode=1";
|
||||||
|
|
||||||
public static class EvalError implements IEvaluation {
|
public static class EvalError implements IEvaluation {
|
||||||
@Override
|
@Override
|
||||||
public String getMetric() {
|
public String getMetric() {
|
||||||
@ -87,8 +90,8 @@ public class BoosterImplTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testBoosterBasic() throws XGBoostError, IOException {
|
public void testBoosterBasic() throws XGBoostError, IOException {
|
||||||
|
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
|
|
||||||
@ -103,8 +106,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void saveLoadModelWithPath() throws XGBoostError, IOException {
|
public void saveLoadModelWithPath() throws XGBoostError, IOException {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
IEvaluation eval = new EvalError();
|
IEvaluation eval = new EvalError();
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
@ -121,8 +124,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void saveLoadModelWithStream() throws XGBoostError, IOException {
|
public void saveLoadModelWithStream() throws XGBoostError, IOException {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
|
|
||||||
@ -310,8 +313,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBoosterEarlyStop() throws XGBoostError, IOException {
|
public void testBoosterEarlyStop() throws XGBoostError, IOException {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||||
{
|
{
|
||||||
put("max_depth", 3);
|
put("max_depth", 3);
|
||||||
@ -363,8 +366,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testQuantileHistoDepthWise() throws XGBoostError {
|
public void testQuantileHistoDepthWise() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||||
{
|
{
|
||||||
put("max_depth", 3);
|
put("max_depth", 3);
|
||||||
@ -383,8 +386,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testQuantileHistoLossGuide() throws XGBoostError {
|
public void testQuantileHistoLossGuide() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||||
{
|
{
|
||||||
put("max_depth", 3);
|
put("max_depth", 3);
|
||||||
@ -404,8 +407,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testQuantileHistoLossGuideMaxBin() throws XGBoostError {
|
public void testQuantileHistoLossGuideMaxBin() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||||
{
|
{
|
||||||
put("max_depth", 3);
|
put("max_depth", 3);
|
||||||
@ -425,8 +428,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDumpModelJson() throws XGBoostError {
|
public void testDumpModelJson() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
String[] dump = booster.getModelDump("", false, "json");
|
String[] dump = booster.getModelDump("", false, "json");
|
||||||
@ -441,8 +444,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetFeatureScore() throws XGBoostError {
|
public void testGetFeatureScore() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
String[] featureNames = new String[126];
|
String[] featureNames = new String[126];
|
||||||
@ -453,8 +456,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetFeatureImportanceGain() throws XGBoostError {
|
public void testGetFeatureImportanceGain() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
String[] featureNames = new String[126];
|
String[] featureNames = new String[126];
|
||||||
@ -465,8 +468,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetFeatureImportanceTotalGain() throws XGBoostError {
|
public void testGetFeatureImportanceTotalGain() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
String[] featureNames = new String[126];
|
String[] featureNames = new String[126];
|
||||||
@ -477,8 +480,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetFeatureImportanceCover() throws XGBoostError {
|
public void testGetFeatureImportanceCover() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
String[] featureNames = new String[126];
|
String[] featureNames = new String[126];
|
||||||
@ -489,8 +492,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetFeatureImportanceTotalCover() throws XGBoostError {
|
public void testGetFeatureImportanceTotalCover() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
String[] featureNames = new String[126];
|
String[] featureNames = new String[126];
|
||||||
@ -501,7 +504,7 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testQuantileHistoDepthwiseMaxDepth() throws XGBoostError {
|
public void testQuantileHistoDepthwiseMaxDepth() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||||
{
|
{
|
||||||
put("max_depth", 3);
|
put("max_depth", 3);
|
||||||
@ -519,8 +522,8 @@ public class BoosterImplTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testQuantileHistoDepthwiseMaxDepthMaxBin() throws XGBoostError {
|
public void testQuantileHistoDepthwiseMaxDepthMaxBin() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||||
{
|
{
|
||||||
put("max_depth", 3);
|
put("max_depth", 3);
|
||||||
@ -545,7 +548,7 @@ public class BoosterImplTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testCV() throws XGBoostError {
|
public void testCV() throws XGBoostError {
|
||||||
//load train mat
|
//load train mat
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
|
|
||||||
//set params
|
//set params
|
||||||
Map<String, Object> param = new HashMap<String, Object>() {
|
Map<String, Object> param = new HashMap<String, Object>() {
|
||||||
@ -573,8 +576,8 @@ public class BoosterImplTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testTrainFromExistingModel() throws XGBoostError, IOException {
|
public void testTrainFromExistingModel() throws XGBoostError, IOException {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
IEvaluation eval = new EvalError();
|
IEvaluation eval = new EvalError();
|
||||||
|
|
||||||
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
Map<String, Object> paramMap = new HashMap<String, Object>() {
|
||||||
@ -624,8 +627,8 @@ public class BoosterImplTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testSetAndGetAttrs() throws XGBoostError {
|
public void testSetAndGetAttrs() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
booster.setAttr("testKey1", "testValue1");
|
booster.setAttr("testKey1", "testValue1");
|
||||||
@ -654,10 +657,10 @@ public class BoosterImplTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testGetNumFeature() throws XGBoostError {
|
public void testGetNumFeature() throws XGBoostError {
|
||||||
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
|
DMatrix trainMat = new DMatrix(this.train_uri);
|
||||||
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
|
DMatrix testMat = new DMatrix(this.test_uri);
|
||||||
|
|
||||||
Booster booster = trainBooster(trainMat, testMat);
|
Booster booster = trainBooster(trainMat, testMat);
|
||||||
TestCase.assertEquals(booster.getNumFeature(), 127);
|
TestCase.assertEquals(booster.getNumFeature(), 126);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2132,47 +2132,18 @@ class Booster(object):
|
|||||||
fmap = os.fspath(os.path.expanduser(fmap))
|
fmap = os.fspath(os.path.expanduser(fmap))
|
||||||
length = c_bst_ulong()
|
length = c_bst_ulong()
|
||||||
sarr = ctypes.POINTER(ctypes.c_char_p)()
|
sarr = ctypes.POINTER(ctypes.c_char_p)()
|
||||||
if self.feature_names is not None and fmap == '':
|
_check_call(_LIB.XGBoosterDumpModelEx(self.handle,
|
||||||
flen = len(self.feature_names)
|
c_str(fmap),
|
||||||
|
ctypes.c_int(with_stats),
|
||||||
fname = from_pystr_to_cstr(self.feature_names)
|
c_str(dump_format),
|
||||||
|
ctypes.byref(length),
|
||||||
if self.feature_types is None:
|
ctypes.byref(sarr)))
|
||||||
# use quantitative as default
|
|
||||||
# {'q': quantitative, 'i': indicator}
|
|
||||||
ftype = from_pystr_to_cstr(['q'] * flen)
|
|
||||||
else:
|
|
||||||
ftype = from_pystr_to_cstr(self.feature_types)
|
|
||||||
_check_call(_LIB.XGBoosterDumpModelExWithFeatures(
|
|
||||||
self.handle,
|
|
||||||
ctypes.c_int(flen),
|
|
||||||
fname,
|
|
||||||
ftype,
|
|
||||||
ctypes.c_int(with_stats),
|
|
||||||
c_str(dump_format),
|
|
||||||
ctypes.byref(length),
|
|
||||||
ctypes.byref(sarr)))
|
|
||||||
else:
|
|
||||||
if fmap != '' and not os.path.exists(fmap):
|
|
||||||
raise ValueError("No such file: {0}".format(fmap))
|
|
||||||
_check_call(_LIB.XGBoosterDumpModelEx(self.handle,
|
|
||||||
c_str(fmap),
|
|
||||||
ctypes.c_int(with_stats),
|
|
||||||
c_str(dump_format),
|
|
||||||
ctypes.byref(length),
|
|
||||||
ctypes.byref(sarr)))
|
|
||||||
res = from_cstr_to_pystr(sarr, length)
|
res = from_cstr_to_pystr(sarr, length)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def get_fscore(self, fmap=''):
|
def get_fscore(self, fmap=''):
|
||||||
"""Get feature importance of each feature.
|
"""Get feature importance of each feature.
|
||||||
|
|
||||||
.. note:: Feature importance is defined only for tree boosters
|
|
||||||
|
|
||||||
Feature importance is only defined when the decision tree model is chosen as base
|
|
||||||
learner (`booster=gbtree`). It is not defined for other base learner types, such
|
|
||||||
as linear learners (`booster=gblinear`).
|
|
||||||
|
|
||||||
.. note:: Zero-importance features will not be included
|
.. note:: Zero-importance features will not be included
|
||||||
|
|
||||||
Keep in mind that this function does not include zero-importance feature, i.e.
|
Keep in mind that this function does not include zero-importance feature, i.e.
|
||||||
@ -2190,7 +2161,7 @@ class Booster(object):
|
|||||||
self, fmap: os.PathLike = '', importance_type: str = 'weight'
|
self, fmap: os.PathLike = '', importance_type: str = 'weight'
|
||||||
) -> Dict[str, float]:
|
) -> Dict[str, float]:
|
||||||
"""Get feature importance of each feature.
|
"""Get feature importance of each feature.
|
||||||
Importance type can be defined as:
|
For tree models, the importance type can be defined as:
|
||||||
|
|
||||||
* 'weight': the number of times a feature is used to split the data across all trees.
|
* 'weight': the number of times a feature is used to split the data across all trees.
|
||||||
* 'gain': the average gain across all splits the feature is used in.
|
* 'gain': the average gain across all splits the feature is used in.
|
||||||
@ -2198,11 +2169,15 @@ class Booster(object):
|
|||||||
* 'total_gain': the total gain across all splits the feature is used in.
|
* 'total_gain': the total gain across all splits the feature is used in.
|
||||||
* 'total_cover': the total coverage across all splits the feature is used in.
|
* 'total_cover': the total coverage across all splits the feature is used in.
|
||||||
|
|
||||||
.. note:: Feature importance is defined only for tree boosters
|
.. note::
|
||||||
|
|
||||||
Feature importance is only defined when the decision tree model is chosen as
|
For linear models, only "weight" is defined, and it is the normalized coefficients
|
||||||
base learner (`booster=gbtree` or `booster=dart`). It is not defined for other
|
without bias.
|
||||||
base learner types, such as linear learners (`booster=gblinear`).
|
|
||||||
|
.. note:: Zero-importance features will not be included
|
||||||
|
|
||||||
|
Keep in mind that this function does not include zero-importance feature, i.e.
|
||||||
|
those features that have not been used in any split conditions.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@ -2213,7 +2188,9 @@ class Booster(object):
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
A map between feature names and their scores.
|
A map between feature names and their scores. When `gblinear` is used for
|
||||||
|
multi-class classification, the scores for each feature are a list with length
|
||||||
|
`n_classes`, otherwise they're scalars.
|
||||||
"""
|
"""
|
||||||
fmap = os.fspath(os.path.expanduser(fmap))
|
fmap = os.fspath(os.path.expanduser(fmap))
|
||||||
args = from_pystr_to_cstr(
|
args = from_pystr_to_cstr(
|
||||||
@ -2221,21 +2198,31 @@ class Booster(object):
|
|||||||
)
|
)
|
||||||
features = ctypes.POINTER(ctypes.c_char_p)()
|
features = ctypes.POINTER(ctypes.c_char_p)()
|
||||||
scores = ctypes.POINTER(ctypes.c_float)()
|
scores = ctypes.POINTER(ctypes.c_float)()
|
||||||
length = c_bst_ulong()
|
n_out_features = c_bst_ulong()
|
||||||
|
out_dim = c_bst_ulong()
|
||||||
|
shape = ctypes.POINTER(c_bst_ulong)()
|
||||||
|
|
||||||
_check_call(
|
_check_call(
|
||||||
_LIB.XGBoosterFeatureScore(
|
_LIB.XGBoosterFeatureScore(
|
||||||
self.handle,
|
self.handle,
|
||||||
args,
|
args,
|
||||||
ctypes.byref(length),
|
ctypes.byref(n_out_features),
|
||||||
ctypes.byref(features),
|
ctypes.byref(features),
|
||||||
ctypes.byref(scores)
|
ctypes.byref(out_dim),
|
||||||
|
ctypes.byref(shape),
|
||||||
|
ctypes.byref(scores),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
features_arr = from_cstr_to_pystr(features, length)
|
features_arr = from_cstr_to_pystr(features, n_out_features)
|
||||||
scores_arr = ctypes2numpy(scores, length.value, np.float32)
|
scores_arr = _prediction_output(shape, out_dim, scores, False)
|
||||||
|
|
||||||
results = {}
|
results = {}
|
||||||
for feat, score in zip(features_arr, scores_arr):
|
if len(scores_arr.shape) > 1 and scores_arr.shape[1] > 1:
|
||||||
results[feat] = float(score)
|
for feat, score in zip(features_arr, scores_arr):
|
||||||
|
results[feat] = [float(s) for s in score]
|
||||||
|
else:
|
||||||
|
for feat, score in zip(features_arr, scores_arr):
|
||||||
|
results[feat] = float(score)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def trees_to_dataframe(self, fmap=''):
|
def trees_to_dataframe(self, fmap=''):
|
||||||
|
|||||||
@ -156,9 +156,14 @@ __model_doc = f'''
|
|||||||
[2, 3, 4]], where each inner list is a group of indices of features
|
[2, 3, 4]], where each inner list is a group of indices of features
|
||||||
that are allowed to interact with each other. See tutorial for more
|
that are allowed to interact with each other. See tutorial for more
|
||||||
information
|
information
|
||||||
importance_type: string, default "gain"
|
importance_type: Optional[str]
|
||||||
The feature importance type for the feature_importances\\_ property:
|
The feature importance type for the feature_importances\\_ property:
|
||||||
either "gain", "weight", "cover", "total_gain" or "total_cover".
|
|
||||||
|
* For tree model, it's either "gain", "weight", "cover", "total_gain" or
|
||||||
|
"total_cover".
|
||||||
|
* For linear models, only "weight" is defined, and it is the normalized coefficients
|
||||||
|
without bias.
|
||||||
|
|
||||||
gpu_id : Optional[int]
|
gpu_id : Optional[int]
|
||||||
Device ordinal.
|
Device ordinal.
|
||||||
validate_parameters : Optional[bool]
|
validate_parameters : Optional[bool]
|
||||||
@ -382,7 +387,7 @@ class XGBModel(XGBModelBase):
|
|||||||
num_parallel_tree: Optional[int] = None,
|
num_parallel_tree: Optional[int] = None,
|
||||||
monotone_constraints: Optional[Union[Dict[str, int], str]] = None,
|
monotone_constraints: Optional[Union[Dict[str, int], str]] = None,
|
||||||
interaction_constraints: Optional[Union[str, List[Tuple[str]]]] = None,
|
interaction_constraints: Optional[Union[str, List[Tuple[str]]]] = None,
|
||||||
importance_type: str = "gain",
|
importance_type: Optional[str] = None,
|
||||||
gpu_id: Optional[int] = None,
|
gpu_id: Optional[int] = None,
|
||||||
validate_parameters: Optional[bool] = None,
|
validate_parameters: Optional[bool] = None,
|
||||||
predictor: Optional[str] = None,
|
predictor: Optional[str] = None,
|
||||||
@ -991,29 +996,26 @@ class XGBModel(XGBModelBase):
|
|||||||
@property
|
@property
|
||||||
def feature_importances_(self) -> np.ndarray:
|
def feature_importances_(self) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Feature importances property
|
Feature importances property; the return value depends on the `importance_type` parameter.
|
||||||
|
|
||||||
.. note:: Feature importance is defined only for tree boosters
|
|
||||||
|
|
||||||
Feature importance is only defined when the decision tree model is chosen as base
|
|
||||||
learner (`booster=gbtree`). It is not defined for other base learner types, such
|
|
||||||
as linear learners (`booster=gblinear`).
|
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
feature_importances_ : array of shape ``[n_features]``
|
feature_importances_ : array of shape ``[n_features]`` except for multi-class
|
||||||
|
linear model, which returns an array with shape `(n_features, n_classes)`
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if self.get_params()['booster'] not in {'gbtree', 'dart'}:
|
|
||||||
raise AttributeError(
|
|
||||||
'Feature importance is not defined for Booster type {}'
|
|
||||||
.format(self.booster))
|
|
||||||
b: Booster = self.get_booster()
|
b: Booster = self.get_booster()
|
||||||
score = b.get_score(importance_type=self.importance_type)
|
|
||||||
|
def dft() -> str:
|
||||||
|
return "weight" if self.booster == "gblinear" else "gain"
|
||||||
|
score = b.get_score(
|
||||||
|
importance_type=self.importance_type if self.importance_type else dft()
|
||||||
|
)
|
||||||
if b.feature_names is None:
|
if b.feature_names is None:
|
||||||
feature_names = ["f{0}".format(i) for i in range(self.n_features_in_)]
|
feature_names = ["f{0}".format(i) for i in range(self.n_features_in_)]
|
||||||
else:
|
else:
|
||||||
feature_names = b.feature_names
|
feature_names = b.feature_names
|
||||||
|
# gblinear returns all features so the `get` in next line is only for gbtree.
|
||||||
all_features = [score.get(f, 0.) for f in feature_names]
|
all_features = [score.get(f, 0.) for f in feature_names]
|
||||||
all_features_arr = np.array(all_features, dtype=np.float32)
|
all_features_arr = np.array(all_features, dtype=np.float32)
|
||||||
total = all_features_arr.sum()
|
total = all_features_arr.sum()
|
||||||
|
|||||||
@ -927,14 +927,17 @@ XGB_DLL int XGBoosterSlice(BoosterHandle handle, int begin_layer,
|
|||||||
API_END();
|
API_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void XGBoostDumpModelImpl(BoosterHandle handle, const FeatureMap &fmap,
|
inline void XGBoostDumpModelImpl(BoosterHandle handle, FeatureMap* fmap,
|
||||||
int with_stats, const char *format,
|
int with_stats, const char *format,
|
||||||
xgboost::bst_ulong *len,
|
xgboost::bst_ulong *len,
|
||||||
const char ***out_models) {
|
const char ***out_models) {
|
||||||
auto *bst = static_cast<Learner*>(handle);
|
auto *bst = static_cast<Learner*>(handle);
|
||||||
|
bst->Configure();
|
||||||
|
GenerateFeatureMap(bst, {}, bst->GetNumFeature(), fmap);
|
||||||
|
|
||||||
std::vector<std::string>& str_vecs = bst->GetThreadLocal().ret_vec_str;
|
std::vector<std::string>& str_vecs = bst->GetThreadLocal().ret_vec_str;
|
||||||
std::vector<const char*>& charp_vecs = bst->GetThreadLocal().ret_vec_charp;
|
std::vector<const char*>& charp_vecs = bst->GetThreadLocal().ret_vec_charp;
|
||||||
str_vecs = bst->DumpModel(fmap, with_stats != 0, format);
|
str_vecs = bst->DumpModel(*fmap, with_stats != 0, format);
|
||||||
charp_vecs.resize(str_vecs.size());
|
charp_vecs.resize(str_vecs.size());
|
||||||
for (size_t i = 0; i < str_vecs.size(); ++i) {
|
for (size_t i = 0; i < str_vecs.size(); ++i) {
|
||||||
charp_vecs[i] = str_vecs[i].c_str();
|
charp_vecs[i] = str_vecs[i].c_str();
|
||||||
@ -962,14 +965,9 @@ XGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle,
|
|||||||
const char*** out_models) {
|
const char*** out_models) {
|
||||||
API_BEGIN();
|
API_BEGIN();
|
||||||
CHECK_HANDLE();
|
CHECK_HANDLE();
|
||||||
FeatureMap featmap;
|
std::string uri{fmap};
|
||||||
if (strlen(fmap) != 0) {
|
FeatureMap featmap = LoadFeatureMap(uri);
|
||||||
std::unique_ptr<dmlc::Stream> fs(
|
XGBoostDumpModelImpl(handle, &featmap, with_stats, format, len, out_models);
|
||||||
dmlc::Stream::Create(fmap, "r"));
|
|
||||||
dmlc::istream is(fs.get());
|
|
||||||
featmap.LoadText(is);
|
|
||||||
}
|
|
||||||
XGBoostDumpModelImpl(handle, featmap, with_stats, format, len, out_models);
|
|
||||||
API_END();
|
API_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -980,8 +978,8 @@ XGB_DLL int XGBoosterDumpModelWithFeatures(BoosterHandle handle,
|
|||||||
int with_stats,
|
int with_stats,
|
||||||
xgboost::bst_ulong* len,
|
xgboost::bst_ulong* len,
|
||||||
const char*** out_models) {
|
const char*** out_models) {
|
||||||
return XGBoosterDumpModelExWithFeatures(handle, fnum, fname, ftype, with_stats,
|
return XGBoosterDumpModelExWithFeatures(handle, fnum, fname, ftype,
|
||||||
"text", len, out_models);
|
with_stats, "text", len, out_models);
|
||||||
}
|
}
|
||||||
|
|
||||||
XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle,
|
XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle,
|
||||||
@ -998,7 +996,7 @@ XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle,
|
|||||||
for (int i = 0; i < fnum; ++i) {
|
for (int i = 0; i < fnum; ++i) {
|
||||||
featmap.PushBack(i, fname[i], ftype[i]);
|
featmap.PushBack(i, fname[i], ftype[i]);
|
||||||
}
|
}
|
||||||
XGBoostDumpModelImpl(handle, featmap, with_stats, format, len, out_models);
|
XGBoostDumpModelImpl(handle, &featmap, with_stats, format, len, out_models);
|
||||||
API_END();
|
API_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1098,11 +1096,12 @@ XGB_DLL int XGBoosterGetStrFeatureInfo(BoosterHandle handle, const char *field,
|
|||||||
API_END();
|
API_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle,
|
XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, char const *json_config,
|
||||||
const char *json_config,
|
xgboost::bst_ulong *out_n_features,
|
||||||
xgboost::bst_ulong* out_length,
|
char const ***out_features,
|
||||||
const char ***out_features,
|
bst_ulong *out_dim,
|
||||||
float **out_scores) {
|
bst_ulong const **out_shape,
|
||||||
|
float const **out_scores) {
|
||||||
API_BEGIN();
|
API_BEGIN();
|
||||||
CHECK_HANDLE();
|
CHECK_HANDLE();
|
||||||
auto *learner = static_cast<Learner *>(handle);
|
auto *learner = static_cast<Learner *>(handle);
|
||||||
@ -1113,14 +1112,17 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle,
|
|||||||
feature_map_uri = get<String const>(config["feature_map"]);
|
feature_map_uri = get<String const>(config["feature_map"]);
|
||||||
}
|
}
|
||||||
FeatureMap feature_map = LoadFeatureMap(feature_map_uri);
|
FeatureMap feature_map = LoadFeatureMap(feature_map_uri);
|
||||||
|
std::vector<Json> custom_feature_names;
|
||||||
|
if (!IsA<Null>(config["feature_names"])) {
|
||||||
|
custom_feature_names = get<Array const>(config["feature_names"]);
|
||||||
|
}
|
||||||
|
|
||||||
auto& scores = learner->GetThreadLocal().ret_vec_float;
|
auto& scores = learner->GetThreadLocal().ret_vec_float;
|
||||||
std::vector<bst_feature_t> features;
|
std::vector<bst_feature_t> features;
|
||||||
learner->CalcFeatureScore(importance, &features, &scores);
|
learner->CalcFeatureScore(importance, &features, &scores);
|
||||||
|
|
||||||
auto n_features = learner->GetNumFeature();
|
auto n_features = learner->GetNumFeature();
|
||||||
GenerateFeatureMap(learner, n_features, &feature_map);
|
GenerateFeatureMap(learner, custom_feature_names, n_features, &feature_map);
|
||||||
CHECK_LE(features.size(), n_features);
|
|
||||||
|
|
||||||
auto& feature_names = learner->GetThreadLocal().ret_vec_str;
|
auto& feature_names = learner->GetThreadLocal().ret_vec_str;
|
||||||
feature_names.resize(features.size());
|
feature_names.resize(features.size());
|
||||||
@ -1131,10 +1133,24 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle,
|
|||||||
feature_names[i] = feature_map.Name(features[i]);
|
feature_names[i] = feature_map.Name(features[i]);
|
||||||
feature_names_c[i] = feature_names[i].data();
|
feature_names_c[i] = feature_names[i].data();
|
||||||
}
|
}
|
||||||
|
*out_n_features = feature_names.size();
|
||||||
|
|
||||||
CHECK_EQ(scores.size(), features.size());
|
CHECK_LE(features.size(), scores.size());
|
||||||
CHECK_EQ(scores.size(), feature_names.size());
|
auto &shape = learner->GetThreadLocal().prediction_shape;
|
||||||
*out_length = scores.size();
|
if (scores.size() > features.size()) {
|
||||||
|
// Multi-class linear model
|
||||||
|
CHECK_EQ(scores.size() % features.size(), 0ul);
|
||||||
|
auto n_classes = scores.size() / features.size();
|
||||||
|
*out_dim = 2;
|
||||||
|
shape = {n_features, n_classes};
|
||||||
|
} else {
|
||||||
|
CHECK_EQ(features.size(), scores.size());
|
||||||
|
*out_dim = 1;
|
||||||
|
shape.resize(1);
|
||||||
|
shape.front() = scores.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
*out_shape = dmlc::BeginPtr(shape);
|
||||||
*out_scores = scores.data();
|
*out_scores = scores.data();
|
||||||
*out_features = dmlc::BeginPtr(feature_names_c);
|
*out_features = dmlc::BeginPtr(feature_names_c);
|
||||||
API_END();
|
API_END();
|
||||||
|
|||||||
@ -194,8 +194,8 @@ inline FeatureMap LoadFeatureMap(std::string const& uri) {
|
|||||||
return feat;
|
return feat;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME(jiamingy): Use this for model dump.
|
|
||||||
inline void GenerateFeatureMap(Learner const *learner,
|
inline void GenerateFeatureMap(Learner const *learner,
|
||||||
|
std::vector<Json> const &custom_feature_names,
|
||||||
size_t n_features, FeatureMap *out_feature_map) {
|
size_t n_features, FeatureMap *out_feature_map) {
|
||||||
auto &feature_map = *out_feature_map;
|
auto &feature_map = *out_feature_map;
|
||||||
auto maybe = [&](std::vector<std::string> const &values, size_t i,
|
auto maybe = [&](std::vector<std::string> const &values, size_t i,
|
||||||
@ -205,15 +205,31 @@ inline void GenerateFeatureMap(Learner const *learner,
|
|||||||
if (feature_map.Size() == 0) {
|
if (feature_map.Size() == 0) {
|
||||||
// Use the feature names and types from booster.
|
// Use the feature names and types from booster.
|
||||||
std::vector<std::string> feature_names;
|
std::vector<std::string> feature_names;
|
||||||
learner->GetFeatureNames(&feature_names);
|
// priority:
|
||||||
|
// 1. feature map.
|
||||||
|
// 2. customized feature name.
|
||||||
|
// 3. from booster
|
||||||
|
// 4. default feature name.
|
||||||
|
if (!custom_feature_names.empty()) {
|
||||||
|
CHECK_EQ(custom_feature_names.size(), n_features)
|
||||||
|
<< "Incorrect number of feature names.";
|
||||||
|
feature_names.resize(custom_feature_names.size());
|
||||||
|
std::transform(custom_feature_names.begin(), custom_feature_names.end(),
|
||||||
|
feature_names.begin(),
|
||||||
|
[](Json const &name) { return get<String const>(name); });
|
||||||
|
} else {
|
||||||
|
learner->GetFeatureNames(&feature_names);
|
||||||
|
}
|
||||||
if (!feature_names.empty()) {
|
if (!feature_names.empty()) {
|
||||||
CHECK_EQ(feature_names.size(), n_features) << "Incorrect number of feature names.";
|
CHECK_EQ(feature_names.size(), n_features) << "Incorrect number of feature names.";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> feature_types;
|
std::vector<std::string> feature_types;
|
||||||
learner->GetFeatureTypes(&feature_types);
|
learner->GetFeatureTypes(&feature_types);
|
||||||
if (!feature_types.empty()) {
|
if (!feature_types.empty()) {
|
||||||
CHECK_EQ(feature_types.size(), n_features) << "Incorrect number of feature types.";
|
CHECK_EQ(feature_types.size(), n_features) << "Incorrect number of feature types.";
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < n_features; ++i) {
|
for (size_t i = 0; i < n_features; ++i) {
|
||||||
feature_map.PushBack(
|
feature_map.PushBack(
|
||||||
i,
|
i,
|
||||||
|
|||||||
@ -12,6 +12,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
#include "xgboost/gbm.h"
|
#include "xgboost/gbm.h"
|
||||||
#include "xgboost/json.h"
|
#include "xgboost/json.h"
|
||||||
@ -19,6 +20,7 @@
|
|||||||
#include "xgboost/linear_updater.h"
|
#include "xgboost/linear_updater.h"
|
||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h"
|
||||||
#include "xgboost/learner.h"
|
#include "xgboost/learner.h"
|
||||||
|
#include "xgboost/linalg.h"
|
||||||
|
|
||||||
#include "gblinear_model.h"
|
#include "gblinear_model.h"
|
||||||
#include "../common/timer.h"
|
#include "../common/timer.h"
|
||||||
@ -219,6 +221,26 @@ class GBLinear : public GradientBooster {
|
|||||||
return model_.DumpModel(fmap, with_stats, format);
|
return model_.DumpModel(fmap, with_stats, format);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FeatureScore(std::string const &importance_type,
|
||||||
|
std::vector<bst_feature_t> *out_features,
|
||||||
|
std::vector<float> *out_scores) const override {
|
||||||
|
CHECK(!model_.weight.empty()) << "Model is not initialized";
|
||||||
|
CHECK_EQ(importance_type, "weight")
|
||||||
|
<< "gblinear only has `weight` defined for feature importance.";
|
||||||
|
out_features->resize(this->learner_model_param_->num_feature, 0);
|
||||||
|
std::iota(out_features->begin(), out_features->end(), 0);
|
||||||
|
// Don't include the bias term in the feature importance scores
|
||||||
|
// The bias is the last weight
|
||||||
|
out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);
|
||||||
|
auto n_groups = learner_model_param_->num_output_group;
|
||||||
|
MatrixView<float> scores{out_scores, {learner_model_param_->num_feature, n_groups}};
|
||||||
|
for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {
|
||||||
|
for (bst_group_t g = 0; g < n_groups; ++g) {
|
||||||
|
scores(i, g) = model_[i][g];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool UseGPU() const override {
|
bool UseGPU() const override {
|
||||||
if (param_.updater == "gpu_coord_descent") {
|
if (param_.updater == "gpu_coord_descent") {
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -325,16 +325,19 @@ class GBTree : public GradientBooster {
|
|||||||
add_score([&](auto const &p_tree, bst_node_t, bst_feature_t split) {
|
add_score([&](auto const &p_tree, bst_node_t, bst_feature_t split) {
|
||||||
gain_map[split] = split_counts[split];
|
gain_map[split] = split_counts[split];
|
||||||
});
|
});
|
||||||
}
|
} else if (importance_type == "gain" || importance_type == "total_gain") {
|
||||||
if (importance_type == "gain" || importance_type == "total_gain") {
|
|
||||||
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
|
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
|
||||||
gain_map[split] += p_tree->Stat(nidx).loss_chg;
|
gain_map[split] += p_tree->Stat(nidx).loss_chg;
|
||||||
});
|
});
|
||||||
}
|
} else if (importance_type == "cover" || importance_type == "total_cover") {
|
||||||
if (importance_type == "cover" || importance_type == "total_cover") {
|
|
||||||
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
|
add_score([&](auto const &p_tree, bst_node_t nidx, bst_feature_t split) {
|
||||||
gain_map[split] += p_tree->Stat(nidx).sum_hess;
|
gain_map[split] += p_tree->Stat(nidx).sum_hess;
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
LOG(FATAL)
|
||||||
|
<< "Unknown feature importance type, expected one of: "
|
||||||
|
<< R"({"weight", "total_gain", "total_cover", "gain", "cover"}, got: )"
|
||||||
|
<< importance_type;
|
||||||
}
|
}
|
||||||
if (importance_type == "gain" || importance_type == "cover") {
|
if (importance_type == "gain" || importance_type == "cover") {
|
||||||
for (size_t i = 0; i < gain_map.size(); ++i) {
|
for (size_t i = 0; i < gain_map.size(); ++i) {
|
||||||
|
|||||||
@ -1197,23 +1197,6 @@ class LearnerImpl : public LearnerIO {
|
|||||||
std::vector<bst_feature_t> *features,
|
std::vector<bst_feature_t> *features,
|
||||||
std::vector<float> *scores) override {
|
std::vector<float> *scores) override {
|
||||||
this->Configure();
|
this->Configure();
|
||||||
std::vector<std::string> allowed_importance_type = {
|
|
||||||
"weight", "total_gain", "total_cover", "gain", "cover"
|
|
||||||
};
|
|
||||||
if (std::find(allowed_importance_type.begin(),
|
|
||||||
allowed_importance_type.end(),
|
|
||||||
importance_type) == allowed_importance_type.end()) {
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << "importance_type mismatch, got: " << importance_type
|
|
||||||
<< "`, expected one of ";
|
|
||||||
for (size_t i = 0; i < allowed_importance_type.size(); ++i) {
|
|
||||||
ss << "`" << allowed_importance_type[i] << "`";
|
|
||||||
if (i != allowed_importance_type.size() - 1) {
|
|
||||||
ss << ", ";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOG(FATAL) << ss.str();
|
|
||||||
}
|
|
||||||
gbm_->FeatureScore(importance_type, features, scores);
|
gbm_->FeatureScore(importance_type, features, scores);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -154,6 +154,9 @@ class TestBasic:
|
|||||||
dump4j = json.loads(dump4[0])
|
dump4j = json.loads(dump4[0])
|
||||||
assert 'gain' in dump4j, "Expected 'gain' to be dumped in JSON."
|
assert 'gain' in dump4j, "Expected 'gain' to be dumped in JSON."
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
bst.get_dump(fmap="foo")
|
||||||
|
|
||||||
def test_feature_score(self):
|
def test_feature_score(self):
|
||||||
rng = np.random.RandomState(0)
|
rng = np.random.RandomState(0)
|
||||||
data = rng.randn(100, 2)
|
data = rng.randn(100, 2)
|
||||||
|
|||||||
@ -211,6 +211,7 @@ def test_feature_importances_weight():
|
|||||||
digits = load_digits(n_class=2)
|
digits = load_digits(n_class=2)
|
||||||
y = digits['target']
|
y = digits['target']
|
||||||
X = digits['data']
|
X = digits['data']
|
||||||
|
|
||||||
xgb_model = xgb.XGBClassifier(random_state=0,
|
xgb_model = xgb.XGBClassifier(random_state=0,
|
||||||
tree_method="exact",
|
tree_method="exact",
|
||||||
learning_rate=0.1,
|
learning_rate=0.1,
|
||||||
@ -241,6 +242,33 @@ def test_feature_importances_weight():
|
|||||||
importance_type="weight").fit(X, y)
|
importance_type="weight").fit(X, y)
|
||||||
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
|
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
xgb_model.set_params(importance_type="foo")
|
||||||
|
xgb_model.feature_importances_
|
||||||
|
|
||||||
|
X, y = load_digits(n_class=3, return_X_y=True)
|
||||||
|
|
||||||
|
cls = xgb.XGBClassifier(booster="gblinear", n_estimators=4)
|
||||||
|
cls.fit(X, y)
|
||||||
|
assert cls.feature_importances_.shape[0] == X.shape[1]
|
||||||
|
assert cls.feature_importances_.shape[1] == 3
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
|
path = os.path.join(tmpdir, "model.json")
|
||||||
|
cls.save_model(path)
|
||||||
|
with open(path, "r") as fd:
|
||||||
|
model = json.load(fd)
|
||||||
|
weights = np.array(
|
||||||
|
model["learner"]["gradient_booster"]["model"]["weights"]
|
||||||
|
).reshape((cls.n_features_in_ + 1, 3))
|
||||||
|
weights = weights[:-1, ...]
|
||||||
|
np.testing.assert_allclose(
|
||||||
|
weights / weights.sum(), cls.feature_importances_, rtol=1e-6
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
cls.set_params(importance_type="cover")
|
||||||
|
cls.feature_importances_
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_pandas())
|
@pytest.mark.skipif(**tm.no_pandas())
|
||||||
def test_feature_importances_gain():
|
def test_feature_importances_gain():
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user