diff --git a/Makefile b/Makefile index d9232b4d8..8bc136283 100644 --- a/Makefile +++ b/Makefile @@ -141,7 +141,7 @@ Rpack: clean_all R ?= R Rbuild: Rpack - $(R) CMD build --no-build-vignettes xgboost + $(R) CMD build xgboost rm -rf xgboost Rcheck: Rbuild diff --git a/R-package/R/xgb.ggplot.R b/R-package/R/xgb.ggplot.R index 339e0fac1..9f92c759b 100644 --- a/R-package/R/xgb.ggplot.R +++ b/R-package/R/xgb.ggplot.R @@ -118,8 +118,7 @@ xgb.ggplot.shap.summary <- function(data, shap_contrib = NULL, features = NULL, p_data <- prepare.ggplot.shap.data(data_list, normalize = TRUE) # Reverse factor levels so that the first level is at the top of the plot p_data[, "feature" := factor(feature, rev(levels(feature)))] - - p <- ggplot2::ggplot(p_data, ggplot2::aes(x = feature, y = shap_value, colour = feature_value)) + + p <- ggplot2::ggplot(p_data, ggplot2::aes(x = feature, y = p_data$shap_value, colour = p_data$feature_value)) + ggplot2::geom_jitter(alpha = 0.5, width = 0.1) + ggplot2::scale_colour_viridis_c(limits = c(-3, 3), option = "plasma", direction = -1) + ggplot2::geom_abline(slope = 0, intercept = 0, colour = "darkgrey") + diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R index 70a357ee9..81a5b04c1 100644 --- a/R-package/R/xgb.plot.shap.R +++ b/R-package/R/xgb.plot.shap.R @@ -212,6 +212,9 @@ xgb.plot.shap.summary <- function(data, shap_contrib = NULL, features = NULL, to #' Prepare data for SHAP plots. To be used in xgb.plot.shap, xgb.plot.shap.summary, etc. #' Internal utility function. #' +#' @inheritParams xgb.plot.shap +#' @keywords internal +#' #' @return A list containing: 'data', a matrix containing sample observations #' and their feature values; 'shap_contrib', a matrix containing the SHAP contribution #' values for these observations. diff --git a/R-package/man/xgb.shap.data.Rd b/R-package/man/xgb.shap.data.Rd index 952445610..2f0e4adea 100644 --- a/R-package/man/xgb.shap.data.Rd +++ b/R-package/man/xgb.shap.data.Rd @@ -18,6 +18,31 @@ xgb.shap.data( max_observations = 1e+05 ) } +\arguments{ +\item{data}{data as a \code{matrix} or \code{dgCMatrix}.} + +\item{shap_contrib}{a matrix of SHAP contributions that was computed earlier for the above +\code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.} + +\item{features}{a vector of either column indices or of feature names to plot. When it is NULL, +feature importance is calculated, and \code{top_n} high ranked features are taken.} + +\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} + +\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib} +or \code{features} is missing.} + +\item{trees}{passed to \code{\link{xgb.importance}} when \code{features = NULL}.} + +\item{target_class}{is only relevant for multiclass models. When it is set to a 0-based class index, +only SHAP contributions for that specific class are used. +If it is not set, SHAP importances are averaged over all classes.} + +\item{approxcontrib}{passed to \code{\link{predict.xgb.Booster}} when \code{shap_contrib = NULL}.} + +\item{subsample}{a random fraction of data points to use for plotting. When it is NULL, +it is set so that up to 100K data points are used.} +} \value{ A list containing: 'data', a matrix containing sample observations and their feature values; 'shap_contrib', a matrix containing the SHAP contribution @@ -27,3 +52,4 @@ A list containing: 'data', a matrix containing sample observations Prepare data for SHAP plots. To be used in xgb.plot.shap, xgb.plot.shap.summary, etc. Internal utility function. } +\keyword{internal} diff --git a/rabit/include/rabit/internal/utils.h b/rabit/include/rabit/internal/utils.h index fd0b040bc..ad247fe99 100644 --- a/rabit/include/rabit/internal/utils.h +++ b/rabit/include/rabit/internal/utils.h @@ -69,10 +69,6 @@ inline bool StringToBool(const char* s) { return CompareStringsCaseInsensitive(s, "true") == 0 || atoi(s) != 0; } -inline void HandlePrint(const char *msg) { - printf("%s", msg); -} - /*! \brief printf, prints messages to the console */ inline void Printf(const char *fmt, ...) { std::string msg(kPrintBuffer, '\0'); @@ -80,7 +76,7 @@ inline void Printf(const char *fmt, ...) { va_start(args, fmt); vsnprintf(&msg[0], kPrintBuffer, fmt, args); va_end(args); - HandlePrint(msg.c_str()); + LOG(CONSOLE) << msg; } /*! \brief assert a condition is true, use this to handle debug information */ diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc index 3cfd3008f..5c3bb363d 100644 --- a/rabit/src/allreduce_base.cc +++ b/rabit/src/allreduce_base.cc @@ -104,9 +104,7 @@ bool AllreduceBase::Init(int argc, char* argv[]) { } } if (dmlc_role != "worker") { - fprintf(stderr, "Rabit Module currently only work with dmlc worker"\ - ", quit this program by exit 0\n"); - exit(0); + LOG(FATAL) << "Rabit Module currently only work with dmlc worker"; } // clear the setting before start reconnection @@ -236,10 +234,10 @@ utils::TCPSocket AllreduceBase::ConnectTracker() const { do { if (!tracker.Connect(utils::SockAddr(tracker_uri.c_str(), tracker_port))) { if (++retry >= connect_retry) { - fprintf(stderr, "connect to (failed): [%s]\n", tracker_uri.c_str()); + LOG(WARNING) << "Connect to (failed): [" << tracker_uri << "]\n"; utils::Socket::Error("Connect"); } else { - fprintf(stderr, "retry connect to ip(retry time %d): [%s]\n", retry, tracker_uri.c_str()); + LOG(WARNING) << "Retry connect to ip(retry time " << retry << "): [" << tracker_uri << "]\n"; #if defined(_MSC_VER) || defined (__MINGW32__) Sleep(retry << 1); #else @@ -295,10 +293,11 @@ bool AllreduceBase::ReConnectLinks(const char *cmd) { "must keep rank to same if the node already have one"); rank = newrank; - // tracker got overwhelemed and not able to assign correct rank - if (rank == -1) exit(-1); + if (rank == -1) { + LOG(FATAL) << "tracker got overwhelemed and not able to assign correct rank"; + } - fprintf(stdout, "task %s got new rank %d\n", task_id.c_str(), rank); + LOG(CONSOLE) << "task " << task_id << " got new rank " << rank; Assert(tracker.RecvAll(&num_neighbors, sizeof(num_neighbors)) == \ sizeof(num_neighbors), "ReConnectLink failure 4"); @@ -424,7 +423,7 @@ bool AllreduceBase::ReConnectLinks(const char *cmd) { setsockopt(all_link.sock, IPPROTO_TCP, TCP_NODELAY, reinterpret_cast(&tcpNoDelay), sizeof(tcpNoDelay)); #else - fprintf(stderr, "tcp no delay is not implemented on non unix platforms\n"); + LOG(WARNING) << "tcp no delay is not implemented on non unix platforms"; #endif } if (tree_neighbors.count(all_link.rank) != 0) { @@ -444,7 +443,7 @@ bool AllreduceBase::ReConnectLinks(const char *cmd) { "cannot find next ring in the link"); return true; } catch (const std::exception& e) { - fprintf(stderr, "failed in ReconnectLink %s\n", e.what()); + LOG(WARNING) << "failed in ReconnectLink " << e.what(); return false; } } diff --git a/rabit/src/allreduce_base.h b/rabit/src/allreduce_base.h index 8f1b30490..2f0268a77 100644 --- a/rabit/src/allreduce_base.h +++ b/rabit/src/allreduce_base.h @@ -226,8 +226,7 @@ class AllreduceBase : public IEngine { */ inline void ReportStatus() const { if (hadoop_mode != 0) { - fprintf(stderr, "reporter:status:Rabit Phase[%03d] Operation %03d\n", - version_number, seq_counter); + LOG(CONSOLE) << "reporter:status:Rabit Phase[" << version_number << "] Operation " << seq_counter << "\n"; } }