This report aims to present the capabilities of the package DALEXtra.
The document is a part of the paper “Landscape of R packages for eXplainable Machine Learning”, S. Maksymiuk, A. Gosiewska, and P. Biecek. (https://arxiv.org/abs/2009.13248). It contains a real-life use case based on the titanic_imputed data set described in Section Example gallery for XAI packages of the article.
We did our best to show the entire range of the implemented explanations. Please note that the examples may be incomplete. If you think something is missing, feel free to make a pull request at the GitHub repository MI2DataLab/XAI-tools.
The list of use-cases for all packages included in the article is here.
Load the titanic_imputed data set.
# Load the titanic_imputed data set shipped with the DALEX package into the
# global environment (2207 passengers, 8 columns, binary target `survived`).
data(titanic_imputed, package = "DALEX")
# Preview the first six rows to inspect the available predictors and the target.
head(titanic_imputed)
## gender age class embarked fare sibsp parch survived
## 1 male 42 3rd Southampton 7.11 0 0 0
## 2 male 13 3rd Southampton 20.05 0 2 0
## 3 male 16 3rd Southampton 20.05 1 1 0
## 4 female 39 3rd Southampton 20.05 1 1 1
## 5 female 16 3rd Southampton 7.13 0 0 1
## 6 male 25 3rd Southampton 7.13 0 0 1
Fit a random forest model and a gradient boosting model to the titanic_imputed data.
# Random forest via ranger; probability = TRUE yields class probabilities,
# which the DALEX explainer needs for a classification task.
ranger_model <- ranger::ranger(
  survived ~ .,
  data = titanic_imputed,
  classification = TRUE,
  probability = TRUE
)
# Gradient boosting machine with a Bernoulli loss for the binary target.
gbm_model <- gbm::gbm(
  survived ~ .,
  data = titanic_imputed,
  distribution = "bernoulli"
)
library(DALEXtra)
# Wrap the ranger model in a DALEX explainer: `data` holds the predictors,
# `y` the true target, and `label` names the model in plots and reports.
explainer_ranger <- explain(ranger_model, data = titanic_imputed, y = titanic_imputed$survived, label = "Ranger Model")
## Preparation of a new explainer is initiated
## -> model label : Ranger Model
## -> data : 2207 rows 8 cols
## -> target variable : 2207 values
## -> predict function : yhat.ranger will be used ( [33m default [39m )
## -> predicted values : numerical, min = 0.01559981 , mean = 0.322142 , max = 0.9888994
## -> model_info : package ranger , ver. 0.12.1 , task classification ( [33m default [39m )
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.7815942 , mean = 1.47828e-05 , max = 0.8801861
## [32m A new explainer has been created! [39m
# Analogous explainer for the GBM model, so both models can be compared
# through the same DALEX/DALEXtra interface.
explainer_gbm <- explain(gbm_model, data = titanic_imputed, y = titanic_imputed$survived, label = "GBM Model")
## Preparation of a new explainer is initiated
## -> model label : GBM Model
## -> data : 2207 rows 8 cols
## -> target variable : 2207 values
## -> predict function : yhat.gbm will be used ( [33m default [39m )
## -> predicted values : numerical, min = 0.05005481 , mean = 0.3223963 , max = 0.9660827
## -> model_info : package gbm , ver. 2.1.8 , task classification ( [33m default [39m )
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.9660827 , mean = -0.0002395298 , max = 0.9470355
## [32m A new explainer has been created! [39m
# Funnel plot comparison: the first explainer is the champion, the remaining
# ones are challengers. show_info = FALSE suppresses the progress messages.
fm <- funnel_measure(explainer_gbm, explainer_ranger, show_info = FALSE)
plot(fm)
## $`challanger_Ranger Model`
# Rebuild both explainers with verbose = FALSE to silence the creation log.
explainer_ranger <- explain(ranger_model, data = titanic_imputed, y = titanic_imputed$survived, label = "Ranger Model", verbose = FALSE)
explainer_gbm <- explain(gbm_model, data = titanic_imputed, y = titanic_imputed$survived, label = "GBM Model", verbose = FALSE)
# Overall comparison of champion (gbm) vs. challenger (ranger): produces a
# radar plot of performance measures and an accordance plot of predictions.
oc <- overall_comparison(explainer_gbm, explainer_ranger, type = "classification")
plot(oc)
## $radar_plot
##
## $accordance_plot
# 70/30 train/test split of titanic_imputed.
# seq_len() is safer than 1:nrow() (avoids the c(1, 0) trap on empty data),
# and floor() makes the integer sample size explicit instead of relying on
# sample()'s silent truncation of 0.7 * nrow().
ind <- sample(seq_len(nrow(titanic_imputed)), floor(0.7 * nrow(titanic_imputed)))
train <- titanic_imputed[ind, ]
test <- titanic_imputed[-ind, ]
# Refit both models on the training subset only.
ranger_model <- ranger::ranger(survived ~ ., data = train, classification = TRUE, probability = TRUE)
gbm_model <- gbm::gbm(survived ~ ., data = train, distribution = "bernoulli")
# Explainers are evaluated on the held-out test set.
explainer_ranger <- explain(ranger_model, data = test, y = test$survived, label = "Ranger Model", verbose = FALSE)
explainer_gbm <- explain(gbm_model, data = test, y = test$survived, label = "GBM Model", verbose = FALSE)
# Contrast train vs. test performance of each model to detect overfitting.
tt <- training_test_comparison(explainer_gbm, explainer_ranger, training_data = train, training_y = train$survived)
plot(tt)
Compiled report can be seen under this link: https://mi2datalab.github.io/IML-tools/DALEXtra_champion_challenger.html.
# Rebuild every ingredient of the champion-challenger report from scratch so
# this section is self-contained: full-data explainers for the funnel measure
# and overall comparison, then a fresh train/test split for the
# training/test comparison.
explainer_ranger <- explain(ranger_model, data = titanic_imputed, y = titanic_imputed$survived, label = "Ranger Model")
explainer_gbm <- explain(gbm_model, data = titanic_imputed, y = titanic_imputed$survived, label = "GBM Model")
fm <- funnel_measure(explainer_gbm, explainer_ranger, show_info = FALSE)
explainer_ranger <- explain(ranger_model, data = titanic_imputed, y = titanic_imputed$survived, label = "Ranger Model", verbose = FALSE)
explainer_gbm <- explain(gbm_model, data = titanic_imputed, y = titanic_imputed$survived, label = "GBM Model", verbose = FALSE)
oc <- overall_comparison(explainer_gbm, explainer_ranger, type = "classification")
# 70/30 split; seq_len() avoids the 1:nrow() empty-data trap and floor()
# makes the integer sample size explicit.
ind <- sample(seq_len(nrow(titanic_imputed)), floor(0.7 * nrow(titanic_imputed)))
train <- titanic_imputed[ind, ]
test <- titanic_imputed[-ind, ]
ranger_model <- ranger::ranger(survived ~ ., data = train, classification = TRUE, probability = TRUE)
gbm_model <- gbm::gbm(survived ~ ., data = train, distribution = "bernoulli")
explainer_ranger <- explain(ranger_model, data = test, y = test$survived, label = "Ranger Model", verbose = FALSE)
explainer_gbm <- explain(gbm_model, data = test, y = test$survived, label = "GBM Model", verbose = FALSE)
tt <- training_test_comparison(explainer_gbm, explainer_ranger, training_data = train, training_y = train$survived)
# Knit all three comparison objects into a single HTML report.
champion_challenger(list(fm, oc, tt), title = "DALEXtra: Champion - Challenger analysis", author = "Szymon Maksymiuk", model_performance_table = TRUE, output_name = "DALEXtra_champion_challenger")
# Record the exact R version and package versions used to compile this report,
# for reproducibility.
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Polish_Poland.1250 LC_CTYPE=Polish_Poland.1250
## [3] LC_MONETARY=Polish_Poland.1250 LC_NUMERIC=C
## [5] LC_TIME=Polish_Poland.1250
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DALEXtra_2.0 DALEX_2.0.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.4.6 compiler_3.6.1 pillar_1.4.4 tools_3.6.1
## [5] digest_0.6.25 jsonlite_1.6.1 evaluate_0.14 lifecycle_0.2.0
## [9] tibble_3.0.1 gtable_0.3.0 lattice_0.20-40 pkgconfig_2.0.3
## [13] rlang_0.4.6 Matrix_1.2-18 ggrepel_0.8.2 yaml_2.2.1
## [17] gbm_2.1.8 xfun_0.12 gridExtra_2.3 ranger_0.12.1
## [21] stringr_1.4.0 dplyr_0.8.5 knitr_1.28 vctrs_0.3.1
## [25] grid_3.6.1 tidyselect_1.0.0 reticulate_1.14 glue_1.4.1
## [29] R6_2.4.1 survival_3.1-11 rmarkdown_2.1 hnp_1.2-6
## [33] auditor_1.2.1.0000 farver_2.0.3 ggplot2_3.3.2 purrr_0.3.4
## [37] magrittr_1.5 MASS_7.3-51.6 scales_1.1.1 htmltools_0.4.0
## [41] ellipsis_0.3.1 splines_3.6.1 assertthat_0.2.1 colorspace_1.4-1
## [45] labeling_0.3 stringi_1.4.6 munsell_0.5.0 crayon_1.3.4