This report aims to present the capabilities of the package DALEX.

The document is a part of the paper “Landscape of R packages for eXplainable Machine Learning” by S. Maksymiuk, A. Gosiewska, and P. Biecek (https://arxiv.org/abs/2009.13248). It contains a real-life use case based on the titanic_imputed data set, which is described in the section “Example gallery for XAI packages” of the article.

We did our best to show the entire range of the implemented explanations. Please note that the examples may be incomplete. If you think something is missing, feel free to make a pull request at the GitHub repository MI2DataLab/XAI-tools.

The list of use-cases for all packages included in the article is here.

Load titanic_imputed data set.

# Load the `titanic_imputed` data set shipped with the DALEX package.
data("titanic_imputed", package = "DALEX")

# Preview the first six rows.
head(titanic_imputed, n = 6L)
##   gender age class    embarked  fare sibsp parch survived
## 1   male  42   3rd Southampton  7.11     0     0        0
## 2   male  13   3rd Southampton 20.05     0     2        0
## 3   male  16   3rd Southampton 20.05     1     1        0
## 4 female  39   3rd Southampton 20.05     1     1        1
## 5 female  16   3rd Southampton  7.13     0     0        1
## 6   male  25   3rd Southampton  7.13     0     0        1
library(DALEX)

Fit a random forest (ranger) and a gradient boosting model (gbm) to the titanic_imputed data.

# Random forest (ranger) predicting `survived` from all remaining columns,
# fitted with `classification = TRUE` and `probability = TRUE`.
ranger_model <- ranger::ranger(
  survived ~ .,
  data = titanic_imputed,
  classification = TRUE,
  probability = TRUE
)

# Gradient boosting model (gbm) on the same formula, Bernoulli distribution.
gbm_model <- gbm::gbm(
  survived ~ .,
  data = titanic_imputed,
  distribution = "bernoulli"
)

Model parts

Variable importance

# Wrap the ranger model in a DALEX explainer. `verbose` is left at its
# default, so the creation summary is printed.
# NOTE(review): `data` is the full data frame, so it still contains the
# `survived` column that is also passed as `y`. DALEX's `explain()`
# documentation recommends supplying `data` without the target column --
# confirm whether keeping it here is intended.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model"
)
## Preparation of a new explainer is initiated
##   -> model label       :  Ranger Model 
##   -> data              :  2207  rows  8  cols 
##   -> target variable   :  2207  values 
##   -> predict function  :  yhat.ranger  will be used (  default  )
##   -> predicted values  :  numerical, min =  0.01633404 , mean =  0.3225624 , max =  0.988202  
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification (  default  ) 
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.7818278 , mean =  -0.0004055999 , max =  0.8831279  
##   A new explainer has been created! 
# Model-level variable importance for the ranger explainer.
fi_ranger <- model_parts(
  explainer_ranger
)
# Draw the importance plot.
plot(fi_ranger)

Model Profile

ALE Plot - One model

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Accumulated (ALE) profile of the model response over `fare`.
ale_ranger <- model_profile(
  explainer_ranger,
  variables = "fare",
  type = "accumulated"
)
plot(ale_ranger)

ALE Plot - Two models

# One quiet explainer per model, built on the same data and target.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)
explainer_gbm <- explain(
  gbm_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "GBM Model",
  verbose = FALSE
)

# Accumulated (ALE) profiles over `fare` for both models.
ale_ranger <- model_profile(
  explainer_ranger,
  variables = "fare",
  type = "accumulated"
)
ale_gbm <- model_profile(
  explainer_gbm,
  variables = "fare",
  type = "accumulated"
)
# Overlay the aggregated profiles of the two models in a single plot.
plot(ale_ranger[["agr_profiles"]], ale_gbm[["agr_profiles"]])

PDP Plot - One model

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Partial dependence (PDP) profile of the model response over `fare`.
pdp_ranger <- model_profile(
  explainer_ranger,
  variables = "fare",
  type = "partial"
)
plot(pdp_ranger)

PDP Plot - Two models

# One quiet explainer per model, built on the same data and target.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)
explainer_gbm <- explain(
  gbm_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "GBM Model",
  verbose = FALSE
)

# Partial dependence (PDP) profiles over `fare` for both models.
pdp_ranger <- model_profile(
  explainer_ranger,
  variables = "fare",
  type = "partial"
)
pdp_gbm <- model_profile(
  explainer_gbm,
  variables = "fare",
  type = "partial"
)
# Overlay the aggregated profiles of the two models in a single plot.
plot(pdp_ranger[["agr_profiles"]], pdp_gbm[["agr_profiles"]])

CDP Plot - One model

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Conditional dependence (CDP) profile of the model response over `fare`.
cdp_ranger <- model_profile(
  explainer_ranger,
  variables = "fare",
  type = "conditional"
)
plot(cdp_ranger)

CDP Plot - Two models

# One quiet explainer per model, built on the same data and target.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)
explainer_gbm <- explain(
  gbm_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "GBM Model",
  verbose = FALSE
)

# Conditional dependence (CDP) profiles over `fare` for both models.
cdp_ranger <- model_profile(
  explainer_ranger,
  variables = "fare",
  type = "conditional"
)
cdp_gbm <- model_profile(
  explainer_gbm,
  variables = "fare",
  type = "conditional"
)
# Overlay the aggregated profiles of the two models in a single plot.
plot(cdp_ranger[["agr_profiles"]], cdp_gbm[["agr_profiles"]])

Model Diagnostics

Residual plots - One model

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Model diagnostics for the ranger model, drawn with the default plot settings.
md_ranger <- model_diagnostics(
  explainer_ranger
)
plot(md_ranger)

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Same diagnostics as before, this time plotted with `variable = "fare"`.
md_ranger <- model_diagnostics(
  explainer_ranger
)
plot(
  md_ranger,
  variable = "fare"
)

Residual plots - Two models

# One quiet explainer per model, built on the same data and target.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)
explainer_gbm <- explain(
  gbm_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "GBM Model",
  verbose = FALSE
)

# Diagnostics for each model, compared in one plot with default settings.
md_ranger <- model_diagnostics(
  explainer_ranger
)
md_gbm <- model_diagnostics(
  explainer_gbm
)
plot(
  md_ranger,
  md_gbm
)

# One quiet explainer per model, built on the same data and target.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)
explainer_gbm <- explain(
  gbm_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "GBM Model",
  verbose = FALSE
)

# Diagnostics for each model, compared in one plot with `variable = "fare"`.
md_ranger <- model_diagnostics(
  explainer_ranger
)
md_gbm <- model_diagnostics(
  explainer_gbm
)
plot(
  md_ranger,
  md_gbm,
  variable = "fare"
)

Predict Parts

BreakDown

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Explain the prediction for the first row of the data, leaving `type` at its
# default.
bd_ranger <- predict_parts(
  explainer_ranger,
  new_observation = titanic_imputed[1, ]
)
plot(bd_ranger)

Oscillations

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Oscillation-based attributions for the first row of the data.
osc_ranger <- predict_parts(
  explainer_ranger,
  new_observation = titanic_imputed[1, ],
  type = "oscillations"
)
plot(osc_ranger)

SHAP

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# SHAP attributions for the first row of the data, computed with `B = 50`.
shap_ranger <- predict_parts(
  explainer_ranger,
  new_observation = titanic_imputed[1, ],
  type = "shap",
  B = 50
)
plot(shap_ranger)

Predict Profile

Ceteris Paribus (ICE) - One model

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Ceteris-paribus (ICE) profile for the first row of the data.
cp_ranger <- predict_profile(
  explainer_ranger,
  new_observation = titanic_imputed[1, ]
)
plot(cp_ranger)

Ceteris Paribus (ICE) - Two models

# One quiet explainer per model, built on the same data and target.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)
explainer_gbm <- explain(
  gbm_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "GBM Model",
  verbose = FALSE
)

# Ceteris-paribus (ICE) profiles for the first row, one per model, drawn
# together.
cp_ranger <- predict_profile(
  explainer_ranger,
  new_observation = titanic_imputed[1, ]
)
cp_gbm <- predict_profile(
  explainer_gbm,
  new_observation = titanic_imputed[1, ]
)
plot(
  cp_ranger,
  cp_gbm
)

Predict Diagnostics

Residual Diagnostics

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Residual diagnostics for the first row of the data, using `neighbors = 25`.
rd_ranger <- predict_diagnostics(
  explainer_ranger,
  new_observation = titanic_imputed[1, ],
  neighbors = 25
)
plot(rd_ranger)

Stability plots

# Rebuild the ranger explainer; `verbose = FALSE` silences the creation log.
explainer_ranger <- explain(
  ranger_model,
  data = titanic_imputed,
  y = titanic_imputed$survived,
  label = "Ranger Model",
  verbose = FALSE
)

# Stability plot for the first row of the data, restricted to `fare` and
# `age`, using `neighbors = 25`.
sp_ranger <- predict_diagnostics(
  explainer_ranger,
  new_observation = titanic_imputed[1, ],
  neighbors = 25,
  variables = c("fare", "age")
)
plot(sp_ranger)

Session info

# Record the R version, platform and package versions used for this report.
utils::sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Polish_Poland.1250  LC_CTYPE=Polish_Poland.1250   
## [3] LC_MONETARY=Polish_Poland.1250 LC_NUMERIC=C                  
## [5] LC_TIME=Polish_Poland.1250    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] DALEX_2.0.1
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.4.6      gower_0.2.1       pillar_1.4.4      compiler_3.6.1   
##  [5] ingredients_2.0.1 tools_3.6.1       digest_0.6.25     nlme_3.1-140     
##  [9] evaluate_0.14     lifecycle_0.2.0   tibble_3.0.1      gtable_0.3.0     
## [13] lattice_0.20-40   mgcv_1.8-31       pkgconfig_2.0.3   rlang_0.4.6      
## [17] Matrix_1.2-18     yaml_2.2.1        gbm_2.1.8         xfun_0.12        
## [21] ranger_0.12.1     stringr_1.4.0     dplyr_0.8.5       knitr_1.28       
## [25] vctrs_0.3.1       grid_3.6.1        tidyselect_1.0.0  glue_1.4.1       
## [29] R6_2.4.1          survival_3.1-11   iBreakDown_1.3.1  rmarkdown_2.1    
## [33] farver_2.0.3      ggplot2_3.3.2     purrr_0.3.4       magrittr_1.5     
## [37] splines_3.6.1     scales_1.1.1      ellipsis_0.3.1    htmltools_0.4.0  
## [41] assertthat_0.2.1  colorspace_1.4-1  labeling_0.3      stringi_1.4.6    
## [45] munsell_0.5.0     crayon_1.3.4