Inquiries regarding this code contact gabriel.carrasco@upch.pe

rm(list=ls())
library(tidyverse); library(epiDisplay); library(GGally); library(haven); library(factoextra); library(reshape2); library(mctest)

Using PCA to detect collinearity

head(dat)
## # A tibble: 6 x 14
##     AGE SHOCKTYP   SBP   MAP   DBP CARDIND APPTIME   MCT    UO   HGB   HCT
##   <dbl>    <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1    68        2   114    88    73    0.66    11.5  22.5   110  11.3    34
## 2    37        2   149   115    97    3.55     8.2  15.6    40  12.7    39
## 3    50        2   146   101    74    4.05     5.6  12.5     0  13.4    41
## 4    53        2   107    83    70    0.95     6.4  38       0  15.5    46
## 5    75        2   141    65    82    1.9     12.6  29.7    42  13.7    42
## 6    66        2   114    59    44    3.48     9    16.8     0   9.3    28
## # … with 3 more variables: LCARDIND <dbl>, LAPPTIME <dbl>, LMCT <dbl>
res.pca <- dat %>% dplyr::select(SBP,MAP,DBP,LCARDIND,LAPPTIME,LMCT,UO,HGB,HCT) %>%
  princomp(cor = TRUE, scores = TRUE)
summary(res.pca)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3    Comp.4    Comp.5
## Standard deviation     1.8301235 1.7168993 0.9822378 0.9650414 0.6997882
## Proportion of Variance 0.3721502 0.3275270 0.1071990 0.1034783 0.0544115
## Cumulative Proportion  0.3721502 0.6996773 0.8068763 0.9103546 0.9647661
##                            Comp.6     Comp.7      Comp.8      Comp.9
## Standard deviation     0.39435469 0.33144463 0.165823143 0.155680978
## Proportion of Variance 0.01727951 0.01220617 0.003055257 0.002692952
## Cumulative Proportion  0.98204562 0.99425179 0.997307048 1.000000000
eig <- (res.pca$sdev)^2
(CN <- sqrt(eig[1]/eig[length(eig)])) # Not close to rule-of-thumb.
##  Comp.1 
## 11.7556
# However, CN may not be a sensitive measure of collinearity. Certainly, if CN ≥ 30 , you should be concerned. 
# But CN < 30 does not mean you can stop assessing collinearity. Tolerance is more useful measure.

model<-lm(AGE ~ SBP + MAP + DBP + LCARDIND + LAPPTIME + LMCT + UO + HGB + HCT, data = dat)
summary(model)
## 
## Call:
## lm(formula = AGE ~ SBP + MAP + DBP + LCARDIND + LAPPTIME + LMCT + 
##     UO + HGB + HCT, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5505  -9.1156   0.4135  10.0885  27.4634 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  7.63235   17.52191   0.436   0.6640   
## SBP          0.17712    0.11044   1.604   0.1118   
## MAP          0.08282    0.30236   0.274   0.7847   
## DBP         -0.53682    0.28279  -1.898   0.0605 . 
## LCARDIND    13.51371    7.12484   1.897   0.0607 . 
## LAPPTIME     7.79520   12.04777   0.647   0.5191   
## LMCT        37.79368   16.64552   2.271   0.0253 * 
## UO          -0.03575    0.01224  -2.920   0.0043 **
## HGB          3.43455    2.22755   1.542   0.1262   
## HCT         -1.30397    0.71748  -1.817   0.0721 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.92 on 103 degrees of freedom
## Multiple R-squared:  0.3532, Adjusted R-squared:  0.2967 
## F-statistic:  6.25 on 9 and 103 DF,  p-value: 5.212e-07
imcdiag(model$model,model$model$AGE)$idiags
##                VIF        TOL         Wi         Fi    Leamer CVIF Klein
## AGE       1.546103 0.64678755   6.249843   7.099336 0.8042310    0     0
## SBP       6.804409 0.14696353  66.428239  75.457320 0.3833582    0     0
## MAP      25.609977 0.03904728 281.647514 319.929701 0.1976039    0     0
## DBP      16.449463 0.06079226 176.810523 200.843022 0.2465609    0     0
## LCARDIND  2.255216 0.44341649  14.365250  16.317809 0.6658953    0     0
## LAPPTIME  4.040864 0.24747181  34.801003  39.531236 0.4974654    0     0
## LMCT      6.201536 0.16125037  59.528691  67.619969 0.4015599    0     0
## UO        1.183309 0.84508775   2.097871   2.383018 0.9192865    0     0
## HGB      18.918216 0.05285911 205.064031 232.936812 0.2299111    0     0
## HCT      18.714654 0.05343406 202.734368 230.290496 0.2311581    0     0
imcdiag(model$model,model$model$AGE)$idiags[,2]<0.1 # TOLj < 0.1 -> collinearity
##      AGE      SBP      MAP      DBP LCARDIND LAPPTIME     LMCT       UO 
##    FALSE    FALSE     TRUE     TRUE    FALSE    FALSE    FALSE    FALSE 
##      HGB      HCT 
##     TRUE     TRUE

Using PCA to identify outliers

fviz_pca_ind(res.pca,
             col.ind = "cos2", # Color by the quality of representation
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE)     # Avoid text overlapping