Inquiries regarding this code contact gabriel.carrasco@upch.pe
rm(list=ls())
library(tidyverse)
head(dat)
## SEX RESP1 PERFST CS2 SURVSTAT AGE HISTDX1 CLNSTG2 SURVTIME TRTGRP
## 1 0 2 100 1 3 49.2 1 0 8.45722 0
## 2 1 1 100 1 3 12.3 1 0 1.01848 1
## 3 1 2 30 1 3 62.4 1 0 0.70363 1
## 4 0 1 100 1 1 43.7 1 1 8.30116 1
## 5 1 1 70 1 3 49.8 1 1 3.27447 1
## 6 0 2 80 1 3 69.2 1 0 3.74538 1
## MAXSIZE RELSTAT RFS K67 PSGRP TMRSIZE
## 1 2 3 86.68639 13 1 0
## 2 10 3 10.55227 33 1 1
## 3 2 3 5.45694 76 0 0
## 4 15 3 15.87771 51 1 1
## 5 2 3 8.67850 54 0 0
## 6 1 3 10.38790 34 1 0
full_model<-lm(K67 ~ SEX + AGE + CS2 + CLNSTG2 + PSGRP + TMRSIZE, data = dat,na.action=na.exclude)
null_model<-lm(K67 ~ 1, data = full_model$model,na.action=na.exclude)
anova(null_model,full_model) # Anova of entire model SS(model)=3690, df(Model)=6, MSR=SS/df=615
## Analysis of Variance Table
##
## Model 1: K67 ~ 1
## Model 2: K67 ~ SEX + AGE + CS2 + CLNSTG2 + PSGRP + TMRSIZE
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 102 43060
## 2 96 39370 6 3690 1.4996 0.1865
summary(aov(full_model)) # Anova of each term, SS(error)=39370, df(error)=96, MSE=410.1
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 1774 1774.4 4.327 0.0402 *
## AGE 1 0 0.5 0.001 0.9723
## CS2 1 1248 1248.4 3.044 0.0842 .
## CLNSTG2 1 1 0.6 0.001 0.9699
## PSGRP 1 415 415.5 1.013 0.3167
## TMRSIZE 1 251 250.6 0.611 0.4363
## Residuals 96 39370 410.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
(Fs<-615/410.1) # F=MSR/MSE
## [1] 1.499634
(PrF <- 1-pf(Fs, df1=6, df2=96)) #df1=df(Model), df2=df(error)
## [1] 0.1864902
summary(full_model)
##
## Call:
## lm(formula = K67 ~ SEX + AGE + CS2 + CLNSTG2 + PSGRP + TMRSIZE,
## data = dat, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.289 -14.812 -0.961 12.851 50.133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.71803 7.16908 5.819 7.79e-08 ***
## SEX 8.72413 4.17346 2.090 0.0392 *
## AGE -0.02403 0.10264 -0.234 0.8154
## CS2 -6.06455 4.53965 -1.336 0.1847
## CLNSTG2 1.15575 5.07790 0.228 0.8204
## PSGRP -4.19745 4.21521 -0.996 0.3219
## TMRSIZE 3.87147 4.95254 0.782 0.4363
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.25 on 96 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0857, Adjusted R-squared: 0.02855
## F-statistic: 1.5 on 6 and 96 DF, p-value: 0.1865
inspect <- dat %>% mutate(predicted=predict(full_model),
ord_res=residuals(full_model),
stu_res=rstandard(full_model),
jkn_res=rstudent(full_model)) %>%
dplyr::select(K67,predicted,ord_res,stu_res,jkn_res)
inspect %>% ggplot(aes(ord_res)) +
geom_histogram(aes(y = ..density..), bins = 8, fill="white", col="black") +
stat_function(fun = dnorm, col="red", linetype="dashed", args = list(mean = mean(inspect$ord_res,na.rm = T), sd = sd(inspect$ord_res,na.rm = T))) +
theme_classic()
inspect %>% ggplot(aes(sample = ord_res)) + stat_qq() + stat_qq_line(col="red", linetype="dashed") + theme_classic()
inspect %>% ggplot(aes(predicted,ord_res)) +
geom_point(size=2) +
geom_hline(yintercept = 0, col="red", linetype="dashed") +
theme_classic()
inspect %>% ggplot(aes(predicted,stu_res)) +
geom_point(size=2) +
geom_hline(yintercept = 0, col="red", linetype="dashed") +
theme_classic()
# pattern scattered around 0; no obvious departures from model assumptions. No Studentized residuals are <-3 or >+3
inspect %>% ggplot(aes(predicted,jkn_res)) +
geom_point(size=2) +
geom_hline(yintercept = 0, col="red", linetype="dashed") +
theme_classic()
# No extreme observations noted