獨斷論

R을 이용한 Survival Analysis Tutorial 본문

과학과 기술/R 통계

R을 이용한 Survival Analysis Tutorial

부르칸 2023. 3. 25. 07:34
###
### Tutorial of survival analysis
###
##    subject time event
## 1        1    3     0
## 2        2    5     1
## 3        3    7     1
## 4        4    2     1
## 5        5   18     0
## 6        6   16     1
## 7        7    2     1
## 8        8    9     1
## 9        9   16     1
## 10      10    5     0
##
## where:
##  
##  subject is the individual’s identifier
##  time is the time to event (in years)2
##  event is the event status (0 = censored, 1 = event happened)


# library(survival)

## Example 1
# Data input
survDat = data.frame(
  sTime = c(3, 5, 7, 2, 18, 16, 2, 9, 16, 5), 
  sEvent = c(0, 1, 1, 1, 0, 1, 1, 1, 1, 0)
)

KM.results = survfit(Surv(sTime, sEvent) ~ 1, data = survDat)
summary(KM.results)
plot(KM.results, 
     xlab = "Time", 
     ylab = "Survival Probability", 
     conf.int = FALSE)
     

# Using the survminer package
library(survminer)
ggsurvplot(KM.results, 
           conf.int = FALSE, 
           surv.median.line = "hv", 
           legend = "none")
summary(KM.results)$table["median"]
KM.results


# Example 2
rm(list=ls())
library(KMsurv)
data(tongue)

head(tongue)
# type is the tumor DNA profile (1 = aneuploid tumor, 2 = diploid tumor)
# time is the time to death or on-study time (in weeks)
# delta is the death indicator (0 = alive, 1 = dead)

# only aneuploid type
aneuploid = subset(tongue, type == 1)
KM.results = survfit(Surv(time, delta) ~ 1, 
                     data = aneuploid, 
                     conf.type = "log-log")
KM.results

# plot
ggsurvplot(KM.results, 
           surv.median.line = "hv", 
           legend = "none")



##
##  Log-rank test
##
##    patient group time event
## 1        1     1  4.1     1
## 2        2     1  7.8     0
## 3        3     1 10.0     1
## 4        4     1 10.0     1
## 5        5     1 12.3     0
## 6        6     1 17.2     1
## 7        7     2  9.7     1
## 8        8     2 10.0     1
## 9        9     2 11.1     0
## 10      10     2 13.1     0
## 11      11     2 19.7     1
## 12      12     2 24.1     0
## 
## patient is the patient’s identifier
## group is the group (group 1 or 2)
## time is the time to death (in years)9
## event is the event status (0 = censored, 1 = death)

# data input
patientDat = data.frame(
  group = c(rep(1, 6), rep(2, 6)), 
  time = c(4.1, 7.8, 10, 10, 12.3, 17.2, 9.7, 10, 11.1, 13.1, 19.7, 24.1),
  event = c(1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0)
)

survdiff(Surv(time, event) ~ group, 
         data = patientDat)

fitResults = survfit(Surv(time, event) ~ group, data = patientDat)
ggsurvplot(fitResults, 
           pval = TRUE, 
           pval.method = TRUE)


#
# Example 2
#
# the data on the times until staphylococcus infection of burn patients, 
#    also available in the {KMsurv}

data(burn)
head(burn)

# Using the log-rank test, we want to test the hypothesis of difference 
#    in the time to staphylococcus infection (T3 variable) between patients 
#    whose burns were cared for with a routine bathing care method (Z1 = 0) 
#   versus those whose body cleansing was initially performed using 4% 
#    chlorhexidine gluconate (Z1 = 1). The event indicator is in variable D3.

fit = survfit(Surv(T3, D3) ~ Z1, data = burn)
ggsurvplot(fit, 
           pval = TRUE, 
           pval.method = TRUE)
Comments