代码拉取完成,页面将自动刷新
library(openxlsx)
library(tidyverse)
library(randomForestSRC) #随机生存森林
library(survival) #cox回归建模
library(ggplot2)
library(rms) #列线图
library(riskRegression) #预测集校准曲线
library(modelr)
library(purrr)
library(broom)
library(survivalROC)
coxdata <- readRDS("C:/Users/99405/Desktop/work/公司/rapp/predict/data/coxdata.rds")
coxdata2 <- readRDS("C:/Users/99405/Desktop/work/公司/rapp/predict/data/coxdata2.rds")
train_data2 <-train_data2 %>% mutate(class=1)
validation_data2 <-validation_data2 %>% mutate(class=2)
recoxdata <- bind_rows(train_data2,validation_data2)
write.xlsx(recoxdata,"C:/Users/99405/Desktop/work/公司/recoxdata.xlsx")
#注意生存分析的死亡结局变量应该为数值型
glance(model2)
coxdata <- read.xlsx("C:/Users/Administrator/Desktop/coxdatar.xlsx")
#批量设置因子型变量
variables <-c("a2","a6","a14","a15","a18","a19","a20","a21","a22","a23","a27","a29","a31","a33","a35","a37","a40","a42","a44","a47","a49","a51")
coxdata <- coxdata %>% mutate(across(any_of(variables),factor))
#批量设置有序因子变量
variables <-c("a11","a13","a17")
coxdata <- coxdata %>% mutate(across(any_of(variables),ordered))
#批量设置有序因子变量
variables <-c("a11","a12","a13","a17")
coxdata <- coxdata %>% mutate(across(any_of(variables),ordered))
#批量设置数值变量
variables <-c("a12")
coxdata <- coxdata %>% mutate(a12=numeric(a12))
coxdata <- coxdata %>% mutate(across(a45,factor))
#提取分析数据集
coxdata1 <- coxdata
coxdata2 <- coxdata %>% filter(a24 >= 15)
coxdata3 <- coxdata2 %>% filter(a24 >= 30)
#变量筛选
#单因素回归
coxdata2 <- coxdata2 %>%rename(sex=a2,age=a3,sbp=a4,bmi=a5, etiology=a6, Hb=a7,Alb=a8,
Cr=a9,eGFR=a10,CKD_category=a12,dip_stick_proteinuria=a13,urinary_occult_blood=a15,UPCR=a16,hypertension=a18,CVD=a19,
diabetes=a20,use_RAASi=a21,use_CCB=a22,use_diuretics=a23,
event=a45,time=a46)
#随机生存森林
cc <- rfsrc(Surv(time,event) ~ sex+age+sbp+bmi+etiology+Hb+Alb+Cr+eGFR+CKD_category+dip_stick_proteinuria+urinary_occult_blood+UPCR+hypertension+CVD+diabetes+use_RAASi+use_CCB+use_diuretics,data = coxdata2,
ntree=300,
mtry=7,
nodesiza=15,
samptype="swor",
sampize=550,
block.size = 10,
importance="random",
splitrule="logrank",
nsplit=10
)
#查看模型信息
print(cc)
#查看模型拟合情况和变量重要性
plot(cc)
#变量重要性排序
print(sort(cc$importance,decreasing = TRUE))
#自动选择重要变量
selectvar <- var.select(object=cc)
#随机森林模型进行预测
rfpredicted <- predict(cc, newdata = validation_data, type = "response")
rfpredicted <- predict(cc, newdata = validation_data, type = "survival")
validation_data$a46
rfpredicted[["survival"]]
print(rfpredicted )
#划分coxdata为训练集和测试集
set.seed(123)
validation_data2 <- coxdata2 %>% slice_sample(prop=0.3)
train_data2 <- coxdata2 %>% anti_join(validation_data2)
set.seed(123)
validation_data3 <- coxdata3 %>% slice_sample(prop=0.3)
train_data3 <- coxdata3 %>% anti_join(validation_data3)
#cox多因素回归建模
model2 <- coxph(formula=Surv(a46,a45) ~ a3+a6+a7+a8+a9+a10+a12+a13+a16, data = train_data2,x = TRUE)
model3 <- coxph(formula=Surv(a46,a45) ~ a3+a6+a7+a8+a9+a10+a12+a13+a16, data = train_data3,x = TRUE)
#预测生存率
validation_data1$predicted_survival1 <- predict(model1, newdata = validation_data1, type = "survival")
validation_data2$predicted_survival2 <- predict(model2, newdata = validation_data2, type = "survival")
validation_data3$predicted_survival3 <- predict(model3, newdata = validation_data3, type = "survival")
# 计算训练集和测试集的C-index
c_statistic2 <- concordance(model2, newdata=validation_data2)
c_statistic2
#求疾病预测概率
validation_data1$diseasepred1 <- 1-validation_data1$predicted_survival1
validation_data2$diseasepred2 <- 1-validation_data2$predicted_survival2
validation_data3$diseasepred3 <- 1-validation_data3$predicted_survival3
#绘制model2列线图
train_data2 <- train_data2 %>%rename(age=a3, etiology=a6, Hb=a7,Alb=a8,
Cr=a9,eGFR=a10,CKD_category=a12,dip_stick_proteinuria=a13,UPCR=a16,
event=a45,time=a46)
dd<-datadist(train_data2)
options(datadist="dd")
options(na.action="na.delete")
model2 <- cph(formula=Surv(time,event) ~ age+etiology+Hb+Alb+Cr+eGFR+CKD_category+dip_stick_proteinuria+UPCR, data = train_data2,x=T,y=T,surv=T)
print(model2)
surv<-Survival(model2)
surv3<-function(x) surv(15,x)
surv4<-function(x) surv(30,x)
x<-nomogram(model2,fun = list(surv3,surv4),lp=T,
funlabel = c('15-month survival Probability','30-month survival Probability'),
maxscale = 100,fun.at = c(0.95,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1))
pdf("nomogram_classical2.pdf",width = 18,height = 10)
plot(x, lplabel="Linear Predictor",
xfrac=.35,varname.label=TRUE, varname.label.sep="=", ia.space=.2,
tck=NA, tcl=-0.20, lmgp=0.3,
points.label='Points', total.points.label='Total Points',
total.sep.page=FALSE,
cap.labels=FALSE,cex.var = 1.6,cex.axis = 1.05,lwd=5,
label.every = 1,col.grid = gray(c(0.8, 0.95)))
dev.off()
#绘制校准曲线
#方法1 预测生存率(暂未找到画测试集校准曲线的方法)
f5<-cph(formula=Surv(a46,a45) ~ a3+a6+a7+a8+a9+a10+a12+a13+a16, data = train_data2,x=T,y=T,surv=T,time.inc = 12)
#u表示预测时间应该和模型参数time.inc保持一致,参数m=50表示每组50个样本进行重复计算
cal5<-calibrate(f5, cmethod="KM", method="boot",u=12,m=50,B=1000)
plot(cal5)
f6<-cph(formula=Surv(a46,a45) ~ a3+a6+a7+a8+a9+a10+a12+a13+a16, data = train_data2,x=T,y=T,surv=T,time.inc = 12)
cal5<-calibrate(f6, cmethod="KM", method="boot",u=24,m=50,B=1000)
plot(cal5)
#pdf("calibration_5y.pdf",width = 8,height = 8) 导出pdf时使用
plot(cal5,
lwd = 2,#error bar的粗细
lty = 1,#error bar的类型,可以是0-6
errbar.col = c("#2166AC"),#error bar的颜色
xlim = c(0,1),ylim= c(0,1),
xlab = "Nomogram-prediced OS (%)",ylab = "Observed OS (%)",
cex.lab=1.2, cex.axis=1, cex.main=1.2, cex.sub=0.6) #字的大小
lines(cal5[,c('mean.predicted',"KM")],
type = 'b', #连线的类型,可以是"p","b","o"
lwd = 2, #连线的粗细
pch = 16, #点的形状,可以是0-20
col = c("#2166AC")) #连线的颜色
mtext("")
box(lwd = 1) #边框粗细
abline(0,1,lty = 3, #对角线为虚线
lwd = 2, #对角线的粗细
col = c("#224444")#对角线的颜色
)
#dev.off() 导出pdf时使用
cox_score <- Score(list("fit1" = f5),
formula=Surv(a46,a45)~1,
data=validation_data2,
times =15,
plots="cal"
)
plotCalibration(cox_score)
#方法2 预测死亡率
#model2 时间点1
set.seed(1)
cox_fit_t <- Score(list("fit1" = model2),
formula = Surv(a46, a45) ~ 1,
data = validation_data2, # 这里写测试集即可
plots = "calibration",
conf.int = T,
B = 500, # 重抽样500次
M = 50,
times=c(12) #可绘制某个时点的预测情况
)
# 画图即可
plotCalibration(cox_fit_t,
cens.method="local",
xlab = "1-year Predicted Risk",
ylab = "Observerd RISK")
#model2 时间点2
set.seed(1)
cox_fit_t <- Score(list("fit1" = model2),
formula = Surv(a46, a45) ~ 1,
data = validation_data2, # 这里写测试集即可
plots = "calibration",
conf.int = T,
B = 500, # 重抽样500次
M = 50,
times=c(24) #可绘制某个时点的预测情况
)
# 画图即可
plotCalibration(cox_fit_t,
cens.method="local",
xlab = "2-year Predicted Risk",
ylab = "Observerd RISK")
#model2 时间点3
set.seed(1)
cox_fit_t <- Score(list("fit1" = model2),
formula = Surv(a46, a45) ~ 1,
data = validation_data2, # 这里写测试集即可
plots = "calibration",
conf.int = T,
B = 500, # 重抽样500次
M = 50,
times=c(36) #可绘制某个时点的预测情况
)
# 画图即可
plotCalibration(cox_fit_t,
cens.method="local",
xlab = "3-year Predicted Risk",
ylab = "Observerd RISK")
#方法3 手绘版校准曲线(不能预测时间点)
#model2
#概率排序
revalidation_data2 <- validation_data2[order(validation_data2[["diseasepred2"]]), ]
# 划分分组,这里将数据分成10个桶
num_buckets <- 10
bucket_size <- nrow(validation_data2) / num_buckets
# 初始化存储观察概率的向量
observed_probabilities <- numeric(num_buckets)
mean_predicted_probabilities <- numeric(num_buckets)
# 计算每个桶的观察概率
for (i in 1:num_buckets) {
start_idx <- (i - 1) * bucket_size + 1
end_idx <- i * bucket_size
bucket_data <- revalidation_data2[start_idx:end_idx, ]
# 计算每个桶内事件发生的比例(Kaplan-Meier估计)
observed_prob <- sum(bucket_data$a45) / nrow(bucket_data)
observed_probabilities[i] <- observed_prob
# 计算每个桶内的平均预测概率
mean_predicted_prob <- mean(bucket_data[["diseasepred2"]])
mean_predicted_probabilities[i] <- mean_predicted_prob
}
# 绘制校准曲线
ggplot() +
geom_point(aes(x = mean_predicted_probabilities, y = observed_probabilities)) +
geom_line(aes(x = mean_predicted_probabilities, y = observed_probabilities)) + # 添加校准曲线
geom_abline(intercept = 0, slope = 1, color = "red") + # 添加对角线
xlab("Predicted Survival Probability(model2)") +
ylab("Observed Event Probability")
# 单图多曲线
#评价两个模型的校准效果(上述方法2的拓展)
cc1 <-cph(formula=Surv(a46,a45) ~ a3+a6+a7+a8+a9+a10+a12+a13+a16, data = train_data2,x=T,y=T,surv=T)
cc2 <-cph(formula=Surv(a46,a45) ~ a3+a6+a7+a8+a9+a10+a12+a13+a16, data = train_data2,x=T,y=T,surv=T)
cox_score <- Score(list("Cox1"=cc1,
"Cox2"=cc2),
formula=Surv(a46,a45)~1,
data=validation_data2,
times=c(15),
plots="cal"
)
plotCalibration(cox_score)
#roc曲线
par(mar= c(5,5,1,1),cex.lab=1.2,cex.axis= 1.2)
validation_data2$pred <- predict(model2,validation_data2,type="lp")
#model2 roc曲线
sROC=survivalROC(Stime=validation_data2$a46, status=validation_data2$a45, marker = validation_data2$pred, predict.time =12, method="KM")
plot(sROC$FP, sROC$TP, type="l", xlim=c(0,1), ylim=c(0,1),col="red",
xlab="False positive rate", ylab="True positive rate",
lwd = 2, cex.main=1.3, cex.lab=1.5, cex.axis=1.2, font=1.2)
abline(0,1)
aucText=paste0("1 years"," (AUC=",sprintf("%.3f",sROC$AUC),")") #这个后面添加legend用
#再加一个3年的线
sROC3=survivalROC(Stime=validation_data2$a46, status=validation_data2$a45, marker = validation_data2$pred, predict.time =24, method="KM")
lines(sROC3$FP, sROC3$TP, type="l", xlim=c(0,1), ylim=c(0,1),col="green",lwd = 2)
aucText3=paste0("2 years"," (AUC=",sprintf("%.3f",sROC3$AUC),")") #这个后面添加legend用
#再加一个5年的线
sROC5=survivalROC(Stime=validation_data2$a46, status=validation_data2$a45, marker = validation_data2$pred, predict.time =36, method="KM")
lines(sROC5$FP, sROC5$TP, type="l", xlim=c(0,1), ylim=c(0,1),col="blue",lwd = 2)
aucText5=paste0("3 years"," (AUC=",sprintf("%.3f",sROC5$AUC),")") #这个后面添加legend用
#添加legend
legend("bottomright", c(aucText,aucText3,aucText5),
lwd=2,bty="n",col=c("red","green","blue"),cex=1.2)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。