---
title: "r语言程序笔记"
author: "sjy"
date: "2023-09-26"
output: html_document
---
# 常用r包
```{r}
library(openxlsx) #读写xlsx文件
library(tidyverse) #数据处理,绘图,批量化(map、pmap)
library(modelr) #辅助建模(resample.crossv_kfold,seq_range,data_grid,add_prediction)
library(broom) #模型信息提取(tidy,glance,augment)
#统计建模包汇总
library(gee) #广义估计方程r包(不能做无序多分类logistic)
library(VGAM) #vglm()gee无序多分类
library(MASS)#polr()有序logistic回归
library(brant)#brant()检验有序logistic回归平行性
library(nnet)#multinom()无序多分类logistic回归
library(survival) #coxph()cox回归建模
library(randomForestSRC) #rfsrc()随机生存森林
library(pROC) #roc()求roc所用数据
library(survivalROC) #survivalROC()求时间依赖roc所用数据
#统计绘图包
library(rms) #nomogram()列线图
library(riskRegression) #plotCalibration()校正曲线
library(pheatmap) #pheatmap()热图
library(RColorBrewer) #调色板
```
# 使用across函数批量更改变量类型
```{r}
# Batch-convert a set of columns to factor / ordered / character / numeric.
# NOTE(review): this statement was truncated in the source; everything between
# `variables` and `%>% mutate(...)` is missing. The empty vector and the data
# frame name `result` are a reconstruction, so confirm them against the
# original notes before running.
variables <- character() # TODO: list the names of the columns to convert
result <- result %>%
  # as.character can be swapped for factor / ordered / as.numeric;
  # any_of() silently skips names that are not columns of the data frame.
  mutate(across(any_of(variables), as.character))
# Convert continuous variables into classes.
# The cut points are derived from the data set itself.
# Quantile split: each column named in `cols_to_cut` is binned at its 25%, 50%,
# 75% and 100% quantiles, and the bin index (labels = FALSE gives integer
# codes) is stored in a new column "<col>class4".
# The lowest break is min - 1 so that the minimum value falls inside the first
# (left-open) interval instead of becoming NA.
# NOTE(review): the source line was truncated after `result1`; `result` as the
# input data frame is a reconstruction, so confirm it.
# NOTE(review): cut() stops with "'breaks' are not unique" when two quantiles
# of a column coincide (heavily tied data).
result1 <- result %>%
  mutate(across(
    any_of(cols_to_cut),
    ~ cut(
      .x,
      breaks = c(
        min(.x, na.rm = TRUE) - 1,
        quantile(.x, probs = c(0.25, 0.5, 0.75, 1), na.rm = TRUE)
      ),
      labels = FALSE
    ),
    .names = "{.col}class4"
  ))
# Equal-width split: each column named in `cols_to_cut` is divided into four
# bins of equal width across its observed range, and the bin index
# (labels = FALSE gives integer codes) is stored in "<col>classd4".
# The lowest break is min - 1 so that the minimum value falls inside the first
# (left-open) interval instead of becoming NA.
# NOTE(review): the source line was truncated after `result3`; `result` as the
# input data frame is a reconstruction, so confirm it.
# NOTE(review): a constant column (range of zero) produces duplicated breaks
# and cut() stops with "'breaks' are not unique".
result3 <- result %>%
  mutate(across(
    any_of(cols_to_cut),
    ~ {
      lo <- min(.x, na.rm = TRUE)
      hi <- max(.x, na.rm = TRUE)
      span <- diff(range(.x, na.rm = TRUE))
      cut(
        .x,
        breaks = c(lo - 1, lo + span / 4, lo + 2 * span / 4, lo + 3 * span / 4, hi),
        labels = FALSE
      )
    },
    .names = "{.col}classd4"
  ))
# Dichotomise predictors with cut-offs obtained from ROC curves (the
# `thresholds` data set is built further down in these notes).
# Done with a for loop: one predictor per iteration, result stored in
# "<col>class2" as an integer code (labels = FALSE).
# NOTE(review): the source line was truncated after `result2`; updating
# `result2` in place is a reconstruction and assumes `result2` was initialised
# before the loop (e.g. as a copy of the working data frame), so confirm it.
for (nm in predvars) {
  # Value in column 1 of the `thresholds` row whose `predvar` equals `nm`
  # (presumably the ROC cut-off; confirm the column layout).
  cutoff <- thresholds[[match(nm, thresholds$predvar), 1]]
  result2 <- result2 %>%
    mutate(across(
      # all_of() instead of a bare external string, which tidyselect deprecates.
      all_of(nm),
      ~ cut(
        .x,
        breaks = c(min(.x, na.rm = TRUE) - 1, cutoff, max(.x, na.rm = TRUE)),
        labels = FALSE
      ),
      .names = "{.col}class2"
    ))
}
```
# 批量计算新列
```{r}
# Compute the time interval between dates.
# NOTE(review): the `diff_col_name` line below is truncated in this copy of
# the notes. The remainder of this loop body and the header of the function
# that the eGFR branches belong to are missing, so the chunk does not parse
# as written; restore the missing text from the original notes.
# What survives is an if/else chain that assigns eGFR from 性别, scr and 年龄:
#   - the 性别 == 0 branch uses the constant 141 and divides scr by 0.9;
#     the 性别 == 1 branches use 144 and divide scr by 0.7;
#   - the exponent is -1.209 when scr > 0.7 (性别 == 1), and -0.411
#     (性别 == 0) or -0.329 (性别 == 1) when scr is at or below the divisor;
#   - the first branch (141, divisor 0.9, exponent -1.209) has lost its
#     condition; presumably it was 性别 == 0 & scr > 0.9, so confirm;
#   - every formula is multiplied by 0.993 raised to 年龄;
#   - any other combination yields NA.
# `**` is parsed by R as `^`.
# NOTE(review): the constants look like the CKD-EPI creatinine equation, so
# confirm. The conditions use the element-wise `&` inside `if`, so they
# presumably expect scalar inputs; verify against the caller.
for (i in 2:53){
diff_col_name 0.9){eGFR=141*(scr/0.9)**-1.209*0.993**年龄}
else if(性别==0&scr<=0.9){eGFR=141*(scr/0.9)**-0.411*0.993**年龄}
else if(性别==1&scr>0.7){eGFR=144*(scr/0.7)**-1.209*0.993**年龄}
else if(性别==1&scr<=0.7){eGFR=144*(scr/0.7)**-0.329*0.993**年龄}
else{eGFR=NA}
return(eGFR)
}
}
# NOTE(review): the `diff_col_name`, `num_at_end`, `zz` and `sorted_colnames`
# lines in this part are truncated in this copy of the notes (the text between
# each name and the following `%` is missing), and the loop opened below has
# lost its body and closing brace. The chunk does not parse as written;
# restore the missing text from the original notes.
# NOTE(review): library(MASS) is attached after library(tidyverse) at the top
# of the file, so a bare select() resolves to MASS::select(); use
# dplyr::select() when restoring these lines.
for(i in 1:53){
diff_col_name % select(sorted_colnames)
# Advanced method 2
# The case_when() below assigns a secondary sort key fz2 from the text in
# `value`: names containing "Scr" get 1, "eGFR" 2, "年龄" 3, "时间" 4.
# The rows are then ordered by fz1 and fz2, and the columns are selected in
# the order given by `sorted_colnames`.
# Presumably `value` holds column names and fz1 the trailing number of each
# name (cf. `num_at_end`); verify once the truncated text is restored.
num_at_end % mutate(fz2=case_when(str_detect(value,"Scr")~1,
str_detect(value,"eGFR")~2,
str_detect(value,"年龄")~3,
str_detect(value,"时间")~4,))
zz % arrange(fz1, fz2)
sorted_colnames % select(sorted_colnames)
```
# map批量建模
```{r}
# Batch normality test: run shapiro.test() on every column named in
# `cols_to_cut` and return the list of test results, one element per column.
# select() is namespaced on purpose: library(MASS) is attached after
# library(tidyverse) at the top of this file, so a bare select() resolves to
# MASS::select() and fails with an "unused argument" error.
result %>%
  dplyr::select(all_of(cols_to_cut)) %>%
  map(~shapiro.test(.))
# NOTE(review): this line is truncated in this copy of the notes; the text
# between `variables` and `%` is missing, so the statement does not parse as
# written. Restore it from the original notes.
# The surviving pipeline fits coxph(Surv(a46, a45) ~ a3 + ... + a16) on each
# element of the `train` column, stores the fits in `models`, and then applies
# augment() to each fit, storing the output in `results`.
# Presumably `train` comes from a modelr resampling helper such as
# crossv_kfold(); verify once the truncated text is restored.
variables % mutate(models=map(train,~coxph(Surv(a46,a45) ~ a3+a6+a7+a8+a9+a10+a12+a13+a16,.x))) %>% mutate(results=map(models,augment))
```