本内容为【科研私家菜】R语言机器学习与临床预测模型系列课程
R小盐准备介绍R语言机器学习与预测模型的学习笔记
你想要的R语言学习资料都在这里, 快来收藏关注【科研私家菜】
01 Logistic 回归列线图
library(foreign)
library(rms)
mydata<-read.spss("lweight.sav")
mydata<-as.data.frame(mydata)
head(mydata)
mydata$low <- ifelse(mydata$low =="低出生体重",1,0)
mydata$race1 <- ifelse(mydata$race =="白种人",1,0)
mydata$race2 <- ifelse(mydata$race =="黑种人",1,0)
mydata$race3 <- ifelse(mydata$race =="其他种族",1,0)
attach(mydata)
#f<-glm(low ~ age+ftv+ht+lwt+ptl+smoke+ui+race2+race3,data=mydata,family = binomial())
#summary(f)
dd<-datadist(mydata)
options(datadist='dd')
fit1<-lrm(low~age+ftv+ht+lwt+ptl+smoke+ui+race2+race3,data=mydata,x=T,y=T)
fit1
summary(fit1)
nom1 <- nomogram(fit1, fun=plogis,fun.at=c(.001, .01, .05, seq(.1,.9, by=.1), .95, .99, .999),lp=F, funlabel="Low weight rate")
plot(nom1)
fit3<-lrm(low~ht+lwt+ptl+smoke+race,data=mydata,x=T,y=T)
fit3
summary(fit3)
nom3 <- nomogram(fit3, fun=plogis,fun.at=c(.001, .01, .05, seq(.1,.9, by=.1), .95, .99, .999),lp=F, funlabel="Low weight rate")
plot(nom3)
cal3 <- calibrate(fit3, cmethod='hare',method='boot', B=1000)
plot(cal3,xlim=c(0,1.0),ylim=c(0,1.0))
效果如下:
02 Cox 回归列线图
##
library(foreign)
library(survival)
library(rms)
pancer <- read.spss('R语言进阶-第9-13章/ch09/pancer.sav')
pancer <- as.data.frame(pancer)
head(pancer)
pancer$censor <- ifelse(pancer$censor=='死亡',1,0)
pancer$Gender <- as.factor(ifelse(pancer$sex=='男',"Male","Female"))
pancer$ch <- as.factor(ifelse(pancer$ch=='CH3', "ch","nonch"))
dd<-datadist(pancer)
options(datadist='dd')
coxm1 <- cph(Surv(time,censor==1)~age+Gender+trt+bui+ch+p+stage,x=T,y=T,data=pancer,surv=T)
coxm1
summary(coxm1)
surv <- Survival(coxm1)
surv1 <- function(x)surv(1*3,lp=x)
surv2 <- function(x)surv(1*6,lp=x)
surv3 <- function(x)surv(1*12,lp=x)
nom1<-nomogram(coxm1,fun=list(surv1,surv2,surv3),lp = F,
funlabel=c('3-Month Survival probability',
'6-Month survival probability',
'12-Month survival probability'),
maxscale=100,
fun.at=c('0.9','0.85','0.80','0.70','0.6','0.5','0.4','0.3','0.2','0.1'))
plot(nom1)
#plot(nomogram(coxm1,fun=list(surv1,surv2,surv3),lp= F,funlabel=c('3-Month Survival probability','6-Month survival probability','12-Month survival probability'),maxscale=100,fun.at=c('0.9','0.85','0.80','0.70','0.6','0.5','0.4','0.3','0.2','0.1')),xfrac=.30)
coxm2 <- cph(Surv(time,censor==1)~age+trt+bui+p+stage,x=T,y=T,data=pancer,surv=T)
coxm2
summary(coxm2)
surv <- Survival(coxm2)
surv1 <- function(x)surv(1*3,lp=x)
surv2 <- function(x)surv(1*6,lp=x)
surv3 <- function(x)surv(1*12,lp=x)
nom2<-nomogram(coxm2,fun=list(surv1,surv2,surv3),lp = F,
funlabel=c('3-Month Survival probability',
'6-Month survival probability',
'12-Month survival probability'),
maxscale=100,
fun.at=c('0.9','0.85','0.80','0.70','0.6','0.5','0.4','0.3','0.2','0.1'))
plot(nom2)
##
library(survival)
f<-coxph(Surv(time,censor==1)~age+Gender+trt+bui+ch+p+stage,data=pancer)
summary(f)
sum.surv<-summary(f)
c_index<-sum.surv$concordance
cal <- calibrate(coxm2, cmethod='KM', method='boot', u=3, m=20, B=1000)
plot(cal,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),xlim=c(0,1),ylim=c(0,1),xlab="Nomogram-Predicted Probabilityof 3 months OS",ylab="Actual 3 months OS (proportion)",col=c(rgb(192,98,83,maxColorValue=255)))
#lines(cal[,c("mean.predicted","KM")],type="b",lwd=2,col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
#abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))
效果如下:
重采样模型校正
calibrate(fit, ...)
重采样模型校正使用自举法或自举法来获得预测值与观测值的偏差校正(过度拟合校正)估计值,这些估计值基于将预测值细分为区间(用于生存模型)或非参数模型(用于其他模型)。有 Cox (cph)、参数生存模型(psm)、二元和顺序逻辑模型(lrm)和一般最小平方法模型(ols)的校正函数。对于生存模型,”预测”是指在单一时间点预测生存概率,”观察”是指相应的 Kaplan-Meier 生存估计,按预测生存时间间隔分层,或者,如果安装了 polspline 软件包,则预测生存概率是使用灵活的危险回归方法(详见 val.surv 函数)转换后的预测生存概率的函数。对于逻辑和线性模型,一个非参数校正曲线估计超过一系列的预测值。配合必须指定 x = TRUE,y = TRUE。Lrm 和 ols 模型的打印和绘图方法(使用 calibrate.default)打印预测中的平均绝对误差、均方差和绝对误差的0.9分位数。在这里,误差指的是预测值和相应的偏差校正值之间的差异。
calibrate(fit, ...)
## Default S3 method:
calibrate(fit, predy,
method=c("boot","crossvalidation",".632","randomization"),
B=40, bw=FALSE, rule=c("aic","p"),
type=c("residual","individual"),
sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, kint,
smoother="lowess", digits=NULL, ...)
## S3 method for class 'cph'
calibrate(fit, cmethod=c('hare', 'KM'),
method="boot", u, m=150, pred, cuts, B=40,
bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL,
estimates=TRUE,
pr=FALSE, what="observed-predicted", tol=1e-12, maxdim=5, ...)
## S3 method for class 'psm'
calibrate(fit, cmethod=c('hare', 'KM'),
method="boot", u, m=150, pred, cuts, B=40,
bw=FALSE,rule="aic",
type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE,
pr=FALSE, what="observed-predicted", tol=1e-12, maxiter=15,
rel.tolerance=1e-5, maxdim=5, ...)
## S3 method for class 'calibrate'
print(x, B=Inf, ...)
## S3 method for class 'calibrate.default'
print(x, B=Inf, ...)
## S3 method for class 'calibrate'
plot(x, xlab, ylab, subtitles=TRUE, conf.int=TRUE,
cex.subtitles=.75, riskdist=TRUE, add=FALSE,
scat1d.opts=list(nhistSpike=200), par.corrected=NULL, ...)
## S3 method for class 'calibrate.default'
plot(x, xlab, ylab, xlim, ylim,
legend=TRUE, subtitles=TRUE, cex.subtitles=.75, riskdist=TRUE,
scat1d.opts=list(nhistSpike=200), ...)
关注R小盐,关注科研私家菜(VX_GZH: SciPrivate),有问题请联系R小盐。让我们一起来学习 R语言机器学习与临床预测模型