1 Star 1 Fork 0

雨天隐形人/Medical related R language procedures

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
生信分析代码整理.R 21.31 KB
一键复制 编辑 原始数据 按行查看 历史
shanjiayu1 提交于 2024-01-25 16:12 . 24.1.25
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757
#下载R包如以下载可跳过这部分
rm(list = ls())
options()$repos
options()$BioC_mirror
options(BioC_mirror="http://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
options("repos" = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
install.packages("devtools")
install.packages("htmltools")
install.packages('WGCNA')
install.packages(c("FactoMineR", "factoextra"))
install.packages(c("pheatmap","ggpubr"))
BiocManager::install(c("GSEABase","GSVA","clusterProfiler" ),ask = F,update = F)
BiocManager::install(c("GEOquery","limma","impute" ),ask = F,update = F)
BiocManager::install(c("org.Hs.eg.db","hgu133plus2.db" ),ask = F,update = F)
BiocManager::install("preprocessCore")
devtools::install_github("yanlinlin82/ggvenn")
install_github("jmzeng1314/idmap1")
install_github("jmzeng1314/idmap2")
install_github("jmzeng1314/idmap3")
# 所需r包
library(tidyverse) #数据处理
library(openxlsx) #xlsx格式问价读取和导出
library(GEOquery) #获取geo数据集
#基因探针号匹配通用基因id
library(idmap1) #基因id库1
library(idmap2) #基因id库2
library(idmap3) #基因id库3
library(limma) #差异性分析
library(WGCNA) #WGCNA求基因模块
library(GSEABase)#获取样本信息
library(GSVA)
library(clusterProfiler)
library(ggpubr)
library(ggplot2) #绘图
library(pheatmap) #热图
library(ggvenn) #韦恩图
library(ggrepel) # 标签相关
library(stringr) # 标签换行
library(clusterProfiler)
library(AnnotationDbi)
library(org.Hs.eg.db) # 参考物种的基因注释数据库hg19
library(DOSE)
# 获得数据集并贴基因id -------------------------------------------------------------------
gset1 <- getGEO('GSE32549', destdir=".",
AnnotGPL = F,
getGPL = F) #填写数据集的geo号GSE32549
gset2 <- getGEO('GSE128959', destdir=".",
AnnotGPL = F,
getGPL = F)
gset3 <- getGEO('GSE185264', destdir=".",
AnnotGPL = F,
getGPL = F)
b1 = gset1[[2]]
b2 = gset2[[1]]
b3 = gset3[[1]]
a1=exprs(b1)
raw_exprSet=a1
a2=exprs(b2)
raw_exprSet=a2
a3=exprs(b3)
raw_exprSet=a3
# ID转化
ids1=get_soft_IDs('gpl6947') #填写数据集1的基因平台号
ids2=get_soft_IDs('gpl6244')
ids3=get_pipe_IDs('gpl30818')
exprSet1=a1
exprSet2=a2
exprSet3=a3
exprSet1=exprSet1[rownames(exprSet1) %in% ids1$ID,]
ids1=ids1[match(rownames(exprSet1),ids1$ID),]
exprSet2=exprSet2[rownames(exprSet2) %in% ids2$ID,]
ids2=ids2[match(rownames(exprSet2),ids2$ID),]
kelly <- function(exprSet,ids){
tmp = by(exprSet,ids$symbol,
function(x) rownames(x)[which.max(rowMeans(x))] )
probes = as.character(tmp)
print(dim(exprSet))
exprSet=exprSet[rownames(exprSet) %in% probes ,]
print(dim(exprSet))
rownames(exprSet)=ids[match(rownames(exprSet),ids$ID),2]
return(exprSet)
}
new_exprSet1 <- kelly(exprSet1,ids1)
exprSet1=new_exprSet1
new_exprSet2 <- kelly(exprSet2,ids2)
exprSet2=new_exprSet2
# 筛选样本并分组 -----------------------------------------------------------------
phe1 <- pData(b1)# 查看数据集的样本信息
phe1 <- phe1 %>% mutate(row_num = row_number())
phe2 <- pData(b2)
phe2 <- phe2 %>% mutate(row_num = row_number())
phe3 <- pData(b3) #数据集3无侵袭性样本弃用
phe3 <- phe3 %>% mutate(row_num = row_number())
unique(phe1$`tumor stage:ch1`)
phe1 <- phe1 %>% mutate(group=if_else(`tumor stage:ch1`=="T2","mus","non"))
unique(phe2$`tumor stage:ch1`)
# phe2分类数据处理
phe2 <- phe2 %>%filter(!str_detect(`tumor stage:ch1`, "N/A|Metastasis"))
phe2 <- phe2 %>% mutate(group=if_else(`tumor stage:ch1`%in% c("T2", "T3","T4"),"mus","non"))
rows_with_non2 <-phe2 %>% pull(row_num)
exprSet2 <- exprSet2[,c(rows_with_non2)]
# 差异性分析 -------------------------------------------------------------------
#归一化
exprSet1=normalizeBetweenArrays(exprSet1)
exprSet2=normalizeBetweenArrays(exprSet2)
# 求差异基因函数
deg = function(exprSet,design,contrast.matrix){
##step1
fit <- lmFit(exprSet,design)
##step2
fit2 <- contrasts.fit(fit, contrast.matrix)
##这一步很重要,大家可以自行看看效果
fit2 <- eBayes(fit2) ## default no trend !!!
##eBayes() with trend=TRUE
##step3
tempOutput = topTable(fit2, coef=1, n=Inf)
nrDEG = na.omit(tempOutput)
#write.csv(nrDEG2,"limma_notrend.results.csv",quote = F)
head(nrDEG)
return(nrDEG)
}
#data1
groupname <- phe1 %>%pull(group)
design <- model.matrix(~0+factor(groupname))
colnames(design)=levels(factor(groupname))
rownames(design)=colnames(exprSet1)
contrast.matrix<-makeContrasts(mus-non,levels = design)
contrast.matrix
#求基因的差异显著性
re1 = deg(exprSet1,design,contrast.matrix)
nrDEG1=re1
#gse2
groupname <- phe2 %>%pull(group)
design <- model.matrix(~0+factor(groupname))
colnames(design)=levels(factor(groupname))
rownames(design)=colnames(exprSet2)
contrast.matrix<-makeContrasts(mus-non,levels = design)
contrast.matrix
re2 = deg(exprSet2,design,contrast.matrix)
nrDEG2=re2
#求差异基因数量
count1 <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% count()
count1
count2 <- nrDEG2 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% count()
count2
genedata <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(symbol=rownames(.))
write.xlsx(genedata,"genedata.xlsx")
#热图
choose_gene=head(rownames(nrDEG1),25)
choose_matrix=exprSet1[choose_gene,]
choose_matrix=t(scale(t(choose_matrix)))
pheatmap(choose_matrix)
#火山图
# 简略版1
plot(nrDEG1$logFC,-log10(nrDEG1$adj.P.Val))
#区分上下调基因版2
#数据集1
DEG=nrDEG1
#差异基因较少时使用 logFC_cutoff <- with(DEG,mean(abs( logFC)) + 2*sd(abs( logFC)) )
logFC_cutoff=0.5
DEG$change = as.factor(ifelse(DEG$adj.P.Val < 0.05 & abs(DEG$logFC) > logFC_cutoff,
ifelse(DEG$logFC > logFC_cutoff ,'UP','DOWN'),'NOT')
)
this_tile <- paste0('Cutoff for logFC is ',round(logFC_cutoff,3),
'\nThe number of up gene is ',nrow(DEG[DEG$change =='UP',]) ,
'\nThe number of down gene is ',nrow(DEG[DEG$change =='DOWN',])
)
g = ggplot(data=DEG,
aes(x=logFC, y=-log10(P.Value),
color=change)) +
geom_point(alpha=0.4, size=1.75) +
theme_set(theme_set(theme_bw(base_size=20)))+
xlab("log2 fold change") + ylab("-log10 p-value") +
ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
scale_colour_manual(values = c('blue','black','red')) ## corresponding to the levels(res$change)
print(g)
ggsave(g,filename = 'volcano.png')
#数据集2
DEG=nrDEG2
logFC_cutoff=0.5
DEG$change = as.factor(ifelse(DEG$adj.P.Val < 0.05 & abs(DEG$logFC) > logFC_cutoff,
ifelse(DEG$logFC > logFC_cutoff ,'UP','DOWN'),'NOT')
)
this_tile <- paste0('Cutoff for logFC is ',round(logFC_cutoff,3),
'\nThe number of up gene is ',nrow(DEG[DEG$change =='UP',]) ,
'\nThe number of down gene is ',nrow(DEG[DEG$change =='DOWN',])
)
g = ggplot(data=DEG,
aes(x=logFC, y=-log10(P.Value),
color=change)) +
geom_point(alpha=0.4, size=1.75) +
theme_set(theme_set(theme_bw(base_size=20)))+
xlab("log2 fold change") + ylab("-log10 p-value") +
ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
scale_colour_manual(values = c('blue','black','red')) ## corresponding to the levels(res$change)
print(g)
# 求两数据集显著差异基因的交集
diffdata1 <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(id1=rownames(.))
diffdata2 <- nrDEG2 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(id2=rownames(.))
x<-list(data1=diffdata1$id1,
data2=diffdata2$id2)
ggvenn(x)
# GO及KEGG富集分析 ----------------------------------------------------------------
#GO
# 挑出有统计学意义的差异基因
deg1data <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(symbol=rownames(.))
# 将gene symbol转换为Entrez ID,防止分析出错
id_list <- mapIds(org.Hs.eg.db,deg1data$symbol,"ENTREZID","SYMBOL")
# 去除转换不成功的结果,即id=NA的情况
go <- enrichGO(gene = id_list, # Entrez ID列表
OrgDb = org.Hs.eg.db, # 指定物种数据库
keyType = "ENTREZID", # 指定给定的名称类型
ont = "ALL", # 可选,BP(生物学过程)/CC(细胞组分)/MF(分子功能)/ALL(同时指定)
pAdjustMethod = "BH", # P值校正方法,还可以是fdr
pvalueCutoff = 0.05,qvalueCutoff = 0.2, # p/q值阈值
readable = T# 将ID转换为symbol
)
go.res <- as.data.frame(go) # 将GO结果转为数据框,方便后续分析(不转也可以,看个人习惯)
write.csv(go.res,"Table_GO_result.csv",quote = F) # 输出GO富集分析结果
# 绘制GO富集分析条形图,结果默认按qvalue升序,分别选出前十的term进行绘图即可
goBP <- subset(go.res,subset = (ONTOLOGY == "BP"))[1:10,]
goCC <- subset(go.res,subset = (ONTOLOGY == "CC"))[1:10,]
goMF <- subset(go.res,subset = (ONTOLOGY == "MF"))[1:10,]
go.df <- rbind(goBP,goCC,goMF)
# 使画出的GO term的顺序与输入一致
go.df$Description <- factor(go.df$Description,levels = rev(go.df$Description))
# 绘图
go_bar <- ggplot(data = go.df, # 绘图使用的数据
aes(x = Description, y = Count,fill = ONTOLOGY))+ # 横轴坐标及颜色分类填充
geom_bar(stat = "identity",width = 0.9)+ # 绘制条形图及宽度设置
coord_flip()+theme_bw()+ # 横纵坐标反转及去除背景色
scale_x_discrete(labels = function(x) str_wrap(x,width = 50))+ # 设置term名称过长时换行
labs(x = "GO terms",y = "GeneNumber",title = "Barplot of Enriched GO Terms")+ # 设置坐标轴标题及标题
theme(axis.title = element_text(size = 13), # 坐标轴标题大小
axis.text = element_text(size = 11), # 坐标轴标签大小
plot.title = element_text(size = 14,hjust = 0.5,face = "bold"), # 标题设置
legend.title = element_text(size = 13), # 图例标题大小
legend.text = element_text(size = 11), # 图例标签大小
plot.margin = unit(c(0.5,0.5,0.5,0.5),"cm")) # 图边距
ggsave(go_bar,filename = "GO_Barplot.pdf",width = 9,height = 7)
dotplot(go, split="ONTOLOGY",showCategory = 10,label_format=100) + scale_size(rang=c(5.20))+ facet_grid(ONTOLOGY~., scale="free")
#kegg
kegg <- enrichKEGG(gene = id_list,
organism = "human",
keyType = "kegg",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.2)
# 绘图
barplot(kegg,showCategory = 25,title = 'KEGG Pathway')
dotplot(kegg)
# wgcna -------------------------------------------------------------------
datExpr0 = data.frame(t(a1))
colnames(datExpr0) <- rownames(a1)
rownames(datExpr0) <- colnames(a1)
datExpr1<-datExpr0
#筛选筛选方差前25%的基因
#m.vars=apply(datExpr0,2,var)
#expro.upper=datExpr0[,which(m.vars>quantile(m.vars, probs = seq(0, 1, 0.25))[4])]
#datExpr1<-data.matrix(expro.upper)
#判断是否有不好的sample或gene
gsg = goodSamplesGenes(datExpr1, verbose = 3);
gsg$allOK
#如果这里返回的结果是TRUE,说明所有基因都通过了检查。
#如果你用全部基因作为输入,很有可能返回FALSE,说明存在不好的基因或sample。
#下面的代码就会去除那些不好的基因或sample。
#去除不好的sample或gene
if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:",
paste(names(datExpr0)[!gsg$goodGenes],
collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:",
paste(rownames(datExpr1)[!gsg$goodSamples],
collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr1 = datExpr1[gsg$goodSamples, gsg$goodGenes]
}
#判断是否有离群样本
sampleTree = hclust(dist(datExpr1), method = "average")
par(cex = 0.7);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
datExpr = as.data.frame(datExpr1)
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) +
#想用哪里切,就把“h = 110”和“cutHeight = 110”中换成你的cutoff
abline(h = 140, col = "red")
clust = cutreeStatic(sampleTree, cutHeight = 140, minSize = 10)
# 求删除离群样本后的数据集及删掉的数据集
keepSamples = (clust==1)
datExpr = datExpr1[keepSamples, ] #删除离群样本后的数据集
keepSamples1 = (clust==0)
datExprex <- datExpr1[keepSamples1, ]#删除的离群样本
groupname <- phe1 %>%pull(group)
design <- model.matrix(~0+factor(groupname))
colnames(design)=levels(factor(groupname))
rownames(design)=colnames(exprSet1)
design <- design[keepSamples, ]
#选择构建网络的合适阈值
#通过这步计算,找出scale free topology modle fit接近0.9的最小power(soft threshold),用于下一步构建网络。
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(datExpr, powerVector = powers,
verbose = 5 )
pdf("1Threshold.pdf",width = 9, height = 5)
par(mfrow = c(1,2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence")) +
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")+
abline(h=0.9,col="red")
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity")) +
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
#验证所选power值4是否合适
power <- 4
ADJ=abs(cor(datExpr,use = 'p'))^power
k=apply(ADJ,2,sum) -1
hist(k)
cut1=cut(k,20)
binned.k=tapply(k,cut1,mean)
freq1=tapply(k,cut1,length)/length(k)
plot(log10(binned.k),log10(freq1+.000000001),xlab="log10(k)",ylab="log10(p(k))")
xx= as.vector(log10(binned.k))
lm1=lm(as.numeric(log10(freq1+.000000001))~ xx )
lines(xx,predict(lm1),col=2, lty = 1, lwd = 3)
title(paste( "scale free R^2=",as.character(round(summary(lm1)$adj.r.squared,2)), " slope=", round(lm1$coefficients[[2]],2)))
#构建网络,找出gene module
net1 = blockwiseModules(datExpr, power = 4,
TOMType = "unsigned", minModuleSize = 100,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
#saveTOMFileBase = "MyTOM",
verbose = 3)
table(net1$colors)
#模块太多调整参数
net1 = blockwiseModules(
datExpr,
power = 4,
maxBlockSize = 6000,
TOMType = "unsigned", minModuleSize = 200,
reassignThreshold = 0,
mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = F,
verbose = 3
)
table(net1$colors)
# Wgcna进一步筛选关键基因 --------------------------------------------------------------------
mergedColors = labels2colors(net1$colors)
pdf("2module.pdf",width = 10, height = 5)
plotDendroAndColors(net1$dendrograms[[1]], mergedColors[net1$blockGenes[[1]]], "Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()
moduleLabels = net1$colors
unique(net1$colors)
moduleColors = labels2colors(net1$colors)
unique(moduleColors)
table(moduleColors)
MEs = net1$MEs;
genetree = net1$dendrograms[[1]]
#表型与模块的相关性
moduleLabelsAutomatic = net1$colors
moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)
moduleColorsWW = moduleColorsAutomatic
MEs0 = moduleEigengenes(datExpr, moduleColorsWW)$eigengenes
MEsWW = orderMEs(MEs0)
modTraitCor = cor(MEsWW,design, use = "p")
colnames(MEsWW)
modlues=MEsWW
modTraitP = corPvalueStudent(modTraitCor, nSamples)
textMatrix = paste(signif(modTraitCor, 2), "\n(", signif(modTraitP, 1), ")", sep = "")
dim(textMatrix) = dim(modTraitCor)
pdf("3Module-trait.pdf",width = 6, height = 6)
labeledHeatmap(Matrix = modTraitCor,
xLabels = colnames(design),
yLabels = names(MEsWW), cex.lab = 0.5, yColorWidth=0.01,
xColorWidth = 0.03,
ySymbols = colnames(modlues),
colorLabels = FALSE, colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1)
, main = paste("Module-trait relationships"))
dev.off()
#根据相关性热图选择相关性最强的颜色模块
blue_module=t(assign("blue.expr",datExpr[moduleColors=="blue"]))
write.csv(blue_module,"blue_module.csv")
#把全部颜色模块输出为xlsx文件
# text <- unique(moduleColors)
# for (i in 1:length(text)) {
#
# y=t(assign(paste(text[i],"expr",sep = "."),
#
# datExpr[moduleColors==text[i]]))
#
# write.csv(y,paste(text[i],"csv",sep = "."),quote = F)
#
# }
#筛选hub基因
# 指定datTrait中感兴趣的一个性状,这里选择是否侵袭性
weight = as.data.frame(phe1$group)
names(weight) = "weight"
weight <- weight %>% mutate(weight=if_else(weight=="mus",1,2))
# 各基因模块的名字(颜色)
modNames = substring(names(MEsWW), 3)
# 计算MM的P值
geneModuleMembership = as.data.frame(cor(datExpr, MEsWW, use = "p"))
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership ), nSamples))
names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
# 计算性状和基因表达量之间的相关性(GS)
geneTraitSignificance = as.data.frame(cor(datExpr, weight, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance),
nSamples))
names(geneTraitSignificance) = paste("GS.", names(weight), sep="")
names(GSPvalue) = paste("p.GS.", names(weight), sep="")
#绘制散点图
module = "blue"
column = match(module, modNames)
moduleGenes = moduleColors==module
p("Module membership vs gene significance.png",width = 7, height=7)
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes,
column]),abs(geneTraitSignificance[moduleGenes, 1]), xlab =
paste("Module Membership in", module, "module"), ylab = "Gene
significance for body weight", main = paste("Module membership
vs gene significance"), cex.main = 1.2, cex.lab = 1.2, cex.axis =
1.2, col = 'black')
dev.off()
#筛选hubgene
module = "blue"
column = match(module, modNames)
moduleGenes = moduleColors==module
blue_module<-as.data.frame(dimnames(data.frame(datExpr))[[2]][moduleGenes])
names(blue_module)="genename"
MM<-abs(geneModuleMembership[moduleGenes,column])
GS<-abs(geneTraitSignificance[moduleGenes, 1])
blue_module<-as.data.frame(cbind(MM,GS))
rownames(blue_module)=blue_module$genename
hub_b<-abs(blue_module$MM)>0.86&abs(blue_module$GS)>0.42#设置条件
table(hub_b)
blue_hub_b<-subset(blue_module, abs(blue_module$MM)>0.86&abs(blue_module$GS)>0.42)
table(blue_hub_b)
blue_hub_b <- blue_hub_b %>% mutate(id=rownames(.))
# 给基因贴上标签
blue_hub_b=blue_hub_b[rownames(blue_hub_b) %in% ids1$ID,]
ids1=ids1[match(rownames(blue_hub_b),ids1$ID),]
kelly <- function(exprSet,ids){
tmp = by(exprSet,ids$symbol,
function(x) rownames(x)[which.max(rowMeans(x))] )
probes = as.character(tmp)
print(dim(exprSet))
exprSet=exprSet[rownames(exprSet) %in% probes ,]
print(dim(exprSet))
rownames(exprSet)=ids[match(rownames(exprSet),ids$ID),2]
return(exprSet)
}
new_exprSet1 <- kelly(blue_hub_b,ids1)
blue_hub_b=new_exprSet1
blue_hub_b <- blue_hub_b %>% mutate(id=rownames(.))
#提取ppi网络在线分析所求关键基因合上述基因求交集
# mccgene <- read.csv("C:/Users/99405/Desktop/string_interactions.tsv_1_MCC_top50 default node.csv")
# x<-list(MCC=mccgene$name,
#
# WGCNA=blue_hub_b$id)
# ggvenn(x)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
R
1
https://gitee.com/sjy12345/lover.git
[email protected]:sjy12345/lover.git
sjy12345
lover
Medical related R language procedures
master

搜索帮助