master

分支 (1)

管理

管理

master

lover
/
生信分析代码整理.R

#下载R包如以下载可跳过这部分
rm(list = ls())
options()$repos
options()$BioC_mirror
options(BioC_mirror="http://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
options("repos" = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
install.packages("devtools")
install.packages("htmltools")
install.packages('WGCNA')
install.packages(c("FactoMineR", "factoextra"))
install.packages(c("pheatmap","ggpubr"))
BiocManager::install(c("GSEABase","GSVA","clusterProfiler" ),ask = F,update = F)
BiocManager::install(c("GEOquery","limma","impute" ),ask = F,update = F)
BiocManager::install(c("org.Hs.eg.db","hgu133plus2.db" ),ask = F,update = F)
BiocManager::install("preprocessCore")
devtools::install_github("yanlinlin82/ggvenn")
install_github("jmzeng1314/idmap1")
install_github("jmzeng1314/idmap2")
install_github("jmzeng1314/idmap3")
# 所需r包
library(tidyverse) #数据处理
library(openxlsx) #xlsx格式问价读取和导出
library(GEOquery) #获取geo数据集
#基因探针号匹配通用基因id
library(idmap1)   #基因id库1
library(idmap2)   #基因id库2
library(idmap3)   #基因id库3

library(limma)  #差异性分析
library(WGCNA)  #WGCNA求基因模块
library(GSEABase)#获取样本信息
library(GSVA)
library(clusterProfiler)
library(ggpubr)


library(ggplot2) #绘图
library(pheatmap) #热图
library(ggvenn) #韦恩图
library(ggrepel) # 标签相关
library(stringr) # 标签换行
library(clusterProfiler)
library(AnnotationDbi)
library(org.Hs.eg.db) # 参考物种的基因注释数据库hg19
library(DOSE)

# 获得数据集并贴基因id -------------------------------------------------------------------
gset1 <- getGEO('GSE32549', destdir=".",
                AnnotGPL = F,
                getGPL = F) #填写数据集的geo号GSE32549
gset2 <- getGEO('GSE128959', destdir=".",
                AnnotGPL = F,
                getGPL = F)
gset3 <- getGEO('GSE185264', destdir=".",
                AnnotGPL = F,
                getGPL = F)

b1 = gset1[[2]]
b2 = gset2[[1]]
b3 = gset3[[1]]

a1=exprs(b1)
raw_exprSet=a1

a2=exprs(b2)
raw_exprSet=a2

a3=exprs(b3)
raw_exprSet=a3
# ID转化
ids1=get_soft_IDs('gpl6947') #填写数据集1的基因平台号
ids2=get_soft_IDs('gpl6244')
ids3=get_pipe_IDs('gpl30818')
exprSet1=a1
exprSet2=a2
exprSet3=a3

exprSet1=exprSet1[rownames(exprSet1) %in% ids1$ID,]
ids1=ids1[match(rownames(exprSet1),ids1$ID),]

exprSet2=exprSet2[rownames(exprSet2) %in% ids2$ID,]
ids2=ids2[match(rownames(exprSet2),ids2$ID),]

kelly <- function(exprSet,ids){
  tmp = by(exprSet,ids$symbol,
           function(x) rownames(x)[which.max(rowMeans(x))] )
  probes = as.character(tmp)
  print(dim(exprSet))
  exprSet=exprSet[rownames(exprSet) %in% probes ,]
  print(dim(exprSet))
  rownames(exprSet)=ids[match(rownames(exprSet),ids$ID),2]
  return(exprSet)
}
new_exprSet1 <- kelly(exprSet1,ids1)
exprSet1=new_exprSet1

new_exprSet2 <- kelly(exprSet2,ids2)
exprSet2=new_exprSet2


# 筛选样本并分组 -----------------------------------------------------------------
phe1 <- pData(b1)# 查看数据集的样本信息
phe1 <- phe1 %>% mutate(row_num = row_number())
phe2 <- pData(b2)
phe2 <- phe2 %>% mutate(row_num = row_number())
phe3 <- pData(b3) #数据集3无侵袭性样本弃用
phe3 <- phe3 %>% mutate(row_num = row_number())


unique(phe1$`tumor stage:ch1`)
phe1 <- phe1 %>% mutate(group=if_else(`tumor stage:ch1`=="T2","mus","non"))

unique(phe2$`tumor stage:ch1`)
# phe2分类数据处理
phe2 <- phe2 %>%filter(!str_detect(`tumor stage:ch1`, "N/A|Metastasis"))
phe2 <- phe2 %>% mutate(group=if_else(`tumor stage:ch1`%in% c("T2", "T3","T4"),"mus","non"))
rows_with_non2 <-phe2 %>% pull(row_num)
exprSet2 <- exprSet2[,c(rows_with_non2)]


# 差异性分析 -------------------------------------------------------------------
#归一化
exprSet1=normalizeBetweenArrays(exprSet1)
exprSet2=normalizeBetweenArrays(exprSet2)
# 求差异基因函数
deg = function(exprSet,design,contrast.matrix){

  ##step1

  fit <- lmFit(exprSet,design)

  ##step2

  fit2 <- contrasts.fit(fit, contrast.matrix)

  ##这一步很重要，大家可以自行看看效果

  fit2 <- eBayes(fit2)  ## default no trend !!!

  ##eBayes() with trend=TRUE

  ##step3

  tempOutput = topTable(fit2, coef=1, n=Inf)

  nrDEG = na.omit(tempOutput)

  #write.csv(nrDEG2,"limma_notrend.results.csv",quote = F)

  head(nrDEG)

  return(nrDEG)

}

#data1
groupname <- phe1 %>%pull(group)
design <- model.matrix(~0+factor(groupname))
colnames(design)=levels(factor(groupname))
rownames(design)=colnames(exprSet1)
contrast.matrix<-makeContrasts(mus-non,levels = design)
contrast.matrix
#求基因的差异显著性
re1 = deg(exprSet1,design,contrast.matrix)
nrDEG1=re1

#gse2
groupname <- phe2 %>%pull(group)
design <- model.matrix(~0+factor(groupname))
colnames(design)=levels(factor(groupname))
rownames(design)=colnames(exprSet2)
contrast.matrix<-makeContrasts(mus-non,levels = design)
contrast.matrix


re2 = deg(exprSet2,design,contrast.matrix)
nrDEG2=re2


#求差异基因数量
count1 <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% count()
count1
count2 <- nrDEG2 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% count()
count2


genedata <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(symbol=rownames(.))
write.xlsx(genedata,"genedata.xlsx")


#热图


choose_gene=head(rownames(nrDEG1),25)

choose_matrix=exprSet1[choose_gene,]

choose_matrix=t(scale(t(choose_matrix)))

pheatmap(choose_matrix)

#火山图

# 简略版1

plot(nrDEG1$logFC,-log10(nrDEG1$adj.P.Val))


#区分上下调基因版2
#数据集1
DEG=nrDEG1

#差异基因较少时使用 logFC_cutoff <- with(DEG,mean(abs( logFC)) + 2*sd(abs( logFC)) )

logFC_cutoff=0.5

DEG$change = as.factor(ifelse(DEG$adj.P.Val < 0.05 & abs(DEG$logFC) > logFC_cutoff,

                              ifelse(DEG$logFC > logFC_cutoff ,'UP','DOWN'),'NOT')

)


this_tile <- paste0('Cutoff for logFC is ',round(logFC_cutoff,3),

                    '\nThe number of up gene is ',nrow(DEG[DEG$change =='UP',]) ,

                    '\nThe number of down gene is ',nrow(DEG[DEG$change =='DOWN',])

)
g = ggplot(data=DEG,

           aes(x=logFC, y=-log10(P.Value),

               color=change)) +

  geom_point(alpha=0.4, size=1.75) +

  theme_set(theme_set(theme_bw(base_size=20)))+

  xlab("log2 fold change") + ylab("-log10 p-value") +

  ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+

  scale_colour_manual(values = c('blue','black','red')) ## corresponding to the levels(res$change)

print(g)

ggsave(g,filename = 'volcano.png')

#数据集2
DEG=nrDEG2

logFC_cutoff=0.5

DEG$change = as.factor(ifelse(DEG$adj.P.Val < 0.05 & abs(DEG$logFC) > logFC_cutoff,

                              ifelse(DEG$logFC > logFC_cutoff ,'UP','DOWN'),'NOT')

)


this_tile <- paste0('Cutoff for logFC is ',round(logFC_cutoff,3),

                    '\nThe number of up gene is ',nrow(DEG[DEG$change =='UP',]) ,

                    '\nThe number of down gene is ',nrow(DEG[DEG$change =='DOWN',])

)
g = ggplot(data=DEG,

           aes(x=logFC, y=-log10(P.Value),

               color=change)) +

  geom_point(alpha=0.4, size=1.75) +

  theme_set(theme_set(theme_bw(base_size=20)))+

  xlab("log2 fold change") + ylab("-log10 p-value") +

  ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+

  scale_colour_manual(values = c('blue','black','red')) ## corresponding to the levels(res$change)

print(g)
# 求两数据集显著差异基因的交集
diffdata1 <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(id1=rownames(.))
diffdata2 <- nrDEG2 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(id2=rownames(.))
x<-list(data1=diffdata1$id1,

        data2=diffdata2$id2)
ggvenn(x)


# GO及KEGG富集分析 ----------------------------------------------------------------

#GO
# 挑出有统计学意义的差异基因

deg1data <- nrDEG1 %>% filter(adj.P.Val<0.05 & abs(logFC)>0.5) %>% mutate(symbol=rownames(.))

# 将gene symbol转换为Entrez ID,防止分析出错

id_list <- mapIds(org.Hs.eg.db,deg1data$symbol,"ENTREZID","SYMBOL")

# 去除转换不成功的结果，即id=NA的情况


go <- enrichGO(gene = id_list, # Entrez ID列表

               OrgDb = org.Hs.eg.db, # 指定物种数据库

               keyType = "ENTREZID", # 指定给定的名称类型

               ont = "ALL", # 可选,BP(生物学过程)/CC(细胞组分)/MF(分子功能)/ALL(同时指定)

               pAdjustMethod = "BH", # P值校正方法,还可以是fdr

               pvalueCutoff = 0.05,qvalueCutoff = 0.2, # p/q值阈值

               readable = T# 将ID转换为symbol

)


go.res <- as.data.frame(go) # 将GO结果转为数据框，方便后续分析（不转也可以，看个人习惯）

write.csv(go.res,"Table_GO_result.csv",quote = F) # 输出GO富集分析结果

# 绘制GO富集分析条形图，结果默认按qvalue升序，分别选出前十的term进行绘图即可

goBP <- subset(go.res,subset = (ONTOLOGY == "BP"))[1:10,]

goCC <- subset(go.res,subset = (ONTOLOGY == "CC"))[1:10,]

goMF <- subset(go.res,subset = (ONTOLOGY == "MF"))[1:10,]

go.df <- rbind(goBP,goCC,goMF)

# 使画出的GO term的顺序与输入一致

go.df$Description <- factor(go.df$Description,levels = rev(go.df$Description))

# 绘图

go_bar <- ggplot(data = go.df, # 绘图使用的数据

                 aes(x = Description, y = Count,fill = ONTOLOGY))+ # 横轴坐标及颜色分类填充

  geom_bar(stat = "identity",width = 0.9)+ # 绘制条形图及宽度设置

  coord_flip()+theme_bw()+ # 横纵坐标反转及去除背景色

  scale_x_discrete(labels = function(x) str_wrap(x,width = 50))+ # 设置term名称过长时换行

  labs(x = "GO terms",y = "GeneNumber",title = "Barplot of Enriched GO Terms")+ # 设置坐标轴标题及标题

  theme(axis.title = element_text(size = 13), # 坐标轴标题大小

        axis.text = element_text(size = 11), # 坐标轴标签大小

        plot.title = element_text(size = 14,hjust = 0.5,face = "bold"), # 标题设置

        legend.title = element_text(size = 13), # 图例标题大小

        legend.text = element_text(size = 11), # 图例标签大小

        plot.margin = unit(c(0.5,0.5,0.5,0.5),"cm")) # 图边距

ggsave(go_bar,filename = "GO_Barplot.pdf",width = 9,height = 7)

dotplot(go, split="ONTOLOGY",showCategory = 10,label_format=100)  + scale_size(rang=c(5.20))+ facet_grid(ONTOLOGY~., scale="free")

#kegg

kegg <- enrichKEGG(gene = id_list,

                   organism = "human",

                   keyType = "kegg",

                   pAdjustMethod = "BH",

                   pvalueCutoff = 0.05,

                   qvalueCutoff = 0.2)


# 绘图

barplot(kegg,showCategory = 25,title = 'KEGG Pathway')

dotplot(kegg)


# wgcna -------------------------------------------------------------------
datExpr0 = data.frame(t(a1))

colnames(datExpr0) <- rownames(a1)

rownames(datExpr0) <- colnames(a1)


datExpr1<-datExpr0

#筛选筛选方差前25%的基因
#m.vars=apply(datExpr0,2,var)
#expro.upper=datExpr0[,which(m.vars>quantile(m.vars, probs = seq(0, 1, 0.25))[4])]
#datExpr1<-data.matrix(expro.upper)

#判断是否有不好的sample或gene

gsg = goodSamplesGenes(datExpr1, verbose = 3);

gsg$allOK

#如果这里返回的结果是TRUE，说明所有基因都通过了检查。

#如果你用全部基因作为输入，很有可能返回FALSE，说明存在不好的基因或sample。

#下面的代码就会去除那些不好的基因或sample。

#去除不好的sample或gene

if (!gsg$allOK){

  # Optionally, print the gene and sample names that were removed:

  if (sum(!gsg$goodGenes)>0)

    printFlush(paste("Removing genes:",

                     paste(names(datExpr0)[!gsg$goodGenes],

                           collapse = ", ")));

  if (sum(!gsg$goodSamples)>0)

    printFlush(paste("Removing samples:",

                     paste(rownames(datExpr1)[!gsg$goodSamples],

                           collapse = ", ")));

  # Remove the offending genes and samples from the data:

  datExpr1 = datExpr1[gsg$goodSamples, gsg$goodGenes]

}

#判断是否有离群样本

sampleTree = hclust(dist(datExpr1), method = "average")

par(cex = 0.7);

par(mar = c(0,4,2,0))

plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,

     cex.axis = 1.5, cex.main = 2)


datExpr = as.data.frame(datExpr1)

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)


plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2) +

  #想用哪里切，就把“h = 110”和“cutHeight = 110”中换成你的cutoff

  abline(h = 140, col = "red")

clust = cutreeStatic(sampleTree, cutHeight = 140, minSize = 10)
# 求删除离群样本后的数据集及删掉的数据集
keepSamples = (clust==1)
datExpr = datExpr1[keepSamples, ] #删除离群样本后的数据集
keepSamples1 = (clust==0)
datExprex <- datExpr1[keepSamples1, ]#删除的离群样本

groupname <- phe1 %>%pull(group)
design <- model.matrix(~0+factor(groupname))
colnames(design)=levels(factor(groupname))
rownames(design)=colnames(exprSet1)
design <- design[keepSamples, ]


#选择构建网络的合适阈值

#通过这步计算，找出scale free topology modle fit接近0.9的最小power（soft threshold），用于下一步构建网络。

powers = c(c(1:10), seq(from = 12, to=20, by=2))


sft = pickSoftThreshold(datExpr, powerVector = powers,

                        verbose = 5 )

pdf("1Threshold.pdf",width = 9, height = 5)

par(mfrow = c(1,2))

cex1 = 0.9

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",

     main = paste("Scale independence")) +

  text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

       labels=powers,cex=cex1,col="red")+

  abline(h=0.9,col="red")

plot(sft$fitIndices[,1], sft$fitIndices[,5],

     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",

     main = paste("Mean connectivity")) +

  text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

dev.off()


#验证所选power值4是否合适

power <- 4
ADJ=abs(cor(datExpr,use = 'p'))^power
k=apply(ADJ,2,sum) -1
hist(k)
cut1=cut(k,20)
binned.k=tapply(k,cut1,mean)
freq1=tapply(k,cut1,length)/length(k)
plot(log10(binned.k),log10(freq1+.000000001),xlab="log10(k)",ylab="log10(p(k))")
xx= as.vector(log10(binned.k))
lm1=lm(as.numeric(log10(freq1+.000000001))~ xx )
lines(xx,predict(lm1),col=2, lty = 1, lwd = 3)
title(paste( "scale free R^2=",as.character(round(summary(lm1)$adj.r.squared,2)), " slope=", round(lm1$coefficients[[2]],2)))

#构建网络，找出gene module

net1 = blockwiseModules(datExpr, power = 4,

                        TOMType = "unsigned", minModuleSize = 100,

                        reassignThreshold = 0, mergeCutHeight = 0.25,

                        numericLabels = TRUE, pamRespectsDendro = FALSE,

                        saveTOMs = TRUE,

                        #saveTOMFileBase = "MyTOM",

                        verbose = 3)

table(net1$colors)


#模块太多调整参数
net1 = blockwiseModules(
  datExpr,
  power = 4,
  maxBlockSize = 6000,
  TOMType = "unsigned", minModuleSize = 200,
  reassignThreshold = 0,
  mergeCutHeight = 0.25,
  numericLabels = TRUE, pamRespectsDendro = FALSE,
  saveTOMs = F,
  verbose = 3
)
table(net1$colors)

# Wgcna进一步筛选关键基因 --------------------------------------------------------------------


mergedColors = labels2colors(net1$colors)

pdf("2module.pdf",width = 10, height = 5)
plotDendroAndColors(net1$dendrograms[[1]], mergedColors[net1$blockGenes[[1]]], "Module colors",

                    dendroLabels = FALSE, hang = 0.03,

                    addGuide = TRUE, guideHang = 0.05)

dev.off()

moduleLabels = net1$colors
unique(net1$colors)
moduleColors = labels2colors(net1$colors)
unique(moduleColors)
table(moduleColors)
MEs = net1$MEs;

genetree = net1$dendrograms[[1]]


#表型与模块的相关性

moduleLabelsAutomatic = net1$colors

moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)

moduleColorsWW = moduleColorsAutomatic

MEs0 = moduleEigengenes(datExpr, moduleColorsWW)$eigengenes

MEsWW = orderMEs(MEs0)

modTraitCor = cor(MEsWW,design, use = "p")

colnames(MEsWW)

modlues=MEsWW

modTraitP = corPvalueStudent(modTraitCor, nSamples)

textMatrix = paste(signif(modTraitCor, 2), "\n(", signif(modTraitP, 1), ")", sep = "")

dim(textMatrix) = dim(modTraitCor)


pdf("3Module-trait.pdf",width = 6, height = 6)

labeledHeatmap(Matrix = modTraitCor,

               xLabels = colnames(design),

               yLabels = names(MEsWW), cex.lab = 0.5,  yColorWidth=0.01,

               xColorWidth = 0.03,

               ySymbols = colnames(modlues),

               colorLabels = FALSE, colors = blueWhiteRed(50),

               textMatrix = textMatrix,

               setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1)

               , main = paste("Module-trait relationships"))

dev.off()
#根据相关性热图选择相关性最强的颜色模块
blue_module=t(assign("blue.expr",datExpr[moduleColors=="blue"]))
write.csv(blue_module,"blue_module.csv")

#把全部颜色模块输出为xlsx文件
# text <- unique(moduleColors)
# for (i  in 1:length(text)) {
#
#   y=t(assign(paste(text[i],"expr",sep = "."),
#
#              datExpr[moduleColors==text[i]]))
#
#   write.csv(y,paste(text[i],"csv",sep = "."),quote = F)
#
# }


#筛选hub基因
# 指定datTrait中感兴趣的一个性状，这里选择是否侵袭性
weight = as.data.frame(phe1$group)
names(weight) = "weight"
weight <- weight %>% mutate(weight=if_else(weight=="mus",1,2))
#  各基因模块的名字（颜色）
modNames = substring(names(MEsWW), 3)
# 计算MM的P值
geneModuleMembership = as.data.frame(cor(datExpr, MEsWW, use = "p"))
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership ), nSamples))
names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
# 计算性状和基因表达量之间的相关性（GS）
geneTraitSignificance = as.data.frame(cor(datExpr, weight, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance),
                                          nSamples))
names(geneTraitSignificance) = paste("GS.", names(weight), sep="")
names(GSPvalue) = paste("p.GS.", names(weight), sep="")
#绘制散点图
module = "blue"
column = match(module, modNames)
moduleGenes = moduleColors==module
p("Module membership vs gene significance.png",width = 7, height=7)
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes,
                                            column]),abs(geneTraitSignificance[moduleGenes, 1]), xlab =
                     paste("Module Membership in", module, "module"), ylab = "Gene
  significance for body weight", main = paste("Module membership
  vs gene significance"), cex.main = 1.2, cex.lab = 1.2, cex.axis =
                     1.2, col = 'black')
dev.off()
#筛选hubgene
module = "blue"
column = match(module, modNames)
moduleGenes = moduleColors==module
blue_module<-as.data.frame(dimnames(data.frame(datExpr))[[2]][moduleGenes])
names(blue_module)="genename"
MM<-abs(geneModuleMembership[moduleGenes,column])
GS<-abs(geneTraitSignificance[moduleGenes, 1])
blue_module<-as.data.frame(cbind(MM,GS))
rownames(blue_module)=blue_module$genename
hub_b<-abs(blue_module$MM)>0.86&abs(blue_module$GS)>0.42#设置条件
table(hub_b)
blue_hub_b<-subset(blue_module, abs(blue_module$MM)>0.86&abs(blue_module$GS)>0.42)
table(blue_hub_b)
blue_hub_b <- blue_hub_b %>% mutate(id=rownames(.))


# 给基因贴上标签
blue_hub_b=blue_hub_b[rownames(blue_hub_b) %in% ids1$ID,]
ids1=ids1[match(rownames(blue_hub_b),ids1$ID),]
kelly <- function(exprSet,ids){
  tmp = by(exprSet,ids$symbol,
           function(x) rownames(x)[which.max(rowMeans(x))] )
  probes = as.character(tmp)
  print(dim(exprSet))
  exprSet=exprSet[rownames(exprSet) %in% probes ,]
  print(dim(exprSet))
  rownames(exprSet)=ids[match(rownames(exprSet),ids$ID),2]
  return(exprSet)
}
new_exprSet1 <- kelly(blue_hub_b,ids1)
blue_hub_b=new_exprSet1
blue_hub_b <- blue_hub_b %>% mutate(id=rownames(.))

#提取ppi网络在线分析所求关键基因合上述基因求交集
# mccgene <- read.csv("C:/Users/99405/Desktop/string_interactions.tsv_1_MCC_top50 default node.csv")
# x<-list(MCC=mccgene$name,
#
#         WGCNA=blue_hub_b$id)
# ggvenn(x)