基於表達矩陣繪圖

基於表達矩陣繪圖

基於表達矩陣繪圖

basic visualization for expression matrix?

bio-info-trainee.com圖標

安裝並載入必須的packages

如果你還沒有安裝,就運行下面的代碼安裝:

BiocInstaller::biocLite(CLL)

install.packages(corrplot)

install.packages(gpairs)

install.packages(vioplot)

如果你安裝好了,就直接載入它們即可

library(CLL)

library(ggplot2)

library(reshape2)

library(gpairs)

library(corrplot)

載入內置的測試數據:

data(sCLLex)

sCLLex=sCLLex[,1:8] ## 樣本太多,我就取前面8個

group_list=sCLLex$Disease

exprSet=exprs(sCLLex)

head(exprSet)

## CLL11.CEL CLL12.CEL CLL13.CEL CLL14.CEL CLL15.CEL CLL16.CEL

## 1000_at 5.743132 6.219412 5.523328 5.340477 5.229904 4.920686

## 1001_at 2.285143 2.291229 2.287986 2.295313 2.662170 2.278040

## 1002_f_at 3.309294 3.318466 3.354423 3.327130 3.365113 3.568353

## 1003_s_at 1.085264 1.117288 1.084010 1.103217 1.074243 1.073097

## 1004_at 7.544884 7.671801 7.474025 7.152482 6.902932 7.368660

## 1005_at 5.083793 7.610593 7.631311 6.518594 5.059087 4.855161

## CLL17.CEL CLL18.CEL

## 1000_at 5.325348 4.826131

## 1001_at 2.350796 2.325163

## 1002_f_at 3.502440 3.394410

## 1003_s_at 1.091264 1.076470

## 1004_at 6.456285 6.824862

## 1005_at 5.176975 4.874563

group_list

## [1] progres. stable progres. progres. progres. progres. stable stable

## Levels: progres. stable

接下來進行一系列繪圖操作

主要用到ggplot2這個包,需要把我們的寬矩陣用reshape2包變成長矩陣

library(reshape2)

exprSet_L=melt(exprSet)

colnames(exprSet_L)=c(probe,sample,value)

exprSet_L$group=rep(group_list,each=nrow(exprSet))

head(exprSet_L)

## probe sample value group

## 1 1000_at CLL11.CEL 5.743132 progres.

## 2 1001_at CLL11.CEL 2.285143 progres.

## 3 1002_f_at CLL11.CEL 3.309294 progres.

## 4 1003_s_at CLL11.CEL 1.085264 progres.

## 5 1004_at CLL11.CEL 7.544884 progres.

## 6 1005_at CLL11.CEL 5.083793 progres.

boxplot

p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()

print(p)

vioplot

#library(vioplot)

#?vioplot

#vioplot(exprSet)

#do.call(vioplot,c(unname(exprSet),col=red,drawRect=FALSE,names=list(names(exprSet))))

p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_violin()

print(p)

boxplot

p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()

print(p)

histogram

p=ggplot(exprSet_L,aes(value,fill=group))+geom_histogram(bins = 200)+facet_wrap(~sample, nrow = 4)

print(p)

boxplot

p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()

print(p)

density

p=ggplot(exprSet_L,aes(value,col=group))+geom_density()+facet_wrap(~sample, nrow = 4)

print(p)

p=ggplot(exprSet_L,aes(value,col=group))+geom_density()

print(p)

gpairs

library(gpairs)

gpairs(exprSet

#,upper.pars = list(scatter = stats)

#,lower.pars = list(scatter = corrgram)

)

cluster

out.dist=dist(t(exprSet),method=euclidean)

out.hclust=hclust(out.dist,method=complete)

plot(out.hclust)

PCA

pc <- prcomp(t(exprSet),scale=TRUE)

pcx=data.frame(pc$x)

pcr=cbind(samples=rownames(pcx),group_list, pcx)

p=ggplot(pcr, aes(PC1, PC2))+geom_point(size=5, aes(color=group_list)) +

geom_text(aes(label=samples),hjust=-0.1, vjust=-0.3)

print(p)

heatmap

choose_gene=names(sort(apply(exprSet, 1, mad),decreasing = T)[1:50])

choose_matrix=exprSet[choose_gene,]

choose_matrix=scale(choose_matrix)

heatmap(choose_matrix)

library(gplots)

##

## Attaching package: gplots

## The following object is masked from package:stats:

##

## lowess

heatmap.2(choose_matrix)

library(pheatmap)

pheatmap(choose_matrix)

DEG && volcano plot

library(limma)

##

## Attaching package: limma

## The following object is masked from package:BiocGenerics:

##

## plotMA

design=model.matrix(~factor(group_list))

fit=lmFit(exprSet,design)

fit=eBayes(fit)

DEG=topTable(fit,coef=2,n=Inf)

with(DEG, plot(logFC, -log10(P.Value), pch=20, main="Volcano plot"))

logFC_cutoff <- with(DEG,mean(abs( logFC)) + 2*sd(abs( logFC)) )

DEG$change = as.factor(ifelse(DEG$P.Value < 0.05 & abs(DEG$logFC) > logFC_cutoff,

ifelse(DEG$logFC > logFC_cutoff ,UP,DOWN),NOT)

)

this_tile <- paste0(Cutoff for logFC is ,round(logFC_cutoff,3),


The number of up gene is ,nrow(DEG[DEG$change ==UP,]) ,


The number of down gene is ,nrow(DEG[DEG$change ==DOWN,])

)

g = ggplot(data=DEG, aes(x=logFC, y=-log10(P.Value), color=change)) +

geom_point(alpha=0.4, size=1.75) +

theme_set(theme_set(theme_bw(base_size=20)))+

xlab("log2 fold change") + ylab("-log10 p-value") +

ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+

scale_colour_manual(values = c(blue,black,red)) ## corresponding to the levels(res$change)

print(g)

ggplot畫圖是可以切換主題的

其實繪圖有非常多的細節可以調整,還是略微有點繁瑣的!

p=ggplot(exprSet_L,aes(x=sample,y=value,fill=group))+geom_boxplot()

print(p)

p=p+stat_summary(fun.y="mean",geom="point",shape=23,size=3,fill="red")

p=p+theme_set(theme_set(theme_bw(base_size=20)))

p=p+theme(text=element_text(face=bold),axis.text.x=element_text(angle=30,hjust=1),axis.title=element_blank())

print(p)

可以很明顯看到,換了主題之後的圖美觀一些。


推薦閱讀:

矩陣和向量求導
矩陣論筆記(2)
語料庫語言學基礎知識:矩陣(Haskell版)
gal2mat:將gal權重文件轉成n-by-n矩陣

TAG:MATLAB | 矩陣 | 自然科學 |