R語言可視化包之ggplot2

01-30

一、條形圖

1、簡單條形圖

你有一個包含兩列的數據框，

其中一列數據表示條形在x軸上的位置，另一列表示每個條形在y軸上的高度。

x軸為離散時，針對變數值繪製的條形圖（參數stat="identity"）

> library(gcookbook)
> library(ggplot2)
> ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat = "identity")

2、繪製簇狀條形圖

將分類變數映射到fill參數，並運行命令geom_bar(position="dodge")

> cabbage_exp
  Cultivar Date Weight        sd  n         se
1      c39  d16   3.18 0.9566144 10 0.30250803
2      c39  d20   2.80 0.2788867 10 0.08819171
3      c39  d21   2.74 0.9834181 10 0.31098410
4      c52  d16   2.26 0.4452215 10 0.14079141
5      c52  d20   3.11 0.7908505 10 0.25008887
6      c52  d21   1.47 0.2110819 10 0.06674995

我們將Date和Cultivar分別映射給x和fill

> ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(position = "dodge",stat = "identity")

3、堆積條形圖

> ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(stat = "identity")

把position參數去掉就是堆積條形圖

4、對正負條形圖分別著色

> csub<-subset(climate,Source=="Berkeley"&Year>=1900)
> View(csub)
> csub$pos<-csub$Anomaly10y>=0
> head(csub)
      Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y   pos
101 Berkeley 1900        NA        NA     -0.171  0.108 FALSE
102 Berkeley 1901        NA        NA     -0.162  0.109 FALSE
103 Berkeley 1902        NA        NA     -0.177  0.108 FALSE
104 Berkeley 1903        NA        NA     -0.199  0.104 FALSE
105 Berkeley 1904        NA        NA     -0.223  0.105 FALSE

上述過程準備好後，將pos映射給填充色參數（fill）並繪製條形圖,注意這裡的條形圖參數設置為position="identity"

> ggplot(csub,aes(x=Year,y=Anomaly10y,fill=pos))+geom_bar(stat = "identity",position = "identity")

> ggplot(csub,aes(x=Year,y=Anomaly10y,fill=pos))+geom_bar(stat = "identity",position = "identity")+scale_fill_manual(values = c("#CCEEFF","#FFDDDD"),guide=F)

guide=F表示刪除圖例（在較新版本的ggplot2中應寫作guide="none"）。scale_fill_manual()用於調整圖形顏色：這裡將正值設為紅色，負值設為藍色。

5、繪製Cleveland點圖

二、直方圖-通常描述一個變數的數據分布

1、簡單直方圖

> library(gcookbook)
> ggplot(faithful,aes(x=waiting))+geom_histogram()

2、數據僅為單獨一個向量時，數據集參數用NULL；bins表示分組數目，binwidth表示組距

> w<-faithful$waiting
> ggplot(NULL,aes(x=w))+geom_histogram()

> ggplot(faithful,aes(x=waiting))+geom_histogram(binwidth = 5,fill="white",colour="black")

3、分組直方圖

> library(MASS)
> ggplot(birthwt,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(smoke~.)

另一種方法將分組變數映射到fill,position="identity"很重要，沒有它，函數會將直方圖的條形進行垂直堆積，這樣難以看出數據分布信息

> birthwt$smoke<-factor(birthwt$smoke)
> ggplot(birthwt,aes(x=bwt,fill=smoke))+geom_histogram(position="identity",alpha=0.4)

4、繪製密度曲線

> ggplot(faithful,aes(x=waiting))+geom_line(stat="density")+expand_limits(y=0)

> ggplot(faithful,aes(x=waiting))+geom_density(fill="blue",colour=NA,alpha=0.2)+geom_line(stat="density")+xlim(35,105)

將密度曲線疊加到直方圖上，可以對觀測值的理論分布和實際分布進行比較。由於密度曲線對應的y軸坐標較小（曲線下的總面積是1），如果將其疊加到未做任何變換的直方圖上，曲線很難看清楚。通過設置y=..density..可以減小直方圖的標度，使其與密度曲線相匹配。

> ggplot(faithful,aes(x=waiting,y=..density..))+
  geom_histogram(fill="cornsilk",colour="grey60",size=.2)+
  geom_density()+
  xlim(35,105)

> ggplot(faithful,aes(x=waiting,y=..density..))+
  geom_histogram(fill="cornsilk",colour="grey60",size=.2)+
  geom_line(stat="density")+
  xlim(35,105)

繪製分組密度曲線，使用geom_density( )函數，將分組變數映射給colour或fill等圖形屬性即可

> library(MASS)
> birthwt1<-birthwt
> birthwt1$smoke<-factor(birthwt1$smoke)
> ggplot(birthwt1,aes(x=bwt,colour=smoke))+geom_density()

> ggplot(birthwt1,aes(x=bwt,fill=smoke))+geom_density(alpha=.3)

為smoke因子的各個水平重新命名，plyr包提供了revalue函數

> library(plyr)
> birthwt1$smoke<-revalue(birthwt1$smoke,c("0"="No Smoke","1"="Smoke"))
> ggplot(birthwt1,aes(x=bwt))+geom_density()+facet_grid(smoke~.)

三、散點圖-通常用來刻畫兩個連續變數之間的關係

1、簡單散點圖

> library(gcookbook)
> ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(shape=21,size=1.5)
> ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(shape=16,size=1.5)
> ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(shape=19,size=1.5)

2、散點圖分組

> ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex))+geom_point()

> ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex,shape=sex))+geom_point()

若要用點的顏色或大小來表示連續變數，將該變數映射到colour或size屬性上即可

> ggplot(heightweight,aes(x=ageYear,y=heightIn,size=weightLb,colour=sex))+geom_point(alpha=.5)

3、離散箱線圖

當散點圖的其中一個數據軸或者兩個數據軸都對應於離散數據時，會出現圖形堆疊的情況。

> sql<-ggplot(ChickWeight,aes(x=Time,y=weight))+geom_point()

我們可以通過箱線圖完美的表達圖形

> ggplot(ChickWeight,aes(x=Time,y=weight))+geom_boxplot(aes(group=Time))

4、添加回歸模型擬合線

運行stat_smooth（）函數，並設定method=lm或者其他方法。默認情況下stat_smooth（）會為回歸擬合線添加95%的置信域，置信域對應的置信水平可通過設置level參數來進行調整；設定se=FALSE時，系統將不會對回歸擬合線添加置信域。

> sp<-ggplot(heightweight,aes(x=ageYear,y=heightIn))
> sp+geom_point()+stat_smooth(method=lm,level=0.99)

> sp+geom_point(colour="grey60")+stat_smooth(method = loess)
> sp+geom_point(colour="grey60")+stat_smooth()

四、繪製餅圖

> library(dplyr)
> dt2<-data.frame(group_by(mtcars,gear)%>%summarise(sum=sum(mpg)))
> dt2
  gear   sum
1    3 241.6
2    4 294.4
3    5 106.9

> p<-ggplot(dt2,aes(x="",y=sum,fill=factor(gear)))+geom_bar(stat="identity")+coord_polar(theta = "y")

R語言可視化之ggplot2擴展包

一、控制圖形的整體外觀函數

1、使用ggtitle（）設置標題，ggtitle（）與使用labs（title="標題文本"）的效果是一樣的。

p<-ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point()

p+ggtitle("Age and Height of Schoolchildren")

二、主題

1、使用主題控制圖形整體外觀：

theme_bw （）黑白主題
theme_grey（）默認主題
theme_few （）簡約主題（由ggthemes包提供）

如果你希望使用一套現成的主題並使用theme（）微調其中的一些部分，則theme（）必須在指定主題的語句之後。否則theme（）的設定會被你添加的主題還原。

2、設置主題：

> p=ggplot(heightweight,aes(x=ageYear,y=heightIn,col=sex))+geom_point()
> p+theme(panel.grid.major=element_line(colour="red"))
> p+ggtitle("Plot title here")+
+ theme(
+ axis.title.x=element_text(colour="red",size=14),
+ axis.text.x=element_text(colour="blue"))

常用主題及屬性

三、圖例

1、設置圖例名字

p+scale_fill_discrete(name="condition")

2、設置項目順序

p+scale_fill_discrete(limits=c("trt1","trt2","ctr1"))

3、設置圖例位置

p+theme(legend.position="top")

4、刪除圖例

p+scale_fill_discrete(guide=F)

四、坐標軸設置

以x軸為例，分別設置x軸的刻度位置（breaks）、刻度標籤（labels）、坐標軸名稱（name）以及上下兩個邊界值（limits）

p+scale_x_continuous(breaks=c(50,60,66,70,72),labels=c("tina","anny","banma","animal","xiaoming"),name="Age",limits=c(0,80))