《R語言實戰》第6章 筆記

1.條形圖:

> #輸入數據
> library(vcd)
> counts <- table(Arthritis$Improved)
> counts

  None   Some Marked 
    42     14     28 

(1).簡單條形圖:

> barplot(counts,
+         main="Simple Bar Plot",
+         xlab="Improvement", ylab="Frequency")

(2).水平條形圖:

> barplot(counts,
+         main="Horizontal Bar Plot",
+         xlab="Frequency", ylab="Improvement",
+         horiz=TRUE)

(3).堆砌條形圖:

> #輸入數據
> counts <- table(Arthritis$Improved, Arthritis$Treatment)
> counts
        
         Placebo Treated
  None        29      13
  Some         7       7
  Marked       7      21

> barplot(counts,
+         main="Stacked Bar Plot",
+         xlab="Treatment", ylab="Frequency",
+         col=c("red", "yellow","green"),
+         legend=rownames(counts))

(4).分組條形圖:

> barplot(counts,
+         main="Grouped Bar Plot",
+         xlab="Treatment", ylab="Frequency",
+         col=c("red", "yellow", "green"),
+         legend=rownames(counts), beside=TRUE)

(5).均值條形圖:

> states <- data.frame(state.region, state.x77)
> means <- aggregate(states$Illiteracy, by=list(state.region), FUN=mean)
> means
        Group.1    x
1     Northeast 1.00
2         South 1.74
3 North Central 0.70
4          West 1.02
> means <- means[order(means$x),]
> means
        Group.1    x
3 North Central 0.70
1     Northeast 1.00
4          West 1.02
2         South 1.74
> barplot(means$x, names.arg=means$Group.1)
> title("Mean Illiteracy Rate")

條形圖的微調:

> #增加y邊界的大小
> par(mar=c(5,8,4,2))

> #旋轉條形的標籤
> par(las=2)

> counts <- table(Arthritis$Improved)
> barplot(counts,
+         main="Treatment Outcome",
+         horiz=TRUE,
+         cex.names=0.8,
+         names.arg=c("No Improvement", "Some Improvement",
+                     "Marked Improvement"))

(6).棘狀圖:

> library(vcd)
> attach(Arthritis)
> counts <- table(Treatment,Improved)
> spine(counts, main="Spinogram Example")
> detach(Arthritis)

棘狀圖對堆砌條形圖進行了重縮放,這樣每個條形的高度均為1,每一段的高度即表示比例。棘狀圖可由vcd包中的函數spine()繪製。

2.餅圖:

> par(mfrow=c(1, 2))
> slices <- c(10, 12,4, 16, 8)
> lbls <- c("US", "UK", "Australia", "Germany", "France")
> pie(slices, labels = lbls,
+     main="Simple Pie Chart")

> pct <- round(slices/sum(slices)*100)
> lbls2 <- paste(lbls, " ", pct, "%", sep="")
> pie(slices, labels=lbls2, col=rainbow(length(lbls2)),
+     main="Pie Chart with Percentages")

3.直方圖:

> par(mfrow=c(2,2))

> #簡單直方圖
> hist(mtcars$mpg)

> #指定組數和顏色
> hist(mtcars$mpg,
+      breaks=12,
+      col="red",
+      xlab="Miles Per Gallon",
+      main="Colored histogram with 12 bins")

> #添加軸須圖
> hist(mtcars$mpg,
+      freq=FALSE,
+      breaks=12,
+      col="red",
+      xlab="Miles Per Gallon",
+      main="Histogram, rug plot, density curve")
> rug(jitter(mtcars$mpg))
> lines(density(mtcars$mpg), col="blue", lwd=2)

> #添加正態密度曲線和外框
> x <- mtcars$mpg
> h<-hist(x,
+         breaks=12,
+         col="red",
+         xlab="Miles Per Gallon",
+         main="Histogram with normal curve and box")
> xfit<-seq(min(x), max(x), length=40)
> yfit<-dnorm(xfit, mean=mean(x), sd=sd(x))
> yfit <- yfit*diff(h$mids[1:2])*length(x)
> lines(xfit, yfit, col="blue", lwd=2)
> box()

4.核密度圖:

> par(mfrow=c(2,1))

> #完全使用默認設置創建最簡圖形
> d <- density(mtcars$mpg)
> plot(d)

> #添加參數
> d <- density(mtcars$mpg)
> plot(d, main="Kernel Density of Miles Per Gallon")
> polygon(d, col="red", border="blue")
> rug(mtcars$mpg, col="brown")

使用sm包中的sm.density.compare()函數可向圖形疊加兩組或更多的核密度圖。

5.箱線圖(盒須圖):

> boxplot(mtcars$mpg, main="Box plot", ylab="Miles per Gallon")

箱線圖(又稱盒須圖)通過繪製連續型變數的五數總括,即最小值、下四分位數(第25百分位數)、中位數(第50百分位數)、上四分位數(第75百分位數)以及最大值,描述了連續型變數的分布。

> #具體的值
> boxplot.stats(mtcars$mpg)
$stats
[1] 10.4 15.3 19.2 22.8 33.9

$n
[1] 32

$conf
[1] 17.1 21.3

$out
numeric(0)

還可以使用並列箱線圖進行跨組比較,例如 boxplot(mpg ~ cyl, data=mtcars) 可以比較不同氣缸數車型的油耗分布。

6.點圖:

> dotchart(mtcars$mpg, labels=row.names(mtcars), cex=.7,
+          main="Gas Mileage for Car Models",
+          xlab="Miles Per Gallon")

> #分組、排序、著色後的點圖
> x <- mtcars[order(mtcars$mpg),]
> x$cyl <- factor(x$cyl)
> x$color[x$cyl==4] <- "red"
> x$color[x$cyl==6] <- "blue"
> x$color[x$cyl==8] <- "darkgreen"
> dotchart(x$mpg,
+          labels = row.names(x),
+          cex=.7,
+          groups = x$cyl,
+          gcolor = "black",
+          color = x$color,
+          pch=19,
+          main = "Gas Mileage for Car Models\ngrouped by cylinder",
+          xlab = "Miles Per Gallon")

總結:本章我們學習了描述連續型和類別型變數的方法。可視化可以讓我們更好地理解現實世界的數量關係。以上所列出的各種圖形都有各自的優劣勢和較為適合的應用場景。關於圖形,我覺得簡單的圖形更容易反映出本質的數量關係,但會犧牲一些美觀性;複雜的圖形可能令人賞心悅目,但有時候會轉移用戶的注意力,即所謂過度設計。如何取捨,視實際情況而定。


推薦閱讀:

移動互聯時代,RFM數據模型還有用嗎?
《R語言實戰》第三部分第十二章-重抽樣與自助法學習筆記
《R語言實戰》第四部分第十五章-時間序列學習筆記(II)
Python數據分析及可視化實例之Bokeh與Jupyter生成可視化圖表(8)
市場規模是怎麼估算出來的?都有哪些依據?

TAG:数据分析 | R编程语言 | 数据可视化 |