How to use R to perform PCA for GWAS

05-02

#This is not a program/package; all data come from Eiji Yamamoto's paper.

#PCA

#Read the marker table. header = FALSE keeps the first line of the CSV as an
#ordinary data row, so it is still available after the transpose below.
data <- read.csv(
  "data7-hcluster-s3.csv",
  stringsAsFactors = TRUE,
  header = FALSE
)

#Exclude the redundant second column. (Hierarchical clustering can handle NA,
#but PCA cannot, hence the next step.)
data <- data[, -2, drop = FALSE]

#PCA cannot handle NA, so drop every row that contains an NA value before
#transposing the data.
data <- na.omit(data)

#Transpose the data so that the objects (the lines to be clustered) run down
#the first column and the variable names (SNP ID or marker ID) run along the
#first row. t() turns the data frame into a character matrix.
data <- t(data)

#This is what our data looks like. PCA does not allow non-numerical values in the matrix or data frame, so we need to exclude the first column (the line names, which were the first row before transposing).

1 2 4

V1 "SNP ID" "AX-95808842" "AX-95771895"

V5 "SL4" "2" "0"

V6 "SL5" "1" "0"

V7 "SL6" "2" "0"

V8 "SL7" "0" "0"

V9 "SL8" "2" "0"

V10 "SL9" "1" "0"

V11 "SL10" "2" "0"

V12 "SL11" "2" "0"

V13 "SL12" "2" "0"

#Drop the first column, which holds the non-numeric line names.
#drop = FALSE keeps the matrix shape even if only one column remains.
data <- data[, -1, drop = FALSE]

#Then we make row V1 (the first row) the header of the data, and convert
#the column types.
data <- as.data.frame(data)

#Use the values of the first row as the column names.
names(data) <- vapply(data[1, , drop = FALSE], as.character, character(1))

#Remove the header row now that it has been promoted to column names.
data <- data[-1, , drop = FALSE]

#Convert every column from text to its natural type. as.is = TRUE is passed
#explicitly because type.convert() warns when it is left unspecified
#(R >= 4.0); it also keeps any non-numeric leftovers as character, not factor.
data[] <- lapply(
  data,
  function(x) type.convert(as.character(x), as.is = TRUE)
)

#Run the PCA, summarise it and plot it.

#Load the packages used for plotting.
library(ggplot2)

library(plyr)

library(scales)

library(grid)

library(ggbiplot)

#Compute the principal components. This step was missing: data.pca was used
#below without ever being created. Columns are centered but not scaled,
#because scaling to unit variance fails for a marker that is constant across
#all lines.
#NOTE(review): use scale. = TRUE if constant markers are filtered out first.
data.pca <- prcomp(data, center = TRUE, scale. = FALSE)

#Print the importance of each component (standard deviation and proportion
#of variance).
summary(data.pca)

#Build the PCA plot of the observations without the variable arrows.
g <- ggbiplot(data.pca, obs.scale = 1, var.scale = 1, var.axes = FALSE)

#Draw the plot explicitly so it also appears when the script is source()d.
print(g)

#done