Input: matrix where rows are variables and columns are entities (e.g. rows=genes, columns=samples).
Computation: Iteratively search for loadings that maximize the variance (var = average of the squared differences from mean)
Outputs:
(A) Loadings matrix of the normalized contribution of a gene to a PC
(B) Scores matrix
Fetch code and data from website.
Launch R, load data. Alter paths below for your setup ...
source("../../misc/utils.R")
library(ggplot2)
expRun<-utils_loadObject("../../misc/expDat.R")
stRun<-utils_loadObject("../../misc/sampTab.R")
dim(expRun)
## [1] 2000 1000
expRun[1:3,1:3]
## GSM501434 GSM287580 GSM287575
## Copg1 5.848932 5.569252 5.615026
## Atp6v0d1 7.841972 8.928719 8.832163
## Golga7 8.573669 8.924030 8.908230
dim(stRun)
## [1] 1000 10
stRun[1:3,c(2,3,4,10)]
## sample_id sample_name
## GSM501434 GSM501434 LX_ND_10_36_9_CH12LX_T_1_111908_Mouse430_2_RSTMSU601_9
## GSM287580 GSM287580 Memory Tx B cell (FG)_2nd generation screen
## GSM287575 GSM287575 Naive AA4.1- B cell (A)_2nd generation screen
## description1 description6
## GSM501434 bcell blood
## GSM287580 bcell blood
## GSM287575 bcell blood
system.time(xFF2<-prcomp(t(expRun), center=T, scale=T))
## user system elapsed
## 7.328 0.087 7.421
class(xFF2)
## [1] "prcomp"
names(xFF2)
## [1] "sdev" "rotation" "center" "scale" "x"
dim(xFF2$rotation) # loadings
## [1] 2000 1000
dim(xFF2$x) # scores
## [1] 1000 1000
pcaVals2<-xFF2$x
pcaVals2<-data.frame(pcaVals2);
pcaVals2<-cbind(pcaVals2, stRun) # don't worry about this now...
varExpl2<-xFF2$sdev**2 / sum(xFF2$sdev**2)
varExpl2[1:5]
## [1] 0.14946926 0.08713941 0.06243711 0.06170882 0.05702081
ggplot(pcaVals2, aes(x=PC1, y=PC2, colour=as.factor(description1)) ) +
geom_point(pch=19, alpha=3/4, size=1) +
theme_bw() +
guides(colour=guide_legend(ncol=2))
ggplot(pcaVals2, aes(x=PC1, y=PC2, colour=as.factor(description6)) ) +
geom_point(pch=19, alpha=3/4, size=1) +
theme_bw() +
guides(colour=guide_legend(ncol=2))
g1<-names(sort(abs(xFF2$rotation[,1]), decreasing=T)[1:25])
g2<-names(sort(abs(xFF2$rotation[,2]), decreasing=T)[1:25])
g1[1:4]
## [1] "Cct2" "Cct5" "Rbm12" "S100a1"
g2[1:4]
## [1] "Elf1" "Ikbke" "Ifi47" "Kifap3"
library(RColorBrewer)
ColorRamp <- colorRampPalette(rev(brewer.pal(n = 7,name = "RdYlBu")))(100)
pcaVals2<-cbind(pcaVals2, t(expRun))
ggplot(pcaVals2, aes(x=PC1, y=PC2, colour=Cct2) ) +
geom_point(pch=19, alpha=3/4, size=1) +
theme_bw() +
scale_colour_gradientn(colours=ColorRamp)
library(RColorBrewer)
ColorRamp <- colorRampPalette(rev(brewer.pal(n = 7,name = "RdYlBu")))(100)
pcaVals2<-cbind(pcaVals2, t(expRun))
ggplot(pcaVals2, aes(x=PC1, y=PC2, colour=Elf1) ) +
geom_point(pch=19, alpha=3/4, size=1) +
theme_bw() +
scale_colour_gradientn(colours=ColorRamp)
Single cell expression data described here: http://software.10xgenomics.com/files/samples/cell/pbmc3k