P7 cluster specific genes

Last update: 2018-01-18

Code version: 5996c19850dc5fc65723f30bc33ab1c8d083a7e6

Setting important directories

Also loading important libraries and custom functions for analysis.

seq_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/data"
file_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/output"
Rdata_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/data"
Script_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/code"
source(file.path(Script_dir,'init.R'))
source(file.path(Script_dir,"tools_R.r"))

Loading the P7 all cell .rds

P7.dat.filter <- readRDS(file = file.path(Rdata_dir, "P7.dat.filter.final.Rds"))
nrow(pData(P7.dat.filter)) # 224

## [1] 224

Performing differential expression

P7.Diff.Test <- differentialGeneTest(P7.dat.filter, 
                             fullModelFormulaStr = "~subset.cluster",
                             reducedModelFormulaStr = "~1",
                             relative_expr = FALSE,
                             cores=8,
                             verbose = F)


nrow(P7.Diff.Test)

## [1] 13746

# Only keeping differentially expressed genes with a FDF < 5%
P7.tmp<-P7.Diff.Test[P7.Diff.Test$qval < 0.05,]
# Ordering by q-value
P7.tmp.2 <- P7.tmp[order(P7.tmp$qval),]
P7.Diff.Genes <- as.character(P7.tmp.2$gene_short_name)
length(P7.Diff.Genes) # 3971

## [1] 3971

saveRDS(P7.Diff.Genes, file = file.path(Rdata_dir, "P7.diff.genes.rds"))

t-SNE plots of top 10 differentially expressed genes

q <- myTSNEPlotAlpha(P7.dat.filter,color="subset.cluster", shape="age",markers = P7.Diff.Genes[1:10],scaled = T) + scale_color_brewer(palette="Set1") + ggtitle("P7 - BCV tSNE - Top 10 Differentially Expressed Genes")

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

Finding specific genes for each cluster

P7.dat.diff.filter <- P7.dat.filter[row.names(P7.tmp.2),]
nrow(P7.dat.diff.filter)

## Features 
##     3971

subset.clusters <- levels(pData(P7.dat.diff.filter)$subset.cluster)
mean.clusters <- data.frame(rownames(exprs(P7.dat.diff.filter)))
percent.clusters <- data.frame(rownames(exprs(P7.dat.diff.filter)))

for(i in 1:length(subset.clusters)){
  tmp.dat <- P7.dat.diff.filter[,pData(P7.dat.diff.filter)$subset.cluster == subset.clusters[i]]

  per.tmp<-as.data.frame(apply(exprs(tmp.dat),1, function(x) length(which(x > 1))/ncol(tmp.dat)))
  colnames(per.tmp) <- paste0(subset.clusters[i],".perc") 

  mean.tmp<- as.data.frame(apply(log2(exprs(tmp.dat)+1),1,mean))
  mean.tmp <- as.data.frame(mean.tmp)
  colnames(mean.tmp) <- paste0(subset.clusters[i],".mean")
  
  mean.clusters <- merge(x = mean.clusters, y = mean.tmp, by.x = "rownames.exprs.P7.dat.diff.filter..", by.y = 0)
  percent.clusters <- merge(x = percent.clusters, y = per.tmp, by.x = "rownames.exprs.P7.dat.diff.filter..", by.y = 0)
}

rownames(mean.clusters) <- mean.clusters$rownames.exprs.P7.dat.diff.filter..
mean.clusters <- mean.clusters[,-1]
mean.clusters <- as.matrix(mean.clusters)

dat.JSD <- .specificity(mat = mean.clusters,logMode = F, relative = F)
dat.JSD <- as.data.frame(dat.JSD)

rownames(percent.clusters) <- percent.clusters$rownames.exprs.P7.dat.diff.filter..
percent.clusters <- percent.clusters[,-1]

merged.df <- merge(x = dat.JSD, y = percent.clusters, by = 0)
row.names(merged.df) <- merged.df$Row.names
merged.df <- merged.df[,-1]

Specificity score

Determining the specificity score to use as a cutoff

color <- c("#F2C318","#E790AC","#F18421","#0168A5","#848483","#A4CAEB","#885793","#008957","#222222")
color.transparent <- adjustcolor(color, alpha.f = 0.3)
bins <- seq(from = 0, to = 1, by= 0.005)

hist(dat.JSD$P7.FB.1.mean, breaks = bins, col = color.transparent[1], main = "P7 Subset Cluster Specificity Distributions", ylab = "Genes", xlab = "Specificity score")
hist(dat.JSD$P7.FB.2.mean, breaks = bins, add = T, col = color.transparent[2])
hist(dat.JSD$P7.MB.1.mean, breaks = bins, add = T, col = color.transparent[3])
hist(dat.JSD$P7.MB.2.mean, breaks = bins, add = T, col = color.transparent[4])
hist(dat.JSD$P7.MB.3.mean, breaks = bins, add = T, col = color.transparent[5])
hist(dat.JSD$P7.MB.4.mean, breaks = bins, add = T, col = color.transparent[6])
hist(dat.JSD$P7.OB.1.mean, breaks = bins, add = T, col = color.transparent[7])
hist(dat.JSD$P7.OB.2.mean, breaks = bins, add = T, col = color.transparent[8])
hist(dat.JSD$P7.OB.3.mean, breaks = bins, add = T, col = color.transparent[9])
abline(v = 0.3, lwd = 2)

Specific genes

Determining specific genes for each cluster and writing out a table

P7.specific.genes <- data.frame()

for(i in 1:9){
  gene.tmp.1 <- merged.df[merged.df[,i] >= 0.3 & merged.df[,i+9] >= 0.4,]
  gene.ordered <- row.names(gene.tmp.1[order(-gene.tmp.1[,i]),])
  cluster.genes <- data.frame(lookupGeneName(P7.dat.filter, gene.ordered))
  P7.specific.genes <- cbindPad(P7.specific.genes, cluster.genes)
}

names(P7.specific.genes) <- c("P7.FB.1","P7.FB.2","P7.MB.1","P7.MB.2","P7.MB.3","P7.MB.4","P7.OB.1","P7.OB.2","P7.OB.3")

write.table(P7.specific.genes, file = file.path(file_dir, "P7.specific.genes.txt"), sep = "\t", quote = F, row.names = F)

Interactive table for specific genes

library(DT)
datatable(P7.specific.genes)

## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## http://rstudio.github.io/DT/server.html

Session Info

sessionInfo()

## R version 3.3.0 (2016-05-03)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      splines   stats4    parallel  stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DT_0.2              ggbiplot_0.55       scales_0.5.0       
##  [4] SC3_1.1.4           ROCR_1.0-7          jackstraw_1.1.1    
##  [7] lfa_1.2.2           tsne_0.1-3          gridExtra_2.3      
## [10] slackr_1.4.2        vegan_2.4-4         permute_0.9-4      
## [13] MASS_7.3-47         gplots_3.0.1        RColorBrewer_1.1-2 
## [16] Hmisc_4.0-3         Formula_1.2-2       survival_2.41-3    
## [19] lattice_0.20-35     Heatplus_2.18.0     Rtsne_0.13         
## [22] pheatmap_1.0.8      tidyr_0.7.1         dplyr_0.7.4        
## [25] plyr_1.8.4          heatmap.plus_1.3    stringr_1.2.0      
## [28] marray_1.50.0       limma_3.28.21       reshape2_1.4.3     
## [31] monocle_2.2.0       DDRTree_0.1.5       irlba_2.2.1        
## [34] VGAM_1.0-2          ggplot2_2.2.1       Biobase_2.32.0     
## [37] BiocGenerics_0.18.0 Matrix_1.2-11      
## 
## loaded via a namespace (and not attached):
##  [1] RSelenium_1.7.1        colorspace_1.3-2       class_7.3-14          
##  [4] rprojroot_1.2          htmlTable_1.9          corpcor_1.6.9         
##  [7] base64enc_0.1-3        mvtnorm_1.0-6          codetools_0.2-15      
## [10] doParallel_1.0.11      robustbase_0.92-7      knitr_1.17            
## [13] jsonlite_1.5           cluster_2.0.6          semver_0.2.0          
## [16] shiny_1.0.5            rrcov_1.4-3            httr_1.3.1            
## [19] backports_1.1.1        assertthat_0.2.0       lazyeval_0.2.1        
## [22] acepack_1.4.1          htmltools_0.3.6        tools_3.3.0           
## [25] bindrcpp_0.2           igraph_1.1.2           gtable_0.2.0          
## [28] glue_1.1.1             binman_0.1.0           doRNG_1.6.6           
## [31] Rcpp_0.12.14           slam_0.1-37            gdata_2.18.0          
## [34] nlme_3.1-131           iterators_1.0.8        mime_0.5              
## [37] rngtools_1.2.4         gtools_3.5.0           WriteXLS_4.0.0        
## [40] XML_3.98-1.9           DEoptimR_1.0-8         yaml_2.1.15           
## [43] pkgmaker_0.22          rpart_4.1-11           fastICA_1.2-1         
## [46] latticeExtra_0.6-28    stringi_1.1.5          pcaPP_1.9-72          
## [49] foreach_1.4.3          e1071_1.6-8            checkmate_1.8.4       
## [52] caTools_1.17.1         rlang_0.1.6            pkgconfig_2.0.1       
## [55] matrixStats_0.52.2     bitops_1.0-6           qlcMatrix_0.9.5       
## [58] evaluate_0.10.1        purrr_0.2.4            bindr_0.1             
## [61] labeling_0.3           htmlwidgets_0.9        magrittr_1.5          
## [64] R6_2.2.2               combinat_0.0-8         wdman_0.2.2           
## [67] foreign_0.8-69         mgcv_1.8-22            nnet_7.3-12           
## [70] tibble_1.3.4           KernSmooth_2.23-15     rmarkdown_1.8         
## [73] data.table_1.10.4      HSMMSingleCell_0.106.2 digest_0.6.12         
## [76] xtable_1.8-2           httpuv_1.3.5           openssl_0.9.7         
## [79] munsell_0.4.3          registry_0.3

This R Markdown site was created with workflowr