Last update: 2018-01-18
Code version: 5996c19850dc5fc65723f30bc33ab1c8d083a7e6
Also loading important libraries and custom functions for analysis.
seq_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/data"
file_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/output"
Rdata_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/data"
Script_dir <- "/Volumes/PAULHOOK/sc-da-parkinsons/code"
source(file.path(Script_dir,'init.R'))
source(file.path(Script_dir,"tools_R.r"))
library(DT)
In order to incorporate other information about expression of genes in SN DA neurons, we downloaded expression data from adult midbrain. This came from La Manno, et al. (2016) courtesy of GEO:GSE76381. This data was not in a very friendly format (an extensive header; see below), so it had to be extensively processed. This data is referred to as “linnarsson” (who is the senior author on the paper) because it was easier to remember.
# Loading in the data
linnarsson.adult <- read.csv(file = file.path(Rdata_dir,"GSE76381_MouseAdultDAMoleculeCounts.cef.txt"), stringsAsFactors = F)
dim(linnarsson.adult)
## [1] 18224 245
# This is a very convoluted header
test <- head(linnarsson.adult)[,1:5]
datatable(test)
In order to better work with this data, we needed to extract the cell annotation. This is contained in the “CELL_ID” and “Cell_type” data seen above. This annotation dataframe will have rows in the same order as columns.
# First, in order to make the data easier to work with, we transpose the data to make it easer to work with
linnarsson.t <- t(linnarsson.adult)
dim(linnarsson.t)
## [1] 245 18224
# Extracting just the columns of cell annotations
linnarsson.cells <- linnarsson.t[,2:3]
# Removing the first two rows (CEF and X1) that are useless
linnarsson.cells <- as_data_frame(linnarsson.cells[-1:-2,])
names(linnarsson.cells) <- c("CELL_ID", "CELL_TYPE")
head(linnarsson.cells)
## # A tibble: 6 x 2
## CELL_ID CELL_TYPE
## <chr> <chr>
## 1 1772072122_A04 DA-VTA4
## 2 1772072122_A05 DA-VTA2
## 3 1772072122_A06 DA-VTA3
## 4 1772072122_A07 DA-SNC
## 5 1772072122_A08 DA-VTA4
## 6 1772072122_A09 DA-VTA1
Now, we need to extract the expression data.
# Now extracting the expression matrix. Start with row 5 as thats where the expression data actually starts.
linnarsson.exprs <- linnarsson.adult[5:nrow(linnarsson.adult),]
#head(linnarsson.exprs)
dim(linnarsson.exprs)
## [1] 18220 245
# Removing the 2nd column which is blank ("X1")
linnarsson.exprs <- linnarsson.exprs[,-2]
#head(linnarsson.exprs)
# Renaming rows as gene names
rownames(linnarsson.exprs) <- linnarsson.exprs[,1]
linnarsson.exprs <- linnarsson.exprs[,-1]
#head(linnarsson.exprs)
dim(linnarsson.exprs)
## [1] 18220 243
Again, the column order of the expression matrix is in the same order as the cellular annotations, so we can just pop the cell names back on back on.
# Adding back in column names
names(linnarsson.exprs) <- linnarsson.cells$CELL_ID
#head(linnarsson.exprs)
# Making sure all columns are 'numeric'
linnarsson.final <- as.data.frame(apply(linnarsson.exprs, 2, as.numeric))
#head(linnarsson.final)
# Making sure rownames are the same
rownames(linnarsson.final) <- rownames(linnarsson.exprs)
#head(linnarsson.final)
Now, we want to just extract ‘DA-SNC’ data and calculate the average expression for each gene
# Extracting cells classified as 'DA-SNC'
lin.snc.cell <- linnarsson.cells[linnarsson.cells$CELL_TYPE == "DA-SNC",]
# Saving list of cell names that were classified as 'DA-SNC'
snc.cells <- lin.snc.cell$CELL_ID
# Extracting the expression for all 'DA-SNC' cells
lin.snc.exprs <- linnarsson.final[,colnames(linnarsson.final) %in% snc.cells]
# Calculating the mean
snc.mean.df <- as.data.frame(apply(lin.snc.exprs, 1, mean))
names(snc.mean.df) <- "snc.mean"
rownames(snc.mean.df) <- rownames(lin.snc.exprs)
head(snc.mean.df)
## snc.mean
## Xkr4 0.2465753
## Rp1 0.0000000
## Sox17 0.0000000
## Mrpl15 1.3013699
## Lypla1 0.8904110
## Tcea1 2.2602740
# Only keeping rownames (genes) that have an average expression >= 0.5. I have to duplicate a column because dataframes do not cooperate.
snc.mean.df$mean.2 <- snc.mean.df$snc.mean
lin.snc.expressed <- row.names(snc.mean.df[snc.mean.df$snc.mean >= 0.5,])
head(lin.snc.expressed)
## [1] "Mrpl15" "Lypla1" "Tcea1" "Atp6v1h" "Oprk1" "Rb1cc1"
# Saving the names of genes that are expressed above 0.5 for later use
saveRDS(object = lin.snc.expressed, file = file.path(Rdata_dir,"lin.snc.expressed.rds"))
sessionInfo()
## R version 3.3.0 (2016-05-03)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid splines stats4 parallel stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] DT_0.2 ggbiplot_0.55 scales_0.5.0
## [4] SC3_1.1.4 ROCR_1.0-7 jackstraw_1.1.1
## [7] lfa_1.2.2 tsne_0.1-3 gridExtra_2.3
## [10] slackr_1.4.2 vegan_2.4-4 permute_0.9-4
## [13] MASS_7.3-47 gplots_3.0.1 RColorBrewer_1.1-2
## [16] Hmisc_4.0-3 Formula_1.2-2 survival_2.41-3
## [19] lattice_0.20-35 Heatplus_2.18.0 Rtsne_0.13
## [22] pheatmap_1.0.8 tidyr_0.7.1 dplyr_0.7.4
## [25] plyr_1.8.4 heatmap.plus_1.3 stringr_1.2.0
## [28] marray_1.50.0 limma_3.28.21 reshape2_1.4.3
## [31] monocle_2.2.0 DDRTree_0.1.5 irlba_2.2.1
## [34] VGAM_1.0-2 ggplot2_2.2.1 Biobase_2.32.0
## [37] BiocGenerics_0.18.0 Matrix_1.2-11
##
## loaded via a namespace (and not attached):
## [1] RSelenium_1.7.1 colorspace_1.3-2 class_7.3-14
## [4] rprojroot_1.2 htmlTable_1.9 corpcor_1.6.9
## [7] base64enc_0.1-3 mvtnorm_1.0-6 codetools_0.2-15
## [10] doParallel_1.0.11 robustbase_0.92-7 knitr_1.17
## [13] jsonlite_1.5 cluster_2.0.6 semver_0.2.0
## [16] shiny_1.0.5 rrcov_1.4-3 httr_1.3.1
## [19] backports_1.1.1 assertthat_0.2.0 lazyeval_0.2.1
## [22] acepack_1.4.1 htmltools_0.3.6 tools_3.3.0
## [25] bindrcpp_0.2 igraph_1.1.2 gtable_0.2.0
## [28] glue_1.1.1 binman_0.1.0 doRNG_1.6.6
## [31] Rcpp_0.12.14 slam_0.1-37 gdata_2.18.0
## [34] nlme_3.1-131 iterators_1.0.8 mime_0.5
## [37] rngtools_1.2.4 gtools_3.5.0 WriteXLS_4.0.0
## [40] XML_3.98-1.9 DEoptimR_1.0-8 yaml_2.1.15
## [43] pkgmaker_0.22 rpart_4.1-11 fastICA_1.2-1
## [46] latticeExtra_0.6-28 stringi_1.1.5 pcaPP_1.9-72
## [49] foreach_1.4.3 e1071_1.6-8 checkmate_1.8.4
## [52] caTools_1.17.1 rlang_0.1.6 pkgconfig_2.0.1
## [55] matrixStats_0.52.2 bitops_1.0-6 qlcMatrix_0.9.5
## [58] evaluate_0.10.1 purrr_0.2.4 bindr_0.1
## [61] htmlwidgets_0.9 magrittr_1.5 R6_2.2.2
## [64] combinat_0.0-8 wdman_0.2.2 foreign_0.8-69
## [67] mgcv_1.8-22 nnet_7.3-12 tibble_1.3.4
## [70] KernSmooth_2.23-15 rmarkdown_1.8 data.table_1.10.4
## [73] HSMMSingleCell_0.106.2 digest_0.6.12 xtable_1.8-2
## [76] httpuv_1.3.5 openssl_0.9.7 munsell_0.4.3
## [79] registry_0.3
This R Markdown site was created with workflowr