TabulaMurisSenisData 1.4.0
library(SingleCellExperiment)
library(TabulaMurisSenisData)
library(ggplot2)
This package provides access to the processed bulk and single-cell RNA-seq data
from the Tabula Muris Senis data set
(Schaum et al. 2019; Tabula Muris Consortium 2020). The processed bulk RNA-seq
data was downloaded from GEO (accession number GSE132040). The single-cell data
(droplet and FACS) was obtained from FigShare (see below for links).
All data sets are provided as `SingleCellExperiment` objects for easy access
and use within the Bioconductor ecosystem.
The bulk RNA-seq data can be accessed via the `TabulaMurisSenisBulk()`
function. By setting the argument `infoOnly` to `TRUE`, we can get information
about the size of the data set before downloading it.
# query the download size only (infoOnly = TRUE); the size is reported below
tmp <- TabulaMurisSenisBulk(infoOnly = TRUE)
#> snapshotDate(): 2022-10-24
#> Total download size: 59.8 MiB
# retrieve the bulk RNA-seq data set (served from the local cache in this run)
tms_bulk <- TabulaMurisSenisBulk()
#> snapshotDate(): 2022-10-24
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
# print a summary of the returned SingleCellExperiment object
tms_bulk
#> class: SingleCellExperiment
#> dim: 54352 947
#> metadata(0):
#> assays(1): counts
#> rownames(54352): 0610005C13Rik 0610006L08Rik ... n-TSaga9 n-TStga1
#> rowData names(8): source type ... havana_gene tag
#> colnames(947): A1_384Bulk_Plate1_S1 A1_384Bulk_Plate3_S1 ...
#> P9_384Bulk_Plate2_S369 P9_384Bulk_Plate3_S369
#> colData names(19): Sample name title ... __alignment_not_unique organ
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
We list the available tissues, together with the number of bulk samples for each.
# number of bulk samples per organ (`$` on the object reads a colData column)
table(tms_bulk$organ)
#>
#> BAT Bone Brain GAT Heart
#> 54 55 56 56 54
#> Kidney Limb_Muscle Liver Lung MAT
#> 55 54 55 55 56
#> Marrow NA Pancreas SCAT Skin
#> 54 14 56 56 51
#> Small_Intestine Spleen WBC
#> 55 56 55
The data files for the droplet single-cell RNA-seq data were downloaded from FigShare:
We list the available tissues.
# list the tissue names available for the droplet data set
listTabulaMurisSenisTissues(dataset = "Droplet")
#> [1] "All" "Large_Intestine" "Pancreas" "Trachea"
#> [5] "Skin" "Fat" "Thymus" "Liver"
#> [9] "Heart_and_Aorta" "Mammary_Gland" "Bladder" "Lung"
#> [13] "Kidney" "Limb_Muscle" "Spleen" "Tongue"
#> [17] "Marrow"
As for the bulk data, we can print the size of the data set before downloading it.
# query the download size only (infoOnly = TRUE) for the combined "All" data set
tmp <- TabulaMurisSenisDroplet(tissues = "All", infoOnly = TRUE)
#> snapshotDate(): 2022-10-24
#> Total download size (All): 709.0 MiB
# retrieve the droplet data for all tissues (served from the local cache in this run)
tms_droplet <- TabulaMurisSenisDroplet(tissues = "All")
#> snapshotDate(): 2022-10-24
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> require("rhdf5")
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
# print the result: a list with one SingleCellExperiment per requested tissue
tms_droplet
#> $All
#> class: SingleCellExperiment
#> dim: 20138 245389
#> metadata(0):
#> assays(1): counts
#> rownames(20138): Xkr4 Rp1 ... Sly Erdr1
#> rowData names(6): n_cells means ... highly_variable varm
#> colnames(245389): AAACCTGCAGGGTACA-1-0-0-0 AAACCTGCAGTAAGCG-1-0-0-0 ...
#> 10X_P8_15_TTTGTCATCGGCTTGG-1 10X_P8_15_TTTGTCATCTTGTTTG-1
#> colData names(15): age cell ... louvain leiden
#> reducedDimNames(2): PCA UMAP
#> mainExpName: NULL
#> altExpNames(0):
We plot the UMAP of the entire data set and color by tissue, to re-create the plot from here.
# colour palette: one hex colour per tissue, keyed by tissue name
tissue_cols <- c(
  Pancreas        = "#3182bd",
  Thymus          = "#31a354",
  Trachea         = "#636363",
  Bladder         = "#637939",
  Lung            = "#7b4173",
  Large_Intestine = "#843c39",
  Fat             = "#969696",
  Tongue          = "#a1d99b",
  Mammary_Gland   = "#ce6dbd",
  Limb_Muscle     = "#d6616b",
  Marrow          = "#de9ed6",
  Skin            = "#e6550d",
  Liver           = "#e7969c",
  Heart_and_Aorta = "#e7ba52",
  Kidney          = "#e7cb94",
  Spleen          = "#fd8d3c"
)
# pull the combined all-tissue object out of the returned list and show it
se <- tms_droplet[["All"]]
se
#> class: SingleCellExperiment
#> dim: 20138 245389
#> metadata(0):
#> assays(1): counts
#> rownames(20138): Xkr4 Rp1 ... Sly Erdr1
#> rowData names(6): n_cells means ... highly_variable varm
#> colnames(245389): AAACCTGCAGGGTACA-1-0-0-0 AAACCTGCAGTAAGCG-1-0-0-0 ...
#> 10X_P8_15_TTTGTCATCGGCTTGG-1 10X_P8_15_TTTGTCATCTTGTTTG-1
#> colData names(15): age cell ... louvain leiden
#> reducedDimNames(2): PCA UMAP
#> mainExpName: NULL
#> altExpNames(0):
# build the plotting table: UMAP coordinates plus the tissue label of each cell
ds <- as.data.frame(reducedDim(se, type = "UMAP"))
ds$tissue <- colData(se)$tissue
head(ds)
#> UMAP1 UMAP2 tissue
#> AAACCTGCAGGGTACA-1-0-0-0 5.5556602 -10.160711 Tongue
#> AAACCTGCAGTAAGCG-1-0-0-0 2.9584570 -14.145093 Tongue
#> AAACCTGTCATTATCC-1-0-0-0 3.1235533 -14.481063 Tongue
#> AAACGGGGTACAGTGG-1-0-0-0 1.5939721 -14.062417 Tongue
#> AAACGGGGTCTTCTCG-1-0-0-0 0.5233619 -8.997872 Tongue
#> AAAGATGAGCTATGCT-1-0-0-0 1.0210617 -14.642970 Tongue
# scatter plot of the UMAP embedding, one point per cell, coloured by tissue
ggplot(data = ds, mapping = aes(x = UMAP1, y = UMAP2, colour = tissue)) +
  geom_point(size = 0.05) +
  scale_colour_manual(values = tissue_cols) +
  # draw legend keys at size 5 instead of the 0.05 used for the points
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  theme_classic()