## ---- echo=FALSE, include=FALSE----------------------------------------------- library(knitr) knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE, cache.lazy = FALSE) library(dplyr) library(tidyr) library(tibble) library(magrittr) library(ggplot2) library(ggrepel) library(tidybulk) library(tidySummarizedExperiment) my_theme = theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size=12), legend.position="bottom", aspect.ratio=1, strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) ) tibble_counts = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() ## ----eval=FALSE--------------------------------------------------------------- # BiocManager::install("tidybulk") ## ---- eval=FALSE-------------------------------------------------------------- # devtools::install_github("stemangiola/tidybulk") ## ----------------------------------------------------------------------------- counts_SE ## ----------------------------------------------------------------------------- class(counts_SE) ## ----eval=FALSE--------------------------------------------------------------- # counts_SE %>% get_bibliography() ## ----aggregate, message=FALSE, warning=FALSE, results='hide', class.source='yellow'---- rowData(counts_SE)$gene_name = rownames(counts_SE) counts_SE.aggr = counts_SE %>% aggregate_duplicates(.transcript = gene_name) ## ----normalise---------------------------------------------------------------- counts_SE.norm = counts_SE.aggr %>% identify_abundant(factor_of_interest = condition) %>% scale_abundance() ## ---- include=FALSE----------------------------------------------------------- counts_SE.norm %>% select(`count`, count_scaled, .abundant, everything()) ## ----plot_normalise----------------------------------------------------------- counts_SE.norm %>% ggplot(aes(count_scaled + 1, group=sample, color=`Cell.type`)) + geom_density() + scale_x_log10() + my_theme ## ----filter variable---------------------------------------------------------- counts_SE.norm.variable = counts_SE.norm %>% keep_variable() ## ----filter variable long, eval=FALSE----------------------------------------- # library(edgeR) # # x = norm_counts.table # # s <- rowMeans((x-rowMeans(x))^2) # o <- order(s,decreasing=TRUE) # x <- x[o[1L:top],,drop=FALSE] # # norm_counts.table = norm_counts.table[rownames(x)] # # norm_counts.table$cell_type = tibble_counts[ # match( # tibble_counts$sample, # rownames(norm_counts.table) # ), # "Cell.type" # ] ## ----mds---------------------------------------------------------------------- counts_SE.norm.MDS = counts_SE.norm %>% reduce_dimensions(method="MDS", .dims = 6) ## ---- eval = FALSE------------------------------------------------------------ # library(limma) # # count_m_log = log(count_m + 1) # cmds = limma::plotMDS(ndim = .dims, plot = FALSE) # # cmds = cmds %$% # cmdscale.out %>% # setNames(sprintf("Dim%s", 1:6)) # # cmds$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(cmds)), # "Cell.type" # ] ## ----plot_mds----------------------------------------------------------------- counts_SE.norm.MDS %>% pivot_sample() %>% select(contains("Dim"), everything()) counts_SE.norm.MDS %>% pivot_sample() %>% GGally::ggpairs(columns = 6:(6+5), ggplot2::aes(colour=`Cell.type`)) ## ----pca, message=FALSE, warning=FALSE, results='hide'------------------------ counts_SE.norm.PCA = counts_SE.norm %>% reduce_dimensions(method="PCA", .dims = 6) ## ----eval=FALSE--------------------------------------------------------------- # count_m_log = log(count_m + 1) # pc = count_m_log %>% prcomp(scale = TRUE) # variance = pc$sdev^2 # variance = (variance / sum(variance))[1:6] # pc$cell_type = counts[ # match(counts$sample, rownames(pc)), # "Cell.type" # ] ## ----plot_pca----------------------------------------------------------------- counts_SE.norm.PCA %>% pivot_sample() %>% select(contains("PC"), everything()) counts_SE.norm.PCA %>% pivot_sample() %>% GGally::ggpairs(columns = 11:13, ggplot2::aes(colour=`Cell.type`)) ## ----tsne, message=FALSE, warning=FALSE, results='hide'----------------------- counts_SE.norm.tSNE = breast_tcga_mini_SE %>% identify_abundant() %>% reduce_dimensions( method = "tSNE", perplexity=10, pca_scale =TRUE ) ## ---- eval=FALSE-------------------------------------------------------------- # count_m_log = log(count_m + 1) # # tsne = Rtsne::Rtsne( # t(count_m_log), # perplexity=10, # pca_scale =TRUE # )$Y # tsne$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(tsne)), # "Cell.type" # ] ## ----------------------------------------------------------------------------- counts_SE.norm.tSNE %>% pivot_sample() %>% select(contains("tSNE"), everything()) counts_SE.norm.tSNE %>% pivot_sample() %>% ggplot(aes(x = `tSNE1`, y = `tSNE2`, color=Call)) + geom_point() + my_theme ## ----rotate------------------------------------------------------------------- counts_SE.norm.MDS.rotated = counts_SE.norm.MDS %>% rotate_dimensions(`Dim1`, `Dim2`, rotation_degrees = 45, action="get") ## ---- eval=FALSE-------------------------------------------------------------- # rotation = function(m, d) { # r = d * pi / 180 # ((bind_rows( # c(`1` = cos(r), `2` = -sin(r)), # c(`1` = sin(r), `2` = cos(r)) # ) %>% as_matrix) %*% m) # } # mds_r = pca %>% rotation(rotation_degrees) # mds_r$cell_type = counts[ # match(counts$sample, rownames(mds_r)), # "Cell.type" # ] ## ----plot_rotate_1------------------------------------------------------------ counts_SE.norm.MDS.rotated %>% ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type` )) + geom_point() + my_theme ## ----plot_rotate_2------------------------------------------------------------ counts_SE.norm.MDS.rotated %>% pivot_sample() %>% ggplot(aes(x=`Dim1_rotated_45`, y=`Dim2_rotated_45`, color=`Cell.type` )) + geom_point() + my_theme ## ----de, message=FALSE, warning=FALSE, results='hide'------------------------- counts_SE.de = counts_SE %>% test_differential_abundance( ~ condition, action="get") counts_SE.de ## ---- eval=FALSE-------------------------------------------------------------- # library(edgeR) # # dgList <- DGEList(counts=counts_m,group=group) # keep <- filterByExpr(dgList) # dgList <- dgList[keep,,keep.lib.sizes=FALSE] # dgList <- calcNormFactors(dgList) # design <- model.matrix(~group) # dgList <- estimateDisp(dgList,design) # fit <- glmQLFit(dgList,design) # qlf <- glmQLFTest(fit,coef=2) # topTags(qlf, n=Inf) ## ----de contrast, message=FALSE, warning=FALSE, results='hide', eval=FALSE---- # counts_SE.de = # counts_SE %>% # identify_abundant(factor_of_interest = condition) %>% # test_differential_abundance( # ~ 0 + condition, # .contrasts = c( "conditionTRUE - conditionFALSE"), # action="get" # ) ## ----adjust, message=FALSE, warning=FALSE, results='hide'--------------------- counts_SE.norm.adj = counts_SE.norm %>% adjust_abundance( ~ factor_of_interest + batch) ## ---- eval=FALSE-------------------------------------------------------------- # library(sva) # # count_m_log = log(count_m + 1) # # design = # model.matrix( # object = ~ factor_of_interest + batch, # data = annotation # ) # # count_m_log.sva = # ComBat( # batch = design[,2], # mod = design, # ... # ) # # count_m_log.sva = ceiling(exp(count_m_log.sva) -1) # count_m_log.sva$cell_type = counts[ # match(counts$sample, rownames(count_m_log.sva)), # "Cell.type" # ] # ## ----cibersort---------------------------------------------------------------- counts_SE.cibersort = counts_SE %>% deconvolve_cellularity(action="get", cores=1, prefix = "cibersort__") ## ---- eval=FALSE-------------------------------------------------------------- # # source(‘CIBERSORT.R’) # count_m %>% write.table("mixture_file.txt") # results <- CIBERSORT( # "sig_matrix_file.txt", # "mixture_file.txt", # perm=100, QN=TRUE # ) # results$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(results)), # "Cell.type" # ] # ## ----plot_cibersort----------------------------------------------------------- counts_SE.cibersort %>% pivot_longer( names_to= "Cell_type_inferred", values_to = "proportion", names_prefix ="cibersort__", cols=contains("cibersort__") ) %>% ggplot(aes(x=`Cell_type_inferred`, y=proportion, fill=`Cell.type`)) + geom_boxplot() + facet_wrap(~`Cell.type`) + my_theme + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), aspect.ratio=1/5) ## ----DC, eval=FALSE----------------------------------------------------------- # # counts_SE %>% # test_differential_cellularity(. ~ condition ) # ## ----DC_censored, eval=FALSE-------------------------------------------------- # # counts_SE %>% # test_differential_cellularity(survival::Surv(time, dead) ~ .) # ## ----cluster------------------------------------------------------------------ counts_SE.norm.cluster = counts_SE.norm.MDS %>% cluster_elements(method="kmeans", centers = 2, action="get" ) ## ---- eval=FALSE-------------------------------------------------------------- # count_m_log = log(count_m + 1) # # k = kmeans(count_m_log, iter.max = 1000, ...) # cluster = k$cluster # # cluster$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(cluster)), # c("Cell.type", "Dim1", "Dim2") # ] # ## ----plot_cluster------------------------------------------------------------- counts_SE.norm.cluster %>% ggplot(aes(x=`Dim1`, y=`Dim2`, color=`cluster_kmeans`)) + geom_point() + my_theme ## ----SNN, eval=FALSE, cache=TRUE, message=FALSE, warning=FALSE, results='hide'---- # counts_SE.norm.SNN = # counts_SE.norm.tSNE %>% # cluster_elements(method = "SNN") ## ---- eval=FALSE-------------------------------------------------------------- # library(Seurat) # # snn = CreateSeuratObject(count_m) # snn = ScaleData( # snn, display.progress = TRUE, # num.cores=4, do.par = TRUE # ) # snn = FindVariableFeatures(snn, selection.method = "vst") # snn = FindVariableFeatures(snn, selection.method = "vst") # snn = RunPCA(snn, npcs = 30) # snn = FindNeighbors(snn) # snn = FindClusters(snn, method = "igraph", ...) # snn = snn[["seurat_clusters"]] # # snn$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(snn)), # c("Cell.type", "Dim1", "Dim2") # ] # ## ----SNN_plot, eval=FALSE----------------------------------------------------- # counts_SE.norm.SNN %>% # pivot_sample() %>% # select(contains("tSNE"), everything()) # # counts_SE.norm.SNN %>% # pivot_sample() %>% # gather(source, Call, c("cluster_SNN", "Call")) %>% # distinct() %>% # ggplot(aes(x = `tSNE1`, y = `tSNE2`, color=Call)) + geom_point() + facet_grid(~source) + my_theme # # # # Do differential transcription between clusters # counts_SE.norm.SNN %>% # mutate(factor_of_interest = `cluster_SNN` == 3) %>% # test_differential_abundance( # ~ factor_of_interest, # action="get" # ) ## ----drop--------------------------------------------------------------------- counts_SE.norm.non_redundant = counts_SE.norm.MDS %>% remove_redundancy( method = "correlation" ) ## ---- eval=FALSE-------------------------------------------------------------- # library(widyr) # # .data.correlated = # pairwise_cor( # counts, # sample, # transcript, # rc, # sort = TRUE, # diag = FALSE, # upper = FALSE # ) %>% # filter(correlation > correlation_threshold) %>% # distinct(item1) %>% # rename(!!.element := item1) # # # Return non redudant data frame # counts %>% anti_join(.data.correlated) %>% # spread(sample, rc, - transcript) %>% # left_join(annotation) # # # ## ----plot_drop---------------------------------------------------------------- counts_SE.norm.non_redundant %>% pivot_sample() %>% ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type`)) + geom_point() + my_theme ## ----drop2-------------------------------------------------------------------- counts_SE.norm.non_redundant = counts_SE.norm.MDS %>% remove_redundancy( method = "reduced_dimensions", Dim_a_column = `Dim1`, Dim_b_column = `Dim2` ) ## ----plot_drop2--------------------------------------------------------------- counts_SE.norm.non_redundant %>% pivot_sample() %>% ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type`)) + geom_point() + my_theme ## ----eval=FALSE--------------------------------------------------------------- # counts = tidybulk_SAM_BAM( # file_names, # genome = "hg38", # isPairedEnd = TRUE, # requireBothEndsMapped = TRUE, # checkFragLength = FALSE, # useMetaFeatures = TRUE # ) ## ----ensembl------------------------------------------------------------------ counts_ensembl %>% ensembl_to_symbol(ens) ## ----description-------------------------------------------------------------- counts_SE %>% describe_transcript() %>% select(feature, description, everything()) ## ----------------------------------------------------------------------------- sessionInfo()