## ----knitsetup, message=FALSE, warning=FALSE, include=FALSE-------------------
knitr::opts_chunk$set(collapse = TRUE, warning = TRUE)

## ----loadData, message=FALSE--------------------------------------------------
# As an example for the quick start, we will load an existing example
# See further down if starting from bigWigs or a feature based object
library(compartmap)

# Load in some example scRNA-seq data from K562
# These data are derived from Johnson and Rhodes et. al 2021 STORM-seq
# They have already been TF-IDF transformed with transformTFIDF()
# See the full workflow below for more details
data("k562_scrna_chr14", package = "compartmap")


## ----processData, message=FALSE-----------------------------------------------

#### Group level inference ####
#Process chr14 of the example K562 scRNA-seq data and infer higher-order chromatin at 1Mb resolution
k562_compartments <- scCompartments(k562_scrna_chr14,
                                    chr = "chr14",
                                    res = 1e6,
                                    group = TRUE,
                                    bootstrap = FALSE,
                                    genome = "hg19",
                                    assay = "rna")

#### Single-cell level inference ####
# To infer higher-order domains in single cells and quantifying sign coherence with the bootstrapping procedure, you can run:
# Sub-sample to 10 cells as an example
k562_scrna_chr14.sub <- k562_scrna_chr14[,sample(colnames(k562_scrna_chr14),
                                                 size = 10, replace = FALSE)]
k562_compartments.boot <- scCompartments(k562_scrna_chr14.sub,
                                         chr = "chr14",
                                         res = 1e6,
                                         group = FALSE,
                                         bootstrap = TRUE,
                                         num.bootstraps = 10,
                                         genome = "hg19",
                                         assay = "rna")

# Flip the domain sign if the sign coherence is discordant in 80% of the bootstraps
k562_compartments.boot.fix <- fixCompartments(k562_compartments.boot,
                                              min.conf = 0.8)

# Look at the first cell in the GRangesList object
k562_compartments.boot.fix[[1]]


## ----postprocessPlot, warning=FALSE-------------------------------------------

# Plot the "fixed" results in cell 1 from above with plotAB
# Include the confidence intervals and median confidence estimate
plotAB(k562_compartments.boot.fix[[1]], chr = "chr14",
       what = "flip.score", with.ci = TRUE, median.conf = TRUE)

# It is known that sometimes, the domains may be inverted relative to orthogonal data
# This is also true in Hi-C and scHi-C domain inference
# One can invert all domains by setting reverse = TRUE in the plotAB call

# plotAB(k562_compartments.boot.fix[[1]], chr = "chr14",
#        what = "flip.score", with.ci = TRUE, median.conf = TRUE,
#        reverse = TRUE)


## ----postprocessInflections, message=FALSE, warning=FALSE---------------------

# Extract single-cell domain inflections
# Domain inflections can be used to look for nearby CTCF sites, etc.
k562_cell_1_inflections <- getDomainInflections(k562_compartments.boot.fix[[1]],
                                                what = "flip.score",
                                                res = 1e6,
                                                chrs = "chr14",
                                                genome = "hg19")
# Show the inflection points
k562_cell_1_inflections


## ----importbw, eval=FALSE-----------------------------------------------------
#  
#  # We can import a list of bigWig files and merge them into a RangedSummarizedExperiment object
#  
#  # list the example bigWigs
#  bigwigs <- list.files(system.file("extdata", package = "compartmap"),
#                        full.names = TRUE)
#  
#  # generate the 1kb bins
#  data("hg19.gr", package = "compartmap")
#  kb_bins <- tileGenome(seqlengths = seqlengths(hg19.gr)["chr14"],
#                        tilewidth = 1000,
#                        cut.last.tile.in.chrom = TRUE)
#  
#  # import
#  bigwigs_lst <- lapply(bigwigs, function(x) {
#    importBigWig(x, bins = kb_bins,
#                 summarize = TRUE, genome = "hg19")
#  })
#  
#  # combine
#  bigwig_rse <- do.call(cbind, bigwigs_lst)
#  colnames(bigwig_rse) <- c("cell_1", "cell_2")
#  

## ----processCounts, warnings=FALSE, message=FALSE-----------------------------

# load the scATAC-seq example
# these data were pre-processed using the csaw package
data("k562_scatac_chr14", package = "compartmap")

# show the overall object structure
# NOTE that the colnames are also there and the assay name is 'counts'
k562_scatac_chr14


## ----rowranges----------------------------------------------------------------
rowRanges(k562_scatac_chr14)

## ----addrr, eval=FALSE--------------------------------------------------------
#  
#  # define a helper function for adding rowRanges
#  # modified from https://github.com/trichelab/velocessor/blob/master/R/import_plate_txis.R
#  # NOTE that you can modify the rtracklayer::import to not subset to gene level
#  # if using feature level information (e.g. a bed file of fragments for scATAC)
#  getRowRanges <- function(gtf, asys) {
#    gxs <- subset(rtracklayer::import(gtf), type=="gene")
#    names(gxs) <- gxs$gene_id
#    granges(gxs)[rownames(asys)]
#  }
#  
#  # import the example HEK293T SingleCellExperiment from the SMART-seq3 paper
#  data("ss3_umi_sce", package = "compartmap")
#  
#  # import the example GTF with the helper function
#  gtf_rowranges <- getRowRanges(system.file("extdata/grch38_91_hsapiens.gtf.gz",
#                                            package = "compartmap"),
#                                ss3_umi_sce)
#  
#  # add rowRanges to the SingleCellExperiment
#  rowRanges(ss3_umi_sce) <- gtf_rowranges
#  
#  # we can now proceed to the next steps and run the compartmap workflow
#  

## ----workflow, message=FALSE, warning=FALSE-----------------------------------

# Load example K562 data imported using importBigWig
data("k562_scrna_raw", package = "compartmap")

# TF-IDF transform the signals
k562_scrna_chr14_tfidf <- transformTFIDF(assay(k562_scrna_se_chr14))

# Add back the TF-IDF counts to the object in the counts slot
assay(k562_scrna_se_chr14, "counts") <- t(k562_scrna_chr14_tfidf)

# Compute chromatin domains at the group level
k562_scrna_chr14_raw_domains <- scCompartments(k562_scrna_se_chr14,
                                               chr = "chr14",
                                               res = 1e6,
                                               group = TRUE,
                                               bootstrap = TRUE,
                                               num.bootstraps = 10,
                                               genome = "hg19",
                                               assay = "rna")

# For single-cells, run the following
# k562_scrna_chr14_raw_domains <- scCompartments(k562_scrna_se_chr14,
#                                                chr = "chr14",
#                                                res = 1e6,
#                                                group = FALSE,
#                                                bootstrap = TRUE,
#                                                num.bootstraps = 10,
#                                                genome = "hg19",
#                                                assay = "rna")

# 'Fix' compartments with discordant sign coherence
k562_scrna_chr14_raw_domains.fix <- fixCompartments(k562_scrna_chr14_raw_domains)

# Plot results
plotAB(k562_scrna_chr14_raw_domains.fix,
       chr = "chr14",
       what = "flip.score",
       with.ci = TRUE,
       median.conf = TRUE)

## ----denoisermt, fig.height=4-------------------------------------------------

# Iterative denoising using Random Matrix Theory approach
k562_scrna_chr14_rmt <- getDenoisedCorMatrix(k562_scrna_chr14,
                                             res = 1e6,
                                             chr = "chr14",
                                             genome = "hg19",
                                             assay = "rna",
                                             iter = 2)

# Plot the interaction map
plotCorMatrix(k562_scrna_chr14_rmt,
              uppertri = TRUE)