## ----biocstyle, echo = FALSE, results = "asis"--------------------------------
BiocStyle::markdown()

## ----init, message = FALSE, echo = FALSE, results = "hide"--------------------
## Silently loading all packages
library(BiocStyle)
library(xcms)
library(faahKO)
library(pander)
## Use socket based parallel processing on Windows systems
## if (.Platform$OS.type == "unix") {
##     register(bpstart(MulticoreParam(3)))
## } else {
##     register(bpstart(SnowParam(3)))
## }
register(SerialParam())


## ----load-libs-pheno, message = FALSE-----------------------------------------
library(xcms)
library(faahKO)
library(RColorBrewer)
library(pander)
library(magrittr)
library(pheatmap)
library(SummarizedExperiment)

## Get the full path to the CDF files
cdfs <- dir(system.file("cdf", package = "faahKO"), full.names = TRUE,
            recursive = TRUE)[c(1, 2, 5, 6, 7, 8, 11, 12)]
## Create a phenodata data.frame
pd <- data.frame(sample_name = sub(basename(cdfs), pattern = ".CDF",
                                   replacement = "", fixed = TRUE),
                 sample_group = c(rep("KO", 4), rep("WT", 4)),
                 stringsAsFactors = FALSE)

## ----load-with-msnbase, message = FALSE---------------------------------------
raw_data <- readMSData(files = cdfs, pdata = new("NAnnotatedDataFrame", pd),
                       mode = "onDisk")

## ----subsetting, message = FALSE, echo = TRUE---------------------------------
raw_data <- filterRt(raw_data, c(2500, 3500))

## ----data-inspection-rtime, message = FALSE-----------------------------------
head(rtime(raw_data))

## ----data-inspection-mz, message = FALSE--------------------------------------
mzs <- mz(raw_data)

## Split the list by file
mzs_by_file <- split(mzs, f = fromFile(raw_data))

length(mzs_by_file)

## ----data-inspection-bpc, message = FALSE, fig.align = "center", fig.width = 12, fig.height = 6----
## Get the base peak chromatograms. This reads data from the files.
bpis <- chromatogram(raw_data, aggregationFun = "max")
## Define colors for the two groups
group_colors <- paste0(brewer.pal(3, "Set1")[1:2], "60")
names(group_colors) <- c("KO", "WT")

## Plot all chromatograms.
plot(bpis, col = group_colors[raw_data$sample_group])

## ----data-inspection-chromatogram, message = FALSE----------------------------
bpi_1 <- bpis[1, 1]
head(rtime(bpi_1))
head(intensity(bpi_1))

## ----data-inspection-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Distribution of total ion currents per file."----
## Get the total ion current by file
tc <- split(tic(raw_data), f = fromFile(raw_data))
boxplot(tc, col = group_colors[raw_data$sample_group],
        ylab = "intensity", main = "Total ion current")

## ----data-inspection-bpc-heatmap, message = FALSE, fig.align = "center", fig.width = 7, fig.height = 6, fig.cap = "Grouping of samples based on similarity of their base peak chromatogram."----
## Bin the BPC
bpis_bin <- MSnbase::bin(bpis, binSize = 2)

## Calculate correlation on the log2 transformed base peak intensities
cormat <- cor(log2(do.call(cbind, lapply(bpis_bin, intensity))))
colnames(cormat) <- rownames(cormat) <- raw_data$sample_name

## Define which phenodata columns should be highlighted in the plot
ann <- data.frame(group = raw_data$sample_group)
rownames(ann) <- raw_data$sample_name

## Perform the cluster analysis
pheatmap(cormat, annotation = ann,
         annotation_color = list(group = group_colors))

## ----peak-detection-plot-eic, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 5, fig.cap = "Extracted ion chromatogram for one peak."----
## Define the rt and m/z range of the peak area
rtr <- c(2700, 2900)
mzr <- c(334.9, 335.1)
## extract the chromatogram
chr_raw <- chromatogram(raw_data, mz = mzr, rt = rtr)
plot(chr_raw, col = group_colors[chr_raw$sample_group])

## ----peak-detection-plot-ms-data, message = FALSE, warning = FALSE, fig.aligh = "center", fig.width = 14, fig.height = 14, fig.cap = "Visualization of the raw MS data for one peak. For each plot: upper panel: chromatogram plotting the intensity values against the retention time, lower panel m/z against retention time plot. The individual data points are colored according to the intensity."----
raw_data %>%
    filterRt(rt = rtr) %>%
    filterMz(mz = mzr) %>%
    plot(type = "XIC")

## ----peak-detection-eic, message = FALSE--------------------------------------
xchr <- findChromPeaks(chr_raw, param = CentWaveParam(snthresh = 2))

## ----peak-detection-eic-chromPeaks--------------------------------------------
head(chromPeaks(xchr))

## ----peak-detection-chromatogram-chromPeakData--------------------------------
chromPeakData(xchr)

## ----peak-detection-eic-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. Peak area of identified chromatographic peaks are highlighted in the sample group color."----
sample_colors <- group_colors[xchr$sample_group]
plot(xchr, col = sample_colors,
     peakBg = sample_colors[chromPeaks(xchr)[, "column"]])


## ----peak-detection-centwave, message = FALSE, results = "hide"---------------
cwp <- CentWaveParam(peakwidth = c(20, 80), noise = 5000,
                     prefilter = c(6, 5000))
xdata <- findChromPeaks(raw_data, param = cwp)

## ----peak-detection-chromPeaks, message = FALSE-------------------------------
head(chromPeaks(xdata))

## ----peak-detection-chromPeakData---------------------------------------------
chromPeakData(xdata)

## ----peak-postprocessing, message = FALSE-------------------------------------
mpp <- MergeNeighboringPeaksParam(expandRt = 4)
xdata_pp <- refineChromPeaks(xdata, mpp)

## ----peak-postprocessing-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The splitted peak was merged into one."----
mzr_1 <- 305.1 + c(-0.01, 0.01)
chr_1 <- chromatogram(filterFile(xdata, 1), mz = mzr_1)
chr_2 <- chromatogram(filterFile(xdata_pp, 1), mz = mzr_1)
par(mfrow = c(1, 2))
plot(chr_1)
plot(chr_2)

## ----peak-postprocessing-not-merged, fig.widht = 10, fig.height = 5, fig.cap = "Result from the peak refinement step. Left: data before processing, right: after refinement. The peaks were not merged."----
mzr_1 <- 496.2 + c(-0.01, 0.01)
chr_1 <- chromatogram(filterFile(xdata, 1), mz = mzr_1)
chr_2 <- chromatogram(filterFile(xdata_pp, 1), mz = mzr_1)
par(mfrow = c(1, 2))
plot(chr_1)
plot(chr_2)

## ----peak-postprocessing-chr, fig..width = 5, fig.height = 5------------------
res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05))
chromPeaks(res)
plot(res)

## -----------------------------------------------------------------------------
xdata <- xdata_pp

## ----peak-detection-peaks-per-sample, message = FALSE, results = "asis"-------
summary_fun <- function(z)
    c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"]))

T <- lapply(split.data.frame(
    chromPeaks(xdata), f = chromPeaks(xdata)[, "sample"]),
    FUN = summary_fun)
T <- do.call(rbind, T)
rownames(T) <- basename(fileNames(xdata))
pandoc.table(
    T,
    caption = paste0("Summary statistics on identified chromatographic",
                     " peaks. Shown are number of identified peaks per",
                     " sample and widths/duration of chromatographic ",
                     "peaks."))

## ----peak-detection-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Identified chromatographic peaks in the m/z by retention time space for one sample."----
plotChromPeaks(xdata, file = 3)

## ----peak-detection-chrom-peak-image, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Frequency of identified chromatographic peaks along the retention time axis. The frequency is color coded with higher frequency being represented by yellow-white. Each line shows the peak frequency for one file."----
plotChromPeakImage(xdata)

## ----peak-detection-eic-example-peak, message = FALSE-------------------------
chr_ex <- chromatogram(xdata, mz = mzr, rt = rtr)
chromPeaks(chr_ex)

## ----peak-detection-highlight-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. The rectangles indicate the identified chromatographic peaks per sample."----
sample_colors <- group_colors[chr_ex$sample_group]
plot(chr_ex, col = sample_colors, peakType = "rectangle",
     peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]],
     peakBg = NA)

## ----peak-detection-highlight-chrom-peaks-plot-polygon, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. The signal area of identified chromatographic peaks are filled with a color."----
plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2,
     peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]])

## ----peak-detection-chrom-peak-table-selected, message = FALSE, results = "asis"----
pander(chromPeaks(xdata, mz = mzr, rt = rtr),
       caption = paste("Identified chromatographic peaks in a selected ",
                       "m/z and retention time range."))

## ----peak-detection-chrom-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak intensity distribution per sample."----
## Extract a list of per-sample peak intensities (in log2 scale)
ints <- split(log2(chromPeaks(xdata)[, "into"]),
              f = chromPeaks(xdata)[, "sample"])
boxplot(ints, varwidth = TRUE, col = group_colors[xdata$sample_group],
        ylab = expression(log[2]~intensity), main = "Peak intensities")
grid(nx = NA, ny = NULL)

## ----alignment-obiwarp, message = FALSE, results = "hide"---------------------
xdata <- adjustRtime(xdata, param = ObiwarpParam(binSize = 0.6))

## ----alignment-rtime, message = FALSE-----------------------------------------
## Extract adjusted retention times
head(adjustedRtime(xdata))

## Or simply use the rtime method
head(rtime(xdata))

## ----alignment-obiwarp-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Obiwarp aligned data. Base peak chromatogram after alignment (top) and difference between adjusted and raw retention times along the retention time axis (bottom)."----
## Get the base peak chromatograms.
bpis_adj <- chromatogram(xdata, aggregationFun = "max", include = "none")
par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5))
plot(bpis_adj, col = group_colors[bpis_adj$sample_group])
## Plot also the difference of adjusted to raw retention time.
plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])

## ----alignment-peak-groups-example-peak, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example extracted ion chromatogram before (top) and after alignment (bottom)."----
par(mfrow = c(2, 1))
## Plot the raw data
plot(chr_raw, col = group_colors[chr_raw$sample_group])

## Extract the chromatogram from the adjusted object
chr_adj <- chromatogram(xdata, rt = rtr, mz = mzr)
plot(chr_adj, col = group_colors[chr_raw$sample_group], peakType = "none")

## ----subset-define, message = FALSE, warning = FALSE--------------------------
xdata <- dropAdjustedRtime(xdata)

## Define the experimental layout
xdata$sample_type <- "study"
xdata$sample_type[c(1, 4, 7)] <- "QC"

## ----alignment-subset, message = FALSE, warning = FALSE-----------------------
## Initial peak grouping. Use sample_type as grouping variable
pdp_subs <- PeakDensityParam(sampleGroups = xdata$sample_type,
                             minFraction = 0.9)
xdata <- groupChromPeaks(xdata, param = pdp_subs)

## Define subset-alignment options and perform the alignment
pgp_subs <- PeakGroupsParam(minFraction = 0.85,
                            subset = which(xdata$sample_type == "QC"),
                            subsetAdjust = "average", span = 0.4)
xdata <- adjustRtime(xdata, param = pgp_subs)

## ----alignment-subset-plot-2, message = FALSE, warning = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Subset-alignment results with option average. Difference between adjusted and raw retention times along the retention time axis. Samples on which the alignment models were estimated are shown in green, study samples in grey."----
clrs <- rep("#00000040", 8)
clrs[xdata$sample_type == "QC"] <- c("#00ce0080")
par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5))
plot(chromatogram(xdata, aggregationFun = "sum"),
     col = clrs, peakType = "none")
plotAdjustedRtime(xdata, col = clrs, peakGroupsPch = 1,
                  peakGroupsCol = "#00ce0040")

## ----correspondence-example-slice, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example for peak density correspondence. Upper panel: chromatogram for an mz slice with multiple chromatographic peaks. lower panel: identified chromatographic peaks at their retention time (x-axis) and index within samples of the experiments (y-axis) for different values of the bw parameter. The black line represents the peak density estimate. Grouping of peaks (based on the provided settings) is indicated by grey rectangles."----
## Define the mz slice.
mzr <- c(305.05, 305.15)

## Extract and plot the chromatograms
chr_mzr <- chromatogram(xdata, mz = mzr)
## Define the parameters for the peak density method
pdp <- PeakDensityParam(sampleGroups = xdata$sample_group,
                        minFraction = 0.4, bw = 30)
plotChromPeakDensity(chr_mzr, col = sample_colors, param = pdp,
                     peakBg = sample_colors[chromPeaks(chr_mzr)[, "sample"]],
                     peakCol = sample_colors[chromPeaks(chr_mzr)[, "sample"]],
                     peakPch = 16)


## ----correspondence, message = FALSE------------------------------------------
## Perform the correspondence
pdp <- PeakDensityParam(sampleGroups = xdata$sample_group,
                        minFraction = 0.4, bw = 30)
xdata <- groupChromPeaks(xdata, param = pdp)

## ----quantify-----------------------------------------------------------------
res <- quantify(xdata, value = "into")

## ----quantify-colData---------------------------------------------------------
colData(res)

## ----quantify-rowData---------------------------------------------------------
rowData(res)

## -----------------------------------------------------------------------------
head(assay(res))

## ----correspondence-featureDefs, message = FALSE------------------------------
## Extract the feature definitions
featureDefinitions(xdata)

## ----correspondence-feature-values, message = FALSE---------------------------
## Extract the into column for each feature.
head(featureValues(xdata, value = "into"))

## ----fill-chrom-peaks, message = FALSE----------------------------------------
xdata <- fillChromPeaks(xdata, param = ChromPeakAreaParam())

head(featureValues(xdata))

## ----fill-chrom-peaks-compare, message = FALSE--------------------------------
## Missing values before filling in peaks
apply(featureValues(xdata, filled = FALSE), MARGIN = 2,
      FUN = function(z) sum(is.na(z)))

## Missing values after filling in peaks
apply(featureValues(xdata), MARGIN = 2,
      FUN = function(z) sum(is.na(z)))


## ----export-result, eval = FALSE, echo = FALSE--------------------------------
#  save(xdata, file = "xdata.RData")

## ----featureSummary, message = FALSE------------------------------------------
head(featureSummary(xdata, group = xdata$sample_group))

## -----------------------------------------------------------------------------
assays(res)$raw_filled <- featureValues(xdata, filled = TRUE)

## -----------------------------------------------------------------------------
assayNames(res)

head(assay(res, "raw"))
head(assay(res, "raw_filled"))

## ----featureChromatograms, message = FALSE------------------------------------
feature_chroms <- featureChromatograms(xdata, features = 1:4)

feature_chroms

## ----feature-eic, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Extracted ion chromatograms for features 1 to 4."----
plot(feature_chroms, col = sample_colors,
     peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]])


## -----------------------------------------------------------------------------
eic_2 <- feature_chroms[2, ]
chromPeaks(eic_2)

## ----final-pca, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "PCA for the faahKO data set, un-normalized intensities."----
## Extract the features and log2 transform them
ft_ints <- log2(assay(res, "raw_filled"))

## Perform the PCA omitting all features with an NA in any of the
## samples. Also, the intensities are mean centered.
pc <- prcomp(t(na.omit(ft_ints)), center = TRUE)

## Plot the PCA
cols <- group_colors[xdata$sample_group]
pcSummary <- summary(pc)
plot(pc$x[, 1], pc$x[,2], pch = 21, main = "",
     xlab = paste0("PC1: ", format(pcSummary$importance[2, 1] * 100,
                                   digits = 3), " % variance"),
     ylab = paste0("PC2: ", format(pcSummary$importance[2, 2] * 100,
                                   digits = 3), " % variance"),
     col = "darkgrey", bg = cols, cex = 2)
grid()
text(pc$x[, 1], pc$x[,2], labels = xdata$sample_name, col = "darkgrey",
     pos = 3, cex = 2)


## ----processhistory, message = FALSE------------------------------------------
processHistory(xdata)

## ----processhistory-select, message = FALSE-----------------------------------
ph <- processHistory(xdata, type = "Retention time correction")

ph

## ----processhistory-param, message = FALSE------------------------------------
## Access the parameter
processParam(ph[[1]])


## ----subset-filterFile, message = FALSE---------------------------------------
subs <- filterFile(xdata, file = c(2, 4))

## Do we have identified chromatographic peaks?
hasChromPeaks(subs)

## ----subset-filterFile-2, message = FALSE-------------------------------------
## Do we still have features?
hasFeatures(subs)

## Do we still have adjusted retention times?
hasAdjustedRtime(subs)

## ----subset-filterFile-3, message = FALSE-------------------------------------
subs <- filterFile(xdata, keepAdjustedRtime = TRUE)

hasAdjustedRtime(subs)

## ----subset-filterRt, message = FALSE-----------------------------------------
subs <- filterRt(xdata, rt = c(3000, 3500))

range(rtime(subs))

## ----subset-filterRt-2, message = FALSE---------------------------------------
hasAdjustedRtime(subs)

## ----subset-filterRt-3, message = FALSE---------------------------------------
hasChromPeaks(subs)

range(chromPeaks(subs)[, "rt"])

## ----subset-bracket, message = FALSE, warning = FALSE-------------------------
## Extract all data from the 3rd file.
one_file <- filterFile(xdata, file = 3)

one_file_2 <- xdata[fromFile(xdata) == 3]

## Is the content the same?
all.equal(one_file[[2]], one_file_2[[2]])

## ----subset-bracket-peaks, message = FALSE------------------------------------
## Subsetting with filterFile preserves chromatographic peaks
head(chromPeaks(one_file))

## Subsetting with [ not
head(chromPeaks(one_file_2))

## ----subset-bracket-keepAdjustedRtime, message = FALSE, warnings = FALSE------
subs <- xdata[20:30, keepAdjustedRtime = TRUE]

hasAdjustedRtime(subs)

## Access adjusted retention times:
rtime(subs)

## Access raw retention times:
rtime(subs, adjusted = FALSE)

## ----subset-double-bracket, message = FALSE-----------------------------------
## Extract a single spectrum
xdata[[14]]

## ----subset-split, message = FALSE--------------------------------------------
x_list <- split(xdata, f = fromFile(xdata), keepAdjustedRtime = TRUE)

lengths(x_list)

lapply(x_list, hasAdjustedRtime)

## ----multicore, message = FALSE, eval = FALSE---------------------------------
#  register(bpstart(MulticoreParam(2)))

## ----snow, message = FALSE, eval = FALSE--------------------------------------
#  register(bpstart(SnowParam(2)))