## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----installation, eval=FALSE------------------------------------------------- # if(!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("GeoDiff") ## ----setup, results='hide', warning=FALSE, message=FALSE---------------------- library(GeoDiff) library(dplyr) library(ggplot2) library(NanoStringNCTools) library(GeomxTools) library(Biobase) library(reshape2) ## ----load data---------------------------------------------------------------- data("kidney") kidney head(pData(kidney)) table(pData(kidney)$`slide name`) table(pData(kidney)$region) ## ----------------------------------------------------------------------------- kidney <- kidney[, which(kidney$`slide name` %in% c("disease3", "normal3"))][, c(1:4, 48:51, 60:63, 115:118)] table(kidney$region, kidney$`slide name`) table(kidney$`slide name`, kidney$class) ## ---- probe level------------------------------------------------------------- featureType(kidney) paste("## of Negative Probes:", sum(fData(kidney)$Negative)) ## ----------------------------------------------------------------------------- kidney <- fitPoisBG(kidney) summary(pData(kidney)$sizefact) summary(fData(kidney)$featfact[fData(kidney)$Negative]) ## ----------------------------------------------------------------------------- set.seed(123) kidney_diag <- diagPoisBG(kidney) notes(kidney_diag)$disper ## ---- eval=FALSE-------------------------------------------------------------- # which(assayDataElement(kidney_diag, "low_outlier") == 1, arr.ind = TRUE) # which(assayDataElement(kidney_diag, "up_outlier") == 1, arr.ind = TRUE) ## ----------------------------------------------------------------------------- kidney <- fitPoisBG(kidney, groupvar = "slide name") ## ----------------------------------------------------------------------------- set.seed(123) kidney_diag <- diagPoisBG(kidney, split = TRUE) notes(kidney_diag)$disper_sp ## ----------------------------------------------------------------------------- all0probeidx <- which(rowSums(exprs(kidney))==0) if (length(all0probeidx) > 0) { kidney <- kidney[-all0probeidx, ] } kidney <- aggreprobe(kidney, use = "cor") ## ----------------------------------------------------------------------------- kidney <- BGScoreTest(kidney) sum(fData(kidney)[["pvalues"]] < 1e-3, na.rm = TRUE) ## ----------------------------------------------------------------------------- kidneySplit <- BGScoreTest(kidney, split = TRUE, removeoutlier = FALSE, useprior = FALSE) sum(fData(kidneySplit)[["pvalues"]] < 1e-3, na.rm = TRUE) kidneyOutliers <- BGScoreTest(kidney, split = FALSE, removeoutlier = TRUE, useprior = FALSE) sum(fData(kidneyOutliers)[["pvalues"]] < 1e-3, na.rm = TRUE) kidneyPrior <- BGScoreTest(kidney, split = FALSE, removeoutlier = FALSE, useprior = TRUE) sum(fData(kidneyPrior)[["pvalues"]] < 1e-3, na.rm = TRUE) ## ----------------------------------------------------------------------------- set.seed(123) kidney <- fitNBth(kidney) features_high <- rownames(fData(kidney))[fData(kidney)$feature_high_fitNBth == 1] length(features_high) ## ----------------------------------------------------------------------------- bgMean <- mean(fData(kidney)$featfact, na.rm = TRUE) notes(kidney)[["threshold"]] bgMean ## ----------------------------------------------------------------------------- cor(kidney$sizefact, kidney$sizefact_fitNBth) plot(kidney$sizefact, kidney$sizefact_fitNBth, xlab = "Background Size Factor", ylab = "Signal Size Factor") abline(a = 0, b = 1) ## ----------------------------------------------------------------------------- # get only biological probes posdat <- kidney[-which(fData(kidney)$CodeClass == "Negative"), ] posdat <- exprs(posdat) quan <- sapply(c(0.75, 0.8, 0.9, 0.95), function(y) apply(posdat, 2, function(x) quantile(x, probs = y))) corrs <- apply(quan, 2, function(x) cor(x, kidney$sizefact_fitNBth)) names(corrs) <- c(0.75, 0.8, 0.9, 0.95) corrs quan75 <- apply(posdat, 2, function(x) quantile(x, probs = 0.75)) ## ----------------------------------------------------------------------------- kidney <- QuanRange(kidney, split = FALSE, probs = c(0.75, 0.8, 0.9, 0.95)) corrs <- apply(pData(kidney)[, as.character(c(0.75, 0.8, 0.9, 0.95))], 2, function(x) cor(x, kidney$sizefact_fitNBth)) names(corrs) <- c(0.75, 0.8, 0.9, 0.95) corrs ## ----------------------------------------------------------------------------- ROIs_high <- sampleNames(kidney)[which((quantile(fData(kidney)[["para"]][, 1], probs = 0.90, na.rm = TRUE) - notes(kidney)[["threshold"]])*kidney$sizefact_fitNBth>2)] features_all <- rownames(posdat) ## ----------------------------------------------------------------------------- NBthDEmod <- fitNBthDE(form = ~region, split = FALSE, object = kidney) str(NBthDEmod) ## ----------------------------------------------------------------------------- pData(kidney)$region <- factor(pData(kidney)$region, levels=c("glomerulus", "tubule")) table(pData(kidney)[, c("region", "slide name")]) features_high_subset <- features_high[1:30] ## ---- message=FALSE----------------------------------------------------------- set.seed(123) NBthmDEmod <- fitNBthmDE(object = kidney, form = ~ region+(1|`slide name`), ROIs_high = ROIs_high, split = FALSE, features_all = features_high_subset, preci1=NBthDEmod$preci1, threshold_mean = bgMean, sizescale = TRUE, controlRandom=list(nu=12, nmh_e=400, thin_e=60)) str(NBthmDEmod) ## ---- message=FALSE----------------------------------------------------------- set.seed(123) NBthmDEmodslope <- fitNBthmDE(object = kidney, form = ~ region+(1+region|`slide name`), ROIs_high = ROIs_high, split = FALSE, features_all = features_high_subset, preci1=NBthDEmod$preci1, threshold_mean = bgMean, sizescale = TRUE, controlRandom=list(nu=12, nmh_e=400, thin_e=60)) ## ---- fig.height=4, fig.width=4----------------------------------------------- plot(NBthDEmod$para[2,names(NBthmDEmod$para[2,])], NBthmDEmod$para[2,], xlab = "Fixed Effect Model Output Parameters", ylab = "Mixed Effect Model Output Parameters") abline(a=0,b=1) plot(NBthDEmod$para[2,names(NBthmDEmodslope$para[2,])], NBthmDEmodslope$para[2,], xlab = "Fixed Effect Model Output Parameters", ylab = "Random Slope Model Output Parameters") abline(a=0,b=1) ## ----------------------------------------------------------------------------- diff_high <- names(which(abs(NBthDEmod$para[2,names(NBthmDEmodslope$para[2,])]- NBthmDEmodslope$para[2,])>0.6)) diff_high set.seed(123) NBthmDEmodslope$theta[3, "ACADM"] annot <- pData(kidney) annot$ACADM <- posdat["ACADM",] ## ---- fig.height=4, fig.width=4----------------------------------------------- plot_dat <- annot[,c("region", "ACADM", "slide name")] p <- ggplot(plot_dat, aes(x=`slide name`, y=ACADM, fill=region)) + geom_boxplot() plot(p) ## ----------------------------------------------------------------------------- coeff <- coefNBth(NBthDEmod) coefr <- coefNBth(NBthmDEmod) coefrslope <- coefNBth(NBthmDEmodslope) str(coeff) ## ----------------------------------------------------------------------------- rownames(coeff$estimate)[-1] ## ----------------------------------------------------------------------------- DEtab <- DENBth(coeff, variable = "regiontubule") DEtabr <- DENBth(coefr, variable = "regiontubule") DEtabrslope <- DENBth(coefrslope, variable = "regiontubule") head(DEtab) ## ----------------------------------------------------------------------------- set.seed(123) names(assayData(kidney)) kidney <- fitPoisthNorm(object = kidney, ROIs_high = ROIs_high, threshold_mean = bgMean, sizescalebythreshold = TRUE) names(assayData(kidney)) head(fData(kidney)[,(ncol(fData(kidney))-6):ncol(fData(kidney))]) head(pData(kidney)) ## ----------------------------------------------------------------------------- set.seed(123) kidney <- fitPoisthNorm(object = kidney, split = TRUE, ROIs_high = ROIs_high, threshold_mean = bgMean, sizescalebythreshold = TRUE) names(assayData(kidney)) ## ----------------------------------------------------------------------------- norm_dat_backqu75 <- sweep(posdat[, ROIs_high], 2, (kidney[, ROIs_high]$sizefact * bgMean), FUN = "-") %>% sweep(., 2, quan75[ROIs_high], FUN = "/") %>% pmax(., 0) %>% `+`(., 0.01) %>% log2() ## ---- fig.height=4, fig.width=4----------------------------------------------- dat_plot <- cbind(pData(kidney)[ROIs_high, c("slide name", "region")], t(norm_dat_backqu75[features_all, ])) dat_plot <- cbind(dat_plot, ROI_ID = ROIs_high) dat_plot <- melt(dat_plot, id.vars = c("ROI_ID", "slide name", "region")) ggplot(dat_plot, aes(x = value)) + geom_density(aes(fill = region, group = ROI_ID, alpha = 0.01)) + facet_grid(~`slide name`) + ggtitle("Q3 Normalization")+ labs(x = "Q3 Normalized Value (log2)") ## ---- fig.height=4, fig.width=4----------------------------------------------- annot <- pData(kidney) dat_plot <- cbind(annot[ROIs_high, c("slide name", "region")], t(assayDataElement(kidney[features_high, ROIs_high], "normmat_sp"))) dat_plot <- cbind(dat_plot, ROI_ID = ROIs_high) dat_plot <- melt(dat_plot, id.vars = c("ROI_ID", "slide name", "region")) ggplot(dat_plot, aes(x = value)) + geom_density(aes(fill = region, group = ROI_ID, alpha = 0.01)) + facet_wrap(~`slide name`) + ggtitle("Poisson threshold normalization")+ labs(x = "Poisson Threshold Normalized Value (log2)") ## ---- fig.height=4, fig.width=4----------------------------------------------- dat <- t(norm_dat_backqu75[features_high, ]) dat_pca <- prcomp(dat, center = TRUE, scale. = TRUE) dat <- as.data.frame(dat) dat$PC1 <- dat_pca$x[, 1] dat$PC2 <- dat_pca$x[, 2] dat$id <- annot$`slide name`[match(ROIs_high, colnames(posdat))] dat$class <- annot$class[match(ROIs_high, colnames(posdat))] dat$region <- annot$region[match(ROIs_high, colnames(posdat))] dat$sizeratio <- kidney[, ROIs_high]$sizefact_fitNBth / kidney[, ROIs_high]$sizefact p <- ggplot(data = dat, aes(x = PC1, y = PC2)) + geom_point(aes(colour = paste(class, region))) + theme_bw()+ labs(title = "Q3 Normalized Data") plot(p) p <- ggplot(data = dat, aes(x = PC1, y = PC2)) + geom_point(aes(colour = log2(sizeratio))) + theme_bw()+ scale_color_gradient2(high = "gold", mid = "grey50", low = "darkblue", midpoint = 0.2)+ labs(title = "Q3 Normalized Data") plot(p) ## ---- fig.height=4, fig.width=4----------------------------------------------- dat <- t(assayDataElement(kidney[features_high, ROIs_high],"normmat_sp")) dat_pca <- prcomp(dat, center = TRUE, scale. = TRUE) dat <- as.data.frame(dat) dat$PC1 <- dat_pca$x[, 1] dat$PC2 <- dat_pca$x[, 2] dat$id <- annot$`slide name`[match(ROIs_high, colnames(posdat))] dat$class <- annot$class[match(ROIs_high, colnames(posdat))] dat$region <- annot$region[match(ROIs_high, colnames(posdat))] dat$sizeratio <- kidney[, ROIs_high]$sizefact_fitNBt / kidney[, ROIs_high]$sizefact p <- ggplot(data = dat, aes(x = PC1, y = PC2)) + geom_point(aes(colour = paste(class, region))) + theme_bw()+ labs(title = "Poisson Threshold Normalized Data") plot(p) p <- ggplot(data = dat, aes(x = PC1, y = PC2)) + geom_point(aes(colour = log2(sizeratio))) + theme_bw()+ scale_color_gradient2(high = "gold", mid = "grey50", low = "darkblue", midpoint = 0.2)+ labs(title = "Poisson Threshold Normalized Data") plot(p) ## ----------------------------------------------------------------------------- sessionInfo()