## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----message=FALSE------------------------------------------------------------ library(NestLink) ## ----fig.retina=1------------------------------------------------------------- library(ggplot2) ESP <- rbind(getFC(), getNB()) p <- ggplot(ESP, aes(x = ESP_Prediction, fill = cond)) + geom_histogram(bins = 50, alpha = 0.4, position="identity") + labs(x = "detectability in LC-MS (ESP prediction)") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) print(p) ESP <- rbind(getFC(), NB.unambiguous(getNB())) p <- ggplot(ESP, aes(x = ESP_Prediction, fill = cond)) + geom_histogram(bins = 50, alpha = 0.4, position="identity") + labs(x = "detectability in LC-MS (ESP prediction)") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) print(p) ESP <- rbind(getFC(), NB.unique(getNB())) p <- ggplot(ESP, aes(x = ESP_Prediction, fill = cond)) + geom_histogram(bins = 50, alpha = 0.4, position="identity") + labs(x = "detectability in LC-MS (ESP prediction)") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) print(p) ESP <- rbind(getNB(), NB.unambiguous(getNB()), NB.unique(getNB())) p <- ggplot(ESP, aes(x = ESP_Prediction, fill = cond)) + geom_histogram(bins = 50, alpha = 0.4, position="identity") + labs(x = "detectability in LC-MS (ESP prediction)") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) print(p) table(ESP$cond) ## ----------------------------------------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub(); # load(query(eh, c("NestLink", "WU160118.RData"))[[1]]) load(getExperimentHubFilename("WU160118.RData")) WU <- WU160118 ## ----------------------------------------------------------------------------- PATTERN <- "^GS[ASTNQDEFVLYWGP]{7}(WR|WLTVR|WQEGGR|WLR|WQSR)$" idx <- grepl(PATTERN, WU$pep_seq) WU <- WU[idx & WU$pep_score > 25,] ## ----------------------------------------------------------------------------- WU$chargeInt <- as.integer(substr(WU$query_charge, 0, 1)) table(WU$chargeInt) ## ----------------------------------------------------------------------------- round(100 * (table(WU$chargeInt) / nrow(WU)), 1) ## ----fig.retina=1, fig.height=8----------------------------------------------- library(scales) ggplot(WU, aes(x = moverz * chargeInt, fill = (query_charge), colour = (query_charge))) + facet_wrap(~ datfilename, ncol = 2) + geom_histogram(binwidth= 10, alpha=.3, position="identity") + labs(title = "Precursor mass to charge frequency plot", subtitle = "Plotting frequency of precursor masses for each charge state", x = "Precursor mass [neutral mass]", y = "Frequency [counts]", fill = "Charge State", colour = "Charge State") + scale_x_continuous(breaks = pretty_breaks(8)) + theme_light() ## ----------------------------------------------------------------------------- WU$suffix <- substr(WU$pep_seq, 10, 100) ggplot(WU, aes(x = moverz * chargeInt, fill = suffix, colour = suffix)) + geom_histogram(binwidth= 10, alpha=.2, position="identity") + #facet_wrap(~ substr(pep_seq, 10, 100)) + theme_light() ## ----------------------------------------------------------------------------- ggplot(WU, aes(x = moverz * chargeInt, fill = suffix)) + geom_histogram(binwidth= 10, alpha=.9, position="identity") + facet_wrap(~ substr(pep_seq, 10, 100)) + theme_light() ## ----------------------------------------------------------------------------- ggplot(WU, aes(x = pep_score, fill = query_charge, colour = query_charge)) + geom_histogram(binwidth= 2, alpha=.5, position="identity") + facet_wrap(~ substr(pep_seq, 10, 100)) + theme_light() ## ----------------------------------------------------------------------------- # FC <- read.table(system.file("extdata/FC.tryptic", package = "NestLink"), # header = TRUE) FC <- getFC() FC$peptide <- (as.character(FC$peptide)) idx <- grep (PATTERN, FC$peptide) FC$cond <- "FC" FC$pim <- parentIonMass(FC$peptide) FC <- FC[nchar(FC$peptide) >2, ] FC$ssrc <- sapply(FC$peptide, ssrc) FC$peptideLength <- nchar(as.character(FC$peptide)) FC <- FC[idx,] head(FC) ## ----------------------------------------------------------------------------- aa_pool_x7 <- c(rep('A', 18), rep('S', 6), rep('T', 12), rep('N', 1), rep('Q', 1), rep('D', 11), rep('E', 11), rep('V', 12), rep('L', 2), rep('F', 1), rep('Y', 4), rep('W', 1), rep('G', 8), rep('P', 12)) aa_pool_x7.post <- c(rep('WR', 20), rep('WLTVR', 20), rep('WQEGGR', 20), rep('WLR', 20), rep('WQSR', 20)) ## ----------------------------------------------------------------------------- FC.pep_seq.freq <- table(unlist(strsplit(substr(FC$peptide, 3, 9), ""))) FC.freq <- round(FC.pep_seq.freq / sum(FC.pep_seq.freq) * 100, 3) FC.pep_seq.freq.post <- table(unlist((substr(FC$peptide, 10, 100)))) FC.freq.post <- round(FC.pep_seq.freq.post / sum(FC.pep_seq.freq.post) * 100, 3) WU.pep_seq.freq <- table(unlist(strsplit(substr(WU$pep_seq, 3, 9), ""))) WU.freq <- round(WU.pep_seq.freq / sum(WU.pep_seq.freq) * 100,3) WU.pep_seq.freq.post <- table(unlist(substr(WU$pep_seq, 10, 100))) WU.freq.post <- round(WU.pep_seq.freq.post / sum(WU.pep_seq.freq.post) *100,3) ## ----------------------------------------------------------------------------- table(aa_pool_x7) FC.freq WU.freq ## ----------------------------------------------------------------------------- AAfreq1 <- data.frame(aa = table(aa_pool_x7)) names(AAfreq1) <- c('AA', 'freq') AAfreq1 <-rbind(AAfreq1, df<-data.frame(AA=c('C','H','I','M','K','R'), freq=rep(0,6))) AAfreq1$cond <- 'theoretical frequency' AAfreq2 <- data.frame(aa=FC.freq) names(AAfreq2) <- c('AA', 'freq') AAfreq2$cond <- "db10.fasta" AAfreq3 <- data.frame(aa=WU.freq) names(AAfreq3) <- c('AA', 'freq') AAfreq3$cond <- "WU" #FC.all <- read.table(system.file("extdata/FC.tryptic", package = "NestLink"), # header = TRUE) FC.all <- getFC() PATTERN.all <- "^GS[A-Z]{7}(WR|WLTVR|WQEGGR|WLR|WQSR)$" FC.all <- as.character(FC.all$peptide)[grep(PATTERN.all, as.character(FC.all$peptide))] FC.all.pep_seq.freq <- table(unlist(strsplit(substr(FC.all, 3, 9), ""))) FC.all.freq <- round(FC.all.pep_seq.freq / sum(FC.all.pep_seq.freq) * 100, 3) FC.all.freq[20] <- 0 names(FC.all.freq) <- c(names(FC.all.freq)[1:19], "K") AAfreq4 <- data.frame(AA=names(FC.all.freq), freq=as.numeric(FC.all.freq)) AAfreq4$cond <- "sequenced frequency" AAfreq <- do.call('rbind', list(AAfreq1, AAfreq2, AAfreq3, AAfreq4)) AAfreq$freq <- as.numeric(AAfreq$freq) ## ----fig.retina=1------------------------------------------------------------- ggplot(data=AAfreq, aes(x=AA, y=freq, fill=cond)) + geom_bar(stat="identity", position="dodge") + labs(title = "amino acid frequency plot") + theme_light() ## ----------------------------------------------------------------------------- AAfreq1.post <- data.frame(aa=table(aa_pool_x7.post)) names(AAfreq1.post) <- c('AA', 'freq') AAfreq1.post$cond <- "aa_pool_x7" AAfreq2.post <- data.frame(aa=FC.freq.post) names(AAfreq2.post) <- c('AA', 'freq') AAfreq2.post$cond <- "db10.fasta" AAfreq3.post <- data.frame(aa=WU.freq.post) names(AAfreq3.post) <- c('AA', 'freq') AAfreq3.post$cond <- "WU" AAfreq.post <- do.call('rbind', list(AAfreq1.post, AAfreq2.post, AAfreq3.post)) ## ----NestLink_AAFreqPercent, fig.retina=1, fig.path='Figs/'------------------- df<-AAfreq[(AAfreq$cond != 'WU' & AAfreq$cond != 'db10.fasta'), ] df$freq<- as.numeric(df$freq) ggplot(data=df, aes(x=AA, y=freq, fill=cond)) + geom_bar(stat="identity", position="dodge") + labs(title = "Amino acid frequency plot") + ylab("Frequency [%]") + theme_light() ## ----NestLink_FCAAfreqAbsolut, fig.retina=1, fig.path='Figs/'----------------- FCfreq <- data.frame(table(unlist(strsplit(substr(FC$peptide, 3, 9), "")))) colnames(FCfreq) <- c('AA', 'freq') ggplot(data=FCfreq, aes(x=AA, y=freq)) + geom_bar(stat="identity", position="dodge") + labs(title = "Amino acid frequency plot") + theme_light() ## ----fig.retina=1------------------------------------------------------------- ggplot(data=AAfreq.post, aes(x=AA, y=freq, fill=cond)) + geom_bar(stat="identity", position="dodge") + labs(title = "suffix frequency plot") + theme_light() ## ----fig.retina=1, fig.height=12---------------------------------------------- WU$ssrc <- sapply(as.character(WU$pep_seq), ssrc) WU$suffix <- substr(WU$pep_seq, 10,100) ggplot(WU, aes(x = RTINSECONDS, y = ssrc)) + geom_point(aes(alpha=pep_score, colour = datfilename)) + facet_wrap(~ datfilename * suffix, ncol = 5, nrow = 8) + geom_smooth(method = 'lm') ## ----------------------------------------------------------------------------- cor(WU$RTINSECONDS, WU$ssrc, method = 'spearman') ## ----------------------------------------------------------------------------- WU <- WU160118 PATTERN <- "^GS[ASTNQDEFVLYWGP]{7}(WR|WLTVR|WQEGGR|WLR|WQSR)$" idx <- grepl(PATTERN, WU$pep_seq) # Mascot score cut-off valye 25 WU <- WU[idx & WU$pep_score > 25,] ## ----fig.retina=1------------------------------------------------------------- WU$suffix <- substr(WU$pep_seq, 10, 100) WU$ssrc <- sapply(as.character(WU$pep_seq), ssrc) ggplot(WU, aes(x = RTINSECONDS, fill = datfilename)) + geom_histogram(bins = 50) ggplot(WU, aes(x = ssrc, fill = datfilename)) + geom_histogram(bins = 50) ggplot(WU, aes(x = RTINSECONDS, fill = suffix)) + geom_histogram(bins = 50) ggplot(WU, aes(x = ssrc, fill = suffix)) + geom_histogram(bins = 50) ## ----------------------------------------------------------------------------- WU <- WU[WU$datfilename == "F255737",] WU <- aggregate(WU$RTINSECONDS ~ WU$pep_seq, FUN = min) names(WU) <-c("pep_seq", "RTINSECONDS") WU$suffix <- substr(WU$pep_seq, 10, 100) WU$peptide_mass <- parentIonMass(as.character(WU$pep_seq)) WU$ssrc <- sapply(as.character(WU$pep_seq), ssrc) WU$datfilename <- "F255737" ## ----fig.retina=1, fig.width=12, fig.height=4--------------------------------- ggplot(WU, aes(x = RTINSECONDS, y = peptide_mass)) + geom_point(aes(colour = suffix)) + facet_wrap(~ datfilename * suffix, ncol=5) ggplot(WU, aes(x = RTINSECONDS, fill = suffix)) + geom_histogram(bins = 20) + facet_wrap(~ datfilename * suffix, ncol=5) ## ----fig.retina=1, fig.width=12, fig.height=4--------------------------------- ggplot(WU, aes(x = ssrc, y = peptide_mass)) + geom_point(aes(colour = suffix)) + facet_wrap(~ datfilename * suffix, ncol = 5) ggplot(WU, aes(x = ssrc,fill = suffix)) + geom_histogram(bins = 20) + facet_wrap(~ datfilename * suffix, ncol=5) ## ----fig.retina=1, fig.width=8, fig.height=4---------------------------------- ggplot(WU, aes(x = RTINSECONDS, y = peptide_mass)) + geom_point(aes(colour = suffix), size = 3.0, alpha = 0.1) ## ----fig.retina=1, fig.width=8, fig.height=4---------------------------------- ggplot(WU, aes(x = ssrc, y = peptide_mass)) + geom_point(aes(colour = suffix), size = 3.0, alpha = 0.1) ## ----fig.retina=1, fig.height=12---------------------------------------------- WU <- do.call('rbind', lapply(c(25,35,45), function(cutoff){ WU <- WU160118 PATTERN <- "^GS[ASTNQDEFVLYWGP]{7}(WR|WLTVR|WQEGGR|WLR|WQSR)$" idx <- grepl(PATTERN, WU$pep_seq) WU <- WU[idx & WU$pep_score > cutoff, ] WU <- WU[WU$datfilename == "F255737",] WU <- aggregate(WU$RTINSECONDS ~ WU$pep_seq, FUN=min) names(WU) <-c("pep_seq","RTINSECONDS") WU$suffix <- substr(WU$pep_seq, 10, 100) WU$peptide_mass <- parentIonMass(as.character(WU$pep_seq)) WU$ssrc <- sapply(as.character(WU$pep_seq), ssrc) WU$datfilename <- "F255737" WU$mascotScoreCutOff <- cutoff WU})) ggplot(WU, aes(x = RTINSECONDS, y = peptide_mass)) + geom_point(aes(colour = suffix), size = 3.0, alpha = 0.1) + facet_wrap(~ mascotScoreCutOff, ncol = 1) ggplot(WU, aes(x = ssrc, y = peptide_mass)) + geom_point(aes(colour = suffix), size = 3.0, alpha = 0.1) + facet_wrap(~ mascotScoreCutOff, ncol = 1 ) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()