## ----setup, echo=FALSE, cache=FALSE------------------------------------------- ## Global options options(max.print="100") knitr::opts_chunk$set( collapse = TRUE, comment = "#>") knitr::opts_knit$set(width=75) ## ----silentload, include=FALSE------------------------------------------------ #Silent load all dependencies for vignette library(factR) library(AnnotationHub) library(Biostrings) library(BSgenome.Mmusculus.UCSC.mm10) library(GenomicFeatures) library(rtracklayer) library(tidyverse) ## ----install.factR, eval = FALSE---------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("factR") ## ----loadfactR---------------------------------------------------------------- library(factR) ## ----importGTF---------------------------------------------------------------- gtf <- system.file("extdata", "sc_merged_sample.gtf.gz", package = "factR") custom.gtf <- importGTF(gtf) ## ----checktype---------------------------------------------------------------- class(custom.gtf) ## ----headobj------------------------------------------------------------------ head(custom.gtf) ## ----counttx------------------------------------------------------------------ length(unique(custom.gtf$transcript_id)) ## ----countcds----------------------------------------------------------------- length(unique(custom.gtf[custom.gtf$type=="CDS"]$transcript_id)) ## ----plottranscripts, message=FALSE, warning=FALSE---------------------------- viewTranscripts(custom.gtf, "Zfr") ## ----load.gencode, eval = TRUE------------------------------------------------ # query database for mouse gencode basic annotation library(AnnotationHub) ah <- AnnotationHub() query(ah, c('Mus musculus', 'gencode', 'gff')) # Download full annotation ref.gtf <- ah[['AH49546']] ## ----import.gencode, warning=FALSE-------------------------------------------- tmp <- tempfile() download.file("ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/gencode.vM25.annotation.gtf.gz", destfile = tmp) ref.gtf <- importGTF(tmp) ## ----matchgenemeta, warning=FALSE--------------------------------------------- # matching gene metadata custom_matched_1.gtf <- matchGeneInfo(custom.gtf, ref.gtf) ## ----matchgenemeta2, warning=FALSE-------------------------------------------- # matching gene metadata custom_matched_2.gtf <- matchGeneInfo(custom.gtf, ref.gtf, primary_gene_id = "gene_id", secondary_gene_id = "ref_gene_id") ## ----plottranscripts2--------------------------------------------------------- viewTranscripts(custom_matched_2.gtf, "Zfr") ## ----subsettx, warning=FALSE-------------------------------------------------- custom_new.gtf <- subsetNewTranscripts(custom_matched_2.gtf, ref.gtf) viewTranscripts(custom_new.gtf,"Zfr") ## ----subsettx2, warning=FALSE------------------------------------------------- custom_new.gtf <- subsetNewTranscripts(custom_matched_2.gtf, ref.gtf, refine.by = "intron") viewTranscripts(custom_new.gtf, "Zfr") ## ----genomeBSgenome, eval=FALSE----------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("BSgenome.Mmusculus.UCSC.mm10") ## ----loadBSgenome------------------------------------------------------------- library(BSgenome.Mmusculus.UCSC.mm10) Mmusculus <- BSgenome.Mmusculus.UCSC.mm10 ## ----genomeAhub--------------------------------------------------------------- library(AnnotationHub) ah <- AnnotationHub() query(ah, c("mm10","2bit")) ## ---- eval=FALSE-------------------------------------------------------------- # # Retrieve mouse genome # Mmusculus <- ah[['AH14005']] ## ----downloadGencode, eval=FALSE---------------------------------------------- # tmp <- tempfile() # download.file("ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/GRCm38.primary_assembly.genome.fa.gz", # tmp) # Mmusculus <- importFASTA(paste0(tmp, "/GRCm38.primary_assembly.genome.fa.gz")) ## ----buildcds, warning=FALSE-------------------------------------------------- custom_new_CDS.gtf <- buildCDS(custom_new.gtf, ref.gtf, Mmusculus) ## ----viewafterCDS------------------------------------------------------------- viewTranscripts(custom_new_CDS.gtf, "Zfr") ## ----viewafterCDSscale-------------------------------------------------------- viewTranscripts(custom_new_CDS.gtf, "Zfr", rescale_introns = TRUE) ## ----viewrefCDS--------------------------------------------------------------- viewTranscripts(ref.gtf, "Zfr", rescale_introns = TRUE) ## ----predictNMD1, warning=FALSE----------------------------------------------- NMDprediction.out <- predictNMD(custom_new_CDS.gtf) head(NMDprediction.out) ## ----predictNMD2-------------------------------------------------------------- NMDprediction.Zfr <- predictNMD(custom_new_CDS.gtf, gene_name == "Zfr") head(NMDprediction.Zfr) ## ----predDomain2-------------------------------------------------------------- predictDomains(custom_new_CDS.gtf, Mmusculus, gene_name == "Zfr", progress_bar = FALSE) ## ----predDomain3, eval = F---------------------------------------------------- # domains.Zfr <- predictDomains(custom_new_CDS.gtf, Mmusculus, gene_name == "Zfr", plot = TRUE, progress_bar = FALSE) ## ----predDomain1, eval=FALSE-------------------------------------------------- # domains.out <- predictDomains(custom_new_CDS.gtf, Mmusculus, progress_bar = FALSE) ## ----exportgtf, eval=FALSE---------------------------------------------------- # library(rtracklayer) # export(custom_new_CDS.gtf, "Custom_new.gtf", format = "GTF") ## ----exporttable, eval=FALSE-------------------------------------------------- # write.table(NMDprediction.out, "Custom_new_NMD.txt", sep = "\t", row.names = FALSE, quote = FALSE) # write.table(domains.out, "Custom_new_domains.txt", sep = "\t", row.names = FALSE, quote = FALSE) ## ----sessioninfo-------------------------------------------------------------- sessionInfo()