## ---- echo=FALSE, results="hide", warning=FALSE---------------------------- suppressPackageStartupMessages({ library('annotation') }) ## ---- echo=FALSE----------------------------------------------------------- suppressPackageStartupMessages(library(annotation)) ## -------------------------------------------------------------------------- hub <- AnnotationHub() ## -------------------------------------------------------------------------- mcols(query(hub, "clinvar.vcf", "GRCh37"))[,"sourceurl", drop=FALSE] ## -------------------------------------------------------------------------- fl <- query(hub, "clinvar.vcf", "GRCh37")[[1]] ## -------------------------------------------------------------------------- vcf <- readVcf(fl, "hg19") dim(vcf) ## -------------------------------------------------------------------------- txdb_hg19 <- TxDb.Hsapiens.UCSC.hg19.knownGene head(seqlevels(txdb_hg19)) seqlevels(vcf) seqlevels(vcf) <- paste0("chr", seqlevels(vcf)) ## -------------------------------------------------------------------------- seqlevels(vcf, pruning.mode="coarse") <- c("chr3", "chr18") seqlevels(txdb_hg19) <- c("chr3", "chr18") ## -------------------------------------------------------------------------- intersect(seqlevels(txdb_hg19), seqlevels(vcf)) ## -------------------------------------------------------------------------- unique(genome(txdb_hg19)) unique(genome(vcf)) ## -------------------------------------------------------------------------- gr_hg19 <- rowRanges(vcf) ## -------------------------------------------------------------------------- txdb_mm10 <- keepStandardChromosomes(TxDb.Mmusculus.UCSC.mm10.ensGene) ## -------------------------------------------------------------------------- head(seqlevels(txdb_mm10)) gr_mm10 <- GRanges("chr4", IRanges(c(4000000, 107889000), width=1000)) ## -------------------------------------------------------------------------- unique(genome(txdb_mm10)) genome(gr_mm10) <- "mm10" ## -------------------------------------------------------------------------- loc_hg19 <- locateVariants(gr_hg19, txdb_hg19, AllVariants()) table(loc_hg19$LOCATION) loc_mm10 <- locateVariants(gr_mm10, txdb_mm10, AllVariants()) table(loc_mm10$LOCATION) ## -------------------------------------------------------------------------- cols <- c("UNIPROT", "PFAM") keys <- na.omit(unique(loc_hg19$GENEID)) head(select(org.Hs.eg.db, keys, cols, keytype="ENTREZID")) ## -------------------------------------------------------------------------- keys <- unique(loc_mm10$GENEID) head(select(org.Mm.eg.db, keys, cols, keytype="ENSEMBL")) ## -------------------------------------------------------------------------- hub <- AnnotationHub() hub_hg19 <- subset(hub, (hub$species == "Homo sapiens") & (hub$genome == "hg19")) length(hub_hg19) ## ---- echo=FALSE----------------------------------------------------------- ov_hg19 <- lapply(1:3, function(i) subsetByOverlaps(hub_hg19[[i]], gr_hg19)) ## -------------------------------------------------------------------------- ov_hg19 <- lapply(1:3, function(i) subsetByOverlaps(hub_hg19[[i]], gr_hg19)) ## -------------------------------------------------------------------------- names(ov_hg19) <- names(hub_hg19)[1:3] lapply(ov_hg19, head, n=3) ## -------------------------------------------------------------------------- head(predictCoding(vcf, txdb_hg19, Hsapiens), 3) ## ----sess------------------------------------------------------------------ sessionInfo()