## ----first_load, echo=TRUE, warning=FALSE, results='hide', message=FALSE------
library(RITANdata)
library(RITAN)

## ----quick_start, echo=TRUE, warning=FALSE------------------------------------
# Query: a small set of gene symbols
my_genes <- c(
  "ABCA2", "ACAT2", "ACSS2", "CD9", "CPEB2",
  "CTNNB1", "FASN", "LDLR", "LPL", "LSS"
)

# The shortest route to enrichment results is:
# e <- term_enrichment(my_genes)

# For expediency, this example makes two simplifications:
#  1) only two of the many available resources are queried
#  2) a cached list of human protein-coding genes is used as the background
resources <- c("ReactomePathways", "MSigDB_Hallmarks")
e <- term_enrichment(
  my_genes,
  resources = resources,
  all_symbols = cached_coding_genes
)
summary(e)

## ----echo_geneset_list_names, echo=TRUE---------------------------------------
# Names of the resources stored in geneset_list
names(geneset_list)

## ----echo_geneset_list_names2, echo=TRUE--------------------------------------
# First entries of one gene set from the MSigDB_C7 resource
head(geneset_list[["MSigDB_C7"]][["GSE9988_LPS_VS_LOW_LPS_MONOCYTE_UP"]])

## ----make_study_selection, echo = TRUE----------------------------------------
# Flag MSigDB_C7 set names containing "GSE9988_", zero or more "LOW_",
# "LPS_VS_", at least one further character, and then "UP"
selection <- grepl(
  "GSE9988_(LOW_)*LPS_VS_.+UP",
  names(geneset_list[["MSigDB_C7"]]),
  perl = TRUE
)
study_set <- geneset_list[["MSigDB_C7"]][selection]
str(study_set)

## ----apply_enrichment_to_selection, echo=TRUE---------------------------------
# Enrichment for the selected study sets, restricted to `resources`, with
# cached_coding_genes supplied as all_symbols and a q-value threshold of 1e-5
e <- term_enrichment_by_subset(
  study_set,
  q_value_threshold = 1e-5,
  resources = resources,
  all_symbols = cached_coding_genes
)

## ----ep1, echo=TRUE, fig.width = 7, fig.height = 6----------------------------
# Plot the result without printing values in the cells
plot(
  e,
  show_values = FALSE,
  label_size_y = 7,
  label_size_x = 7
)

## ----ep1_cap, echo=TRUE, fig.width = 7, fig.height = 6------------------------
# Same plot, additionally passing cap = 10
plot(
  e,
  show_values = FALSE,
  label_size_y = 7,
  label_size_x = 7,
  cap = 10
)

## ----ep1_data, echo=TRUE, fig.width = 7, fig.height = 6-----------------------
# Print the first three rows of the result with explicit column labels
prmatrix(e[1:3, ], collab = c("name", "n.set", 1:7))

## ----make_ann_mat, echo=TRUE--------------------------------------------------
# Annotation matrix: one row per condition, one column per sample.
# Each row is written out as its own vector and the two are stacked.
mat <- rbind(
  Condition1 = c(
    "LPS", "LPS", "LPS", "LPS",
    "LOW_LPS", "LOW_LPS", "LOW_LPS"
  ),
  Condition2 = c(
    "LOW_LPS", "LPS_AND_ANTI_TREM1", "CTRL_TREATED", "VEHICLE_TREATED",
    "ANTI_TREM1_AND_LPS", "CTRL_TREATED", "VEHICLE_TREATED"
  )
)
# Columns are labelled Sample1 ... Sample7
colnames(mat) <- paste0("Sample", seq_len(ncol(mat)))
print(mat)

## ----show_ann_mat, echo=TRUE, fig.width = 7, fig.height = 8-------------------
# Plot with cell values shown, passing `mat` as the annotation matrix and
# black grid lines
plot(
  e,
  show_values = TRUE,
  label_size_y = 7,
  label_size_x = 7,
  cap = 10,
  annotation_matrix = mat,
  grid_line_color = "black"
)

## ----apply_enrichment_to_selection_n, echo=TRUE-------------------------------
# Same study sets, resources and background as for `e`, but requesting
# display_type "n" with phred switched off
n <- term_enrichment_by_subset(
  study_set,
  q_value_threshold = 1e-5,
  resources = resources,
  all_symbols = cached_coding_genes,
  display_type = "n",
  phred = FALSE
)

## ----apply_enrichment_to_selection_n_plot, echo=TRUE, fig.width = 7, fig.height = 6----
plot(
  n,
  show_values = TRUE,
  label_size_y = 7,
  label_size_x = 7,
  cap = 20
)

## ----term_enrichment1, eval=FALSE---------------------------------------------
#  data("vac1.day0vs31.de.genes")
#  te <- term_enrichment( geneset = vac1.day0vs31.de.genes )

## ----term_enrichment2, eval=FALSE---------------------------------------------
#  e <- term_enrichment( geneset = vac1.day0vs31.de.genes, verbose = TRUE,
#                        resources = c("Blood_Translation_Modules", "MSigDB_C7") )

## ----term_enrichment3, eval=FALSE---------------------------------------------
#  e <- term_enrichment( geneset = vac1.day0vs31.de.genes, verbose = TRUE,
#                        resources = names(geneset_list) )

## ----term_enrichment_add_gmt, echo=TRUE---------------------------------------
# Query gene set taken from the MSigDB_C7 resource
gs <- geneset_list[["MSigDB_C7"]][["GSE6269_HEALTHY_VS_FLU_INF_PBMC_UP"]]
# Path to the GMT file located under extdata/ of the RITAN package
gmt <- system.file(
  "extdata",
  "curated_gene_disease_associations.gmt.gz",
  package = "RITAN"
)

# -->> Skipped here for brevity
# geneset_list$DisGeNet <- readGMT(gmt)
# str(head(geneset_list$DisGeNet))

## ----term_enrichment_provided_gmt, echo=TRUE----------------------------------
# The GMT file path itself is handed over as the resource
e2 <- term_enrichment(
  gs,
  resources = gmt,
  all_symbols = cached_coding_genes
)
# First three rows, leaving out the fifth column
print(e2[1:3, -5])

## ----term_enrichment_search_gmt, echo=TRUE------------------------------------
# Read the GMT file into geneset_list under the name "DisGeNet", then show
# the entry named "Influenza, Human"
geneset_list[["DisGeNet"]] <- readGMT(gmt)
print(geneset_list[["DisGeNet"]][["Influenza, Human"]])

## ----show_hist, echo=TRUE-----------------------------------------------------
# Presumably draws a histogram describing the currently active gene sets;
# the helper is defined outside this file (TODO confirm against RITAN docs).
show_active_genesets_hist()
# Print the number of elements in active_genesets.
# NOTE(review): active_genesets is never assigned in this file — presumably
# it is maintained by RITAN; confirm.
length(active_genesets)

## ----LDL, echo=TRUE-----------------------------------------------------------
# Add a custom resource named "LDL" to geneset_list. It holds two named gene
# sets, each a character vector of gene symbols.
# NOTE(review): 'HMGR' looks like an alias of 'HMGCR', which is also listed —
# confirm that both entries are intended.
geneset_list$LDL <- list(
  LDL_import = c("APOB", "APOE", "LDLR"),
  LDL_processing = c(
    "HMGR", "ACAT2", "HMGCS1",
    "HMGCR", "MVD", "MVK",
    "PMVK", "IDI1", "IDI2"
  )
)

## ----LDLe, eval=FALSE---------------------------------------------------------
#  e <- term_enrichment( gs, resources = c('GO','LDL') )