## ----style, echo=FALSE, results='hide', message=FALSE---------------------- library(BiocStyle) library(knitr) opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE) opts_chunk$set(fig.asp = 1) ## ----installation, echo=TRUE, eval=FALSE----------------------------------- # ## try http:// if https:// URLs are not supported # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("Melissa") # # ## Or download from Github repository # # install.packages("devtools") # devtools::install_github("andreaskapou/Melissa", build_vignettes = TRUE) ## ----melissa, fig.retina = NULL, fig.align='center', fig.cap="`Melissa` model overview. Melissa combines a likelihood computed from single cell methylation profiles fitted to each genomic region using a supervised regression approach (bottom left) and an unsupervised Bayesian clustering prior (top left). The posterior distribution provides a methylome-based clustering (top right) and imputation (bottom right) of single cells.", echo=FALSE---- knitr::include_graphics("../inst/figures/melissa.png") ## ----load_synth_data------------------------------------------------------- suppressPackageStartupMessages(library(Melissa)) # Load package dt_obj <- melissa_encode_dt # Load synthetic data ## ---- eval=FALSE, echo=FALSE, include=FALSE-------------------------------- # # For efficiency keep only the first 50 genomic regions # dt_obj$met <- dt_obj$met[1:50] # dt_obj$opts$C_true <- dt_obj$opts$C_true[1:50,] # #dt_obj$met <- lapply(dt_obj$met, function(x) x[1:50]) ## -------------------------------------------------------------------------- # Elements of `dt_obj` object names(dt_obj) ## -------------------------------------------------------------------------- head(dt_obj$met[[2]][[50]]) ## -------------------------------------------------------------------------- # Number of cells cat("Number of cells: ", length(dt_obj$met)) ## -------------------------------------------------------------------------- # Number of genomic regions in each cell cat("Number of genomic regions: ", length(dt_obj$met[[1]]) ) ## ----create_basis---------------------------------------------------------- library(BPRMeth) # Create RBF basis object with 4 RBFs basis_obj <- create_rbf_object(M = 4) ## ----show_basis------------------------------------------------------------ # Show the slots of the 'rbf' object basis_obj ## ----partition_data-------------------------------------------------------- set.seed(15) # Partition to training and test set dt_obj <- partition_dataset(dt_obj = dt_obj, data_train_prcg = 0.2, region_train_prcg = 1, cpg_train_prcg = 0.4, is_synth = TRUE) ## ----run_melissa----------------------------------------------------------- set.seed(15) # Run Melissa with K = 4 clusters melissa_obj <- melissa(X = dt_obj$met, K = 4, basis = basis_obj, vb_max_iter = 20, vb_init_nstart = 1, is_parallel = FALSE) ## ----summary_mixing_proportions-------------------------------------------- melissa_obj$pi_k ## ----summary_responsibilities---------------------------------------------- head(melissa_obj$r_nk) ## ----summary_weights------------------------------------------------------- melissa_obj$W[10, , 3] ## ----plot_profiles_1, fig.wide=TRUE---------------------------------------- # Plot profiles from all cell subtypes for genomic region 22 plot_melissa_profiles(melissa_obj = melissa_obj, region = 22, title = "Methylation profiles for region 22") ## ----plot_profiles_2, fig.wide=TRUE---------------------------------------- # Plot profiles from all cell subtypes for genomic region 77 plot_melissa_profiles(melissa_obj = melissa_obj, region = 77, title = "Methylation profiles for region 77") ## ----evaluate_cluster_perf------------------------------------------------- # Run clustering performance melissa_obj <- eval_cluster_performance(melissa_obj, dt_obj$opts$C_true) ## ----ari_measure----------------------------------------------------------- # ARI metric cat("ARI: ", melissa_obj$clustering$ari) ## ----cluster_assignment_error---------------------------------------------- # Clustering assignment error metric cat("Clustering assignment error: ", melissa_obj$clustering$error) ## ----perfrom_imputation---------------------------------------------------- imputation_obj <- impute_met_state(obj = melissa_obj, test = dt_obj$met_test) ## ----evaluate_imputation--------------------------------------------------- melissa_obj <- eval_imputation_performance(obj = melissa_obj, imputation_obj = imputation_obj) ## ----auc------------------------------------------------------------------- # AUC cat("AUC: ", melissa_obj$imputation$auc) ## ----f_measure------------------------------------------------------------- # F-measure cat("F-measure: ", melissa_obj$imputation$f_measure) ## ----session_info, echo=TRUE, message=FALSE-------------------------------- sessionInfo()