#install required R and bioconductor packages
tryCatch(expr = { library("RCurl")},
error = function(e) { install.packages("RCurl")},
finally = library("RCurl"))
package ‘RCurl’ was built under R version 3.6.2
#use library
#make sure biocManager is installed
tryCatch(expr = { library("BiocManager")},
error = function(e) {
install.packages("BiocManager")},
finally = library("BiocManager"))
tryCatch(expr = { library("ggplot2")},
error = function(e) { install.packages("ggplot2")},
finally = library("ggplot2"))
#use easy cyRest library to communicate with cytoscape.
tryCatch(expr = { library("RCy3")},
error = function(e) { BiocManager::install("RCy3")},
finally = library("RCy3"))
In order to run GSEA automatically through the notebook you will need to download the gsea jar from here. Specify the exact path to the gsea jar in the parameters in order to automatically compute enrichments using GSEA.
#path to GSEA jar
# In order to run GSEA automatically you need to speciry the path to the gsea jar file.
#With the latest release of gsea (4.0.2) they no longer release a bundled jar
# and instead release a scriptted way to launch the gsea client.
# specify the java version as 11 if you are using the later version gsea
# the gsea_jar also needs to be the full path to the GSEA 4.0.2 directory that you
# downloaded from GSEA. for example (/Users/johnsmith/GSEA_4.0.2/gsea-cli.sh)
gsea_jar <- params$gsea_jar
java_version <- params$java_version
#Gsea takes a long time to run. If you have already run GSEA manually or previously there is no need to re-run GSEA. Make sure the
# gsea results are in the current directory and the notebook will be able to find them and use them.
run_gsea = params$run_gsea
#navigate to the directory where you put the downloaded protocol files.
working_dir <- params$working_dir
# leave blank if you want the notebook to discover the gsea directory for itself
#gsea_directory = paste(working_dir,"Mesen_vs_Immuno.GseaPreranked.1497635459262",sep="/")
gsea_directory = params$gsea_directory
analysis_name <- params$analysis_name
rnk_file <- params$rnk_file
expression_file <- params$expression_file
classes_file <- params$classes_file
Only Human, Mouse and Rat gene set files are currently available on the baderlab downloads site. If you are working with a species other than human (and it is either rat or mouse) change the gmt_url below to correct species. Check here to see all available species.
(GSEA)[http://software.broadinstitute.org/gsea/index.jsp] is a stand alone java program with many customizable options. It can be easily run through its integrated user interface. To make this a seemless pipeline we can run GSEA from the command line with a set of options. Any of the supplied options can be customized and there are many additional options that can be specified. For more details see (here)[http://software.broadinstitute.org/gsea/doc/GSEAUserGuideTEXT.htm#_Running_GSEA_from]
In the below command the following options have been specified:
If you are using GSEA 4.0.2 you will need to update your system to use Java 11. The GSEA command line interface has changed slightly in this version of GSEA. Instead of launching GSEA using java you need to use one of the scripts supplied by GSEA. (gsea-cli.bat for Windows and gsea-cli.sh for Mac or linux systems).
In the GSEA 4.0.2 command the following options can been specified:
If you have encounter an out of memory error you will need to open you gsea-cli script and manually update your xmx parameter (more information on this can be found in the GSEA_4.0.2/README file)
#if you are using GSEA 4.0.2 then you need to use the command script
# as opposed to launching GSEA through java.
# in the later version of GSEA command line implementation the following
# parameters are no longer valid: -permute gene_set, -num 100, -gui false
# no longer need to specify the whole path to the GseaPreranked package
if(run_gsea && java_version == "11"){
command <- paste("",gsea_jar, "GSEAPreRanked -gmx", dest_gmt_file, "-rnk" ,file.path(working_dir,rnk_file), "-collapse false -nperm 1000 -scoring_scheme weighted -rpt_label ",analysis_name," -plot_top_x 20 -rnd_seed 12345 -set_max 200 -set_min 15 -zip_report false -out" ,working_dir, " > gsea_output.txt",sep=" ")
system(command)
} else if (run_gsea) {
command <- paste("java -Xmx1G -cp",gsea_jar, "xtools.gsea.GseaPreranked -gmx", dest_gmt_file, "-rnk" ,file.path(working_dir,rnk_file), "-collapse false -nperm 1000 -permute gene_set -scoring_scheme weighted -rpt_label ",analysis_name," -num 100 -plot_top_x 20 -rnd_seed 12345 -set_max 200 -set_min 15 -zip_report false -out" ,working_dir, "-gui false > gsea_output.txt",sep=" ")
system(command)
}
WARNING: package com.sun.java.swing.plaf.windows not in java.desktop
WARNING: package sun.awt.windows not in java.desktop
Although GSEA allows you to specify the name of the output directory and the destination folder it add additional words and numbers to the folder name. Some are predictable and some are automatically generated. Get all the GSEA results directories found in the current directory. If there are multiple GSEA results folders each will be used to create an enrichment map.
if(gsea_directory == ""){
gsea_directories <- list.files(path = working_dir, pattern = "\\.GseaPreranked")
#get the details on the files
details = file.info(file.path(getwd(),working_dir,gsea_directories))
#order according to newest to oldest
details = details[with(details, order(as.POSIXct(mtime),decreasing = TRUE)), ]
#use the newest file:
gsea_output_dir <- row.names(details)[1]
} else {
gsea_output_dir <- gsea_directory
}
Launch Cytoscape (by default cytoscape will automatically enable rest so as long as cytoscape 3.3 or higher is open R should be able to communicate with it). Make sure if you get an message asking you if you want communicate with other apps that you select "Allow".
cytoscapePing ()
[1] "You are connected to Cytoscape!"
cytoscapeVersionInfo ()
apiVersion cytoscapeVersion
"v1" "3.8.0"
#defined threshold for GSEA enrichments (need to be strings for cyrest call)
pvalue_gsea_threshold <- params$pval_thresh
qvalue_gsea_threshold <- params$fdr_thresh
similarity_threshold <- "0.375"
similarity_metric = "COMBINED"
cur_model_name <- analysis_name
gsea_results_path <- file.path(gsea_output_dir,"edb")
gsea_results_filename <- file.path(gsea_results_path,"results.edb")
#although there is a gmt file in the gsea edb results directory it have been filtered to
#contain only genes represented in the expression set. If you use this fltered file you
#will get different pathway connectivity depending on the dataset being used. We recommend
#using original gmt file used for the gsea analysis and not the filtered one in the results directory.
gmt_gsea_file <- file.path(getwd(),dest_gmt_file)
gsea_ranks_file <- file.path(gsea_results_path,list.files(gsea_results_path,pattern=".rnk"))
#######################################
#create EM
current_network_name <- paste(cur_model_name,pvalue_gsea_threshold,qvalue_gsea_threshold,sep="_")
em_command = paste('enrichmentmap build analysisType="gsea" gmtFile=',gmt_gsea_file,
'pvalue=',pvalue_gsea_threshold, 'qvalue=',qvalue_gsea_threshold,
'similaritycutoff=',similarity_threshold,
'coefficients=',similarity_metric,'ranksDataset1=',
gsea_ranks_file,'enrichmentsDataset1=',gsea_results_filename,
'filterByExpressions=false',
'expressionDataset1=',file.path(getwd(),working_dir,expression_file),
sep=" ")
#enrichment map command will return the suid of newly created network.
response <- commandsGET(em_command)
current_network_suid <- 0
#enrichment map command will return the suid of newly created network unless it Failed.
#If it failed it will contain the word failed
if(grepl(pattern="Failed", response)){
paste(response)
} else {
current_network_suid <- response
}
#check to see if the network name is unique
current_names <- getNetworkList()
if(current_network_name %in% current_names){
#if the name already exists in the network names then put the SUID in front
# of the name (this does not work if you put the suid at the end of the name)
current_network_name <- paste(current_network_suid,current_network_name, sep="_")
}
response <- renameNetwork(title=current_network_name,
network = as.numeric(current_network_suid),base.url)
fitContent()
output_network_file <- file.path(getwd(),"initial_screenshot_network.png")
if(file.exists(output_network_file)){
#cytoscape hangs waiting for user response if file already exists. Remove it first
response <- file.remove(output_network_file)
}
response <- exportImage(output_network_file, type = "png")
htmltools::img(src = knitr::image_uri(output_network_file),
alt = 'Initial Enrichment Map',
style = 'margin:0px auto;display:block')