1 Supplementary Protocol - Data Download

This protocol processes RNA-seq data using the R programming environment and specialized packages from Bioconductor to create gene lists. The scripts are available for download, and novice users can copy and paste the commands into the R console. To create gene expression data for Protocol 1B, we downloaded gene expression data from the Ovarian Serous Cystadenocarcinoma project of The Cancer Genome Atlas (TCGA; http://cancergenome.nih.gov) via the Genomic Data Commons (GDC) portal on 2017-06-14 using the TCGAbiolinks R package. The dataset includes 544 samples available as RMA-normalized microarray data (Affymetrix HG-U133A), and 309 samples available as RNA-seq data, with reads mapped to a reference genome using MapSplice (ref. 58) and read counts per transcript determined using the RSEM method (ref. 59). RNA-seq data are labeled in the dataset as ‘RNA-Seq V2’ (see details at: https://wiki.nci.nih.gov/display/TCGA/RNA-Seq+Version+2). The RNA-Seq V2 dataset consists of raw counts similar to regular RNA-seq, but RSEM data can be used with the edgeR method.

The TCGAbiolinks R package allows users to download raw or scored data directly from the GDC portal. Both new datasets and legacy TCGA data are available for download.

#for the latest version install from github
#devtools::install_github(repo = "BioinformaticsFMRP/TCGAbiolinks")
#source("https://bioconductor.org/biocLite.R")
#biocLite("TCGAbiolinks")
#make sure the installed version is at least 2.5.3
library("TCGAbiolinks")
library("SummarizedExperiment")

1.1 subtype definitions

For our analysis, we want to restrict the samples in our supplementary files to just a few subtypes, but for a general analysis this is not required.

# Read the Verhaak subtype table (tab-delimited, with a header row), keeping
# text columns as character vectors rather than factors.
classDefinitions_verhaak <- read.table(
  "data/Verhaak_supplementary_table1_OV_subtypes.txt",
  header = TRUE,
  sep = "\t",
  quote = "\"",
  stringsAsFactors = FALSE
)

1.2 Download and format TCGA OV microarray data

# Query the legacy GDC data for open-access TCGA-OV gene expression
# quantification files from the HT_HG-U133A microarray platform.
query_microarray <- GDCquery(
  project = "TCGA-OV",
  data.category = "Gene expression",
  data.type = "Gene expression quantification",
  platform = "HT_HG-U133A",
  access = "open",
  legacy = TRUE
)

# Fetch the files matched by the query, then load them into a single object.
GDCdownload(query_microarray)
OVMicroarray <- GDCprepare(query_microarray)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |=========                                                        |  15%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |===========                                                      |  18%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |=============                                                    |  21%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |===============                                                  |  24%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |======================                                           |  35%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |========================                                         |  38%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |==========================                                       |  41%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |============================                                     |  44%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |===================================                              |  55%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |=====================================                            |  58%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |=======================================                          |  61%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |=============================================                    |  70%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |==============================================                   |  72%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |==================================================               |  78%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |===============================================================  |  98%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================|  99%
  |                                                                       
  |=================================================================| 100%
# Extract the expression matrix from the prepared microarray object.
microarray <- assay(OVMicroarray)

# Remove the duplicate genes, keeping the first occurrence of each row name.
# drop = FALSE keeps `microarray` a matrix even if a single row survives.
microarray <- microarray[!duplicated(rownames(microarray)), , drop = FALSE]

# Pair each full sample barcode (column 1) with its 12-character patient
# barcode (column 2), converting any "." separators back to "-".
microarrayPatients <- cbind(
  colnames(microarray),
  gsub('\\.', '-', substring(colnames(microarray), 1, 12))
)

# Only include patients that were assigned a non-missing subtype in the
# Verhaak dataset.
subtypedIds <- classDefinitions_verhaak[!is.na(classDefinitions_verhaak$SUBTYPE), "ID"]
microarray <- microarray[, microarrayPatients[, 2] %in% subtypedIds, drop = FALSE]

# Attach the subtype to every sample. merge() places the key column first, so
# after renaming, "barcode" holds the 12-character patient barcode and
# "patient" holds the full sample barcode (i.e. the matrix column name).
microarrayPatients <- merge(
  microarrayPatients,
  classDefinitions_verhaak[, c("ID", "SUBTYPE")],
  by.x = 2,
  by.y = 1
)
colnames(microarrayPatients) <- c("barcode", "patient", "SUBTYPE")

# Only include samples that still have microarray data, then sort by subtype.
microarrayPatients <- microarrayPatients[microarrayPatients$patient %in% colnames(microarray), ]
microarrayPatients <- microarrayPatients[order(microarrayPatients$SUBTYPE), ]

# Convert the barcodes so that they will be compatible with colnames
# (R doesn't like "-" in column names).
microarrayPatients$patient <- gsub('-', '\\.', microarrayPatients$patient)
colnames(microarray) <- gsub('-', '\\.', colnames(microarray))

# Reorder the matrix columns to follow the subtype-sorted sample table.
# Columns are selected by position so that every column is kept exactly once.
columnOrder <- order(match(colnames(microarray), microarrayPatients$patient))
microarray <- microarray[, columnOrder, drop = FALSE]

# Write the expression matrix and the matching class definitions.
# `col.names` is spelled out in full instead of relying on partial matching.
write.table(
  microarray,
  "./data/Supplementary_Table10_TCGA_Microarray_rmanormalized.txt",
  col.names = TRUE,
  sep = "\t",
  row.names = TRUE,
  quote = FALSE
)

write.table(
  microarrayPatients,
  "./data/Supplementary_Table11_Microarray_classdefinitions.txt",
  col.names = TRUE,
  sep = "\t",
  row.names = TRUE,
  quote = FALSE
)

1.3 Download and format TCGA OV RNASeq data

Sometimes the server times out. If you initially get an error, try running this block again.
Also, if you ran the code above immediately before this section, the tar file will have the same name but a different format. Try deleting all tar files in the directory and re-running.

  # Query the legacy GDC data for open-access TCGA-OV RNA-Seq gene expression
  # quantification files (Illumina HiSeq platform, "results" file type).
  query <- GDCquery(
    project = "TCGA-OV",
    data.category = "Gene expression",
    data.type = "Gene expression quantification",
    experimental.strategy = "RNA-Seq",
    platform = "Illumina HiSeq",
    access = "open",
    file.type = "results",
    legacy = TRUE
  )
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg19
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-OV
## --------------------
## oo Filtering results
## --------------------
## ooo By platform
## ooo By access
## ooo By experimental.strategy
## ooo By data.type
## ooo By file.type
## ----------------
## oo Checking data
## ----------------
## ooo Check if there are duplicated cases
## ooo Check if there results for the query
## -------------------
## o Preparing output
## -------------------
  # Download the files matched by `query`; files that are already on disk are
  # reported as existing instead of being fetched again (see the output below).
  GDCdownload(query)
## Downloading data for project TCGA-OV
## Of the 309 files for download 309 already exist.
## All samples have been already downloaded
  # Load the downloaded RNA-Seq files into a single R object
  # (presumably a SummarizedExperiment, going by the name -- TODO confirm).
  OVRnaseqSE <- GDCprepare(query)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |=========                                                        |  15%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |=============                                                    |  21%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |===============                                                  |  24%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |========================                                         |  38%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |============================                                     |  44%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |=====================================                            |  58%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |=======================================                          |  61%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |=============================================                    |  70%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |==============================================                   |  72%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |==================================================               |  78%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================|  99%
  |                                                                       
  |=================================================================| 100%
## Downloading genome information (try:0) Using: Homo sapiens genes (GRCh37.p13)
## Loading from disk
## Starting to add information to samples
##  => Add clinical information to samples
## Add FFPE information. More information at: 
## => https://cancergenome.nih.gov/cancersselected/biospeccriteria 
## => http://gdac.broadinstitute.org/runs/sampleReports/latest/FPPP_FFPE_Cases.html
##  => Adding subtype information to samples
# Format the downloaded TCGA OV RNA-seq data and write two tab-delimited tables:
#   * the expression matrix (rows = genes, columns = samples), and
#   * the matching sample -> subtype class definitions.
# Requires `OVRnaseqSE` (the downloaded SummarizedExperiment) and
# `classDefinitions_verhaak` (columns "ID" and "SUBTYPE") from earlier steps.
rnaseq <- assay(OVRnaseqSE)

# keep only the first row for every duplicated row (gene) name
# (drop = FALSE keeps a matrix even if a single row/column survives)
rnaseq <- rnaseq[!duplicated(rownames(rnaseq)), , drop = FALSE]

# pair every sample barcode (column name) with its 12 character patient ID;
# any "." in the ID is turned back into "-" so it matches the Verhaak IDs
rnaseqPatients <- cbind(colnames(rnaseq),
                        gsub(".", "-", substring(colnames(rnaseq), 1, 12),
                             fixed = TRUE))

# only include patients that were assigned a subtype in the Verhaak dataset
classifiedIDs <- classDefinitions_verhaak[!is.na(classDefinitions_verhaak$SUBTYPE), "ID"]
rnaseq <- rnaseq[, rnaseqPatients[, 2] %in% classifiedIDs, drop = FALSE]

# attach the subtype to every sample (inner join on the patient ID)
rnaseqPatients <- merge(rnaseqPatients,
                        classDefinitions_verhaak[, c("ID", "SUBTYPE")],
                        by.x = 2, by.y = 1)
# NOTE(review): merge() returns the join key (12 character patient ID) first and
# the full sample barcode second, so the column labelled "barcode" holds the
# patient ID and "patient" holds the full barcode. The labels are kept as they
# are because the code below matches colnames(rnaseq) against `patient`.
colnames(rnaseqPatients) <- c("barcode", "patient", "SUBTYPE")

# change the order of the classes so mesenchymal and immunoreactive are first;
# rows with any other (or missing) subtype are dropped, and the original row
# order is preserved within each subtype
subtypeOrder <- c("Mesenchymal", "Immunoreactive", "Differentiated", "Proliferative")
rnaseqPatients <- rnaseqPatients[rnaseqPatients$SUBTYPE %in% subtypeOrder, , drop = FALSE]
rnaseqPatients <- rnaseqPatients[order(match(rnaseqPatients$SUBTYPE, subtypeOrder)), , drop = FALSE]
# alternative: alphabetical subtype order
#rnaseqPatients <- rnaseqPatients[order(rnaseqPatients$SUBTYPE),]

# convert the barcodes so that they will be compatible with colnames
# (R doesn't like "-" in column names)
rnaseqPatients$patient <- gsub("-", ".", rnaseqPatients$patient, fixed = TRUE)
colnames(rnaseq) <- gsub("-", ".", colnames(rnaseq), fixed = TRUE)

# put the expression columns in the same order as the class definitions and keep
# only samples that have a class definition row, so both written tables agree
rnaseq <- rnaseq[, intersect(rnaseqPatients$patient, colnames(rnaseq)), drop = FALSE]

# optional: restrict the matrix to the two subtypes compared downstream
#rnaseq <- rnaseq[,which(colnames(rnaseq) %in% rnaseqPatients[which(rnaseqPatients$SUBTYPE== "Immunoreactive" | rnaseqPatients$SUBTYPE == "Mesenchymal"  ),"patient"])]
write.table(rnaseq, "./data/Supplementary_Table12_TCGA_RNASeq_rawcounts.txt",
            col.names = TRUE, sep = "\t", row.names = TRUE, quote = FALSE)

write.table(rnaseqPatients, "./data/Supplementary_Table13_RNASeq_classdefinitions.txt",
            col.names = TRUE, sep = "\t", row.names = TRUE, quote = FALSE)