G34-gliomas project [source]


1 Configuration

Configuration of project directory & analysis outputs:

Show full config

library(here)

# Set up outputs
message("Document index: ", doc_id)
## Document index: 05
# Specify where to save outputs: one folder each for output files and figures,
# both keyed by the document index, plus a cache path under ~/tmp.
# showWarnings = FALSE keeps re-renders quiet when the folders already exist.
out <- here(subdir, "output", doc_id)
dir.create(out, recursive = TRUE, showWarnings = FALSE)

# The trailing "/" is kept as in the original path, since other code may paste
# file names directly onto figout
figout <- here(subdir, "figures", doc_id, "/")
dir.create(figout, recursive = TRUE, showWarnings = FALSE)

# The cache path is only built here; it is not created in this chunk
cache <- paste0("~/tmp/", basename(here()), "/", subdir, "/", doc_id, "/")

message("Cache: ", cache)
## Cache: ~/tmp/G34-gliomas/bulk_transcriptome_epigenome/05/

The root directory of this project is:

## /lustre03/project/6004736/sjessa/from_beluga/HGG-G34/G34-gliomas

Outputs and figures will be saved at these paths, relative to project root:

## G34-gliomas/bulk_transcriptome_epigenome/output/05
## G34-gliomas/bulk_transcriptome_epigenome/figures/05//

Setting a random seed:

# Fix the random number generator seed so that any random steps in this
# document give the same result on every render
set.seed(100)

2 Overview

In this document, we load 4C quantifications of genomic contacts derived from Hi-C data in tumors and normal brain, focusing on contacts anchored at the PDGFRA or GSX2 promoters, and plot them.

3 Libraries

library(readxl)
library(ggplot2)
library(cowplot)
library(tidyr)
library(dplyr)

# Theme
# Set theme_min (base font size 13) as the default ggplot2 theme.
# NOTE(review): theme_min is not defined in the visible code and is called
# before functions.R is sourced below — confirm where it comes from.
ggplot2::theme_set(theme_min(base_size = 13))

# Custom
# Source this sub-project's analysis/functions.R script
source(here::here(subdir, "analysis/functions.R"))

4 Analysis

4.1 Anchored at PDGFRA

Generate the plots for contacts anchored at the PDGFRA promoter:

4.1.1 Human

data <- read_excel(here(subdir, "data/PDGFRA_promoter_4C_human.xlsx"))

# Convert to long format so that we have three columns in the data frame:
# 1) Group (G34 or Other), 2) Average value, and 3) Standard deviation value
data_long <- data %>%
    unite(G34,   Average_G34_pGBMs,   stdeva_G34_pGBMs) %>%
    unite(Other, Average_Other_pGBMs, stdeva_other_pGBMs) %>%
    gather(Group, Value, G34, Other) %>%
    separate(Value, into = c("Average", "Stdev"), sep = "_")

# Make sure we have numeric columns for plotting.
# unite() pastes a missing cell as the text "NA" and separate() keeps it as
# text, so is.na() on the character column does not detect it. Converting
# first turns both the text "NA" and any true NA into a numeric NA.
data_long$Average <- as.numeric(data_long$Average)
data_long$Stdev   <- as.numeric(data_long$Stdev)

# Convert the missing values around PDGFRA to 0, for the average and the
# standard deviation alike, so the line and its ribbon cover the same positions
data_long$Average[is.na(data_long$Average)] <- 0
data_long$Stdev[is.na(data_long$Stdev)]     <- 0

# Colour palette
palette_human <- c("red", "navy")

# Write a plotting function
#
# Plot 4C contact profiles: one line per group for the average signal, with a
# semi-transparent ribbon spanning average -/+ standard deviation.
#
# @param df       Long-format data frame with columns START, Average, Stdev
#                 (Average and Stdev numeric) and Group.
# @param palette  Colours handed to the manual colour and fill scales.
# @param break_by Spacing between x-axis breaks, in the units of START.
#                 Defaults to 20000, the value previously hard-coded.
# @return Whatever rr_ggplot() returns with the layers, scales and theme
#         added (presumably a ggplot object — rr_ggplot is defined elsewhere).
plot_4c <- function(df, palette, break_by = 20000) {

  # Derive the axis range from the data frame being plotted, not from a
  # global `data` object, so the breaks always match the plotted data
  x_breaks <- seq(from = min(df$START, na.rm = TRUE),
                  to   = max(df$START, na.rm = TRUE),
                  by   = break_by)

  df %>%
    rr_ggplot(aes(x = START, y = Average), plot_num = 1) +
    # Plot the stdev as an area "ribbon"
    geom_ribbon(aes(ymin = Average - Stdev, ymax = Average + Stdev, fill = Group), alpha = 0.3) +
    # Plot the solid line for the average
    geom_line(aes(colour = Group)) +
    # Set the colours
    scale_colour_manual(values = palette) +
    scale_fill_manual(values = palette) +
    # Add more breaks to the x-axis
    scale_x_continuous(breaks = x_breaks) +
    theme_cowplot() +
    # Rotate the x-axis labels and shrink them so the dense breaks stay legible
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = rel(0.7)))

}

plot_4c(data_long, palette_human)
## Loading required package: readr


[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//human_pdgfra_plot…]

4.1.2 Mouse

# Read the mouse 4C table for contacts anchored at the PDGFRA promoter
data <- read_excel(here(subdir, "data/PDGFRA_promoter_4C_mice.xlsx"))

# Reshape to long format with three value-related columns:
# 1) Group (GE, Cortex or ESC), 2) Average value, and 3) Standard deviation
# value. Average and Stdev are converted to numeric at the end of the
# pipeline, ready for plotting.
data_long <- data %>%
  unite(GE, Average_GE, STDEVA_GE) %>%
  unite(Cortex, Average_Cortex, STDEVA_Cortex) %>%
  unite(ESC, Average_ESC, STDEVA_ESC) %>%
  gather(Group, Value, GE, Cortex, ESC) %>%
  separate(Value, into = c("Average", "Stdev"), sep = "_") %>%
  mutate(
    Average = as.numeric(Average),
    Stdev   = as.numeric(Stdev)
  )

# Colours for the mouse groups
palette_mouse <- c("blue", "orange", "gray50")

# Draw the contact profile
plot_4c(data_long, palette_mouse)


[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//mouse_pdgfra_plot…]

4.2 Anchored at GSX2

Generate the plots for contacts anchored at the GSX2 promoter:

4.2.1 Human

# Read the human 4C table for contacts anchored at the GSX2 promoter
data <- read_excel(here(subdir, "data/GSX2_promoter_4C_human.xlsx"))

# Reshape to long format with three value-related columns:
# 1) Group (G34 or Other), 2) Average value, and 3) Standard deviation value.
# Average and Stdev are converted to numeric at the end of the pipeline,
# ready for plotting.
data_long <- data %>%
  unite(G34, Average_G34, STDEVA_G34) %>%
  unite(Other, Average_other_pGBMs, STDEVA_non_G34) %>%
  gather(Group, Value, G34, Other) %>%
  separate(Value, into = c("Average", "Stdev"), sep = "_") %>%
  mutate(
    Average = as.numeric(Average),
    Stdev   = as.numeric(Stdev)
  )

# Draw the contact profile with the human colours
plot_4c(data_long, palette_human)


[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//human_gsx2_plot…]

4.2.2 Mouse

# Read the mouse 4C table for contacts anchored at the GSX2 promoter
data <- read_excel(here(subdir, "data/GSX2_promoter_4C_mice.xlsx"))

# Reshape to long format with three value-related columns:
# 1) Group (GE, Cortex or ESC), 2) Average value, and 3) Standard deviation
# value. The ESC rows are then dropped, and Average and Stdev are converted
# to numeric, ready for plotting.
data_long <- data %>%
  unite(GE, Average_GE, STDEVA_GE) %>%
  unite(Cortex, Average_Cortex, STDEVA_Cortex) %>%
  unite(ESC, Average_ESC, STDEVA_ESC) %>%
  gather(Group, Value, GE, Cortex, ESC) %>%
  separate(Value, into = c("Average", "Stdev"), sep = "_") %>%
  filter(Group != "ESC") %>%
  mutate(
    Average = as.numeric(Average),
    Stdev   = as.numeric(Stdev)
  )

# Draw the contact profile.
# NOTE(review): palette_mouse is an unnamed three-colour vector but only two
# groups remain here — confirm each group gets the same colour as in the
# PDGFRA mouse plot.
plot_4c(data_long, palette_mouse)


[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//mouse_gsx2_plot…]


5 Reproducibility

This document was last rendered on:

## 2020-09-28 09:59:15

The git repository and last commit:

## Local:    master /lustre03/project/6004736/sjessa/from_beluga/HGG-G34/G34-gliomas
## Remote:   master @ origin (git@github.com:fungenomics/G34-gliomas.git)
## Head:     [f55adef] 2020-09-25: Add PDGFRA-GSX2 correlation for non-G34 samples

The random seed was set with set.seed(100)

The R session info:
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/imkl/2018.3.222/compilers_and_libraries_2018.3.222/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
##  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
##  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
##  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] readr_1.3.1   dplyr_0.8.0   tidyr_0.8.2   cowplot_0.9.4 ggplot2_3.1.0
## [6] readxl_1.2.0  here_0.1     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5         git2r_0.27.1       cellranger_1.1.0   compiler_3.5.1    
##  [5] pillar_1.4.6       RColorBrewer_1.1-2 plyr_1.8.6         tools_3.5.1       
##  [9] digest_0.6.25      evaluate_0.14      lifecycle_0.2.0    tibble_3.0.3      
## [13] gtable_0.3.0       pkgconfig_2.0.3    rlang_0.4.7        yaml_2.2.1        
## [17] xfun_0.17          withr_2.2.0        stringr_1.4.0      knitr_1.29        
## [21] hms_0.5.3          vctrs_0.3.4        rprojroot_1.3-2    grid_3.5.1        
## [25] tidyselect_1.1.0   glue_1.4.2         R6_2.4.1           rmarkdown_1.11    
## [29] farver_2.0.3       purrr_0.3.4        magrittr_1.5       codetools_0.2-15  
## [33] backports_1.1.9    scales_1.1.1       htmltools_0.5.0    ellipsis_0.3.1    
## [37] assertthat_0.2.1   colorspace_1.4-1   labeling_0.3       stringi_1.5.3     
## [41] lazyeval_0.2.2     munsell_0.5.0      crayon_1.3.4

A project of the Kleinman Lab at McGill University, using the rr reproducible research template.
home GitHub