G34-gliomas project [source]
Configuration of project directory & analysis outputs:
Show full config
library(here)
# Set up outputs
message("Document index: ", doc_id)
## Document index: 05

# Specify where to save outputs: one directory for tables/objects and one for
# figures, each created (with any missing parents) right after it is defined
out <- here(subdir, "output", doc_id)
dir.create(out, recursive = TRUE)

figout <- here(subdir, "figures", doc_id, "/")
dir.create(figout, recursive = TRUE)

# Cache location under ~/tmp, built from the project directory name, the
# sub-directory and the document index
cache <- paste0("~/tmp/", basename(here()), "/", subdir, "/", doc_id, "/")
message("Cache: ", cache)
## Cache: ~/tmp/G34-gliomas/bulk_transcriptome_epigenome/05/
The root directory of this project is:
## /lustre03/project/6004736/sjessa/from_beluga/HGG-G34/G34-gliomas
Outputs and figures will be saved at these paths, relative to project root:
## G34-gliomas/bulk_transcriptome_epigenome/output/05
## G34-gliomas/bulk_transcriptome_epigenome/figures/05//
Setting a random seed:
# Fix the random number generator state so any random draws are reproducible
set.seed(100)
In this document, we load 4C quantifications of the contacts between parts of the genome, derived from Hi-C data in tumors and normal brain, focusing on contacts anchored at the PDGFRA or GSX2 promoter.
library(readxl)
library(ggplot2)
library(cowplot)
library(tidyr)
library(dplyr)
# Theme: make theme_min(base_size = 13) the default ggplot2 theme.
# NOTE(review): theme_min is not defined in this document and is called before
# functions.R is sourced below, so it must already be available at this
# point -- confirm where it comes from.
ggplot2::theme_set(theme_min(base_size = 13))
# Custom: evaluate analysis/functions.R from this sub-project
# (presumably project-specific helper functions -- verify)
source(here::here(subdir, "analysis/functions.R"))
Generate the plots for contacts anchored at the PDGFRA promoter:
data <- read_excel(here(subdir, "data/PDGFRA_promoter_4C_human.xlsx"))

# Convert to long format so that we have three columns in the data frame:
# 1) Group (G34 or Other), 2) Average value, and 3) Standard deviation value.
# unite() pastes each average/stdev pair into one "<avg>_<sd>" string, so both
# values come back from separate() as character columns.
data_long <- data %>%
  unite(G34, Average_G34_pGBMs, stdeva_G34_pGBMs) %>%
  unite(Other, Average_Other_pGBMs, stdeva_other_pGBMs) %>%
  gather(Group, Value, G34, Other) %>%
  separate(Value, into = c("Average", "Stdev"), sep = "_")

# Make sure we have numeric columns for plotting. This is done BEFORE the
# missing-value replacement: unite() writes a missing cell as the string "NA",
# which is.na() does not detect, whereas as.numeric() turns both a real NA and
# the string "NA" into NA.
data_long$Average <- as.numeric(data_long$Average)
data_long$Stdev <- as.numeric(data_long$Stdev)

# Convert the missing values around PDGFRA to 0 (average and stdev alike, so
# the ribbon has defined bounds wherever the line does)
data_long$Average[is.na(data_long$Average)] <- 0
data_long$Stdev[is.na(data_long$Stdev)] <- 0

# Colour palette. The values are unnamed, so ggplot2 matches them to the
# groups in scale order (G34 -> "red", Other -> "navy").
palette_human <- c("red", "navy")
# Plotting function for 4C contact profiles: one line per group for the
# average signal, with a shaded ribbon spanning average +/- stdev.
#
# df       Long-format data frame with columns START (x position), Average,
#          Stdev and Group.
# palette  Colours handed to both the colour (line) and fill (ribbon) scales.
#
# Returns the result of adding the layers below to whatever rr_ggplot()
# returns (rr_ggplot is defined outside this document; presumably a ggplot
# object -- verify).
plot_4c <- function(df, palette) {

  # Take the x-axis range from the data frame being plotted, not from a global
  # `data` object, so the breaks always agree with `df`.
  x_breaks <- seq(from = min(df$START), to = max(df$START), by = 20000)

  df %>%
    rr_ggplot(aes(x = START, y = Average), plot_num = 1) +
    # Plot the stdev as an area "ribbon"
    geom_ribbon(
      aes(ymin = Average - Stdev, ymax = Average + Stdev, fill = Group),
      alpha = 0.3
    ) +
    # Plot the solid line for the average
    geom_line(aes(colour = Group)) +
    # Set the colours
    scale_colour_manual(values = palette) +
    scale_fill_manual(values = palette) +
    # Add more breaks to the x-axis (one every 20000 units of START)
    scale_x_continuous(breaks = x_breaks) +
    theme_cowplot() +
    # Rotate the tick labels and shrink them so the dense breaks stay legible
    theme(
      axis.text.x = element_text(
        angle = 90, vjust = 0.5, hjust = 1, size = rel(0.7)
      )
    )
}
# Draw the human PDGFRA-anchored profile (G34 vs Other) with the human palette
plot_4c(data_long, palette_human)
## Loading required package: readr
[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//human_pdgfra_plot…]
data <- read_excel(here(subdir, "data/PDGFRA_promoter_4C_mice.xlsx"))

# Reshape to long format with three value columns per row:
# 1) Group (GE, Cortex or ESC), 2) Average, and 3) Stdev.
# Each average/stdev pair is first glued into one "<avg>_<sd>" string, stacked
# by group, then split back into two columns and converted to numeric.
data_long <- data %>%
  unite(GE, Average_GE, STDEVA_GE) %>%
  unite(Cortex, Average_Cortex, STDEVA_Cortex) %>%
  unite(ESC, Average_ESC, STDEVA_ESC) %>%
  gather(Group, Value, GE, Cortex, ESC) %>%
  separate(Value, into = c("Average", "Stdev"), sep = "_") %>%
  # Numeric columns are required for plotting
  mutate(
    Average = as.numeric(Average),
    Stdev = as.numeric(Stdev)
  )

# Colour palette
# NOTE(review): the values are unnamed, so ggplot2 assigns them in scale order
# (alphabetical for a character column: Cortex, ESC, GE), not in the order the
# groups are listed above -- confirm each group gets the intended colour.
palette_mouse <- c("blue", "orange", "gray50")

# Plot
plot_4c(data_long, palette_mouse)
[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//mouse_pdgfra_plot…]
Generate the plots for contacts anchored at the GSX2 promoter:
data <- read_excel(here(subdir, "data/GSX2_promoter_4C_human.xlsx"))

# Reshape to long format with three value columns per row:
# 1) Group (G34 or Other), 2) Average, and 3) Stdev.
# Each average/stdev pair is first glued into one "<avg>_<sd>" string, stacked
# by group, then split back into two columns and converted to numeric.
data_long <- data %>%
  unite(G34, Average_G34, STDEVA_G34) %>%
  unite(Other, Average_other_pGBMs, STDEVA_non_G34) %>%
  gather(Group, Value, G34, Other) %>%
  separate(Value, into = c("Average", "Stdev"), sep = "_") %>%
  # Numeric columns are required for plotting
  mutate(
    Average = as.numeric(Average),
    Stdev = as.numeric(Stdev)
  )

# Draw the human GSX2-anchored profile with the human palette
plot_4c(data_long, palette_human)
[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//human_gsx2_plot…]
data <- read_excel(here(subdir, "data/GSX2_promoter_4C_mice.xlsx"))

# Reshape to long format with three value columns per row:
# 1) Group (GE or Cortex; ESC is dropped at the end), 2) Average, and 3) Stdev.
# Each average/stdev pair is first glued into one "<avg>_<sd>" string, stacked
# by group, then split back into two columns and converted to numeric.
data_long <- data %>%
  unite(GE, Average_GE, STDEVA_GE) %>%
  unite(Cortex, Average_Cortex, STDEVA_Cortex) %>%
  unite(ESC, Average_ESC, STDEVA_ESC) %>%
  gather(Group, Value, GE, Cortex, ESC) %>%
  separate(Value, into = c("Average", "Stdev"), sep = "_") %>%
  filter(Group != "ESC") %>%
  # Numeric columns are required for plotting
  mutate(
    Average = as.numeric(Average),
    Stdev = as.numeric(Stdev)
  )

# NOTE(review): palette_mouse holds three unnamed colours but only two groups
# remain here, so GE may not get the same colour as in the PDGFRA mouse plot
# -- confirm the colours are as intended.
plot_4c(data_long, palette_mouse)
[figure/source data @ G34-gliomas/bulk_transcriptome_epigenome/figures/05//mouse_gsx2_plot…]
This document was last rendered on:
## 2020-09-28 09:59:15
The git repository and last commit:
## Local: master /lustre03/project/6004736/sjessa/from_beluga/HGG-G34/G34-gliomas
## Remote: master @ origin (git@github.com:fungenomics/G34-gliomas.git)
## Head: [f55adef] 2020-09-25: Add PDGFRA-GSX2 correlation for non-G34 samples
The random seed was set with set.seed(100)
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /cvmfs/soft.computecanada.ca/easybuild/software/2017/Core/imkl/2018.3.222/compilers_and_libraries_2018.3.222/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
##
## locale:
## [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
## [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
## [7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] readr_1.3.1 dplyr_0.8.0 tidyr_0.8.2 cowplot_0.9.4 ggplot2_3.1.0
## [6] readxl_1.2.0 here_0.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.5 git2r_0.27.1 cellranger_1.1.0 compiler_3.5.1
## [5] pillar_1.4.6 RColorBrewer_1.1-2 plyr_1.8.6 tools_3.5.1
## [9] digest_0.6.25 evaluate_0.14 lifecycle_0.2.0 tibble_3.0.3
## [13] gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.7 yaml_2.2.1
## [17] xfun_0.17 withr_2.2.0 stringr_1.4.0 knitr_1.29
## [21] hms_0.5.3 vctrs_0.3.4 rprojroot_1.3-2 grid_3.5.1
## [25] tidyselect_1.1.0 glue_1.4.2 R6_2.4.1 rmarkdown_1.11
## [29] farver_2.0.3 purrr_0.3.4 magrittr_1.5 codetools_0.2-15
## [33] backports_1.1.9 scales_1.1.1 htmltools_0.5.0 ellipsis_0.3.1
## [37] assertthat_0.2.1 colorspace_1.4-1 labeling_0.3 stringi_1.5.3
## [41] lazyeval_0.2.2 munsell_0.5.0 crayon_1.3.4
A project of the Kleinman Lab at McGill University, using the rr reproducible research template.
home
GitHub