1 Load data and R libraries

First, we load necessary R libraries.

library(data.table)
library(ggpubr)
library(ggbeeswarm)
library(rstatix)
library(janitor)
library(ggforce)
library(openxlsx)
library(dplyr)


options(stringsAsFactors = F,max.print = 10000,error=NULL,
        "openxlsx.dateFormat" = "dd/mm/yyyy"
        )

Then, we load the data source necessary to generate all the figures.

sheet.names <- getSheetNames("../../Revision/Results/CLL_fCpGs_Gene_expression_bulk/Data_source_gene_expression.xlsx")
sheet.names <- structure(sheet.names,names=sheet.names)
sheet.names

data.source <- lapply(sheet.names, function(sheet.i){
  res <- read.xlsx(xlsxFile = "../../Revision/Results/CLL_fCpGs_Gene_expression_bulk/Data_source_gene_expression.xlsx",
                   sheet = sheet.i,detectDates = T)
})

lapply(data.source,head)

2 Plotting!

2.1 PP plot of all cases

We can see how genes associated with fCpGs show a lower gene expression than those non-fCpGs genes.(The Illumina array annotation was used to link CpGs to genes).

df.pp.plot <- data.source$pp_plot

pp.plot <- 
  df.pp.plot %>%
  ggplot(aes(x=Non.fCpG.genes,
             y=fCpG.genes
  )
  )+
  xlab("Non-fCpG genes")+
  ylab("fCpG genes")+
  geom_abline(slope = 1,
              color="grey80",
              lty="dashed",
              lwd=0.3
  )+
  ggrastr::rasterise(geom_point(size=1.5),dpi=300)+
  # ggtitle(paste0("pp plot (%) of tpms, samples:",length(unique(df.tpms$sample))))+
  theme_bw()+
  theme(text=element_text(size = 5),
        line = element_line(linewidth = 0.3),
        panel.background = element_rect(fill = NA,colour = NA,linewidth = 0.3)
  )
print(pp.plot)

2.2 Example of 1 CLL patient

We selected a random patient and plot the gene expression of genes associated with fCpGs and those that are not associated with them. (The Illumina array annotation was used to link CpGs to genes).

random.sample.anonymous <- data.source$sample_genes_fCpGs_non.fCpgs$sample



plot.genes.sample <- 
  data.source$sample_genes_fCpGs_non.fCpgs %>%
  ggplot(aes(x = fCpG,
             y = tpm +1
  )
  )+
  geom_boxplot(
    aes(fill=fCpG),#disease_state,sample_tx
    show.legend = F,
    lwd=0.3
  )+
  geom_pwc(method = "wilcox_test",
           size = 0.3,
           p.adjust.method = "none",
           label = "P={p}",
           y.position = log10(8500),
           label.size = 1
  )+
  scale_fill_manual(values = c("orange2","grey70"))+
  coord_cartesian(ylim = c(0,max(data.source$sample_genes_fCpGs_non.fCpgs$tpm))+1)+
  scale_y_log10()+
  annotation_logticks(sides = "l",
                      size = 0.3)+
  ggtitle(paste0("Anonymous=",random.sample.anonymous))+
  theme_bw()+
  theme(legend.position = "none",
        plot.margin = unit(c(0,0,0,0),"pt"),
        text = element_text(size = 6),
        line = element_line(linewidth = 0.3),
        panel.border = element_rect(fill = NA,linewidth = 0.3),
        panel.background = element_blank()
  )
plot.genes.sample

2.3 Expression vs number of methylated alleles

All genes related to fCpGs are lowly expressed regardless of the number of alleles methylated. A beta binomial mixture model was used to categorize CpGs, and then Illumina array annotation was used to link CpGs to genes.

## plot genes segregated by meth allele of the random sample
plot.meth.alleles.sample <- 
  data.source$sample_meth_alleles_expression %>% 
  ggplot(aes(x = meth,
             y = tpm+1
  )
  )+
  geom_beeswarm(aes(fill=meth),
                color="grey10",
                corral = "wrap",
                pch=21,
                stroke=0.1,
                size=1
  )+
  geom_boxplot(aes(fill=meth),
               alpha=0.2,
               outlier.shape = NA,
               lwd=0.3
  )+
  scale_fill_manual(values = c("0"="#2F7FC2","2"="#BC5250","1"="grey80"))+
  geom_pwc(method = "wilcox_test",
           label = "P={p}",
           size=0.3,
           label.size = 1,
           step.increase = 0.05
  )+
  coord_cartesian(ylim = c(0,max(data.source$sample_genes_fCpGs_non.fCpgs$tpm))+1)+
  scale_y_log10()+
  annotation_logticks(sides = "l",
                      size=0.3)+
  ylab(NULL)+
  xlab("# of fCpG methylated alleles")+
  theme_bw()+
  theme(legend.position = "none",
        plot.margin = unit(c(0,0,0,0),"pt"),
        text = element_text(size = 6),
        line = element_line(linewidth = 0.3),
        panel.border = element_rect(fill = NA,linewidth = 0.3),
        panel.background = element_blank()
  )

print(plot.meth.alleles.sample)

2.4 All plots together

Plot all panels together

cowplot::plot_grid(pp.plot,
                   plot.genes.sample,
                   plot.meth.alleles.sample,
                   nrow = 1,
                   align = "h",
                   rel_widths = c(1,0.5,0.75)
                   )

3 Session info

print(sessionInfo())
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] dplyr_1.1.4       openxlsx_4.2.8    ggforce_0.5.0     janitor_2.2.1    
##  [5] rstatix_0.7.2.999 ggbeeswarm_0.7.2  ggpubr_0.6.1      ggplot2_3.5.2    
##  [9] data.table_1.17.8 BiocStyle_2.36.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        beeswarm_0.4.0      xfun_0.53          
##  [4] bslib_0.9.0         Cairo_1.6-5         vctrs_0.6.5        
##  [7] tools_4.5.0         generics_0.1.4      tibble_3.3.0       
## [10] pkgconfig_2.0.3     RColorBrewer_1.1-3  lifecycle_1.0.4    
## [13] compiler_4.5.0      farver_2.1.2        stringr_1.5.1      
## [16] tinytex_0.57        carData_3.0-5       snakecase_0.11.1   
## [19] vipor_0.4.7         htmltools_0.5.8.1   sass_0.4.10        
## [22] yaml_2.3.10         Formula_1.2-5       pillar_1.11.0      
## [25] car_3.1-3           jquerylib_0.1.4     tidyr_1.3.1        
## [28] MASS_7.3-65         cachem_1.1.0        magick_2.8.7       
## [31] abind_1.4-8         tidyselect_1.2.1    zip_2.3.3          
## [34] digest_0.6.37       stringi_1.8.7       purrr_1.1.0        
## [37] bookdown_0.44       labeling_0.4.3      cowplot_1.2.0      
## [40] polyclip_1.10-7     fastmap_1.2.0       grid_4.5.0         
## [43] cli_3.6.5           magrittr_2.0.3      dichromat_2.0-0.1  
## [46] broom_1.0.9         withr_3.0.2         scales_1.4.0       
## [49] backports_1.5.0     lubridate_1.9.4     timechange_0.3.0   
## [52] rmarkdown_2.29      ggsignif_0.6.4      evaluate_1.0.5     
## [55] knitr_1.50          ggrastr_1.0.2       rlang_1.1.6        
## [58] Rcpp_1.1.0          glue_1.8.0          tweenr_2.0.3       
## [61] BiocManager_1.30.26 rstudioapi_0.17.1   jsonlite_2.0.0     
## [64] R6_2.6.1