Analyses realted to control probes from Illumina arrays, showing that fCpGs are not SNPs.

1 Load packages and fCpGs information

##
## Load necessary packages and set global options
##

library(data.table)
library(janitor)
library(dplyr)
library(ggpubr)
library(pals)
library(GenomicRanges)
library(openxlsx)
library(Biostrings)
library(ComplexHeatmap)
library(circlize)
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(SNPlocs.Hsapiens.dbSNP155.GRCh38)
library(MafH5.gnomAD.v4.0.GRCh38)
library(patchwork)
library(ggrepel)


options(stringsAsFactors = F,max.print = 10000,error=NULL,
        "openxlsx.dateFormat" = "dd/mm/yyyy"
)



## load fCpGs with meth values

fcpgs.betas <- 
  fread("../Data/FMC/Final_data/beta_filtered_samples_fcpgs_laplacian.tsv",data.table = F) %>% 
  tibble::column_to_rownames(var = "V1")

fcpgs.betas[1:5,1:5]
dim(fcpgs.betas)


##load all metadata
sheets <- getSheetNames("../Revision/Data/Supplementary_Tables_revision_curated.3.xlsx")
sheets <- setNames(sheets,sheets)

fCpGs.metadata <- lapply(sheets,function(i){
  openxlsx::read.xlsx("../Revision/Data/Supplementary_Tables_revision_curated.3.xlsx",sheet = i,detectDates = T)
})




##compare 5 random fCpGs and plot
evoflux.meth <- fread("../Revision/Data/meth_palette_evoflux.tsv")
evoflux.meth


# color smaples
cols.paper <- c(
  NA_values="#666666",
  HPC="#E6E6E6",
  pre.Bcell="#D5D5D5",
  NBC="#C3C3C3",
  GCB="#AEAEAE",
  tPC="#969696",
  MBC="#787878",
  bmPC="#4D4D4D",
  Tcell="#F0F8FF",
  PBMCs="#FFF8DC",
  Whole_blood="#CDC8B1",
  Normal_lymphoid_cell="#D5D5D5",
  Normal_myeloid_cell="#E6E6FA",
  "T-ALL"="#CD3333",
  "T-ALL-remission"="#C7A5A5",
  "B-ALL"="#E69F00",
  "B-ALL-remission"="#D8CAAB",
  MCL="#F0E442",
  C1.or.cMCL="#F5D51B",
  C2.or.nnMCL="#0072B2",
  "DLBCL-NOS"="#56B4E9",
  MBL="#5A9A89",
  CLL="#009E73",
  RT="#02C590",
  CLL.unmutated="#e65100",
  CLL.mutated="#361379",
  nCLL="#006E93",
  iCLL="#FDC010",
  mCLL="#963736",
  Disease.Unclassified="#CCCCCC",
  MGUS="#BF99AE",
  MM="#CC79A7"
)

2 fCpG hg38

fcpgs.hg19.CG <- minfi::getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)[rownames(fcpgs.betas),] %>% 
  as.data.frame()

fcpgs.hg19.CG  <- 
  GRanges(fcpgs.hg19.CG$chr,IRanges(fcpgs.hg19.CG$pos,fcpgs.hg19.CG$pos),strand = fcpgs.hg19.CG$strand,
          fcpgs.hg19.CG %>% select(-c(chr,pos,strand))
  )
fcpgs.hg19.CG

getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.CG[strand(fcpgs.hg19.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.CG[strand(fcpgs.hg19.CG)=="-"]) %>% as.character() %>% table

##perform liftover
hg19.to.hg38.chain <- rtracklayer::import.chain(con = "../Revision/Data/Nanopore/hg19ToHg38.over.chain")
fcpgs.hg38.CG <- rtracklayer::liftOver(x = fcpgs.hg19.CG,chain = hg19.to.hg38.chain)
fcpgs.hg38.CG <- unlist(fcpgs.hg38.CG)

length(fcpgs.hg19.CG)
length(fcpgs.hg38.CG)

fcpgs.hg19.CG$Name[which(!fcpgs.hg19.CG$Name %in% fcpgs.hg38.CG$Name)]

fcpgs.hg38.CG

getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() %>% table

fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"] %>% granges()

##liftover do not work on this CpG
fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"][which(getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() !="G"),]
exclude <- fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]$Name[which(getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() !="G")]
exclude

fcpgs.hg38.CG <- fcpgs.hg38.CG[fcpgs.hg38.CG$Name!=exclude]
fcpgs.hg38.CG

#check nucleotides ## OK!
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() %>% table

3 Load candidate CpGs

We next load all the CpGs passing inital QC.

## load background CPGs to test overlap with gnomADv4
cpgs.candidate.hg19.CG <- fread("../Data/FMC/Final_data/candidate_cpgs_24_07_23.txt",header = F,data.table = F)
head(cpgs.candidate.hg19.CG);dim(cpgs.candidate.hg19.CG)
colnames(cpgs.candidate.hg19.CG) <- "CpGs"
head(cpgs.candidate.hg19.CG);dim(cpgs.candidate.hg19.CG)
cpgs.candidate.hg19.CG <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)[cpgs.candidate.hg19.CG$CpGs,] %>%   as.data.frame()

cpgs.candidate.hg19.CG  <- 
  GRanges(cpgs.candidate.hg19.CG$chr,IRanges(cpgs.candidate.hg19.CG$pos,cpgs.candidate.hg19.CG$pos),strand = cpgs.candidate.hg19.CG$strand,
          cpgs.candidate.hg19.CG %>% dplyr::select(-c(chr,pos,strand))
  )
cpgs.candidate.hg19.CG

getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,cpgs.candidate.hg19.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,cpgs.candidate.hg19.CG[strand(cpgs.candidate.hg19.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,cpgs.candidate.hg19.CG[strand(cpgs.candidate.hg19.CG)=="-"]) %>% as.character() %>% table



cpgs.candidate.hg38.CG <- rtracklayer::liftOver(x = cpgs.candidate.hg19.CG,chain = hg19.to.hg38.chain)
cpgs.candidate.hg38.CG <- unlist(cpgs.candidate.hg38.CG)

length(cpgs.candidate.hg19.CG)
length(cpgs.candidate.hg38.CG)

cpgs.candidate.hg19.CG$Name[which(!cpgs.candidate.hg19.CG$Name %in% cpgs.candidate.hg38.CG$Name)]
cpgs.candidate.hg38.CG

getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG[strand(cpgs.candidate.hg38.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG[strand(cpgs.candidate.hg38.CG)=="-"]) %>% as.character() %>% table


##liftover do not work on this CpG
exclude <- which(as.character(getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG))=="A")
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG)[exclude]


cpgs.candidate.hg38.CG <- cpgs.candidate.hg38.CG[-exclude]
cpgs.candidate.hg38.CG
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG) %>% as.character() %>% table

4 Proportion SNPs CpG vs fCpGs

We next use annotation packages to annotate both candidate CpGs and fCpGs, and perform fisher test. fCpGs have significantly less propability compared to background candidate CpGs to contain SNPs reported in gnomAD consotia, v4.0 (2024).

##NOTE: following packages do not consider strand specificy and rely on + strand (same as if was all ranges strand=*)


## get SNPs accroding to db155
snps <- SNPlocs.Hsapiens.dbSNP155.GRCh38  
# snpcount(snps)
# seqinfo(snps)

##check seqlevels styles
# seqlevels(snps)
# seqlevels(fcpgs.hg38.CG)

##arrange same style of levels
fcpgs.hg38.CG.ncbi <- fcpgs.hg38.CG
# seqlevelsStyle(fcpgs.hg38.CG.ncbi)
seqlevels(fcpgs.hg38.CG.ncbi) <- gsub("chr","",seqlevels(fcpgs.hg38.CG))
# seqlevelsStyle(fcpgs.hg38.CG.ncbi)


cpgs.candidate.hg38.CG.ncbi <- cpgs.candidate.hg38.CG
# seqlevelsStyle(cpgs.candidate.hg38.CG.ncbi)
seqlevels(cpgs.candidate.hg38.CG.ncbi) <- gsub("chr","",seqlevels(fcpgs.hg38.CG))
# seqlevelsStyle(cpgs.candidate.hg38.CG.ncbi)


##check strand specificity 
# fcpgs.hg38.Cs.ncbi <- fcpgs.hg38.Cs
# seqlevelsStyle(fcpgs.hg38.Cs.ncbi)
# seqlevels(fcpgs.hg38.Cs.ncbi) <- gsub("chr","",seqlevels(fcpgs.hg38.Cs))
# seqlevelsStyle(fcpgs.hg38.Cs.ncbi)

##get all reported SNPs regardless of MAF 
fcpgs.hg38.CG.snps <-  snpsByOverlaps(x = snps,
                                              # ranges=fcpgs.hg38.C[which(strand(fcpgs.hg38.Cs.ncbi)=="-")], ##all Gs if considering - strand with Cs positions
                                              ranges = fcpgs.hg38.CG.ncbi,
                                              genome=BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38)
fcpgs.hg38.CG.snps
## UnstitchedGPos object with 783 positions and 5 metadata columns:
##         seqnames       pos strand |    RefSNP_id alleles_as_ambig genome_compat
##            <Rle> <integer>  <Rle> |  <character>      <character>     <logical>
##     [1]        1    909931      * |  rs145001858                Y          TRUE
##     [2]        1   1011495      * |  rs945574781                Y          TRUE
##     [3]        1   1014513      * |  rs201328791                B          TRUE
##     [4]        1   1034445      * |  rs764173254                Y          TRUE
##     [5]        1   1117220      * |  rs942434937                B          TRUE
##     ...      ...       ...    ... .          ...              ...           ...
##   [779]       22  37070895      * |  rs761525378                B          TRUE
##   [780]       22  39665573      * |  rs758531004                H          TRUE
##   [781]       22  41681935      * |  rs184272182                B          TRUE
##   [782]       22  43089654      * | rs1929494260                S          TRUE
##   [783]       22  45413363      * | rs2087376347                Y          TRUE
##          ref_allele     alt_alleles
##         <character> <CharacterList>
##     [1]           C               T
##     [2]           C               T
##     [3]           C             G,T
##     [4]           C               T
##     [5]           C             G,T
##     ...         ...             ...
##   [779]           C             G,T
##   [780]           C             A,T
##   [781]           C             G,T
##   [782]           C               G
##   [783]           C               T
##   -------
##   seqinfo: 25 sequences (1 circular) from GRCh38.p13 genome
fcpgs.hg38.CG.snps@elementMetadata$ref_allele %>% table 
## .
##   C 
## 783
fcpgs.hg38.CG.snps@elementMetadata$alt_alleles %>% table %>% apply(.,2,table) ##majority C>T, as expected.
##     A   G   T
## 0 572 589 105
## 1 211 194 678
##get all reported SNPs regardless of MAF with 100 bp's extension on each site
fcpgs.hg38.101.CG.snps <-  snpsByOverlaps(x = snps,
                              ranges = resize(x = fcpgs.hg38.CG.ncbi,width = 101,fix = "center",ignore.strand=T),#fcpgs.hg38.CG.ncbi, 
                              genome=BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38)
# fcpgs.hg38.101.CG.snps


##
## Get MAFs from gnomAD consotia, v4.0 (2024)
##

mafh5 <- MafH5.gnomAD.v4.0.GRCh38
# mafh5
seqlevelsStyle(mafh5)
## [1] "UCSC"
seqlevelsStyle(fcpgs.hg38.CG)
## [1] "UCSC"
seqlevelsStyle(fcpgs.hg38.CG.ncbi) ## follwong code works despite having differnt styles, previously checked!
## [1] "NCBI"    "Ensembl"
##overlap fCpGs
fcpgs.hg38.CG.MAF.gnomAD.v4 <- gscores(x = mafh5,ranges = fcpgs.hg38.CG,pop=populations(mafh5)) 
fcpgs.hg38.CG.MAF.gnomAD.v4$AF %>% summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00001 0.00001 0.00002 0.00118 0.00004 0.36000     394
names(fcpgs.hg38.CG.MAF.gnomAD.v4) <- fcpgs.hg38.CG.MAF.gnomAD.v4$Name
# fcpgs.hg38.CG.MAF.gnomAD.v4

#candidate
cpgs.candidate.hg38.CG.MAF.gnomAD.v4 <- gscores(x = mafh5,ranges = cpgs.candidate.hg38.CG,pop=populations(mafh5)) 
cpgs.candidate.hg38.CG.MAF.gnomAD.v4$AF %>% summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.00    0.00    0.36  134463
names(cpgs.candidate.hg38.CG.MAF.gnomAD.v4) <- cpgs.candidate.hg38.CG.MAF.gnomAD.v4$Name
cpgs.candidate.hg38.CG.MAF.gnomAD.v4$fCpG <-
  ifelse(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$Name %in% fcpgs.hg38.CG.MAF.gnomAD.v4$Name,T,F)
# table(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$fCpG)

##chi square background and fCpGs proportions
print(prop.table(table(is.na(fcpgs.hg38.CG.MAF.gnomAD.v4$AF)))) ## 60%
## 
##     FALSE      TRUE 
## 0.5963115 0.4036885
print(prop.table(table(is.na(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$AF)))) ## 65%
## 
##     FALSE      TRUE 
## 0.6544496 0.3455504
tab <- table("present in gnomADv.40"=!is.na(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$AF),
             fCpG=cpgs.candidate.hg38.CG.MAF.gnomAD.v4$fCpG)
print(tab)
##                      fCpG
## present in gnomADv.40  FALSE   TRUE
##                 FALSE 134069    394
##                 TRUE  254082    582
print(fisher.test(tab))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  tab
## p-value = 0.0001582
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.6846029 0.8881783
## sample estimates:
## odds ratio 
##  0.7794582

5 mAF of SNPs reported in 50bp’s extension from fCPGs

##map using all SNPs with 101 extendion of fCPGs
##set same seqleves for overlap
fcpgs.hg38.101.CG.snps
seqlevelsStyle(fcpgs.hg38.101.CG.snps)
genome(fcpgs.hg38.101.CG.snps)

##arrange levels and genome
seqlevels(fcpgs.hg38.101.CG.snps) <- paste0("chr",seqlevels(fcpgs.hg38.101.CG.snps))
seqlevels(fcpgs.hg38.101.CG.snps)
genome(fcpgs.hg38.101.CG.snps) <- "hg38"
genome(fcpgs.hg38.101.CG.snps)

##overlap
fcpgs.hg38.101.MAF.rs.gnomAD.v4 <- gscores(x = mafh5,ranges = fcpgs.hg38.101.CG.snps,
                                    pop=populations(mafh5))
fcpgs.hg38.101.MAF.rs.gnomAD.v4$AF %>% summary
fcpgs.hg38.101.MAF.rs.gnomAD.v4
fcpgs.hg38.101.MAF.rs.gnomAD.v4$AF %>% qplot()+ theme_classic() + ggtitle("mAF of all SNPs in 100 bp's extension (fCpGs at center) ")

6 mAF of SNPs mapping to fCpGs

We have found that some fCpGs have an SNP annotated in dbSNP155.GRCh38, so we then further investigate them.

6.1 Proportion and mAF

##get MAF of all previously maped SNPs of fCPGs
##set same seqleves for overlap
fcpgs.hg38.CG.snps
seqlevelsStyle(fcpgs.hg38.CG.snps)
genome(fcpgs.hg38.CG.snps)

##arrange levels and genome
seqlevels(fcpgs.hg38.CG.snps) <- paste0("chr",seqlevels(fcpgs.hg38.CG.snps))
seqlevels(fcpgs.hg38.CG.snps)
genome(fcpgs.hg38.CG.snps) <- "hg38"
genome(fcpgs.hg38.CG.snps)

#overlap
overlap.fCpGs <- findOverlaps(query = fcpgs.hg38.CG.MAF.gnomAD.v4,subject = fcpgs.hg38.CG.snps)
overlap.fCpGs


# complete fCPGs with snps info and BSEgenome hg38
fcpgs.hg38.CG.MAF.gnomAD.v4.snps <- as.data.frame(fcpgs.hg38.CG.MAF.gnomAD.v4)
head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)
fcpgs.hg38.CG.MAF.gnomAD.v4.snps <- cbind(fcpgs.hg38.CG.MAF.gnomAD.v4.snps,
                                           RefSNP_id=NA,
                                           alleles_as_ambig=NA,
                                           ref_allele=NA,
                                           alt_alleles=NA
                                           )
head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)

fcpgs.hg38.CG.MAF.gnomAD.v4.snps[queryHits(overlap.fCpGs),c("RefSNP_id","alleles_as_ambig","ref_allele","alt_alleles")] <- 
  as.data.frame(mcols(fcpgs.hg38.CG.snps[subjectHits(overlap.fCpGs),c("RefSNP_id","alleles_as_ambig","ref_allele","alt_alleles")]))

head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)

fcpgs.hg38.CG.MAF.gnomAD.v4.snps %>% 
  dplyr::count(is.na(AF),is.na(RefSNP_id))

##the following SNPs do not have available MAF, (checked also at NCBI dbSNP)
fcpgs.hg38.CG.MAF.gnomAD.v4.snps %>% 
  filter(is.na(AF),!is.na(RefSNP_id)) %>% 
  select(seqnames,start,AF,RefSNP_id)

## Complete maf of all rs flanking 101 bps  of fCPGs
overlap.fCpGs.101 <- findOverlaps(query = resize(fcpgs.hg38.CG.MAF.gnomAD.v4,width = 101,fix = "center",ignore.strand=T),
                                  subject = fcpgs.hg38.101.MAF.rs.gnomAD.v4)
overlap.fCpGs.101
length(overlap.fCpGs.101)
length(unique(queryHits(overlap.fCpGs.101)))
length(subjectHits(overlap.fCpGs.101))

flanking_100 <- lapply(seq_along(as.list(overlap.fCpGs.101)),function(i){
  
  CpG <- fcpgs.hg38.CG.MAF.gnomAD.v4$Name[i]
  indx <- as.list(overlap.fCpGs.101)[[i]]
  
  dat <- fcpgs.hg38.101.MAF.rs.gnomAD.v4[indx,] %>% as.data.frame()
  dat <- dat[,c("RefSNP_id","seqnames","pos","alleles_as_ambig","ref_allele","alt_alleles","AF","AF_allpopmax")]
  colnames(dat) <- paste0(colnames(dat),"_50bp.flanking")
  res <- apply(dat,2,function(x)paste0(x,collapse = ";"))
  res[["alt_alleles_50bp.flanking"]] <- 
  gsub('c\\(|\\\\|\\"|\\)',"",res[["alt_alleles_50bp.flanking"]]) %>% gsub(", ","/",.)
  res <- res %>% t() %>% data.frame()
  res$fCpG_50bps.flanking <- CpG
  return(res)
  
})
flanking_100 <- do.call(rbind,flanking_100)
any(flanking_100=="") ## 1 CpG do not have any rs
flanking_100[flanking_100==""] <- NA
any(flanking_100=="") ## 1 CpG do not have any rs
head(flanking_100)

# plot gnomAD MAFs for all fCpGs
head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)


##merge 50bp flanking info
fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking <- 
  cbind(fcpgs.hg38.CG.MAF.gnomAD.v4.snps,
        flanking_100
        )

head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking)

fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$AF_50bp.flanking_max <- 
  apply(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking,1,function(x){
  AFs <- x$AF_50bp.flanking
  res <- max(strsplit(AFs,";")[[1]] %>% as.numeric(),na.rm = T)
  if(res==-Inf){
    res <- NA
  }
  return(res)
})

##identical
# fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$AF_50bp.flanking_max %>% qplot
# fcpgs.hg38.CG.101.MAF.max.gnomAD.v4$AF %>% qplot()

library(patchwork)
library(ggrepel)

mAF <- 1e-3

p.snps <- fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  mutate(entry=case_when(is.na(AF) ~ F, .default = T)) %>% 
  ggplot(aes(x=entry))+
  xlab("Minor allele frequency (mAF) reported in gnomADv.4.0")+
  ggtitle(paste0("Pan-lymphoid fCpGs (hg38, N=",nrow(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking),")"))+
  geom_bar()+
  geom_text(stat='count', aes(label=after_stat(count)),size=2, vjust=-1)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 5),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  mutate(label=case_when(AF>=0.01 ~ paste0(RefSNP_id,"   (",Name,")"),.default = NA)) %>% 
  ggplot(aes(x = AF
             ))+
  geom_histogram()+
  scale_x_continuous(n.breaks = 10)+
  ggtitle("fCpGs with mAF reported in gnomADv.4.0 (True from left plot)")+
  # annotate("text",x = mAF+0.05,y = 400,label=paste0("mAF≥0.001"),size=2,color="red3")+
  # geom_vline(xintercept = mAF,lwd=0.3,linetype="dashed",color="red3")+
  geom_text(mapping = aes(y=5,
                          label=label
                          ),
            size=2,
            angle=90,
            hjust=0,
            vjust=0.5
            )+
  xlab("mAF")+
  ylab(NULL)+
    theme_classic()+
    theme(text=element_text(color="grey0",size = 5),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        plot.margin = margin(1,1,1,1),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )
p.snps

6.2 Heatmap with mAF≥1e-3

## Heatmap settings

ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T



## SNPS at CpGs
fcpgs.snps <- 
  fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  filter(AF>=1e-3)
fcpgs.snps %>% dim


table(colnames(fcpgs.betas) %in% fCpGs.metadata$supplementary_table_1$participant_id_anonymous)
table(fCpGs.metadata$supplementary_table_1$participant_id_anonymous %in% colnames(fcpgs.betas))


h.CpGs.snps <- Heatmap(fcpgs.betas[fcpgs.snps$Name,
                    fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
        col = evoflux.meth$color,
        name="DNA methylation",
        column_title = paste0("fCpGs with reported SNP with mAF≥",mAF),
        row_title = paste0("N=",nrow(fcpgs.snps)),
        heatmap_legend_param = list(direction="horizontal"),
        clustering_distance_columns = "euclidean",
        clustering_method_columns = "single",
        show_column_names = F,
        show_row_names = F,
        row_dend_gp = gpar(lwd=0.1),
        column_dend_gp = gpar(lwd=0.1),
        raster_quality = 10,
        top_annotation = HeatmapAnnotation(annot2=factor(case_when(fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="PBMCs" & 
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Whole_blood" & 
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Normal_lymphoid_cell" &
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="B-ALL-remission" &
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="T-ALL-remission" ~ "Lymphoid tumour",
                                                                   .default = "Healthy sample/Blood from remission")
        ),
        annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                     levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
        col=list(annot=cols.paper,
                 annot2=c("Healthy sample/Blood from remission"="grey90","Lymphoid tumour"="grey0")
        ),
        show_annotation_name = F,
        show_legend = T,
        annotation_label = "Samples",
        simple_anno_size = unit(3,"mm"),
        annotation_legend_param = list(direction="horizontal",nrow=3)
        ),
        right_annotation = HeatmapAnnotation(which="row",
                                             mAF=fcpgs.snps$AF,
                                             col=list(mAF=colorRamp2(seq(min(fcpgs.snps$AF),
                                                                        max(fcpgs.snps$AF),
                                                                        length.out=100
                                                                        ),
                                                                    colors = brewer.blues(100)
                                                                    )
                                                      ),
                                             na_col = "grey45",
                                             show_annotation_name = F,
                                             # annotation_name_side = "left",
                                             show_legend = T,
                                             annotation_name_gp = gpar(fontsize=5),
                                             simple_anno_size = unit(2,"mm"),
                                             annotation_legend_param = list(direction="horizontal")
                                             )
        )+
  HeatmapAnnotation(which="row",
                    annot=anno_text(which = "row",
                                    x = fcpgs.snps$Name,
                                    gp = gpar(fontsize=5)),
                    rs=anno_mark(at = which(fcpgs.snps$AF>=0.01),
                                 labels=fcpgs.snps$RefSNP_id[which(fcpgs.snps$AF>=0.01)],
                                 labels_gp = gpar(fontsize=5),link_gp = gpar(lwd=0.2)
                    ),
                    show_annotation_name = F,
                    annotation_legend_param = list(direction="horizontal")
                    )
draw(h.CpGs.snps,
     heatmap_legend_side = "bottom",
     annotation_legend_side = "bottom"
     )

##arrange plot
print((p.snps | grid.grabExpr(draw(h.CpGs.snps,merge_legend=F,
                             heatmap_legend_side = "bottom",
                             annotation_legend_side = "bottom"
                             )
                        )
  ) + 
  plot_layout(widths = c(0.75,1,3),nrow = 1,ncol = 3)
  )

7 Heatmap of flanking regions

We next plot a heatmap with fCpGs that have reported SNPs in their flanking region (50bp’s each side) of more than mAF≥0.01, and we see the same expected clustering with normal samples and tumors separated consistent with their clonality.

## SNPS at CpGs
fcpgs.snps <- 
  fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  filter(AF_50bp.flanking_max>=0.01)
fcpgs.snps %>% dim
head(fcpgs.snps)

table(colnames(fcpgs.betas) %in% fCpGs.metadata$supplementary_table_1$participant_id_anonymous)
table(fCpGs.metadata$supplementary_table_1$participant_id_anonymous %in% colnames(fcpgs.betas))


h.fCpGs.101bp.flanking.snps <- Heatmap(fcpgs.betas[fcpgs.snps$Name,
                                   fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
                       col = evoflux.meth$color,
                       name="DNA methylation",
                       column_title = paste0("fCpGs with mAF of 50bp's flanking sequences as reported in gnomADv.4.0 with mAF≥",mAF),
                       row_title = paste0("N=",nrow(fcpgs.snps)),
                       heatmap_legend_param = list(direction="horizontal"),
                       show_column_names = F,
                       show_row_names = F,
                       clustering_distance_columns = "euclidean",
                       clustering_method_columns = "single",
                       row_dend_gp = gpar(lwd=0.1),
                       column_dend_gp = gpar(lwd=0.05),
                       raster_quality = 5,
                       top_annotation = HeatmapAnnotation(annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                                                                       levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
                                                          col=list(annot=cols.paper),
                                                          show_annotation_name = F,
                                                          annotation_label = "Samples",
                                                          simple_anno_size = unit(3,"mm"),
                                                          annotation_legend_param = list(direction="horizontal",nrow=3)
                       ),
                       right_annotation = HeatmapAnnotation(which="row",
                       mAF=fcpgs.snps$AF_50bp.flanking_max,
                       col=list(mAF=colorRamp2(seq(min(fcpgs.snps$AF_50bp.flanking_max),
                                                   max(fcpgs.snps$AF_50bp.flanking_max),
                                                   length.out=100
                                                   ),
                                               colors = brewer.blues(100)
                                               )
                                ),
                       na_col = "grey45",
                       show_annotation_name = F,
                       # annotation_name_side = "left",
                       show_legend = T,
                       annotation_label = "Maximum mAF from 50bp's flanking sequences",
                       annotation_name_gp = gpar(fontsize=5),
                       simple_anno_size = unit(2,"mm"),
                       annotation_legend_param = list(direction="horizontal")
                       )
)
draw(h.fCpGs.101bp.flanking.snps,
     heatmap_legend_side = "bottom",
     annotation_legend_side = "bottom"
)

8 Export all info

##
## EXPORT
##


library(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
# checck availability of those fCPGs in the last EPICv2 version of Illumina ararys, which have been curated by the comunity

fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$Name %in% minfi::getAnnotation(IlluminaHumanMethylationEPICv2anno.20a1.hg38)$EPICv1_Loci %>%
  table %>% 
  prop.table()

fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$EPICv2 <- 
  ifelse(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$Name %in% minfi::getAnnotation(IlluminaHumanMethylationEPICv2anno.20a1.hg38)$EPICv1_Loci, T,F)


head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking)


## Merge hg19 anf hg38 info together with all snps, etc

##hg19
# get CG and C position hg19
fcpgs.hg19.CG <- sort(sortSeqlevels(fcpgs.hg19.CG))
fcpgs.hg19.Cs <- fcpgs.hg19.CG
fcpgs.hg19.Cs
fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")] <- fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")]+1
start(fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")]) <- end(fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")])
fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")]
#check nucleotides
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.Cs[strand(fcpgs.hg19.Cs)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.Cs[strand(fcpgs.hg19.Cs)=="-"]) %>% as.character() %>% table

fcpgs.hg19.CG.Cs <- 
  data.frame(fcpgs.hg19.CG %>% granges() %>% as.data.frame(),
                               fcpgs.hg19.Cs %>% granges() %>% as.data.frame(),
             Name=fcpgs.hg19.CG$Name
             ) %>% 
  select(Name.hg19=Name,
         chr_CG.hg19=seqnames,
         pos_CG.hg19=start,
         strand_CG.hg19=strand,
         chr_C.hg19=seqnames.1,
         pos_C.hg19=start.1,
         strand_C.hg19=strand.1,
         )
  
head(fcpgs.hg19.CG.Cs);dim(head(fcpgs.hg19.CG.Cs))
tail(fcpgs.hg19.CG.Cs);dim(head(fcpgs.hg19.CG.Cs))

##hg38
fcpgs.hg38.CG <- sort(sortSeqlevels(fcpgs.hg38.CG))
fcpgs.hg38.Cs <- fcpgs.hg38.CG
fcpgs.hg38.Cs
fcpgs.hg38.Cs[which(strand(fcpgs.hg38.CG)=="-")] <- fcpgs.hg38.CG[which(strand(fcpgs.hg38.CG)=="-")]+1
start(fcpgs.hg38.Cs[which(strand(fcpgs.hg38.Cs)=="-")]) <- end(fcpgs.hg38.Cs[which(strand(fcpgs.hg38.Cs)=="-")])
fcpgs.hg38.Cs[which(strand(fcpgs.hg38.Cs)=="-")]
#check nucleotides
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.Cs[strand(fcpgs.hg38.Cs)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.Cs[strand(fcpgs.hg38.Cs)=="-"]) %>% as.character() %>% table

fcpgs.hg38.CG.Cs <- 
  data.frame(fcpgs.hg38.CG %>% granges() %>% as.data.frame(),
             fcpgs.hg38.Cs %>% granges() %>% as.data.frame(),
             Name=fcpgs.hg38.CG$Name
  ) %>% 
  select(Name.hg38=Name,
         chr_CG.hg38=seqnames,
         pos_CG.hg38=start,
         strand_CG.hg38=strand,
         chr_C.hg38=seqnames.1,
         pos_C.hg38=start.1,
         strand_C.hg38=strand.1,
  )

head(fcpgs.hg38.CG.Cs);dim(fcpgs.hg38.CG.Cs)
tail(fcpgs.hg38.CG.Cs);dim(fcpgs.hg38.CG.Cs)


## merge all info together
fCpGs.loc <- left_join(x = fcpgs.hg19.CG.Cs,
                       y=fcpgs.hg38.CG.Cs,
                       by = join_by(Name.hg19==Name.hg38),
                       keep = T
                       )
head(fCpGs.loc);dim(fCpGs.loc)
tail(fCpGs.loc);dim(fCpGs.loc)


##merge annotation from Illumina and minfi annot package
Illumina.info.hg <- fcpgs.hg19.CG %>% mcols() %>% as.data.frame()
colnames(Illumina.info.hg) <- paste0("Illumina_",colnames(Illumina.info.hg))
head(Illumina.info.hg);dim(Illumina.info.hg)

fCpGs.loc.info <- left_join(x = fCpGs.loc,
                            y=Illumina.info.hg,
                            by = join_by(Name.hg19==Illumina_Name),
                            keep = T
                            )
head(fCpGs.loc.info);dim(fCpGs.loc.info)


fCpGs.complete.info <- 
  left_join(x = fCpGs.loc.info,
            y=fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
              select(Name,
                     all_of(colnames(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking)[
                       !colnames(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking) %in% 
                         c(gsub("Illumina_","",colnames(fCpGs.loc.info)),"seqnames","start","end","width","strand")
                     ])
                     ),
            by=join_by(Name.hg19==Name),
            keep = T
            )
head(fCpGs.complete.info);dim(fCpGs.complete.info)

##transform list to character to export with xlsx format
sapply(fCpGs.complete.info,class) %>% table
fCpGs.complete.info[,sapply(fCpGs.complete.info,class)=="list"] <- 
  sapply(fCpGs.complete.info[,sapply(fCpGs.complete.info,class)=="list"],function(x){
    res <- paste0(x,collapse = "/")
    if(res=="" | res=="NA"){
      res <- NA_character_
    }
    return(res)
  })

# write.xlsx(fCpGs.complete.info,"../Results/fCpGs_SNPs/fCpGs.complete.info.xlsx",na.string=c("","NA"))

9 Gapphuntig algorithm

To further exclude any rare genetic confounding, we next perform a the data-driven approach available at minfi package.

##
## Perform gaphunting in fCpGs 
##

##numbers analyzed originally in the manuscipt
threshold <- c(0.025,0.05,0.1,0.2)
outCutoff <- c(0.005,0.01,0.05,0.1)

for(threshold.i in threshold){
  
  writeLines(paste0("threshold.i:",threshold.i))
  
  for (outCutoff.i in outCutoff){
    
    writeLines(paste0("outCutoff.i=",outCutoff.i))
    
    
    gapres <- tryCatch(minfi::gaphunter(as.matrix(fcpgs.betas),threshold = threshold.i,outCutoff = outCutoff.i,keepOutliers = T),
                       error=function(e)return(NA)
    )
    if(is.list(gapres)){
      
      # gapres$proberesults %>% head
      # gapres$proberesults %>% dplyr::count(Groups)
      # gapres$proberesults %>% dplyr::count(Group3)
      # gtsummary::tbl_summary(gapres$proberesults)
      # grid::grid.newpage()
      # gridExtra::grid.table(gapres$proberesults)
      # 
      
      gapres.fCpGs.info <- fCpGs.complete.info[match(rownames(gapres$proberesults),fCpGs.complete.info$Name),]
      
      
      h.gaphunter <- 
        Heatmap(matrix = fcpgs.betas[rownames(gapres$proberesults),fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
                col=evoflux.meth$color,
                column_title = paste0("gaphunted fCpGs at threshold:",threshold.i,", outCutoff=",outCutoff.i),
                row_title = paste0("N=",nrow(gapres$proberesults)),
                border=T,
                use_raster = T,
                raster_quality = 10,
                heatmap_legend_param = list(title="DNA methylation",direction="horizontal"),
                show_row_names = T,
                row_names_side = "left",
                show_column_names = F,
                row_dend_gp = gpar(lwd=0.1),
                column_dend_gp = gpar(lwd=0.1),
                clustering_distance_columns = "euclidean",
                clustering_method_columns = "single",
                top_annotation = HeatmapAnnotation(annot2=factor(case_when(fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="PBMCs" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Whole_blood" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Normal_lymphoid_cell" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="B-ALL-remission" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="T-ALL-remission" ~ "Lymphoid tumour",
                                                                           .default = "Healthy sample/Blood from remission")
                ),
                annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                             levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
                col=list(annot=cols.paper,
                         annot2=c("Healthy sample/Blood from remission"="grey0","Lymphoid tumour"="grey90")
                ),
                show_annotation_name = F,
                annotation_label = "Samples",
                simple_anno_size = unit(3,"mm"),
                annotation_legend_param = list(direction="horizontal",nrow=3)
                ),
                right_annotation = HeatmapAnnotation(which="row",
                                                     mAF=gapres.fCpGs.info$AF,
                                                     col=list(mAF=colorRamp2(seq(min(gapres.fCpGs.info$AF,na.rm = T),
                                                                                 max(gapres.fCpGs.info$AF,na.rm = T),
                                                                                 length.out=100
                                                     ),
                                                     colors = brewer.blues(100)
                                                     )
                                                     ),
                                                     na_col = "grey45",
                                                     show_annotation_name = F,
                                                     # annotation_name_side = "left",
                                                     show_legend = T,
                                                     annotation_label = "mAF reporeted in gnomAD.v4.0",
                                                     annotation_name_gp = gpar(fontsize=5),
                                                     simple_anno_size = unit(2,"mm"),
                                                     annotation_legend_param = list(direction="horizontal")
                )
        )
      # draw(h.gaphunter,
      #      heatmap_legend_side = "bottom",
      #      annotation_legend_side = "bottom",
      #      merge_legend=T
      # )
      
      set.seed(123)
      fCpG.random <- sample(rownames(fcpgs.betas),nrow(gapres$proberesults),replace = F)
      fCpGs.random.info <-  fCpGs.complete.info[match(fCpG.random,fCpGs.complete.info$Name),]
      
      h.random <- 
        Heatmap(matrix = fcpgs.betas[fCpG.random,fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
                col=evoflux.meth$color,
                column_title = "random fCpGs",
                border=T,
                use_raster = T,
                raster_quality = 10,
                clustering_distance_columns = "euclidean",
                clustering_method_columns = "single",
                show_heatmap_legend = F,
                show_row_names = T,
                row_names_side = "right",
                show_column_names = F,
                row_dend_gp = gpar(lwd=0.1),
                column_dend_gp = gpar(lwd=0.1),
                top_annotation = HeatmapAnnotation(annot2=factor(case_when(fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="PBMCs" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Whole_blood" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Normal_lymphoid_cell" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="B-ALL-remission" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="T-ALL-remission" ~ "Lymphoid tumour",
                                                                           .default = "Healthy sample/Blood from remission")
                ),
                annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                             levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
                col=list(annot=cols.paper,
                         annot2=c("Healthy sample/Blood from remission"="grey0","Lymphoid tumour"="grey90")
                ),
                show_annotation_name = F,
                show_legend = F,
                annotation_label = "Samples",
                simple_anno_size = unit(3,"mm"),
                annotation_legend_param = list(direction="horizontal",nrow=3)
                ),
                right_annotation = HeatmapAnnotation(which="row",
                                                     mAF=fCpGs.random.info$AF,
                                                     col=list(mAF=colorRamp2(seq(min(fCpGs.random.info$AF,na.rm = T),
                                                                                 max(fCpGs.random.info$AF,na.rm = T),
                                                                                 length.out=100
                                                     ),
                                                     colors = brewer.blues(100)
                                                     )
                                                     ),
                                                     na_col = "grey45",
                                                     show_annotation_name = F,
                                                     # annotation_name_side = "left",
                                                     show_legend = T,
                                                     annotation_label = "mAF reporeted in gnomAD.v4.0",
                                                     annotation_name_gp = gpar(fontsize=5),
                                                     simple_anno_size = unit(2,"mm"),
                                                     annotation_legend_param = list(direction="horizontal")
                )
        )
      draw(h.gaphunter+h.random,
           heatmap_legend_side = "bottom",
           annotation_legend_side = "bottom",
           merge_legend=T
      )
      
    }else{
      
      p <- ggplot()+
        annotate("text",y=0.5,x=0.5,label=paste0("No results at threshold:",threshold.i,", outCutoff:",outCutoff.i),size=3)+
        theme_void()
      print(p)
      
    }
    
    
    
    
  }
  
}

10 Session info

print(sessionInfo())
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
##  [1] parallel  grid      stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] IlluminaHumanMethylationEPICv2anno.20a1.hg38_1.0.0
##  [2] ggrepel_0.9.6                                     
##  [3] patchwork_1.3.2                                   
##  [4] MafH5.gnomAD.v4.0.GRCh38_3.19.0                   
##  [5] GenomicScores_2.20.2                              
##  [6] SNPlocs.Hsapiens.dbSNP155.GRCh38_0.99.24          
##  [7] BSgenome_1.76.0                                   
##  [8] rtracklayer_1.68.0                                
##  [9] BiocIO_1.18.0                                     
## [10] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [11] minfi_1.54.1                                      
## [12] bumphunter_1.50.0                                 
## [13] locfit_1.5-9.12                                   
## [14] iterators_1.0.14                                  
## [15] foreach_1.5.2                                     
## [16] SummarizedExperiment_1.38.1                       
## [17] Biobase_2.68.0                                    
## [18] MatrixGenerics_1.20.0                             
## [19] matrixStats_1.5.0                                 
## [20] circlize_0.4.16                                   
## [21] ComplexHeatmap_2.24.1                             
## [22] Biostrings_2.76.0                                 
## [23] XVector_0.48.0                                    
## [24] openxlsx_4.2.8                                    
## [25] GenomicRanges_1.60.0                              
## [26] GenomeInfoDb_1.44.2                               
## [27] IRanges_2.42.0                                    
## [28] S4Vectors_0.46.0                                  
## [29] BiocGenerics_0.54.0                               
## [30] generics_0.1.4                                    
## [31] pals_1.10                                         
## [32] ggpubr_0.6.1                                      
## [33] ggplot2_3.5.2                                     
## [34] dplyr_1.1.4                                       
## [35] janitor_2.2.1                                     
## [36] data.table_1.17.8                                 
## [37] BiocStyle_2.36.0                                  
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.5.0                     filelock_1.0.3                   
##   [3] bitops_1.0-9                      tibble_3.3.0                     
##   [5] preprocessCore_1.70.0             XML_3.99-0.19                    
##   [7] lifecycle_1.0.4                   rstatix_0.7.2.999                
##   [9] fastcluster_1.3.0                 doParallel_1.0.17                
##  [11] lattice_0.22-7                    MASS_7.3-65                      
##  [13] base64_2.0.2                      scrime_1.3.5                     
##  [15] backports_1.5.0                   magrittr_2.0.3                   
##  [17] limma_3.64.3                      sass_0.4.10                      
##  [19] rmarkdown_2.29                    BSgenome.Hsapiens.UCSC.hg38_1.4.5
##  [21] jquerylib_0.1.4                   yaml_2.3.10                      
##  [23] doRNG_1.8.6.2                     zip_2.3.3                        
##  [25] askpass_1.2.1                     mapproj_1.2.12                   
##  [27] DBI_1.2.3                         RColorBrewer_1.1-3               
##  [29] lubridate_1.9.4                   maps_3.4.3                       
##  [31] abind_1.4-8                       quadprog_1.5-8                   
##  [33] purrr_1.1.0                       RCurl_1.98-1.17                  
##  [35] rappdirs_0.3.3                    GenomeInfoDbData_1.2.14          
##  [37] rentrez_1.2.4                     genefilter_1.90.0                
##  [39] annotate_1.86.1                   DelayedMatrixStats_1.30.0        
##  [41] codetools_0.2-20                  DelayedArray_0.34.1              
##  [43] xml2_1.4.0                        tidyselect_1.2.1                 
##  [45] shape_1.4.6.1                     UCSC.utils_1.4.0                 
##  [47] farver_2.1.2                      beanplot_1.3.1                   
##  [49] BiocFileCache_2.16.1              illuminaio_0.50.0                
##  [51] GenomicAlignments_1.44.0          jsonlite_2.0.0                   
##  [53] GetoptLong_1.0.5                  multtest_2.64.0                  
##  [55] Formula_1.2-5                     survival_3.8-3                   
##  [57] tools_4.5.0                       Rcpp_1.1.0                       
##  [59] glue_1.8.0                        SparseArray_1.8.1                
##  [61] xfun_0.53                         HDF5Array_1.36.0                 
##  [63] withr_3.0.2                       BiocManager_1.30.26              
##  [65] fastmap_1.2.0                     rhdf5filters_1.20.0              
##  [67] openssl_2.3.3                     digest_0.6.37                    
##  [69] timechange_0.3.0                  R6_2.6.1                         
##  [71] colorspace_2.1-1                  Cairo_1.6-5                      
##  [73] dichromat_2.0-0.1                 RSQLite_2.4.3                    
##  [75] h5mread_1.0.1                     tidyr_1.3.1                      
##  [77] httr_1.4.7                        S4Arrays_1.8.1                   
##  [79] pkgconfig_2.0.3                   gtable_0.3.6                     
##  [81] blob_1.2.4                        siggenes_1.82.0                  
##  [83] htmltools_0.5.8.1                 carData_3.0-5                    
##  [85] bookdown_0.44                     clue_0.3-66                      
##  [87] scales_1.4.0                      png_0.1-8                        
##  [89] snakecase_0.11.1                  knitr_1.50                       
##  [91] rstudioapi_0.17.1                 tzdb_0.5.0                       
##  [93] rjson_0.2.23                      nlme_3.1-168                     
##  [95] curl_7.0.0                        cachem_1.1.0                     
##  [97] rhdf5_2.52.1                      GlobalOptions_0.1.2              
##  [99] stringr_1.5.1                     BiocVersion_3.21.1               
## [101] AnnotationDbi_1.70.0              restfulr_0.0.16                  
## [103] GEOquery_2.76.0                   pillar_1.11.0                    
## [105] reshape_0.8.10                    vctrs_0.6.5                      
## [107] car_3.1-3                         dbplyr_2.5.0                     
## [109] xtable_1.8-4                      cluster_2.1.8.1                  
## [111] evaluate_1.0.5                    magick_2.8.7                     
## [113] tinytex_0.57                      readr_2.1.5                      
## [115] GenomicFeatures_1.60.0            cli_3.6.5                        
## [117] compiler_4.5.0                    Rsamtools_2.24.0                 
## [119] rlang_1.1.6                       crayon_1.5.3                     
## [121] rngtools_1.5.2                    ggsignif_0.6.4                   
## [123] labeling_0.4.3                    nor1mix_1.3-3                    
## [125] mclust_6.1.1                      plyr_1.8.9                       
## [127] stringi_1.8.7                     BiocParallel_1.42.1              
## [129] Matrix_1.7-4                      BSgenome.Hsapiens.UCSC.hg19_1.4.3
## [131] hms_1.1.3                         sparseMatrixStats_1.20.0         
## [133] bit64_4.6.0-1                     Rhdf5lib_1.30.0                  
## [135] KEGGREST_1.48.1                   statmod_1.5.0                    
## [137] AnnotationHub_3.16.1              broom_1.0.9                      
## [139] memoise_2.0.1                     bslib_0.9.0                      
## [141] bit_4.6.0