Analyses realted to control probes from Illumina arrays, showing that fCpGs are not SNPs.

1 Load packages and fCpGs information

##
## Load necessary packages and set global options
##

library(data.table)
library(janitor)
library(dplyr)
library(ggpubr)
library(pals)
library(GenomicRanges)
library(openxlsx)
library(Biostrings)
library(ComplexHeatmap)
library(circlize)
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(SNPlocs.Hsapiens.dbSNP155.GRCh38)
library(MafH5.gnomAD.v4.0.GRCh38)
library(patchwork)
library(ggrepel)


options(stringsAsFactors = F,max.print = 10000,error=NULL,
        "openxlsx.dateFormat" = "dd/mm/yyyy"
)



## load fCpGs with meth values

fcpgs.betas <- 
  fread("../Data/FMC/Final_data/beta_filtered_samples_fcpgs_laplacian.tsv",data.table = F) %>% 
  tibble::column_to_rownames(var = "V1")

fcpgs.betas[1:5,1:5]
dim(fcpgs.betas)


##load all metadata
sheets <- getSheetNames("../Revision/Data/Supplementary_Tables_revision_curated.3.xlsx")
sheets <- setNames(sheets,sheets)

fCpGs.metadata <- lapply(sheets,function(i){
  openxlsx::read.xlsx("../Revision/Data/Supplementary_Tables_revision_curated.3.xlsx",sheet = i,detectDates = T)
})




##compare 5 random fCpGs and plot
evoflux.meth <- fread("../Revision/Data/meth_palette_evoflux.tsv")
evoflux.meth


# color smaples
cols.paper <- c(
  NA_values="#666666",
  HPC="#E6E6E6",
  pre.Bcell="#D5D5D5",
  NBC="#C3C3C3",
  GCB="#AEAEAE",
  tPC="#969696",
  MBC="#787878",
  bmPC="#4D4D4D",
  Tcell="#F0F8FF",
  PBMCs="#FFF8DC",
  Whole_blood="#CDC8B1",
  Normal_lymphoid_cell="#D5D5D5",
  Normal_myeloid_cell="#E6E6FA",
  "T-ALL"="#CD3333",
  "T-ALL-remission"="#C7A5A5",
  "B-ALL"="#E69F00",
  "B-ALL-remission"="#D8CAAB",
  MCL="#F0E442",
  C1.or.cMCL="#F5D51B",
  C2.or.nnMCL="#0072B2",
  "DLBCL-NOS"="#56B4E9",
  MBL="#5A9A89",
  CLL="#009E73",
  RT="#02C590",
  CLL.unmutated="#e65100",
  CLL.mutated="#361379",
  nCLL="#006E93",
  iCLL="#FDC010",
  mCLL="#963736",
  Disease.Unclassified="#CCCCCC",
  MGUS="#BF99AE",
  MM="#CC79A7"
)

2 fCpG hg38

fcpgs.hg19.CG <- minfi::getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)[rownames(fcpgs.betas),] %>% 
  as.data.frame()

fcpgs.hg19.CG  <- 
  GRanges(fcpgs.hg19.CG$chr,IRanges(fcpgs.hg19.CG$pos,fcpgs.hg19.CG$pos),strand = fcpgs.hg19.CG$strand,
          fcpgs.hg19.CG %>% select(-c(chr,pos,strand))
  )
fcpgs.hg19.CG

getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.CG[strand(fcpgs.hg19.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.CG[strand(fcpgs.hg19.CG)=="-"]) %>% as.character() %>% table

##perform liftover
hg19.to.hg38.chain <- rtracklayer::import.chain(con = "../Revision/Data/Nanopore/hg19ToHg38.over.chain")
fcpgs.hg38.CG <- rtracklayer::liftOver(x = fcpgs.hg19.CG,chain = hg19.to.hg38.chain)
fcpgs.hg38.CG <- unlist(fcpgs.hg38.CG)

length(fcpgs.hg19.CG)
length(fcpgs.hg38.CG)

fcpgs.hg19.CG$Name[which(!fcpgs.hg19.CG$Name %in% fcpgs.hg38.CG$Name)]

fcpgs.hg38.CG

getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() %>% table

fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"] %>% granges()

##liftover do not work on this CpG
fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"][which(getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() !="G"),]
exclude <- fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]$Name[which(getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() !="G")]
exclude

fcpgs.hg38.CG <- fcpgs.hg38.CG[fcpgs.hg38.CG$Name!=exclude]
fcpgs.hg38.CG

#check nucleotides ## OK!
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.CG[strand(fcpgs.hg38.CG)=="-"]) %>% as.character() %>% table

3 Load candidate CpGs

We next load all the CpGs passing inital QC.

## load background CPGs to test overlap with gnomADv4
cpgs.candidate.hg19.CG <- fread("../Data/FMC/Final_data/candidate_cpgs_24_07_23.txt",header = F,data.table = F)
head(cpgs.candidate.hg19.CG);dim(cpgs.candidate.hg19.CG)
colnames(cpgs.candidate.hg19.CG) <- "CpGs"
head(cpgs.candidate.hg19.CG);dim(cpgs.candidate.hg19.CG)
cpgs.candidate.hg19.CG <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)[cpgs.candidate.hg19.CG$CpGs,] %>%   as.data.frame()

cpgs.candidate.hg19.CG  <- 
  GRanges(cpgs.candidate.hg19.CG$chr,IRanges(cpgs.candidate.hg19.CG$pos,cpgs.candidate.hg19.CG$pos),strand = cpgs.candidate.hg19.CG$strand,
          cpgs.candidate.hg19.CG %>% dplyr::select(-c(chr,pos,strand))
  )
cpgs.candidate.hg19.CG

getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,cpgs.candidate.hg19.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,cpgs.candidate.hg19.CG[strand(cpgs.candidate.hg19.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,cpgs.candidate.hg19.CG[strand(cpgs.candidate.hg19.CG)=="-"]) %>% as.character() %>% table



cpgs.candidate.hg38.CG <- rtracklayer::liftOver(x = cpgs.candidate.hg19.CG,chain = hg19.to.hg38.chain)
cpgs.candidate.hg38.CG <- unlist(cpgs.candidate.hg38.CG)

length(cpgs.candidate.hg19.CG)
length(cpgs.candidate.hg38.CG)

cpgs.candidate.hg19.CG$Name[which(!cpgs.candidate.hg19.CG$Name %in% cpgs.candidate.hg38.CG$Name)]
cpgs.candidate.hg38.CG

getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG[strand(cpgs.candidate.hg38.CG)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG[strand(cpgs.candidate.hg38.CG)=="-"]) %>% as.character() %>% table


##liftover do not work on this CpG
exclude <- which(as.character(getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG))=="A")
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG)[exclude]


cpgs.candidate.hg38.CG <- cpgs.candidate.hg38.CG[-exclude]
cpgs.candidate.hg38.CG
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,cpgs.candidate.hg38.CG) %>% as.character() %>% table

4 Proportion SNPs CpG vs fCpGs

We next use annotation packages to annotate both candidate CpGs and fCpGs, and perform fisher test. fCpGs have significantly less propability compared to background candidate CpGs to contain SNPs reported in gnomAD consotia, v4.0 (2024).

##NOTE: following packages do not consider strand specificy and rely on + strand (same as if was all ranges strand=*)


## get SNPs accroding to db155
snps <- SNPlocs.Hsapiens.dbSNP155.GRCh38  
# snpcount(snps)
# seqinfo(snps)

##check seqlevels styles
# seqlevels(snps)
# seqlevels(fcpgs.hg38.CG)

##arrange same style of levels
fcpgs.hg38.CG.ncbi <- fcpgs.hg38.CG
# seqlevelsStyle(fcpgs.hg38.CG.ncbi)
seqlevels(fcpgs.hg38.CG.ncbi) <- gsub("chr","",seqlevels(fcpgs.hg38.CG))
# seqlevelsStyle(fcpgs.hg38.CG.ncbi)


cpgs.candidate.hg38.CG.ncbi <- cpgs.candidate.hg38.CG
# seqlevelsStyle(cpgs.candidate.hg38.CG.ncbi)
seqlevels(cpgs.candidate.hg38.CG.ncbi) <- gsub("chr","",seqlevels(fcpgs.hg38.CG))
# seqlevelsStyle(cpgs.candidate.hg38.CG.ncbi)


##check strand specificity 
# fcpgs.hg38.Cs.ncbi <- fcpgs.hg38.Cs
# seqlevelsStyle(fcpgs.hg38.Cs.ncbi)
# seqlevels(fcpgs.hg38.Cs.ncbi) <- gsub("chr","",seqlevels(fcpgs.hg38.Cs))
# seqlevelsStyle(fcpgs.hg38.Cs.ncbi)

##get all reported SNPs regardless of MAF 
fcpgs.hg38.CG.snps <-  snpsByOverlaps(x = snps,
                                              # ranges=fcpgs.hg38.C[which(strand(fcpgs.hg38.Cs.ncbi)=="-")], ##all Gs if considering - strand with Cs positions
                                              ranges = fcpgs.hg38.CG.ncbi,
                                              genome=BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38)
fcpgs.hg38.CG.snps

## UnstitchedGPos object with 783 positions and 5 metadata columns:
##         seqnames       pos strand |    RefSNP_id alleles_as_ambig genome_compat
##            <Rle> <integer>  <Rle> |  <character>      <character>     <logical>
##     [1]        1    909931      * |  rs145001858                Y          TRUE
##     [2]        1   1011495      * |  rs945574781                Y          TRUE
##     [3]        1   1014513      * |  rs201328791                B          TRUE
##     [4]        1   1034445      * |  rs764173254                Y          TRUE
##     [5]        1   1117220      * |  rs942434937                B          TRUE
##     ...      ...       ...    ... .          ...              ...           ...
##   [779]       22  37070895      * |  rs761525378                B          TRUE
##   [780]       22  39665573      * |  rs758531004                H          TRUE
##   [781]       22  41681935      * |  rs184272182                B          TRUE
##   [782]       22  43089654      * | rs1929494260                S          TRUE
##   [783]       22  45413363      * | rs2087376347                Y          TRUE
##          ref_allele     alt_alleles
##         <character> <CharacterList>
##     [1]           C               T
##     [2]           C               T
##     [3]           C             G,T
##     [4]           C               T
##     [5]           C             G,T
##     ...         ...             ...
##   [779]           C             G,T
##   [780]           C             A,T
##   [781]           C             G,T
##   [782]           C               G
##   [783]           C               T
##   -------
##   seqinfo: 25 sequences (1 circular) from GRCh38.p13 genome

fcpgs.hg38.CG.snps@elementMetadata$ref_allele %>% table

## .
##   C 
## 783

fcpgs.hg38.CG.snps@elementMetadata$alt_alleles %>% table %>% apply(.,2,table) ##majority C>T, as expected.

##     A   G   T
## 0 572 589 105
## 1 211 194 678

##get all reported SNPs regardless of MAF with 100 bp's extension on each site
fcpgs.hg38.101.CG.snps <-  snpsByOverlaps(x = snps,
                              ranges = resize(x = fcpgs.hg38.CG.ncbi,width = 101,fix = "center",ignore.strand=T),#fcpgs.hg38.CG.ncbi, 
                              genome=BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38)
# fcpgs.hg38.101.CG.snps


##
## Get MAFs from gnomAD consotia, v4.0 (2024)
##

mafh5 <- MafH5.gnomAD.v4.0.GRCh38
# mafh5
seqlevelsStyle(mafh5)

## [1] "UCSC"

seqlevelsStyle(fcpgs.hg38.CG)

## [1] "UCSC"

seqlevelsStyle(fcpgs.hg38.CG.ncbi) ## follwong code works despite having differnt styles, previously checked!

## [1] "NCBI"    "Ensembl"

##overlap fCpGs
fcpgs.hg38.CG.MAF.gnomAD.v4 <- gscores(x = mafh5,ranges = fcpgs.hg38.CG,pop=populations(mafh5)) 
fcpgs.hg38.CG.MAF.gnomAD.v4$AF %>% summary

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00001 0.00001 0.00002 0.00118 0.00004 0.36000     394

names(fcpgs.hg38.CG.MAF.gnomAD.v4) <- fcpgs.hg38.CG.MAF.gnomAD.v4$Name
# fcpgs.hg38.CG.MAF.gnomAD.v4

#candidate
cpgs.candidate.hg38.CG.MAF.gnomAD.v4 <- gscores(x = mafh5,ranges = cpgs.candidate.hg38.CG,pop=populations(mafh5)) 
cpgs.candidate.hg38.CG.MAF.gnomAD.v4$AF %>% summary

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.00    0.00    0.36  134463

names(cpgs.candidate.hg38.CG.MAF.gnomAD.v4) <- cpgs.candidate.hg38.CG.MAF.gnomAD.v4$Name
cpgs.candidate.hg38.CG.MAF.gnomAD.v4$fCpG <-
  ifelse(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$Name %in% fcpgs.hg38.CG.MAF.gnomAD.v4$Name,T,F)
# table(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$fCpG)

##chi square background and fCpGs proportions
print(prop.table(table(is.na(fcpgs.hg38.CG.MAF.gnomAD.v4$AF)))) ## 60%

## 
##     FALSE      TRUE 
## 0.5963115 0.4036885

print(prop.table(table(is.na(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$AF)))) ## 65%

## 
##     FALSE      TRUE 
## 0.6544496 0.3455504

tab <- table("present in gnomADv.40"=!is.na(cpgs.candidate.hg38.CG.MAF.gnomAD.v4$AF),
             fCpG=cpgs.candidate.hg38.CG.MAF.gnomAD.v4$fCpG)
print(tab)

##                      fCpG
## present in gnomADv.40  FALSE   TRUE
##                 FALSE 134069    394
##                 TRUE  254082    582

print(fisher.test(tab))

## 
##  Fisher's Exact Test for Count Data
## 
## data:  tab
## p-value = 0.0001582
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.6846029 0.8881783
## sample estimates:
## odds ratio 
##  0.7794582

5 mAF of SNPs reported in 50bp’s extension from fCPGs

##map using all SNPs with 101 extendion of fCPGs
##set same seqleves for overlap
fcpgs.hg38.101.CG.snps
seqlevelsStyle(fcpgs.hg38.101.CG.snps)
genome(fcpgs.hg38.101.CG.snps)

##arrange levels and genome
seqlevels(fcpgs.hg38.101.CG.snps) <- paste0("chr",seqlevels(fcpgs.hg38.101.CG.snps))
seqlevels(fcpgs.hg38.101.CG.snps)
genome(fcpgs.hg38.101.CG.snps) <- "hg38"
genome(fcpgs.hg38.101.CG.snps)

##overlap
fcpgs.hg38.101.MAF.rs.gnomAD.v4 <- gscores(x = mafh5,ranges = fcpgs.hg38.101.CG.snps,
                                    pop=populations(mafh5))
fcpgs.hg38.101.MAF.rs.gnomAD.v4$AF %>% summary
fcpgs.hg38.101.MAF.rs.gnomAD.v4
fcpgs.hg38.101.MAF.rs.gnomAD.v4$AF %>% qplot()+ theme_classic() + ggtitle("mAF of all SNPs in 100 bp's extension (fCpGs at center) ")

6 mAF of SNPs mapping to fCpGs

We have found that some fCpGs have an SNP annotated in dbSNP155.GRCh38, so we then further investigate them.

6.1 Proportion and mAF

##get MAF of all previously maped SNPs of fCPGs
##set same seqleves for overlap
fcpgs.hg38.CG.snps
seqlevelsStyle(fcpgs.hg38.CG.snps)
genome(fcpgs.hg38.CG.snps)

##arrange levels and genome
seqlevels(fcpgs.hg38.CG.snps) <- paste0("chr",seqlevels(fcpgs.hg38.CG.snps))
seqlevels(fcpgs.hg38.CG.snps)
genome(fcpgs.hg38.CG.snps) <- "hg38"
genome(fcpgs.hg38.CG.snps)

#overlap
overlap.fCpGs <- findOverlaps(query = fcpgs.hg38.CG.MAF.gnomAD.v4,subject = fcpgs.hg38.CG.snps)
overlap.fCpGs


# complete fCPGs with snps info and BSEgenome hg38
fcpgs.hg38.CG.MAF.gnomAD.v4.snps <- as.data.frame(fcpgs.hg38.CG.MAF.gnomAD.v4)
head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)
fcpgs.hg38.CG.MAF.gnomAD.v4.snps <- cbind(fcpgs.hg38.CG.MAF.gnomAD.v4.snps,
                                           RefSNP_id=NA,
                                           alleles_as_ambig=NA,
                                           ref_allele=NA,
                                           alt_alleles=NA
                                           )
head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)

fcpgs.hg38.CG.MAF.gnomAD.v4.snps[queryHits(overlap.fCpGs),c("RefSNP_id","alleles_as_ambig","ref_allele","alt_alleles")] <- 
  as.data.frame(mcols(fcpgs.hg38.CG.snps[subjectHits(overlap.fCpGs),c("RefSNP_id","alleles_as_ambig","ref_allele","alt_alleles")]))

head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)

fcpgs.hg38.CG.MAF.gnomAD.v4.snps %>% 
  dplyr::count(is.na(AF),is.na(RefSNP_id))

##the following SNPs do not have available MAF, (checked also at NCBI dbSNP)
fcpgs.hg38.CG.MAF.gnomAD.v4.snps %>% 
  filter(is.na(AF),!is.na(RefSNP_id)) %>% 
  select(seqnames,start,AF,RefSNP_id)

## Complete maf of all rs flanking 101 bps  of fCPGs
overlap.fCpGs.101 <- findOverlaps(query = resize(fcpgs.hg38.CG.MAF.gnomAD.v4,width = 101,fix = "center",ignore.strand=T),
                                  subject = fcpgs.hg38.101.MAF.rs.gnomAD.v4)
overlap.fCpGs.101
length(overlap.fCpGs.101)
length(unique(queryHits(overlap.fCpGs.101)))
length(subjectHits(overlap.fCpGs.101))

flanking_100 <- lapply(seq_along(as.list(overlap.fCpGs.101)),function(i){
  
  CpG <- fcpgs.hg38.CG.MAF.gnomAD.v4$Name[i]
  indx <- as.list(overlap.fCpGs.101)[[i]]
  
  dat <- fcpgs.hg38.101.MAF.rs.gnomAD.v4[indx,] %>% as.data.frame()
  dat <- dat[,c("RefSNP_id","seqnames","pos","alleles_as_ambig","ref_allele","alt_alleles","AF","AF_allpopmax")]
  colnames(dat) <- paste0(colnames(dat),"_50bp.flanking")
  res <- apply(dat,2,function(x)paste0(x,collapse = ";"))
  res[["alt_alleles_50bp.flanking"]] <- 
  gsub('c\\(|\\\\|\\"|\\)',"",res[["alt_alleles_50bp.flanking"]]) %>% gsub(", ","/",.)
  res <- res %>% t() %>% data.frame()
  res$fCpG_50bps.flanking <- CpG
  return(res)
  
})
flanking_100 <- do.call(rbind,flanking_100)
any(flanking_100=="") ## 1 CpG do not have any rs
flanking_100[flanking_100==""] <- NA
any(flanking_100=="") ## 1 CpG do not have any rs
head(flanking_100)

# plot gnomAD MAFs for all fCpGs
head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps);dim(fcpgs.hg38.CG.MAF.gnomAD.v4.snps)


##merge 50bp flanking info
fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking <- 
  cbind(fcpgs.hg38.CG.MAF.gnomAD.v4.snps,
        flanking_100
        )

head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking)

fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$AF_50bp.flanking_max <- 
  apply(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking,1,function(x){
  AFs <- x$AF_50bp.flanking
  res <- max(strsplit(AFs,";")[[1]] %>% as.numeric(),na.rm = T)
  if(res==-Inf){
    res <- NA
  }
  return(res)
})

##identical
# fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$AF_50bp.flanking_max %>% qplot
# fcpgs.hg38.CG.101.MAF.max.gnomAD.v4$AF %>% qplot()

library(patchwork)
library(ggrepel)

mAF <- 1e-3

p.snps <- fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  mutate(entry=case_when(is.na(AF) ~ F, .default = T)) %>% 
  ggplot(aes(x=entry))+
  xlab("Minor allele frequency (mAF) reported in gnomADv.4.0")+
  ggtitle(paste0("Pan-lymphoid fCpGs (hg38, N=",nrow(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking),")"))+
  geom_bar()+
  geom_text(stat='count', aes(label=after_stat(count)),size=2, vjust=-1)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 5),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  mutate(label=case_when(AF>=0.01 ~ paste0(RefSNP_id,"   (",Name,")"),.default = NA)) %>% 
  ggplot(aes(x = AF
             ))+
  geom_histogram()+
  scale_x_continuous(n.breaks = 10)+
  ggtitle("fCpGs with mAF reported in gnomADv.4.0 (True from left plot)")+
  # annotate("text",x = mAF+0.05,y = 400,label=paste0("mAF≥0.001"),size=2,color="red3")+
  # geom_vline(xintercept = mAF,lwd=0.3,linetype="dashed",color="red3")+
  geom_text(mapping = aes(y=5,
                          label=label
                          ),
            size=2,
            angle=90,
            hjust=0,
            vjust=0.5
            )+
  xlab("mAF")+
  ylab(NULL)+
    theme_classic()+
    theme(text=element_text(color="grey0",size = 5),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        plot.margin = margin(1,1,1,1),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )
p.snps

6.2 Heatmap with mAF≥1e-3

## Heatmap settings

ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T



## SNPS at CpGs
fcpgs.snps <- 
  fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  filter(AF>=1e-3)
fcpgs.snps %>% dim


table(colnames(fcpgs.betas) %in% fCpGs.metadata$supplementary_table_1$participant_id_anonymous)
table(fCpGs.metadata$supplementary_table_1$participant_id_anonymous %in% colnames(fcpgs.betas))


h.CpGs.snps <- Heatmap(fcpgs.betas[fcpgs.snps$Name,
                    fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
        col = evoflux.meth$color,
        name="DNA methylation",
        column_title = paste0("fCpGs with reported SNP with mAF≥",mAF),
        row_title = paste0("N=",nrow(fcpgs.snps)),
        heatmap_legend_param = list(direction="horizontal"),
        clustering_distance_columns = "euclidean",
        clustering_method_columns = "single",
        show_column_names = F,
        show_row_names = F,
        row_dend_gp = gpar(lwd=0.1),
        column_dend_gp = gpar(lwd=0.1),
        raster_quality = 10,
        top_annotation = HeatmapAnnotation(annot2=factor(case_when(fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="PBMCs" & 
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Whole_blood" & 
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Normal_lymphoid_cell" &
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="B-ALL-remission" &
                                                                     fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="T-ALL-remission" ~ "Lymphoid tumour",
                                                                   .default = "Healthy sample/Blood from remission")
        ),
        annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                     levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
        col=list(annot=cols.paper,
                 annot2=c("Healthy sample/Blood from remission"="grey90","Lymphoid tumour"="grey0")
        ),
        show_annotation_name = F,
        show_legend = T,
        annotation_label = "Samples",
        simple_anno_size = unit(3,"mm"),
        annotation_legend_param = list(direction="horizontal",nrow=3)
        ),
        right_annotation = HeatmapAnnotation(which="row",
                                             mAF=fcpgs.snps$AF,
                                             col=list(mAF=colorRamp2(seq(min(fcpgs.snps$AF),
                                                                        max(fcpgs.snps$AF),
                                                                        length.out=100
                                                                        ),
                                                                    colors = brewer.blues(100)
                                                                    )
                                                      ),
                                             na_col = "grey45",
                                             show_annotation_name = F,
                                             # annotation_name_side = "left",
                                             show_legend = T,
                                             annotation_name_gp = gpar(fontsize=5),
                                             simple_anno_size = unit(2,"mm"),
                                             annotation_legend_param = list(direction="horizontal")
                                             )
        )+
  HeatmapAnnotation(which="row",
                    annot=anno_text(which = "row",
                                    x = fcpgs.snps$Name,
                                    gp = gpar(fontsize=5)),
                    rs=anno_mark(at = which(fcpgs.snps$AF>=0.01),
                                 labels=fcpgs.snps$RefSNP_id[which(fcpgs.snps$AF>=0.01)],
                                 labels_gp = gpar(fontsize=5),link_gp = gpar(lwd=0.2)
                    ),
                    show_annotation_name = F,
                    annotation_legend_param = list(direction="horizontal")
                    )
draw(h.CpGs.snps,
     heatmap_legend_side = "bottom",
     annotation_legend_side = "bottom"
     )

##arrange plot
print((p.snps | grid.grabExpr(draw(h.CpGs.snps,merge_legend=F,
                             heatmap_legend_side = "bottom",
                             annotation_legend_side = "bottom"
                             )
                        )
  ) + 
  plot_layout(widths = c(0.75,1,3),nrow = 1,ncol = 3)
  )

7 Heatmap of flanking regions

We next plot a heatmap with fCpGs that have reported SNPs in their flanking region (50bp’s each side) of more than mAF≥0.01, and we see the same expected clustering with normal samples and tumors separated consistent with their clonality.

## SNPS at CpGs
fcpgs.snps <- 
  fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
  filter(AF_50bp.flanking_max>=0.01)
fcpgs.snps %>% dim
head(fcpgs.snps)

table(colnames(fcpgs.betas) %in% fCpGs.metadata$supplementary_table_1$participant_id_anonymous)
table(fCpGs.metadata$supplementary_table_1$participant_id_anonymous %in% colnames(fcpgs.betas))


h.fCpGs.101bp.flanking.snps <- Heatmap(fcpgs.betas[fcpgs.snps$Name,
                                   fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
                       col = evoflux.meth$color,
                       name="DNA methylation",
                       column_title = paste0("fCpGs with mAF of 50bp's flanking sequences as reported in gnomADv.4.0 with mAF≥",mAF),
                       row_title = paste0("N=",nrow(fcpgs.snps)),
                       heatmap_legend_param = list(direction="horizontal"),
                       show_column_names = F,
                       show_row_names = F,
                       clustering_distance_columns = "euclidean",
                       clustering_method_columns = "single",
                       row_dend_gp = gpar(lwd=0.1),
                       column_dend_gp = gpar(lwd=0.05),
                       raster_quality = 5,
                       top_annotation = HeatmapAnnotation(annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                                                                       levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
                                                          col=list(annot=cols.paper),
                                                          show_annotation_name = F,
                                                          annotation_label = "Samples",
                                                          simple_anno_size = unit(3,"mm"),
                                                          annotation_legend_param = list(direction="horizontal",nrow=3)
                       ),
                       right_annotation = HeatmapAnnotation(which="row",
                       mAF=fcpgs.snps$AF_50bp.flanking_max,
                       col=list(mAF=colorRamp2(seq(min(fcpgs.snps$AF_50bp.flanking_max),
                                                   max(fcpgs.snps$AF_50bp.flanking_max),
                                                   length.out=100
                                                   ),
                                               colors = brewer.blues(100)
                                               )
                                ),
                       na_col = "grey45",
                       show_annotation_name = F,
                       # annotation_name_side = "left",
                       show_legend = T,
                       annotation_label = "Maximum mAF from 50bp's flanking sequences",
                       annotation_name_gp = gpar(fontsize=5),
                       simple_anno_size = unit(2,"mm"),
                       annotation_legend_param = list(direction="horizontal")
                       )
)
draw(h.fCpGs.101bp.flanking.snps,
     heatmap_legend_side = "bottom",
     annotation_legend_side = "bottom"
)

8 Export all info

##
## EXPORT
##


library(IlluminaHumanMethylationEPICv2anno.20a1.hg38)
# checck availability of those fCPGs in the last EPICv2 version of Illumina ararys, which have been curated by the comunity

fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$Name %in% minfi::getAnnotation(IlluminaHumanMethylationEPICv2anno.20a1.hg38)$EPICv1_Loci %>%
  table %>% 
  prop.table()

fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$EPICv2 <- 
  ifelse(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking$Name %in% minfi::getAnnotation(IlluminaHumanMethylationEPICv2anno.20a1.hg38)$EPICv1_Loci, T,F)


head(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking)


## Merge hg19 anf hg38 info together with all snps, etc

##hg19
# get CG and C position hg19
fcpgs.hg19.CG <- sort(sortSeqlevels(fcpgs.hg19.CG))
fcpgs.hg19.Cs <- fcpgs.hg19.CG
fcpgs.hg19.Cs
fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")] <- fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")]+1
start(fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")]) <- end(fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")])
fcpgs.hg19.Cs[which(strand(fcpgs.hg19.Cs)=="-")]
#check nucleotides
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.Cs[strand(fcpgs.hg19.Cs)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,fcpgs.hg19.Cs[strand(fcpgs.hg19.Cs)=="-"]) %>% as.character() %>% table

fcpgs.hg19.CG.Cs <- 
  data.frame(fcpgs.hg19.CG %>% granges() %>% as.data.frame(),
                               fcpgs.hg19.Cs %>% granges() %>% as.data.frame(),
             Name=fcpgs.hg19.CG$Name
             ) %>% 
  select(Name.hg19=Name,
         chr_CG.hg19=seqnames,
         pos_CG.hg19=start,
         strand_CG.hg19=strand,
         chr_C.hg19=seqnames.1,
         pos_C.hg19=start.1,
         strand_C.hg19=strand.1,
         )
  
head(fcpgs.hg19.CG.Cs);dim(head(fcpgs.hg19.CG.Cs))
tail(fcpgs.hg19.CG.Cs);dim(head(fcpgs.hg19.CG.Cs))

##hg38
fcpgs.hg38.CG <- sort(sortSeqlevels(fcpgs.hg38.CG))
fcpgs.hg38.Cs <- fcpgs.hg38.CG
fcpgs.hg38.Cs
fcpgs.hg38.Cs[which(strand(fcpgs.hg38.CG)=="-")] <- fcpgs.hg38.CG[which(strand(fcpgs.hg38.CG)=="-")]+1
start(fcpgs.hg38.Cs[which(strand(fcpgs.hg38.Cs)=="-")]) <- end(fcpgs.hg38.Cs[which(strand(fcpgs.hg38.Cs)=="-")])
fcpgs.hg38.Cs[which(strand(fcpgs.hg38.Cs)=="-")]
#check nucleotides
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.Cs[strand(fcpgs.hg38.Cs)=="+"]) %>% as.character() %>% table
getSeq(BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,fcpgs.hg38.Cs[strand(fcpgs.hg38.Cs)=="-"]) %>% as.character() %>% table

fcpgs.hg38.CG.Cs <- 
  data.frame(fcpgs.hg38.CG %>% granges() %>% as.data.frame(),
             fcpgs.hg38.Cs %>% granges() %>% as.data.frame(),
             Name=fcpgs.hg38.CG$Name
  ) %>% 
  select(Name.hg38=Name,
         chr_CG.hg38=seqnames,
         pos_CG.hg38=start,
         strand_CG.hg38=strand,
         chr_C.hg38=seqnames.1,
         pos_C.hg38=start.1,
         strand_C.hg38=strand.1,
  )

head(fcpgs.hg38.CG.Cs);dim(fcpgs.hg38.CG.Cs)
tail(fcpgs.hg38.CG.Cs);dim(fcpgs.hg38.CG.Cs)


## merge all info together
fCpGs.loc <- left_join(x = fcpgs.hg19.CG.Cs,
                       y=fcpgs.hg38.CG.Cs,
                       by = join_by(Name.hg19==Name.hg38),
                       keep = T
                       )
head(fCpGs.loc);dim(fCpGs.loc)
tail(fCpGs.loc);dim(fCpGs.loc)


##merge annotation from Illumina and minfi annot package
Illumina.info.hg <- fcpgs.hg19.CG %>% mcols() %>% as.data.frame()
colnames(Illumina.info.hg) <- paste0("Illumina_",colnames(Illumina.info.hg))
head(Illumina.info.hg);dim(Illumina.info.hg)

fCpGs.loc.info <- left_join(x = fCpGs.loc,
                            y=Illumina.info.hg,
                            by = join_by(Name.hg19==Illumina_Name),
                            keep = T
                            )
head(fCpGs.loc.info);dim(fCpGs.loc.info)


fCpGs.complete.info <- 
  left_join(x = fCpGs.loc.info,
            y=fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking %>% 
              select(Name,
                     all_of(colnames(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking)[
                       !colnames(fcpgs.hg38.CG.MAF.gnomAD.v4.snps.50bp.flanking) %in% 
                         c(gsub("Illumina_","",colnames(fCpGs.loc.info)),"seqnames","start","end","width","strand")
                     ])
                     ),
            by=join_by(Name.hg19==Name),
            keep = T
            )
head(fCpGs.complete.info);dim(fCpGs.complete.info)

##transform list to character to export with xlsx format
sapply(fCpGs.complete.info,class) %>% table
fCpGs.complete.info[,sapply(fCpGs.complete.info,class)=="list"] <- 
  sapply(fCpGs.complete.info[,sapply(fCpGs.complete.info,class)=="list"],function(x){
    res <- paste0(x,collapse = "/")
    if(res=="" | res=="NA"){
      res <- NA_character_
    }
    return(res)
  })

# write.xlsx(fCpGs.complete.info,"../Results/fCpGs_SNPs/fCpGs.complete.info.xlsx",na.string=c("","NA"))

9 Gapphuntig algorithm

To further exclude any rare genetic confounding, we next perform a the data-driven approach available at minfi package.

##
## Perform gaphunting in fCpGs 
##

##numbers analyzed originally in the manuscipt
threshold <- c(0.025,0.05,0.1,0.2)
outCutoff <- c(0.005,0.01,0.05,0.1)

for(threshold.i in threshold){
  
  writeLines(paste0("threshold.i:",threshold.i))
  
  for (outCutoff.i in outCutoff){
    
    writeLines(paste0("outCutoff.i=",outCutoff.i))
    
    
    gapres <- tryCatch(minfi::gaphunter(as.matrix(fcpgs.betas),threshold = threshold.i,outCutoff = outCutoff.i,keepOutliers = T),
                       error=function(e)return(NA)
    )
    if(is.list(gapres)){
      
      # gapres$proberesults %>% head
      # gapres$proberesults %>% dplyr::count(Groups)
      # gapres$proberesults %>% dplyr::count(Group3)
      # gtsummary::tbl_summary(gapres$proberesults)
      # grid::grid.newpage()
      # gridExtra::grid.table(gapres$proberesults)
      # 
      
      gapres.fCpGs.info <- fCpGs.complete.info[match(rownames(gapres$proberesults),fCpGs.complete.info$Name),]
      
      
      h.gaphunter <- 
        Heatmap(matrix = fcpgs.betas[rownames(gapres$proberesults),fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
                col=evoflux.meth$color,
                column_title = paste0("gaphunted fCpGs at threshold:",threshold.i,", outCutoff=",outCutoff.i),
                row_title = paste0("N=",nrow(gapres$proberesults)),
                border=T,
                use_raster = T,
                raster_quality = 10,
                heatmap_legend_param = list(title="DNA methylation",direction="horizontal"),
                show_row_names = T,
                row_names_side = "left",
                show_column_names = F,
                row_dend_gp = gpar(lwd=0.1),
                column_dend_gp = gpar(lwd=0.1),
                clustering_distance_columns = "euclidean",
                clustering_method_columns = "single",
                top_annotation = HeatmapAnnotation(annot2=factor(case_when(fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="PBMCs" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Whole_blood" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Normal_lymphoid_cell" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="B-ALL-remission" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="T-ALL-remission" ~ "Lymphoid tumour",
                                                                           .default = "Healthy sample/Blood from remission")
                ),
                annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                             levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
                col=list(annot=cols.paper,
                         annot2=c("Healthy sample/Blood from remission"="grey0","Lymphoid tumour"="grey90")
                ),
                show_annotation_name = F,
                annotation_label = "Samples",
                simple_anno_size = unit(3,"mm"),
                annotation_legend_param = list(direction="horizontal",nrow=3)
                ),
                right_annotation = HeatmapAnnotation(which="row",
                                                     mAF=gapres.fCpGs.info$AF,
                                                     col=list(mAF=colorRamp2(seq(min(gapres.fCpGs.info$AF,na.rm = T),
                                                                                 max(gapres.fCpGs.info$AF,na.rm = T),
                                                                                 length.out=100
                                                     ),
                                                     colors = brewer.blues(100)
                                                     )
                                                     ),
                                                     na_col = "grey45",
                                                     show_annotation_name = F,
                                                     # annotation_name_side = "left",
                                                     show_legend = T,
                                                     annotation_label = "mAF reporeted in gnomAD.v4.0",
                                                     annotation_name_gp = gpar(fontsize=5),
                                                     simple_anno_size = unit(2,"mm"),
                                                     annotation_legend_param = list(direction="horizontal")
                )
        )
      # draw(h.gaphunter,
      #      heatmap_legend_side = "bottom",
      #      annotation_legend_side = "bottom",
      #      merge_legend=T
      # )
      
      set.seed(123)
      fCpG.random <- sample(rownames(fcpgs.betas),nrow(gapres$proberesults),replace = F)
      fCpGs.random.info <-  fCpGs.complete.info[match(fCpG.random,fCpGs.complete.info$Name),]
      
      h.random <- 
        Heatmap(matrix = fcpgs.betas[fCpG.random,fCpGs.metadata$supplementary_table_1$participant_id_anonymous],
                col=evoflux.meth$color,
                column_title = "random fCpGs",
                border=T,
                use_raster = T,
                raster_quality = 10,
                clustering_distance_columns = "euclidean",
                clustering_method_columns = "single",
                show_heatmap_legend = F,
                show_row_names = T,
                row_names_side = "right",
                show_column_names = F,
                row_dend_gp = gpar(lwd=0.1),
                column_dend_gp = gpar(lwd=0.1),
                top_annotation = HeatmapAnnotation(annot2=factor(case_when(fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="PBMCs" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Whole_blood" & 
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="Normal_lymphoid_cell" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="B-ALL-remission" &
                                                                             fCpGs.metadata$supplementary_table_1$cell_type_annot_1!="T-ALL-remission" ~ "Lymphoid tumour",
                                                                           .default = "Healthy sample/Blood from remission")
                ),
                annot=factor(fCpGs.metadata$supplementary_table_1$cell_type_annot_1,
                             levels = names(cols.paper)[names(cols.paper) %in% fCpGs.metadata$supplementary_table_1$cell_type_annot_1]),
                col=list(annot=cols.paper,
                         annot2=c("Healthy sample/Blood from remission"="grey0","Lymphoid tumour"="grey90")
                ),
                show_annotation_name = F,
                show_legend = F,
                annotation_label = "Samples",
                simple_anno_size = unit(3,"mm"),
                annotation_legend_param = list(direction="horizontal",nrow=3)
                ),
                right_annotation = HeatmapAnnotation(which="row",
                                                     mAF=fCpGs.random.info$AF,
                                                     col=list(mAF=colorRamp2(seq(min(fCpGs.random.info$AF,na.rm = T),
                                                                                 max(fCpGs.random.info$AF,na.rm = T),
                                                                                 length.out=100
                                                     ),
                                                     colors = brewer.blues(100)
                                                     )
                                                     ),
                                                     na_col = "grey45",
                                                     show_annotation_name = F,
                                                     # annotation_name_side = "left",
                                                     show_legend = T,
                                                     annotation_label = "mAF reporeted in gnomAD.v4.0",
                                                     annotation_name_gp = gpar(fontsize=5),
                                                     simple_anno_size = unit(2,"mm"),
                                                     annotation_legend_param = list(direction="horizontal")
                )
        )
      draw(h.gaphunter+h.random,
           heatmap_legend_side = "bottom",
           annotation_legend_side = "bottom",
           merge_legend=T
      )
      
    }else{
      
      p <- ggplot()+
        annotate("text",y=0.5,x=0.5,label=paste0("No results at threshold:",threshold.i,", outCutoff:",outCutoff.i),size=3)+
        theme_void()
      print(p)
      
    }
    
    
    
    
  }
  
}

10 Session info

print(sessionInfo())

## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
##  [1] parallel  grid      stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] IlluminaHumanMethylationEPICv2anno.20a1.hg38_1.0.0
##  [2] ggrepel_0.9.6                                     
##  [3] patchwork_1.3.2                                   
##  [4] MafH5.gnomAD.v4.0.GRCh38_3.19.0                   
##  [5] GenomicScores_2.20.2                              
##  [6] SNPlocs.Hsapiens.dbSNP155.GRCh38_0.99.24          
##  [7] BSgenome_1.76.0                                   
##  [8] rtracklayer_1.68.0                                
##  [9] BiocIO_1.18.0                                     
## [10] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1
## [11] minfi_1.54.1                                      
## [12] bumphunter_1.50.0                                 
## [13] locfit_1.5-9.12                                   
## [14] iterators_1.0.14                                  
## [15] foreach_1.5.2                                     
## [16] SummarizedExperiment_1.38.1                       
## [17] Biobase_2.68.0                                    
## [18] MatrixGenerics_1.20.0                             
## [19] matrixStats_1.5.0                                 
## [20] circlize_0.4.16                                   
## [21] ComplexHeatmap_2.24.1                             
## [22] Biostrings_2.76.0                                 
## [23] XVector_0.48.0                                    
## [24] openxlsx_4.2.8                                    
## [25] GenomicRanges_1.60.0                              
## [26] GenomeInfoDb_1.44.2                               
## [27] IRanges_2.42.0                                    
## [28] S4Vectors_0.46.0                                  
## [29] BiocGenerics_0.54.0                               
## [30] generics_0.1.4                                    
## [31] pals_1.10                                         
## [32] ggpubr_0.6.1                                      
## [33] ggplot2_3.5.2                                     
## [34] dplyr_1.1.4                                       
## [35] janitor_2.2.1                                     
## [36] data.table_1.17.8                                 
## [37] BiocStyle_2.36.0                                  
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.5.0                     filelock_1.0.3                   
##   [3] bitops_1.0-9                      tibble_3.3.0                     
##   [5] preprocessCore_1.70.0             XML_3.99-0.19                    
##   [7] lifecycle_1.0.4                   rstatix_0.7.2.999                
##   [9] fastcluster_1.3.0                 doParallel_1.0.17                
##  [11] lattice_0.22-7                    MASS_7.3-65                      
##  [13] base64_2.0.2                      scrime_1.3.5                     
##  [15] backports_1.5.0                   magrittr_2.0.3                   
##  [17] limma_3.64.3                      sass_0.4.10                      
##  [19] rmarkdown_2.29                    BSgenome.Hsapiens.UCSC.hg38_1.4.5
##  [21] jquerylib_0.1.4                   yaml_2.3.10                      
##  [23] doRNG_1.8.6.2                     zip_2.3.3                        
##  [25] askpass_1.2.1                     mapproj_1.2.12                   
##  [27] DBI_1.2.3                         RColorBrewer_1.1-3               
##  [29] lubridate_1.9.4                   maps_3.4.3                       
##  [31] abind_1.4-8                       quadprog_1.5-8                   
##  [33] purrr_1.1.0                       RCurl_1.98-1.17                  
##  [35] rappdirs_0.3.3                    GenomeInfoDbData_1.2.14          
##  [37] rentrez_1.2.4                     genefilter_1.90.0                
##  [39] annotate_1.86.1                   DelayedMatrixStats_1.30.0        
##  [41] codetools_0.2-20                  DelayedArray_0.34.1              
##  [43] xml2_1.4.0                        tidyselect_1.2.1                 
##  [45] shape_1.4.6.1                     UCSC.utils_1.4.0                 
##  [47] farver_2.1.2                      beanplot_1.3.1                   
##  [49] BiocFileCache_2.16.1              illuminaio_0.50.0                
##  [51] GenomicAlignments_1.44.0          jsonlite_2.0.0                   
##  [53] GetoptLong_1.0.5                  multtest_2.64.0                  
##  [55] Formula_1.2-5                     survival_3.8-3                   
##  [57] tools_4.5.0                       Rcpp_1.1.0                       
##  [59] glue_1.8.0                        SparseArray_1.8.1                
##  [61] xfun_0.53                         HDF5Array_1.36.0                 
##  [63] withr_3.0.2                       BiocManager_1.30.26              
##  [65] fastmap_1.2.0                     rhdf5filters_1.20.0              
##  [67] openssl_2.3.3                     digest_0.6.37                    
##  [69] timechange_0.3.0                  R6_2.6.1                         
##  [71] colorspace_2.1-1                  Cairo_1.6-5                      
##  [73] dichromat_2.0-0.1                 RSQLite_2.4.3                    
##  [75] h5mread_1.0.1                     tidyr_1.3.1                      
##  [77] httr_1.4.7                        S4Arrays_1.8.1                   
##  [79] pkgconfig_2.0.3                   gtable_0.3.6                     
##  [81] blob_1.2.4                        siggenes_1.82.0                  
##  [83] htmltools_0.5.8.1                 carData_3.0-5                    
##  [85] bookdown_0.44                     clue_0.3-66                      
##  [87] scales_1.4.0                      png_0.1-8                        
##  [89] snakecase_0.11.1                  knitr_1.50                       
##  [91] rstudioapi_0.17.1                 tzdb_0.5.0                       
##  [93] rjson_0.2.23                      nlme_3.1-168                     
##  [95] curl_7.0.0                        cachem_1.1.0                     
##  [97] rhdf5_2.52.1                      GlobalOptions_0.1.2              
##  [99] stringr_1.5.1                     BiocVersion_3.21.1               
## [101] AnnotationDbi_1.70.0              restfulr_0.0.16                  
## [103] GEOquery_2.76.0                   pillar_1.11.0                    
## [105] reshape_0.8.10                    vctrs_0.6.5                      
## [107] car_3.1-3                         dbplyr_2.5.0                     
## [109] xtable_1.8-4                      cluster_2.1.8.1                  
## [111] evaluate_1.0.5                    magick_2.8.7                     
## [113] tinytex_0.57                      readr_2.1.5                      
## [115] GenomicFeatures_1.60.0            cli_3.6.5                        
## [117] compiler_4.5.0                    Rsamtools_2.24.0                 
## [119] rlang_1.1.6                       crayon_1.5.3                     
## [121] rngtools_1.5.2                    ggsignif_0.6.4                   
## [123] labeling_0.4.3                    nor1mix_1.3-3                    
## [125] mclust_6.1.1                      plyr_1.8.9                       
## [127] stringi_1.8.7                     BiocParallel_1.42.1              
## [129] Matrix_1.7-4                      BSgenome.Hsapiens.UCSC.hg19_1.4.3
## [131] hms_1.1.3                         sparseMatrixStats_1.20.0         
## [133] bit64_4.6.0-1                     Rhdf5lib_1.30.0                  
## [135] KEGGREST_1.48.1                   statmod_1.5.0                    
## [137] AnnotationHub_3.16.1              broom_1.0.9                      
## [139] memoise_2.0.1                     bslib_0.9.0                      
## [141] bit_4.6.0

Analyses of SNPs vs fCPGs, Gabbutt and Duran-Ferrer 2025

10/07/2025