fCpG methylation represents a previously unrecognized type of CpG methylation and is different from any other reported epigenetic clock or cell-type specific CpG, repreenting neutral and cell lineage markers.

1 Load packages and data

library(ggpubr)
library(patchwork)
library(ComplexHeatmap)
library(circlize)
library(pals)
library(dplyr)



# color pals# color smaples
cols.paper <- c(
  NA_values="#666666",
  HPC="#E6E6E6",
  pre.Bcell="#D5D5D5",
  NBC="#C3C3C3",
  GCB="#AEAEAE",
  tPC="#969696",
  MBC="#787878",
  bmPC="#4D4D4D",
  Tcell="#F0F8FF",
  PBMCs="#FFF8DC",
  Whole_blood="#CDC8B1",
  Normal_lymphoid_cell="#D5D5D5",
  Normal_myeloid_cell="#E6E6FA",
  "T-ALL"="#CD3333",
  "T-ALL-remission"="#C7A5A5",
  "B-ALL"="#E69F00",
  "B-ALL-remission"="#D8CAAB",
  MCL="#F0E442",
  C1.or.cMCL="#F5D51B",
  C2.or.nnMCL="#0072B2",
  "DLBCL-NOS"="#56B4E9",
  MBL="#5A9A89",
  CLL="#009E73",
  RT="#02C590",
  unmutated="#e65100",
  mutated="#361379",
  nCLL="#006E93",
  iCLL="#FDC010",
  mCLL="#963736",
  Disease.Unclassified="#CCCCCC",
  MGUS="#BF99AE",
  MM="#CC79A7"
)


# plot heatmap sorted by age and with colors

evoflux.meth <- data.table::fread("../Revision//Data/meth_palette_evoflux.tsv")
evoflux.meth


## load reads and metadata
metadata <- data.table::fread("../Revision/Results/fcpgs_Nanopore/Fig1_haplotypes/readinfo_chr7_25854120-25856220.csv",data.table = F)
colnames(metadata)[grep("V1",colnames(metadata))] <- "read_id"
metadata %>% head
metadata %>% select(patient,sample_name)
metadata <- 
  metadata %>% 
  mutate(patient=case_when(patient=="12" ~as.character(patient),
                           patient=="19" ~as.character(patient),
                           .default = sample_name
                           )
         )

metadata$patient
## set order factors in metadat for the heatmap

metadata <- 
  metadata %>% 
  mutate(patient=factor(patient,levels=c("12","19",
                                         "22_10_NBC","22_4_NBC","22_5_NBC",
                                         "22_4_MBC","22_5_MBC","22_9_MBC"
                                         )),
         sample_group=factor(sample_group,levels=c("NBC","MBC","CLL","RT"))
         )


##load reads
reads <- data.table::fread("../Revision/Results/fcpgs_Nanopore/Fig1_haplotypes/combined_reads_haplotype_chr7_25854120-25856220.csv",header = T,data.table = F)
colnames(reads)[grep("V1",colnames(reads))] <- "read_id"
reads %>% head

identical(metadata$read_id,reads$read_id)

2 Chromosome region chr7_25854120-25856220

We draw the chromosome region to plot

## draw chromosome and genomic coordinates on top
library(karyoploteR)


kp <- plotKaryotype(genome = "hg38",
                    plot.type = 7,
                    chromosomes = "chr7",
                    main = NULL,#"chr7:25854120-25856220 (hg38)",
                    srt=0,
                    cex=0.35,
                    lwd=0.2
                    )
kpRect(kp, chr="chr7", x0=25854120, x1=25956220, y0=0, y1=1, col="red3", data.panel="ideogram", border=NA)

3 Heatmap

We next plot heatmap with haplotypes information for the region chr7_25854120-25856220, which contains reads covering 4 fCpGs. We can see that 1) there is higher methylation similarity between haplotypes, and 2) that CLL and RT (cancer samples) show more homogenous values comapred to normal cells, consistent with fCpGs functioning as cell-sepcific barcodes.

##plot


ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T


h.reads <- 
  Heatmap(reads %>% select(-read_id),
        column_title ="chr7:25854120-25856220 (hg38)",
        column_title_gp = gpar(fontsize=7),
        col=colorRamp2(breaks = c(0,1),colors = c("grey93","grey0")),
        heatmap_legend_param = list(title="Long-read fCpG methylation",
                                    color_bar="discrete",
                                    at=c(1,0),
                                    labels=c("Methylated","Unmethylated")
                                    ),
        row_split = metadata %>% select(patient,sample_group,haplotype),
        cluster_row_slices = F,
        cluster_rows = T,
        cluster_columns = F,
        row_title_rot = 0,
        border=T,
        show_row_dend = F,
        left_annotation = HeatmapAnnotation(which="row",
                                            df = metadata %>% select(sample_group,patient,haplotype),
                                            col=list(patient=setNames(cols25(length(metadata$patient %>% unique())),
                                                                          metadata$patient %>% unique()
                                                                          ),
                                                     sample_group=cols.paper,
                                                     haplotype=setNames(brewer.set2(3),c(1,2,3))
                                                     ),
                                            annotation_legend_param = list(labels_gp=gpar(fontsize=4),
                                                                           color_bar=list(haplotype="discrete")
                                                                           ),
                                            annotation_label = list(sample_group="Sample group",
                                                                    patient="Patient",
                                                                    haplotype="Haplotype"
                                                                    ),
                                            show_annotation_name = T,
                                            na_col = "grey45",
                                            show_legend = T,
                                            annotation_name_gp = gpar(fontsize=4),
                                            simple_anno_size = unit(1.5,"mm")
                                            ),
        )


draw(h.reads)

4 Session info

print(sessionInfo())
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] karyoploteR_1.34.2    regioneR_1.40.1       GenomicRanges_1.60.0 
##  [4] GenomeInfoDb_1.44.2   IRanges_2.42.0        S4Vectors_0.46.0     
##  [7] BiocGenerics_0.54.0   generics_0.1.4        dplyr_1.1.4          
## [10] pals_1.10             circlize_0.4.16       ComplexHeatmap_2.24.1
## [13] patchwork_1.3.2       ggpubr_0.6.1          ggplot2_3.5.2        
## [16] BiocStyle_2.36.0     
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          rstudioapi_0.17.1          
##   [3] jsonlite_2.0.0              shape_1.4.6.1              
##   [5] magrittr_2.0.3              magick_2.8.7               
##   [7] GenomicFeatures_1.60.0      farver_2.1.2               
##   [9] rmarkdown_2.29              GlobalOptions_0.1.2        
##  [11] BiocIO_1.18.0               zlibbioc_1.54.0            
##  [13] vctrs_0.6.5                 Cairo_1.6-5                
##  [15] memoise_2.0.1               Rsamtools_2.24.0           
##  [17] RCurl_1.98-1.17             base64enc_0.1-3            
##  [19] tinytex_0.57                rstatix_0.7.2.999          
##  [21] htmltools_0.5.8.1           S4Arrays_1.8.1             
##  [23] curl_7.0.0                  broom_1.0.9                
##  [25] SparseArray_1.8.1           Formula_1.2-5              
##  [27] sass_0.4.10                 bslib_0.9.0                
##  [29] htmlwidgets_1.6.4           cachem_1.1.0               
##  [31] GenomicAlignments_1.44.0    lifecycle_1.0.4            
##  [33] iterators_1.0.14            pkgconfig_2.0.3            
##  [35] Matrix_1.7-4                R6_2.6.1                   
##  [37] fastmap_1.2.0               GenomeInfoDbData_1.2.14    
##  [39] MatrixGenerics_1.20.0       clue_0.3-66                
##  [41] digest_0.6.37               colorspace_2.1-1           
##  [43] AnnotationDbi_1.70.0        bezier_1.1.2               
##  [45] Hmisc_5.2-3                 RSQLite_2.4.3              
##  [47] httr_1.4.7                  abind_1.4-8                
##  [49] compiler_4.5.0              bit64_4.6.0-1              
##  [51] withr_3.0.2                 doParallel_1.0.17          
##  [53] htmlTable_2.4.3             backports_1.5.0            
##  [55] BiocParallel_1.42.1         carData_3.0-5              
##  [57] DBI_1.2.3                   maps_3.4.3                 
##  [59] ggsignif_0.6.4              DelayedArray_0.34.1        
##  [61] rjson_0.2.23                tools_4.5.0                
##  [63] foreign_0.8-90              nnet_7.3-20                
##  [65] glue_1.8.0                  restfulr_0.0.16            
##  [67] checkmate_2.3.3             cluster_2.1.8.1            
##  [69] gtable_0.3.6                BSgenome_1.76.0            
##  [71] tidyr_1.3.1                 ensembldb_2.32.0           
##  [73] data.table_1.17.8           car_3.1-3                  
##  [75] XVector_0.48.0              foreach_1.5.2              
##  [77] pillar_1.11.0               stringr_1.5.1              
##  [79] lattice_0.22-7              rtracklayer_1.68.0         
##  [81] bit_4.6.0                   biovizBase_1.56.0          
##  [83] tidyselect_1.2.1            Biostrings_2.76.0          
##  [85] knitr_1.50                  gridExtra_2.3              
##  [87] bookdown_0.44               ProtGenerics_1.40.0        
##  [89] SummarizedExperiment_1.38.1 xfun_0.53                  
##  [91] Biobase_2.68.0              matrixStats_1.5.0          
##  [93] stringi_1.8.7               UCSC.utils_1.4.0           
##  [95] lazyeval_0.2.2              yaml_2.3.10                
##  [97] evaluate_1.0.5              codetools_0.2-20           
##  [99] tibble_3.3.0                BiocManager_1.30.26        
## [101] cli_3.6.5                   rpart_4.1.24               
## [103] jquerylib_0.1.4             dichromat_2.0-0.1          
## [105] Rcpp_1.1.0                  mapproj_1.2.12             
## [107] png_0.1-8                   fastcluster_1.3.0          
## [109] XML_3.99-0.19               parallel_4.5.0             
## [111] blob_1.2.4                  AnnotationFilter_1.32.0    
## [113] bitops_1.0-9                VariantAnnotation_1.54.1   
## [115] scales_1.4.0                purrr_1.1.0                
## [117] crayon_1.5.3                bamsignals_1.40.0          
## [119] GetoptLong_1.0.5            rlang_1.1.6                
## [121] KEGGREST_1.48.1