We use several datasets available, including whole blood and purified cell types to study fCpG methylation changes throughout human lifespan. Collectively, we show how fluctuations remain stable during human lifespan, and an increase in standard deviation in samples, consistent with know cloncal expansions in the hematopoietic system during aging.

1 Load packages and fCpGs information

##
## Load necessary packages and set global options
##

library(data.table)
library(janitor)
library(dplyr)
library(ggpubr)
library(pals)
library(GEOquery)
library(ComplexHeatmap)
library(circlize)
library(EpiDISH)
library(openxlsx)
library(ggstats)
library(gt)
library(gtsummary)
library(gtExtras)
library(stringr)
library(patchwork)
library(ggstats)
library(gtsummary)
library(broom)
library(broom.helpers)



options(stringsAsFactors = F,max.print = 10000,error=NULL,
        "openxlsx.dateFormat" = "dd/mm/yyyy"
)


##
## 1. load fCpGs
##

fCpGs.betas <- 
  fread("../Data/FMC/Final_data/beta_all_samples_fcpgs_laplacian.csv",data.table = F)
rownames(fCpGs.betas) <- fCpGs.betas$V1
fCpGs.betas <- fCpGs.betas[,-1]
fCpGs.betas[1:5,1:15]
dim(fCpGs.betas)  

sheets <- getSheetNames("../final_revision/Supplementary_Tables_R3.xlsx")
sheets <- setNames(sheets,sheets)
sheets

fCpGs.metadata <- lapply(sheets,function(i){
  writeLines(i)
  openxlsx::read.xlsx("../final_revision/Supplementary_Tables_R3.xlsx",sheet = i,detectDates = T,startRow = 3)
})

names(fCpGs.metadata)
fCpGs.metadata$supplementary_table_2 %>% head


## load normal lymphoid samples used for the supplement.
fCpGs.metadata.all <- openxlsx::read.xlsx("../Data/FMC.blood.samples.v2.4.xlsx",detectDates = T)
head(fCpGs.metadata.all);dim(fCpGs.metadata.all)

fCpGs.metadata.all %>% count(CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2)

all(colnames(fCpGs.betas) %in% fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)
table(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% colnames(fCpGs.betas) )

## remove bad quality samples and leave those with meth info
fCpGs.metadata.all <- fCpGs.metadata.all[match(colnames(fCpGs.betas),fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS),]

fCpGs.metadata.all %>% dim

2 Cell fractions in blood

We will use the package EpiDISH to find cell types described in Salas 2022

ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T

##
## Calculate cell fractions in using epiDISH data, ie Salas 2022 nat comms data
##

data(cent12CT450k.m)
Heatmap(cent12CT450k.m,col=coolwarm(),show_row_names = F)

3 B and T cell fCpG dynamics

We plot the correlation of fCpG mean methylation and standard deviation (SD) of cell types included in our study design (figure supplementary 7).

As age is not avaiable for some samples, we use here the Horvath value.

#
## 1. Data from our submitted manuscipt
##

all(colnames(fCpGs.betas) %in% fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)
table(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% colnames(fCpGs.betas))
identical(colnames(fCpGs.betas),fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)

## mean and sd for fCpGs
fCpGs.metadata.all$SD.fCpGs <- apply(fCpGs.betas[,fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS],2,sd,na.rm=T)
fCpGs.metadata.all$mean.fCpGs <- apply(fCpGs.betas[,fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS],2,mean,na.rm=T)
fCpGs.metadata.all$submitted <- 
  ifelse(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% fCpGs.metadata$supplementary_table_2$participant_id_anonymous,
                                       T,F
                                       )
fCpGs.metadata.all <- 
  fCpGs.metadata.all %>% 
  filter(submitted) %>% 
  mutate(CELL_TYPE_ANNOT.2=case_when(CELL_TYPE=="CD19_positive" ~ "Bcell (CD19+)",
                                     CELL_TYPE=="Granulocyte" ~ "Granulocyte",
                                     CELL_TYPE=="NK_cell" ~ "NK",
                                     CELL_TYPE_ANNOT.2=="Tcell" ~ "Tcell(CD3+)",
                                     CELL_TYPE=="Monocyte" ~ "Monocyte",
                                     .default = CELL_TYPE_ANNOT.2),
         Horvath=Clocks_Horvath
         )

##plot 
fCpGs.metadata.all %>% count(CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2)
fCpGs.metadata.all %>% 
  filter(CELL_TYPE!="Macrophage_invitro_differentiated",
         CELL_TYPE_ANNOT.2!="pre-Bcell",
         CELL_TYPE_ANNOT.2!="bmPC",
         CELL_TYPE_ANNOT.2!="tPC",
         grepl("^Normal",CELL_TYPE_ANNOT.1)) %>% 
  select(SAMPLE_ID,CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2,AGE_SAMPLING,Clocks_Horvath,submitted)

fCpGs.metadata.all.plot <- 
  fCpGs.metadata.all %>% 
  filter(CELL_TYPE!="Macrophage_invitro_differentiated",
         CELL_TYPE_ANNOT.2!="pre-Bcell",
         CELL_TYPE_ANNOT.2!="bmPC",
         CELL_TYPE_ANNOT.2!="tPC",
         grepl("^Normal",CELL_TYPE_ANNOT.1),
         submitted
         )

fCpGs.metadata.all.plot <- 
  fCpGs.metadata.all.plot %>% 
  mutate(CELL_TYPE_ANNOT.2=factor(CELL_TYPE_ANNOT.2,
                                  levels=c("NBC","GCB","MBC","Tcell(CD3+)"),
                                  labels=c("Naïve B cell","Germinal center B cell","Memory B cell","T cell")
                                  )
         )

fCpGs.metadata.all.plot %>% glimpse()



##plot
fCpGs.metadata.all.plot %>% 
  ggplot(aes(x = Horvath,
             y = AGE_SAMPLING,
             ))+
  geom_point()+
  geom_smooth(method = "lm",color="grey0")+
  stat_cor(size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
fCpGs.metadata.all.plot %>%
  ggplot(aes(x=Horvath))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Normal lymphoid and myeloid cells, N=",nrow(fCpGs.metadata.all.plot)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##correlations
fCpGs.metadata.all.plot %>% 
  ggplot(aes(x=Horvath,
             y=mean.fCpGs
  ))+
  geom_point(
    aes(fill=CELL_TYPE_ANNOT.2),
    size=0.75,
    pch=21,
    stroke=0
  )+
  geom_smooth(aes(fill=CELL_TYPE_ANNOT.2),
              method = "lm",color="black",lwd=0.5,alpha=0.3
              )+
  scale_fill_manual(values = cols25())+
  facet_wrap(~CELL_TYPE_ANNOT.2)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
                              )
               ),
           label.y.npc="top",
           label.x.npc = "left",
           method = "pearson",
           size=1.25)+
  # stat_cor(method = "pearson",size=1.5)+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) | 
  fCpGs.metadata.all.plot %>% 
  ggplot(aes(x=Horvath,
             y=SD.fCpGs
  ))+
  geom_point(
    aes(fill=CELL_TYPE_ANNOT.2),
    size=0.75,
    pch=21,
    stroke=0
  )+
  geom_smooth(aes(fill=CELL_TYPE_ANNOT.2),
              method = "lm",color="black",lwd=0.5,alpha=0.3
  )+
  scale_fill_manual(values = cols25())+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
                              )
               ),
           label.y.npc="top",
           label.x.npc = "left",
           method = "pearson",
           size=1.25)+
  facet_wrap(~CELL_TYPE_ANNOT.2)+
  # stat_cor(method = "pearson",size=1.5)+
  guides(fill=guide_legend("Donor id",nrow=2))+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

We next plot a heatmap with all fCpGs, and see a whitish color heatmap, indicative of those samples being polyclonal.

##
## HEATMAP fCpGs
## 
# plot heatmap sorted by age and with colors

evoflux.meth <- fread("../Revision/Data/meth_palette_evoflux.tsv")
evoflux.meth



ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T




h.fCpGs <- Heatmap(fCpGs.betas[,fCpGs.metadata.all.plot$PARTICIPANT_ID_ANONYMOUS],
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=fCpGs.metadata.all.plot %>% 
                       select(age_sampling=AGE_SAMPLING,Horvath,
                              annot_1=CELL_TYPE_ANNOT.1,annot_2=CELL_TYPE_ANNOT.2),
                     col=list(
                       age_sampling=colorRamp2(breaks = seq(min(fCpGs.metadata.all.plot$AGE_SAMPLING,na.rm = T),
                                                            max(fCpGs.metadata.all.plot$AGE_SAMPLING,na.rm = T),
                                                            length.out=100),brewer.blues(100)
                                               ),
                       Horvath=colorRamp2(breaks = seq(min(fCpGs.metadata.all.plot$Horvath,na.rm = T),
                                                            max(fCpGs.metadata.all.plot$Horvath,na.rm = T),
                                                            length.out=100),brewer.blues(100)
                                          ),
                       annot_1=setNames(kelly(fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.1 %>%
                                                unique() %>%
                                                length()
                                              ),
                                        fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.1 %>%
                                          unique()
                                        ),
                       annot_2=setNames(kelly(fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.2 %>%
                                                unique() %>%
                                                length()
                                              ),
                                        fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.2 %>%
                                          unique()
                                        )
                       ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)

draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4 External datasets

We next perform analyses separately in each dataset.

4.1 Purified cell types

4.1.1 GSE184269 - Roy 2021

We next download sorted immune cell types in adults, available in GSE184269. We see a similar scenario as those samples included in our study.

##
##  Download   # GSE184269, sorted immune cells in aging
##

gse <- getGEO(GEO = "GSE184269",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE184269_series_matrix.txt.gz
show(gse)

## phenodata
sorted.age.metadata <- 
  pData(phenoData(gse)) %>% 
  clean_names()
sorted.age.metadata %>% head
sorted.age.metadata %>% count(source_name_ch1)
sorted.age.metadata$colnames.betas <- 
  gsub("ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM5583nnn/GSM\\d+/suppl/GSM\\d+_|_Grn.idat.gz",
       "",
       sorted.age.metadata$supplementary_file)
sorted.age.metadata$colnames.betas
sorted.age.metadata$age <- as.numeric(sorted.age.metadata$age_ch1)

## get meth values
sorted.age.betas <- fread("../Revision/Data/450k-Aging/GSE184269_Matrix_processed_FINAL.txt.gz",data.table = F)
head(sorted.age.betas);dim(sorted.age.betas)
rownames(sorted.age.betas) <- sorted.age.betas$ID_RED
sorted.age.betas <- sorted.age.betas[,grep("Detection_Pval|ID_RED",colnames(sorted.age.betas),value = T,invert = T)]
head(sorted.age.betas);dim(sorted.age.betas)
all(colnames(sorted.age.betas) %in% sorted.age.metadata$colnames.betas)
sorted.age.betas <- sorted.age.betas[,sorted.age.metadata$colnames.betas]
colnames(sorted.age.betas) <- sorted.age.metadata$geo_accession

##fcpgs beta values
table(rownames(fCpGs.betas) %in% rownames(sorted.age.betas))
fCpGs.sorted.age <- sorted.age.betas[rownames(fCpGs.betas),]
identical(colnames(fCpGs.sorted.age),sorted.age.metadata$geo_accession)

##complete with fCPGs stats
sorted.age.metadata$SD.fCpGs <- apply(fCpGs.sorted.age,2,sd,na.rm=T)
sorted.age.metadata$mean.fCpGs <- apply(fCpGs.sorted.age,2,mean,na.rm=T)

##calculate cell fractions
sorted.age.metadata<- 
  cbind(sorted.age.metadata,
        epidish(beta.m = sorted.age.betas,ref.m = cent12CT450k.m)$estF
  )
head(sorted.age.metadata)

Age distribution:

sorted.age.metadata %>% head
sorted.age.metadata %>% dim
sorted.age.metadata %>% count(source_name_ch1)
sorted.age.metadata %>% count(source_name_ch1,title)
sorted.age.metadata <- 
  sorted.age.metadata %>% 
  mutate(source=case_when(source_name_ch1=="Peripheral blood" ~ "PBMCs",.default = source_name_ch1))
sorted.age.metadata %>% count(donor_id_ch1,source_name_ch1)
sorted.age.metadata %>% count(donor_id_ch1)


sorted.age.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Roy et al, Immunity 2021, N=",nrow(sorted.age.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

We see in the heatmap a similar scenario, but there seems to be donor-specific batches.

sorted.age.metadata$suspected.batch <- str_extract(sorted.age.metadata$description,"Gestalt_Expt_\\d")


### heatmap with sorted ageed samples
h.fCpGs <- Heatmap(fCpGs.sorted.age,
        col=evoflux.meth$color,
        cluster_columns = T,
        cluster_rows = T,
        show_row_names = F,
        show_column_names = F,
        show_row_dend = T,
        show_column_dend = T,
        name="DNA methylation",
        border = T,
        row_dend_gp = gpar(lwd=0.1),
        column_dend_gp = gpar(lwd=0.1),
        top_annotation = HeatmapAnnotation(
          df=sorted.age.metadata %>%
            select(suspected.batch,
                   donor_id=donor_id_ch1,source,Sex=characteristics_ch1_2,age),
          col=list(
            suspected.batch=setNames(tableau20(sorted.age.metadata$suspected.batch %>% unique() %>% length()),
                                               sorted.age.metadata$suspected.batch %>% unique()
                                               ),
            donor_id=setNames(alphabet2(sorted.age.metadata %>% 
                                               pull(donor_id_ch1) %>% 
                                               unique() %>% 
                                               length()
                                             ),
                                             sorted.age.metadata %>% 
                                               pull(donor_id_ch1) %>% 
                                               unique()
                                  ),
            source=setNames(kelly(sorted.age.metadata %>% 
                                               pull(source) %>% 
                                               unique() %>% 
                                               length()
                                             ),
                                             sorted.age.metadata %>% 
                                               pull(source) %>% 
                                               unique()
                                  ),
            Sex=setNames(c("deepskyblue","pink2"),c("Sex: M","Sex: F")),
            age=colorRamp2(breaks = seq(min(sorted.age.metadata$age),
                                        max(sorted.age.metadata$age),
                                        length.out=100),
                           brewer.blues(100))
            ),
          show_annotation_name = T,
          na_col = "grey45",
          annotation_name_side = "left",
          show_legend = T,
          annotation_name_gp = gpar(fontsize=4),
          annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
          simple_anno_size = unit(2,"mm")
        )
)

draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
     )

4.1.2 GSE137594 - Clark 2020

We follow with other cohort with purified cells, including B and T cells from patients with different inflammatory conditions. GSE137594

gse <- getGEO(GEO = "GSE137594",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE137594_series_matrix.txt.gz
show(gse)

## phenodata
Clark2020.metadata <- pData(phenoData(gse)) %>% clean_names()
Clark2020.metadata %>% head
Clark2020.metadata$age <- as.numeric(Clark2020.metadata$age_ch1)
qplot(Clark2020.metadata$age)

rownames(Clark2020.metadata)

## get meth values
Clark2020.betas <- fread("../Revision/Data/450k-Aging/GSE137594_norm_avg_beta_pval.txt.gz",data.table = F)
head(Clark2020.betas);dim(Clark2020.betas);gc()
rownames(Clark2020.betas) <- Clark2020.betas$ID_REF
Clark2020.betas <- Clark2020.betas[,grep("Detection|ID_REF",colnames(Clark2020.betas),value = T,invert = T)]
identical(colnames(Clark2020.betas) , (Clark2020.metadata$title %>% gsub(": Newcastle Early Arthritis Clinic patient \\d+","",.)))
Clark2020.betas <- Clark2020.betas[,gsub(": Newcastle Early Arthritis Clinic patient \\d+","",Clark2020.metadata$title)]
colnames(Clark2020.betas) <- Clark2020.metadata$geo_accession
head(Clark2020.betas);dim(Clark2020.betas)

##fcpgs
table(rownames(fCpGs.betas) %in% rownames(Clark2020.betas))
fCpGs.Clark2020 <- Clark2020.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(Clark2020.betas)],]
head(fCpGs.Clark2020);dim(fCpGs.Clark2020)
identical(colnames(fCpGs.Clark2020),Clark2020.metadata$geo_accession)


##complete with fCPGs stats
Clark2020.metadata$SD.fCpGs <- apply(fCpGs.Clark2020,2,sd,na.rm=T)
Clark2020.metadata$mean.fCpGs <- apply(fCpGs.Clark2020,2,mean,na.rm=T)
head(Clark2020.metadata);dim(Clark2020.metadata)

##cell franctions
Clark2020.metadata<- 
  cbind(Clark2020.metadata,
        epidish(beta.m = Clark2020.betas,ref.m = cent12CT450k.m)$estF
  )
head(Clark2020.metadata)

Age distribution

Clark2020.metadata %>% head
Clark2020.metadata %>% dim
Clark2020.metadata %>% count(source_name_ch1)
Clark2020.metadata %>% count(characteristics_ch1)
Clark2020.metadata %>% count(characteristics_ch1_2)
Clark2020.metadata %>% count(first_diagnosis_ch1,working_diagnosis_ch1)



Clark2020.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Clark et al 2020, N=",nrow(Clark2020.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

There is a clear increase in SD during aging, while mean remain stable

Clark2020.metadata %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1_2),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_2)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  Clark2020.metadata %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1_2),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_2)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

We next plot a heatmap and see an overall polyclonal population of cells regardless of inflamatory condition.

Clark2020.metadata %>% head
Clark2020.metadata$description

##
## HEATMAP with sorted ageed samples
##

h.fCpGs <- Heatmap(fCpGs.Clark2020,
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=Clark2020.metadata %>%
                       select(Source=source_name_ch1,Working_diagnosis=working_diagnosis_ch1,age,Sex=gender_ch1),
                     col=list(
                       Source=setNames(kelly(Clark2020.metadata$source_name_ch1 %>% unique() %>% length()),
                                          Clark2020.metadata$source_name_ch1 %>% unique()
                       ),
                       Working_diagnosis=setNames(tableau20(Clark2020.metadata$working_diagnosis_ch1 %>% unique() %>% length()),
                                                Clark2020.metadata$working_diagnosis_ch1 %>% unique()
                       ),
                       Sex=setNames(c("deepskyblue","pink2"),c("Male","Female")),
                       age=colorRamp2(breaks = seq(min(Clark2020.metadata$age),
                                                   max(Clark2020.metadata$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2 Whole blood samples

We next follow the analyses with whole blood samples, accounting for know changes in cell type compositions during aging.

4.2.1 Pediatric samples

4.2.1.1 GSE36054 - Alisch 2015

We next load data from pediatric donors available at GSE36054

## 2. padiatric with ethnicity
gse <- getGEO(GEO = "GSE36054",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE36054_series_matrix.txt.gz
show(gse)

## phenodata
pediatrics.metadata <- 
  pData(phenoData(gse)) %>% 
  clean_names()

pediatrics.metadata %>% count(ethnicity_ch1)
pediatrics.metadata %>% count(source_name_ch1)
pediatrics.metadata %>% count(characteristics_ch1,ethnicity_ch1)

## get meth values
table(rownames(fCpGs.betas) %in% rownames(exprs(gse)))
pediatrics.betas <- exprs(gse)
fCpGs.pediatrics <- pediatrics.betas[rownames(fCpGs.betas),] %>% as.data.frame()
identical(colnames(fCpGs.pediatrics),pediatrics.metadata$geo_accession)

##complete with fCPGs stats
pediatrics.metadata$SD.fCpGs <- apply(fCpGs.pediatrics,2,sd,na.rm=T) 
pediatrics.metadata$mean.fCpGs <- apply(fCpGs.pediatrics,2,mean,na.rm=T)
class(pediatrics.metadata)


## complete cell types
pediatrics.metadata<- 
  cbind(pediatrics.metadata,
        epidish(beta.m = pediatrics.betas,ref.m = cent12CT450k.m)$estF
  )
head(pediatrics.metadata)

Age distribution

##
## PEDIATRIC POPULATION use other data sets to further strength previous results, and to show the same in pediatric population
##

# GSE36054 dataset

pediatrics.metadata %>% head
pediatrics.metadata %>% dim
pediatrics.metadata <- 
  pediatrics.metadata %>% 
  mutate(age=as.numeric(age_at_collection_months_ch1)/12)
pediatrics.metadata %>% count(characteristics_ch1_7)
pediatrics.metadata %>% count(cell_type_ch1)
pediatrics.metadata %>%  count(characteristics_ch1,ethnicity_ch1)
pediatrics.metadata %>% filter(is.na(age)) %>% count(characteristics_ch1,ethnicity_ch1)
head(pediatrics.betas);dim(pediatrics.betas)

pediatrics.metadata.plot <-
  pediatrics.metadata %>% 
  filter(!is.na(age),characteristics_ch1!="group: Harvard.Sib.Failed")

##plot age distribution in pediatrics
pediatrics.metadata.plot %>% head

pediatrics.metadata.plot %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Alisch et al, Genome Research 2015, N=",nrow(pediatrics.metadata.plot)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Contrary to adult donors, in these samples there is no association of SD with the age of the donors

pediatrics.metadata.plot %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=cell_type_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~cell_type_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  pediatrics.metadata.plot %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=cell_type_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~cell_type_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Also when considering the cellular composition of blood (a subtle signal already appears though in SD!)

## adjusting for cellular fractions
pediatrics.metadata.plot %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
       id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value,
             fill=variable
  ))+
  geom_point(pch=21,size=1,stroke=0.1)+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                              )
               ),
           # label.x.npc = 0.2,label.y.npc = 1,
           method = "pearson",
           size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
        )

##liner model corrected by cell fracts and ethnicity
# pediatrics.metadata %>% head
# pediatrics.metadata %>% count(ethnicity_ch1)

lm.fit <- 
  pediatrics.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  filter(ethnicity!="Unknown") %>% 
  lm(SD.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
                 )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 129 -6e-04 -1e-03, -8e-05 2e-02 1e-01
ethnicity




    Asian 3 — —

    Black 74 -2e-03 -1e-02, 8e-03 7e-01 7e-01
    Other 44 -6e-03 -2e-02, 3e-03 2e-01 4e-01
    White 8 -7e-03 -2e-02, 3e-03 2e-01 4e-01
Mono 129 3e-02 -1e-01, 2e-01 7e-01 7e-01
Neu 129 8e-02 -4e-02, 2e-01 2e-01 4e-01
Baso 129 4e-01 1e-01, 7e-01 6e-03 5e-02
Eos 129 1e-01 -1e-02, 2e-01 8e-02 3e-01
NK 129 3e-02 -1e-01, 2e-01 7e-01 7e-01
CD8Tnv 129 5e-02 -9e-02, 2e-01 5e-01 6e-01
CD8Tmem 129 6e-02 -8e-02, 2e-01 4e-01 6e-01
CD4Tnv 129 1e-01 -1e-02, 2e-01 9e-02 3e-01
CD4Tmem 129 3e-01 1e-01, 4e-01 2e-04 3e-03
Treg 129 1e-01 -9e-02, 3e-01 3e-01 4e-01
Bnv 129 5e-02 -8e-02, 2e-01 5e-01 6e-01
Bmem 129



Abbreviation: CI = Confidence Interval
R² = 0.525; Adjusted R² = 0.462; Sigma = 0.008; Statistic = 8.32; p-value = <0.001; df = 15; Log-likelihood = 454; AIC = -874; BIC = -825; Deviance = 0.007; Residual df = 113; No. Obs. = 129
1 Benjamini & Hochberg correction for multiple testing
##mean fcpg

lm.fit <- 
  pediatrics.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  filter(ethnicity!="Unknown") %>% 
  lm(mean.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 129 1e-03 -3e-04, 2e-03 1e-01 5e-01
ethnicity




    Asian 3 — —

    Black 74 -5e-03 -3e-02, 2e-02 7e-01 7e-01
    Other 44 5e-04 -2e-02, 3e-02 1e+00 1e+00
    White 8 -1e-02 -4e-02, 2e-02 4e-01 5e-01
Mono 129 -2e-01 -6e-01, 2e-01 3e-01 5e-01
Neu 129 -2e-01 -5e-01, 1e-01 3e-01 5e-01
Baso 129 -7e-01 -2e+00, 1e-01 8e-02 5e-01
Eos 129 -2e-01 -6e-01, 1e-01 2e-01 5e-01
NK 129 -1e-01 -5e-01, 3e-01 5e-01 6e-01
CD8Tnv 129 -2e-01 -5e-01, 2e-01 4e-01 5e-01
CD8Tmem 129 -2e-01 -6e-01, 2e-01 4e-01 5e-01
CD4Tnv 129 -2e-01 -5e-01, 2e-01 3e-01 5e-01
CD4Tmem 129 -2e-01 -5e-01, 2e-01 3e-01 5e-01
Treg 129 -5e-01 -1e+00, 6e-02 8e-02 5e-01
Bnv 129 -1e-01 -4e-01, 2e-01 5e-01 6e-01
Bmem 129



Abbreviation: CI = Confidence Interval
R² = 0.128; Adjusted R² = 0.012; Sigma = 0.020; Statistic = 1.10; p-value = 0.4; df = 15; Log-likelihood = 327; AIC = -620; BIC = -572; Deviance = 0.047; Residual df = 113; No. Obs. = 129
1 Benjamini & Hochberg correction for multiple testing

In the heamtap we can also obseerve there is no association with the ethnicity, nor sex.

##
### HEATMAP FCPGS
##
pediatrics.metadata.plot %>% head()

h.fCpGs <- Heatmap(fCpGs.pediatrics %>% 
                     select( pediatrics.metadata.plot %>% pull(geo_accession)),
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=pediatrics.metadata.plot %>%
                       select(ethnicity=ethnicity_ch1,age,Sex=gender_ch1),
                     col=list(
                       ethnicity=setNames(tableau20(pediatrics.metadata.plot %>%
                                                     pull(ethnicity_ch1) %>%
                                                     unique() %>% na.omit() %>% 
                                                     length()
                                                    ),
                                          pediatrics.metadata.plot %>%
                                            pull(ethnicity_ch1) %>%na.omit() %>% 
                                            unique()
                                          ),
                       Sex=setNames(c("deepskyblue","pink2"),c("M","F")),
                       age=colorRamp2(breaks = seq(min(pediatrics.metadata.plot$age),
                                                   max(pediatrics.metadata.plot$age),
                                                   length.out=100),
                                      brewer.blues(100))
                       # Neu=colorRamp2(breaks = seq_range(range(pediatrics.metadata.plot$Neu),length.out = 100),brewer.orrd(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


h.fCpGs <- draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2.2 Adult and centenarian samples

4.2.2.1 GSE40279 - Hannum 2012

We load Hannum dataset comprising 20-100 year old donors, available thorough GSE40279

##
## GSE40279
##

##download Hannum from GEO GSE40279
gse <- getGEO(GEO = "GSE40279",destdir = "../Revision/Data/450k-Aging") 
gse <- gse$GSE40279_series_matrix.txt.gz
show(gse)

## phenodata
hannum.metadata <- 
  pData(phenoData(gse)) %>% 
  clean_names()
hannum.metadata %>% head
hannum.metadata %>% select(contains("age")) %>% head
hannum.metadata <- 
  hannum.metadata %>% 
  mutate(age=as.numeric(age_y_ch1))


## get meth values
table(rownames(fCpGs.betas) %in% rownames(exprs(gse)))
hannum.betas <- exprs(gse)
head(hannum.betas)
colnames(hannum.betas)
identical(colnames(hannum.betas),hannum.metadata$geo_accession)

# calculate SD and mean for all samples for fCpGs
table(rownames(fCpGs.betas) %in% rownames(hannum.betas))
fCpGs.hannum <- hannum.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(hannum.betas)],]
identical(colnames(hannum.betas),hannum.metadata$geo_accession)


##complete with fCPGs stats
hannum.metadata$SD.fCpGs <- apply(fCpGs.hannum,2,sd,na.rm=T)
hannum.metadata$mean.fCpGs <- apply(fCpGs.hannum,2,mean,na.rm=T)


## complete cellular proportions
hannum.metadata<- 
  cbind(hannum.metadata,
        epidish(beta.m = hannum.betas,ref.m = cent12CT450k.m)$estF
        )
head(hannum.metadata)

Age distribution

hannum.metadata %>% head
hannum.metadata %>% dim
hannum.metadata %>% count(characteristics_ch1_4)
head(hannum.betas);dim(hannum.betas)


##plot age distribution in hannum
hannum.metadata %>% head

hannum.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Hannum et al, Mol Cell 2013, N=",nrow(hannum.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

mean and SD fCPG. We can see there is a clear correlation between SD methyaltion of fCpGs and age!

hannum.metadata %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=characteristics_ch1_5),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_5)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  hannum.metadata %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1_5),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_5)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

The subtle anticorrelation with mean disappear when adjusting for cell types, while SD remains highly significant.

## adjusting for cellular fractions
hannum.metadata %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
                 id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value
  ))+
  geom_point(aes(fill=variable),
             pch=21,size=1,stroke=0.1
             )+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  # label.x.npc = 0.2,label.y.npc = 1,
  method = "pearson",
  size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##liner model corrected by cell fracts and ethnicity
# hannum.metadata %>% head
# hannum.metadata %>% count(ethnicity_ch1)

lm.fit <- 
  hannum.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  lm(SD.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 656 4e-04 3e-04, 5e-04 4e-14 6e-13
ethnicity




    Caucasian - European 426 — —

    Hispanic - Mexican 230 -2e-04 -3e-03, 3e-03 9e-01 9e-01
Mono 656 5e-02 -3e-02, 1e-01 2e-01 3e-01
Neu 656 8e-02 1e-02, 1e-01 2e-02 6e-02
Baso 656 3e-01 1e-01, 5e-01 4e-04 2e-03
Eos 656 7e-02 -2e-02, 2e-01 1e-01 2e-01
NK 656 2e-02 -7e-02, 1e-01 7e-01 7e-01
CD8Tnv 656 2e-01 4e-02, 3e-01 8e-03 3e-02
CD8Tmem 656 3e-02 -5e-02, 1e-01 4e-01 6e-01
CD4Tnv 656 7e-02 -4e-03, 2e-01 6e-02 1e-01
CD4Tmem 656 3e-02 -5e-02, 1e-01 5e-01 6e-01
Treg 656 3e-01 1e-01, 4e-01 6e-04 3e-03
Bnv 656 7e-02 -2e-02, 2e-01 1e-01 2e-01
Bmem 656



Abbreviation: CI = Confidence Interval
R² = 0.228; Adjusted R² = 0.212; Sigma = 0.014; Statistic = 14.6; p-value = <0.001; df = 13; Log-likelihood = 1,889; AIC = -3,747; BIC = -3,680; Deviance = 0.121; Residual df = 642; No. Obs. = 656
1 Benjamini & Hochberg correction for multiple testing
##mean fcpg

lm.fit <- 
  hannum.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  filter(ethnicity!="Unknown") %>% 
  lm(mean.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 656 7e-06 -1e-04, 1e-04 9e-01 9e-01
ethnicity




    Caucasian - European 426 — —

    Hispanic - Mexican 230 2e-03 -2e-03, 5e-03 3e-01 5e-01
Mono 656 -5e-02 -2e-01, 4e-02 3e-01 5e-01
Neu 656 -2e-02 -1e-01, 6e-02 6e-01 7e-01
Baso 656 9e-02 -1e-01, 3e-01 4e-01 6e-01
Eos 656 -7e-02 -2e-01, 4e-02 2e-01 5e-01
NK 656 -2e-02 -1e-01, 9e-02 7e-01 7e-01
CD8Tnv 656 1e-01 -2e-02, 3e-01 1e-01 4e-01
CD8Tmem 656 2e-01 8e-02, 3e-01 2e-04 3e-03
CD4Tnv 656 1e-01 2e-03, 2e-01 4e-02 3e-01
CD4Tmem 656 7e-02 -3e-02, 2e-01 2e-01 5e-01
Treg 656 -4e-02 -2e-01, 2e-01 7e-01 7e-01
Bnv 656 5e-02 -5e-02, 2e-01 3e-01 5e-01
Bmem 656



Abbreviation: CI = Confidence Interval
R² = 0.313; Adjusted R² = 0.299; Sigma = 0.017; Statistic = 22.5; p-value = <0.001; df = 13; Log-likelihood = 1,747; AIC = -3,464; BIC = -3,396; Deviance = 0.187; Residual df = 642; No. Obs. = 656
1 Benjamini & Hochberg correction for multiple testing

Heatmap

##
### HEATMAP FCPGS
##
hannum.metadata %>% head()

h.fCpGs <- Heatmap(fCpGs.hannum,
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=hannum.metadata %>%
                       select(ethnicity=ethnicity_ch1,age,Sex=gender_ch1),
                     col=list(
                       ethnicity=setNames(kelly(hannum.metadata %>%
                                                      pull(ethnicity_ch1) %>%
                                                      unique() %>% na.omit() %>% 
                                                      length()
                       ),
                       hannum.metadata %>%
                         pull(ethnicity_ch1) %>%na.omit() %>% 
                         unique()
                       ),
                       Sex=setNames(c("deepskyblue","pink2"),c("M","F")),
                       age=colorRamp2(breaks = seq(min(hannum.metadata$age),
                                                   max(hannum.metadata$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2.2.2 GSE55763 - Lehne 2015

Large dataset from adults and elderly people, available in GSE55763. We download processed data matrix and load it separately.

##
# 9.   Lehen cohort cohorts genome biology 2015 ≥ 2500 samples, age assoc.
##

gse <- getGEO(GEO = "GSE55763",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE55763_series_matrix.txt.gz
show(gse)

## phenodata
lehne2015.metadata <- pData(phenoData(gse)) %>% clean_names()
lehne2015.metadata %>% head
lehne2015.metadata$age <- as.numeric(lehne2015.metadata$age_ch1)
rownames(lehne2015.metadata)

## get meth values
lehne2015.betas <- fread("../Revision/Data/450k-Aging/GSE55763_normalized_betas.txt.gz",data.table = F)
head(lehne2015.betas);dim(lehne2015.betas);gc()
rownames(lehne2015.betas) <- lehne2015.betas$ID_REF
lehne2015.betas <- lehne2015.betas[,grep("Detection|ID_REF",colnames(lehne2015.betas),value = T,invert = T)]
all(gsub("Peripheral blood, ","",lehne2015.metadata$title) %in% colnames(lehne2015.betas))
all(colnames(lehne2015.betas) %in% gsub("Peripheral blood, ","",lehne2015.metadata$title))
lehne2015.betas <- lehne2015.betas[,gsub("Peripheral blood, ","",lehne2015.metadata$title)]
colnames(lehne2015.betas) <- lehne2015.metadata$geo_accession
head(lehne2015.betas);dim(lehne2015.betas)

##fcpgs
table(rownames(fCpGs.betas) %in% rownames(lehne2015.betas))
fCpGs.lehne2015 <- lehne2015.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(lehne2015.betas)],]
head(fCpGs.lehne2015);dim(fCpGs.lehne2015)
identical(colnames(fCpGs.lehne2015),lehne2015.metadata$geo_accession)
gc()

##complete with fCPGs stats
lehne2015.metadata$SD.fCpGs <- apply(fCpGs.lehne2015,2,sd,na.rm=T)
lehne2015.metadata$mean.fCpGs <- apply(fCpGs.lehne2015,2,mean,na.rm=T)
head(lehne2015.metadata);dim(lehne2015.metadata)

##cell fractions
lehne2015.metadata<- 
  cbind(lehne2015.metadata,
        epidish(beta.m = lehne2015.betas,ref.m = cent12CT450k.m)$estF
  )
head(lehne2015.metadata)

Age distribution

lehne2015.metadata %>% head
lehne2015.metadata %>% dim
lehne2015.metadata %>% count(characteristics_ch1)
lehne2015.metadata %>% count(characteristics_ch1_1)
lehne2015.metadata %>% count(description)
head(lehne2015.betas);dim(lehne2015.betas)

## remove techinical replicates
lehne2015.metadata.plot <- 
  lehne2015.metadata %>% 
  filter(characteristics_ch1_1=="dataset: population study")

##plot age distribution in lehne2015
lehne2015.metadata.plot %>% head
lehne2015.metadata.plot %>% dim

lehne2015.metadata.plot %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from lehne 2015 et al, Genome Biology , N=",nrow(lehne2015.metadata.plot)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Mean and SD fCpG methyaltion

lehne2015.metadata.plot %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=characteristics_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  lehne2015.metadata.plot %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Adjusting cell types

## adjusting for cellular fractions
lehne2015.metadata.plot %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
                 id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value
  ))+
  geom_point(aes(fill=variable),
             pch=21,size=1,stroke=0.1
  )+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  # label.x.npc = 0.2,label.y.npc = 1,
  method = "pearson",
  size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  theme_classic()+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##liner model corrected by cell fracts and ethnicity
# lehne2015.metadata.plot %>% head


lm.fit <- 
  lehne2015.metadata.plot %>%
  lm(SD.fCpGs ~ 
       age+ 
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem,
     data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(lehne2015.metadata$age,lehne2015.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 2,639 2e-04 2e-04, 3e-04 4e-47 5e-46
Mono 2,639 2e-02 -7e-03, 5e-02 2e-01 5e-01
Neu 2,639 1e-02 -1e-02, 3e-02 4e-01 6e-01
Baso 2,639 4e-03 -5e-02, 6e-02 9e-01 9e-01
Eos 2,639 1e-02 -1e-02, 4e-02 3e-01 6e-01
NK 2,639 -1e-02 -4e-02, 2e-02 4e-01 6e-01
CD8Tnv 2,639 -4e-03 -4e-02, 3e-02 8e-01 9e-01
CD8Tmem 2,639 -3e-02 -5e-02, -2e-03 4e-02 1e-01
CD4Tnv 2,639 9e-03 -2e-02, 3e-02 5e-01 6e-01
CD4Tmem 2,639 -2e-02 -4e-02, 1e-02 2e-01 5e-01
Treg 2,639 -5e-02 -1e-01, -5e-03 3e-02 1e-01
Bnv 2,639 -1e-03 -3e-02, 2e-02 9e-01 9e-01
Bmem 2,639



Abbreviation: CI = Confidence Interval
R² = 0.146; Adjusted R² = 0.142; Sigma = 0.007; Statistic = 37.3; p-value = <0.001; df = 12; Log-likelihood = 9,230; AIC = -18,432; BIC = -18,349; Deviance = 0.142; Residual df = 2,626; No. Obs. = 2,639
1 Benjamini & Hochberg correction for multiple testing
##mean fcpg

lm.fit <- 
  lehne2015.metadata.plot %>%
  lm(mean.fCpGs ~ 
       age+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(lehne2015.metadata$age,lehne2015.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 2,639 1e-04 6e-05, 1e-04 1e-06 1e-05
Mono 2,639 -6e-02 -1e-01, -3e-02 8e-04 2e-03
Neu 2,639 -2e-02 -5e-02, 1e-02 2e-01 4e-01
Baso 2,639 -4e-02 -1e-01, 4e-02 4e-01 4e-01
Eos 2,639 -2e-02 -6e-02, 1e-02 2e-01 4e-01
NK 2,639 9e-04 -4e-02, 4e-02 1e+00 1e+00
CD8Tnv 2,639 3e-02 -2e-02, 7e-02 3e-01 4e-01
CD8Tmem 2,639 7e-02 3e-02, 1e-01 2e-04 7e-04
CD4Tnv 2,639 -4e-02 -7e-02, -4e-03 3e-02 7e-02
CD4Tmem 2,639 3e-02 -8e-03, 7e-02 1e-01 3e-01
Treg 2,639 2e-01 8e-02, 2e-01 2e-05 1e-04
Bnv 2,639 -2e-03 -4e-02, 3e-02 9e-01 1e+00
Bmem 2,639



Abbreviation: CI = Confidence Interval
R² = 0.176; Adjusted R² = 0.173; Sigma = 0.010; Statistic = 46.8; p-value = <0.001; df = 12; Log-likelihood = 8,309; AIC = -16,590; BIC = -16,508; Deviance = 0.285; Residual df = 2,626; No. Obs. = 2,639
1 Benjamini & Hochberg correction for multiple testing

Heatmap

##
### HEATMAP FCPGS
##

lehne2015.metadata.plot %>% head()

h.fCpGs <- Heatmap(fCpGs.lehne2015 %>% 
                     select( lehne2015.metadata.plot %>% pull(geo_accession)),
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=lehne2015.metadata.plot %>%
                       select(age,Sex=characteristics_ch1_2),
                     col=list(
                       Sex=setNames(c("deepskyblue","pink2"),c("gender: M","gender: F")),
                       age=colorRamp2(breaks = seq(min(lehne2015.metadata.plot$age),
                                                   max(lehne2015.metadata.plot$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   ),
                   use_raster = T, raster_quality = 10
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2.2.3 GSE72773 - Horvath 2016

We next load dataset from Horvath 2016, available at GSE72773

gse <- getGEO(GEO = "GSE72773",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE72773_series_matrix.txt.gz
show(gse)

## phenodata
Horvath2016.metadata <- pData(phenoData(gse)) %>% clean_names()
Horvath2016.metadata %>% head
Horvath2016.metadata$age <- as.numeric(Horvath2016.metadata$age_ch1)
rownames(Horvath2016.metadata)

## get meth values
Horvath2016.betas <- fread("../Revision/Data/450k-Aging/GSE72773_datBetaNormalized.csv.gz",data.table = F,na.strings = c("","NA"))
sapply(Horvath2016.betas,class) %>% table()
any(is.na(Horvath2016.betas))
head(Horvath2016.betas);dim(Horvath2016.betas);gc()
rownames(Horvath2016.betas) <- Horvath2016.betas$ID
Horvath2016.betas <- Horvath2016.betas[,grep("^ID",colnames(Horvath2016.betas),invert = T)]
head(Horvath2016.betas);dim(Horvath2016.betas)


all(gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title) %in% colnames(Horvath2016.betas))
all(colnames(Horvath2016.betas) %in% gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title))

Horvath2016.betas <- 
  Horvath2016.betas[,gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title)]
colnames(Horvath2016.betas) <- Horvath2016.metadata$geo_accession
head(Horvath2016.betas);dim(Horvath2016.betas)

##fcpgs

# some values are characters or NA, convert appropiately to numeric
# mat <- apply(fCpGs.Horvath2016[,which(sapply(fCpGs.Horvath2016,class)=="character")],2,as.numeric)
# apply(mat,2,function(x)any(is.na(x))) ##checked which samples gave NA, only 1 cpgs with NA value, so ok!
# sapply(fCpGs.Horvath2016,class)


table(rownames(fCpGs.betas) %in% rownames(Horvath2016.betas))
fCpGs.Horvath2016 <- 
  Horvath2016.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(Horvath2016.betas)],]

## some values are characters, convert to numeric!!
fCpGs.Horvath2016 <- as.data.frame(sapply(fCpGs.Horvath2016,as.numeric))
head(fCpGs.Horvath2016);dim(fCpGs.Horvath2016)
identical(colnames(fCpGs.Horvath2016),Horvath2016.metadata$geo_accession)


##complete with fCPGs stats
Horvath2016.metadata$SD.fCpGs <- apply(fCpGs.Horvath2016,2,sd,na.rm=T)
Horvath2016.metadata$mean.fCpGs <- apply(fCpGs.Horvath2016,2,mean,na.rm=T)
head(Horvath2016.metadata);dim(Horvath2016.metadata)

##cell fractions
all(rownames(cent12CT450k.m) %in% rownames(Horvath2016.betas))
horvath.mat.cells <- apply(Horvath2016.betas[rownames(cent12CT450k.m),],2,as.numeric)
head(horvath.mat.cells);dim(horvath.mat.cells)
rownames(horvath.mat.cells) <- rownames(cent12CT450k.m)


Horvath2016.metadata <- 
  cbind(Horvath2016.metadata,
        epidish(beta.m = horvath.mat.cells,ref.m = cent12CT450k.m)$estF
  )
head(Horvath2016.metadata)

Age distribution

Horvath2016.metadata %>% head
Horvath2016.metadata %>% dim
Horvath2016.metadata %>% count(source_name_ch1)
Horvath2016.metadata %>% count(ethnicity_ch1)
head(Horvath2016.betas);dim(Horvath2016.betas)


##plot age distribution in Horvath2016
Horvath2016.metadata %>% head

Horvath2016.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Horvath 2016 et al, Genome Biology 2016, N=",nrow(Horvath2016.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Mean and SD

Horvath2016.metadata %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=source_name_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~source_name_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  Horvath2016.metadata %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=source_name_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~source_name_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Adjusting for cell types

## adjusting for cellular fractions
Horvath2016.metadata %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
                 id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value
  ))+
  geom_point(aes(fill=variable),
             pch=21,size=1,stroke=0.1
  )+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  # label.x.npc = 0.2,label.y.npc = 1,
  method = "pearson",
  size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##liner model corrected by cell fracts and ethnicity
# Horvath2016.metadata %>% head
# Horvath2016.metadata %>% count(ethnicity_ch1)

lm.fit <- 
  Horvath2016.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  lm(SD.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 310 4e-04 2e-04, 5e-04 5e-08 7e-07
ethnicity




    Caucasian 235 — —

    Hispanic 38 -1e-03 -6e-03, 4e-03 6e-01 9e-01
    Tsimane 37 2e-02 6e-03, 2e-02 1e-03 8e-03
Mono 310 -2e-02 -2e-01, 2e-01 8e-01 9e-01
Neu 310 -3e-02 -2e-01, 1e-01 7e-01 9e-01
Baso 310 5e-02 -3e-01, 4e-01 8e-01 9e-01
Eos 310 -6e-02 -2e-01, 1e-01 5e-01 9e-01
NK 310 -7e-02 -3e-01, 1e-01 5e-01 9e-01
CD8Tnv 310 1e-01 -1e-01, 3e-01 4e-01 9e-01
CD8Tmem 310 -1e-01 -3e-01, 6e-02 2e-01 9e-01
CD4Tnv 310 -3e-02 -2e-01, 1e-01 7e-01 9e-01
CD4Tmem 310 -9e-02 -3e-01, 8e-02 3e-01 9e-01
Treg 310 6e-03 -2e-01, 2e-01 1e+00 1e+00
Bnv 310 -2e-02 -2e-01, 2e-01 8e-01 9e-01
Bmem 310



Abbreviation: CI = Confidence Interval
R² = 0.217; Adjusted R² = 0.180; Sigma = 0.014; Statistic = 5.85; p-value = <0.001; df = 14; Log-likelihood = 902; AIC = -1,772; BIC = -1,712; Deviance = 0.054; Residual df = 295; No. Obs. = 310
1 Benjamini & Hochberg correction for multiple testing
##mean fcpg

lm.fit <- 
  Horvath2016.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  lm(mean.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 310 -3e-04 -5e-04, -4e-06 5e-02 3e-01
ethnicity




    Caucasian 235 — —

    Hispanic 38 9e-03 -1e-03, 2e-02 9e-02 4e-01
    Tsimane 37 5e-03 -1e-02, 2e-02 6e-01 8e-01
Mono 310 1e-02 -3e-01, 4e-01 1e+00 1e+00
Neu 310 2e-03 -3e-01, 3e-01 1e+00 1e+00
Baso 310 5e-01 -1e-01, 1e+00 1e-01 4e-01
Eos 310 -3e-02 -4e-01, 3e-01 9e-01 1e+00
NK 310 -1e-01 -5e-01, 2e-01 5e-01 8e-01
CD8Tnv 310 2e-01 -2e-01, 6e-01 3e-01 8e-01
CD8Tmem 310 2e-01 -1e-01, 6e-01 2e-01 7e-01
CD4Tnv 310 -1e-01 -5e-01, 2e-01 5e-01 8e-01
CD4Tmem 310 1e-01 -2e-01, 5e-01 5e-01 8e-01
Treg 310 -6e-01 -1e+00, -1e-01 1e-02 2e-01
Bnv 310 1e-02 -4e-01, 4e-01 9e-01 1e+00
Bmem 310



Abbreviation: CI = Confidence Interval
R² = 0.209; Adjusted R² = 0.172; Sigma = 0.027; Statistic = 5.57; p-value = <0.001; df = 14; Log-likelihood = 689; AIC = -1,345; BIC = -1,285; Deviance = 0.214; Residual df = 295; No. Obs. = 310
1 Benjamini & Hochberg correction for multiple testing

Heatmap

##
### HEATMAP FCPGS
##
Horvath2016.metadata %>% head()

h.fCpGs <- Heatmap(fCpGs.Horvath2016,
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=Horvath2016.metadata %>%
                       select(ethnicity=ethnicity_ch1,age,Sex=characteristics_ch1),
                     col=list(
                       ethnicity=setNames(kelly(Horvath2016.metadata %>%
                                                  pull(ethnicity_ch1) %>%
                                                  unique() %>% na.omit() %>% 
                                                  length()
                       ),
                       Horvath2016.metadata %>%
                         pull(ethnicity_ch1) %>%na.omit() %>% 
                         unique()
                       ),
                       Sex=setNames(c("deepskyblue","pink2"),c("Sex: male","Sex: female")),
                       age=colorRamp2(breaks = seq(min(Horvath2016.metadata$age),
                                                   max(Horvath2016.metadata$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
     )

5 Extended Data Fig. 3

Here we collect 2 studies with sorted samples.

# figure manuscipt 2
Clark2020.metadata$source <- Clark2020.metadata$cell_type_ch1
Clark2020.metadata$Study <- "GSE137594"

sorted.age.metadata$Study <- "GSE184269"

sorted.manuscript <- bind_rows(sorted.age.metadata,Clark2020.metadata)
sorted.manuscript$source

sorted.manuscript %>% 
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  geom_point(
    aes(fill=source,
        shape=Study),
    size=0.75,
    stroke=0
  )+
  scale_shape_manual(values = c("GSE137594"=24,"GSE184269"=21))+
  geom_smooth(aes(fill=source),
              method = "lm",color="black",lwd=0.5,alpha=0.3
  )+
  scale_fill_manual(values = cols25())+
  facet_wrap(~source)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
  )
  ),
  label.y.npc="top",
  label.x.npc = "left",
  method = "pearson",
  size=1.25)+
  # stat_cor(method = "pearson",size=1.5)+
  guides(fill=guide_none(),
         shape=guide_legend(override.aes = list(stroke=0.25,color="grey0"))
         )+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) | 
  
  sorted.manuscript %>% 
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(
    aes(fill=source,
        shape=Study
        ),
    size=0.75,
    stroke=0
  )+
  scale_shape_manual(values = c("GSE137594"=24,"GSE184269"=21))+
  geom_smooth(aes(fill=source),
              method = "lm",color="black",lwd=0.5,alpha=0.3
  )+
  scale_fill_manual(values = cols25())+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
  )
  ),
  label.y.npc="top",
  label.x.npc = "left",
  method = "pearson",
  size=1.25)+
  facet_wrap(~source)+
  # stat_cor(method = "pearson",size=1.5)+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

6 Extended Data Fig. 3 and Supple.Fig. 7

We collect all blood samples across the distinct datasets, including GSE36054, GSE40279, GSE55763 and GSE72773 and perform the analyses with all samples. We correct for cell types, and show how mean fCpG remains stable when considering changes in cell types composition during aging, but fCpG SD increases, consistent with clonal expansions of the hematopoietic system during aging.

##
## Fig manuscipt 3 and 4, whole blood datasets including pediatrics, hannum, horvath2016 and lehne.
##

pediatrics.metadata.plot %>% head
hannum.metadata %>% head
Horvath2016.metadata %>% head
lehne2015.metadata.plot %>% head

wb.df <- bind_rows(cbind(pediatrics.metadata.plot,Study="GSE36054"),
                   cbind(hannum.metadata,Study="GSE40279"),
                   cbind(Horvath2016.metadata,Study="GSE72773"),
                   cbind(lehne2015.metadata.plot,Study="GSE55763")
                   )
wb.df %>% head


wb.df %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=Study),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25(),aesthetics = c("fill","color"))+
  stat_cor(
    aes(label = paste0(after_stat(r.label),
                       "~`,`~`P=`~",
                       p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                       ),
        color=Study
        ),
    method = "pearson",
    size=1.5,label.y.npc = 0.1
  )+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                              )
               ),
           method = "pearson",
           size=1.5)+
  annotate(geom = "text",x = 10,y=0.6,label=paste0("N=",nrow(wb.df)),size=2)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  wb.df %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=Study),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25(),aesthetics = c("fill","color"))+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                              )
               ),
           method = "pearson",
           size=1.5)+
  stat_cor(
    aes(label = paste0(after_stat(r.label),
                       "~`,`~`P=`~",
                       p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
    ),
    color=Study
    ),
    method = "pearson",
    size=1.5,label.y.npc = 0.6
  )+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Adjust cell types

##multivariate lm model with cell fracs
##liner model corrected by cell fracts and ethnicity
# wb.df %>% head
# wb.df %>% count(ethnicity_ch1,Study)


lm.fit <- 
  wb.df %>%
  lm(SD.fCpGs ~ 
       age+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)+ggtitle()

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 3,738 4e-04 3e-04, 4e-04 1e-147 1e-146
Mono 3,738 6e-02 3e-02, 9e-02 6e-05 1e-04
Neu 3,738 6e-02 4e-02, 8e-02 2e-06 3e-06
Baso 3,738 1e-01 9e-02, 2e-01 9e-07 2e-06
Eos 3,738 7e-02 5e-02, 1e-01 7e-08 2e-07
NK 3,738 3e-02 -3e-03, 5e-02 8e-02 1e-01
CD8Tnv 3,738 1e-01 7e-02, 1e-01 1e-09 5e-09
CD8Tmem 3,738 2e-04 -3e-02, 3e-02 1e+00 1e+00
CD4Tnv 3,738 4e-02 1e-02, 7e-02 2e-03 3e-03
CD4Tmem 3,738 2e-02 -9e-03, 5e-02 2e-01 2e-01
Treg 3,738 2e-01 1e-01, 2e-01 4e-11 2e-10
Bnv 3,738 4e-02 1e-02, 7e-02 2e-03 3e-03
Bmem 3,738



Abbreviation: CI = Confidence Interval
R² = 0.321; Adjusted R² = 0.319; Sigma = 0.010; Statistic = 147; p-value = <0.001; df = 12; Log-likelihood = 11,952; AIC = -23,876; BIC = -23,789; Deviance = 0.365; Residual df = 3,725; No. Obs. = 3,738
1 Benjamini & Hochberg correction for multiple testing
##mean fcpg

lm.fit <- 
  wb.df %>%
  lm(mean.fCpGs ~ 
       age+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable
Explanatory variables N Beta 95% CI p-value q-value1
age 3,738 -2e-05 -6e-05, 3e-05 5e-01 5e-01
Mono 3,738 9e-03 -4e-02, 6e-02 7e-01 7e-01
Neu 3,738 5e-02 3e-03, 9e-02 4e-02 6e-02
Baso 3,738 1e-01 4e-02, 2e-01 5e-03 1e-02
Eos 3,738 5e-02 6e-03, 1e-01 3e-02 5e-02
NK 3,738 4e-02 -9e-03, 9e-02 1e-01 1e-01
CD8Tnv 3,738 3e-01 3e-01, 4e-01 2e-26 2e-25
CD8Tmem 3,738 1e-01 9e-02, 2e-01 7e-09 3e-08
CD4Tnv 3,738 4e-02 -8e-03, 9e-02 1e-01 1e-01
CD4Tmem 3,738 8e-02 3e-02, 1e-01 2e-03 5e-03
Treg 3,738 4e-01 3e-01, 5e-01 3e-19 2e-18
Bnv 3,738 1e-01 8e-02, 2e-01 4e-07 1e-06
Bmem 3,738



Abbreviation: CI = Confidence Interval
R² = 0.199; Adjusted R² = 0.196; Sigma = 0.017; Statistic = 77.0; p-value = <0.001; df = 12; Log-likelihood = 9,857; AIC = -19,686; BIC = -19,599; Deviance = 1.12; Residual df = 3,725; No. Obs. = 3,738
1 Benjamini & Hochberg correction for multiple testing
fCpGs.betas %>% dim
fCpGs.metadata$supplementary_table_2 %>% count(cell_type_annot_1)
fCpGs.metadata$supplementary_table_2 %>% 
  filter(cell_type_annot_1!="Normal_lymphoid_cell",
         cell_type_annot_1!="PBMCs",cell_type_annot_1!="Whole_blood") %>% 
  count(cell_type_annot_3)

tumors <- 
  fCpGs.metadata$supplementary_table_2 %>% 
  filter(cell_type_annot_1!="Normal_lymphoid_cell",
         cell_type_annot_1!="PBMCs",cell_type_annot_1!="Whole_blood")
tumors %>% dim

tumors$SD.fCpGs <- apply(fCpGs.betas[,tumors$participant_id_anonymous],2,sd)
tumors$Group <- tumors$cell_type_annot_3


##merge data frame
wb.df$Group <- cut(wb.df$age,breaks = c(0,30,50,70,80,max(wb.df$age)),include.lowest = T)
wb.df$Group %>% table

df.age <- bind_rows(wb.df,tumors)
df.age$Group <- factor(df.age$Group,levels = c("[0,30]","(30,50]","(50,70]","(70,80]","(80,101]",
                                "B-ALL","B-ALL-remission","B-ALL-relapse",
                                "T-ALL","T-ALL-remission","T-ALL-relapse",
                                "MCL","DLBCL-NOS","MBL","CLL","RT","MGUS","MM"
                                ))
df.age %>% count(Group)



cols.paper <- c(
  "30"="grey100",
  "50"="grey90",
  "70"="grey80",
  "90"="grey50",
  "101"="grey30",
  NA_values="#666666",
  HPC="#E6E6E6",
  pre.Bcell="#D5D5D5",
  NBC="#C3C3C3",
  GCB="#AEAEAE",
  tPC="#969696",
  MBC="#787878",
  bmPC="#4D4D4D",
  Bcell="#C3C3C3",
  Tcell="#F0F8FF",
  PBMCs="#FFF8DC",
  Whole_blood="#CDC8B1",
  "Whole blood"="#CDC8B1",
  Normal_lymphoid_cell="#D5D5D5",
  Normal_myeloid_cell="#E6E6FA",
  "T-ALL"="#CD3333",
  "T-ALL-remission"="#C7A5A5",
  "T-ALL-relapse"="#CD3333",
  "B-ALL"="#E69F00",
  "B-ALL-remission"="#D8CAAB",
  "B-ALL-relapse"="#E69F00",
  MCL="#F0E442",
  C1="#F5D51B",
  cMCL="#F5D51B",
  C2="#0072B2",
  nnMCL="#0072B2",
  "DLBCL-NOS"="#56B4E9",
  MBL="#5A9A89",
  CLL="#009E73",
  RT="#02C590",
  unmutated="#e65100",
  mutated="#361379",
  nCLL="#006E93",
  iCLL="#FDC010",
  mCLL="#963736",
  Disease.Unclassified="#CCCCCC",
  MGUS="#BF99AE",
  MM="#CC79A7"
)




df.age %>% 
  ggplot(aes(x=Group,
             y=SD.fCpGs
             ))+
  geom_boxplot(aes(fill=Group),outlier.size = 0.5,lwd=0.3)+
  scale_fill_manual(values = cols.paper)+
  stat_compare_means(method = "t.test",
                     comparisons = list(c("[0,30]","(30,50]"),
                                                          c("(30,50]","(50,70]"),
                                                          c("(50,70]","(70,80]"),
                                                          c("(70,80]","(80,101]"),
                                                          c("[0,30]","(80,101]")
                                                          ),
                     bracket.size = 0.2,
                     size=1,label.y=0.3,step.increase = 0.04)+
  xlab(NULL)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text.x = element_text(color="grey0",angle = 44,hjust = 1,vjust = 1),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

7 Session info

print(sessionInfo())
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] broom.helpers_1.21.0  broom_1.0.9           patchwork_1.3.2      
##  [4] stringr_1.5.1         gtExtras_0.6.0        gtsummary_2.4.0      
##  [7] gt_1.0.0              ggstats_0.10.0        openxlsx_4.2.8       
## [10] EpiDISH_2.24.0        circlize_0.4.16       ComplexHeatmap_2.24.1
## [13] GEOquery_2.76.0       Biobase_2.68.0        BiocGenerics_0.54.0  
## [16] generics_0.1.4        pals_1.10             ggpubr_0.6.1         
## [19] ggplot2_3.5.2         dplyr_1.1.4           janitor_2.2.1        
## [22] data.table_1.17.8     BiocStyle_2.36.0     
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          rstudioapi_0.17.1          
##   [3] jsonlite_2.0.0              shape_1.4.6.1              
##   [5] magrittr_2.0.3              magick_2.8.7               
##   [7] farver_2.1.2                rmarkdown_2.29             
##   [9] GlobalOptions_0.1.2         vctrs_0.6.5                
##  [11] locfdr_1.1-8                Cairo_1.6-5                
##  [13] paletteer_1.6.0             rstatix_0.7.2.999          
##  [15] tinytex_0.57                forcats_1.0.0              
##  [17] htmltools_0.5.8.1           S4Arrays_1.8.1             
##  [19] haven_2.5.5                 curl_7.0.0                 
##  [21] SparseArray_1.8.1           Formula_1.2-5              
##  [23] sass_0.4.10                 bslib_0.9.0                
##  [25] fontawesome_0.5.3           plyr_1.8.9                 
##  [27] lubridate_1.9.4             cachem_1.1.0               
##  [29] commonmark_2.0.0            lifecycle_1.0.4            
##  [31] iterators_1.0.14            pkgconfig_2.0.3            
##  [33] Matrix_1.7-4                R6_2.6.1                   
##  [35] fastmap_1.2.0               GenomeInfoDbData_1.2.14    
##  [37] MatrixGenerics_1.20.0       snakecase_0.11.1           
##  [39] clue_0.3-66                 digest_0.6.37              
##  [41] colorspace_2.1-1            rematch2_2.1.2             
##  [43] S4Vectors_0.46.0            GenomicRanges_1.60.0       
##  [45] labeling_0.4.3              timechange_0.3.0           
##  [47] httr_1.4.7                  abind_1.4-8                
##  [49] mgcv_1.9-3                  compiler_4.5.0             
##  [51] proxy_0.4-27                withr_3.0.2                
##  [53] doParallel_1.0.17           backports_1.5.0            
##  [55] carData_3.0-5               R.utils_2.13.0             
##  [57] maps_3.4.3                  ggsignif_0.6.4             
##  [59] MASS_7.3-65                 DelayedArray_0.34.1        
##  [61] rjson_0.2.23                tools_4.5.0                
##  [63] zip_2.3.3                   rentrez_1.2.4              
##  [65] R.oo_1.27.1                 glue_1.8.0                 
##  [67] quadprog_1.5-8              nlme_3.1-168               
##  [69] reshape2_1.4.4              cluster_2.1.8.1            
##  [71] gtable_0.3.6                labelled_2.14.1            
##  [73] tzdb_0.5.0                  R.methodsS3_1.8.2          
##  [75] class_7.3-23                tidyr_1.3.1                
##  [77] hms_1.1.3                   xml2_1.4.0                 
##  [79] car_3.1-3                   XVector_0.48.0             
##  [81] markdown_2.0                foreach_1.5.2              
##  [83] pillar_1.11.0               limma_3.64.3               
##  [85] splines_4.5.0               lattice_0.22-7             
##  [87] tidyselect_1.2.1            knitr_1.50                 
##  [89] litedown_0.7                bookdown_0.44              
##  [91] IRanges_2.42.0              SummarizedExperiment_1.38.1
##  [93] stats4_4.5.0                xfun_0.53                  
##  [95] statmod_1.5.0               matrixStats_1.5.0          
##  [97] stringi_1.8.7               UCSC.utils_1.4.0           
##  [99] yaml_2.3.10                 evaluate_1.0.5             
## [101] codetools_0.2-20            tibble_3.3.0               
## [103] BiocManager_1.30.26         cli_3.6.5                  
## [105] jquerylib_0.1.4             dichromat_2.0-0.1          
## [107] Rcpp_1.1.0                  GenomeInfoDb_1.44.2        
## [109] mapproj_1.2.12              png_0.1-8                  
## [111] XML_3.99-0.19               fastcluster_1.3.0          
## [113] parallel_4.5.0              readr_2.1.5                
## [115] scales_1.4.0                e1071_1.7-16               
## [117] purrr_1.1.0                 crayon_1.5.3               
## [119] GetoptLong_1.0.5            rlang_1.1.6                
## [121] cards_0.7.0