We use several datasets available, including whole blood and purified cell types to study fCpG methylation changes throughout human lifespan. Collectively, we show how fluctuations remain stable during human lifespan, and an increase in standard deviation in samples, consistent with know cloncal expansions in the hematopoietic system during aging.

1 Load packages and fCpGs information

##
## Load necessary packages and set global options
##

library(data.table)
library(janitor)
library(dplyr)
library(ggpubr)
library(pals)
library(GEOquery)
library(ComplexHeatmap)
library(circlize)
library(EpiDISH)
library(openxlsx)
library(ggstats)
library(gt)
library(gtsummary)
library(gtExtras)
library(stringr)
library(patchwork)
library(ggstats)
library(gtsummary)
library(broom)
library(broom.helpers)



options(stringsAsFactors = F,max.print = 10000,error=NULL,
        "openxlsx.dateFormat" = "dd/mm/yyyy"
)


##
## 1. load fCpGs
##

fCpGs.betas <- 
  fread("../Data/FMC/Final_data/beta_all_samples_fcpgs_laplacian.csv",data.table = F)
rownames(fCpGs.betas) <- fCpGs.betas$V1
fCpGs.betas <- fCpGs.betas[,-1]
fCpGs.betas[1:5,1:15]
dim(fCpGs.betas)  

sheets <- getSheetNames("../final_revision/Supplementary_Tables_R3.xlsx")
sheets <- setNames(sheets,sheets)
sheets

fCpGs.metadata <- lapply(sheets,function(i){
  writeLines(i)
  openxlsx::read.xlsx("../final_revision/Supplementary_Tables_R3.xlsx",sheet = i,detectDates = T,startRow = 3)
})

names(fCpGs.metadata)
fCpGs.metadata$supplementary_table_2 %>% head


## load normal lymphoid samples used for the supplement.
fCpGs.metadata.all <- openxlsx::read.xlsx("../Data/FMC.blood.samples.v2.4.xlsx",detectDates = T)
head(fCpGs.metadata.all);dim(fCpGs.metadata.all)

fCpGs.metadata.all %>% count(CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2)

all(colnames(fCpGs.betas) %in% fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)
table(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% colnames(fCpGs.betas) )

## remove bad quality samples and leave those with meth info
fCpGs.metadata.all <- fCpGs.metadata.all[match(colnames(fCpGs.betas),fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS),]

fCpGs.metadata.all %>% dim

2 Cell fractions in blood

We will use the package EpiDISH to find cell types described in Salas 2022

ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T

##
## Calculate cell fractions in using epiDISH data, ie Salas 2022 nat comms data
##

data(cent12CT450k.m)
Heatmap(cent12CT450k.m,col=coolwarm(),show_row_names = F)

3 B and T cell fCpG dynamics

We plot the correlation of fCpG mean methylation and standard deviation (SD) of cell types included in our study design (figure supplementary 7).

As age is not avaiable for some samples, we use here the Horvath value.

#
## 1. Data from our submitted manuscipt
##

all(colnames(fCpGs.betas) %in% fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)
table(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% colnames(fCpGs.betas))
identical(colnames(fCpGs.betas),fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)

## mean and sd for fCpGs
fCpGs.metadata.all$SD.fCpGs <- apply(fCpGs.betas[,fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS],2,sd,na.rm=T)
fCpGs.metadata.all$mean.fCpGs <- apply(fCpGs.betas[,fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS],2,mean,na.rm=T)
fCpGs.metadata.all$submitted <- 
  ifelse(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% fCpGs.metadata$supplementary_table_2$participant_id_anonymous,
                                       T,F
                                       )
fCpGs.metadata.all <- 
  fCpGs.metadata.all %>% 
  filter(submitted) %>% 
  mutate(CELL_TYPE_ANNOT.2=case_when(CELL_TYPE=="CD19_positive" ~ "Bcell (CD19+)",
                                     CELL_TYPE=="Granulocyte" ~ "Granulocyte",
                                     CELL_TYPE=="NK_cell" ~ "NK",
                                     CELL_TYPE_ANNOT.2=="Tcell" ~ "Tcell(CD3+)",
                                     CELL_TYPE=="Monocyte" ~ "Monocyte",
                                     .default = CELL_TYPE_ANNOT.2),
         Horvath=Clocks_Horvath
         )

##plot 
fCpGs.metadata.all %>% count(CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2)
fCpGs.metadata.all %>% 
  filter(CELL_TYPE!="Macrophage_invitro_differentiated",
         CELL_TYPE_ANNOT.2!="pre-Bcell",
         CELL_TYPE_ANNOT.2!="bmPC",
         CELL_TYPE_ANNOT.2!="tPC",
         grepl("^Normal",CELL_TYPE_ANNOT.1)) %>% 
  select(SAMPLE_ID,CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2,AGE_SAMPLING,Clocks_Horvath,submitted)

fCpGs.metadata.all.plot <- 
  fCpGs.metadata.all %>% 
  filter(CELL_TYPE!="Macrophage_invitro_differentiated",
         CELL_TYPE_ANNOT.2!="pre-Bcell",
         CELL_TYPE_ANNOT.2!="bmPC",
         CELL_TYPE_ANNOT.2!="tPC",
         grepl("^Normal",CELL_TYPE_ANNOT.1),
         submitted
         )

fCpGs.metadata.all.plot <- 
  fCpGs.metadata.all.plot %>% 
  mutate(CELL_TYPE_ANNOT.2=factor(CELL_TYPE_ANNOT.2,
                                  levels=c("NBC","GCB","MBC","Tcell(CD3+)"),
                                  labels=c("Naïve B cell","Germinal center B cell","Memory B cell","T cell")
                                  )
         )

fCpGs.metadata.all.plot %>% glimpse()



##plot
fCpGs.metadata.all.plot %>% 
  ggplot(aes(x = Horvath,
             y = AGE_SAMPLING,
             ))+
  geom_point()+
  geom_smooth(method = "lm",color="grey0")+
  stat_cor(size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
fCpGs.metadata.all.plot %>%
  ggplot(aes(x=Horvath))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Normal lymphoid and myeloid cells, N=",nrow(fCpGs.metadata.all.plot)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##correlations
fCpGs.metadata.all.plot %>% 
  ggplot(aes(x=Horvath,
             y=mean.fCpGs
  ))+
  geom_point(
    aes(fill=CELL_TYPE_ANNOT.2),
    size=0.75,
    pch=21,
    stroke=0
  )+
  geom_smooth(aes(fill=CELL_TYPE_ANNOT.2),
              method = "lm",color="black",lwd=0.5,alpha=0.3
              )+
  scale_fill_manual(values = cols25())+
  facet_wrap(~CELL_TYPE_ANNOT.2)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
                              )
               ),
           label.y.npc="top",
           label.x.npc = "left",
           method = "pearson",
           size=1.25)+
  # stat_cor(method = "pearson",size=1.5)+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) | 
  fCpGs.metadata.all.plot %>% 
  ggplot(aes(x=Horvath,
             y=SD.fCpGs
  ))+
  geom_point(
    aes(fill=CELL_TYPE_ANNOT.2),
    size=0.75,
    pch=21,
    stroke=0
  )+
  geom_smooth(aes(fill=CELL_TYPE_ANNOT.2),
              method = "lm",color="black",lwd=0.5,alpha=0.3
  )+
  scale_fill_manual(values = cols25())+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
                              )
               ),
           label.y.npc="top",
           label.x.npc = "left",
           method = "pearson",
           size=1.25)+
  facet_wrap(~CELL_TYPE_ANNOT.2)+
  # stat_cor(method = "pearson",size=1.5)+
  guides(fill=guide_legend("Donor id",nrow=2))+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

We next plot a heatmap with all fCpGs, and see a whitish color heatmap, indicative of those samples being polyclonal.

##
## HEATMAP fCpGs
## 
# plot heatmap sorted by age and with colors

evoflux.meth <- fread("../Revision/Data/meth_palette_evoflux.tsv")
evoflux.meth



ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T




h.fCpGs <- Heatmap(fCpGs.betas[,fCpGs.metadata.all.plot$PARTICIPANT_ID_ANONYMOUS],
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=fCpGs.metadata.all.plot %>% 
                       select(age_sampling=AGE_SAMPLING,Horvath,
                              annot_1=CELL_TYPE_ANNOT.1,annot_2=CELL_TYPE_ANNOT.2),
                     col=list(
                       age_sampling=colorRamp2(breaks = seq(min(fCpGs.metadata.all.plot$AGE_SAMPLING,na.rm = T),
                                                            max(fCpGs.metadata.all.plot$AGE_SAMPLING,na.rm = T),
                                                            length.out=100),brewer.blues(100)
                                               ),
                       Horvath=colorRamp2(breaks = seq(min(fCpGs.metadata.all.plot$Horvath,na.rm = T),
                                                            max(fCpGs.metadata.all.plot$Horvath,na.rm = T),
                                                            length.out=100),brewer.blues(100)
                                          ),
                       annot_1=setNames(kelly(fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.1 %>%
                                                unique() %>%
                                                length()
                                              ),
                                        fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.1 %>%
                                          unique()
                                        ),
                       annot_2=setNames(kelly(fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.2 %>%
                                                unique() %>%
                                                length()
                                              ),
                                        fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.2 %>%
                                          unique()
                                        )
                       ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)

draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4 External datasets

We next perform analyses separately in each dataset.

4.1 Purified cell types

4.1.1 GSE184269 - Roy 2021

We next download sorted immune cell types in adults, available in GSE184269. We see a similar scenario as those samples included in our study.

##
##  Download   # GSE184269, sorted immune cells in aging
##

gse <- getGEO(GEO = "GSE184269",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE184269_series_matrix.txt.gz
show(gse)

## phenodata
sorted.age.metadata <- 
  pData(phenoData(gse)) %>% 
  clean_names()
sorted.age.metadata %>% head
sorted.age.metadata %>% count(source_name_ch1)
sorted.age.metadata$colnames.betas <- 
  gsub("ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM5583nnn/GSM\\d+/suppl/GSM\\d+_|_Grn.idat.gz",
       "",
       sorted.age.metadata$supplementary_file)
sorted.age.metadata$colnames.betas
sorted.age.metadata$age <- as.numeric(sorted.age.metadata$age_ch1)

## get meth values
sorted.age.betas <- fread("../Revision/Data/450k-Aging/GSE184269_Matrix_processed_FINAL.txt.gz",data.table = F)
head(sorted.age.betas);dim(sorted.age.betas)
rownames(sorted.age.betas) <- sorted.age.betas$ID_RED
sorted.age.betas <- sorted.age.betas[,grep("Detection_Pval|ID_RED",colnames(sorted.age.betas),value = T,invert = T)]
head(sorted.age.betas);dim(sorted.age.betas)
all(colnames(sorted.age.betas) %in% sorted.age.metadata$colnames.betas)
sorted.age.betas <- sorted.age.betas[,sorted.age.metadata$colnames.betas]
colnames(sorted.age.betas) <- sorted.age.metadata$geo_accession

##fcpgs beta values
table(rownames(fCpGs.betas) %in% rownames(sorted.age.betas))
fCpGs.sorted.age <- sorted.age.betas[rownames(fCpGs.betas),]
identical(colnames(fCpGs.sorted.age),sorted.age.metadata$geo_accession)

##complete with fCPGs stats
sorted.age.metadata$SD.fCpGs <- apply(fCpGs.sorted.age,2,sd,na.rm=T)
sorted.age.metadata$mean.fCpGs <- apply(fCpGs.sorted.age,2,mean,na.rm=T)

##calculate cell fractions
sorted.age.metadata<- 
  cbind(sorted.age.metadata,
        epidish(beta.m = sorted.age.betas,ref.m = cent12CT450k.m)$estF
  )
head(sorted.age.metadata)

Age distribution:

sorted.age.metadata %>% head
sorted.age.metadata %>% dim
sorted.age.metadata %>% count(source_name_ch1)
sorted.age.metadata %>% count(source_name_ch1,title)
sorted.age.metadata <- 
  sorted.age.metadata %>% 
  mutate(source=case_when(source_name_ch1=="Peripheral blood" ~ "PBMCs",.default = source_name_ch1))
sorted.age.metadata %>% count(donor_id_ch1,source_name_ch1)
sorted.age.metadata %>% count(donor_id_ch1)


sorted.age.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Roy et al, Immunity 2021, N=",nrow(sorted.age.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

We see in the heatmap a similar scenario, but there seems to be donor-specific batches.

sorted.age.metadata$suspected.batch <- str_extract(sorted.age.metadata$description,"Gestalt_Expt_\\d")


### heatmap with sorted ageed samples
h.fCpGs <- Heatmap(fCpGs.sorted.age,
        col=evoflux.meth$color,
        cluster_columns = T,
        cluster_rows = T,
        show_row_names = F,
        show_column_names = F,
        show_row_dend = T,
        show_column_dend = T,
        name="DNA methylation",
        border = T,
        row_dend_gp = gpar(lwd=0.1),
        column_dend_gp = gpar(lwd=0.1),
        top_annotation = HeatmapAnnotation(
          df=sorted.age.metadata %>%
            select(suspected.batch,
                   donor_id=donor_id_ch1,source,Sex=characteristics_ch1_2,age),
          col=list(
            suspected.batch=setNames(tableau20(sorted.age.metadata$suspected.batch %>% unique() %>% length()),
                                               sorted.age.metadata$suspected.batch %>% unique()
                                               ),
            donor_id=setNames(alphabet2(sorted.age.metadata %>% 
                                               pull(donor_id_ch1) %>% 
                                               unique() %>% 
                                               length()
                                             ),
                                             sorted.age.metadata %>% 
                                               pull(donor_id_ch1) %>% 
                                               unique()
                                  ),
            source=setNames(kelly(sorted.age.metadata %>% 
                                               pull(source) %>% 
                                               unique() %>% 
                                               length()
                                             ),
                                             sorted.age.metadata %>% 
                                               pull(source) %>% 
                                               unique()
                                  ),
            Sex=setNames(c("deepskyblue","pink2"),c("Sex: M","Sex: F")),
            age=colorRamp2(breaks = seq(min(sorted.age.metadata$age),
                                        max(sorted.age.metadata$age),
                                        length.out=100),
                           brewer.blues(100))
            ),
          show_annotation_name = T,
          na_col = "grey45",
          annotation_name_side = "left",
          show_legend = T,
          annotation_name_gp = gpar(fontsize=4),
          annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
          simple_anno_size = unit(2,"mm")
        )
)

draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
     )

4.1.2 GSE137594 - Clark 2020

We follow with other cohort with purified cells, including B and T cells from patients with different inflammatory conditions. GSE137594

gse <- getGEO(GEO = "GSE137594",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE137594_series_matrix.txt.gz
show(gse)

## phenodata
Clark2020.metadata <- pData(phenoData(gse)) %>% clean_names()
Clark2020.metadata %>% head
Clark2020.metadata$age <- as.numeric(Clark2020.metadata$age_ch1)
qplot(Clark2020.metadata$age)

rownames(Clark2020.metadata)

## get meth values
Clark2020.betas <- fread("../Revision/Data/450k-Aging/GSE137594_norm_avg_beta_pval.txt.gz",data.table = F)
head(Clark2020.betas);dim(Clark2020.betas);gc()
rownames(Clark2020.betas) <- Clark2020.betas$ID_REF
Clark2020.betas <- Clark2020.betas[,grep("Detection|ID_REF",colnames(Clark2020.betas),value = T,invert = T)]
identical(colnames(Clark2020.betas) , (Clark2020.metadata$title %>% gsub(": Newcastle Early Arthritis Clinic patient \\d+","",.)))
Clark2020.betas <- Clark2020.betas[,gsub(": Newcastle Early Arthritis Clinic patient \\d+","",Clark2020.metadata$title)]
colnames(Clark2020.betas) <- Clark2020.metadata$geo_accession
head(Clark2020.betas);dim(Clark2020.betas)

##fcpgs
table(rownames(fCpGs.betas) %in% rownames(Clark2020.betas))
fCpGs.Clark2020 <- Clark2020.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(Clark2020.betas)],]
head(fCpGs.Clark2020);dim(fCpGs.Clark2020)
identical(colnames(fCpGs.Clark2020),Clark2020.metadata$geo_accession)


##complete with fCPGs stats
Clark2020.metadata$SD.fCpGs <- apply(fCpGs.Clark2020,2,sd,na.rm=T)
Clark2020.metadata$mean.fCpGs <- apply(fCpGs.Clark2020,2,mean,na.rm=T)
head(Clark2020.metadata);dim(Clark2020.metadata)

##cell franctions
Clark2020.metadata<- 
  cbind(Clark2020.metadata,
        epidish(beta.m = Clark2020.betas,ref.m = cent12CT450k.m)$estF
  )
head(Clark2020.metadata)

Age distribution

Clark2020.metadata %>% head
Clark2020.metadata %>% dim
Clark2020.metadata %>% count(source_name_ch1)
Clark2020.metadata %>% count(characteristics_ch1)
Clark2020.metadata %>% count(characteristics_ch1_2)
Clark2020.metadata %>% count(first_diagnosis_ch1,working_diagnosis_ch1)



Clark2020.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Clark et al 2020, N=",nrow(Clark2020.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

There is a clear increase in SD during aging, while mean remain stable

Clark2020.metadata %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1_2),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_2)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  Clark2020.metadata %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1_2),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_2)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

We next plot a heatmap and see an overall polyclonal population of cells regardless of inflamatory condition.

Clark2020.metadata %>% head
Clark2020.metadata$description

##
## HEATMAP with sorted ageed samples
##

h.fCpGs <- Heatmap(fCpGs.Clark2020,
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=Clark2020.metadata %>%
                       select(Source=source_name_ch1,Working_diagnosis=working_diagnosis_ch1,age,Sex=gender_ch1),
                     col=list(
                       Source=setNames(kelly(Clark2020.metadata$source_name_ch1 %>% unique() %>% length()),
                                          Clark2020.metadata$source_name_ch1 %>% unique()
                       ),
                       Working_diagnosis=setNames(tableau20(Clark2020.metadata$working_diagnosis_ch1 %>% unique() %>% length()),
                                                Clark2020.metadata$working_diagnosis_ch1 %>% unique()
                       ),
                       Sex=setNames(c("deepskyblue","pink2"),c("Male","Female")),
                       age=colorRamp2(breaks = seq(min(Clark2020.metadata$age),
                                                   max(Clark2020.metadata$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2 Whole blood samples

We next follow the analyses with whole blood samples, accounting for know changes in cell type compositions during aging.

4.2.1 Pediatric samples

4.2.1.1 GSE36054 - Alisch 2015

We next load data from pediatric donors available at GSE36054

## 2. padiatric with ethnicity
gse <- getGEO(GEO = "GSE36054",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE36054_series_matrix.txt.gz
show(gse)

## phenodata
pediatrics.metadata <- 
  pData(phenoData(gse)) %>% 
  clean_names()

pediatrics.metadata %>% count(ethnicity_ch1)
pediatrics.metadata %>% count(source_name_ch1)
pediatrics.metadata %>% count(characteristics_ch1,ethnicity_ch1)

## get meth values
table(rownames(fCpGs.betas) %in% rownames(exprs(gse)))
pediatrics.betas <- exprs(gse)
fCpGs.pediatrics <- pediatrics.betas[rownames(fCpGs.betas),] %>% as.data.frame()
identical(colnames(fCpGs.pediatrics),pediatrics.metadata$geo_accession)

##complete with fCPGs stats
pediatrics.metadata$SD.fCpGs <- apply(fCpGs.pediatrics,2,sd,na.rm=T) 
pediatrics.metadata$mean.fCpGs <- apply(fCpGs.pediatrics,2,mean,na.rm=T)
class(pediatrics.metadata)


## complete cell types
pediatrics.metadata<- 
  cbind(pediatrics.metadata,
        epidish(beta.m = pediatrics.betas,ref.m = cent12CT450k.m)$estF
  )
head(pediatrics.metadata)

Age distribution

##
## PEDIATRIC POPULATION use other data sets to further strength previous results, and to show the same in pediatric population
##

# GSE36054 dataset

pediatrics.metadata %>% head
pediatrics.metadata %>% dim
pediatrics.metadata <- 
  pediatrics.metadata %>% 
  mutate(age=as.numeric(age_at_collection_months_ch1)/12)
pediatrics.metadata %>% count(characteristics_ch1_7)
pediatrics.metadata %>% count(cell_type_ch1)
pediatrics.metadata %>%  count(characteristics_ch1,ethnicity_ch1)
pediatrics.metadata %>% filter(is.na(age)) %>% count(characteristics_ch1,ethnicity_ch1)
head(pediatrics.betas);dim(pediatrics.betas)

pediatrics.metadata.plot <-
  pediatrics.metadata %>% 
  filter(!is.na(age),characteristics_ch1!="group: Harvard.Sib.Failed")

##plot age distribution in pediatrics
pediatrics.metadata.plot %>% head

pediatrics.metadata.plot %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Alisch et al, Genome Research 2015, N=",nrow(pediatrics.metadata.plot)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Contrary to adult donors, in these samples there is no association of SD with the age of the donors

pediatrics.metadata.plot %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=cell_type_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~cell_type_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  pediatrics.metadata.plot %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=cell_type_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~cell_type_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Also when considering the cellular composition of blood (a subtle signal already appears though in SD!)

## adjusting for cellular fractions
pediatrics.metadata.plot %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
       id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value,
             fill=variable
  ))+
  geom_point(pch=21,size=1,stroke=0.1)+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                              )
               ),
           # label.x.npc = 0.2,label.y.npc = 1,
           method = "pearson",
           size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
        )

##liner model corrected by cell fracts and ethnicity
# pediatrics.metadata %>% head
# pediatrics.metadata %>% count(ethnicity_ch1)

lm.fit <- 
  pediatrics.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  filter(ethnicity!="Unknown") %>% 
  lm(SD.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
                 )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with SD.fCpGs as response variable
age	129	-6e-04	-1e-03, -8e-05	2e-02	1e-01
ethnicity
Asian	3	—	—
Black	74	-2e-03	-1e-02, 8e-03	7e-01	7e-01
Other	44	-6e-03	-2e-02, 3e-03	2e-01	4e-01
White	8	-7e-03	-2e-02, 3e-03	2e-01	4e-01
Mono	129	3e-02	-1e-01, 2e-01	7e-01	7e-01
Neu	129	8e-02	-4e-02, 2e-01	2e-01	4e-01
Baso	129	4e-01	1e-01, 7e-01	6e-03	5e-02
Eos	129	1e-01	-1e-02, 2e-01	8e-02	3e-01
NK	129	3e-02	-1e-01, 2e-01	7e-01	7e-01
CD8Tnv	129	5e-02	-9e-02, 2e-01	5e-01	6e-01
CD8Tmem	129	6e-02	-8e-02, 2e-01	4e-01	6e-01
CD4Tnv	129	1e-01	-1e-02, 2e-01	9e-02	3e-01
CD4Tmem	129	3e-01	1e-01, 4e-01	2e-04	3e-03
Treg	129	1e-01	-9e-02, 3e-01	3e-01	4e-01
Bnv	129	5e-02	-8e-02, 2e-01	5e-01	6e-01
Bmem	129
Abbreviation: CI = Confidence Interval
R² = 0.525; Adjusted R² = 0.462; Sigma = 0.008; Statistic = 8.32; p-value = <0.001; df = 15; Log-likelihood = 454; AIC = -874; BIC = -825; Deviance = 0.007; Residual df = 113; No. Obs. = 129
¹ Benjamini & Hochberg correction for multiple testing

##mean fcpg

lm.fit <- 
  pediatrics.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  filter(ethnicity!="Unknown") %>% 
  lm(mean.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with mean.fCpGs as response variable
age	129	1e-03	-3e-04, 2e-03	1e-01	5e-01
ethnicity
Asian	3	—	—
Black	74	-5e-03	-3e-02, 2e-02	7e-01	7e-01
Other	44	5e-04	-2e-02, 3e-02	1e+00	1e+00
White	8	-1e-02	-4e-02, 2e-02	4e-01	5e-01
Mono	129	-2e-01	-6e-01, 2e-01	3e-01	5e-01
Neu	129	-2e-01	-5e-01, 1e-01	3e-01	5e-01
Baso	129	-7e-01	-2e+00, 1e-01	8e-02	5e-01
Eos	129	-2e-01	-6e-01, 1e-01	2e-01	5e-01
NK	129	-1e-01	-5e-01, 3e-01	5e-01	6e-01
CD8Tnv	129	-2e-01	-5e-01, 2e-01	4e-01	5e-01
CD8Tmem	129	-2e-01	-6e-01, 2e-01	4e-01	5e-01
CD4Tnv	129	-2e-01	-5e-01, 2e-01	3e-01	5e-01
CD4Tmem	129	-2e-01	-5e-01, 2e-01	3e-01	5e-01
Treg	129	-5e-01	-1e+00, 6e-02	8e-02	5e-01
Bnv	129	-1e-01	-4e-01, 2e-01	5e-01	6e-01
Bmem	129
Abbreviation: CI = Confidence Interval
R² = 0.128; Adjusted R² = 0.012; Sigma = 0.020; Statistic = 1.10; p-value = 0.4; df = 15; Log-likelihood = 327; AIC = -620; BIC = -572; Deviance = 0.047; Residual df = 113; No. Obs. = 129
¹ Benjamini & Hochberg correction for multiple testing

In the heamtap we can also obseerve there is no association with the ethnicity, nor sex.

##
### HEATMAP FCPGS
##
pediatrics.metadata.plot %>% head()

h.fCpGs <- Heatmap(fCpGs.pediatrics %>% 
                     select( pediatrics.metadata.plot %>% pull(geo_accession)),
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=pediatrics.metadata.plot %>%
                       select(ethnicity=ethnicity_ch1,age,Sex=gender_ch1),
                     col=list(
                       ethnicity=setNames(tableau20(pediatrics.metadata.plot %>%
                                                     pull(ethnicity_ch1) %>%
                                                     unique() %>% na.omit() %>% 
                                                     length()
                                                    ),
                                          pediatrics.metadata.plot %>%
                                            pull(ethnicity_ch1) %>%na.omit() %>% 
                                            unique()
                                          ),
                       Sex=setNames(c("deepskyblue","pink2"),c("M","F")),
                       age=colorRamp2(breaks = seq(min(pediatrics.metadata.plot$age),
                                                   max(pediatrics.metadata.plot$age),
                                                   length.out=100),
                                      brewer.blues(100))
                       # Neu=colorRamp2(breaks = seq_range(range(pediatrics.metadata.plot$Neu),length.out = 100),brewer.orrd(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


h.fCpGs <- draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2.2 Adult and centenarian samples

4.2.2.1 GSE40279 - Hannum 2012

We load Hannum dataset comprising 20-100 year old donors, available thorough GSE40279

##
## GSE40279
##

##download Hannum from GEO GSE40279
gse <- getGEO(GEO = "GSE40279",destdir = "../Revision/Data/450k-Aging") 
gse <- gse$GSE40279_series_matrix.txt.gz
show(gse)

## phenodata
hannum.metadata <- 
  pData(phenoData(gse)) %>% 
  clean_names()
hannum.metadata %>% head
hannum.metadata %>% select(contains("age")) %>% head
hannum.metadata <- 
  hannum.metadata %>% 
  mutate(age=as.numeric(age_y_ch1))


## get meth values
table(rownames(fCpGs.betas) %in% rownames(exprs(gse)))
hannum.betas <- exprs(gse)
head(hannum.betas)
colnames(hannum.betas)
identical(colnames(hannum.betas),hannum.metadata$geo_accession)

# calculate SD and mean for all samples for fCpGs
table(rownames(fCpGs.betas) %in% rownames(hannum.betas))
fCpGs.hannum <- hannum.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(hannum.betas)],]
identical(colnames(hannum.betas),hannum.metadata$geo_accession)


##complete with fCPGs stats
hannum.metadata$SD.fCpGs <- apply(fCpGs.hannum,2,sd,na.rm=T)
hannum.metadata$mean.fCpGs <- apply(fCpGs.hannum,2,mean,na.rm=T)


## complete cellular proportions
hannum.metadata<- 
  cbind(hannum.metadata,
        epidish(beta.m = hannum.betas,ref.m = cent12CT450k.m)$estF
        )
head(hannum.metadata)

Age distribution

hannum.metadata %>% head
hannum.metadata %>% dim
hannum.metadata %>% count(characteristics_ch1_4)
head(hannum.betas);dim(hannum.betas)


##plot age distribution in hannum
hannum.metadata %>% head

hannum.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Hannum et al, Mol Cell 2013, N=",nrow(hannum.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

mean and SD fCPG. We can see there is a clear correlation between SD methyaltion of fCpGs and age!

hannum.metadata %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=characteristics_ch1_5),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_5)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  hannum.metadata %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1_5),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1_5)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

The subtle anticorrelation with mean disappear when adjusting for cell types, while SD remains highly significant.

## adjusting for cellular fractions
hannum.metadata %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
                 id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value
  ))+
  geom_point(aes(fill=variable),
             pch=21,size=1,stroke=0.1
             )+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  # label.x.npc = 0.2,label.y.npc = 1,
  method = "pearson",
  size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##liner model corrected by cell fracts and ethnicity
# hannum.metadata %>% head
# hannum.metadata %>% count(ethnicity_ch1)

lm.fit <- 
  hannum.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  lm(SD.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with SD.fCpGs as response variable
age	656	4e-04	3e-04, 5e-04	4e-14	6e-13
ethnicity
Caucasian - European	426	—	—
Hispanic - Mexican	230	-2e-04	-3e-03, 3e-03	9e-01	9e-01
Mono	656	5e-02	-3e-02, 1e-01	2e-01	3e-01
Neu	656	8e-02	1e-02, 1e-01	2e-02	6e-02
Baso	656	3e-01	1e-01, 5e-01	4e-04	2e-03
Eos	656	7e-02	-2e-02, 2e-01	1e-01	2e-01
NK	656	2e-02	-7e-02, 1e-01	7e-01	7e-01
CD8Tnv	656	2e-01	4e-02, 3e-01	8e-03	3e-02
CD8Tmem	656	3e-02	-5e-02, 1e-01	4e-01	6e-01
CD4Tnv	656	7e-02	-4e-03, 2e-01	6e-02	1e-01
CD4Tmem	656	3e-02	-5e-02, 1e-01	5e-01	6e-01
Treg	656	3e-01	1e-01, 4e-01	6e-04	3e-03
Bnv	656	7e-02	-2e-02, 2e-01	1e-01	2e-01
Bmem	656
Abbreviation: CI = Confidence Interval
R² = 0.228; Adjusted R² = 0.212; Sigma = 0.014; Statistic = 14.6; p-value = <0.001; df = 13; Log-likelihood = 1,889; AIC = -3,747; BIC = -3,680; Deviance = 0.121; Residual df = 642; No. Obs. = 656
¹ Benjamini & Hochberg correction for multiple testing

##mean fcpg

lm.fit <- 
  hannum.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  filter(ethnicity!="Unknown") %>% 
  lm(mean.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with mean.fCpGs as response variable
age	656	7e-06	-1e-04, 1e-04	9e-01	9e-01
ethnicity
Caucasian - European	426	—	—
Hispanic - Mexican	230	2e-03	-2e-03, 5e-03	3e-01	5e-01
Mono	656	-5e-02	-2e-01, 4e-02	3e-01	5e-01
Neu	656	-2e-02	-1e-01, 6e-02	6e-01	7e-01
Baso	656	9e-02	-1e-01, 3e-01	4e-01	6e-01
Eos	656	-7e-02	-2e-01, 4e-02	2e-01	5e-01
NK	656	-2e-02	-1e-01, 9e-02	7e-01	7e-01
CD8Tnv	656	1e-01	-2e-02, 3e-01	1e-01	4e-01
CD8Tmem	656	2e-01	8e-02, 3e-01	2e-04	3e-03
CD4Tnv	656	1e-01	2e-03, 2e-01	4e-02	3e-01
CD4Tmem	656	7e-02	-3e-02, 2e-01	2e-01	5e-01
Treg	656	-4e-02	-2e-01, 2e-01	7e-01	7e-01
Bnv	656	5e-02	-5e-02, 2e-01	3e-01	5e-01
Bmem	656
Abbreviation: CI = Confidence Interval
R² = 0.313; Adjusted R² = 0.299; Sigma = 0.017; Statistic = 22.5; p-value = <0.001; df = 13; Log-likelihood = 1,747; AIC = -3,464; BIC = -3,396; Deviance = 0.187; Residual df = 642; No. Obs. = 656
¹ Benjamini & Hochberg correction for multiple testing

Heatmap

##
### HEATMAP FCPGS
##
hannum.metadata %>% head()

h.fCpGs <- Heatmap(fCpGs.hannum,
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=hannum.metadata %>%
                       select(ethnicity=ethnicity_ch1,age,Sex=gender_ch1),
                     col=list(
                       ethnicity=setNames(kelly(hannum.metadata %>%
                                                      pull(ethnicity_ch1) %>%
                                                      unique() %>% na.omit() %>% 
                                                      length()
                       ),
                       hannum.metadata %>%
                         pull(ethnicity_ch1) %>%na.omit() %>% 
                         unique()
                       ),
                       Sex=setNames(c("deepskyblue","pink2"),c("M","F")),
                       age=colorRamp2(breaks = seq(min(hannum.metadata$age),
                                                   max(hannum.metadata$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2.2.2 GSE55763 - Lehne 2015

Large dataset from adults and elderly people, available in GSE55763. We download processed data matrix and load it separately.

##
# 9.   Lehen cohort cohorts genome biology 2015 ≥ 2500 samples, age assoc.
##

gse <- getGEO(GEO = "GSE55763",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE55763_series_matrix.txt.gz
show(gse)

## phenodata
lehne2015.metadata <- pData(phenoData(gse)) %>% clean_names()
lehne2015.metadata %>% head
lehne2015.metadata$age <- as.numeric(lehne2015.metadata$age_ch1)
rownames(lehne2015.metadata)

## get meth values
lehne2015.betas <- fread("../Revision/Data/450k-Aging/GSE55763_normalized_betas.txt.gz",data.table = F)
head(lehne2015.betas);dim(lehne2015.betas);gc()
rownames(lehne2015.betas) <- lehne2015.betas$ID_REF
lehne2015.betas <- lehne2015.betas[,grep("Detection|ID_REF",colnames(lehne2015.betas),value = T,invert = T)]
all(gsub("Peripheral blood, ","",lehne2015.metadata$title) %in% colnames(lehne2015.betas))
all(colnames(lehne2015.betas) %in% gsub("Peripheral blood, ","",lehne2015.metadata$title))
lehne2015.betas <- lehne2015.betas[,gsub("Peripheral blood, ","",lehne2015.metadata$title)]
colnames(lehne2015.betas) <- lehne2015.metadata$geo_accession
head(lehne2015.betas);dim(lehne2015.betas)

##fcpgs
table(rownames(fCpGs.betas) %in% rownames(lehne2015.betas))
fCpGs.lehne2015 <- lehne2015.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(lehne2015.betas)],]
head(fCpGs.lehne2015);dim(fCpGs.lehne2015)
identical(colnames(fCpGs.lehne2015),lehne2015.metadata$geo_accession)
gc()

##complete with fCPGs stats
lehne2015.metadata$SD.fCpGs <- apply(fCpGs.lehne2015,2,sd,na.rm=T)
lehne2015.metadata$mean.fCpGs <- apply(fCpGs.lehne2015,2,mean,na.rm=T)
head(lehne2015.metadata);dim(lehne2015.metadata)

##cell fractions
lehne2015.metadata<- 
  cbind(lehne2015.metadata,
        epidish(beta.m = lehne2015.betas,ref.m = cent12CT450k.m)$estF
  )
head(lehne2015.metadata)

Age distribution

lehne2015.metadata %>% head
lehne2015.metadata %>% dim
lehne2015.metadata %>% count(characteristics_ch1)
lehne2015.metadata %>% count(characteristics_ch1_1)
lehne2015.metadata %>% count(description)
head(lehne2015.betas);dim(lehne2015.betas)

## remove techinical replicates
lehne2015.metadata.plot <- 
  lehne2015.metadata %>% 
  filter(characteristics_ch1_1=="dataset: population study")

##plot age distribution in lehne2015
lehne2015.metadata.plot %>% head
lehne2015.metadata.plot %>% dim

lehne2015.metadata.plot %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from lehne 2015 et al, Genome Biology , N=",nrow(lehne2015.metadata.plot)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Mean and SD fCpG methyaltion

lehne2015.metadata.plot %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=characteristics_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  lehne2015.metadata.plot %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=characteristics_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~characteristics_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Adjusting cell types

## adjusting for cellular fractions
lehne2015.metadata.plot %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
                 id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value
  ))+
  geom_point(aes(fill=variable),
             pch=21,size=1,stroke=0.1
  )+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  # label.x.npc = 0.2,label.y.npc = 1,
  method = "pearson",
  size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  theme_classic()+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##liner model corrected by cell fracts and ethnicity
# lehne2015.metadata.plot %>% head


lm.fit <- 
  lehne2015.metadata.plot %>%
  lm(SD.fCpGs ~ 
       age+ 
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem,
     data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(lehne2015.metadata$age,lehne2015.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with SD.fCpGs as response variable
age	2,639	2e-04	2e-04, 3e-04	4e-47	5e-46
Mono	2,639	2e-02	-7e-03, 5e-02	2e-01	5e-01
Neu	2,639	1e-02	-1e-02, 3e-02	4e-01	6e-01
Baso	2,639	4e-03	-5e-02, 6e-02	9e-01	9e-01
Eos	2,639	1e-02	-1e-02, 4e-02	3e-01	6e-01
NK	2,639	-1e-02	-4e-02, 2e-02	4e-01	6e-01
CD8Tnv	2,639	-4e-03	-4e-02, 3e-02	8e-01	9e-01
CD8Tmem	2,639	-3e-02	-5e-02, -2e-03	4e-02	1e-01
CD4Tnv	2,639	9e-03	-2e-02, 3e-02	5e-01	6e-01
CD4Tmem	2,639	-2e-02	-4e-02, 1e-02	2e-01	5e-01
Treg	2,639	-5e-02	-1e-01, -5e-03	3e-02	1e-01
Bnv	2,639	-1e-03	-3e-02, 2e-02	9e-01	9e-01
Bmem	2,639
Abbreviation: CI = Confidence Interval
R² = 0.146; Adjusted R² = 0.142; Sigma = 0.007; Statistic = 37.3; p-value = <0.001; df = 12; Log-likelihood = 9,230; AIC = -18,432; BIC = -18,349; Deviance = 0.142; Residual df = 2,626; No. Obs. = 2,639
¹ Benjamini & Hochberg correction for multiple testing

##mean fcpg

lm.fit <- 
  lehne2015.metadata.plot %>%
  lm(mean.fCpGs ~ 
       age+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(lehne2015.metadata$age,lehne2015.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with mean.fCpGs as response variable
age	2,639	1e-04	6e-05, 1e-04	1e-06	1e-05
Mono	2,639	-6e-02	-1e-01, -3e-02	8e-04	2e-03
Neu	2,639	-2e-02	-5e-02, 1e-02	2e-01	4e-01
Baso	2,639	-4e-02	-1e-01, 4e-02	4e-01	4e-01
Eos	2,639	-2e-02	-6e-02, 1e-02	2e-01	4e-01
NK	2,639	9e-04	-4e-02, 4e-02	1e+00	1e+00
CD8Tnv	2,639	3e-02	-2e-02, 7e-02	3e-01	4e-01
CD8Tmem	2,639	7e-02	3e-02, 1e-01	2e-04	7e-04
CD4Tnv	2,639	-4e-02	-7e-02, -4e-03	3e-02	7e-02
CD4Tmem	2,639	3e-02	-8e-03, 7e-02	1e-01	3e-01
Treg	2,639	2e-01	8e-02, 2e-01	2e-05	1e-04
Bnv	2,639	-2e-03	-4e-02, 3e-02	9e-01	1e+00
Bmem	2,639
Abbreviation: CI = Confidence Interval
R² = 0.176; Adjusted R² = 0.173; Sigma = 0.010; Statistic = 46.8; p-value = <0.001; df = 12; Log-likelihood = 8,309; AIC = -16,590; BIC = -16,508; Deviance = 0.285; Residual df = 2,626; No. Obs. = 2,639
¹ Benjamini & Hochberg correction for multiple testing

Heatmap

##
### HEATMAP FCPGS
##

lehne2015.metadata.plot %>% head()

h.fCpGs <- Heatmap(fCpGs.lehne2015 %>% 
                     select( lehne2015.metadata.plot %>% pull(geo_accession)),
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=lehne2015.metadata.plot %>%
                       select(age,Sex=characteristics_ch1_2),
                     col=list(
                       Sex=setNames(c("deepskyblue","pink2"),c("gender: M","gender: F")),
                       age=colorRamp2(breaks = seq(min(lehne2015.metadata.plot$age),
                                                   max(lehne2015.metadata.plot$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   ),
                   use_raster = T, raster_quality = 10
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
)

4.2.2.3 GSE72773 - Horvath 2016

We next load dataset from Horvath 2016, available at GSE72773

gse <- getGEO(GEO = "GSE72773",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE72773_series_matrix.txt.gz
show(gse)

## phenodata
Horvath2016.metadata <- pData(phenoData(gse)) %>% clean_names()
Horvath2016.metadata %>% head
Horvath2016.metadata$age <- as.numeric(Horvath2016.metadata$age_ch1)
rownames(Horvath2016.metadata)

## get meth values
Horvath2016.betas <- fread("../Revision/Data/450k-Aging/GSE72773_datBetaNormalized.csv.gz",data.table = F,na.strings = c("","NA"))
sapply(Horvath2016.betas,class) %>% table()
any(is.na(Horvath2016.betas))
head(Horvath2016.betas);dim(Horvath2016.betas);gc()
rownames(Horvath2016.betas) <- Horvath2016.betas$ID
Horvath2016.betas <- Horvath2016.betas[,grep("^ID",colnames(Horvath2016.betas),invert = T)]
head(Horvath2016.betas);dim(Horvath2016.betas)


all(gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title) %in% colnames(Horvath2016.betas))
all(colnames(Horvath2016.betas) %in% gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title))

Horvath2016.betas <- 
  Horvath2016.betas[,gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title)]
colnames(Horvath2016.betas) <- Horvath2016.metadata$geo_accession
head(Horvath2016.betas);dim(Horvath2016.betas)

##fcpgs

# some values are characters or NA, convert appropiately to numeric
# mat <- apply(fCpGs.Horvath2016[,which(sapply(fCpGs.Horvath2016,class)=="character")],2,as.numeric)
# apply(mat,2,function(x)any(is.na(x))) ##checked which samples gave NA, only 1 cpgs with NA value, so ok!
# sapply(fCpGs.Horvath2016,class)


table(rownames(fCpGs.betas) %in% rownames(Horvath2016.betas))
fCpGs.Horvath2016 <- 
  Horvath2016.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(Horvath2016.betas)],]

## some values are characters, convert to numeric!!
fCpGs.Horvath2016 <- as.data.frame(sapply(fCpGs.Horvath2016,as.numeric))
head(fCpGs.Horvath2016);dim(fCpGs.Horvath2016)
identical(colnames(fCpGs.Horvath2016),Horvath2016.metadata$geo_accession)


##complete with fCPGs stats
Horvath2016.metadata$SD.fCpGs <- apply(fCpGs.Horvath2016,2,sd,na.rm=T)
Horvath2016.metadata$mean.fCpGs <- apply(fCpGs.Horvath2016,2,mean,na.rm=T)
head(Horvath2016.metadata);dim(Horvath2016.metadata)

##cell fractions
all(rownames(cent12CT450k.m) %in% rownames(Horvath2016.betas))
horvath.mat.cells <- apply(Horvath2016.betas[rownames(cent12CT450k.m),],2,as.numeric)
head(horvath.mat.cells);dim(horvath.mat.cells)
rownames(horvath.mat.cells) <- rownames(cent12CT450k.m)


Horvath2016.metadata <- 
  cbind(Horvath2016.metadata,
        epidish(beta.m = horvath.mat.cells,ref.m = cent12CT450k.m)$estF
  )
head(Horvath2016.metadata)

Age distribution

Horvath2016.metadata %>% head
Horvath2016.metadata %>% dim
Horvath2016.metadata %>% count(source_name_ch1)
Horvath2016.metadata %>% count(ethnicity_ch1)
head(Horvath2016.betas);dim(Horvath2016.betas)


##plot age distribution in Horvath2016
Horvath2016.metadata %>% head

Horvath2016.metadata %>%
  ggplot(aes(x=age))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_histogram()+
  theme_classic()+
  ggtitle(paste0("Samples from Horvath 2016 et al, Genome Biology 2016, N=",nrow(Horvath2016.metadata)))+
  guides(fill=guide_legend("Donor id",nrow=2))+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Mean and SD

Horvath2016.metadata %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=source_name_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~source_name_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  Horvath2016.metadata %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=source_name_ch1),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25())+
  facet_wrap(~source_name_ch1)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  method = "pearson",
  size=1.5)+
  # stat_cor(method = "pearson",size=1.5)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Adjusting for cell types

## adjusting for cellular fractions
Horvath2016.metadata %>%
  reshape2::melt(measure.vars =  c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
                 id.vars = "age") %>%
  ggplot(aes(x=age,
             y=value
  ))+
  geom_point(aes(fill=variable),
             pch=21,size=1,stroke=0.1
  )+
  geom_smooth(method = "lm",color="black")+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
  )
  ),
  # label.x.npc = 0.2,label.y.npc = 1,
  method = "pearson",
  size=1.25)+
  scale_fill_manual(values = cols25())+
  ylab("Cell fraction")+
  facet_wrap(.~variable,scales = "free")+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        # legend.spacing.y = unit(3,"pt"),
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

##liner model corrected by cell fracts and ethnicity
# Horvath2016.metadata %>% head
# Horvath2016.metadata %>% count(ethnicity_ch1)

lm.fit <- 
  Horvath2016.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  lm(SD.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with SD.fCpGs as response variable
age	310	4e-04	2e-04, 5e-04	5e-08	7e-07
ethnicity
Caucasian	235	—	—
Hispanic	38	-1e-03	-6e-03, 4e-03	6e-01	9e-01
Tsimane	37	2e-02	6e-03, 2e-02	1e-03	8e-03
Mono	310	-2e-02	-2e-01, 2e-01	8e-01	9e-01
Neu	310	-3e-02	-2e-01, 1e-01	7e-01	9e-01
Baso	310	5e-02	-3e-01, 4e-01	8e-01	9e-01
Eos	310	-6e-02	-2e-01, 1e-01	5e-01	9e-01
NK	310	-7e-02	-3e-01, 1e-01	5e-01	9e-01
CD8Tnv	310	1e-01	-1e-01, 3e-01	4e-01	9e-01
CD8Tmem	310	-1e-01	-3e-01, 6e-02	2e-01	9e-01
CD4Tnv	310	-3e-02	-2e-01, 1e-01	7e-01	9e-01
CD4Tmem	310	-9e-02	-3e-01, 8e-02	3e-01	9e-01
Treg	310	6e-03	-2e-01, 2e-01	1e+00	1e+00
Bnv	310	-2e-02	-2e-01, 2e-01	8e-01	9e-01
Bmem	310
Abbreviation: CI = Confidence Interval
R² = 0.217; Adjusted R² = 0.180; Sigma = 0.014; Statistic = 5.85; p-value = <0.001; df = 14; Log-likelihood = 902; AIC = -1,772; BIC = -1,712; Deviance = 0.054; Residual df = 295; No. Obs. = 310
¹ Benjamini & Hochberg correction for multiple testing

##mean fcpg

lm.fit <- 
  Horvath2016.metadata %>%
  mutate(ethnicity=ethnicity_ch1) %>% 
  lm(mean.fCpGs ~ 
       age+
       ethnicity+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with mean.fCpGs as response variable
age	310	-3e-04	-5e-04, -4e-06	5e-02	3e-01
ethnicity
Caucasian	235	—	—
Hispanic	38	9e-03	-1e-03, 2e-02	9e-02	4e-01
Tsimane	37	5e-03	-1e-02, 2e-02	6e-01	8e-01
Mono	310	1e-02	-3e-01, 4e-01	1e+00	1e+00
Neu	310	2e-03	-3e-01, 3e-01	1e+00	1e+00
Baso	310	5e-01	-1e-01, 1e+00	1e-01	4e-01
Eos	310	-3e-02	-4e-01, 3e-01	9e-01	1e+00
NK	310	-1e-01	-5e-01, 2e-01	5e-01	8e-01
CD8Tnv	310	2e-01	-2e-01, 6e-01	3e-01	8e-01
CD8Tmem	310	2e-01	-1e-01, 6e-01	2e-01	7e-01
CD4Tnv	310	-1e-01	-5e-01, 2e-01	5e-01	8e-01
CD4Tmem	310	1e-01	-2e-01, 5e-01	5e-01	8e-01
Treg	310	-6e-01	-1e+00, -1e-01	1e-02	2e-01
Bnv	310	1e-02	-4e-01, 4e-01	9e-01	1e+00
Bmem	310
Abbreviation: CI = Confidence Interval
R² = 0.209; Adjusted R² = 0.172; Sigma = 0.027; Statistic = 5.57; p-value = <0.001; df = 14; Log-likelihood = 689; AIC = -1,345; BIC = -1,285; Deviance = 0.214; Residual df = 295; No. Obs. = 310
¹ Benjamini & Hochberg correction for multiple testing

Heatmap

##
### HEATMAP FCPGS
##
Horvath2016.metadata %>% head()

h.fCpGs <- Heatmap(fCpGs.Horvath2016,
                   col=evoflux.meth$color,
                   cluster_columns = T,
                   cluster_rows = T,
                   show_row_names = F,
                   show_column_names = F,
                   show_row_dend = T,
                   show_column_dend = T,
                   name="DNA methylation",
                   border = T,
                   row_dend_gp = gpar(lwd=0.1),
                   column_dend_gp = gpar(lwd=0.1),
                   top_annotation = HeatmapAnnotation(
                     df=Horvath2016.metadata %>%
                       select(ethnicity=ethnicity_ch1,age,Sex=characteristics_ch1),
                     col=list(
                       ethnicity=setNames(kelly(Horvath2016.metadata %>%
                                                  pull(ethnicity_ch1) %>%
                                                  unique() %>% na.omit() %>% 
                                                  length()
                       ),
                       Horvath2016.metadata %>%
                         pull(ethnicity_ch1) %>%na.omit() %>% 
                         unique()
                       ),
                       Sex=setNames(c("deepskyblue","pink2"),c("Sex: male","Sex: female")),
                       age=colorRamp2(breaks = seq(min(Horvath2016.metadata$age),
                                                   max(Horvath2016.metadata$age),
                                                   length.out=100),
                                      brewer.blues(100))
                     ),
                     show_annotation_name = T,
                     na_col = "grey45",
                     annotation_name_side = "left",
                     show_legend = T,
                     annotation_name_gp = gpar(fontsize=4),
                     annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
                     simple_anno_size = unit(2,"mm")
                   )
)


draw(h.fCpGs,
     merge_legend=T,
     heatmap_legend_side = "right",
     annotation_legend_side = "right"
     )

5 Extended Data Fig. 3

Here we collect 2 studies with sorted samples.

# figure manuscipt 2
Clark2020.metadata$source <- Clark2020.metadata$cell_type_ch1
Clark2020.metadata$Study <- "GSE137594"

sorted.age.metadata$Study <- "GSE184269"

sorted.manuscript <- bind_rows(sorted.age.metadata,Clark2020.metadata)
sorted.manuscript$source

sorted.manuscript %>% 
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  geom_point(
    aes(fill=source,
        shape=Study),
    size=0.75,
    stroke=0
  )+
  scale_shape_manual(values = c("GSE137594"=24,"GSE184269"=21))+
  geom_smooth(aes(fill=source),
              method = "lm",color="black",lwd=0.5,alpha=0.3
  )+
  scale_fill_manual(values = cols25())+
  facet_wrap(~source)+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
  )
  ),
  label.y.npc="top",
  label.x.npc = "left",
  method = "pearson",
  size=1.25)+
  # stat_cor(method = "pearson",size=1.5)+
  guides(fill=guide_none(),
         shape=guide_legend(override.aes = list(stroke=0.25,color="grey0"))
         )+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) | 
  
  sorted.manuscript %>% 
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(
    aes(fill=source,
        shape=Study
        ),
    size=0.75,
    stroke=0
  )+
  scale_shape_manual(values = c("GSE137594"=24,"GSE184269"=21))+
  geom_smooth(aes(fill=source),
              method = "lm",color="black",lwd=0.5,alpha=0.3
  )+
  scale_fill_manual(values = cols25())+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
  )
  ),
  label.y.npc="top",
  label.x.npc = "left",
  method = "pearson",
  size=1.25)+
  facet_wrap(~source)+
  # stat_cor(method = "pearson",size=1.5)+
  labs(caption = "* Benjamini & Hochberg corrected P-values")+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "none",
        legend.box.spacing = unit(2,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

6 Extended Data Fig. 3 and Supple.Fig. 7

We collect all blood samples across the distinct datasets, including GSE36054, GSE40279, GSE55763 and GSE72773 and perform the analyses with all samples. We correct for cell types, and show how mean fCpG remains stable when considering changes in cell types composition during aging, but fCpG SD increases, consistent with clonal expansions of the hematopoietic system during aging.

##
## Fig manuscipt 3 and 4, whole blood datasets including pediatrics, hannum, horvath2016 and lehne.
##

pediatrics.metadata.plot %>% head
hannum.metadata %>% head
Horvath2016.metadata %>% head
lehne2015.metadata.plot %>% head

wb.df <- bind_rows(cbind(pediatrics.metadata.plot,Study="GSE36054"),
                   cbind(hannum.metadata,Study="GSE40279"),
                   cbind(Horvath2016.metadata,Study="GSE72773"),
                   cbind(lehne2015.metadata.plot,Study="GSE55763")
                   )
wb.df %>% head


wb.df %>%
  ggplot(aes(x=age,
             y=mean.fCpGs
  ))+
  # geom_boxplot(outlier.shape = NA,fill=NA)+
  geom_point(aes(fill=Study),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25(),aesthetics = c("fill","color"))+
  stat_cor(
    aes(label = paste0(after_stat(r.label),
                       "~`,`~`P=`~",
                       p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                       ),
        color=Study
        ),
    method = "pearson",
    size=1.5,label.y.npc = 0.1
  )+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                              )
               ),
           method = "pearson",
           size=1.5)+
  annotate(geom = "text",x = 10,y=0.6,label=paste0("N=",nrow(wb.df)),size=2)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "top",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  ) |
  
  wb.df %>%
  ggplot(aes(x=age,
             y=SD.fCpGs
  ))+
  geom_point(aes(fill=Study),
             size=1,
             pch=21,
             stroke=0
  )+
  geom_smooth(method = "lm",color="black",lwd=0.5)+
  scale_fill_manual(values = cols25(),aesthetics = c("fill","color"))+
  stat_cor(aes(label = paste0(after_stat(r.label),
                              "~`,`~`P=`~",
                              p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
                              )
               ),
           method = "pearson",
           size=1.5)+
  stat_cor(
    aes(label = paste0(after_stat(r.label),
                       "~`,`~`P=`~",
                       p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
    ),
    color=Study
    ),
    method = "pearson",
    size=1.5,label.y.npc = 0.6
  )+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text = element_text(color="grey0"),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        # panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.text = element_text(size = 4),
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

Adjust cell types

##multivariate lm model with cell fracs
##liner model corrected by cell fracts and ethnicity
# wb.df %>% head
# wb.df %>% count(ethnicity_ch1,Study)


lm.fit <- 
  wb.df %>%
  lm(SD.fCpGs ~ 
       age+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)+ggtitle()

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with SD.fCpGs as response variable
age	3,738	4e-04	3e-04, 4e-04	1e-147	1e-146
Mono	3,738	6e-02	3e-02, 9e-02	6e-05	1e-04
Neu	3,738	6e-02	4e-02, 8e-02	2e-06	3e-06
Baso	3,738	1e-01	9e-02, 2e-01	9e-07	2e-06
Eos	3,738	7e-02	5e-02, 1e-01	7e-08	2e-07
NK	3,738	3e-02	-3e-03, 5e-02	8e-02	1e-01
CD8Tnv	3,738	1e-01	7e-02, 1e-01	1e-09	5e-09
CD8Tmem	3,738	2e-04	-3e-02, 3e-02	1e+00	1e+00
CD4Tnv	3,738	4e-02	1e-02, 7e-02	2e-03	3e-03
CD4Tmem	3,738	2e-02	-9e-03, 5e-02	2e-01	2e-01
Treg	3,738	2e-01	1e-01, 2e-01	4e-11	2e-10
Bnv	3,738	4e-02	1e-02, 7e-02	2e-03	3e-03
Bmem	3,738
Abbreviation: CI = Confidence Interval
R² = 0.321; Adjusted R² = 0.319; Sigma = 0.010; Statistic = 147; p-value = <0.001; df = 12; Log-likelihood = 11,952; AIC = -23,876; BIC = -23,789; Deviance = 0.365; Residual df = 3,725; No. Obs. = 3,738
¹ Benjamini & Hochberg correction for multiple testing

##mean fcpg

lm.fit <- 
  wb.df %>%
  lm(mean.fCpGs ~ 
       age+
       Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
     ,data = .)

# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)

tab <- 
  tbl_regression(lm.fit,
                 estimate_fun = function(i)format(x = i,digits=1,scientific=T),
                 pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
  )  %>% 
  add_n(location = "level") %>%
  add_q(method = "BH") %>% 
  add_glance_source_note() %>% #show_header_names() to show colum names to be modified
  modify_header(label = "**Explanatory variables**") %>% 
  bold_p(q = F) %>% 
  as_gt() %>% 
  tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)

Explanatory variables	N	Beta	95% CI	p-value	q-value¹
Multivariate linear model with mean.fCpGs as response variable
age	3,738	-2e-05	-6e-05, 3e-05	5e-01	5e-01
Mono	3,738	9e-03	-4e-02, 6e-02	7e-01	7e-01
Neu	3,738	5e-02	3e-03, 9e-02	4e-02	6e-02
Baso	3,738	1e-01	4e-02, 2e-01	5e-03	1e-02
Eos	3,738	5e-02	6e-03, 1e-01	3e-02	5e-02
NK	3,738	4e-02	-9e-03, 9e-02	1e-01	1e-01
CD8Tnv	3,738	3e-01	3e-01, 4e-01	2e-26	2e-25
CD8Tmem	3,738	1e-01	9e-02, 2e-01	7e-09	3e-08
CD4Tnv	3,738	4e-02	-8e-03, 9e-02	1e-01	1e-01
CD4Tmem	3,738	8e-02	3e-02, 1e-01	2e-03	5e-03
Treg	3,738	4e-01	3e-01, 5e-01	3e-19	2e-18
Bnv	3,738	1e-01	8e-02, 2e-01	4e-07	1e-06
Bmem	3,738
Abbreviation: CI = Confidence Interval
R² = 0.199; Adjusted R² = 0.196; Sigma = 0.017; Statistic = 77.0; p-value = <0.001; df = 12; Log-likelihood = 9,857; AIC = -19,686; BIC = -19,599; Deviance = 1.12; Residual df = 3,725; No. Obs. = 3,738
¹ Benjamini & Hochberg correction for multiple testing

fCpGs.betas %>% dim
fCpGs.metadata$supplementary_table_2 %>% count(cell_type_annot_1)
fCpGs.metadata$supplementary_table_2 %>% 
  filter(cell_type_annot_1!="Normal_lymphoid_cell",
         cell_type_annot_1!="PBMCs",cell_type_annot_1!="Whole_blood") %>% 
  count(cell_type_annot_3)

tumors <- 
  fCpGs.metadata$supplementary_table_2 %>% 
  filter(cell_type_annot_1!="Normal_lymphoid_cell",
         cell_type_annot_1!="PBMCs",cell_type_annot_1!="Whole_blood")
tumors %>% dim

tumors$SD.fCpGs <- apply(fCpGs.betas[,tumors$participant_id_anonymous],2,sd)
tumors$Group <- tumors$cell_type_annot_3


##merge data frame
wb.df$Group <- cut(wb.df$age,breaks = c(0,30,50,70,80,max(wb.df$age)),include.lowest = T)
wb.df$Group %>% table

df.age <- bind_rows(wb.df,tumors)
df.age$Group <- factor(df.age$Group,levels = c("[0,30]","(30,50]","(50,70]","(70,80]","(80,101]",
                                "B-ALL","B-ALL-remission","B-ALL-relapse",
                                "T-ALL","T-ALL-remission","T-ALL-relapse",
                                "MCL","DLBCL-NOS","MBL","CLL","RT","MGUS","MM"
                                ))
df.age %>% count(Group)



cols.paper <- c(
  "30"="grey100",
  "50"="grey90",
  "70"="grey80",
  "90"="grey50",
  "101"="grey30",
  NA_values="#666666",
  HPC="#E6E6E6",
  pre.Bcell="#D5D5D5",
  NBC="#C3C3C3",
  GCB="#AEAEAE",
  tPC="#969696",
  MBC="#787878",
  bmPC="#4D4D4D",
  Bcell="#C3C3C3",
  Tcell="#F0F8FF",
  PBMCs="#FFF8DC",
  Whole_blood="#CDC8B1",
  "Whole blood"="#CDC8B1",
  Normal_lymphoid_cell="#D5D5D5",
  Normal_myeloid_cell="#E6E6FA",
  "T-ALL"="#CD3333",
  "T-ALL-remission"="#C7A5A5",
  "T-ALL-relapse"="#CD3333",
  "B-ALL"="#E69F00",
  "B-ALL-remission"="#D8CAAB",
  "B-ALL-relapse"="#E69F00",
  MCL="#F0E442",
  C1="#F5D51B",
  cMCL="#F5D51B",
  C2="#0072B2",
  nnMCL="#0072B2",
  "DLBCL-NOS"="#56B4E9",
  MBL="#5A9A89",
  CLL="#009E73",
  RT="#02C590",
  unmutated="#e65100",
  mutated="#361379",
  nCLL="#006E93",
  iCLL="#FDC010",
  mCLL="#963736",
  Disease.Unclassified="#CCCCCC",
  MGUS="#BF99AE",
  MM="#CC79A7"
)




df.age %>% 
  ggplot(aes(x=Group,
             y=SD.fCpGs
             ))+
  geom_boxplot(aes(fill=Group),outlier.size = 0.5,lwd=0.3)+
  scale_fill_manual(values = cols.paper)+
  stat_compare_means(method = "t.test",
                     comparisons = list(c("[0,30]","(30,50]"),
                                                          c("(30,50]","(50,70]"),
                                                          c("(50,70]","(70,80]"),
                                                          c("(70,80]","(80,101]"),
                                                          c("[0,30]","(80,101]")
                                                          ),
                     bracket.size = 0.2,
                     size=1,label.y=0.3,step.increase = 0.04)+
  xlab(NULL)+
  theme_classic()+
  theme(text=element_text(color="grey0",size = 6),
        line = element_line(linewidth = 0.2),
        axis.text.x = element_text(color="grey0",angle = 44,hjust = 1,vjust = 1),
        strip.placement = "outside",
        strip.background = element_blank(),
        plot.background = element_blank(),
        panel.background = element_blank(),
        panel.spacing = unit(0.1,"pt"),
        legend.key.size = unit(0,"pt"),
        legend.position = "none",
        legend.margin=margin(0,0,0,0),
        legend.background = element_blank(),
        legend.box.background = element_blank(),
        legend.title.position =  "top",
        legend.box.spacing = unit(3,"pt"),
        legend.justification = "left",
        legend.title = element_text(vjust = 1),
        panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
  )

7 Session info

print(sessionInfo())

## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] broom.helpers_1.21.0  broom_1.0.9           patchwork_1.3.2      
##  [4] stringr_1.5.1         gtExtras_0.6.0        gtsummary_2.4.0      
##  [7] gt_1.0.0              ggstats_0.10.0        openxlsx_4.2.8       
## [10] EpiDISH_2.24.0        circlize_0.4.16       ComplexHeatmap_2.24.1
## [13] GEOquery_2.76.0       Biobase_2.68.0        BiocGenerics_0.54.0  
## [16] generics_0.1.4        pals_1.10             ggpubr_0.6.1         
## [19] ggplot2_3.5.2         dplyr_1.1.4           janitor_2.2.1        
## [22] data.table_1.17.8     BiocStyle_2.36.0     
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          rstudioapi_0.17.1          
##   [3] jsonlite_2.0.0              shape_1.4.6.1              
##   [5] magrittr_2.0.3              magick_2.8.7               
##   [7] farver_2.1.2                rmarkdown_2.29             
##   [9] GlobalOptions_0.1.2         vctrs_0.6.5                
##  [11] locfdr_1.1-8                Cairo_1.6-5                
##  [13] paletteer_1.6.0             rstatix_0.7.2.999          
##  [15] tinytex_0.57                forcats_1.0.0              
##  [17] htmltools_0.5.8.1           S4Arrays_1.8.1             
##  [19] haven_2.5.5                 curl_7.0.0                 
##  [21] SparseArray_1.8.1           Formula_1.2-5              
##  [23] sass_0.4.10                 bslib_0.9.0                
##  [25] fontawesome_0.5.3           plyr_1.8.9                 
##  [27] lubridate_1.9.4             cachem_1.1.0               
##  [29] commonmark_2.0.0            lifecycle_1.0.4            
##  [31] iterators_1.0.14            pkgconfig_2.0.3            
##  [33] Matrix_1.7-4                R6_2.6.1                   
##  [35] fastmap_1.2.0               GenomeInfoDbData_1.2.14    
##  [37] MatrixGenerics_1.20.0       snakecase_0.11.1           
##  [39] clue_0.3-66                 digest_0.6.37              
##  [41] colorspace_2.1-1            rematch2_2.1.2             
##  [43] S4Vectors_0.46.0            GenomicRanges_1.60.0       
##  [45] labeling_0.4.3              timechange_0.3.0           
##  [47] httr_1.4.7                  abind_1.4-8                
##  [49] mgcv_1.9-3                  compiler_4.5.0             
##  [51] proxy_0.4-27                withr_3.0.2                
##  [53] doParallel_1.0.17           backports_1.5.0            
##  [55] carData_3.0-5               R.utils_2.13.0             
##  [57] maps_3.4.3                  ggsignif_0.6.4             
##  [59] MASS_7.3-65                 DelayedArray_0.34.1        
##  [61] rjson_0.2.23                tools_4.5.0                
##  [63] zip_2.3.3                   rentrez_1.2.4              
##  [65] R.oo_1.27.1                 glue_1.8.0                 
##  [67] quadprog_1.5-8              nlme_3.1-168               
##  [69] reshape2_1.4.4              cluster_2.1.8.1            
##  [71] gtable_0.3.6                labelled_2.14.1            
##  [73] tzdb_0.5.0                  R.methodsS3_1.8.2          
##  [75] class_7.3-23                tidyr_1.3.1                
##  [77] hms_1.1.3                   xml2_1.4.0                 
##  [79] car_3.1-3                   XVector_0.48.0             
##  [81] markdown_2.0                foreach_1.5.2              
##  [83] pillar_1.11.0               limma_3.64.3               
##  [85] splines_4.5.0               lattice_0.22-7             
##  [87] tidyselect_1.2.1            knitr_1.50                 
##  [89] litedown_0.7                bookdown_0.44              
##  [91] IRanges_2.42.0              SummarizedExperiment_1.38.1
##  [93] stats4_4.5.0                xfun_0.53                  
##  [95] statmod_1.5.0               matrixStats_1.5.0          
##  [97] stringi_1.8.7               UCSC.utils_1.4.0           
##  [99] yaml_2.3.10                 evaluate_1.0.5             
## [101] codetools_0.2-20            tibble_3.3.0               
## [103] BiocManager_1.30.26         cli_3.6.5                  
## [105] jquerylib_0.1.4             dichromat_2.0-0.1          
## [107] Rcpp_1.1.0                  GenomeInfoDb_1.44.2        
## [109] mapproj_1.2.12              png_0.1-8                  
## [111] XML_3.99-0.19               fastcluster_1.3.0          
## [113] parallel_4.5.0              readr_2.1.5                
## [115] scales_1.4.0                e1071_1.7-16               
## [117] purrr_1.1.0                 crayon_1.5.3               
## [119] GetoptLong_1.0.5            rlang_1.1.6                
## [121] cards_0.7.0

Analyses related to fCpG methylation and aging, Gabbutt and Duran-Ferrer 2025

10/07/2025

1 Load packages and fCpGs information

2 Cell fractions in blood

3 B and T cell fCpG dynamics

4 External datasets

4.1 Purified cell types

4.1.1 GSE184269 - Roy 2021

4.1.2 GSE137594 - Clark 2020

4.2 Whole blood samples

4.2.1 Pediatric samples

4.2.1.1 GSE36054 - Alisch 2015

4.2.2 Adult and centenarian samples

4.2.2.1 GSE40279 - Hannum 2012

4.2.2.2 GSE55763 - Lehne 2015

4.2.2.3 GSE72773 - Horvath 2016

5 Extended Data Fig. 3

6 Extended Data Fig. 3 and Supple.Fig. 7

7 Session info