We use several datasets available, including whole blood and purified cell types to study fCpG methylation changes throughout human lifespan. Collectively, we show how fluctuations remain stable during human lifespan, and an increase in standard deviation in samples, consistent with know cloncal expansions in the hematopoietic system during aging.
##
## Load necessary packages and set global options
##
library(data.table)
library(janitor)
library(dplyr)
library(ggpubr)
library(pals)
library(GEOquery)
library(ComplexHeatmap)
library(circlize)
library(EpiDISH)
library(openxlsx)
library(ggstats)
library(gt)
library(gtsummary)
library(gtExtras)
library(stringr)
library(patchwork)
library(ggstats)
library(gtsummary)
library(broom)
library(broom.helpers)
options(stringsAsFactors = F,max.print = 10000,error=NULL,
"openxlsx.dateFormat" = "dd/mm/yyyy"
)
##
## 1. load fCpGs
##
fCpGs.betas <-
fread("../Data/FMC/Final_data/beta_all_samples_fcpgs_laplacian.csv",data.table = F)
rownames(fCpGs.betas) <- fCpGs.betas$V1
fCpGs.betas <- fCpGs.betas[,-1]
fCpGs.betas[1:5,1:15]
dim(fCpGs.betas)
sheets <- getSheetNames("../final_revision/Supplementary_Tables_R3.xlsx")
sheets <- setNames(sheets,sheets)
sheets
fCpGs.metadata <- lapply(sheets,function(i){
writeLines(i)
openxlsx::read.xlsx("../final_revision/Supplementary_Tables_R3.xlsx",sheet = i,detectDates = T,startRow = 3)
})
names(fCpGs.metadata)
fCpGs.metadata$supplementary_table_2 %>% head
## load normal lymphoid samples used for the supplement.
fCpGs.metadata.all <- openxlsx::read.xlsx("../Data/FMC.blood.samples.v2.4.xlsx",detectDates = T)
head(fCpGs.metadata.all);dim(fCpGs.metadata.all)
fCpGs.metadata.all %>% count(CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2)
all(colnames(fCpGs.betas) %in% fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)
table(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% colnames(fCpGs.betas) )
## remove bad quality samples and leave those with meth info
fCpGs.metadata.all <- fCpGs.metadata.all[match(colnames(fCpGs.betas),fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS),]
fCpGs.metadata.all %>% dim
We will use the package EpiDISH to find cell types described in Salas 2022
ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T
##
## Calculate cell fractions in using epiDISH data, ie Salas 2022 nat comms data
##
data(cent12CT450k.m)
Heatmap(cent12CT450k.m,col=coolwarm(),show_row_names = F)
We plot the correlation of fCpG mean methylation and standard deviation (SD) of cell types included in our study design (figure supplementary 7).
As age is not avaiable for some samples, we use here the Horvath value.
#
## 1. Data from our submitted manuscipt
##
all(colnames(fCpGs.betas) %in% fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)
table(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% colnames(fCpGs.betas))
identical(colnames(fCpGs.betas),fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS)
## mean and sd for fCpGs
fCpGs.metadata.all$SD.fCpGs <- apply(fCpGs.betas[,fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS],2,sd,na.rm=T)
fCpGs.metadata.all$mean.fCpGs <- apply(fCpGs.betas[,fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS],2,mean,na.rm=T)
fCpGs.metadata.all$submitted <-
ifelse(fCpGs.metadata.all$PARTICIPANT_ID_ANONYMOUS %in% fCpGs.metadata$supplementary_table_2$participant_id_anonymous,
T,F
)
fCpGs.metadata.all <-
fCpGs.metadata.all %>%
filter(submitted) %>%
mutate(CELL_TYPE_ANNOT.2=case_when(CELL_TYPE=="CD19_positive" ~ "Bcell (CD19+)",
CELL_TYPE=="Granulocyte" ~ "Granulocyte",
CELL_TYPE=="NK_cell" ~ "NK",
CELL_TYPE_ANNOT.2=="Tcell" ~ "Tcell(CD3+)",
CELL_TYPE=="Monocyte" ~ "Monocyte",
.default = CELL_TYPE_ANNOT.2),
Horvath=Clocks_Horvath
)
##plot
fCpGs.metadata.all %>% count(CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2)
fCpGs.metadata.all %>%
filter(CELL_TYPE!="Macrophage_invitro_differentiated",
CELL_TYPE_ANNOT.2!="pre-Bcell",
CELL_TYPE_ANNOT.2!="bmPC",
CELL_TYPE_ANNOT.2!="tPC",
grepl("^Normal",CELL_TYPE_ANNOT.1)) %>%
select(SAMPLE_ID,CELL_TYPE,CELL_TYPE_ANNOT.1,CELL_TYPE_ANNOT.2,AGE_SAMPLING,Clocks_Horvath,submitted)
fCpGs.metadata.all.plot <-
fCpGs.metadata.all %>%
filter(CELL_TYPE!="Macrophage_invitro_differentiated",
CELL_TYPE_ANNOT.2!="pre-Bcell",
CELL_TYPE_ANNOT.2!="bmPC",
CELL_TYPE_ANNOT.2!="tPC",
grepl("^Normal",CELL_TYPE_ANNOT.1),
submitted
)
fCpGs.metadata.all.plot <-
fCpGs.metadata.all.plot %>%
mutate(CELL_TYPE_ANNOT.2=factor(CELL_TYPE_ANNOT.2,
levels=c("NBC","GCB","MBC","Tcell(CD3+)"),
labels=c("Naïve B cell","Germinal center B cell","Memory B cell","T cell")
)
)
fCpGs.metadata.all.plot %>% glimpse()
##plot
fCpGs.metadata.all.plot %>%
ggplot(aes(x = Horvath,
y = AGE_SAMPLING,
))+
geom_point()+
geom_smooth(method = "lm",color="grey0")+
stat_cor(size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
fCpGs.metadata.all.plot %>%
ggplot(aes(x=Horvath))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_histogram()+
theme_classic()+
ggtitle(paste0("Normal lymphoid and myeloid cells, N=",nrow(fCpGs.metadata.all.plot)))+
guides(fill=guide_legend("Donor id",nrow=2))+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
##correlations
fCpGs.metadata.all.plot %>%
ggplot(aes(x=Horvath,
y=mean.fCpGs
))+
geom_point(
aes(fill=CELL_TYPE_ANNOT.2),
size=0.75,
pch=21,
stroke=0
)+
geom_smooth(aes(fill=CELL_TYPE_ANNOT.2),
method = "lm",color="black",lwd=0.5,alpha=0.3
)+
scale_fill_manual(values = cols25())+
facet_wrap(~CELL_TYPE_ANNOT.2)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
)
),
label.y.npc="top",
label.x.npc = "left",
method = "pearson",
size=1.25)+
# stat_cor(method = "pearson",size=1.5)+
guides(fill=guide_legend("Donor id",nrow=2))+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(2,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
fCpGs.metadata.all.plot %>%
ggplot(aes(x=Horvath,
y=SD.fCpGs
))+
geom_point(
aes(fill=CELL_TYPE_ANNOT.2),
size=0.75,
pch=21,
stroke=0
)+
geom_smooth(aes(fill=CELL_TYPE_ANNOT.2),
method = "lm",color="black",lwd=0.5,alpha=0.3
)+
scale_fill_manual(values = cols25())+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
)
),
label.y.npc="top",
label.x.npc = "left",
method = "pearson",
size=1.25)+
facet_wrap(~CELL_TYPE_ANNOT.2)+
# stat_cor(method = "pearson",size=1.5)+
guides(fill=guide_legend("Donor id",nrow=2))+
labs(caption = "* Benjamini & Hochberg corrected P-values")+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(2,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
We next plot a heatmap with all fCpGs, and see a whitish color heatmap, indicative of those samples being polyclonal.
##
## HEATMAP fCpGs
##
# plot heatmap sorted by age and with colors
evoflux.meth <- fread("../Revision/Data/meth_palette_evoflux.tsv")
evoflux.meth
ht_opt$legend_grid_width <- unit(2,"mm")
ht_opt$legend_grid_height <-unit(2,"mm")
ht_opt$legend_labels_gp <- gpar(fontsize=5)
ht_opt$legend_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_title_gp <- gpar(fontsize=5)
ht_opt$heatmap_row_names_gp <- gpar(fontsize=5)
ht_opt$heatmap_column_names_gp <- gpar(fontsize=5)
ht_opt$HEATMAP_LEGEND_PADDING <- unit(1,"mm")
ht_opt$ANNOTATION_LEGEND_PADDING <- unit(1,"mm")
ht_opt$legend_gap <- unit(c(1,1),"mm")
ht_opt$fast_hclust <- T
h.fCpGs <- Heatmap(fCpGs.betas[,fCpGs.metadata.all.plot$PARTICIPANT_ID_ANONYMOUS],
col=evoflux.meth$color,
cluster_columns = T,
cluster_rows = T,
show_row_names = F,
show_column_names = F,
show_row_dend = T,
show_column_dend = T,
name="DNA methylation",
border = T,
row_dend_gp = gpar(lwd=0.1),
column_dend_gp = gpar(lwd=0.1),
top_annotation = HeatmapAnnotation(
df=fCpGs.metadata.all.plot %>%
select(age_sampling=AGE_SAMPLING,Horvath,
annot_1=CELL_TYPE_ANNOT.1,annot_2=CELL_TYPE_ANNOT.2),
col=list(
age_sampling=colorRamp2(breaks = seq(min(fCpGs.metadata.all.plot$AGE_SAMPLING,na.rm = T),
max(fCpGs.metadata.all.plot$AGE_SAMPLING,na.rm = T),
length.out=100),brewer.blues(100)
),
Horvath=colorRamp2(breaks = seq(min(fCpGs.metadata.all.plot$Horvath,na.rm = T),
max(fCpGs.metadata.all.plot$Horvath,na.rm = T),
length.out=100),brewer.blues(100)
),
annot_1=setNames(kelly(fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.1 %>%
unique() %>%
length()
),
fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.1 %>%
unique()
),
annot_2=setNames(kelly(fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.2 %>%
unique() %>%
length()
),
fCpGs.metadata.all.plot$CELL_TYPE_ANNOT.2 %>%
unique()
)
),
show_annotation_name = T,
na_col = "grey45",
annotation_name_side = "left",
show_legend = T,
annotation_name_gp = gpar(fontsize=4),
annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
simple_anno_size = unit(2,"mm")
)
)
draw(h.fCpGs,
merge_legend=T,
heatmap_legend_side = "right",
annotation_legend_side = "right"
)
We next perform analyses separately in each dataset.
We next download sorted immune cell types in adults, available in GSE184269. We see a similar scenario as those samples included in our study.
##
## Download # GSE184269, sorted immune cells in aging
##
gse <- getGEO(GEO = "GSE184269",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE184269_series_matrix.txt.gz
show(gse)
## phenodata
sorted.age.metadata <-
pData(phenoData(gse)) %>%
clean_names()
sorted.age.metadata %>% head
sorted.age.metadata %>% count(source_name_ch1)
sorted.age.metadata$colnames.betas <-
gsub("ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM5583nnn/GSM\\d+/suppl/GSM\\d+_|_Grn.idat.gz",
"",
sorted.age.metadata$supplementary_file)
sorted.age.metadata$colnames.betas
sorted.age.metadata$age <- as.numeric(sorted.age.metadata$age_ch1)
## get meth values
sorted.age.betas <- fread("../Revision/Data/450k-Aging/GSE184269_Matrix_processed_FINAL.txt.gz",data.table = F)
head(sorted.age.betas);dim(sorted.age.betas)
rownames(sorted.age.betas) <- sorted.age.betas$ID_RED
sorted.age.betas <- sorted.age.betas[,grep("Detection_Pval|ID_RED",colnames(sorted.age.betas),value = T,invert = T)]
head(sorted.age.betas);dim(sorted.age.betas)
all(colnames(sorted.age.betas) %in% sorted.age.metadata$colnames.betas)
sorted.age.betas <- sorted.age.betas[,sorted.age.metadata$colnames.betas]
colnames(sorted.age.betas) <- sorted.age.metadata$geo_accession
##fcpgs beta values
table(rownames(fCpGs.betas) %in% rownames(sorted.age.betas))
fCpGs.sorted.age <- sorted.age.betas[rownames(fCpGs.betas),]
identical(colnames(fCpGs.sorted.age),sorted.age.metadata$geo_accession)
##complete with fCPGs stats
sorted.age.metadata$SD.fCpGs <- apply(fCpGs.sorted.age,2,sd,na.rm=T)
sorted.age.metadata$mean.fCpGs <- apply(fCpGs.sorted.age,2,mean,na.rm=T)
##calculate cell fractions
sorted.age.metadata<-
cbind(sorted.age.metadata,
epidish(beta.m = sorted.age.betas,ref.m = cent12CT450k.m)$estF
)
head(sorted.age.metadata)
Age distribution:
sorted.age.metadata %>% head
sorted.age.metadata %>% dim
sorted.age.metadata %>% count(source_name_ch1)
sorted.age.metadata %>% count(source_name_ch1,title)
sorted.age.metadata <-
sorted.age.metadata %>%
mutate(source=case_when(source_name_ch1=="Peripheral blood" ~ "PBMCs",.default = source_name_ch1))
sorted.age.metadata %>% count(donor_id_ch1,source_name_ch1)
sorted.age.metadata %>% count(donor_id_ch1)
sorted.age.metadata %>%
ggplot(aes(x=age))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_histogram()+
theme_classic()+
ggtitle(paste0("Samples from Roy et al, Immunity 2021, N=",nrow(sorted.age.metadata)))+
guides(fill=guide_legend("Donor id",nrow=2))+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
We see in the heatmap a similar scenario, but there seems to be donor-specific batches.
sorted.age.metadata$suspected.batch <- str_extract(sorted.age.metadata$description,"Gestalt_Expt_\\d")
### heatmap with sorted ageed samples
h.fCpGs <- Heatmap(fCpGs.sorted.age,
col=evoflux.meth$color,
cluster_columns = T,
cluster_rows = T,
show_row_names = F,
show_column_names = F,
show_row_dend = T,
show_column_dend = T,
name="DNA methylation",
border = T,
row_dend_gp = gpar(lwd=0.1),
column_dend_gp = gpar(lwd=0.1),
top_annotation = HeatmapAnnotation(
df=sorted.age.metadata %>%
select(suspected.batch,
donor_id=donor_id_ch1,source,Sex=characteristics_ch1_2,age),
col=list(
suspected.batch=setNames(tableau20(sorted.age.metadata$suspected.batch %>% unique() %>% length()),
sorted.age.metadata$suspected.batch %>% unique()
),
donor_id=setNames(alphabet2(sorted.age.metadata %>%
pull(donor_id_ch1) %>%
unique() %>%
length()
),
sorted.age.metadata %>%
pull(donor_id_ch1) %>%
unique()
),
source=setNames(kelly(sorted.age.metadata %>%
pull(source) %>%
unique() %>%
length()
),
sorted.age.metadata %>%
pull(source) %>%
unique()
),
Sex=setNames(c("deepskyblue","pink2"),c("Sex: M","Sex: F")),
age=colorRamp2(breaks = seq(min(sorted.age.metadata$age),
max(sorted.age.metadata$age),
length.out=100),
brewer.blues(100))
),
show_annotation_name = T,
na_col = "grey45",
annotation_name_side = "left",
show_legend = T,
annotation_name_gp = gpar(fontsize=4),
annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
simple_anno_size = unit(2,"mm")
)
)
draw(h.fCpGs,
merge_legend=T,
heatmap_legend_side = "right",
annotation_legend_side = "right"
)
We follow with other cohort with purified cells, including B and T cells from patients with different inflammatory conditions. GSE137594
gse <- getGEO(GEO = "GSE137594",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE137594_series_matrix.txt.gz
show(gse)
## phenodata
Clark2020.metadata <- pData(phenoData(gse)) %>% clean_names()
Clark2020.metadata %>% head
Clark2020.metadata$age <- as.numeric(Clark2020.metadata$age_ch1)
qplot(Clark2020.metadata$age)
rownames(Clark2020.metadata)
## get meth values
Clark2020.betas <- fread("../Revision/Data/450k-Aging/GSE137594_norm_avg_beta_pval.txt.gz",data.table = F)
head(Clark2020.betas);dim(Clark2020.betas);gc()
rownames(Clark2020.betas) <- Clark2020.betas$ID_REF
Clark2020.betas <- Clark2020.betas[,grep("Detection|ID_REF",colnames(Clark2020.betas),value = T,invert = T)]
identical(colnames(Clark2020.betas) , (Clark2020.metadata$title %>% gsub(": Newcastle Early Arthritis Clinic patient \\d+","",.)))
Clark2020.betas <- Clark2020.betas[,gsub(": Newcastle Early Arthritis Clinic patient \\d+","",Clark2020.metadata$title)]
colnames(Clark2020.betas) <- Clark2020.metadata$geo_accession
head(Clark2020.betas);dim(Clark2020.betas)
##fcpgs
table(rownames(fCpGs.betas) %in% rownames(Clark2020.betas))
fCpGs.Clark2020 <- Clark2020.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(Clark2020.betas)],]
head(fCpGs.Clark2020);dim(fCpGs.Clark2020)
identical(colnames(fCpGs.Clark2020),Clark2020.metadata$geo_accession)
##complete with fCPGs stats
Clark2020.metadata$SD.fCpGs <- apply(fCpGs.Clark2020,2,sd,na.rm=T)
Clark2020.metadata$mean.fCpGs <- apply(fCpGs.Clark2020,2,mean,na.rm=T)
head(Clark2020.metadata);dim(Clark2020.metadata)
##cell franctions
Clark2020.metadata<-
cbind(Clark2020.metadata,
epidish(beta.m = Clark2020.betas,ref.m = cent12CT450k.m)$estF
)
head(Clark2020.metadata)
Age distribution
Clark2020.metadata %>% head
Clark2020.metadata %>% dim
Clark2020.metadata %>% count(source_name_ch1)
Clark2020.metadata %>% count(characteristics_ch1)
Clark2020.metadata %>% count(characteristics_ch1_2)
Clark2020.metadata %>% count(first_diagnosis_ch1,working_diagnosis_ch1)
Clark2020.metadata %>%
ggplot(aes(x=age))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_histogram()+
theme_classic()+
ggtitle(paste0("Samples from Clark et al 2020, N=",nrow(Clark2020.metadata)))+
guides(fill=guide_legend("Donor id",nrow=2))+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
There is a clear increase in SD during aging, while mean remain stable
Clark2020.metadata %>%
ggplot(aes(x=age,
y=mean.fCpGs
))+
geom_point(aes(fill=characteristics_ch1_2),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~characteristics_ch1_2)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
Clark2020.metadata %>%
ggplot(aes(x=age,
y=SD.fCpGs
))+
geom_point(aes(fill=characteristics_ch1_2),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~characteristics_ch1_2)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
labs(caption = "* Benjamini & Hochberg corrected P-values")+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "none",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
We next plot a heatmap and see an overall polyclonal population of cells regardless of inflamatory condition.
Clark2020.metadata %>% head
Clark2020.metadata$description
##
## HEATMAP with sorted ageed samples
##
h.fCpGs <- Heatmap(fCpGs.Clark2020,
col=evoflux.meth$color,
cluster_columns = T,
cluster_rows = T,
show_row_names = F,
show_column_names = F,
show_row_dend = T,
show_column_dend = T,
name="DNA methylation",
border = T,
row_dend_gp = gpar(lwd=0.1),
column_dend_gp = gpar(lwd=0.1),
top_annotation = HeatmapAnnotation(
df=Clark2020.metadata %>%
select(Source=source_name_ch1,Working_diagnosis=working_diagnosis_ch1,age,Sex=gender_ch1),
col=list(
Source=setNames(kelly(Clark2020.metadata$source_name_ch1 %>% unique() %>% length()),
Clark2020.metadata$source_name_ch1 %>% unique()
),
Working_diagnosis=setNames(tableau20(Clark2020.metadata$working_diagnosis_ch1 %>% unique() %>% length()),
Clark2020.metadata$working_diagnosis_ch1 %>% unique()
),
Sex=setNames(c("deepskyblue","pink2"),c("Male","Female")),
age=colorRamp2(breaks = seq(min(Clark2020.metadata$age),
max(Clark2020.metadata$age),
length.out=100),
brewer.blues(100))
),
show_annotation_name = T,
na_col = "grey45",
annotation_name_side = "left",
show_legend = T,
annotation_name_gp = gpar(fontsize=4),
annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
simple_anno_size = unit(2,"mm")
)
)
draw(h.fCpGs,
merge_legend=T,
heatmap_legend_side = "right",
annotation_legend_side = "right"
)
We next follow the analyses with whole blood samples, accounting for know changes in cell type compositions during aging.
We next load data from pediatric donors available at GSE36054
## 2. padiatric with ethnicity
gse <- getGEO(GEO = "GSE36054",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE36054_series_matrix.txt.gz
show(gse)
## phenodata
pediatrics.metadata <-
pData(phenoData(gse)) %>%
clean_names()
pediatrics.metadata %>% count(ethnicity_ch1)
pediatrics.metadata %>% count(source_name_ch1)
pediatrics.metadata %>% count(characteristics_ch1,ethnicity_ch1)
## get meth values
table(rownames(fCpGs.betas) %in% rownames(exprs(gse)))
pediatrics.betas <- exprs(gse)
fCpGs.pediatrics <- pediatrics.betas[rownames(fCpGs.betas),] %>% as.data.frame()
identical(colnames(fCpGs.pediatrics),pediatrics.metadata$geo_accession)
##complete with fCPGs stats
pediatrics.metadata$SD.fCpGs <- apply(fCpGs.pediatrics,2,sd,na.rm=T)
pediatrics.metadata$mean.fCpGs <- apply(fCpGs.pediatrics,2,mean,na.rm=T)
class(pediatrics.metadata)
## complete cell types
pediatrics.metadata<-
cbind(pediatrics.metadata,
epidish(beta.m = pediatrics.betas,ref.m = cent12CT450k.m)$estF
)
head(pediatrics.metadata)
Age distribution
##
## PEDIATRIC POPULATION use other data sets to further strength previous results, and to show the same in pediatric population
##
# GSE36054 dataset
pediatrics.metadata %>% head
pediatrics.metadata %>% dim
pediatrics.metadata <-
pediatrics.metadata %>%
mutate(age=as.numeric(age_at_collection_months_ch1)/12)
pediatrics.metadata %>% count(characteristics_ch1_7)
pediatrics.metadata %>% count(cell_type_ch1)
pediatrics.metadata %>% count(characteristics_ch1,ethnicity_ch1)
pediatrics.metadata %>% filter(is.na(age)) %>% count(characteristics_ch1,ethnicity_ch1)
head(pediatrics.betas);dim(pediatrics.betas)
pediatrics.metadata.plot <-
pediatrics.metadata %>%
filter(!is.na(age),characteristics_ch1!="group: Harvard.Sib.Failed")
##plot age distribution in pediatrics
pediatrics.metadata.plot %>% head
pediatrics.metadata.plot %>%
ggplot(aes(x=age))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_histogram()+
theme_classic()+
ggtitle(paste0("Samples from Alisch et al, Genome Research 2015, N=",nrow(pediatrics.metadata.plot)))+
guides(fill=guide_legend("Donor id",nrow=2))+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
Contrary to adult donors, in these samples there is no association of SD with the age of the donors
pediatrics.metadata.plot %>%
ggplot(aes(x=age,
y=mean.fCpGs
))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_point(aes(fill=cell_type_ch1),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~cell_type_ch1)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
pediatrics.metadata.plot %>%
ggplot(aes(x=age,
y=SD.fCpGs
))+
geom_point(aes(fill=cell_type_ch1),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~cell_type_ch1)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
Also when considering the cellular composition of blood (a subtle signal already appears though in SD!)
## adjusting for cellular fractions
pediatrics.metadata.plot %>%
reshape2::melt(measure.vars = c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
id.vars = "age") %>%
ggplot(aes(x=age,
y=value,
fill=variable
))+
geom_point(pch=21,size=1,stroke=0.1)+
geom_smooth(method = "lm",color="black")+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
# label.x.npc = 0.2,label.y.npc = 1,
method = "pearson",
size=1.25)+
scale_fill_manual(values = cols25())+
ylab("Cell fraction")+
facet_wrap(.~variable,scales = "free")+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
# legend.spacing.y = unit(3,"pt"),
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
##liner model corrected by cell fracts and ethnicity
# pediatrics.metadata %>% head
# pediatrics.metadata %>% count(ethnicity_ch1)
lm.fit <-
pediatrics.metadata %>%
mutate(ethnicity=ethnicity_ch1) %>%
filter(ethnicity!="Unknown") %>%
lm(SD.fCpGs ~
age+
ethnicity+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 129 | -6e-04 | -1e-03, -8e-05 | 2e-02 | 1e-01 |
ethnicity |
|
|
|
|
|
    Asian | 3 | — | — |
|
|
    Black | 74 | -2e-03 | -1e-02, 8e-03 | 7e-01 | 7e-01 |
    Other | 44 | -6e-03 | -2e-02, 3e-03 | 2e-01 | 4e-01 |
    White | 8 | -7e-03 | -2e-02, 3e-03 | 2e-01 | 4e-01 |
Mono | 129 | 3e-02 | -1e-01, 2e-01 | 7e-01 | 7e-01 |
Neu | 129 | 8e-02 | -4e-02, 2e-01 | 2e-01 | 4e-01 |
Baso | 129 | 4e-01 | 1e-01, 7e-01 | 6e-03 | 5e-02 |
Eos | 129 | 1e-01 | -1e-02, 2e-01 | 8e-02 | 3e-01 |
NK | 129 | 3e-02 | -1e-01, 2e-01 | 7e-01 | 7e-01 |
CD8Tnv | 129 | 5e-02 | -9e-02, 2e-01 | 5e-01 | 6e-01 |
CD8Tmem | 129 | 6e-02 | -8e-02, 2e-01 | 4e-01 | 6e-01 |
CD4Tnv | 129 | 1e-01 | -1e-02, 2e-01 | 9e-02 | 3e-01 |
CD4Tmem | 129 | 3e-01 | 1e-01, 4e-01 | 2e-04 | 3e-03 |
Treg | 129 | 1e-01 | -9e-02, 3e-01 | 3e-01 | 4e-01 |
Bnv | 129 | 5e-02 | -8e-02, 2e-01 | 5e-01 | 6e-01 |
Bmem | 129 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.525; Adjusted R² = 0.462; Sigma = 0.008; Statistic = 8.32; p-value = <0.001; df = 15; Log-likelihood = 454; AIC = -874; BIC = -825; Deviance = 0.007; Residual df = 113; No. Obs. = 129 | |||||
1 Benjamini & Hochberg correction for multiple testing |
##mean fcpg
lm.fit <-
pediatrics.metadata %>%
mutate(ethnicity=ethnicity_ch1) %>%
filter(ethnicity!="Unknown") %>%
lm(mean.fCpGs ~
age+
ethnicity+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 129 | 1e-03 | -3e-04, 2e-03 | 1e-01 | 5e-01 |
ethnicity |
|
|
|
|
|
    Asian | 3 | — | — |
|
|
    Black | 74 | -5e-03 | -3e-02, 2e-02 | 7e-01 | 7e-01 |
    Other | 44 | 5e-04 | -2e-02, 3e-02 | 1e+00 | 1e+00 |
    White | 8 | -1e-02 | -4e-02, 2e-02 | 4e-01 | 5e-01 |
Mono | 129 | -2e-01 | -6e-01, 2e-01 | 3e-01 | 5e-01 |
Neu | 129 | -2e-01 | -5e-01, 1e-01 | 3e-01 | 5e-01 |
Baso | 129 | -7e-01 | -2e+00, 1e-01 | 8e-02 | 5e-01 |
Eos | 129 | -2e-01 | -6e-01, 1e-01 | 2e-01 | 5e-01 |
NK | 129 | -1e-01 | -5e-01, 3e-01 | 5e-01 | 6e-01 |
CD8Tnv | 129 | -2e-01 | -5e-01, 2e-01 | 4e-01 | 5e-01 |
CD8Tmem | 129 | -2e-01 | -6e-01, 2e-01 | 4e-01 | 5e-01 |
CD4Tnv | 129 | -2e-01 | -5e-01, 2e-01 | 3e-01 | 5e-01 |
CD4Tmem | 129 | -2e-01 | -5e-01, 2e-01 | 3e-01 | 5e-01 |
Treg | 129 | -5e-01 | -1e+00, 6e-02 | 8e-02 | 5e-01 |
Bnv | 129 | -1e-01 | -4e-01, 2e-01 | 5e-01 | 6e-01 |
Bmem | 129 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.128; Adjusted R² = 0.012; Sigma = 0.020; Statistic = 1.10; p-value = 0.4; df = 15; Log-likelihood = 327; AIC = -620; BIC = -572; Deviance = 0.047; Residual df = 113; No. Obs. = 129 | |||||
1 Benjamini & Hochberg correction for multiple testing |
In the heamtap we can also obseerve there is no association with the ethnicity, nor sex.
##
### HEATMAP FCPGS
##
pediatrics.metadata.plot %>% head()
h.fCpGs <- Heatmap(fCpGs.pediatrics %>%
select( pediatrics.metadata.plot %>% pull(geo_accession)),
col=evoflux.meth$color,
cluster_columns = T,
cluster_rows = T,
show_row_names = F,
show_column_names = F,
show_row_dend = T,
show_column_dend = T,
name="DNA methylation",
border = T,
row_dend_gp = gpar(lwd=0.1),
column_dend_gp = gpar(lwd=0.1),
top_annotation = HeatmapAnnotation(
df=pediatrics.metadata.plot %>%
select(ethnicity=ethnicity_ch1,age,Sex=gender_ch1),
col=list(
ethnicity=setNames(tableau20(pediatrics.metadata.plot %>%
pull(ethnicity_ch1) %>%
unique() %>% na.omit() %>%
length()
),
pediatrics.metadata.plot %>%
pull(ethnicity_ch1) %>%na.omit() %>%
unique()
),
Sex=setNames(c("deepskyblue","pink2"),c("M","F")),
age=colorRamp2(breaks = seq(min(pediatrics.metadata.plot$age),
max(pediatrics.metadata.plot$age),
length.out=100),
brewer.blues(100))
# Neu=colorRamp2(breaks = seq_range(range(pediatrics.metadata.plot$Neu),length.out = 100),brewer.orrd(100))
),
show_annotation_name = T,
na_col = "grey45",
annotation_name_side = "left",
show_legend = T,
annotation_name_gp = gpar(fontsize=4),
annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
simple_anno_size = unit(2,"mm")
)
)
h.fCpGs <- draw(h.fCpGs,
merge_legend=T,
heatmap_legend_side = "right",
annotation_legend_side = "right"
)
We load Hannum dataset comprising 20-100 year old donors, available thorough GSE40279
##
## GSE40279
##
##download Hannum from GEO GSE40279
gse <- getGEO(GEO = "GSE40279",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE40279_series_matrix.txt.gz
show(gse)
## phenodata
hannum.metadata <-
pData(phenoData(gse)) %>%
clean_names()
hannum.metadata %>% head
hannum.metadata %>% select(contains("age")) %>% head
hannum.metadata <-
hannum.metadata %>%
mutate(age=as.numeric(age_y_ch1))
## get meth values
table(rownames(fCpGs.betas) %in% rownames(exprs(gse)))
hannum.betas <- exprs(gse)
head(hannum.betas)
colnames(hannum.betas)
identical(colnames(hannum.betas),hannum.metadata$geo_accession)
# calculate SD and mean for all samples for fCpGs
table(rownames(fCpGs.betas) %in% rownames(hannum.betas))
fCpGs.hannum <- hannum.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(hannum.betas)],]
identical(colnames(hannum.betas),hannum.metadata$geo_accession)
##complete with fCPGs stats
hannum.metadata$SD.fCpGs <- apply(fCpGs.hannum,2,sd,na.rm=T)
hannum.metadata$mean.fCpGs <- apply(fCpGs.hannum,2,mean,na.rm=T)
## complete cellular proportions
hannum.metadata<-
cbind(hannum.metadata,
epidish(beta.m = hannum.betas,ref.m = cent12CT450k.m)$estF
)
head(hannum.metadata)
Age distribution
hannum.metadata %>% head
hannum.metadata %>% dim
hannum.metadata %>% count(characteristics_ch1_4)
head(hannum.betas);dim(hannum.betas)
##plot age distribution in hannum
hannum.metadata %>% head
hannum.metadata %>%
ggplot(aes(x=age))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_histogram()+
theme_classic()+
ggtitle(paste0("Samples from Hannum et al, Mol Cell 2013, N=",nrow(hannum.metadata)))+
guides(fill=guide_legend("Donor id",nrow=2))+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
mean and SD fCPG. We can see there is a clear correlation between SD methyaltion of fCpGs and age!
hannum.metadata %>%
ggplot(aes(x=age,
y=mean.fCpGs
))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_point(aes(fill=characteristics_ch1_5),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~characteristics_ch1_5)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
hannum.metadata %>%
ggplot(aes(x=age,
y=SD.fCpGs
))+
geom_point(aes(fill=characteristics_ch1_5),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~characteristics_ch1_5)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "none",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
The subtle anticorrelation with mean disappear when adjusting for cell types, while SD remains highly significant.
## adjusting for cellular fractions
hannum.metadata %>%
reshape2::melt(measure.vars = c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
id.vars = "age") %>%
ggplot(aes(x=age,
y=value
))+
geom_point(aes(fill=variable),
pch=21,size=1,stroke=0.1
)+
geom_smooth(method = "lm",color="black")+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
# label.x.npc = 0.2,label.y.npc = 1,
method = "pearson",
size=1.25)+
scale_fill_manual(values = cols25())+
ylab("Cell fraction")+
facet_wrap(.~variable,scales = "free")+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
# legend.spacing.y = unit(3,"pt"),
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
##liner model corrected by cell fracts and ethnicity
# hannum.metadata %>% head
# hannum.metadata %>% count(ethnicity_ch1)
lm.fit <-
hannum.metadata %>%
mutate(ethnicity=ethnicity_ch1) %>%
lm(SD.fCpGs ~
age+
ethnicity+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 656 | 4e-04 | 3e-04, 5e-04 | 4e-14 | 6e-13 |
ethnicity |
|
|
|
|
|
    Caucasian - European | 426 | — | — |
|
|
    Hispanic - Mexican | 230 | -2e-04 | -3e-03, 3e-03 | 9e-01 | 9e-01 |
Mono | 656 | 5e-02 | -3e-02, 1e-01 | 2e-01 | 3e-01 |
Neu | 656 | 8e-02 | 1e-02, 1e-01 | 2e-02 | 6e-02 |
Baso | 656 | 3e-01 | 1e-01, 5e-01 | 4e-04 | 2e-03 |
Eos | 656 | 7e-02 | -2e-02, 2e-01 | 1e-01 | 2e-01 |
NK | 656 | 2e-02 | -7e-02, 1e-01 | 7e-01 | 7e-01 |
CD8Tnv | 656 | 2e-01 | 4e-02, 3e-01 | 8e-03 | 3e-02 |
CD8Tmem | 656 | 3e-02 | -5e-02, 1e-01 | 4e-01 | 6e-01 |
CD4Tnv | 656 | 7e-02 | -4e-03, 2e-01 | 6e-02 | 1e-01 |
CD4Tmem | 656 | 3e-02 | -5e-02, 1e-01 | 5e-01 | 6e-01 |
Treg | 656 | 3e-01 | 1e-01, 4e-01 | 6e-04 | 3e-03 |
Bnv | 656 | 7e-02 | -2e-02, 2e-01 | 1e-01 | 2e-01 |
Bmem | 656 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.228; Adjusted R² = 0.212; Sigma = 0.014; Statistic = 14.6; p-value = <0.001; df = 13; Log-likelihood = 1,889; AIC = -3,747; BIC = -3,680; Deviance = 0.121; Residual df = 642; No. Obs. = 656 | |||||
1 Benjamini & Hochberg correction for multiple testing |
##mean fcpg
lm.fit <-
hannum.metadata %>%
mutate(ethnicity=ethnicity_ch1) %>%
filter(ethnicity!="Unknown") %>%
lm(mean.fCpGs ~
age+
ethnicity+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(hannum.metadata$age,hannum.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 656 | 7e-06 | -1e-04, 1e-04 | 9e-01 | 9e-01 |
ethnicity |
|
|
|
|
|
    Caucasian - European | 426 | — | — |
|
|
    Hispanic - Mexican | 230 | 2e-03 | -2e-03, 5e-03 | 3e-01 | 5e-01 |
Mono | 656 | -5e-02 | -2e-01, 4e-02 | 3e-01 | 5e-01 |
Neu | 656 | -2e-02 | -1e-01, 6e-02 | 6e-01 | 7e-01 |
Baso | 656 | 9e-02 | -1e-01, 3e-01 | 4e-01 | 6e-01 |
Eos | 656 | -7e-02 | -2e-01, 4e-02 | 2e-01 | 5e-01 |
NK | 656 | -2e-02 | -1e-01, 9e-02 | 7e-01 | 7e-01 |
CD8Tnv | 656 | 1e-01 | -2e-02, 3e-01 | 1e-01 | 4e-01 |
CD8Tmem | 656 | 2e-01 | 8e-02, 3e-01 | 2e-04 | 3e-03 |
CD4Tnv | 656 | 1e-01 | 2e-03, 2e-01 | 4e-02 | 3e-01 |
CD4Tmem | 656 | 7e-02 | -3e-02, 2e-01 | 2e-01 | 5e-01 |
Treg | 656 | -4e-02 | -2e-01, 2e-01 | 7e-01 | 7e-01 |
Bnv | 656 | 5e-02 | -5e-02, 2e-01 | 3e-01 | 5e-01 |
Bmem | 656 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.313; Adjusted R² = 0.299; Sigma = 0.017; Statistic = 22.5; p-value = <0.001; df = 13; Log-likelihood = 1,747; AIC = -3,464; BIC = -3,396; Deviance = 0.187; Residual df = 642; No. Obs. = 656 | |||||
1 Benjamini & Hochberg correction for multiple testing |
Heatmap
##
### HEATMAP FCPGS
##
hannum.metadata %>% head()
h.fCpGs <- Heatmap(fCpGs.hannum,
col=evoflux.meth$color,
cluster_columns = T,
cluster_rows = T,
show_row_names = F,
show_column_names = F,
show_row_dend = T,
show_column_dend = T,
name="DNA methylation",
border = T,
row_dend_gp = gpar(lwd=0.1),
column_dend_gp = gpar(lwd=0.1),
top_annotation = HeatmapAnnotation(
df=hannum.metadata %>%
select(ethnicity=ethnicity_ch1,age,Sex=gender_ch1),
col=list(
ethnicity=setNames(kelly(hannum.metadata %>%
pull(ethnicity_ch1) %>%
unique() %>% na.omit() %>%
length()
),
hannum.metadata %>%
pull(ethnicity_ch1) %>%na.omit() %>%
unique()
),
Sex=setNames(c("deepskyblue","pink2"),c("M","F")),
age=colorRamp2(breaks = seq(min(hannum.metadata$age),
max(hannum.metadata$age),
length.out=100),
brewer.blues(100))
),
show_annotation_name = T,
na_col = "grey45",
annotation_name_side = "left",
show_legend = T,
annotation_name_gp = gpar(fontsize=4),
annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
simple_anno_size = unit(2,"mm")
)
)
draw(h.fCpGs,
merge_legend=T,
heatmap_legend_side = "right",
annotation_legend_side = "right"
)
Large dataset from adults and elderly people, available in GSE55763. We download processed data matrix and load it separately.
##
# 9. Lehen cohort cohorts genome biology 2015 ≥ 2500 samples, age assoc.
##
gse <- getGEO(GEO = "GSE55763",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE55763_series_matrix.txt.gz
show(gse)
## phenodata
lehne2015.metadata <- pData(phenoData(gse)) %>% clean_names()
lehne2015.metadata %>% head
lehne2015.metadata$age <- as.numeric(lehne2015.metadata$age_ch1)
rownames(lehne2015.metadata)
## get meth values
lehne2015.betas <- fread("../Revision/Data/450k-Aging/GSE55763_normalized_betas.txt.gz",data.table = F)
head(lehne2015.betas);dim(lehne2015.betas);gc()
rownames(lehne2015.betas) <- lehne2015.betas$ID_REF
lehne2015.betas <- lehne2015.betas[,grep("Detection|ID_REF",colnames(lehne2015.betas),value = T,invert = T)]
all(gsub("Peripheral blood, ","",lehne2015.metadata$title) %in% colnames(lehne2015.betas))
all(colnames(lehne2015.betas) %in% gsub("Peripheral blood, ","",lehne2015.metadata$title))
lehne2015.betas <- lehne2015.betas[,gsub("Peripheral blood, ","",lehne2015.metadata$title)]
colnames(lehne2015.betas) <- lehne2015.metadata$geo_accession
head(lehne2015.betas);dim(lehne2015.betas)
##fcpgs
table(rownames(fCpGs.betas) %in% rownames(lehne2015.betas))
fCpGs.lehne2015 <- lehne2015.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(lehne2015.betas)],]
head(fCpGs.lehne2015);dim(fCpGs.lehne2015)
identical(colnames(fCpGs.lehne2015),lehne2015.metadata$geo_accession)
gc()
##complete with fCPGs stats
lehne2015.metadata$SD.fCpGs <- apply(fCpGs.lehne2015,2,sd,na.rm=T)
lehne2015.metadata$mean.fCpGs <- apply(fCpGs.lehne2015,2,mean,na.rm=T)
head(lehne2015.metadata);dim(lehne2015.metadata)
##cell fractions
lehne2015.metadata<-
cbind(lehne2015.metadata,
epidish(beta.m = lehne2015.betas,ref.m = cent12CT450k.m)$estF
)
head(lehne2015.metadata)
Age distribution
lehne2015.metadata %>% head
lehne2015.metadata %>% dim
lehne2015.metadata %>% count(characteristics_ch1)
lehne2015.metadata %>% count(characteristics_ch1_1)
lehne2015.metadata %>% count(description)
head(lehne2015.betas);dim(lehne2015.betas)
## remove techinical replicates
lehne2015.metadata.plot <-
lehne2015.metadata %>%
filter(characteristics_ch1_1=="dataset: population study")
##plot age distribution in lehne2015
lehne2015.metadata.plot %>% head
lehne2015.metadata.plot %>% dim
lehne2015.metadata.plot %>%
ggplot(aes(x=age))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_histogram()+
theme_classic()+
ggtitle(paste0("Samples from lehne 2015 et al, Genome Biology , N=",nrow(lehne2015.metadata.plot)))+
guides(fill=guide_legend("Donor id",nrow=2))+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
Mean and SD fCpG methyaltion
lehne2015.metadata.plot %>%
ggplot(aes(x=age,
y=mean.fCpGs
))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_point(aes(fill=characteristics_ch1),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~characteristics_ch1)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
lehne2015.metadata.plot %>%
ggplot(aes(x=age,
y=SD.fCpGs
))+
geom_point(aes(fill=characteristics_ch1),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~characteristics_ch1)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "none",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
Adjusting cell types
## adjusting for cellular fractions
lehne2015.metadata.plot %>%
reshape2::melt(measure.vars = c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
id.vars = "age") %>%
ggplot(aes(x=age,
y=value
))+
geom_point(aes(fill=variable),
pch=21,size=1,stroke=0.1
)+
geom_smooth(method = "lm",color="black")+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
# label.x.npc = 0.2,label.y.npc = 1,
method = "pearson",
size=1.25)+
scale_fill_manual(values = cols25())+
ylab("Cell fraction")+
facet_wrap(.~variable,scales = "free")+
theme_classic()+
labs(caption = "* Benjamini & Hochberg corrected P-values")+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
# legend.spacing.y = unit(3,"pt"),
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
##liner model corrected by cell fracts and ethnicity
# lehne2015.metadata.plot %>% head
lm.fit <-
lehne2015.metadata.plot %>%
lm(SD.fCpGs ~
age+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem,
data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(lehne2015.metadata$age,lehne2015.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 2,639 | 2e-04 | 2e-04, 3e-04 | 4e-47 | 5e-46 |
Mono | 2,639 | 2e-02 | -7e-03, 5e-02 | 2e-01 | 5e-01 |
Neu | 2,639 | 1e-02 | -1e-02, 3e-02 | 4e-01 | 6e-01 |
Baso | 2,639 | 4e-03 | -5e-02, 6e-02 | 9e-01 | 9e-01 |
Eos | 2,639 | 1e-02 | -1e-02, 4e-02 | 3e-01 | 6e-01 |
NK | 2,639 | -1e-02 | -4e-02, 2e-02 | 4e-01 | 6e-01 |
CD8Tnv | 2,639 | -4e-03 | -4e-02, 3e-02 | 8e-01 | 9e-01 |
CD8Tmem | 2,639 | -3e-02 | -5e-02, -2e-03 | 4e-02 | 1e-01 |
CD4Tnv | 2,639 | 9e-03 | -2e-02, 3e-02 | 5e-01 | 6e-01 |
CD4Tmem | 2,639 | -2e-02 | -4e-02, 1e-02 | 2e-01 | 5e-01 |
Treg | 2,639 | -5e-02 | -1e-01, -5e-03 | 3e-02 | 1e-01 |
Bnv | 2,639 | -1e-03 | -3e-02, 2e-02 | 9e-01 | 9e-01 |
Bmem | 2,639 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.146; Adjusted R² = 0.142; Sigma = 0.007; Statistic = 37.3; p-value = <0.001; df = 12; Log-likelihood = 9,230; AIC = -18,432; BIC = -18,349; Deviance = 0.142; Residual df = 2,626; No. Obs. = 2,639 | |||||
1 Benjamini & Hochberg correction for multiple testing |
##mean fcpg
lm.fit <-
lehne2015.metadata.plot %>%
lm(mean.fCpGs ~
age+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(lehne2015.metadata$age,lehne2015.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 2,639 | 1e-04 | 6e-05, 1e-04 | 1e-06 | 1e-05 |
Mono | 2,639 | -6e-02 | -1e-01, -3e-02 | 8e-04 | 2e-03 |
Neu | 2,639 | -2e-02 | -5e-02, 1e-02 | 2e-01 | 4e-01 |
Baso | 2,639 | -4e-02 | -1e-01, 4e-02 | 4e-01 | 4e-01 |
Eos | 2,639 | -2e-02 | -6e-02, 1e-02 | 2e-01 | 4e-01 |
NK | 2,639 | 9e-04 | -4e-02, 4e-02 | 1e+00 | 1e+00 |
CD8Tnv | 2,639 | 3e-02 | -2e-02, 7e-02 | 3e-01 | 4e-01 |
CD8Tmem | 2,639 | 7e-02 | 3e-02, 1e-01 | 2e-04 | 7e-04 |
CD4Tnv | 2,639 | -4e-02 | -7e-02, -4e-03 | 3e-02 | 7e-02 |
CD4Tmem | 2,639 | 3e-02 | -8e-03, 7e-02 | 1e-01 | 3e-01 |
Treg | 2,639 | 2e-01 | 8e-02, 2e-01 | 2e-05 | 1e-04 |
Bnv | 2,639 | -2e-03 | -4e-02, 3e-02 | 9e-01 | 1e+00 |
Bmem | 2,639 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.176; Adjusted R² = 0.173; Sigma = 0.010; Statistic = 46.8; p-value = <0.001; df = 12; Log-likelihood = 8,309; AIC = -16,590; BIC = -16,508; Deviance = 0.285; Residual df = 2,626; No. Obs. = 2,639 | |||||
1 Benjamini & Hochberg correction for multiple testing |
Heatmap
##
### HEATMAP FCPGS
##
lehne2015.metadata.plot %>% head()
h.fCpGs <- Heatmap(fCpGs.lehne2015 %>%
select( lehne2015.metadata.plot %>% pull(geo_accession)),
col=evoflux.meth$color,
cluster_columns = T,
cluster_rows = T,
show_row_names = F,
show_column_names = F,
show_row_dend = T,
show_column_dend = T,
name="DNA methylation",
border = T,
row_dend_gp = gpar(lwd=0.1),
column_dend_gp = gpar(lwd=0.1),
top_annotation = HeatmapAnnotation(
df=lehne2015.metadata.plot %>%
select(age,Sex=characteristics_ch1_2),
col=list(
Sex=setNames(c("deepskyblue","pink2"),c("gender: M","gender: F")),
age=colorRamp2(breaks = seq(min(lehne2015.metadata.plot$age),
max(lehne2015.metadata.plot$age),
length.out=100),
brewer.blues(100))
),
show_annotation_name = T,
na_col = "grey45",
annotation_name_side = "left",
show_legend = T,
annotation_name_gp = gpar(fontsize=4),
annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
simple_anno_size = unit(2,"mm")
),
use_raster = T, raster_quality = 10
)
draw(h.fCpGs,
merge_legend=T,
heatmap_legend_side = "right",
annotation_legend_side = "right"
)
We next load dataset from Horvath 2016, available at GSE72773
gse <- getGEO(GEO = "GSE72773",destdir = "../Revision/Data/450k-Aging")
gse <- gse$GSE72773_series_matrix.txt.gz
show(gse)
## phenodata
Horvath2016.metadata <- pData(phenoData(gse)) %>% clean_names()
Horvath2016.metadata %>% head
Horvath2016.metadata$age <- as.numeric(Horvath2016.metadata$age_ch1)
rownames(Horvath2016.metadata)
## get meth values
Horvath2016.betas <- fread("../Revision/Data/450k-Aging/GSE72773_datBetaNormalized.csv.gz",data.table = F,na.strings = c("","NA"))
sapply(Horvath2016.betas,class) %>% table()
any(is.na(Horvath2016.betas))
head(Horvath2016.betas);dim(Horvath2016.betas);gc()
rownames(Horvath2016.betas) <- Horvath2016.betas$ID
Horvath2016.betas <- Horvath2016.betas[,grep("^ID",colnames(Horvath2016.betas),invert = T)]
head(Horvath2016.betas);dim(Horvath2016.betas)
all(gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title) %in% colnames(Horvath2016.betas))
all(colnames(Horvath2016.betas) %in% gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title))
Horvath2016.betas <-
Horvath2016.betas[,gsub("OldTsimane: genomic DNA from whole blood of subject ","",Horvath2016.metadata$title)]
colnames(Horvath2016.betas) <- Horvath2016.metadata$geo_accession
head(Horvath2016.betas);dim(Horvath2016.betas)
##fcpgs
# some values are characters or NA, convert appropiately to numeric
# mat <- apply(fCpGs.Horvath2016[,which(sapply(fCpGs.Horvath2016,class)=="character")],2,as.numeric)
# apply(mat,2,function(x)any(is.na(x))) ##checked which samples gave NA, only 1 cpgs with NA value, so ok!
# sapply(fCpGs.Horvath2016,class)
table(rownames(fCpGs.betas) %in% rownames(Horvath2016.betas))
fCpGs.Horvath2016 <-
Horvath2016.betas[rownames(fCpGs.betas)[rownames(fCpGs.betas) %in% rownames(Horvath2016.betas)],]
## some values are characters, convert to numeric!!
fCpGs.Horvath2016 <- as.data.frame(sapply(fCpGs.Horvath2016,as.numeric))
head(fCpGs.Horvath2016);dim(fCpGs.Horvath2016)
identical(colnames(fCpGs.Horvath2016),Horvath2016.metadata$geo_accession)
##complete with fCPGs stats
Horvath2016.metadata$SD.fCpGs <- apply(fCpGs.Horvath2016,2,sd,na.rm=T)
Horvath2016.metadata$mean.fCpGs <- apply(fCpGs.Horvath2016,2,mean,na.rm=T)
head(Horvath2016.metadata);dim(Horvath2016.metadata)
##cell fractions
all(rownames(cent12CT450k.m) %in% rownames(Horvath2016.betas))
horvath.mat.cells <- apply(Horvath2016.betas[rownames(cent12CT450k.m),],2,as.numeric)
head(horvath.mat.cells);dim(horvath.mat.cells)
rownames(horvath.mat.cells) <- rownames(cent12CT450k.m)
Horvath2016.metadata <-
cbind(Horvath2016.metadata,
epidish(beta.m = horvath.mat.cells,ref.m = cent12CT450k.m)$estF
)
head(Horvath2016.metadata)
Age distribution
Horvath2016.metadata %>% head
Horvath2016.metadata %>% dim
Horvath2016.metadata %>% count(source_name_ch1)
Horvath2016.metadata %>% count(ethnicity_ch1)
head(Horvath2016.betas);dim(Horvath2016.betas)
##plot age distribution in Horvath2016
Horvath2016.metadata %>% head
Horvath2016.metadata %>%
ggplot(aes(x=age))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_histogram()+
theme_classic()+
ggtitle(paste0("Samples from Horvath 2016 et al, Genome Biology 2016, N=",nrow(Horvath2016.metadata)))+
guides(fill=guide_legend("Donor id",nrow=2))+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
Mean and SD
Horvath2016.metadata %>%
ggplot(aes(x=age,
y=mean.fCpGs
))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_point(aes(fill=source_name_ch1),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~source_name_ch1)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
Horvath2016.metadata %>%
ggplot(aes(x=age,
y=SD.fCpGs
))+
geom_point(aes(fill=source_name_ch1),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25())+
facet_wrap(~source_name_ch1)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
# stat_cor(method = "pearson",size=1.5)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "none",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
Adjusting for cell types
## adjusting for cellular fractions
Horvath2016.metadata %>%
reshape2::melt(measure.vars = c("CD4Tnv","CD4Tmem","Bmem","Bnv","Treg","CD8Tmem","CD8Tnv","Eos","NK","Neu","Mono","Baso"),
id.vars = "age") %>%
ggplot(aes(x=age,
y=value
))+
geom_point(aes(fill=variable),
pch=21,size=1,stroke=0.1
)+
geom_smooth(method = "lm",color="black")+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
# label.x.npc = 0.2,label.y.npc = 1,
method = "pearson",
size=1.25)+
scale_fill_manual(values = cols25())+
ylab("Cell fraction")+
facet_wrap(.~variable,scales = "free")+
labs(caption = "* Benjamini & Hochberg corrected P-values")+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
# legend.spacing.y = unit(3,"pt"),
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
##liner model corrected by cell fracts and ethnicity
# Horvath2016.metadata %>% head
# Horvath2016.metadata %>% count(ethnicity_ch1)
lm.fit <-
Horvath2016.metadata %>%
mutate(ethnicity=ethnicity_ch1) %>%
lm(SD.fCpGs ~
age+
ethnicity+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 310 | 4e-04 | 2e-04, 5e-04 | 5e-08 | 7e-07 |
ethnicity |
|
|
|
|
|
    Caucasian | 235 | — | — |
|
|
    Hispanic | 38 | -1e-03 | -6e-03, 4e-03 | 6e-01 | 9e-01 |
    Tsimane | 37 | 2e-02 | 6e-03, 2e-02 | 1e-03 | 8e-03 |
Mono | 310 | -2e-02 | -2e-01, 2e-01 | 8e-01 | 9e-01 |
Neu | 310 | -3e-02 | -2e-01, 1e-01 | 7e-01 | 9e-01 |
Baso | 310 | 5e-02 | -3e-01, 4e-01 | 8e-01 | 9e-01 |
Eos | 310 | -6e-02 | -2e-01, 1e-01 | 5e-01 | 9e-01 |
NK | 310 | -7e-02 | -3e-01, 1e-01 | 5e-01 | 9e-01 |
CD8Tnv | 310 | 1e-01 | -1e-01, 3e-01 | 4e-01 | 9e-01 |
CD8Tmem | 310 | -1e-01 | -3e-01, 6e-02 | 2e-01 | 9e-01 |
CD4Tnv | 310 | -3e-02 | -2e-01, 1e-01 | 7e-01 | 9e-01 |
CD4Tmem | 310 | -9e-02 | -3e-01, 8e-02 | 3e-01 | 9e-01 |
Treg | 310 | 6e-03 | -2e-01, 2e-01 | 1e+00 | 1e+00 |
Bnv | 310 | -2e-02 | -2e-01, 2e-01 | 8e-01 | 9e-01 |
Bmem | 310 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.217; Adjusted R² = 0.180; Sigma = 0.014; Statistic = 5.85; p-value = <0.001; df = 14; Log-likelihood = 902; AIC = -1,772; BIC = -1,712; Deviance = 0.054; Residual df = 295; No. Obs. = 310 | |||||
1 Benjamini & Hochberg correction for multiple testing |
##mean fcpg
lm.fit <-
Horvath2016.metadata %>%
mutate(ethnicity=ethnicity_ch1) %>%
lm(mean.fCpGs ~
age+
ethnicity+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 310 | -3e-04 | -5e-04, -4e-06 | 5e-02 | 3e-01 |
ethnicity |
|
|
|
|
|
    Caucasian | 235 | — | — |
|
|
    Hispanic | 38 | 9e-03 | -1e-03, 2e-02 | 9e-02 | 4e-01 |
    Tsimane | 37 | 5e-03 | -1e-02, 2e-02 | 6e-01 | 8e-01 |
Mono | 310 | 1e-02 | -3e-01, 4e-01 | 1e+00 | 1e+00 |
Neu | 310 | 2e-03 | -3e-01, 3e-01 | 1e+00 | 1e+00 |
Baso | 310 | 5e-01 | -1e-01, 1e+00 | 1e-01 | 4e-01 |
Eos | 310 | -3e-02 | -4e-01, 3e-01 | 9e-01 | 1e+00 |
NK | 310 | -1e-01 | -5e-01, 2e-01 | 5e-01 | 8e-01 |
CD8Tnv | 310 | 2e-01 | -2e-01, 6e-01 | 3e-01 | 8e-01 |
CD8Tmem | 310 | 2e-01 | -1e-01, 6e-01 | 2e-01 | 7e-01 |
CD4Tnv | 310 | -1e-01 | -5e-01, 2e-01 | 5e-01 | 8e-01 |
CD4Tmem | 310 | 1e-01 | -2e-01, 5e-01 | 5e-01 | 8e-01 |
Treg | 310 | -6e-01 | -1e+00, -1e-01 | 1e-02 | 2e-01 |
Bnv | 310 | 1e-02 | -4e-01, 4e-01 | 9e-01 | 1e+00 |
Bmem | 310 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.209; Adjusted R² = 0.172; Sigma = 0.027; Statistic = 5.57; p-value = <0.001; df = 14; Log-likelihood = 689; AIC = -1,345; BIC = -1,285; Deviance = 0.214; Residual df = 295; No. Obs. = 310 | |||||
1 Benjamini & Hochberg correction for multiple testing |
Heatmap
##
### HEATMAP FCPGS
##
Horvath2016.metadata %>% head()
h.fCpGs <- Heatmap(fCpGs.Horvath2016,
col=evoflux.meth$color,
cluster_columns = T,
cluster_rows = T,
show_row_names = F,
show_column_names = F,
show_row_dend = T,
show_column_dend = T,
name="DNA methylation",
border = T,
row_dend_gp = gpar(lwd=0.1),
column_dend_gp = gpar(lwd=0.1),
top_annotation = HeatmapAnnotation(
df=Horvath2016.metadata %>%
select(ethnicity=ethnicity_ch1,age,Sex=characteristics_ch1),
col=list(
ethnicity=setNames(kelly(Horvath2016.metadata %>%
pull(ethnicity_ch1) %>%
unique() %>% na.omit() %>%
length()
),
Horvath2016.metadata %>%
pull(ethnicity_ch1) %>%na.omit() %>%
unique()
),
Sex=setNames(c("deepskyblue","pink2"),c("Sex: male","Sex: female")),
age=colorRamp2(breaks = seq(min(Horvath2016.metadata$age),
max(Horvath2016.metadata$age),
length.out=100),
brewer.blues(100))
),
show_annotation_name = T,
na_col = "grey45",
annotation_name_side = "left",
show_legend = T,
annotation_name_gp = gpar(fontsize=4),
annotation_legend_param = list(labels_gp=gpar(fontsize=4)),
simple_anno_size = unit(2,"mm")
)
)
draw(h.fCpGs,
merge_legend=T,
heatmap_legend_side = "right",
annotation_legend_side = "right"
)
Here we collect 2 studies with sorted samples.
# figure manuscipt 2
Clark2020.metadata$source <- Clark2020.metadata$cell_type_ch1
Clark2020.metadata$Study <- "GSE137594"
sorted.age.metadata$Study <- "GSE184269"
sorted.manuscript <- bind_rows(sorted.age.metadata,Clark2020.metadata)
sorted.manuscript$source
sorted.manuscript %>%
ggplot(aes(x=age,
y=mean.fCpGs
))+
geom_point(
aes(fill=source,
shape=Study),
size=0.75,
stroke=0
)+
scale_shape_manual(values = c("GSE137594"=24,"GSE184269"=21))+
geom_smooth(aes(fill=source),
method = "lm",color="black",lwd=0.5,alpha=0.3
)+
scale_fill_manual(values = cols25())+
facet_wrap(~source)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
)
),
label.y.npc="top",
label.x.npc = "left",
method = "pearson",
size=1.25)+
# stat_cor(method = "pearson",size=1.5)+
guides(fill=guide_none(),
shape=guide_legend(override.aes = list(stroke=0.25,color="grey0"))
)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(2,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
sorted.manuscript %>%
ggplot(aes(x=age,
y=SD.fCpGs
))+
geom_point(
aes(fill=source,
shape=Study
),
size=0.75,
stroke=0
)+
scale_shape_manual(values = c("GSE137594"=24,"GSE184269"=21))+
geom_smooth(aes(fill=source),
method = "lm",color="black",lwd=0.5,alpha=0.3
)+
scale_fill_manual(values = cols25())+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(2)
)
),
label.y.npc="top",
label.x.npc = "left",
method = "pearson",
size=1.25)+
facet_wrap(~source)+
# stat_cor(method = "pearson",size=1.5)+
labs(caption = "* Benjamini & Hochberg corrected P-values")+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "none",
legend.box.spacing = unit(2,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
We collect all blood samples across the distinct datasets, including GSE36054, GSE40279, GSE55763 and GSE72773 and perform the analyses with all samples. We correct for cell types, and show how mean fCpG remains stable when considering changes in cell types composition during aging, but fCpG SD increases, consistent with clonal expansions of the hematopoietic system during aging.
##
## Fig manuscipt 3 and 4, whole blood datasets including pediatrics, hannum, horvath2016 and lehne.
##
pediatrics.metadata.plot %>% head
hannum.metadata %>% head
Horvath2016.metadata %>% head
lehne2015.metadata.plot %>% head
wb.df <- bind_rows(cbind(pediatrics.metadata.plot,Study="GSE36054"),
cbind(hannum.metadata,Study="GSE40279"),
cbind(Horvath2016.metadata,Study="GSE72773"),
cbind(lehne2015.metadata.plot,Study="GSE55763")
)
wb.df %>% head
wb.df %>%
ggplot(aes(x=age,
y=mean.fCpGs
))+
# geom_boxplot(outlier.shape = NA,fill=NA)+
geom_point(aes(fill=Study),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25(),aesthetics = c("fill","color"))+
stat_cor(
aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
),
color=Study
),
method = "pearson",
size=1.5,label.y.npc = 0.1
)+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
annotate(geom = "text",x = 10,y=0.6,label=paste0("N=",nrow(wb.df)),size=2)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "top",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
) |
wb.df %>%
ggplot(aes(x=age,
y=SD.fCpGs
))+
geom_point(aes(fill=Study),
size=1,
pch=21,
stroke=0
)+
geom_smooth(method = "lm",color="black",lwd=0.5)+
scale_fill_manual(values = cols25(),aesthetics = c("fill","color"))+
stat_cor(aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
)
),
method = "pearson",
size=1.5)+
stat_cor(
aes(label = paste0(after_stat(r.label),
"~`,`~`P=`~",
p.adjust(readr::parse_number(after_stat(p.label)),method = "BH") %>% signif(4)
),
color=Study
),
method = "pearson",
size=1.5,label.y.npc = 0.6
)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text = element_text(color="grey0"),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
# panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.text = element_text(size = 4),
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
Adjust cell types
##multivariate lm model with cell fracs
##liner model corrected by cell fracts and ethnicity
# wb.df %>% head
# wb.df %>% count(ethnicity_ch1,Study)
lm.fit <-
wb.df %>%
lm(SD.fCpGs ~
age+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)+ggtitle()
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with SD.fCpGs as response variable")
print(tab)
Multivariate linear model with SD.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 3,738 | 4e-04 | 3e-04, 4e-04 | 1e-147 | 1e-146 |
Mono | 3,738 | 6e-02 | 3e-02, 9e-02 | 6e-05 | 1e-04 |
Neu | 3,738 | 6e-02 | 4e-02, 8e-02 | 2e-06 | 3e-06 |
Baso | 3,738 | 1e-01 | 9e-02, 2e-01 | 9e-07 | 2e-06 |
Eos | 3,738 | 7e-02 | 5e-02, 1e-01 | 7e-08 | 2e-07 |
NK | 3,738 | 3e-02 | -3e-03, 5e-02 | 8e-02 | 1e-01 |
CD8Tnv | 3,738 | 1e-01 | 7e-02, 1e-01 | 1e-09 | 5e-09 |
CD8Tmem | 3,738 | 2e-04 | -3e-02, 3e-02 | 1e+00 | 1e+00 |
CD4Tnv | 3,738 | 4e-02 | 1e-02, 7e-02 | 2e-03 | 3e-03 |
CD4Tmem | 3,738 | 2e-02 | -9e-03, 5e-02 | 2e-01 | 2e-01 |
Treg | 3,738 | 2e-01 | 1e-01, 2e-01 | 4e-11 | 2e-10 |
Bnv | 3,738 | 4e-02 | 1e-02, 7e-02 | 2e-03 | 3e-03 |
Bmem | 3,738 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.321; Adjusted R² = 0.319; Sigma = 0.010; Statistic = 147; p-value = <0.001; df = 12; Log-likelihood = 11,952; AIC = -23,876; BIC = -23,789; Deviance = 0.365; Residual df = 3,725; No. Obs. = 3,738 | |||||
1 Benjamini & Hochberg correction for multiple testing |
##mean fcpg
lm.fit <-
wb.df %>%
lm(mean.fCpGs ~
age+
Mono+Neu+Baso+Eos+NK+CD8Tnv+CD8Tmem+CD4Tnv+CD4Tmem+Treg+Bnv+Bmem
,data = .)
# summary(lm.fit)
# broom::glance(lm.fit)$adj.r.squared %>% sqrt()
# broom.helpers::tidy_plus_plus(lm.fit) %>% data.frame()
# cor.test(Horvath2016.metadata$age,Horvath2016.metadata$SD.fCpGs)$est
# ggcoef_model(lm.fit)
# ggcoef_table(lm.fit)
tab <-
tbl_regression(lm.fit,
estimate_fun = function(i)format(x = i,digits=1,scientific=T),
pvalue_fun = function(i)format(x = i,digits=1,scientific=T)
) %>%
add_n(location = "level") %>%
add_q(method = "BH") %>%
add_glance_source_note() %>% #show_header_names() to show colum names to be modified
modify_header(label = "**Explanatory variables**") %>%
bold_p(q = F) %>%
as_gt() %>%
tab_header("Multivariate linear model with mean.fCpGs as response variable")
print(tab)
Multivariate linear model with mean.fCpGs as response variable | |||||
Explanatory variables | N | Beta | 95% CI | p-value | q-value1 |
---|---|---|---|---|---|
age | 3,738 | -2e-05 | -6e-05, 3e-05 | 5e-01 | 5e-01 |
Mono | 3,738 | 9e-03 | -4e-02, 6e-02 | 7e-01 | 7e-01 |
Neu | 3,738 | 5e-02 | 3e-03, 9e-02 | 4e-02 | 6e-02 |
Baso | 3,738 | 1e-01 | 4e-02, 2e-01 | 5e-03 | 1e-02 |
Eos | 3,738 | 5e-02 | 6e-03, 1e-01 | 3e-02 | 5e-02 |
NK | 3,738 | 4e-02 | -9e-03, 9e-02 | 1e-01 | 1e-01 |
CD8Tnv | 3,738 | 3e-01 | 3e-01, 4e-01 | 2e-26 | 2e-25 |
CD8Tmem | 3,738 | 1e-01 | 9e-02, 2e-01 | 7e-09 | 3e-08 |
CD4Tnv | 3,738 | 4e-02 | -8e-03, 9e-02 | 1e-01 | 1e-01 |
CD4Tmem | 3,738 | 8e-02 | 3e-02, 1e-01 | 2e-03 | 5e-03 |
Treg | 3,738 | 4e-01 | 3e-01, 5e-01 | 3e-19 | 2e-18 |
Bnv | 3,738 | 1e-01 | 8e-02, 2e-01 | 4e-07 | 1e-06 |
Bmem | 3,738 |
|
|
|
|
Abbreviation: CI = Confidence Interval | |||||
R² = 0.199; Adjusted R² = 0.196; Sigma = 0.017; Statistic = 77.0; p-value = <0.001; df = 12; Log-likelihood = 9,857; AIC = -19,686; BIC = -19,599; Deviance = 1.12; Residual df = 3,725; No. Obs. = 3,738 | |||||
1 Benjamini & Hochberg correction for multiple testing |
fCpGs.betas %>% dim
fCpGs.metadata$supplementary_table_2 %>% count(cell_type_annot_1)
fCpGs.metadata$supplementary_table_2 %>%
filter(cell_type_annot_1!="Normal_lymphoid_cell",
cell_type_annot_1!="PBMCs",cell_type_annot_1!="Whole_blood") %>%
count(cell_type_annot_3)
tumors <-
fCpGs.metadata$supplementary_table_2 %>%
filter(cell_type_annot_1!="Normal_lymphoid_cell",
cell_type_annot_1!="PBMCs",cell_type_annot_1!="Whole_blood")
tumors %>% dim
tumors$SD.fCpGs <- apply(fCpGs.betas[,tumors$participant_id_anonymous],2,sd)
tumors$Group <- tumors$cell_type_annot_3
##merge data frame
wb.df$Group <- cut(wb.df$age,breaks = c(0,30,50,70,80,max(wb.df$age)),include.lowest = T)
wb.df$Group %>% table
df.age <- bind_rows(wb.df,tumors)
df.age$Group <- factor(df.age$Group,levels = c("[0,30]","(30,50]","(50,70]","(70,80]","(80,101]",
"B-ALL","B-ALL-remission","B-ALL-relapse",
"T-ALL","T-ALL-remission","T-ALL-relapse",
"MCL","DLBCL-NOS","MBL","CLL","RT","MGUS","MM"
))
df.age %>% count(Group)
cols.paper <- c(
"30"="grey100",
"50"="grey90",
"70"="grey80",
"90"="grey50",
"101"="grey30",
NA_values="#666666",
HPC="#E6E6E6",
pre.Bcell="#D5D5D5",
NBC="#C3C3C3",
GCB="#AEAEAE",
tPC="#969696",
MBC="#787878",
bmPC="#4D4D4D",
Bcell="#C3C3C3",
Tcell="#F0F8FF",
PBMCs="#FFF8DC",
Whole_blood="#CDC8B1",
"Whole blood"="#CDC8B1",
Normal_lymphoid_cell="#D5D5D5",
Normal_myeloid_cell="#E6E6FA",
"T-ALL"="#CD3333",
"T-ALL-remission"="#C7A5A5",
"T-ALL-relapse"="#CD3333",
"B-ALL"="#E69F00",
"B-ALL-remission"="#D8CAAB",
"B-ALL-relapse"="#E69F00",
MCL="#F0E442",
C1="#F5D51B",
cMCL="#F5D51B",
C2="#0072B2",
nnMCL="#0072B2",
"DLBCL-NOS"="#56B4E9",
MBL="#5A9A89",
CLL="#009E73",
RT="#02C590",
unmutated="#e65100",
mutated="#361379",
nCLL="#006E93",
iCLL="#FDC010",
mCLL="#963736",
Disease.Unclassified="#CCCCCC",
MGUS="#BF99AE",
MM="#CC79A7"
)
df.age %>%
ggplot(aes(x=Group,
y=SD.fCpGs
))+
geom_boxplot(aes(fill=Group),outlier.size = 0.5,lwd=0.3)+
scale_fill_manual(values = cols.paper)+
stat_compare_means(method = "t.test",
comparisons = list(c("[0,30]","(30,50]"),
c("(30,50]","(50,70]"),
c("(50,70]","(70,80]"),
c("(70,80]","(80,101]"),
c("[0,30]","(80,101]")
),
bracket.size = 0.2,
size=1,label.y=0.3,step.increase = 0.04)+
xlab(NULL)+
theme_classic()+
theme(text=element_text(color="grey0",size = 6),
line = element_line(linewidth = 0.2),
axis.text.x = element_text(color="grey0",angle = 44,hjust = 1,vjust = 1),
strip.placement = "outside",
strip.background = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
panel.spacing = unit(0.1,"pt"),
legend.key.size = unit(0,"pt"),
legend.position = "none",
legend.margin=margin(0,0,0,0),
legend.background = element_blank(),
legend.box.background = element_blank(),
legend.title.position = "top",
legend.box.spacing = unit(3,"pt"),
legend.justification = "left",
legend.title = element_text(vjust = 1),
panel.border = element_rect(fill = NA,color=NA,linewidth = 0.3)
)
print(sessionInfo())
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Madrid
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] broom.helpers_1.21.0 broom_1.0.9 patchwork_1.3.2
## [4] stringr_1.5.1 gtExtras_0.6.0 gtsummary_2.4.0
## [7] gt_1.0.0 ggstats_0.10.0 openxlsx_4.2.8
## [10] EpiDISH_2.24.0 circlize_0.4.16 ComplexHeatmap_2.24.1
## [13] GEOquery_2.76.0 Biobase_2.68.0 BiocGenerics_0.54.0
## [16] generics_0.1.4 pals_1.10 ggpubr_0.6.1
## [19] ggplot2_3.5.2 dplyr_1.1.4 janitor_2.2.1
## [22] data.table_1.17.8 BiocStyle_2.36.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1
## [3] jsonlite_2.0.0 shape_1.4.6.1
## [5] magrittr_2.0.3 magick_2.8.7
## [7] farver_2.1.2 rmarkdown_2.29
## [9] GlobalOptions_0.1.2 vctrs_0.6.5
## [11] locfdr_1.1-8 Cairo_1.6-5
## [13] paletteer_1.6.0 rstatix_0.7.2.999
## [15] tinytex_0.57 forcats_1.0.0
## [17] htmltools_0.5.8.1 S4Arrays_1.8.1
## [19] haven_2.5.5 curl_7.0.0
## [21] SparseArray_1.8.1 Formula_1.2-5
## [23] sass_0.4.10 bslib_0.9.0
## [25] fontawesome_0.5.3 plyr_1.8.9
## [27] lubridate_1.9.4 cachem_1.1.0
## [29] commonmark_2.0.0 lifecycle_1.0.4
## [31] iterators_1.0.14 pkgconfig_2.0.3
## [33] Matrix_1.7-4 R6_2.6.1
## [35] fastmap_1.2.0 GenomeInfoDbData_1.2.14
## [37] MatrixGenerics_1.20.0 snakecase_0.11.1
## [39] clue_0.3-66 digest_0.6.37
## [41] colorspace_2.1-1 rematch2_2.1.2
## [43] S4Vectors_0.46.0 GenomicRanges_1.60.0
## [45] labeling_0.4.3 timechange_0.3.0
## [47] httr_1.4.7 abind_1.4-8
## [49] mgcv_1.9-3 compiler_4.5.0
## [51] proxy_0.4-27 withr_3.0.2
## [53] doParallel_1.0.17 backports_1.5.0
## [55] carData_3.0-5 R.utils_2.13.0
## [57] maps_3.4.3 ggsignif_0.6.4
## [59] MASS_7.3-65 DelayedArray_0.34.1
## [61] rjson_0.2.23 tools_4.5.0
## [63] zip_2.3.3 rentrez_1.2.4
## [65] R.oo_1.27.1 glue_1.8.0
## [67] quadprog_1.5-8 nlme_3.1-168
## [69] reshape2_1.4.4 cluster_2.1.8.1
## [71] gtable_0.3.6 labelled_2.14.1
## [73] tzdb_0.5.0 R.methodsS3_1.8.2
## [75] class_7.3-23 tidyr_1.3.1
## [77] hms_1.1.3 xml2_1.4.0
## [79] car_3.1-3 XVector_0.48.0
## [81] markdown_2.0 foreach_1.5.2
## [83] pillar_1.11.0 limma_3.64.3
## [85] splines_4.5.0 lattice_0.22-7
## [87] tidyselect_1.2.1 knitr_1.50
## [89] litedown_0.7 bookdown_0.44
## [91] IRanges_2.42.0 SummarizedExperiment_1.38.1
## [93] stats4_4.5.0 xfun_0.53
## [95] statmod_1.5.0 matrixStats_1.5.0
## [97] stringi_1.8.7 UCSC.utils_1.4.0
## [99] yaml_2.3.10 evaluate_1.0.5
## [101] codetools_0.2-20 tibble_3.3.0
## [103] BiocManager_1.30.26 cli_3.6.5
## [105] jquerylib_0.1.4 dichromat_2.0-0.1
## [107] Rcpp_1.1.0 GenomeInfoDb_1.44.2
## [109] mapproj_1.2.12 png_0.1-8
## [111] XML_3.99-0.19 fastcluster_1.3.0
## [113] parallel_4.5.0 readr_2.1.5
## [115] scales_1.4.0 e1071_1.7-16
## [117] purrr_1.1.0 crayon_1.5.3
## [119] GetoptLong_1.0.5 rlang_1.1.6
## [121] cards_0.7.0