Due to the long execution time, we only provide the code here.
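For each sample-sheet batch, the script below reads the corresponding 450k or EPIC IDAT files into an RGChannelSet, produces the minfi QC report, normalizes the data (ssNoob by default), flags samples with low median signal intensities or a high mean detection p-value, removes probes with poor detection p-values, SNP-overlapping probes and the sex chromosomes, and exports the normalized beta values together with the per-sample QC information.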
##
## Get QC in batches of the sample sheets generated
##
options(stringsAsFactors = F,error=NULL)
sample.sheets <- list.files(path = "../../Data/Processings/2_create_QC_samplesheets/",pattern = ".tsv$")
batches <- gsub(".tsv$","",sample.sheets)
batches
for (batch in batches){
writeLines(batch)
#User-defined parameters (set interactively)
project <- paste0("/Users/martiduranferrer/Desktop/Marti_SSD/WORK/fCpGs/Data/QC_",batch)
if(grepl("450k",batch)){
#450k
samp.sheet.path.450k <- paste0("/Users/martiduranferrer/Desktop/Marti_SSD/WORK/fCpGs/Data/Processings/2_create_QC_samplesheets/",batch,".tsv")
baseDir.450k <- "/Users/martiduranferrer/Desktop/Marti_SSD/WORK/IDATS/IDATS_450k/"
samp.sheet.path.EPIC <- NULL
baseDir.EPIC <- NULL
}
if(grepl("EPIC",batch)){
#EPIC
samp.sheet.path.EPIC <- paste0("/Users/martiduranferrer/Desktop/Marti_SSD/WORK/fCpGs/Data/Processings/2_create_QC_samplesheets/",batch,".tsv")
baseDir.EPIC <- "/Users/martiduranferrer/Desktop/Marti_SSD/WORK/IDATS/IDATS_850k/"
samp.sheet.path.450k <- NULL
baseDir.450k <- NULL
}
# controls and calculations
excludingCpGs <- NULL#as.character(read.table("path")[,1]) ## Predefined set of CpGs to remove
##Column names for the sample names and sample groups used in the analysis
Sample_Name_Analysis_column <- "SAMPLE_NAME_ANALYSIS"
Sample_Group_Analysis_column <- "SAMPLE_GROUP_ANALYSIS"
##qc
plot.minfi.qcReport <- TRUE
# NORMALIZATION
ArrayOutType <- NULL
Detection.Pval.Mean.Sample.cutoff <- 0.01
Detection.Pval.CpGs.cutoff <- 1e-16
Detect.Pval.pct.samples <- 90 ##options: 50, 60, 70, 80, 90, 100, from least to most restrictive.
RemoveSNPs <- TRUE
MAF <- 0
RemoveSexualChrs <- TRUE
Normalization.type <- "ssNob" #"SWAN" #2 recommended, batch-independent normalizations. Check the minfi package for further details
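##Accepted values: "Functional", "Quantile", "SWAN", "ssNob"; any other value falls back to preprocessIllumina()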
# Saving options
saveRGset <- T
saveRGset.norm <- T
###########################################################
## Start the analysis and load the required libraries
###########################################################
writeLines("Starting the analysis")
writeLines(paste0(rep("_",100),collapse = ""))
writeLines("Loading requires libraries")
writeLines(paste0(rep("_",100),collapse = ""))
suppressMessages(library(minfi))
suppressMessages(library(ggplot2))
suppressMessages(library(ggrepel))
suppressMessages(library(data.table))
suppressMessages(library(irlba))
suppressMessages(library(qs))
suppressMessages(library(matrixStats)) ## rowAlls() is used in the probe filtering below
##########################################################
## Define dirs and necessary variables to start analysis
###########################################################
## parse the 'project' path to extract the project name and parent directory
project.name <- basename(project)
dir.project <- dirname(project)
###########################################################
## Reading IDAT files and pheno data
###########################################################
writeLines("Reading IDAT files")
writeLines(paste0(rep("_",100),collapse = ""))
setwd(dir.project)
dir.create(project)
setwd(project)
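## read_idats_to_RGSet: read one sample sheet plus its IDAT directory into an RGChannelSet.
## It infers the Slide/Sentrix_ID and Array/Sentrix_Position columns, builds the Basename paths,
## and checks that the analysis name/group columns exist and that sample names are unique.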
read_idats_to_RGSet <- function(IDATS.path = NULL,Sample.sheet.path = NULL){
stopifnot(!is.null(IDATS.path) && !is.null(Sample.sheet.path))
baseDir <- IDATS.path
samp.sheet.path <- Sample.sheet.path
# checking
if(!dir.exists(dir.project)){
stop(paste0("The following directory do not exist!:\n",dir.project))
}
if(!file.exists(samp.sheet.path)){
stop(paste0("File do not exits!:\n",samp.sheet.path))
}
if(!dir.exists(baseDir)){
stop(paste0("Directory of IDAT files do no exist:\n",baseDir))
}
# v.3.1
#
# if(file.exists(file.path(baseDir,samp.sheet))){
# invisible(file.remove(file.path(baseDir,samp.sheet)))
# }
# FileCopied <- file.copy(from = samp.sheet.path,to = baseDir)
# if(!FileCopied){
# stop(paste0("Could not start analysis. Cannot arrange necessary directories!"))
# }
# targets <- read.metharray.sheet(base = baseDir,pattern = samp.sheet,ignore.case = F,verbose = T)
# stopifnot(all(!is.na(targets$Basename)))
##changed in v.3.2
##get the targets data.frame with more flexibility and without the need to copy it into the IDAT directory.
targets <- fread(input = Sample.sheet.path,data.table = F)
##infer slide and array columns
Slide.column <- grep("Slide|Sentrix_ID",colnames(targets),ignore.case = T,value = T)
Array.column <- grep("Array|Sentrix_Position",colnames(targets),ignore.case = T,value = T)
if(any(c(length(Slide.column)==0,length(Array.column)==0))){
stop("Please, your data sheet should contain columns with Slide/Sentrix_ID and/or Array/Sentrix_Position")
}
if(any(c(length(Slide.column)>1,length(Array.column)>1))){
stop("Please, your data sheet should contain only 1 column for Slide/Sentrix_ID and/or Array/Sentrix_Position")
}
targets$Basename <- paste0(IDATS.path,"/",targets[[Slide.column]],"/",targets[[Slide.column]],"_",targets[[Array.column]])
# print(head(targets,20))
# print(tail(targets,20))
##little check of Sample sheet
if(!all(c(Sample_Name_Analysis_column,Sample_Group_Analysis_column)%in%colnames(targets))){
stop("You should specify 'Sample_Name_Analysis_column' and 'Sample_Group_Analysis_column' from your data sheet")
}
##sample names for the analyses must be unique!
stopifnot(!any(duplicated(targets[,Sample_Name_Analysis_column])))
RGset <- read.metharray.exp(targets = targets,verbose = T,force = T)
rownames(pData(RGset)) <- as.character(pData(RGset)[,Sample_Name_Analysis_column])
invisible(gc())
return(RGset)
}
## Read IDATS from 450k arrays
if(!is.null(baseDir.450k) && !is.null(samp.sheet.path.450k)){
RGset450k <- read_idats_to_RGSet(IDATS.path = baseDir.450k,Sample.sheet.path = samp.sheet.path.450k)
writeLines("450k data loaded!")
}
invisible(gc())
## Read IDATS from EPIC arrays
if(!is.null(baseDir.EPIC) && !is.null(samp.sheet.path.EPIC)){
RGsetEPIC <- read_idats_to_RGSet(IDATS.path = baseDir.EPIC,Sample.sheet.path = samp.sheet.path.EPIC)
writeLines("EPIC data loaded!")
}
invisible(gc())
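## Combine 450k and EPIC data into a single RGChannelSet when both platforms are present (outType controlled by ArrayOutType)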
if(exists("RGset450k") && exists("RGsetEPIC")){
RGset <- combineArrays(object1 = RGset450k,object2 = RGsetEPIC,outType = ArrayOutType,verbose=T)
rm(RGset450k)
rm(RGsetEPIC)
}else if(exists("RGset450k")){
RGset <- RGset450k
rm(RGset450k)
}else{
RGset <- RGsetEPIC
rm(RGsetEPIC)
}
invisible(gc())
##save RGSet
dir.create("RData")
if(saveRGset){
writeLines("Saving RGSet object")
qs::qsave(RGset,file=paste0("RData/",project.name,"_RGset.qs"))
}
###########################################################
## Quality Control
###########################################################
writeLines("Performing quality control of samples")
writeLines(paste0(rep("_",100),collapse = ""))
invisible(gc())
dir.create("QC")
##general QC report
if(plot.minfi.qcReport){
writeLines("minfi qcReport")
qcReport(rgSet = RGset,pdf = "QC/QC_RGset_minfi.qc.Report.pdf",
sampNames = pData(RGset)[,Sample_Name_Analysis_column],
sampGroups = factor(pData(RGset)[,Sample_Group_Analysis_column],levels = unique(pData(RGset)[,Sample_Group_Analysis_column])),
maxSamplesPerPage = ifelse(nrow(pData(RGset))>100,100,nrow(pData(RGset)))
)
invisible(gc())
}
###########################################################
## Start normalization
###########################################################
invisible(gc())
writeLines("Normalization")
writeLines(paste0(rep("_",100),collapse = ""))
if(Normalization.type=="Functional"){
set.seed(6)
warning("Using 'Functional Normalization' to normalize data. Between array normalization, beta values will change upon batches! Returning an grSet.")
RGset.norm <- preprocessFunnorm(rgSet = RGset,verbose = T)
}else if(Normalization.type=="Quantile"){
warning("Using 'Quantile normalization' to normalize data. Between array normalization, beta values will change upon batches! Returning an grSet.")
RGset.norm <- preprocessQuantile(object = RGset,verbose = T)
}else if(Normalization.type=="SWAN"){
set.seed(6)
writeLines("Using 'SWAN normalization' to normalize data")
RGset.norm <- preprocessSWAN(rgSet = RGset,verbose = T)
invisible(gc())
}else if(Normalization.type=="ssNob"){
set.seed(6)
writeLines("Using 'ssNob normalization' to normalize data")
RGset.norm <- preprocessNoob(rgSet = RGset,verbose = T)
invisible(gc())
}else{
writeLines("Using 'Illumina normalization' to normalize data")
RGset.norm <- preprocessIllumina(object = RGset)
invisible(gc())
}
invisible(gc())
## Save the normalized object without any further filtering
if(saveRGset.norm){
writeLines("Saving normalized RGSet object")
qs::qsave(RGset.norm,file=paste0("RData/",project.name,"_RGset.normalized.",Normalization.type,".qs"))
}
##
## Transform normalized object to Genomic intervals
##
if(is(RGset.norm,"MethylSet")){
RGset.norm <- mapToGenome(RGset.norm)
invisible(gc())
}
##
## perform minfi QC, including signal intensities and sex prediction, on raw and normalized data
##
writeLines("Performing qc on raw signal intensitites")
RGset.qc <- minfiQC(object = preprocessRaw(RGset),fixOutliers = T,verbose = T)$object
writeLines("Performing qc on normalized signal intensitites")
RGset.norm <- minfiQC(object = RGset.norm,fixOutliers = T,verbose = T)$object
invisible(gc())
pdf("QC/QC_minfiQC.pdf",height = 5,width = 6,useDingbats = F)
##
## Sex predictions
##
plotSex(object = RGset.qc)
title(main="Sex prediction with Raw signal intensities")
plotSex(object = RGset.norm)
title(main=paste0("Sex prediction with ",Normalization.type," normalized signal intensities"))
plotSex(object = RGset.qc,id = pData(RGset.qc)[,Sample_Name_Analysis_column])
title(main="Sex prediction with Raw signal intensities with sample names")
plotSex(object = RGset.norm,id = pData(RGset.norm)[,Sample_Name_Analysis_column])
title(main=paste0("Sex prediction with ",Normalization.type," normalized signal intensities"))
##
## Signal intensities
##
plotQC(qc = RGset.qc)
title(main="Raw signal intensities")
plotQC(qc = RGset.norm)
title(main=paste0(Normalization.type," normalized signal intensities"))
## QC raw data
##change qc names
colnames(pData(RGset.qc))[grep("^mMed$|^uMed$|^xMed$|^yMed$",colnames(pData(RGset.qc)))] <-
paste0("QC_raw_",grep("^mMed$|^uMed$|^xMed$|^yMed$",colnames(pData(RGset.qc)),value = T))
colnames(pData(RGset.qc))[which(colnames(pData(RGset.qc))=="predictedSex")] <- "QC_raw_Sex"
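## Flag samples whose mean of the log2 median methylated and unmethylated intensities falls below 10.5 (minfi's default badSampleCutoff)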
pData(RGset.qc)$QC_good_signal_intensity_raw_minfi <-
ifelse(((pData(RGset.qc)$QC_raw_mMed + pData(RGset.qc)$QC_raw_uMed)/2)>=10.5,TRUE,FALSE)
## QC normalized data
colnames(pData(RGset.norm))[grep("^mMed$|^uMed$|^xMed$|^yMed$",colnames(pData(RGset.norm)))] <-
paste0("QC_normalized_",grep("^mMed$|^uMed$|^xMed$|^yMed$",colnames(pData(RGset.norm)),value = T))
colnames(pData(RGset.norm))[which(colnames(pData(RGset.norm))=="predictedSex")] <- "QC_normalized_Sex"
pData(RGset.norm)$QC_good_signal_intensity_normalized_minfi <-
ifelse(((pData(RGset.norm)$QC_normalized_mMed + pData(RGset.norm)$QC_normalized_uMed)/2)>=10.5,TRUE,FALSE)
## Merge Raw and normalized QC into the normalized RGset
pData(RGset.norm) <- cbind(pData(RGset.norm)[,grep("^QC_",colnames(pData(RGset.norm)),invert = T)],
pData(RGset.qc)[,grep("^QC_",colnames(pData(RGset.qc)))],
pData(RGset.norm)[,grep("^QC_",colnames(pData(RGset.norm)))]
)
## plot minfiQC
qc.raw.ggplot <- ggplot(data.frame(pData(RGset.qc)),
aes(x = QC_raw_mMed,
y= QC_raw_uMed,
color=factor(QC_good_signal_intensity_raw_minfi,levels = c("FALSE","TRUE"),labels = c("Bad","Good"))
)
)+
geom_point(pch=1,size=2)+
geom_text_repel(aes(label=ifelse(!pData(RGset.qc)$QC_good_signal_intensity_raw_minfi,rownames(pData(RGset.qc)),NA)),
size=1,max.overlaps = Inf,min.segment.length = 0,segment.size=0.2
)+
guides(color=guide_legend(title = "Signal intensity"))+
xlab("Meth median raw intensity (log2)")+
ylab("Unmeth median raw intensity (log2)")+
ggtitle(label = "Raw signal intensities",
subtitle = paste0("Good=",length(which(pData(RGset.qc)$QC_good_signal_intensity_raw_minfi)),
", Bad=",length(which(!pData(RGset.qc)$QC_good_signal_intensity_raw_minfi))
)
)+
coord_cartesian(xlim = c(8,14),ylim = c(8,14))+
geom_abline(intercept = 10.5*2,slope = -1,lty=2)+
theme_bw()
print(qc.raw.ggplot)
## plot minfiQC
qc.norm.ggplot <- ggplot(data.frame(pData(RGset.norm)),
aes(x = QC_normalized_mMed,
y= QC_normalized_uMed,
color=factor(QC_good_signal_intensity_normalized_minfi,levels = c("FALSE","TRUE"),labels = c("Bad","Good"))
)
)+
geom_point(pch=1,size=2)+
geom_text_repel(aes(label=ifelse(!pData(RGset.norm)$QC_good_signal_intensity_normalized_minfi,rownames(pData(RGset.norm)),NA)),
size=1,max.overlaps = Inf,min.segment.length = 0,segment.size=0.2
)+
guides(color=guide_legend(title = "Signal intensity"))+
xlab("Meth median intensity (log2)")+
ylab("Unmeth median intensity (log2)")+
ggtitle(label = paste0(Normalization.type," normalized signal intensities"),
subtitle = paste0("Good=",length(which(pData(RGset.norm)$QC_good_signal_intensity_normalized_minfi)),
", Bad=",length(which(!pData(RGset.norm)$QC_good_signal_intensity_normalized_minfi))
)
)+
coord_cartesian(xlim = c(8,14),ylim = c(8,14))+
geom_abline(intercept = 10.5*2,slope = -1,lty=2)+
theme_bw()
print(qc.norm.ggplot)
dev.off()
##release RAM
rm(RGset.qc)
invisible(gc())
## Filtering based on detection p-values
writeLines("Finding probes with bad signal intensities")
##get full annotation for reporting info of removed CpGs
Full.CpGs.annot <- getAnnotation(RGset.norm)
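## Detection p-values are computed per probe on the raw RGset, not on the normalized object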
CpGs.detection.pvals <- detectionP(rgSet = RGset)
invisible(gc())
## visualize detection pvalues from all samples
pData(object = RGset.norm)$QC_mean_detection_pval <- colMeans(CpGs.detection.pvals)
pData(object = RGset.norm)$QC_good_mean_detection_pval <-
ifelse(pData(object = RGset.norm)$QC_mean_detection_pval<=Detection.Pval.Mean.Sample.cutoff,TRUE,FALSE)
#plot mean detection p-values across samples
pMean.detection.pval <- ggplot(data.frame(pData(RGset.norm)[order(pData(RGset.norm)$QC_mean_detection_pval),]),
aes(x = factor(get(Sample_Name_Analysis_column),levels = unique(get(Sample_Name_Analysis_column))),
y = QC_mean_detection_pval,
fill=QC_good_mean_detection_pval
)
)+
geom_col()+
geom_hline(yintercept = Detection.Pval.Mean.Sample.cutoff,color="red",linetype="dashed",lwd=0.2)+
coord_cartesian(ylim = c(0,0.05))+
facet_grid(.~get(Sample_Group_Analysis_column),scales = "free_x",space = "free_x")+
xlab("Samples")+
ylab("Mean detection pvalue")+
theme_bw()+
theme(axis.text.x = element_blank(),
panel.grid.major.x = element_blank(),
strip.text.x = element_text(size = 5,angle = 90)
)
ggsave(filename = "QC/QC_mean_detect_pval.pdf",plot = pMean.detection.pval,height = 3,width = 10,useDingbats=F)
## Select good samples based on Raw signal intensity and good mean detection pvalue
pData(RGset.norm)$QC_good_sample_quality <- ifelse(pData(RGset.norm)$QC_good_signal_intensity_raw_minfi & pData(RGset.norm)$QC_good_mean_detection_pval, TRUE, FALSE)
QC_good_sample_quality <- pData(RGset.norm)[which(pData(RGset.norm)$QC_good_sample_quality),Sample_Name_Analysis_column]
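## Keep probes passing the detection p-value cutoff either in all good-quality samples (Detect.Pval.pct.samples == 100) or in at least that percentage of them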
if(Detect.Pval.pct.samples==100){
CpGs.pval.ok <- rownames(CpGs.detection.pvals)[which(rowAlls(x = CpGs.detection.pvals[,QC_good_sample_quality]<=Detection.Pval.CpGs.cutoff,value = TRUE))] ## all samples with lower pval
writeLines(paste0("Number of probes with good detection pvalue criteria in all samples: ",length(CpGs.pval.ok)))
}else{
CpGs.pval.ok <- CpGs.detection.pvals<=Detection.Pval.CpGs.cutoff
CpGs.pval.ok <- names(which(round(rowMeans(CpGs.pval.ok[,QC_good_sample_quality],na.rm = T),1)>=(as.numeric(Detect.Pval.pct.samples)/100))) # Retain positions that have a good p-value in at least this fraction of the samples
writeLines(paste0("Number of probes with good detection pvalue criteria in ",Detect.Pval.pct.samples,"% of the samples: ",length(CpGs.pval.ok)))
}
## save detection p values matrix
qsave(CpGs.detection.pvals,"QC/QC_Detection_pvals.qs")
##calculate the mean detection p-value of each CpG across samples, to report later for the removed CpGs
CpGs.mean.Detection.pval <- rowMeans(CpGs.detection.pvals[,QC_good_sample_quality],na.rm = T)
Fraction.samples.above.Detection.pval.cutoff <- rowMeans(CpGs.detection.pvals[,QC_good_sample_quality]<=Detection.Pval.CpGs.cutoff,na.rm = T)
##Release RAM
rm(CpGs.detection.pvals,RGset)
invisible(gc())
##
## Retain CpGs with good detection p value.
##
if(length(CpGs.pval.ok)<length(rownames(RGset.norm))){
writeLines("Removing CpGs with bad detection pvalues")
RGset.norm <- RGset.norm[rownames(RGset.norm)[which(rownames(RGset.norm) %in% CpGs.pval.ok)],] ## in the b4 annotation, some CpGs are not mapped to the annotation package.
}
invisible(gc())
## Remove SNP-associated probes
if(RemoveSNPs){
writeLines(paste0("Removing SNPs"))
RGset.norm <- dropMethylationLoci(object = RGset.norm,dropRS = T,dropCH = T)
RGset.norm <- dropLociWithSnps(object = RGset.norm,snps=c("SBE","CpG"), maf=MAF)
}
invisible(gc())
## Remove individual-specific CpGs, if a predefined exclusion list was provided
if(!is.null(excludingCpGs)){
writeLines("Getting out individual-specific CpGs...")
RGset.norm <- RGset.norm[which(!rownames(RGset.norm)%in%excludingCpGs),]
}
invisible(gc())
## Filter sex chromosomes
if(RemoveSexualChrs){
writeLines("Removing sex chromosomes")
RGset.norm <- RGset.norm[which(as.character(seqnames(RGset.norm))!="chrX" & as.character(seqnames(RGset.norm))!="chrY"),]
seqlevels(RGset.norm) <- as.character(unique(seqnames(RGset.norm)))
}
invisible(gc())
##report removed CpGs
Removed.CpGs <- Full.CpGs.annot[setdiff(rownames(Full.CpGs.annot),rownames(RGset.norm)),
c("chr","pos","strand","Name","Probe_rs","Probe_maf","CpG_rs","CpG_maf","SBE_rs",
"SBE_maf","Relation_to_Island","UCSC_RefGene_Group","UCSC_RefGene_Name")]
if(!is.null(excludingCpGs)){
Removed.CpGs$Prior.removal.CpGs <- F
Removed.CpGs$Prior.removal.CpGs[which(Removed.CpGs$Name%in%excludingCpGs)] <- T
}
invisible(gc())
## Add mean detection pvalue across samples
Removed.CpGs$CpGs.mean.Detection.pval <- CpGs.mean.Detection.pval[Removed.CpGs$Name]
Removed.CpGs$Fraction.samples.above.Detection.pval.cutoff <- Fraction.samples.above.Detection.pval.cutoff[Removed.CpGs$Name]
fwrite(as.data.frame(Removed.CpGs),"QC/QC_removed.CpGs.tsv",sep="\t")
###########################################################
## 3. Get beta values
###########################################################
writeLines("Getting beta values")
## Get betas
betas <- as.data.frame(getBeta(RGset.norm))
invisible(gc())
# ## Get the matrix without NAs converted by the normalization
# betasNAs <- betas
# whichBetasNAs <- whichBetasNAs[rownames(betas),]
# betasNAs[whichBetasNAs] <- NA
# rm("whichBetasNAs")
# invisible(gc())
##
## Plot PCA and densityplot of samples with bad quality if present
##
if(any(!pData(RGset.norm)$QC_good_sample_quality)){
pdf("QC/QC_Bad_samples_density_PCA.pdf",height = 5,width = 6,useDingbats = F)
writeLines("Ploting density estimates of bad quality samples")
##Densityplot
densityPlot(dat = as.matrix(betas),sampGroups = pData(RGset.norm)[,"QC_good_sample_quality"],
main = paste0(Normalization.type," normalized data for good (TRUE) and bad (FALSE) samples from QC")
)
## PCA
writeLines("Ploting PCA of bad quality samples")
set.seed(6)
pca <- prcomp_irlba(x = t(betas),n = 5)
dfpca <- cbind(pca$x[,1:5],data.frame(pData(RGset.norm)))
pca.ggplot <- ggplot(dfpca,
aes(x = PC1,
y= PC2,
fill=QC_good_sample_quality
)
)+
xlab(paste0("PC1(",summary(pca)$importance["Proportion of Variance","PC1"]*100,"%)"))+
ylab(paste0("PC2(",summary(pca)$importance["Proportion of Variance","PC2"]*100,"%)"))+
geom_point(pch=21,size=2.5)+
theme_bw()+
theme(legend.key.size = unit(7,"pt"))
print(pca.ggplot)
dev.off()
}
##
## Plot density estimates for good quality data and pca
##
if(any(pData(RGset.norm)$QC_good_sample_quality)){
pdf("QC/QC_Good_samples_density_PCA.pdf",height = 5,width = 6,useDingbats = F)
writeLines("Ploting density estimates of good quality samples")
##Densityplot
densityPlot(dat = as.matrix(betas)[,pData(RGset.norm)[which(pData(RGset.norm)$QC_good_sample_quality),Sample_Name_Analysis_column]],
sampGroups = pData(RGset.norm)[which(pData(RGset.norm)$QC_good_sample_quality),Sample_Group_Analysis_column],
main = paste0(Normalization.type," normalized data for good samples from QC")
)
writeLines("Ploting PCA of good quality samples")
set.seed(6)
pca <- prcomp_irlba(x = t(betas[,pData(RGset.norm)[which(pData(RGset.norm)$QC_good_sample_quality),Sample_Name_Analysis_column]]),n = 5)
dfpca <- cbind(pca$x[,1:5],data.frame(pData(RGset.norm)[which(pData(RGset.norm)$QC_good_sample_quality),]))
pca.ggplot <- ggplot(dfpca,
aes(x = PC1,
y= PC2,
fill=get(Sample_Group_Analysis_column)
)
)+
geom_point(pch=21,size=2.5)+
guides(fill=guide_legend(title = "Sample group"))+
xlab(paste0("PC1(",summary(pca)$importance["Proportion of Variance","PC1"]*100,"%)"))+
ylab(paste0("PC2(",summary(pca)$importance["Proportion of Variance","PC2"]*100,"%)"))+
theme_bw()+
theme(legend.key.size = unit(7,"pt"))
print(pca.ggplot)
dev.off()
}
###########################################################
## 4. Export betas
###########################################################
writeLines("Exporting normalized betas")
writeLines(paste0(rep("_",100),collapse = ""))
dir.create("Normalized.Betas")
data.table::fwrite(x = cbind(CpGs=rownames(betas),as.data.frame(betas)),
file = paste0("Normalized.Betas/",project.name,"_Betas_",Normalization.type,".tsv"),sep = "\t")
## export all phenodata associated
data.table::fwrite(x = as.data.frame(pData(RGset.norm)),file = paste0("Normalized.Betas/QC_",project.name,".tsv"),sep = "\t")
# export sessionInfo and date of the analysis
writeLines(text = capture.output(sessionInfo()),con = paste0(project.name,"_sessionInfo_",gsub(" |:","_",Sys.time()),".txt"))
writeLines("ANALYSIS DONE! Congrats! :-)")
## clean memory and session
rm(list = ls())
invisible(gc())
}
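As a minimal sketch of reusing the outputs (assuming the default file names written above, a hypothetical project/batch name and location, and that the beta-matrix columns carry the analysis sample names), the exported betas and QC table of one batch can be reloaded for downstream analyses:
## Minimal sketch: reload the exported betas and per-sample QC of one batch
library(data.table)
project.name <- "QC_450k_example"                     ## hypothetical batch/project name
project.dir <- file.path("../../Data",project.name)   ## hypothetical location of the QC_<batch> folder
betas <- fread(file.path(project.dir,"Normalized.Betas",paste0(project.name,"_Betas_ssNob.tsv")),data.table = FALSE)
rownames(betas) <- betas$CpGs
betas$CpGs <- NULL
pheno <- fread(file.path(project.dir,"Normalized.Betas",paste0("QC_",project.name,".tsv")),data.table = FALSE)
## keep only the samples flagged as good quality by the QC above
good.samples <- pheno[pheno$QC_good_sample_quality,"SAMPLE_NAME_ANALYSIS"]
betas <- betas[,intersect(colnames(betas),good.samples),drop = FALSE]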
The versions of packages used are detailed in the manuscript.