We first load all required packages and data
##
## Load necessary packages and set global options
##
library(GenomicRanges)
library(data.table)
library(janitor)
library(dplyr)
library(ggpubr)
library(pals)
library(openxlsx)
library(karyoploteR)
options(stringsAsFactors = F,max.print = 10000,error=NULL,
"openxlsx.dateFormat" = "dd/mm/yyyy"
)
## load CNAs
cll.cnas <-
openxlsx::read.xlsx("../Revision/Data/CNAs/CLLs_ICGC_CNAs_fCpGs_03_02_24.xlsx") %>%
clean_names()
head(cll.cnas)
mcl.cnas <-
openxlsx::read.xlsx("../Revision/Data/CNAs/MCL_CNAs_merged_fCpGs_04_04_24.xlsx") %>%
clean_names()
fCpGs <-
openxlsx::read.xlsx("../Manuscript/Submission/SUBMITTED_NATURE_171023/submission/SupplementaryTables.xlsx",
sheet = "Supplementary Table 3") %>%
clean_names()
fCpGs[1:5,1:5] %>% head
dim(fCpGs)
fCpGs.gr <- GRanges(fCpGs$chromosome,IRanges(fCpGs$position),strand=fCpGs$strand)
mcols(fCpGs.gr) <-
fCpGs %>%
select(-c(chromosome,position,strand))
fCpGs.gr
Here, the fCpGs are represented as rectangles in grey shades. The height of the y axis represents the realtive frequency of each alteration.
pct.max.gain <-
cll.cnas %>%
mutate(cna_type_simplified=case_when(grepl("gain",cna_type,ignore.case=T) ~ "Gain",
grepl("loss",cna_type,ignore.case=T) ~ "Loss",
.default = cna_type)
) %>%
filter(cna_type_simplified=="Gain") %>%
mutate(N=nrow(.)) %>%
group_by(chr,cna_type) %>%
summarise(n=n(),
pct=round((n/unique(N))*100,0)
) %>%
arrange(desc(pct)) %>%
ungroup() %>%
slice_head(n = 1) %>%
pull(pct,chr)
pct.max.gain
pct.max.loss <-
cll.cnas %>%
mutate(cna_type_simplified=case_when(grepl("gain",cna_type,ignore.case=T) ~ "Gain",
grepl("loss",cna_type,ignore.case=T) ~ "Loss",
.default = cna_type)
) %>%
filter(cna_type_simplified=="Loss") %>%
mutate(N=nrow(.)) %>%
group_by(chr,cna_type) %>%
summarise(n=n(),
pct=round((n/unique(N))*100,0)
) %>%
arrange(desc(pct)) %>%
ungroup() %>%
slice_head(n = 1) %>%
pull(pct,chr)
pct.max.loss
{
##plot regions
kp <- plotKaryotype(genome="hg19",
chromosomes = "autosomal",
plot.type = 3,
main = NULL,
srt=90,
cex=0.35,
lwd=0.2
)
##plot gains
# kpDataBackground(kp,data.panel = 1,color = "grey98")
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0,y1 = 1,
data.panel = 1
)
kpAbline(karyoplot = kp,
h = seq(0,1,by=0.25),
lty="dotted",
lwd=0.2,
col=adjustcolor(col = "grey20",alpha.f = 1),
data.panel = 1
)
kpPlotCoverage(karyoplot = kp,
data = cll.cnas %>% filter(cna_type=="CN_Gain") %>% GRanges(),
col="red3",
# border = "red3",
lwd=1e-10,
data.panel = 1)
kpAxis(karyoplot = kp,
label.margin = 0.5,
cex=0.3,
lwd=0.2,
labels = c(seq(0,75,by=25),paste0("100 (",pct.max.gain,"%)")),
numticks = 5
)
kpAddLabels(karyoplot = kp,
labels = c("Scaled frequency (%) to the most prevalent alteration"),
label.margin = 0.05,
pos = 1,
srt=90,
cex=0.3,
data.panel = "ideogram"
)
## plot losses
# kpDataBackground(kp,data.panel = 2,color = "grey98")
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0,y1 = 1,
data.panel = 2
)
kpAbline(karyoplot = kp,
h = seq(0,1,by=0.25),
lty="dotted",
lwd=0.2,col=adjustcolor(col = "grey20",alpha.f = 1),
data.panel = 2
)
kpAxis(karyoplot = kp,
label.margin = 0,
cex=0.3,
lwd=0.2,
labels = c(seq(0,75,by=25),paste0("100 (",pct.max.loss,"%)")),
numticks = 5,
data.panel = 2)
kpPlotCoverage(karyoplot = kp,
data = cll.cnas %>% filter(cna_type=="CN_Loss") %>% GRanges(),
col="deepskyblue2",
lwd=1e-10,
data.panel = 2)
##add fCPGs
kpSegments(karyoplot = kp,
data = fCpGs.gr,
y0 = 0,y1 = 1,
lwd=0.15,
col=adjustcolor(col = "grey20",alpha.f = 0.1),
data.panel = 1)
kpSegments(karyoplot = kp,
data = fCpGs.gr,
y0 = 0,y1 = 1,
lwd=0.15,
col=adjustcolor(col = "grey20",alpha.f = 0.1),
data.panel = 2)
}
# order matrix
cll.cnas <-
cll.cnas %>%
group_by(icgc_case) %>%
mutate(n_cnas=case_when(is.na(cna_type) ~ 0,
.default = n(),
),
bps_cnas=case_when(is.na(cna_type) ~ 0,
.default = sum(end-start),
)
) %>%
ungroup() %>%
arrange(bps_cnas) %>%
data.frame()
{
##plot regions
kp <- plotKaryotype(genome="hg19",
chromosomes = "autosomal",
ideogram.plotter = NULL,
plot.type = 4,
main = NULL,
srt=90,
cex=0.35,
lwd=0.2
)
##plot cnas
cases <- cll.cnas$icgc_case %>% unique()
# cases <- cases[c(460,480)]
total.tracks <- length(cases)
for(i in seq_len(total.tracks)) {
at <- autotrack(current.track = i, total.tracks = total.tracks,margin = 0,r0 = 0,r1 = 0.97)
# to better distinguish each patients, neutral CNA
if(i%%2==1){
kpDataBackground(kp, r0=at$r0, r1=at$r1, color = "grey98",lwd=0.1)
}
# }else{
# kpDataBackground(kp, r0=at$r0, r1=at$r1, color = "grey95",lwd=0.1)
# }
if(unique(cll.cnas[which(cll.cnas$icgc_case==cases[i]),"n_cnas"]) >0){
#CNN-LOH
kpPlotRegions(kp,
data= cll.cnas %>% filter(icgc_case==cases[i],cna_type=="CNN-LOH") %>% GRanges(),
col = okabe(4)[4],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
## homo losses
kpPlotRegions(kp,
data= cll.cnas %>% filter(icgc_case==cases[i],cna_type=="Homozygous_Copy_Loss") %>% GRanges(),
col = coolwarm(6)[1],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
##loss
kpPlotRegions(kp,
data= cll.cnas %>% filter(icgc_case==cases[i],cna_type=="CN_Loss") %>% GRanges(),
col = coolwarm(6)[2],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
#gain
kpPlotRegions(kp,
data= cll.cnas %>% filter(icgc_case==cases[i],cna_type=="CN_Gain") %>% GRanges(),
col = coolwarm(6)[5],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
# high copy gain
kpPlotRegions(kp,
data= cll.cnas %>% filter(icgc_case==cases[i],cna_type=="High_Copy_Gain") %>% GRanges(),
col = coolwarm(6)[6],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
}
}
##plot fCpGs regions
kpPlotRegions(kp,
data= fCpGs.gr,
col = "grey20",
lwd=0.1,
r0=0.98, r1=1,
data.panel = 1)
kpAddLabels(karyoplot = kp,labels = "fCpGs",pos = 1,r0 = 0.99,r1 = 1,cex=0.4,label.margin = 0.02)
##final parameters
kpAbline(karyoplot = kp,
v = kp$cytobands[which(kp$cytobands$gieStain=="acen"),] %>%
reduce() %>% resize(width = 1,fix = "center") %>% start(),
lty="dotted",
lwd=0.3,
col=adjustcolor(col = "grey20",alpha.f = 0.6),
data.panel = 1
)
kpAddCytobands(karyoplot = kp,lwd=0.01)
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0,y1 = 0.97,
data.panel = 1
)
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0.98,y1 = 1,
data.panel = 1
)
legend("top",
legend = c("Homozygous deletion",
"CNA loss",
"CNN-LOH",
"Neutral CN",
"CNA gain",
"High CNA gain"),
fill = c(coolwarm(6)[1],
coolwarm(6)[2],
okabe(4)[4],
"grey70",
coolwarm(6)[5],
coolwarm(6)[6]
),
cex=0.4,
border = NA,
# pch=15,
# pt.lwd = 0.1,
horiz = T,
xpd = T,
inset = c(-0.2),
bty="n")
}
##
## MCL
##
mcl.cnas %>% count(event)
pct.max.gain <-
mcl.cnas %>%
mutate(cna_type_simplified=case_when(grepl("gain",event,ignore.case=T) ~ "Gain",
grepl("loss",event,ignore.case=T) ~ "Loss",
.default = event)
) %>%
filter(cna_type_simplified=="Gain") %>%
mutate(N=nrow(.)) %>%
group_by(chr,cna_type_simplified) %>%
summarise(n=n(),
pct=round((n/unique(N))*100,1)
) %>%
arrange(desc(pct)) %>%
ungroup() %>%
slice_head(n = 1) %>%
pull(pct,chr)
pct.max.gain
pct.max.loss <-
mcl.cnas %>%
mutate(cna_type_simplified=case_when(grepl("gain",event,ignore.case=T) ~ "Gain",
grepl("loss",event,ignore.case=T) ~ "Loss",
.default = event)
) %>%
filter(cna_type_simplified=="Loss") %>%
mutate(N=nrow(.)) %>%
group_by(chr,cna_type_simplified) %>%
summarise(n=n(),
pct=round((n/unique(N))*100,0)
) %>%
arrange(desc(pct)) %>%
ungroup() %>%
slice_head(n = 1) %>%
pull(pct,chr)
pct.max.loss
{
##plot regions
kp <- plotKaryotype(genome="hg19",
chromosomes = "autosomal",
plot.type = 3,
main = NULL,
srt=90,
cex=0.35,
lwd=0.1
)
##plot gains
# kpDataBackground(kp,data.panel = 1,color = "grey98")
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0,y1 = 1,
data.panel = 1
)
kpAbline(karyoplot = kp,
h = seq(0,1,by=0.25),
lty="dotted",
lwd=0.2,
col=adjustcolor(col = "grey20",alpha.f = 1),
data.panel = 1
)
kpPlotCoverage(karyoplot = kp,
data = mcl.cnas %>% filter(grepl("gain",event,ignore.case=T)) %>% GRanges(),
col="red3",
# border = "red3",
lwd=1e-10,
data.panel = 1)
kpAxis(karyoplot = kp,
label.margin = 0.5,
cex=0.3,
lwd=0.2,
labels = c(seq(0,75,by=25),paste0("100 (",pct.max.gain,"%)")),
numticks = 5
)
kpAddLabels(karyoplot = kp,
labels = c("Scaled frequency (%) to the most prevalent alteration"),
label.margin = 0.05,
pos = 1,
srt=90,
cex=0.3,
data.panel = "ideogram"
)
## plot losses
# kpDataBackground(kp,data.panel = 2,color = "grey98")
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0,y1 = 1,
data.panel = 2
)
kpAbline(karyoplot = kp,
h = seq(0,1,by=0.25),
lty="dotted",
lwd=0.2,col=adjustcolor(col = "grey20",alpha.f = 1),
data.panel = 2
)
kpAxis(karyoplot = kp,
label.margin = 0,
cex=0.3,
lwd=0.2,
labels = c(seq(0,75,by=25),paste0("100 (",pct.max.loss,"%)")),
numticks = 5,
data.panel = 2)
kpPlotCoverage(karyoplot = kp,
data = mcl.cnas %>% filter(grepl("loss",event,ignore.case=T)) %>% GRanges(),
col="deepskyblue2",
lwd=1e-10,
data.panel = 2)
##add fCPGs
kpSegments(karyoplot = kp,
data = fCpGs.gr,
y0 = 0,y1 = 1,
lwd=0.15,
col=adjustcolor(col = "grey20",alpha.f = 0.1),
data.panel = 1)
kpSegments(karyoplot = kp,
data = fCpGs.gr,
y0 = 0,y1 = 1,
lwd=0.15,
col=adjustcolor(col = "grey20",alpha.f = 0.1),
data.panel = 2)
}
Representation as in the manuscript
# prepare cna table
mcl.cnas %>% count(event)
cll.cnas %>% count(cna_type)
##match to CLL nomeclature to apply same code
gsub(" ","$",unique(mcl.cnas$event))
##preapre data
mcl.cnas <-
mcl.cnas %>%
mutate(cna_type=event,
icgc_case=sample_id,
cna_type=case_when(cna_type=="Homozygous Loss" ~ "Homozygous_Copy_Loss",
cna_type=="Loss" ~ "CN_Loss",
cna_type==" Loss" ~ "CN_Loss",
cna_type==" Gain" ~ "CN_Gain",
cna_type=="Gain" ~ "CN_Gain",
cna_type=="High Gain" ~ "High_Copy_Gain",
cna_type=="High Copy Gain" ~ "High_Copy_Gain",
.default = NA_character_
),
) %>%
group_by(icgc_case) %>%
mutate(n_cnas=case_when(is.na(cna_type) ~ 0,
.default = n(),
),
bps_cnas=case_when(is.na(cna_type) ~ 0,
.default = sum(end-start),
)
) %>%
ungroup() %>%
arrange(bps_cnas) %>%
data.frame()
head(mcl.cnas)
{
##plot regions
kp <- plotKaryotype(genome="hg19",
chromosomes = "autosomal",
ideogram.plotter = NULL,
plot.type = 4,
main = NULL,
srt=90,
cex=0.35,
lwd=0.2
)
##plot cnas
cases <- mcl.cnas$icgc_case %>% unique()
# cases <- cases[c(460,480)]
total.tracks <- length(cases)
for(i in seq_len(total.tracks)) {
at <- autotrack(current.track = i, total.tracks = total.tracks,margin = 0,r0 = 0,r1 = 0.97)
# to better distinguish each patients, neutral CNA
if(i%%2==1){
kpDataBackground(kp, r0=at$r0, r1=at$r1, color = "grey98",lwd=0.1)
}
# }else{
# kpDataBackground(kp, r0=at$r0, r1=at$r1, color = "grey95",lwd=0.1)
# }
if(unique(mcl.cnas[which(mcl.cnas$icgc_case==cases[i]),"n_cnas"]) >0){
#CNN-LOH
kpPlotRegions(kp,
data= mcl.cnas %>% filter(icgc_case==cases[i],cna_type=="CNN-LOH") %>% GRanges(),
col = okabe(4)[4],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
## homo losses
kpPlotRegions(kp,
data= mcl.cnas %>% filter(icgc_case==cases[i],cna_type=="Homozygous_Copy_Loss") %>% GRanges(),
col = coolwarm(6)[1],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
##loss
kpPlotRegions(kp,
data= mcl.cnas %>% filter(icgc_case==cases[i],cna_type=="CN_Loss") %>% GRanges(),
col = coolwarm(6)[2],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
#gain
kpPlotRegions(kp,
data= mcl.cnas %>% filter(icgc_case==cases[i],cna_type=="CN_Gain") %>% GRanges(),
col = coolwarm(6)[5],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
# high copy gain
kpPlotRegions(kp,
data= mcl.cnas %>% filter(icgc_case==cases[i],cna_type=="High_Copy_Gain") %>% GRanges(),
col = coolwarm(6)[6],
lwd=0.1,
r0=at$r0, r1=at$r1,
data.panel = 1)
}
}
##plot fCpGs regions
kpPlotRegions(kp,
data= fCpGs.gr,
col = "grey20",
lwd=0.1,
r0=0.98, r1=1,
data.panel = 1)
kpAddLabels(karyoplot = kp,labels = "fCpGs",pos = 1,r0 = 0.99,r1 = 1,cex=0.4,label.margin = 0.02)
##final parameters
kpAbline(karyoplot = kp,
v = kp$cytobands[which(kp$cytobands$gieStain=="acen"),] %>%
reduce() %>% resize(width = 1,fix = "center") %>% start(),
lty="dotted",
lwd=0.3,
col=adjustcolor(col = "grey20",alpha.f = 0.6),
data.panel = 1
)
kpAddCytobands(karyoplot = kp,lwd=0.01)
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0,y1 = 0.97,
data.panel = 1
)
kpRect(karyoplot = kp,
data = kp$genome,
lwd=0.1,
y0 = 0.98,y1 = 1,
data.panel = 1
)
legend("top",
legend = c("Homozygous deletion",
"CNA loss",
"CNN-LOH",
"Neutral CN",
"CNA gain",
"High CNA gain"),
fill = c(coolwarm(6)[1],
coolwarm(6)[2],
okabe(4)[4],
"grey70",
coolwarm(6)[5],
coolwarm(6)[6]
),
cex=0.4,
border = NA,
# pch=15,
# pt.lwd = 0.1,
horiz = T,
xpd = T,
inset = c(-0.2),
bty="n")
}
The version of some packages may differ with regard the manuscipt, as this notebook was prepared after the whole revision process.
print(sessionInfo())
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Ventura 13.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Madrid
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] karyoploteR_1.34.2 regioneR_1.40.1 openxlsx_4.2.8
## [4] pals_1.10 ggpubr_0.6.1 ggplot2_3.5.2
## [7] dplyr_1.1.4 janitor_2.2.1 data.table_1.17.8
## [10] GenomicRanges_1.60.0 GenomeInfoDb_1.44.2 IRanges_2.42.0
## [13] S4Vectors_0.46.0 BiocGenerics_0.54.0 generics_0.1.4
## [16] BiocStyle_2.36.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1
## [3] jsonlite_2.0.0 magrittr_2.0.3
## [5] magick_2.8.7 GenomicFeatures_1.60.0
## [7] farver_2.1.2 rmarkdown_2.29
## [9] BiocIO_1.18.0 zlibbioc_1.54.0
## [11] vctrs_0.6.5 memoise_2.0.1
## [13] Rsamtools_2.24.0 RCurl_1.98-1.17
## [15] base64enc_0.1-3 rstatix_0.7.2.999
## [17] tinytex_0.57 htmltools_0.5.8.1
## [19] S4Arrays_1.8.1 curl_7.0.0
## [21] broom_1.0.9 SparseArray_1.8.1
## [23] Formula_1.2-5 sass_0.4.10
## [25] bslib_0.9.0 htmlwidgets_1.6.4
## [27] lubridate_1.9.4 cachem_1.1.0
## [29] GenomicAlignments_1.44.0 lifecycle_1.0.4
## [31] pkgconfig_2.0.3 Matrix_1.7-4
## [33] R6_2.6.1 fastmap_1.2.0
## [35] GenomeInfoDbData_1.2.14 MatrixGenerics_1.20.0
## [37] snakecase_0.11.1 digest_0.6.37
## [39] colorspace_2.1-1 AnnotationDbi_1.70.0
## [41] bezier_1.1.2 Hmisc_5.2-3
## [43] RSQLite_2.4.3 timechange_0.3.0
## [45] httr_1.4.7 abind_1.4-8
## [47] compiler_4.5.0 bit64_4.6.0-1
## [49] withr_3.0.2 htmlTable_2.4.3
## [51] backports_1.5.0 BiocParallel_1.42.1
## [53] carData_3.0-5 DBI_1.2.3
## [55] maps_3.4.3 ggsignif_0.6.4
## [57] DelayedArray_0.34.1 rjson_0.2.23
## [59] tools_4.5.0 foreign_0.8-90
## [61] zip_2.3.3 nnet_7.3-20
## [63] glue_1.8.0 restfulr_0.0.16
## [65] grid_4.5.0 checkmate_2.3.3
## [67] cluster_2.1.8.1 gtable_0.3.6
## [69] BSgenome_1.76.0 tidyr_1.3.1
## [71] ensembldb_2.32.0 car_3.1-3
## [73] XVector_0.48.0 pillar_1.11.0
## [75] stringr_1.5.1 lattice_0.22-7
## [77] rtracklayer_1.68.0 bit_4.6.0
## [79] biovizBase_1.56.0 tidyselect_1.2.1
## [81] Biostrings_2.76.0 knitr_1.50
## [83] gridExtra_2.3 bookdown_0.44
## [85] ProtGenerics_1.40.0 SummarizedExperiment_1.38.1
## [87] xfun_0.53 Biobase_2.68.0
## [89] matrixStats_1.5.0 stringi_1.8.7
## [91] UCSC.utils_1.4.0 lazyeval_0.2.2
## [93] yaml_2.3.10 evaluate_1.0.5
## [95] codetools_0.2-20 tibble_3.3.0
## [97] BiocManager_1.30.26 cli_3.6.5
## [99] rpart_4.1.24 jquerylib_0.1.4
## [101] dichromat_2.0-0.1 Rcpp_1.1.0
## [103] mapproj_1.2.12 png_0.1-8
## [105] XML_3.99-0.19 parallel_4.5.0
## [107] blob_1.2.4 AnnotationFilter_1.32.0
## [109] bitops_1.0-9 VariantAnnotation_1.54.1
## [111] scales_1.4.0 purrr_1.1.0
## [113] crayon_1.5.3 bamsignals_1.40.0
## [115] rlang_1.1.6 KEGGREST_1.48.1