vignettes/introduction.Rmd
introduction.Rmd
select
package provides functions to generate the
background model and other utility functions.
# Optional one-time installation of the development version from GitHub
# (left commented out so the vignette does not reinstall on every run):
# install.packages("devtools")
# devtools::install_github("CSOgroup/select")
# Attach order matters: dplyr is attached after select, so dplyr's select()
# masks select::select() (see the attach message below). The main analysis
# call further down is therefore written as select::select().
library(select)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:select':
#>
#> select
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Check the data structure
## Load the example data set shipped with the package, then inspect it
data("luad_data", package = "select")
utils::str(luad_data)
#> List of 3
#> $ gam : num [1:502, 1:659] 0 0 0 0 0 0 0 0 1 0 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:502] "TCGA-05-4244-01" "TCGA-05-4249-01" "TCGA-05-4250-01" "TCGA-05-4382-01" ...
#> .. ..$ : chr [1:659] "MUT.ABL1" "MUT.ACVR1B" "MUT.ACVR2A" "MUT.AKT1" ...
#> $ alt : Named chr [1:659] "MUT" "MUT" "MUT" "MUT" ...
#> ..- attr(*, "names")= chr [1:659] "MUT.ABL1" "MUT.ACVR1B" "MUT.ACVR2A" "MUT.AKT1" ...
#> $ samples: Named chr [1:502] "LUAD" "LUAD" "LUAD" "LUAD" ...
#> ..- attr(*, "names")= chr [1:502] "TCGA-05-4244-01" "TCGA-05-4249-01" "TCGA-05-4250-01" "TCGA-05-4382-01" ...
select()
which generates the
background model and results.

- `M`: the list object of GAMs & TMB
- `sample.class`: a named vector of samples with covariates
- `alteration.class`: a named vector of alterations with covariates
# Run the analysis on the LUAD example data. The call is namespace-qualified
# because dplyr's select() masks select::select() once dplyr is attached.
alpi <- select::select(
  # Inputs taken from the three components of luad_data
  M                = luad_data$gam,
  sample.class     = luad_data$samples,
  alteration.class = luad_data$alt,

  # Run configuration
  folder   = './',
  r.seed   = 110,
  n.cores  = 1,
  vetos    = NULL,
  n.permut = 100,

  # Filtering of features and samples
  min.feature.support          = 5,
  min.feature.freq             = 0.001,
  remove.0.samples             = TRUE,
  remove.unknown.class.samples = TRUE,

  # Remaining settings, spelled out explicitly
  rho                            = 0.1,
  lambda                         = 15,
  save.intermediate.files        = FALSE,
  randomization.switch.threshold = 30,
  max.memory.size                = 100,
  calculate_APC_threshold        = TRUE,
  calculate_FDR                  = TRUE,
  FDR.cutoff                     = 0.1,
  verbose                        = TRUE
)
#> [1] "Running SELECT... [ .// ]"
#> [1] "-> Parsing and Filtering GAM..."
#> [1] "-> Generating null model..."
#> user system elapsed
#> 3.362 0.006 3.368
#> [1] "Memory usage: 31.4 MB"
#> [1] "-> Collecting event stats on observed GAM..."
#> user system elapsed
#> 0.104 0.229 0.023
#> [1] "Memory usage: 31.6 MB"
#> [1] "-> Calculating wMI on observed GAM..."
#> user system elapsed
#> 0.025 0.000 0.024
#> [1] "Memory usage: 32.1 MB"
#> [1] "-> Collecting event stats on null model..."
#> user system elapsed
#> 9.555 31.501 2.059
#> [1] "Memory usage: 37.4 MB"
#> [1] "-> Comparing observed GAM vs null model..."
#> user system elapsed
#> 1.018 0.000 1.018
#> [1] "Memory usage: 38 MB"
#> [1] "-> Calculating wMI on null model..."
#> user system elapsed
#> 1.692 0.006 1.698
#> [1] "Memory usage: 59.8 MB"
#> [1] "-> Calculating P-value based on wMI..."
#> user system elapsed
#> 0.029 0.002 0.031
#> [1] "Memory usage: 60 MB"
#> [1] "-> Deriving average background wMI..."
#> user system elapsed
#> 0.91 0.00 0.91
#> [1] "Memory usage: 60.3 MB"
#> [1] "-> Building results table..."
#> [1] "-> Calculating APC score..."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> user system elapsed
#> 0.201 0.000 0.201
#> [1] "Memory usage: 64.1 MB"
#> [1] "-> Performing wMI FDR analysis..."
#> Warning in rbind(deparse.level, ...): number of columns of result, 7, is not a
#> multiple of vector length 6 of arg 2
#> user system elapsed
#> 3.819 0.024 3.843
#> [1] "Memory usage: 64.3 MB"
#> [1] "-> Calculating APC threshold..."
Colnames | Meaning |
---|---|
SFE_1 | Selected Functional Event (SFE_1) |
SFE_2 | Selected Functional Event (SFE_2) |
name | Interaction Motif |
type_1 | Type of mutation of SFE_1 |
type_2 | Type of mutation of SFE_2 |
int_type | Interaction Motif type |
support_1 | Samples mutated with SFE_1 |
support_2 | Samples mutated with SFE_2 |
freq_1 | Frequency of SFE_1 |
freq_2 | Frequency of SFE_2 |
overlap | Co-mutation between SFE_1 and SFE_2 |
max_overlap | Maximum possible Co-mutation |
freq_overlap | Frequency of Co-mutation |
r_overlap | Background Co-mutation |
r_freq_overlap | Background frequency of Co-mutation |
diff_overlap | Difference of co-mutations |
abs_diff_overlap | Absolute Difference of co-mutations |
direction | Interaction Type |
wMI_stat | Weighted Mutual Information |
wMI_p.value | P-value on Mutual Information |
ME_p.value | P-value on co-mutation |
E.r.wMI_stat | Background Weighted Mutual Information |
MI_diff | Difference of Mutual Information |
wMI_p.value_FDR | FDR |
select_score_good_cancer_cell_2017_criterion_1 | Cancer Cell Paper Criteria |
select_score | Effect Size (select Score) |
# Set the console print width to 100 characters for the wide result table
options(width = 100)
# Look into dataframe: keep the rows where wMI_p.value_FDR is TRUE, sort them
# by decreasing select_score and show the first two
alpi %>%
  filter(wMI_p.value_FDR) %>%
  arrange(desc(select_score)) %>%
  head(2)
#> SFE_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP.consensus.chr14:35870717-36159897
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL.consensus.chr4:183089197-186421724
#> SFE_2
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP.consensus.chr14:37858832-38371493
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL.consensus.chr4:187186290-187647876
#> name
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876
#> type_1 type_2
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP AMP
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL DEL
#> int_type support_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP - AMP 57
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL - DEL 12
#> support_2
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 53
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 12
#> freq_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.11656442
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.02453988
#> freq_2 overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.10838446 47
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.02453988 11
#> max_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 53
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 12
#> freq_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.8867925
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.9166667
#> r_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 10.04
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.72
#> r_freq_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.189434
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.060000
#> diff_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 36.96
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 10.28
#> abs_diff_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 36.96
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 10.28
#> direction wMI_stat
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 CO 0.4241418
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 CO 0.3612067
#> wMI_p.value
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0
#> ME_p.value
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0
#> E.r.wMI_stat
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.003527209
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.016350097
#> MI_diff APC
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.4206146 0.4005788
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.3448566 0.3413985
#> wMI_p.value_FDR
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 TRUE
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 TRUE
#> select_score_good_cancer_cell_2017_criterion_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 TRUE
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 TRUE
#> select_score
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.4005788
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.3413985
# Total significant hits: count the rows where wMI_p.value_FDR is TRUE,
# broken down by direction
alpi %>%
  filter(wMI_p.value_FDR) %>%
  count(wMI_p.value_FDR, direction)
#> wMI_p.value_FDR direction n
#> 1 TRUE CO 108
#> 2 TRUE ME 18
# Record the R version, platform and package versions used for this run
utils::sessionInfo()
#> R version 4.3.2 (2023-10-31)
#> Platform: x86_64-conda-linux-gnu (64-bit)
#> Running under: CentOS Linux 7 (Core)
#>
#> Matrix products: default
#> BLAS/LAPACK: /mnt/ndata/arvind/envs/R_4/lib/libopenblasp-r0.3.25.so; LAPACK version 3.11.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
#> [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Europe/Zurich
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] dplyr_1.1.4 select_1.6.4
#>
#> loaded via a namespace (and not attached):
#> [1] sass_0.4.8 utf8_1.2.4 generics_0.1.3 slam_0.1-50 stringi_1.8.3
#> [6] lattice_0.22-5 digest_0.6.34 magrittr_2.0.3 evaluate_0.23 grid_4.3.2
#> [11] iterators_1.0.14 fastmap_1.1.1 foreach_1.5.2 doParallel_1.0.17 jsonlite_1.8.8
#> [16] Matrix_1.6-5 purrr_1.0.2 fansi_1.0.6 BiRewire_3.34.0 codetools_0.2-19
#> [21] textshaping_0.3.7 jquerylib_0.1.4 cli_3.6.2 rlang_1.1.3 cachem_1.0.8
#> [26] yaml_2.3.8 Rtsne_0.17 tools_4.3.2 parallel_4.3.2 memoise_2.0.1
#> [31] vctrs_0.6.5 R6_2.5.1 matrixStats_1.2.0 lifecycle_1.0.4 stringr_1.5.1
#> [36] fs_1.6.3 ragg_1.2.7 pkgconfig_2.0.3 desc_1.4.3 pkgdown_2.0.7
#> [41] bslib_0.6.1 pillar_1.9.0 glue_1.7.0 Rcpp_1.0.12 systemfonts_1.0.5
#> [46] xfun_0.41 tibble_3.2.1 tidyselect_1.2.0 knitr_1.45 htmltools_0.5.7
#> [51] igraph_1.6.0 rmarkdown_2.25 compiler_4.3.2