vignettes/introduction.Rmd
introduction.Rmd
select
package provides functions to generate the background model, along with other utility functions.
# install.packages("devtools")
# devtools::install_github("CSOgroup/select")
library(select)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:select':
#>
#> select
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Check the data structure
## Load the data provided with the package
data(luad_data, package = "select")
str(luad_data)
#> List of 3
#> $ gam : num [1:502, 1:659] 0 0 0 0 0 0 0 0 1 0 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:502] "TCGA-05-4244-01" "TCGA-05-4249-01" "TCGA-05-4250-01" "TCGA-05-4382-01" ...
#> .. ..$ : chr [1:659] "MUT.ABL1" "MUT.ACVR1B" "MUT.ACVR2A" "MUT.AKT1" ...
#> $ alt : Named chr [1:659] "MUT" "MUT" "MUT" "MUT" ...
#> ..- attr(*, "names")= chr [1:659] "MUT.ABL1" "MUT.ACVR1B" "MUT.ACVR2A" "MUT.AKT1" ...
#> $ samples: Named chr [1:502] "LUAD" "LUAD" "LUAD" "LUAD" ...
#> ..- attr(*, "names")= chr [1:502] "TCGA-05-4244-01" "TCGA-05-4249-01" "TCGA-05-4250-01" "TCGA-05-4382-01" ...
`select()` is the function which generates the background model and results. Its arguments include:

- `M`: the list object of GAMs & TMB
- `sample.class`: a named vector of samples with covariates
- `alteration.class`: a named vector of alterations with covariates
alpi <- select::select(
M = luad_data$gam,
sample.class = luad_data$samples,
alteration.class = luad_data$alt,
folder = './',
r.seed = 110,
n.cores = 1,
vetos = NULL,
n.permut = 100,
min.feature.support=5,
min.feature.freq=0.001,
remove.0.samples=TRUE,
remove.unknown.class.samples=TRUE,
rho = 0.1,
lambda = 15,
save.intermediate.files = FALSE,
randomization.switch.threshold = 30,
max.memory.size=100,
calculate_APC_threshold = TRUE,
calculate_FDR = TRUE,
verbose = TRUE
)
#> [1] "Running SELECT... [ .// ]"
#> [1] "-> Parsing and Filtering GAM..."
#> [1] "-> Generating null model..."
#> user system elapsed
#> 2.899 0.052 2.951
#> [1] "Memory usage: 29.9 MB"
#> [1] "-> Collecting event stats on observed GAM..."
#> user system elapsed
#> 0.686 3.043 0.054
#> [1] "Memory usage: 30.2 MB"
#> [1] "-> Calculating wMI on observed GAM..."
#> user system elapsed
#> 0.022 0.001 0.023
#> [1] "Memory usage: 30.6 MB"
#> [1] "-> Collecting event stats on null model..."
#> user system elapsed
#> 34.238 158.332 2.350
#> [1] "Memory usage: 36.2 MB"
#> [1] "-> Comparing observed GAM vs null model..."
#> user system elapsed
#> 1.186 0.002 1.188
#> [1] "Memory usage: 36.8 MB"
#> [1] "-> Calculating wMI on null model..."
#> user system elapsed
#> 1.370 0.020 1.391
#> [1] "Memory usage: 58.8 MB"
#> [1] "-> Calculating P-value based on wMI..."
#> user system elapsed
#> 0.027 0.006 0.034
#> [1] "Memory usage: 59 MB"
#> [1] "-> Deriving average background wMI..."
#> user system elapsed
#> 0.995 0.002 0.997
#> [1] "Memory usage: 59.3 MB"
#> [1] "-> Building results table..."
#> [1] "-> Calculating APC score..."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> [1] "Not correcting with APC as there are not enough interactions to estimate the background distribution. Uncorrected scores will be used."
#> user system elapsed
#> 0.167 0.000 0.166
#> [1] "Memory usage: 63.1 MB"
#> [1] "-> Performing wMI FDR analysis..."
#> user system elapsed
#> 3.398 0.093 3.491
#> [1] "Memory usage: 63.3 MB"
#> [1] "-> Calculating APC threshold..."
Colnames | Meaning |
---|---|
SFE_1 | Selected Functional Event (SFE_1) |
SFE_2 | Selected Functional Event (SFE_2) |
name | Interaction Motif |
type_1 | Type of mutation of SFE_1 |
type_2 | Type of mutation of SFE_2 |
int_type | Interaction Motif type |
support_1 | Samples mutated with SFE_1 |
support_2 | Samples mutated with SFE_2 |
freq_1 | Frequency of SFE_1 |
freq_2 | Frequency of SFE_2 |
overlap | Co-mutation between SFE_1 and SFE_2 |
max_overlap | Maximum possible Co-mutation |
freq_overlap | Frequency of Co-mutation |
r_overlap | Background Co-mutation |
r_freq_overlap | Background frequency of Co-mutation |
diff_overlap | Difference of co-mutations |
abs_diff_overlap | Absolute Difference of co-mutations |
direction | Interaction Type |
wMI_stat | Weighted Mutual Information |
wMI_p.value | P-value on Mutual Information |
ME_p.value | P-value on co-mutation |
E.r.wMI_stat | Background Weighted Mutual Information |
MI_diff | Difference of Mutual Information |
wMI_p.value_FDR | FDR |
select_score_good_cancer_cell_2017_criterion_1 | Cancer Cell Paper Criteria |
select_score | Effect Size (select Score) |
options(width=100)
# Look into dataframe
(alpi %>% filter(wMI_p.value_FDR) %>% arrange(desc(select_score))) %>% head(2)
#> SFE_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP.consensus.chr14:35870717-36159897
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL.consensus.chr4:183089197-186421724
#> SFE_2
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP.consensus.chr14:37858832-38371493
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL.consensus.chr4:187186290-187647876
#> name
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876
#> type_1 type_2
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP AMP
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL DEL
#> int_type support_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 AMP - AMP 57
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 DEL - DEL 12
#> support_2
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 53
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 12
#> freq_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.11656442
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.02453988
#> freq_2 overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.10838446 47
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.02453988 11
#> max_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 53
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 12
#> freq_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.8867925
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.9166667
#> r_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 10.04
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.72
#> r_freq_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.189434
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.060000
#> diff_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 36.96
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 10.28
#> abs_diff_overlap
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 36.96
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 10.28
#> direction wMI_stat
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 CO 0.4241418
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 CO 0.3612067
#> wMI_p.value
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0
#> ME_p.value
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0
#> E.r.wMI_stat
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.003527209
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.016350097
#> MI_diff
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.4206146
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.3448566
#> wMI_p.value_FDR
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 TRUE
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 TRUE
#> select_score_good_cancer_cell_2017_criterion_1
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 TRUE
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 TRUE
#> select_score
#> AMP.consensus.chr14:35870717-36159897 - AMP.consensus.chr14:37858832-38371493 0.4005788
#> DEL.consensus.chr4:183089197-186421724 - DEL.consensus.chr4:187186290-187647876 0.3413985
# Total significant Hits
alpi %>% filter(wMI_p.value_FDR) %>% count(wMI_p.value_FDR,direction)
#> wMI_p.value_FDR direction n
#> 1 TRUE CO 108
#> 2 TRUE ME 18
# Print the sessionInfo
sessionInfo()
#> R version 4.1.3 (2022-03-10)
#> Platform: x86_64-conda-linux-gnu (64-bit)
#> Running under: CentOS Linux 7 (Core)
#>
#> Matrix products: default
#> BLAS/LAPACK: /home/arvind/miniconda3/envs/R_4/lib/libopenblasp-r0.3.18.so
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
#> [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] dplyr_1.1.1 select_1.6.3
#>
#> loaded via a namespace (and not attached):
#> [1] pillar_1.9.0 bslib_0.3.1 compiler_4.1.3 jquerylib_0.1.4 iterators_1.0.14
#> [6] tools_4.1.3 digest_0.6.29 tibble_3.2.1 jsonlite_1.7.2 evaluate_0.14
#> [11] memoise_2.0.1 lifecycle_1.0.3 lattice_0.20-45 pkgconfig_2.0.3 rlang_1.1.0
#> [16] Matrix_1.4-0 foreach_1.5.2 igraph_1.2.10 cli_3.6.1 parallel_4.1.3
#> [21] yaml_2.2.1 pkgdown_2.0.7 xfun_0.29 fastmap_1.1.0 stringr_1.4.0
#> [26] knitr_1.35 generics_0.1.2 desc_1.4.0 fs_1.5.2 vctrs_0.6.1
#> [31] sass_0.4.0 systemfonts_1.0.4 tidyselect_1.2.0 rprojroot_2.0.2 grid_4.1.3
#> [36] glue_1.6.2 BiRewire_3.26.0 R6_2.5.1 textshaping_0.3.6 fansi_1.0.3
#> [41] rmarkdown_2.11 purrr_1.0.1 magrittr_2.0.3 matrixStats_0.61.0 codetools_0.2-18
#> [46] htmltools_0.5.2 tsne_0.1-3 ragg_1.2.2 utf8_1.2.2 stringi_1.7.6
#> [51] doParallel_1.0.17 cachem_1.0.6 slam_0.1-49 crayon_1.5.1