get_ers defines expressed regions (ERs) across an inputted range of mean coverage cut-offs (MCCs) and max region gaps (MRGs) from the coverage.

get_strand_ers defines stranded ERs across an inputted range of mean coverage cut-offs (MCCs) and max region gaps (MRGs) from the coverage of positive- and negative-strand BigWig files.

get_ers(coverage, mccs, mrgs)

get_strand_ers(
  bw_pos,
  bw_neg,
  auc_raw_pos,
  auc_raw_neg,
  auc_target,
  chrs,
  mccs,
  mrgs,
  bw_chr = "chr"
)

Arguments

coverage

the coverage of the bigwig files passed into get_coverage.

mccs

mean coverage cut-offs to apply.

mrgs

max region gaps to apply.

bw_pos

positive strand bigwig file

bw_neg

negative strand bigwig file

auc_raw_pos

vector containing AUCs (Area Under Coverage) matching the order of the positive bigwig paths.

auc_raw_neg

vector containing AUCs (Area Under Coverage) matching the order of the negative bigwig paths.

auc_target

total AUC to normalise all samples to. E.g. 40e6 * 100 would be the estimated total AUC for a sample sequenced to 40 million reads of 100 bp in length.

chrs

chromosomes to obtain mean coverage for, default is "" giving every chromosome. Can take UCSC format (chrs = "chr1") or just the chromosome, i.e. chrs = c(1, "X").

bw_chr

specifies whether the bigwig files have the chromosomes labelled with a "chr" preceding the chromosome i.e. "chr1" vs "1". Can be either "chr" or "nochr" with "chr" being the default.

Value

list containing sets of ERs, each generated using a particular combination of MCC and MRG.

list containing sets of stranded ERs, each generated using a particular combination of MCC and MRG.

Functions

  • get_strand_ers: Method for getting ERs from stranded BigWig files

Examples

data(gtex_SRP012682_SRX222703_lung_coverage_1, package = "ODER")

eg_ers <- get_ers(
    coverage = gtex_SRP012682_SRX222703_lung_coverage_1,
    mccs = c(5, 10),
    mrgs = c(10, 20)
)
#> 2021-10-08 16:10:22 - Generating ERs for chr21
#> 2021-10-08 16:10:23 - Generating ERs for chr22

eg_ers
#> $mcc_5
#> $mcc_5$mrg_10
#> GRanges object with 26459 ranges and 0 metadata columns:
#>           seqnames            ranges strand
#>              <Rle>         <IRanges>  <Rle>
#>       [1]    chr21   5026470-5026525      *
#>       [2]    chr21   5027955-5028030      *
#>       [3]    chr21   5032160-5032217      *
#>       [4]    chr21   5033408-5033441      *
#>       [5]    chr21   5034694-5034787      *
#>       ...      ...               ...    ...
#>   [26455]    chr22 50799669-50799744      *
#>   [26456]    chr22 50799915-50799990      *
#>   [26457]    chr22 50800460-50800587      *
#>   [26458]    chr22 50802354-50802398      *
#>   [26459]    chr22 50802615-50802631      *
#>   -------
#>   seqinfo: 2 sequences from an unspecified genome; no seqlengths
#> 
#> $mcc_5$mrg_20
#> GRanges object with 24404 ranges and 0 metadata columns:
#>           seqnames            ranges strand
#>              <Rle>         <IRanges>  <Rle>
#>       [1]    chr21   5026470-5026525      *
#>       [2]    chr21   5027955-5028030      *
#>       [3]    chr21   5032160-5032217      *
#>       [4]    chr21   5033408-5033441      *
#>       [5]    chr21   5034694-5034787      *
#>       ...      ...               ...    ...
#>   [24400]    chr22 50799669-50799744      *
#>   [24401]    chr22 50799915-50799990      *
#>   [24402]    chr22 50800460-50800587      *
#>   [24403]    chr22 50802354-50802398      *
#>   [24404]    chr22 50802615-50802631      *
#>   -------
#>   seqinfo: 2 sequences from an unspecified genome; no seqlengths
#> 
#> 
#> $mcc_10
#> $mcc_10$mrg_10
#> GRanges object with 17761 ranges and 0 metadata columns:
#>           seqnames            ranges strand
#>              <Rle>         <IRanges>  <Rle>
#>       [1]    chr21   5032176-5032217      *
#>       [2]    chr21   5033408-5033425      *
#>       [3]    chr21   5034717-5034756      *
#>       [4]    chr21   5035188-5035189      *
#>       [5]    chr21   5036577-5036581      *
#>       ...      ...               ...    ...
#>   [17757]    chr22 50799209-50799264      *
#>   [17758]    chr22 50799283-50799284      *
#>   [17759]    chr22 50799669-50799688      *
#>   [17760]    chr22 50799717-50799744      *
#>   [17761]    chr22 50800460-50800587      *
#>   -------
#>   seqinfo: 2 sequences from an unspecified genome; no seqlengths
#> 
#> $mcc_10$mrg_20
#> GRanges object with 15977 ranges and 0 metadata columns:
#>           seqnames            ranges strand
#>              <Rle>         <IRanges>  <Rle>
#>       [1]    chr21   5032176-5032217      *
#>       [2]    chr21   5033408-5033425      *
#>       [3]    chr21   5034717-5034756      *
#>       [4]    chr21   5035188-5035189      *
#>       [5]    chr21   5036577-5036581      *
#>       ...      ...               ...    ...
#>   [15973]    chr22 50798996-50799149      *
#>   [15974]    chr22 50799209-50799284      *
#>   [15975]    chr22 50799669-50799688      *
#>   [15976]    chr22 50799717-50799744      *
#>   [15977]    chr22 50800460-50800587      *
#>   -------
#>   seqinfo: 2 sequences from an unspecified genome; no seqlengths
#> 
#> 
library("magrittr")
gtex_metadata <- recount::all_metadata("gtex")
#> 2021-10-08 16:10:24 downloading the metadata to /tmp/Rtmp8R75GU/metadata_clean_gtex.Rdata
gtex_metadata <- gtex_metadata %>%
    as.data.frame() %>%
    dplyr::filter(project == "SRP012682")

rec_url <- recount::download_study(
    project = "SRP012682",
    type = "samples",
    download = FALSE
)
# file_cache is an internal function that downloads a bigwig file from a link;
# if the file has been downloaded recently, it is retrieved from a cache instead
bw_plus <- file_cache(rec_url[58])
bw_minus <- file_cache(rec_url[84])

# As of rtracklayer 1.25.16, BigWig is not supported on Windows.
if (!xfun::is_windows()) {
    stranded_ers <- get_strand_ers(
        bw_pos = bw_plus, bw_neg = bw_minus,
        auc_raw_pos = gtex_metadata[["auc"]][58],
        auc_raw_neg = gtex_metadata[["auc"]][84], auc_target = 40e6 * 100,
        chrs = "chr21", mccs = c(5, 10), mrgs = c(10, 20)
    )
    stranded_ers
}
#> 2021-10-08 16:10:26 - Obtaining mean coverage across 1 samples
#> 2021-10-08 16:10:26 - chr21
#> 2021-10-08 16:10:27 - Obtaining mean coverage across 1 samples
#> 2021-10-08 16:10:27 - chr21
#> 2021-10-08 16:10:27 - Generating ERs for chr21
#> 2021-10-08 16:10:28 - Generating ERs for chr21
#> $mcc_5
#> $mcc_5$mrg_10
#> GRanges object with 6355 ranges and 0 metadata columns:
#>          seqnames            ranges strand
#>             <Rle>         <IRanges>  <Rle>
#>      [1]    chr21   5116346-5117232      +
#>      [2]    chr21   5117340-5117507      +
#>      [3]    chr21   5117539-5117558      +
#>      [4]    chr21   5117587-5117592      +
#>      [5]    chr21   5117605-5117611      +
#>      ...      ...               ...    ...
#>   [6351]    chr21 46668354-46668391      -
#>   [6352]    chr21 46668488-46668581      -
#>   [6353]    chr21 46690842-46691238      -
#>   [6354]    chr21 46691775-46691807      -
#>   [6355]    chr21 46692377-46692446      -
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths
#> 
#> $mcc_5$mrg_20
#> GRanges object with 5503 ranges and 0 metadata columns:
#>          seqnames            ranges strand
#>             <Rle>         <IRanges>  <Rle>
#>      [1]    chr21   5116346-5117232      +
#>      [2]    chr21   5117340-5117507      +
#>      [3]    chr21   5117539-5117558      +
#>      [4]    chr21   5117587-5117611      +
#>      [5]    chr21   5117670-5117817      +
#>      ...      ...               ...    ...
#>   [5499]    chr21 46668354-46668391      -
#>   [5500]    chr21 46668488-46668581      -
#>   [5501]    chr21 46690842-46691238      -
#>   [5502]    chr21 46691775-46691807      -
#>   [5503]    chr21 46692377-46692446      -
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths
#> 
#> 
#> $mcc_10
#> $mcc_10$mrg_10
#> GRanges object with 4293 ranges and 0 metadata columns:
#>          seqnames            ranges strand
#>             <Rle>         <IRanges>  <Rle>
#>      [1]    chr21   5116356-5117231      +
#>      [2]    chr21   5117368-5117396      +
#>      [3]    chr21   5118691-5118847      +
#>      [4]    chr21   5120950-5120951      +
#>      [5]    chr21   5121718-5121803      +
#>      ...      ...               ...    ...
#>   [4289]    chr21 46665571-46665576      -
#>   [4290]    chr21 46665603-46665610      -
#>   [4291]    chr21 46690860-46691105      -
#>   [4292]    chr21 46691173-46691177      -
#>   [4293]    chr21 46691195-46691200      -
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths
#> 
#> $mcc_10$mrg_20
#> GRanges object with 3814 ranges and 0 metadata columns:
#>          seqnames            ranges strand
#>             <Rle>         <IRanges>  <Rle>
#>      [1]    chr21   5116356-5117231      +
#>      [2]    chr21   5117368-5117396      +
#>      [3]    chr21   5118691-5118847      +
#>      [4]    chr21   5120950-5120951      +
#>      [5]    chr21   5121718-5121803      +
#>      ...      ...               ...    ...
#>   [3810]    chr21 46665544-46665545      -
#>   [3811]    chr21 46665571-46665576      -
#>   [3812]    chr21 46665603-46665610      -
#>   [3813]    chr21 46690860-46691105      -
#>   [3814]    chr21 46691173-46691200      -
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths
#> 
#>