@@ -31,32 +31,15 @@ partner <- function(gr) {
3131# '
3232# '@export
3333findBreakpointOverlaps <- function (query , subject , maxgap = - 1L , minoverlap = 0L , ignore.strand = FALSE , sizemargin = 0.25 , restrictMarginToSizeMultiple = 0.5 ) {
34- hitdf <- as.data.frame(findOverlaps(query , subject , maxgap = maxgap , minoverlap = minoverlap , type = " any" , select = " all" , ignore.strand = ignore.strand ), row.names = NULL )
35- # instead of running findOverlaps(partner(query), partner(subject), ...
36- # we can reduce our runtime cost by just performing partner index lookups
37- # partner lookups
38- subjectPartnerIndexLookup <- seq_along(names(subject ))
39- names(subjectPartnerIndexLookup ) <- names(subject )
40- queryPartnerIndexLookup <- seq_along(names(query ))
41- names(queryPartnerIndexLookup ) <- names(query )
42- phitdf <- data.frame (
43- queryHits = queryPartnerIndexLookup [query $ partner [hitdf $ queryHits ]],
44- subjectHits = subjectPartnerIndexLookup [subject $ partner [hitdf $ subjectHits ]])
45- hits <- rbind(hitdf , phitdf , make.row.names = FALSE )
34+ hits <- dplyr :: bind_rows(
35+ as.data.frame(findOverlaps(query , subject , maxgap = maxgap , minoverlap = minoverlap , type = " any" , select = " all" , ignore.strand = ignore.strand ), row.names = NULL ),
36+ as.data.frame(findOverlaps(query , subject , maxgap = maxgap , minoverlap = minoverlap , type = " any" , select = " all" , ignore.strand = ignore.strand ), row.names = NULL ))
4637 # we now want to do:
4738 # hits <- hits[duplicated(hits),] # both breakends match
4839 # but for large hit sets (such as focal false positive loci) we run out of memory (>32GB)
49- # instead, we sort then check that we match the previous record
50- hits <- hits [base :: order(hits $ queryHits , hits $ subjectHits ), ]
51- lg <- function (x ) {
52- if (length(x ) == 0 ) {
53- return (x )
54- } else {
55- return (c(- 1 , x [1 : (length(x )- 1 )])) # -1 to ensure FALSE match instead of NA match
56- }
57- }
58- isDup <- hits $ queryHits == lg(hits $ queryHits ) & hits $ subjectHits == lg(hits $ subjectHits )
59- hits <- hits [isDup ,]
40+ # instead, we sort then check that we match the next record
41+ hits = hits %> % dplyr :: arrange(queryHits , subjectHits ) %> %
42+ dplyr :: filter(! is.na(dplyr :: lead(. $ queryHits )) & ! is.na(dplyr :: lead(. $ subjectHits )) & dplyr :: lead(. $ queryHits ) == . $ queryHits & dplyr :: lead(. $ subjectHits ) == . $ subjectHits )
6043 if (! is.null(sizemargin ) && ! is.na(sizemargin )) {
6144 # take into account confidence intervals when calculating event size
6245 callwidth <- .distance(query , partner(query ))
@@ -343,29 +326,28 @@ calculateBlastHomology <- function(gr, ref, db, anchorLength=150) {
343326# ' @param gr breakpoint GRanges object. Can contain both breakpoint and single breakend SV records
344327# '
345328# '@export
346- breakpointGRangesToVCF <- function (gr ) {
347- if (is.null(gr $ insSeq )) {
348- gr $ insSeq = rep(" " , length(gr ))
349- }
350- nominalgr = GRanges(seqnames = seqnames(gr ), ranges = IRanges(start = (end(gr ) + start(gr )) / 2 , width = 1 ))
351- if (is.null(gr $ REF )) {
352- gr $ REF = rep(" N" , length(gr ))
353- }
354- gr $ ALT [is.na(gr $ ALT )] = " "
355- if (is.null(gr $ ALT )) {
356- gr $ ALT = rep(" " , length(gr ))
357- }
358- gr $ ALT [is.na(gr $ ALT )] = " "
359- gr $ ALT [gr $ ALT == " " ] = .toVcfBreakendNotationAlt(gr )[gr $ ALT == " " ]
360- ciposstart = start(gr ) - start(nominalgr )
361- ciposend = end(gr ) - end(nominalgr )
362- vcf = VCF(rowRanges = nominalgr , collapsed = FALSE )
363- fixeddf = data.frame (
364- ALT = gr $ ALT ,
365- REF = gr $ REF ,
366- QUAL = gr $ QUAL ,
367- FILTER = gr $ FILTER )
368-
369- VCF(rowRanges = GRanges(), colData = DataFrame(), exptData = list (header = VCFHeader()), fixed = DataFrame(), info = DataFrame(), geno = SimpleList(), ... , collapsed = FALSE , verbose = FALSE
370- }
329+ # breakpointGRangesToVCF <- function(gr) {
330+ # if (is.null(gr$insSeq)) {
331+ # gr$insSeq = rep("", length(gr))
332+ # }
333+ # nominalgr = GRanges(seqnames=seqnames(gr), ranges=IRanges(start=(end(gr) + start(gr)) / 2, width=1))
334+ # if (is.null(gr$REF)) {
335+ # gr$REF = rep("N", length(gr))
336+ # }
337+ # gr$ALT[is.na(gr$ALT)] = ""
338+ # if (is.null(gr$ALT)) {
339+ # gr$ALT = rep("", length(gr))
340+ # }
341+ # gr$ALT[is.na(gr$ALT)] = ""
342+ # gr$ALT[gr$ALT == ""] = .toVcfBreakendNotationAlt(gr)[gr$ALT == ""]
343+ # ciposstart = start(gr) - start(nominalgr)
344+ # ciposend = end(gr) - end(nominalgr)
345+ # vcf = VCF(rowRanges=nominalgr, collapsed=FALSE)
346+ # fixeddf = data.frame(
347+ # ALT=gr$ALT,
348+ # REF=gr$REF,
349+ # QUAL=gr$QUAL,
350+ # FILTER=gr$FILTER)
351+ # VCF(rowRanges = GRanges(), colData = DataFrame(), exptData = list(header = VCFHeader()), fixed = DataFrame(), info = DataFrame(), geno = SimpleList(), ..., collapsed=FALSE, verbose = FALSE)
352+ # }
371353
0 commit comments