ryan
/
resume
espejo de https://github.com/DarwinAwardWinner/resume.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725
							#!/usr/bin/env Rscript

default.align.opts <- list(match=1, mismatch=3,
                           gapOpening=5, gapExtension=2)

parse_arguments <- function() {
    suppressMessages({
        library(optparse)
        library(parallel)
    })
    option_list <-
        list(make_option(c("-c", "--min-call"), type="integer", default=10, metavar="10",
                         help="Minimum perfect overlap required to call the presence of the subject (paired only). Imperfect overlap will need to be longer, based on specified mismatch and gap penalties."),
             make_option(c("-l", "--min-length"), type="integer", default=36, metavar="36",
                         help="Minimum length allowed after trimming a read. Any reads shorter than this after trimming will be discarded."),
             make_option(c("-i", "--interleaved"), action="store_true", default=FALSE,
                         help="Specify this option if you have paired-end sequences interleaved in a single FASTQ file. The default is to read paired-end sequences from a matched pair of files, and this option is ignored if two fastq files are provided. When you use this option, skip the \"READ2.fastq\" argument."),
             make_option(c("-o", "--read1-orientation"), type="character", default="in", metavar="in/out",
                         help="Orientation of read1. Can be either \"in\" or \"out\" (paired only). Note that Illumina reads are \"in\"."),
             make_option(c("-q", "--read2-orientation"), type="character", default="in", metavar="in/out",
                         help="Orientation of read2. Can be either \"in\" or \"out\" (paired only)"),
             make_option(c("-j", "--jobs"), type="integer",
                         default=parallel:::detectCores(),
                         metavar=as.character(parallel:::detectCores()),
                         help="Number of jobs to run in parallel for alignment. This should be autodetected by default."),
             make_option(c("-y", "--yield-size"), type="integer",
                         default=100000,
                         metavar="100000",
                         help="Number of reads to process at a time. Setting this higher will read more data into memory at once and result in faster runtime. Setting this lower will require less memory."),
             make_option(c("-m", "--match-bonus"), type="double",
                         default=default.align.opts$match,
                         metavar=as.character(default.align.opts$match),
                         help="Score bonus for a matching nucleotide"),
             make_option(c("-p", "--mismatch-penalty"), type="double",
                         default=default.align.opts$mismatch,
                         metavar=as.character(default.align.opts$mismatch),
                         help="Score penalty for a mismatched nucleotide (specify as a positive number)"),
             make_option(c("-g", "--gap-open-penalty"), type="double",
                         default=default.align.opts$gapOpening,
                         metavar=as.character(default.align.opts$gapOpening),
                         help="Score penalty for opening a gap in the alignment (specifiy as a positive number)"),
             make_option(c("-e", "--gap-extension-penalty"), type="double",
                         default=default.align.opts$match,
                         metavar=as.character(default.align.opts$gapExtension),
                         help="Score penalty for extending an alignment gap by two nucleotides (specify as a positive number)"),
             make_option(c("-s", "--single-read-mode"), action="store_true", default=FALSE,
                         help="Tell DeLoxer to run in single-end mode instead of paired-end mode. In this mode, the only a single input fastq file is provided, and only a single output file is created. No classification is performed, only trimming.  When you use this option, skip the \"READ2.fastq\" argument, and specify the full file name for OUTPUT_NAME instead of just the base name."))
    option_parser <- OptionParser(option_list=option_list,
                                  usage="%prog [options] adapter.fasta READ1.fastq READ2.fastq OUTPUT_NAME")
    opt <- parse_args(option_parser, positional_arguments=TRUE)
    return(opt)
}

## Call this here to handle --help quickly, before we waste 10 seconds
## loading all the libraries
invisible(parse_arguments())

print.option.list <- function(opt=parse_arguments()) {
    args <- opt$args
    opts <- opt$options
    message("Options:")
    foreach (o=opts, n=names(opts)) %do% {
        if (n != "help")
            message(" ", n, ": ", o)
    }
    message("Args: ", paste("\"", args, "\"", sep="", collapse=", "))
}

unimplemented <- function() stop("UNIMPLEMENTED")

## Timestampped message
tsmsg <- function(...) {
    message("# ", date(), ": ", ...)
}

tsmsg("Starting deloxer and loading required packages")

suppressMessages({
    library(ShortRead)
    library(optparse)
    library(foreach)
    library(iterators)
    library(itertools)
    library(doMC)
    registerDoMC()
    mcoptions <- list(preschedule=TRUE, set.seed=FALSE)
})

## Merge l1 and l2 by names
merge.lists <- function(l1, l2) {
    new.names <- setdiff(names(l2), names(l1))
    l1[new.names] <- l2[new.names]
    l1
}

## Return an object sans names
strip.names <- function(x) {
    names(x) <- NULL
    x
}

## Define some missing type coercions
setAs(from="ShortRead", to="DNAStringSet", def=function(from) sread(from))
setAs(from="PhredQuality", to="FastqQuality", def=function(from) FastqQuality(BStringSet(from)))
setAs(from="SolexaQuality", to="SFastqQuality", def=function(from) SFastqQuality(BStringSet(from)))
setAs(from="QualityScaledXStringSet", to="ShortReadQ", def=function(from) {
    q <- quality(from)
    new.quality.class <- switch(class(q),
                                SolexaQuality="SFastqQuality",
                                PhredQuality="FastqQuality",
                                stop("Unknown quality type: ", class(q)))
    q <- as(q, new.quality.class)
    ShortReadQ(sread=as(from, "DNAStringSet"),
               quality=q,
               id=BStringSet(names(from)))
})
## Override the provided method to keep the sequence names
setAs(from="ShortReadQ", to="QualityScaledDNAStringSet",
      def=function (from, to = "QualityScaledDNAStringSet", strict = TRUE) {
          q <- quality(from)
          new.quality.class <- switch(class(q),
                                      SFastqQuality="SolexaQuality",
                                      FastqQuality="PhredQuality",
                                      "XStringQuality")
          q <- as(q, new.quality.class)
          x <- QualityScaledDNAStringSet(sread(from), q)
          names(x) <- as.character(id(from))
          x
      })

## Define functions for reading fastq into standard Biostrings object
## and writing it back out. The standard functions readFastq and
## writeFastq operate on ShortRead objects. These simply wrap them in
## conversion to/from QualityScaledDNAStringSet.
read.QualityScaledDNAStringSet <- function(filepath, format = "fastq", ...) {
    switch(format,
           fastq=as(readFastq(filepath, withIds=TRUE, ...), "QualityScaledDNAStringSet"),
           ## Default
           stop("Unknown quality-scaled sequence format: ", format))
}

write.QualityScaledDNAStringSet <- function (x, filepath, append = FALSE, format = "fastq") {
    if(length(x) > 0) {
        sr <- as(x, "ShortReadQ")
        switch(format,
               fastq={
                   if (!append)
                       unlink(filepath);
                   writeFastq(object=sr,
                              file=filepath, mode=ifelse(append, "a", "w"))
               },
               ## Default
               stop("Unknown quality-scaled sequence format: ", format))
    } else {
        ## Zero-length sequence; just truncate/touch the file
        sink(file=filepath, append=append)
        sink()
    }
}

discard.short.reads <- function(reads, min.length=1) {
    kept.reads <- reads[width(reads) >= min.length]
    return(kept.reads)
}

## Takes a set of interleaved reads (or anything else) and
## de-interleaves them
deinterleave.pairs <- function(reads) {
    stopifnot(length(reads) %% 2 == 0)
    mask <- seq(from=1, to=length(reads), by=2)
    return(list(read1=reads[mask], read2=reads[-mask]))
}

.delox.trimmed.ranges <- function(subj, reads, min.length=36,
                                  include.scores=TRUE,
                                  include.deleted.ranges=TRUE,
                                  align.opts=list()) {

    align.opts <- merge.lists(align.opts, default.align.opts)

    aln <- list(forward=pairwiseAlignment(pattern=reads,
                subject=subj,
                type="overlap",
                substitutionMatrix=nucleotideSubstitutionMatrix(match = align.opts$match, mismatch = -align.opts$mismatch),
                gapOpening=-align.opts$gapOpening, gapExtension=-align.opts$gapExtension),
                revcomp=pairwiseAlignment(pattern=reads,
                subject=reverseComplement(DNAString(subj)),
                type="overlap",
                substitutionMatrix=nucleotideSubstitutionMatrix(match = align.opts$match, mismatch = -align.opts$mismatch),
                gapOpening=-align.opts$gapOpening, gapExtension=-align.opts$gapExtension))

    aln.scores <- Map(score, aln)
    aln.pat <- Map(pattern, aln)
    aln.ranges <- Map(function(x) IRanges(start=start(x), end=end(x)), aln.pat)
    aln.threebands <- Map(function (x) threebands(IRanges(start=1, end=width(reads)),
                                                  start=start(x), end=end(x)),
                          aln.ranges)

    ## For each read, decide whether the forward or reverse alignment
    ## was better.
    revcomp.better <- aln.scores$forward < aln.scores$revcomp

    ## For each read, take the threebands for the better alignment.
    best.threebands <- aln.threebands$forward
    for (band in names(best.threebands)) {
        best.threebands[[band]][revcomp.better] <- aln.threebands$revcomp[[band]][revcomp.better]
    }

    ## Use the left band if it is longer than either min.length or
    ## length of right band.
    use.right.band <- width(best.threebands$left) < pmin(min.length, width(best.threebands$right))
    ranges <- best.threebands$left
    ranges[use.right.band] <- best.threebands$right[use.right.band]

    ## Record which ranges are shorter than min.length
    too.short <- width(ranges) < min.length
    ## ranges[too.short] <- IRanges(start=1,end=0)

    ## Record what was trimmed off of each read (NOT what was kept!)
    trim <- factor(ifelse(use.right.band, "left", "right"), levels=c("right", "left", "all", "none"))
    ## If it's too short, then we trim "all", i.e. discard the whole
    ## read.
    trim[too.short] <- "all"
    ## If the read is not shorter after trimming, then nothing was
    ## actually trimmed.
    trim[width(ranges) == width(reads)] <- "none"

    emeta <- list()

    emeta$trim <- trim

    if (include.deleted.ranges) {
        deleted.start <- ifelse(too.short, 1,
                                ifelse(use.right.band,
                                       start(best.threebands$left),
                                       start(best.threebands$middle)))
        deleted.end <- ifelse(too.short, width(reads),
                              ifelse(use.right.band,
                                     end(best.threebands$middle),
                                     end(best.threebands$right)))
        emeta$deleted.range <- IRanges(deleted.start, deleted.end)
    }

    if (include.scores) {
        ## If requested, take the best score out of each pair of forward
        ## and reverse scores.
        scores <- ifelse(revcomp.better, aln.scores$revcomp, aln.scores$forward)
        emeta$score <- scores
    }

    mcols(ranges) <- DataFrame(emeta)

    return(ranges)
}

## Always call delox on the underlying DNAStringSet object when called
## on something more complicated.
suppressMessages({
    invisible(setMethod(".delox.trimmed.ranges", signature=c(reads="ShortRead"),
                        function (subj, reads, min.length, include.scores, include.deleted.ranges, align.opts) {
                            callGeneric(subj, as(reads, "DNAStringSet"), min.length, include.scores, include.deleted.ranges, align.opts)
                        }))
    invisible(setMethod(".delox.trimmed.ranges", signature=c(reads="QualityScaledDNAStringSet"),
                        function (subj, reads, min.length, include.scores, include.deleted.ranges, align.opts) {
                            callGeneric(subj, as(reads, "DNAStringSet"), min.length, include.scores, include.deleted.ranges, align.opts)
                        }))
    invisible(setMethod(".delox.trimmed.ranges", signature=c(reads="QualityScaledXStringSet"),
                        function (subj, reads, min.length, include.scores, include.deleted.ranges, align.opts) {
                            callGeneric(subj, as(reads, "XStringSet"), min.length, include.scores, include.deleted.ranges, align.opts)
                        }))
})

delox.single <- function(subj, reads , min.length=36,
                         include.scores=TRUE, align.opts=list()) {
    tsmsg("Saving read names")
    saved.names <- BStringSet(names(reads))
    reads <- strip.names(reads)
    invisible(gc())

    tsmsg("Doing alignments")
    nchunks <- min(getDoParWorkers(), ceiling(length(reads)/1000))
    deloxed.ranges <- foreach(reads=isplitVector(reads, chunks=nchunks), .combine=c) %dopar% {
        .delox.trimmed.ranges(reads=reads, subj=subj, min.length=min.length,
                              include.scores=include.scores,
                              include.deleted.ranges=FALSE,
                              align.opts=align.opts)
    }
    ## maybe.chunkapply(.delox.trimmed.ranges,
    ##                  VECTOR.ARGS=list(reads=reads),
    ##                  SCALAR.ARGS=list(subj=subj, min.length=min.length,
    ##                    include.scores=include.scores,
    ##                    include.deleted.ranges=FALSE,
    ##                    align.opts=align.opts),
    ##                  min.chunk.size=1000,
    ##                  MERGE=c)

    tsmsg("Trimming reads")
    trimmed.reads <- narrow(reads, start(deloxed.ranges), end(deloxed.ranges))

    tsmsg("Restoring read names")
    names(trimmed.reads) <- as.character(saved.names)

    tsmsg("Adding metadata")
    emeta <- list()
    if (include.scores) {
        emeta$score <- mcols(deloxed.ranges)$score
    }
    if (length(emeta) > 0) {
        mcols(trimmed.reads) <- DataFrame(emeta)
    }

    return(discard.short.reads(trimmed.reads, min.length))
}

delox.paired <- function(subj, read1, read2,
                         min.call=10, min.length=36,
                         include.scores=TRUE, align.opts=list()) {
    align.opts <- merge.lists(align.opts, default.align.opts)

    tsmsg("Checking read counts")
    stopifnot(length(read1) == length(read2))

    tsmsg("Listing reads")
    original.reads <- list(read1=read1,
                           read2=read2)
    rm(read1, read2)

    tsmsg("Saving read names")
    read.names <- foreach(r=original.reads) %do% BStringSet(names(r))
    names(read.names) <- names(original.reads)
    original.reads <- Map(strip.names, original.reads)
    invisible(gc())

    tsmsg("Doing alignments")
    deloxed.ranges <- lapply(original.reads, function(x) {
        nchunks <- min(getDoParWorkers(), ceiling(length(x)/1000))
        foreach(reads=isplitVector(x, chunks=nchunks), .combine=c) %dopar% {
            .delox.trimmed.ranges(reads=reads, subj=subj, min.length=min.length,
                                  include.scores=include.scores,
                                  include.deleted.ranges=FALSE,
                                  align.opts=align.opts)
        }

        ## maybe.chunkapply(.delox.trimmed.ranges,
        ##                  VECTOR.ARGS=list(reads=strip.names(x)),
        ##                  SCALAR.ARGS=list(subj=subj,
        ##                    min.length=min.length,
        ##                    include.scores=TRUE,
        ##                    include.deleted.ranges=TRUE,
        ##                    align.opts=align.opts),
        ##                  MERGE=c,
        ##                  min.chunk.size=1000)
    })

    tsmsg("Extracting metadata")
    delox.meta <- lapply(deloxed.ranges, mcols)

    ## Decide whether enough was trimmed on the inside (right end) of
    ## either read to call it a mate-pair.
    tsmsg("Calculating inside trim score")
    inside.trim.score <- Reduce(pmax,
                                lapply(delox.meta,
                                       function(x) ifelse(x$trim == "right", x$score, 0)))

    ## Decide whether enough was trimmed on the outside (left end) of
    ## either read to call it a non-mate-pair.
    tsmsg("Calculating outside trim score")
    outside.trim.score <- Reduce(pmax,
                                 lapply(delox.meta,
                                        function(x) ifelse(x$trim == "left", x$score, 0)))

    tsmsg("Calling presence of subject")
    calls <- list(inside=inside.trim.score >= min.call * align.opts$match,
                  outside=outside.trim.score >= min.call * align.opts$match)

    tsmsg("Categorizing reads")
    category <- factor(rep(NA, length(original.reads$read1)), levels=c("mate", "non-mate", "negative", "unpaired", "discard"))
    category[calls$inside] <- "mate"
    category[calls$outside] <- "non-mate"
    ## If they're either both true or both false, then it's ambiguous
    category[calls$inside == calls$outside] <- "negative"
    ## All categories should be filled in now
    stopifnot(all(!is.na(category)))

    too.short <- lapply(deloxed.ranges, function(x) width(x) < min.length)
    ## If either read in a pair is too short, then its partner is no
    ## longer paired at all.
    one.too.short <- Reduce(`|`, too.short)
    category[one.too.short] <- "unpaired"
    ## If both reads in a pair are too short, then the entire pair is
    ## discarded. This is highly unlikely, since Cre-Lox should not
    ## appear in the middle of both sequences.
    both.too.short <- Reduce(`&`, too.short)
    category[both.too.short] <- "discard"

    tsmsg("Trimming reads and restoring read names")
    trimmed.reads <- lapply(names(original.reads), function(x) {
        trimmed <- narrow(original.reads[[x]],
                          start=start(deloxed.ranges[[x]]),
                          end=end(deloxed.ranges[[x]]))
        names(trimmed) <- as.character(read.names[[x]])
        trimmed
    })
    names(trimmed.reads) <- names(original.reads)

    tsmsg("Assembling metadata")
    foreach (r=names(trimmed.reads)) %do% {
        emeta <- list()
        emeta$category <- category
        emeta$category[too.short[[r]]] <- "discard"
        if (include.scores) {
            emeta$score <- delox.meta[[r]]$score
        }
        mcols(trimmed.reads[[r]]) <- DataFrame(emeta)
    }

    return(trimmed.reads)
}

## Wrapper for both single and paired as appropriate
delox <- function(subj, read1, read2=NULL,
                  min.call=10, min.length=36,
                  interleaved=FALSE,
                  read1.orientation=c("in", "out")[1],
                  read2.orientation=c("in", "out")[1],
                  align.opts=list()) {
    if (is.null(read2)) {
        if (interleaved) {
            x <- deinterleave.pairs(read1)
            read1 <- x$read1
            read2 <- x$read2
        } else {
            tsmsg("Doing single-read delox")
            return(delox.single(subj=subj, reads=read1, min.length=min.length, align.opts=align.opts))
        }
    }

    ## Make sure both reads are oriented "in" before calling
    tsmsg("Ensuring correct read orientation")
    if (tolower(read1.orientation) == "out") {
        read1 <- reverseComplement(read1)
    }
    if (!is.null(read2) && tolower(read2.orientation) == "out") {
        read2 <- reverseComplement(read2)
    }

    tsmsg("Doing paired-end delox")
    deloxed.reads <- delox.paired(subj, read1, read2,
                                  min.call=min.call, min.length=min.length,
                                  align.opts=align.opts)

    ## If reads started "out", put them back that way before returning
    tsmsg("Restoring original read orientation")
    if (tolower(read1.orientation) == "out") {
        deloxed.reads$read1 <- reverseComplement(deloxed.reads$read1)
    }
    if (tolower(read2.orientation) == "out") {
        deloxed.reads$read2 <- reverseComplement(deloxed.reads$read2)
    }

    return(deloxed.reads)
}

## ## Hack to work around a bug in BioConductor that prevents subsetting
## ## of named XStringSet objects. Apparently, since DeLoxer was first
## ## published, the BioConductor devs broke the XStringSet subsetting
## ## code so that it can no longer handle XStringSets with names. The
## ## code below strips the names from the XStringSet, then calls the old
## ## code to subset the nameless object while subsetting the names
## ## separately, then finally puts the names back on and returns the
## ## result.
## old.XStringSet.subset.method <- selectMethod("[", "XStringSet")
## invisible(setMethod("[", signature="XStringSet", definition=function(x, i, j, ..., drop=TRUE) {
##     ## Save the names into a seaprate variable
##     xnames <- names(x)
##     ## Do the old behavior, which works on unnamed objects
##     x <- old.XStringSet.subset.method(unname(x), i, j, ..., drop=drop)
##     ## Put the names back on and return
##     setNames(x, xnames[i])
## }))

save.deloxed.pairs.as.fastq <- function(read1, read2, output.base,
                                        mate.ext="matepaired",
                                        nonmate.ext="paired",
                                        negative.ext="negative",
                                        unpaired.ext="unpaired",
                                        append=FALSE) {

    extension <- c(mate=mate.ext,
                   `non-mate`=nonmate.ext,
                   negative=negative.ext,
                   unpaired=unpaired.ext)

    ## ## Make sure that read1 and read2 are a match for each other
    ## stopifnot(identical(as.character(mcols(read1)$category),
    ##                     as.character(mcols(read2)$category)))

    ## ## Discard the shorter read on "unpaired"
    ## read1.shorter <- width(read1) < width(read2)
    ## mcols(read1)$category[mcols(read1)$category == "unpaired" & read1.shorter] <- NA
    ## mcols(read2)$category[mcols(read2)$category == "unpaired" & !read1.shorter] <- NA

    filename.template <- "%s_read%s.%s.fastq"

    for (cat in names(extension)) {
        read1.for.category <- read1[mcols(read1)$category == cat]
        read1.file.for.category <- sprintf(filename.template, output.base, 1, extension[[cat]])
        tsmsg("Writing ", read1.file.for.category)
        write.QualityScaledDNAStringSet(read1.for.category,
                                        file=read1.file.for.category,
                                        append=append)

        read2.for.category <- read2[mcols(read2)$category == cat]
        read2.file.for.category <- sprintf(filename.template, output.base, 2, extension[[cat]])
        tsmsg("Writing ", read2.file.for.category)
        write.QualityScaledDNAStringSet(read2.for.category,
                                        file=read2.file.for.category,
                                        append=append)
    }

    return(TRUE)
}

get.category.counts <- function(deloxed.pairs) {
    r1cat <- mcols(deloxed.pairs$read1)$category
    r2cat <- mcols(deloxed.pairs$read2)$category
    x <- table(r1cat)[c("mate", "non-mate", "negative")]
    x["r1.single"] <- sum(r1cat == "unpaired")
    x["r2.single"] <- sum(r2cat == "unpaired")
    x["discard"] <- length(r1cat) - sum(x)
    x
}

mcparallel.quiet <- function(expr, ...) {
    parallel:::mcparallel(suppressMessages(expr), ...)
}

print.stats <- function(category.counts) {
    category.pct <- setNames(sprintf("%.3g%%", category.counts / sum(category.counts) * 100),
                             names(category.counts))
    x <- rbind(Counts=category.counts, Fractions=category.pct)
    names(dimnames(x)) <- c("Stat", "Category")
    print(x, quote=FALSE, justify="right")
}

main <- function() {
    opt <- parse_arguments()
    print.option.list(opt)
    args <- opt$args
    opts <- opt$options

    if (!(tolower(opts[["read1-orientation"]]) %in% c("in", "out") &&
          tolower(opts[["read2-orientation"]]) %in% c("in", "out") )) {
        stop("Valid orientations are \"in\" and \"out\"")
    }

    align.opts <- list(match = opts[["match-bonus"]],
                       mismatch = opts[["mismatch-penalty"]],
                       gapOpening = opts[["gap-open-penalty"]],
                       gapExtension = opts[["gap-extension-penalty"]])

    stopifnot(opts$`min-call` >= 1 &&
              opts$`min-length` >= 0 &&
              opts$`jobs` >= 0)

    ## Set jobs if requested
    if (opts$jobs > 0) {
        options(cores=opts$jobs)
    }
    tsmsg("Using ", getDoParWorkers(), " cores.")

    paired <- !opts[["single-read-mode"]]
    interleaved <- opts[["interleaved"]]

    if (!paired && interleaved) {
        stop("ERROR: You cannot specify both --interleaved and --single-read-mode")
    } else if (!paired) {
        if (length(args) != 3) {
            stop("DeLoxer in single-read mode requires exactly 3 arguments")
        }
        subject.file <- args[[1]]
        read1.file <- args[[2]]
        read2.file <- NULL
        output.file <- args[[3]]
    } else if (interleaved) {
        if (length(args) != 3) {
            stop("DeLoxer interleaved input mode requires exactly 3 arguments")
        }
        subject.file <- args[[1]]
        read1.file <- args[[2]]
        read2.file <- NULL
        output.basename <- args[[3]]
    } else {
        if (length(args) != 4) {
            stop("DeLoxer requires exactly 4 arguments")
        }
        subject.file <- args[[1]]
        read1.file <- args[[2]]
        read2.file <- args[[3]]
        output.basename <- args[[4]]
    }

    subj <- readDNAStringSet(subject.file, format="fasta", nrec=1)[[1]]

    yieldSize <- opts[["yield-size"]]
    if (paired) {
        tsmsg("Deloxing and classifying paired sequences")
        read1.stream <- FastqStreamer(read1.file, n=yieldSize)
        read2.stream <- if (!interleaved) FastqStreamer(read2.file, n=yieldSize)
        process.chunk <- function(fq1, fq2, append) {
            if (length(fq1) < 1)
                return(TRUE)
            if (interleaved) {
                stopifnot(is.null(fq2))
                deint <- deinterleave.pairs(fq1)
                fq1 <- deint[[1]]
                fq2 <- deint[[2]]
            } else {
                if (length(fq1) != length(fq2))
                    stop("Both input files must have equal numbers of reads")
            }
            read1 <- as(fq1, "QualityScaledDNAStringSet")
            read2 <- as(fq2, "QualityScaledDNAStringSet")
            deloxed.pairs <-
                delox(subj, read1, read2,
                      min.call=opts[["min-call"]],
                      interleaved=interleaved,
                      read1.orientation=opts[["read1-orientation"]],
                      read2.orientation=opts[["read2-orientation"]],
                      align.opts=align.opts)
            save.deloxed.pairs.as.fastq(deloxed.pairs$read1, deloxed.pairs$read2, output.basename, append=append)

            ret <- get.category.counts(deloxed.pairs)
            return(ret)
        }
        fq1 <- yield(read1.stream)
        fq2 <- if (!interleaved) yield(read2.stream)
        if (length(fq1) == 0)
            warning("No reads were read from the input file.")
        proc <- mcparallel.quiet(process.chunk(fq1, fq2, append=FALSE))
        reads.processed <- length(fq1) / ifelse(interleaved, 2, 1)
        category.stats <-
            category.counts <- NULL
        while (length(fq1 <- yield(read1.stream))) {
            if (!interleaved)
                fq2 <- yield(read2.stream)
            prev.result <- mccollect(proc)[[1]]
            if (is(prev.result, "try-error")) {
                tsmsg("Encountered error in deloxing subprocess:")
                stop(attr(prev.result, "condition"))
            }
            if (is.null(category.counts)) {
                category.counts <- prev.result
            } else {
                category.counts <- category.counts + prev.result
            }
            tsmsg("Category stats after processing ", reads.processed, " reads:")
            ## category.pct <- setNames(sprintf("%.3g%%", category.counts / sum(category.counts) * 100),
            ##                          names(category.counts))
            print.stats(category.counts)
            proc <- mcparallel.quiet(process.chunk(fq1, fq2, append=TRUE))
            reads.processed <- reads.processed + length(fq1) / ifelse(interleaved, 2, 1)
        }
        close(read1.stream)
        if (!interleaved) close(read2.stream)
        prev.result <- mccollect(proc)[[1]]
        if (is.null(category.counts)) {
            category.counts <- prev.result
        } else {
            category.counts <- category.counts + prev.result
        }
        if (is(prev.result, "try-error")) {
            tsmsg("Encountered error in deloxing subprocess:")
            stop(attr(prev.result, "condition"))
            stop("Encountered error in deloxing")
        }
        tsmsg("Final category stats after processing ", reads.processed, " reads:")
        print.stats(category.counts)
    } else {
        tsmsg("Deloxing single sequences")
        read1.stream <- FastqStreamer(read1.file, n=yieldSize)
        process.chunk <- function(fq, append) {
            if (length(fq) < 1)
                return(TRUE)
            reads <- as(fq, "QualityScaledDNAStringSet")
            deloxed.reads <-
                delox(subj, reads, NULL,
                      min.call=opts[["min-call"]],
                      interleaved=interleaved,
                      read1.orientation=opts[["read1-orientation"]],
                      read2.orientation=opts[["read2-orientation"]],
                      align.opts=align.opts)
            write.QualityScaledDNAStringSet(deloxed.reads, output.file, append=append)
            return(TRUE)
        }
        ## First chunk is processed with append=FALSE to start the file
        fq <- yield(read1.stream)
        if (length(fq) == 0)
            warning("No reads were read from the input file.")
        proc <- mcparallel.quiet(suppressMessages(process.chunk(fq, append=FALSE)))
        reads.processed <- length(fq)
        while (length(fq <- yield(read1.stream))) {
            prev.result <- mccollect(proc)[[1]]
            if (is(prev.result, "try-error")) {
                tsmsg("Encountered error in deloxing subprocess:")
                stop(attr(prev.result, "condition"))
                stop("Encountered error in deloxing")
            }
            tsmsg("Processed ", reads.processed, " reads")
            proc <- mcparallel.quiet(suppressMessages(process.chunk(fq, append=TRUE)))
            reads.processed <- reads.processed + length(fq)
        }
        close(read1.stream)
        prev.result <- mccollect(proc)[[1]]
        if (is(prev.result, "try-error")) {
            tsmsg("Encountered error in deloxing subprocess:")
            stop(attr(prev.result, "condition"))
            stop("Encountered error in deloxing")
        }
        tsmsg("Processed ", reads.processed, " reads")
    }
    tsmsg("Finished successful run")
}

main()