/*
 * Decompiled with CFR 0.152.
 */
package picard.analysis;

import htsjdk.samtools.DuplicateSet;
import htsjdk.samtools.DuplicateSetIterator;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.AggregateFilter;
import htsjdk.samtools.filter.AlignedFilter;
import htsjdk.samtools.filter.FilteringSamIterator;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.filter.SecondaryOrSupplementaryFilter;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.IntStream;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.filter.CountingFilterWrapper;
import picard.filter.CountingMapQFilter;
import picard.filter.CountingPairedFilter;
import picard.nio.PicardHtsPath;

/**
 * Command-line tool that builds a histogram of how many distinct UMI (barcode) values
 * occur within each duplicate set of the input file.
 *
 * <p>Reads are first passed through a chain of counting filters (aligned, mapping quality,
 * secondary/supplementary, UMI tag present, UMI base quality and, optionally, paired).
 * The surviving reads are grouped by a {@link DuplicateSetIterator}; for every duplicate
 * set the number of distinct values of {@link #BARCODE_TAG} is tallied, and the resulting
 * histogram is written to {@link #OUTPUT}.
 */
@DocumentedFeature
@ExperimentalFeature
@CommandLineProgramProperties(
        summary = CollectUmiPrevalenceMetrics.USAGE_SUMMARY + CollectUmiPrevalenceMetrics.USAGE_DETAILS,
        oneLineSummary = CollectUmiPrevalenceMetrics.USAGE_SUMMARY,
        programGroup = DiagnosticsAndQCProgramGroup.class)
public class CollectUmiPrevalenceMetrics
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Tally the counts of UMIs in duplicate sets within a bam. \n";
    static final String USAGE_DETAILS = "<p>This tool collects the Histogram of the number of duplicate sets that contain a given number of UMIs. Understanding this distribution can help understand the role that the UMIs have in the determination of consensus sets, the risk of UMI collisions, and of spurious reads that result from uncorrected UMIs.";

    @Argument(shortName="I", doc="Input (indexed) BAM/CRAM file.")
    public PicardHtsPath INPUT;

    @Argument(shortName="O", doc="Write metrics to this file")
    public File OUTPUT;

    @Argument(shortName="MQ", doc="minimal value for the mapping quality of the reads to be used in the estimation.", optional=true, minValue=0.0, maxValue=255.0)
    public Integer MINIMUM_MQ = 30;

    @Argument(doc="Barcode SAM tag.", optional=true)
    public String BARCODE_TAG = SAMTag.RX.name();

    @Argument(doc="Barcode Quality SAM tag.", optional=true)
    public String BARCODE_BQ = SAMTag.BQ.name();

    @Argument(shortName="BQ", doc="minimal value for the base quality of all the bases in a molecular barcode, for it to be used.", optional=true, minValue=0.0, maxValue=255.0)
    public Integer MINIMUM_BARCODE_BQ = 30;

    @Argument(shortName="FUR", doc="Whether to filter unpaired reads from the input.", optional=true)
    public boolean FILTER_UNPAIRED_READS = true;

    @Argument(fullName="PROGRESS_STEP_INTERVAL", doc="The interval between which progress will be displayed.", optional=true)
    public int PROGRESS_STEP_INTERVAL = 1000000;

    private static final Log log = Log.getInstance(CollectUmiPrevalenceMetrics.class);

    /**
     * Reads {@link #INPUT}, filters its records, walks the resulting duplicate sets and
     * writes a histogram (bin: number of distinct UMIs, value: number of duplicate sets)
     * to {@link #OUTPUT}. The number of records rejected by each filter is logged.
     *
     * @return 0 on success
     * @throws RuntimeException wrapping the {@link IOException} if reading/closing the input fails
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(this.INPUT.toPath());

        final Histogram<Integer> umiCount = new Histogram<Integer>("numUmis", "duplicateSets");

        // Each filter is wrapped in (or is) a counting filter so that the number of records
        // it rejected can be reported once the iteration is over.
        final CountingPairedFilter countingPairedFilter = new CountingPairedFilter();
        final CountingFilterWrapper countingAlignedFilter = new CountingFilterWrapper(new AlignedFilter(true));
        final CountingFilterWrapper countingBarcodeFilter = new CountingFilterWrapper(new UMITagPresentFilter());
        final CountingFilterWrapper countingBarcodeQualityFilter = new CountingFilterWrapper(new BarcodeQualityFilter(this.MINIMUM_BARCODE_BQ));
        final CountingMapQFilter countingMapQFilter = new CountingMapQFilter(this.MINIMUM_MQ);
        final CountingFilterWrapper countingSecondaryOrSupplementaryFilter = new CountingFilterWrapper(new SecondaryOrSupplementaryFilter());

        final ProgressLogger progress = new ProgressLogger(log, this.PROGRESS_STEP_INTERVAL, "examined", "duplicate sets");

        try (SamReader in = SamReaderFactory.makeDefault().referenceSequence(this.REFERENCE_SEQUENCE).open(this.INPUT.toPath())) {
            IOUtil.assertFileIsWritable(this.OUTPUT.toPath());

            final SAMRecordIterator samRecordIterator = in.iterator();
            final List<SamRecordFilter> samFilters = CollectionUtil.makeList(
                    countingAlignedFilter,
                    countingMapQFilter,
                    countingSecondaryOrSupplementaryFilter,
                    countingBarcodeFilter,
                    countingBarcodeQualityFilter);
            // The paired filter is only part of the chain when requested, so its count stays
            // at zero in the summary below when FILTER_UNPAIRED_READS is false.
            if (this.FILTER_UNPAIRED_READS) {
                samFilters.add(countingPairedFilter);
            }
            final FilteringSamIterator filteredSamRecordIterator =
                    new FilteringSamIterator(samRecordIterator, new AggregateFilter(samFilters));

            log.info("Queried BAM, getting duplicate sets.");
            final DuplicateSetIterator duplicateSets =
                    new DuplicateSetIterator(filteredSamRecordIterator, in.getFileHeader(), false, null, log);

            log.info("Starting iteration on duplicate sets");
            while (duplicateSets.hasNext()) {
                final DuplicateSet set = duplicateSets.next();
                progress.record(set.getRepresentative());

                // Collect the distinct barcode values seen in this duplicate set.
                final HashSet<String> barcodes = new HashSet<>();
                for (final SAMRecord record : set.getRecords()) {
                    barcodes.add(record.getStringAttribute(this.BARCODE_TAG));
                }
                umiCount.increment(barcodes.size(), 1.0);
            }
        }
        catch (IOException e) {
            throw new RuntimeException("Problem while reading file: " + this.INPUT, e);
        }

        log.info("Iteration done. Emitting metrics.");
        log.info(String.format("Processed %d sets", progress.getCount()));
        log.info(String.format("Filtered %d unpaired reads", countingPairedFilter.getFilteredRecords()));
        log.info(String.format("Filtered %d unaligned reads", countingAlignedFilter.getFilteredRecords()));
        log.info(String.format("Filtered %d low mapQ reads", countingMapQFilter.getFilteredRecords()));
        log.info(String.format("Filtered %d Secondary or Supplementary reads", countingSecondaryOrSupplementaryFilter.getFilteredRecords()));
        log.info(String.format("Filtered %d reads that had no UMI tag", countingBarcodeFilter.getFilteredRecords()));
        log.info(String.format("Filtered %d reads that had poor quality UMI", countingBarcodeQualityFilter.getFilteredRecords()));

        final MetricsFile<?, Integer> metricsFile = this.getMetricsFile();
        metricsFile.addHistogram(umiCount);
        metricsFile.write(this.OUTPUT);
        return 0;
    }

    /**
     * Filters out records that do not carry the {@link #BARCODE_TAG} attribute.
     */
    private class UMITagPresentFilter
    implements SamRecordFilter {
        private UMITagPresentFilter() {
        }

        /**
         * @param samRecord the record to examine
         * @return {@code true} (filter the record out) when it has no {@link #BARCODE_TAG} attribute
         */
        @Override
        public boolean filterOut(SAMRecord samRecord) {
            return !samRecord.hasAttribute(CollectUmiPrevalenceMetrics.this.BARCODE_TAG);
        }

        /**
         * @return {@code true} only when both records would individually be filtered out
         */
        @Override
        public boolean filterOut(SAMRecord samRecord, SAMRecord samRecord1) {
            return this.filterOut(samRecord) && this.filterOut(samRecord1);
        }
    }

    /**
     * Filters out records whose barcode quality string (attribute {@link #BARCODE_BQ})
     * contains at least one base quality below a minimum value. Records without the
     * attribute are kept.
     */
    private class BarcodeQualityFilter
    implements SamRecordFilter {
        /** Lowest base quality every barcode base must reach for the record to be kept. */
        private final Integer minValue;

        /**
         * @param minValue minimum acceptable quality for each base of the barcode
         */
        BarcodeQualityFilter(Integer minValue) {
            this.minValue = minValue;
        }

        /**
         * @param samRecord the record to examine
         * @return {@code true} (filter the record out) when any barcode base quality is
         *         below the minimum; {@code false} when all qualities pass or when the
         *         record has no {@link #BARCODE_BQ} attribute
         */
        @Override
        public boolean filterOut(SAMRecord samRecord) {
            if (!samRecord.hasAttribute(CollectUmiPrevalenceMetrics.this.BARCODE_BQ)) {
                // No quality information available: the record is not rejected on quality grounds.
                return false;
            }
            // Spaces are stripped before the quality string is decoded.
            final String barcodeBQ = samRecord.getStringAttribute(CollectUmiPrevalenceMetrics.this.BARCODE_BQ).replace(" ", "");
            final byte[] qualities = SAMUtils.fastqToPhred(barcodeBQ);
            // filterOut() == true means "drop": a single low-quality base is enough to reject.
            for (final byte quality : qualities) {
                if (quality < this.minValue) {
                    return true;
                }
            }
            return false;
        }

        /**
         * @return {@code true} only when both records would individually be filtered out
         */
        @Override
        public boolean filterOut(SAMRecord samRecord, SAMRecord samRecord1) {
            return this.filterOut(samRecord) && this.filterOut(samRecord1);
        }
    }
}

