/*
 * Decompiled with CFR 0.152.
 */
package picard.analysis;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.analysis.MetricAccumulationLevel;
import picard.analysis.RrbsCpgDetailMetrics;
import picard.analysis.RrbsMetrics;
import picard.analysis.RrbsMetricsCollector;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.RExecutor;

@CommandLineProgramProperties(summary="<b>Collects metrics from reduced representation bisulfite sequencing (Rrbs) data.</b>  <p>This tool uses reduced representation bisulfite sequencing (Rrbs) data to determine cytosine methylation status across all reads of a genomic DNA sequence.  For a primer on bisulfite sequencing and cytosine methylation, please see the corresponding <a href='https://www.broadinstitute.org/gatk/guide/article?id=6330'>GATK Dictionary entry</a>. </p><p>Briefly, bisulfite reduction converts un-methylated cytosine (C) to uracil (U) bases.  Methylated sites are not converted because they are resistant to bisulfite reduction.  Subsequent to PCR amplification of the reaction products, bisulfite reduction manifests as [C -> T (+ strand) or G -> A (- strand)] base conversions.  Thus, conversion rates can be calculated from the reads as follows: [CR = converted/(converted + unconverted)]. Since methylated cytosines are protected against Rrbs-mediated conversion, the methylation rate (MR) is as follows:[MR = unconverted/(converted + unconverted) = (1 - CR)].</p><p>The CpG CollectRrbsMetrics tool outputs three files including summary and detail metrics tables as well as a PDF file containing four graphs. These graphs are derived from the summary table and include a comparison of conversion rates for both CpG and non-CpG sites, the distribution of total numbers of CpG sites as a function of the CpG conversion rates, the distribution of CpG sites by the level of read coverage (depth), and the numbers of reads discarded resulting from either exceeding the mismatch rate or size (too short).  The detailed metrics table includes the coordinates of all of the CpG sites for the experiment as well as the conversion rates observed for each site.</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectRrbsMetrics \\<br />      R=reference_sequence.fasta \\<br />      I=input.bam \\<br />      M=basename_for_metrics_files</pre><p>Please see the CollectRrbsMetrics <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#RrbsCpgDetailMetrics'>definitions</a> for a complete description of both the detail and summary metrics produced by this tool.</p><hr />", oneLineSummary="<b>Collects metrics from reduced representation bisulfite sequencing (Rrbs) data.</b>  ", programGroup=DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
public class CollectRrbsMetrics
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "<b>Collects metrics from reduced representation bisulfite sequencing (Rrbs) data.</b>  ";
    static final String USAGE_DETAILS = "<p>This tool uses reduced representation bisulfite sequencing (Rrbs) data to determine cytosine methylation status across all reads of a genomic DNA sequence.  For a primer on bisulfite sequencing and cytosine methylation, please see the corresponding <a href='https://www.broadinstitute.org/gatk/guide/article?id=6330'>GATK Dictionary entry</a>. </p><p>Briefly, bisulfite reduction converts un-methylated cytosine (C) to uracil (U) bases.  Methylated sites are not converted because they are resistant to bisulfite reduction.  Subsequent to PCR amplification of the reaction products, bisulfite reduction manifests as [C -> T (+ strand) or G -> A (- strand)] base conversions.  Thus, conversion rates can be calculated from the reads as follows: [CR = converted/(converted + unconverted)]. Since methylated cytosines are protected against Rrbs-mediated conversion, the methylation rate (MR) is as follows:[MR = unconverted/(converted + unconverted) = (1 - CR)].</p><p>The CpG CollectRrbsMetrics tool outputs three files including summary and detail metrics tables as well as a PDF file containing four graphs. These graphs are derived from the summary table and include a comparison of conversion rates for both CpG and non-CpG sites, the distribution of total numbers of CpG sites as a function of the CpG conversion rates, the distribution of CpG sites by the level of read coverage (depth), and the numbers of reads discarded resulting from either exceeding the mismatch rate or size (too short).  The detailed metrics table includes the coordinates of all of the CpG sites for the experiment as well as the conversion rates observed for each site.</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectRrbsMetrics \\<br />      R=reference_sequence.fasta \\<br />      I=input.bam \\<br />      M=basename_for_metrics_files</pre><p>Please see the CollectRrbsMetrics <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#RrbsCpgDetailMetrics'>definitions</a> for a complete description of both the detail and summary metrics produced by this tool.</p><hr />";
    private static final String R_SCRIPT = "picard/analysis/rrbsQc.R";
    @Argument(doc="The SAM/BAM/CRAM file containing aligned reads. Must be coordinate sorted", shortName="I")
    public File INPUT;
    @Argument(doc="Base name for output files", shortName="M")
    public String METRICS_FILE_PREFIX;
    @Argument(doc="Minimum read length")
    public int MINIMUM_READ_LENGTH = 5;
    @Argument(doc="Threshold for base quality of a C base before it is considered")
    public int C_QUALITY_THRESHOLD = 20;
    @Argument(doc="Threshold for quality of a base next to a C before the C base is considered")
    public int NEXT_BASE_QUALITY_THRESHOLD = 10;
    @Argument(doc="Maximum percentage of mismatches in a read for it to be considered, with a range of 0-1")
    public double MAX_MISMATCH_RATE = 0.1;
    @Argument(doc="Set of sequence names to consider, if not specified all sequences will be used", optional=true)
    public Set<String> SEQUENCE_NAMES = new HashSet<String>();
    @Argument(shortName="AS", doc="If true, assume that the input file is coordinate sorted even if the header says otherwise.")
    public boolean ASSUME_SORTED = false;
    @Argument(shortName="LEVEL", doc="The level(s) at which to accumulate metrics.  ")
    public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
    public static final String DETAIL_FILE_EXTENSION = "rrbs_detail_metrics";
    public static final String SUMMARY_FILE_EXTENSION = "rrbs_summary_metrics";
    public static final String PDF_FILE_EXTENSION = "rrbs_qc.pdf";
    private static final Log log = Log.getInstance(CollectRrbsMetrics.class);

    @Override
    protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
        return new CollectRrbsMetricsReferenceArgumentCollection();
    }

    @Override
    protected int doWork() {
        if (!this.METRICS_FILE_PREFIX.endsWith(".")) {
            this.METRICS_FILE_PREFIX = this.METRICS_FILE_PREFIX + ".";
        }
        File SUMMARY_OUT = new File(this.METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
        File DETAILS_OUT = new File(this.METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
        File PLOTS_OUT = new File(this.METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
        this.assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
        SamReader samReader = SamReaderFactory.makeDefault().referenceSequence(this.REFERENCE_SEQUENCE).open(this.INPUT);
        if (!this.ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new PicardException("The input file " + this.INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
        }
        ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(this.REFERENCE_SEQUENCE);
        ProgressLogger progressLogger = new ProgressLogger(log);
        RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(this.METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), this.C_QUALITY_THRESHOLD, this.NEXT_BASE_QUALITY_THRESHOLD, this.MINIMUM_READ_LENGTH, this.MAX_MISMATCH_RATE);
        for (SAMRecord samRecord : samReader) {
            progressLogger.record(samRecord);
            if (samRecord.getReadUnmappedFlag() || this.isSequenceFiltered(samRecord.getReferenceName())) continue;
            ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
            metricsCollector.acceptRecord(samRecord, referenceSequence);
        }
        metricsCollector.finish();
        MetricsFile rrbsMetrics = this.getMetricsFile();
        metricsCollector.addAllLevelsToFile(rrbsMetrics);
        MetricsFile summaryFile = this.getMetricsFile();
        MetricsFile detailsFile = this.getMetricsFile();
        for (RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
            summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
            for (RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
                detailsFile.addMetric(detailMetric);
            }
        }
        summaryFile.write(SUMMARY_OUT);
        detailsFile.write(DETAILS_OUT);
        RExecutor.executeFromClasspath(R_SCRIPT, DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath().replaceAll("%", "%%"));
        CloserUtil.close(samReader);
        return 0;
    }

    private boolean isSequenceFiltered(String sequenceName) {
        return this.SEQUENCE_NAMES != null && !this.SEQUENCE_NAMES.isEmpty() && !this.SEQUENCE_NAMES.contains(sequenceName);
    }

    private void assertIoFiles(File summaryFile, File detailsFile, File plotsFile) {
        IOUtil.assertFileIsReadable(this.INPUT);
        IOUtil.assertFileIsReadable(this.REFERENCE_SEQUENCE);
        IOUtil.assertFileIsWritable(summaryFile);
        IOUtil.assertFileIsWritable(detailsFile);
        IOUtil.assertFileIsWritable(plotsFile);
    }

    @Override
    protected String[] customCommandLineValidation() {
        ArrayList<String> errorMsgs = new ArrayList<String>();
        if (this.MAX_MISMATCH_RATE < 0.0 || this.MAX_MISMATCH_RATE > 1.0) {
            errorMsgs.add("MAX_MISMATCH_RATE must be in the range of 0-1");
        }
        if (this.C_QUALITY_THRESHOLD < 0) {
            errorMsgs.add("C_QUALITY_THRESHOLD must be >= 0");
        }
        if (this.NEXT_BASE_QUALITY_THRESHOLD < 0) {
            errorMsgs.add("NEXT_BASE_QUALITY_THRESHOLD must be >= 0");
        }
        if (this.MINIMUM_READ_LENGTH <= 0) {
            errorMsgs.add("MINIMUM_READ_LENGTH must be > 0");
        }
        if (!CollectRrbsMetrics.checkRInstallation(R_SCRIPT != null)) {
            errorMsgs.add("R is not installed on this machine. It is required for creating the chart.");
        }
        return errorMsgs.isEmpty() ? null : errorMsgs.toArray(new String[errorMsgs.size()]);
    }

    public static class CollectRrbsMetricsReferenceArgumentCollection
    implements ReferenceArgumentCollection {
        @Argument(doc="The reference sequence fasta file", shortName="R")
        public File REFERENCE;

        @Override
        public File getReferenceFile() {
            return this.REFERENCE;
        }
    }
}

