package picard.illumina.quality;

import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.tukaani.xz.common.Util;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.illumina.parser.BaseIlluminaDataProvider;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.IlluminaDataProviderFactory;
import picard.illumina.parser.IlluminaDataType;
import picard.illumina.parser.ReadData;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;

@CommandLineProgramProperties(summary = "Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into various categories.<p>This tool categorizes the reads that did not pass filter (PF-Failing) into four groups.  These groups are based on a heuristic that was derived by looking at a few titration experiments. </p><p>After examining the called bases from the first 24 cycles of each read, the PF-Failed reads are grouped into the following four categories: <ul><li>MISALIGNED - The first 24 basecalls of a read are uncalled (numNs~24).   These types of reads appear to be flow cell artifacts because reads were only found near tile boundaries and were concentration (library) independent</li> <li>EMPTY - All 24 bases are called (numNs~0) but the number of bases with quality scores greater than two is less than or equal to eight (numQGtTwo<=8).  These reads were location independent within the tiles and were inversely proportional to the library concentration</li><li>POLYCLONAL - All 24 bases were called and numQGtTwo>=12, were independent of their location with the tiles, and were directly proportional to the library concentration.  These reads are likely the result of PCR artifacts </li><li>UNKNOWN - The remaining reads that are PF-Failing but did not fit into any of the groups listed above</li></ul></p>  <p>The tool defaults to the SUMMARY output which indicates the number of PF-Failed reads per tile and groups them into the categories described above accordingly.</p> <p>A DETAILED metrics option is also available that subdivides the SUMMARY outputs by the x- y- position of these reads within each tile.  To obtain the DETAILED metric table, you must add the PROB_EXPLICIT_READS option to your command line and set the value between 0 and 1.  This value represents the fractional probability of PF-Failed reads to send to output.  For example, if PROB_EXPLICIT_READS=0, then no metrics will be output.  
If PROB_EXPLICIT_READS=1, then it will provide detailed metrics for all (100%) of the reads.  It follows that setting the PROB_EXPLICIT_READS=0.5, will provide detailed metrics for half of the PF-Failed reads.</p> <p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example: (SUMMARY Metrics)</h4> <pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/metrics/ \\<br />      LANE=001</pre><h4>Usage example: (DETAILED Metrics)</h4><pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/Detail_metrics/ \\<br />      LANE=001 \\<br />      PROB_EXPLICIT_READS=1</pre>Please see our documentation on the <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailSummaryMetric'>SUMMARY</a> and <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailDetailedMetric'>DETAILED</a> metrics for comprehensive explanations of the outputs produced by this tool.<hr />", oneLineSummary = CollectHiSeqXPfFailMetrics.USAGE_SUMMARY, programGroup = DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
/* loaded from: input_file:picard/illumina/quality/CollectHiSeqXPfFailMetrics.class */
public class CollectHiSeqXPfFailMetrics extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into various categories.";
    static final String USAGE_DETAILS = "<p>This tool categorizes the reads that did not pass filter (PF-Failing) into four groups.  These groups are based on a heuristic that was derived by looking at a few titration experiments. </p><p>After examining the called bases from the first 24 cycles of each read, the PF-Failed reads are grouped into the following four categories: <ul><li>MISALIGNED - The first 24 basecalls of a read are uncalled (numNs~24).   These types of reads appear to be flow cell artifacts because reads were only found near tile boundaries and were concentration (library) independent</li> <li>EMPTY - All 24 bases are called (numNs~0) but the number of bases with quality scores greater than two is less than or equal to eight (numQGtTwo<=8).  These reads were location independent within the tiles and were inversely proportional to the library concentration</li><li>POLYCLONAL - All 24 bases were called and numQGtTwo>=12, were independent of their location with the tiles, and were directly proportional to the library concentration.  These reads are likely the result of PCR artifacts </li><li>UNKNOWN - The remaining reads that are PF-Failing but did not fit into any of the groups listed above</li></ul></p>  <p>The tool defaults to the SUMMARY output which indicates the number of PF-Failed reads per tile and groups them into the categories described above accordingly.</p> <p>A DETAILED metrics option is also available that subdivides the SUMMARY outputs by the x- y- position of these reads within each tile.  To obtain the DETAILED metric table, you must add the PROB_EXPLICIT_READS option to your command line and set the value between 0 and 1.  This value represents the fractional probability of PF-Failed reads to send to output.  For example, if PROB_EXPLICIT_READS=0, then no metrics will be output.  If PROB_EXPLICIT_READS=1, then it will provide detailed metrics for all (100%) of the reads.  
It follows that setting the PROB_EXPLICIT_READS=0.5, will provide detailed metrics for half of the PF-Failed reads.</p> <p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example: (SUMMARY Metrics)</h4> <pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/metrics/ \\<br />      LANE=001</pre><h4>Usage example: (DETAILED Metrics)</h4><pre>java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />      BASECALLS_DIR=/BaseCalls/ \\<br />      OUTPUT=/Detail_metrics/ \\<br />      LANE=001 \\<br />      PROB_EXPLICIT_READS=1</pre>Please see our documentation on the <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailSummaryMetric'>SUMMARY</a> and <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailDetailedMetric'>DETAILED</a> metrics for comprehensive explanations of the outputs produced by this tool.<hr />";

    /** Illumina basecalls directory handed to the data-provider factory. */
    @Argument(doc = "The Illumina basecalls directory. ", shortName = "B")
    public File BASECALLS_DIR;

    /** Basename of the output; the summary/detailed metrics extensions are appended to it. */
    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Basename for metrics file. Resulting file will be <OUTPUT>.pffail_summary_metrics", optional = false)
    public File OUTPUT;

    /** Lane whose tiles are processed. */
    @Argument(doc = "Lane number.", shortName = StandardOptionDefinitions.LANE_SHORT_NAME)
    public Integer LANE;

    private static final Log LOG = Log.getInstance(CollectHiSeqXPfFailMetrics.class);

    /** Suffix appended to OUTPUT to name the detailed (per-read) metrics file. */
    public static final String detailedMetricsExtension = ".pffail_detailed_metrics";

    /** Suffix appended to OUTPUT to name the summary (per-tile) metrics file. */
    public static final String summaryMetricsExtension = ".pffail_summary_metrics";

    /** Probability with which each PF-failing read is emitted as a detailed metric; 0 disables detailed output. */
    @Argument(shortName = "P", doc = "The fraction of (non-PF) reads for which to output explicit classification. Output file will be <OUTPUT>.pffail_detailed_metrics (if PROB_EXPLICIT_READS != 0)", optional = true)
    public double PROB_EXPLICIT_READS = 0.0d;

    /** Requested worker count: positive = exact, 0 = all available cores, negative = available cores plus this value. */
    @Argument(shortName = "NP", doc = "Run this many PerTileBarcodeExtractors in parallel.  If NUM_PROCESSORS = 0, number of cores is automatically set to the number of cores available on the machine. If NUM_PROCESSORS < 0 then the number of cores used will be the number available on the machine less NUM_PROCESSORS.", optional = true)
    public int NUM_PROCESSORS = 1;

    /** Number of leading cycles of each read to inspect. */
    @Argument(doc = "Number of cycles to look at. At time of writing PF status gets determined at cycle 24 so numbers greater than this will yield strange results. In addition, PF status is currently determined at cycle 24, so running this with any other value is neither tested nor recommended.", optional = true)
    public int N_CYCLES = 24;

    /** Per-tile summary metrics, in tile insertion order. */
    private final Map<Integer, PFFailSummaryMetric> tileToSummaryMetrics = new LinkedHashMap<>();

    /** Per-tile detailed metrics, in tile insertion order. */
    private final Map<Integer, List<PFFailDetailedMetric>> tileToDetailedMetrics = new LinkedHashMap<>();

    /**
     * Single-template read structure of N_CYCLES cycles.
     * This initializer runs while the instance is being constructed, so it sees whatever
     * value N_CYCLES holds at that moment (its declared default of 24).
     * NOTE(review): if the argument parser assigns N_CYCLES only after construction, a
     * command-line override is not reflected in this field - confirm.
     */
    private final ReadStructure READ_STRUCTURE = new ReadStructure(this.N_CYCLES + "T");

    /* loaded from: input_file:picard/illumina/quality/CollectHiSeqXPfFailMetrics$PFFailDetailedMetric.class */
    /**
     * One row of the detailed metrics output: a single PF-failing read together with
     * the counts it was classified on and the resulting classification.
     */
    public static class PFFailDetailedMetric extends MetricBase {
        /** Tile the read was found on. */
        public Integer TILE;
        /** X coordinate of the read. */
        public int X;
        /** Y coordinate of the read. */
        public int Y;
        /** Number of no-call bases counted for the read. */
        public int NUM_N;
        /** Number of bases whose quality was greater than two. */
        public int NUM_Q_GT_TWO;
        /** Category assigned to the read. */
        public ReadClassifier.PfFailReason CLASSIFICATION;

        /** Creates an empty metric; every field keeps its Java default value. */
        public PFFailDetailedMetric() {
        }

        /**
         * Creates a fully populated metric.
         *
         * @param tile           tile number
         * @param x              x coordinate of the read
         * @param y              y coordinate of the read
         * @param numN           number of no-call bases
         * @param numQGtTwo      number of bases with quality greater than two
         * @param classification category assigned to the read
         */
        public PFFailDetailedMetric(Integer tile, int x, int y, int numN, int numQGtTwo, ReadClassifier.PfFailReason classification) {
            TILE = tile;
            X = x;
            Y = y;
            NUM_N = numN;
            NUM_Q_GT_TWO = numQGtTwo;
            CLASSIFICATION = classification;
        }
    }

    /* loaded from: input_file:picard/illumina/quality/CollectHiSeqXPfFailMetrics$PFFailSummaryMetric.class */
    /**
     * Summary counts of PF-failing reads for one tile (or for all tiles merged),
     * broken down by failure category. The PCT_* fields are fractions of READS,
     * not percentages, and are only filled in by {@link #calculateDerivedFields()}.
     */
    public static class PFFailSummaryMetric extends MetricBase {
        /** Tile label this row describes. */
        public String TILE = null;
        /** Total number of reads seen. */
        public int READS = 0;
        /** Number of reads that did not pass filter. */
        public int PF_FAIL_READS = 0;
        /** PF_FAIL_READS as a fraction of READS. */
        public double PCT_PF_FAIL_READS = 0.0d;
        /** Number of PF-failing reads classified as EMPTY. */
        public int PF_FAIL_EMPTY = 0;
        /** PF_FAIL_EMPTY as a fraction of READS. */
        public double PCT_PF_FAIL_EMPTY = 0.0d;
        /** Number of PF-failing reads classified as POLYCLONAL. */
        public int PF_FAIL_POLYCLONAL = 0;
        /** PF_FAIL_POLYCLONAL as a fraction of READS. */
        public double PCT_PF_FAIL_POLYCLONAL = 0.0d;
        /** Number of PF-failing reads classified as MISALIGNED. */
        public int PF_FAIL_MISALIGNED = 0;
        /** PF_FAIL_MISALIGNED as a fraction of READS. */
        public double PCT_PF_FAIL_MISALIGNED = 0.0d;
        /** Number of PF-failing reads classified as UNKNOWN. */
        public int PF_FAIL_UNKNOWN = 0;
        /** PF_FAIL_UNKNOWN as a fraction of READS. */
        public double PCT_PF_FAIL_UNKNOWN = 0.0d;

        /**
         * Creates a zeroed metric for the given tile label.
         *
         * @param str value stored in TILE
         */
        public PFFailSummaryMetric(String str) {
            this.TILE = str;
        }

        /** Creates a zeroed metric with no tile label. */
        public PFFailSummaryMetric() {
        }

        /**
         * Adds the raw counts of another metric to this one. TILE and the PCT_*
         * fields are left alone; call {@link #calculateDerivedFields()} afterwards.
         *
         * @param pFFailSummaryMetric metric whose counts are added
         */
        public void merge(PFFailSummaryMetric pFFailSummaryMetric) {
            this.READS += pFFailSummaryMetric.READS;
            this.PF_FAIL_READS += pFFailSummaryMetric.PF_FAIL_READS;
            this.PF_FAIL_EMPTY += pFFailSummaryMetric.PF_FAIL_EMPTY;
            this.PF_FAIL_MISALIGNED += pFFailSummaryMetric.PF_FAIL_MISALIGNED;
            this.PF_FAIL_POLYCLONAL += pFFailSummaryMetric.PF_FAIL_POLYCLONAL;
            this.PF_FAIL_UNKNOWN += pFFailSummaryMetric.PF_FAIL_UNKNOWN;
        }

        /**
         * Recomputes every PCT_* field as the matching count divided by READS.
         * Does nothing when READS is zero, so the fractions keep their previous values.
         */
        public void calculateDerivedFields() {
            if (this.READS == 0) {
                // Protect against division by zero.
                return;
            }
            // Divide in floating point: int / int would truncate every fraction to 0 (or 1).
            final double totalReads = this.READS;
            this.PCT_PF_FAIL_READS = this.PF_FAIL_READS / totalReads;
            this.PCT_PF_FAIL_EMPTY = this.PF_FAIL_EMPTY / totalReads;
            this.PCT_PF_FAIL_MISALIGNED = this.PF_FAIL_MISALIGNED / totalReads;
            this.PCT_PF_FAIL_POLYCLONAL = this.PF_FAIL_POLYCLONAL / totalReads;
            this.PCT_PF_FAIL_UNKNOWN = this.PF_FAIL_UNKNOWN / totalReads;
        }
    }

    /* loaded from: input_file:picard/illumina/quality/CollectHiSeqXPfFailMetrics$PerTilePFMetricsExtractor.class */
    private static class PerTilePFMetricsExtractor implements Runnable {
        private final int tile;
        private final PFFailSummaryMetric summaryMetric;
        final Collection<PFFailDetailedMetric> detailedMetrics;
        private final BaseIlluminaDataProvider provider;
        private final double pWriteDetailed;
        private Exception exception = null;
        private final Random random = new Random();

        public PerTilePFMetricsExtractor(int i, PFFailSummaryMetric pFFailSummaryMetric, Collection<PFFailDetailedMetric> collection, IlluminaDataProviderFactory illuminaDataProviderFactory, double d) {
            this.tile = i;
            this.summaryMetric = pFFailSummaryMetric;
            this.detailedMetrics = collection;
            this.pWriteDetailed = d;
            this.provider = illuminaDataProviderFactory.makeDataProvider(Arrays.asList(Integer.valueOf(i)));
        }

        public Exception getException() {
            return this.exception;
        }

        @Override // java.lang.Runnable
        public void run() {
            try {
                CollectHiSeqXPfFailMetrics.LOG.info("Extracting PF metrics for tile " + this.tile);
                while (this.provider.hasNext()) {
                    ClusterData next = this.provider.next();
                    this.summaryMetric.READS++;
                    if (!next.isPf().booleanValue()) {
                        this.summaryMetric.PF_FAIL_READS++;
                        ReadClassifier readClassifier = new ReadClassifier(next.getRead(0));
                        if (this.random.nextDouble() < this.pWriteDetailed) {
                            this.detailedMetrics.add(new PFFailDetailedMetric(Integer.valueOf(this.tile), next.getX(), next.getY(), readClassifier.numNs, readClassifier.numQGtTwo, readClassifier.failClass));
                        }
                        switch (readClassifier.failClass) {
                            case EMPTY:
                                this.summaryMetric.PF_FAIL_EMPTY++;
                                break;
                            case MISALIGNED:
                                this.summaryMetric.PF_FAIL_MISALIGNED++;
                                break;
                            case POLYCLONAL:
                                this.summaryMetric.PF_FAIL_POLYCLONAL++;
                                break;
                            case UNKNOWN:
                                this.summaryMetric.PF_FAIL_UNKNOWN++;
                                break;
                            default:
                                CollectHiSeqXPfFailMetrics.LOG.error("Got unexpected fail Reason");
                                break;
                        }
                    }
                }
            } catch (Exception e) {
                CollectHiSeqXPfFailMetrics.LOG.error(e, "Error processing tile ", Integer.valueOf(this.tile));
                this.exception = e;
            } finally {
                this.provider.close();
            }
        }
    }

    /* loaded from: input_file:picard/illumina/quality/CollectHiSeqXPfFailMetrics$ReadClassifier.class */
    /**
     * Classifies a read from two counts taken over its bases: how many are no-calls
     * ('.') and how many have a quality greater than two.
     */
    protected static class ReadClassifier {
        private final int numNs;
        private final int numQGtTwo;
        private PfFailReason failClass;

        /** Categories a PF-failing read can be assigned to. */
        public enum PfFailReason {
            EMPTY,
            POLYCLONAL,
            MISALIGNED,
            UNKNOWN
        }

        /**
         * Counts no-calls and qualities above two in the read and derives its category.
         *
         * @param readData read whose bases and qualities are inspected
         */
        public ReadClassifier(ReadData readData) {
            final byte[] bases = readData.getBases();
            this.numNs = CollectHiSeqXPfFailMetrics.countEquals(bases, (byte) '.');
            this.numQGtTwo = CollectHiSeqXPfFailMetrics.countGreaterThan(readData.getQualities(), (byte) 2);
            this.failClass = classify(this.numNs, this.numQGtTwo, bases.length);
        }

        /**
         * Applies the thresholds in order:
         * all but at most one base uncalled gives MISALIGNED; otherwise, with at most one
         * no-call, a third or fewer good qualities gives EMPTY and half or more gives
         * POLYCLONAL; everything else is UNKNOWN.
         */
        private static PfFailReason classify(int noCalls, int goodQuals, int readLength) {
            if (noCalls >= readLength - 1) {
                return PfFailReason.MISALIGNED;
            }
            if (noCalls > 1) {
                return PfFailReason.UNKNOWN;
            }
            if (goodQuals <= readLength / 3) {
                return PfFailReason.EMPTY;
            }
            if (goodQuals >= readLength / 2) {
                return PfFailReason.POLYCLONAL;
            }
            return PfFailReason.UNKNOWN;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Validates N_CYCLES and PROB_EXPLICIT_READS.
     *
     * @return the error messages if either argument is invalid; otherwise whatever the
     *         superclass validation returns
     */
    @Override // picard.cmdline.CommandLineProgram
    public String[] customCommandLineValidation() {
        final List<String> errors = new ArrayList<>();
        // The message promises "greater than 0", so zero has to be rejected as well.
        if (this.N_CYCLES <= 0) {
            errors.add("Number of Cycles to look at must be greater than 0");
        }
        // Written as a negated in-range test so that NaN is rejected too.
        final boolean isProbability = this.PROB_EXPLICIT_READS >= 0.0d && this.PROB_EXPLICIT_READS <= 1.0d;
        if (!isProbability) {
            errors.add("PROB_EXPLICIT_READS must be a probability, i.e., 0 <= PROB_EXPLICIT_READS <= 1");
        }
        if (errors.isEmpty()) {
            return super.customCommandLineValidation();
        }
        return errors.toArray(new String[0]);
    }

    /**
     * Runs one extractor per available tile on a fixed-size thread pool, then writes the
     * summary metrics file and, when PROB_EXPLICIT_READS is greater than zero, the
     * detailed metrics file.
     *
     * @return 0 on success; 4 if any per-tile extractor recorded an exception; 2 if
     *         anything was thrown while submitting, waiting for, or writing the results
     */
    @Override // picard.cmdline.CommandLineProgram
    protected int doWork() {
        // Build the read structure here, from the current N_CYCLES, rather than from a value
        // captured when the instance was constructed; otherwise a user-supplied N_CYCLES is ignored.
        final ReadStructure readStructure = new ReadStructure(this.N_CYCLES + "T");
        final IlluminaDataProviderFactory factory = new IlluminaDataProviderFactory(this.BASECALLS_DIR, this.LANE.intValue(), readStructure, new BclQualityEvaluationStrategy(2), IlluminaDataType.BaseCalls, IlluminaDataType.PF, IlluminaDataType.QualityScores, IlluminaDataType.Position);

        final File summaryFile = new File(this.OUTPUT + summaryMetricsExtension);
        final File detailedFile = new File(this.OUTPUT + detailedMetricsExtension);
        IOUtil.assertFileIsWritable(summaryFile);
        if (this.PROB_EXPLICIT_READS != 0.0d) {
            IOUtil.assertFileIsWritable(detailedFile);
        }

        final int numThreads = resolveThreadCount();
        LOG.info("Processing with " + numThreads + " PerTilePFMetricsExtractor(s).");
        final ExecutorService pool = Executors.newFixedThreadPool(numThreads);

        final List<Integer> tiles = factory.getAvailableTiles();
        final List<PerTilePFMetricsExtractor> extractors = new ArrayList<>(tiles.size());
        for (final int tile : tiles) {
            final PFFailSummaryMetric summary = new PFFailSummaryMetric(Integer.toString(tile));
            final List<PFFailDetailedMetric> detailed = new ArrayList<>();
            this.tileToSummaryMetrics.put(tile, summary);
            this.tileToDetailedMetrics.put(tile, detailed);
            extractors.add(new PerTilePFMetricsExtractor(tile, summary, detailed, factory, this.PROB_EXPLICIT_READS));
        }

        try {
            for (final PerTilePFMetricsExtractor extractor : extractors) {
                pool.submit(extractor);
            }
            pool.shutdown();
            // Effectively "wait forever" for all submitted extractors to finish.
            pool.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
            LOG.info("Processed " + extractors.size() + " tiles.");

            for (final PerTilePFMetricsExtractor extractor : extractors) {
                if (extractor.getException() != null) {
                    LOG.error("Abandoning metrics calculation because one or more PerTilePFMetricsExtractors failed.");
                    return 4;
                }
            }

            writeDetailedMetrics(detailedFile);
            writeSummaryMetrics(summaryFile);
            return 0;
        } catch (Throwable th) {
            if (th instanceof InterruptedException) {
                // Keep the interrupt visible to whoever called us.
                Thread.currentThread().interrupt();
            }
            LOG.error(th, "Parent thread encountered problem submitting extractors to thread pool or awaiting shutdown of threadpool.  Attempting to kill threadpool.");
            pool.shutdownNow();
            return 2;
        }
    }

    /** Translates NUM_PROCESSORS into an actual thread count, never returning less than one. */
    private int resolveThreadCount() {
        final int available = Runtime.getRuntime().availableProcessors();
        final int requested;
        if (this.NUM_PROCESSORS == 0) {
            requested = available;
        } else if (this.NUM_PROCESSORS < 0) {
            requested = available + this.NUM_PROCESSORS;
        } else {
            requested = this.NUM_PROCESSORS;
        }
        if (requested < 1) {
            // A fixed thread pool rejects sizes below one, so fall back to a single worker.
            LOG.warn("NUM_PROCESSORS=" + this.NUM_PROCESSORS + " leaves no usable cores out of " + available + "; using 1.");
            return 1;
        }
        return requested;
    }

    /** Writes every collected detailed metric, in tile order, when detailed output is enabled. */
    private void writeDetailedMetrics(File detailedFile) {
        if (!(this.PROB_EXPLICIT_READS > 0.0d)) {
            return;
        }
        final MetricsFile<PFFailDetailedMetric, ?> detailedOut = getMetricsFile();
        for (final List<PFFailDetailedMetric> tileMetrics : this.tileToDetailedMetrics.values()) {
            for (final PFFailDetailedMetric metric : tileMetrics) {
                detailedOut.addMetric(metric);
            }
        }
        detailedOut.write(detailedFile);
    }

    /** Writes the merged "All" row followed by one row per tile, with derived fractions filled in. */
    private void writeSummaryMetrics(File summaryFile) {
        final PFFailSummaryMetric total = new PFFailSummaryMetric("All");
        for (final PFFailSummaryMetric tileSummary : this.tileToSummaryMetrics.values()) {
            total.merge(tileSummary);
        }
        total.calculateDerivedFields();

        final MetricsFile<PFFailSummaryMetric, ?> summaryOut = getMetricsFile();
        summaryOut.addMetric(total);
        for (final PFFailSummaryMetric tileSummary : this.tileToSummaryMetrics.values()) {
            tileSummary.calculateDerivedFields();
            summaryOut.addMetric(tileSummary);
        }
        summaryOut.write(summaryFile);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Counts how many elements of an array equal a given value.
     *
     * @param bArr array to scan
     * @param b    value to look for
     * @return number of elements equal to {@code b}
     */
    public static int countEquals(byte[] bArr, byte b) {
        int matches = 0;
        for (int idx = 0; idx < bArr.length; idx++) {
            matches += (bArr[idx] == b) ? 1 : 0;
        }
        return matches;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Counts how many elements of an array are strictly greater than a given value
     * (signed byte comparison).
     *
     * @param bArr array to scan
     * @param b    exclusive lower bound
     * @return number of elements greater than {@code b}
     */
    public static int countGreaterThan(byte[] bArr, byte b) {
        int above = 0;
        for (int idx = 0; idx < bArr.length; idx++) {
            above += (bArr[idx] > b) ? 1 : 0;
        }
        return above;
    }
}
