package org.snpeff.spliceSites;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.io.IOUtils;
import org.snpeff.SnpEff;
import org.snpeff.fileIterator.FastaFileIterator;
import org.snpeff.interval.Exon;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Intron;
import org.snpeff.interval.Marker;
import org.snpeff.interval.Markers;
import org.snpeff.interval.SpliceSite;
import org.snpeff.interval.SpliceSiteBranch;
import org.snpeff.interval.SpliceSiteBranchU12;
import org.snpeff.interval.Transcript;
import org.snpeff.motif.MotifLogo;
import org.snpeff.motif.Pwm;
import org.snpeff.probablility.FisherExactTest;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.stats.CountByType;
import org.snpeff.stats.IntStats;
import org.snpeff.util.Gpr;
import org.snpeff.util.Timer;

/* loaded from: input_file:org/snpeff/spliceSites/SnpEffCmdSpliceAnalysis.class */
public class SnpEffCmdSpliceAnalysis extends SnpEff {
    /** Same value (0.05) as the percentile position findEntropyThreshold() reads from the sorted entropy list. */
    public static final double THRESHOLD_ENTROPY = 0.05d;
    /** Minimum number of updates (100) a PwmSet needs before splicePwmAnalysis() reports it. */
    public static final int THRESHOLD_COUNT = 100;
    /** Same value (0.95) as the percentile position findPthreshold() reads from the sorted probability list. */
    public static final double THRESHOLD_P = 0.95d;
    /** Percentile (0.95) that run() passes to SpliceTypes.branchU12Threshold() to obtain the U12 score cut-off. */
    public static final double THRESHOLD_BRANCH_U12_PERCENTILE = 0.95d;
    /** Observed/expected U12 ratio (5.0) a PwmSet must exceed before its branch sites are added to transcripts. */
    public static final double THRESHOLD_U12_OBSERVED_EXPECTED = 5.0d;
    /** Set by the "-s" command line option; when true, run() saves the predictor database. */
    boolean saveDb;
    /** Reference genome FASTA file name, taken from the configuration in run(). */
    String genomeFasta;
    /** Splice type analysis helper, created and configured in run(). */
    SpliceTypes spliceTypes;
    /** Filtered transcript set for the configured genome, created in run(). */
    TranscriptSet transcriptSet;
    // NOTE(review): the next four thresholds are never assigned or read in this class as shown.
    double thresholdPDonor;
    double thresholdEntropyDonor;
    double thresholdPAcc;
    double thresholdEntropyAcc;
    /** U12 branch score cut-off, computed in run(); scores at or above it count as U12 matches in updatePwm(). */
    double thresholdU12Score;
    /** Upper bound for Fisher exact test p-values reported by PwmSet.pExonTypes(). Mutable (not final). */
    public static double P_VALUE_THRESHOLD = 0.001d;
    /** Number of bases taken as the donor consensus in updatePwm(). */
    public static int SIZE_CONSENSUS_DONOR = 2;
    /** Number of bases taken as the acceptor consensus in updatePwm(). */
    public static int SIZE_CONSENSUS_ACCEPTOR = 2;
    /** Width argument passed to MotifLogo.toStringHtml() when rendering motif logos. */
    public static int HTML_WIDTH = 20;
    /** Height argument passed to MotifLogo.toStringHtml() when rendering motif logos. */
    public static int HTML_HEIGHT = 100;
    /** Output directory; run() replaces the default with "<data dir>/spliceSites". */
    String outputDir = ".";
    /** Accumulates the HTML report; written to file at the end of run(). */
    StringBuilder out = new StringBuilder();
    // NOTE(review): geneList is not used anywhere in this class as shown.
    ArrayList<String> geneList = new ArrayList<>();
    /** PWM sets keyed by name (donor-acceptor pair, plus the aggregate " ALL" entry); see getPwmSet(). */
    HashMap<String, PwmSet> pwmSetsByName = new HashMap<>();
    /** PWM sets keyed by the exon-type pair string; see getPwmSetExonType(). */
    HashMap<String, PwmSet> pwmSetsExonTypeByName = new HashMap<>();
    /** Introns collected during the analysis, keyed by Intron.toString(). */
    HashMap<String, Intron> intronsByStr = new HashMap<>();
    /** Number of introns that passed the length filter in updatePwm(). */
    int countIntrons = 0;
    // NOTE(review): random is not used anywhere in this class as shown.
    Random random = new Random();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/snpeff/spliceSites/SnpEffCmdSpliceAnalysis$PwmSet.class */
    public class PwmSet implements Comparable<PwmSet> {
        /** Name identifying this set (the key it is stored under in the enclosing class' maps). */
        String name;
        // NOTE(review): motifMatchedBases and motifMatchedStr are not used in this class as shown.
        int motifMatchedBases = 0;
        int motifMatchedStr = 0;
        /** Number of times update() has been called on this set. */
        int updates = 0;
        /** Number of U12 matches recorded through incU12(). */
        int countU12 = 0;
        /** Acceptor-side position weight matrix, (2 * MAX_SPLICE_SIZE) + 1 positions long. */
        Pwm pwmAcc = new Pwm((2 * SpliceTypes.MAX_SPLICE_SIZE) + 1);
        /** Donor-side position weight matrix, (2 * MAX_SPLICE_SIZE) + 1 positions long. */
        Pwm pwmDonor = new Pwm((2 * SpliceTypes.MAX_SPLICE_SIZE) + 1);
        /** Statistics over the lengths sampled through len(). */
        IntStats lenStats = new IntStats();
        // NOTE(review): countMotif is not used in this class as shown.
        CountByType countMotif = new CountByType();
        /** Occurrence count per exon-type string, fed by incExonTypes(). */
        CountByType countExonTypes = new CountByType();
        /** Genes registered through addGene(). */
        HashSet<Gene> genes = new HashSet<>();

        /**
         * Creates an empty set.
         *
         * @param str name of the set
         */
        public PwmSet(String str) {
            this.name = str;
        }

        /**
         * Registers a gene with this set. The genes are kept in a HashSet, so
         * adding an equal gene again has no effect.
         *
         * @param gene gene to register
         */
        public void addGene(Gene gene) {
            genes.add(gene);
        }

        /**
         * Orders sets by descending number of updates; ties are broken by
         * ascending name.
         *
         * @param pwmSet the set to compare against
         * @return negative, zero or positive per the Comparable contract
         */
        @Override // java.lang.Comparable
        public int compareTo(PwmSet pwmSet) {
            // Arguments are swapped on purpose: larger update counts sort first.
            // Integer.compare replaces the overflow-prone "subtract to compare" idiom.
            int byUpdates = Integer.compare(pwmSet.updates, this.updates);
            if (byUpdates != 0) {
                return byUpdates;
            }
            return this.name.compareTo(pwmSet.name);
        }

        /**
         * Ratio of observed U12 matches to the number expected for this set.
         * The expected number is the update count multiplied by the fraction
         * of scores above the U12 percentile (1 - THRESHOLD_BRANCH_U12_PERCENTILE).
         * With zero updates the division is by 0.0, so the result is NaN or
         * infinite rather than an exception.
         *
         * @return observed / expected U12 count
         */
        public double countU12ObsExp() {
            // Constant expression: folds to exactly 0.050000000000000044
            double expectedFraction = 1.0d - SnpEffCmdSpliceAnalysis.THRESHOLD_BRANCH_U12_PERCENTILE;
            double expected = updates * expectedFraction;
            return countU12 / expected;
        }

        /**
         * Builds a space separated, alphabetically sorted list of the names of
         * all registered genes. A line break is inserted after every 30th name.
         *
         * @return the formatted gene names (each name followed by one space)
         */
        String geneNames() {
            List<String> names = new ArrayList<>();
            for (Gene gene : genes) {
                names.add(gene.getGeneName());
            }
            Collections.sort(names);

            StringBuilder text = new StringBuilder();
            int written = 0;
            for (String geneName : names) {
                text.append(geneName).append(" ");
                written++;
                if (written % 30 == 0) {
                    text.append(IOUtils.LINE_SEPARATOR_UNIX);
                }
            }
            return text.toString();
        }

        /**
         * Increments the counter kept for the given exon-type string.
         *
         * @param str exon-type label to count
         */
        void incExonTypes(String str) {
            countExonTypes.inc(str);
        }

        /** Records one more U12 match for this set. */
        void incU12() {
            countU12 += 1;
        }

        /**
         * Adds one length observation to the length statistics.
         *
         * @param i length to sample
         */
        void len(int i) {
            lenStats.sample(i);
        }

        /**
         * Concatenates the p-value report lines of every exon type counted in
         * this set, visiting the types in sorted key order.
         *
         * @return report text; empty when no exon type yields a reportable p-value
         */
        String pExonTypes() {
            StringBuilder report = new StringBuilder();
            for (String exonType : countExonTypes.keysSorted()) {
                report.append(pExonTypes(exonType));
            }
            return report.toString();
        }

        /**
         * Tests whether the given exon type is under- or over-represented in
         * this set compared with the aggregate " ALL" set, using Fisher's
         * exact test in both directions.
         *
         * A line is emitted for a direction only when its p-value is strictly
         * between 0 and P_VALUE_THRESHOLD.
         *
         * @param str exon-type label to test
         * @return zero, one or two report lines (each terminated by '\n')
         */
        String pExonTypes(String str) {
            // Counts inside this set
            int othersInSet = countExcluding(this.countExonTypes, str);
            int typeInSet = (int) this.countExonTypes.get(str);

            // Counts in the aggregate set (created on demand by getPwmSet)
            CountByType allCounts = SnpEffCmdSpliceAnalysis.this.getPwmSet(" ALL").countExonTypes;
            int othersInAll = countExcluding(allCounts, str);
            int typeInAll = (int) allCounts.get(str);

            int totalInAll = othersInAll + typeInAll;
            int totalInSet = othersInSet + typeInSet;

            StringBuilder report = new StringBuilder();

            double pDown = FisherExactTest.get().fisherExactTestDown(typeInSet, totalInAll, typeInAll, totalInSet);
            if (pDown > 0.0d && pDown < SnpEffCmdSpliceAnalysis.P_VALUE_THRESHOLD) {
                report.append(String.format("p-value Down (%s) : %.4e\n", str, pDown));
            }

            double pUp = FisherExactTest.get().fisherExactTestUp(typeInSet, totalInAll, typeInAll, totalInSet);
            if (pUp > 0.0d && pUp < SnpEffCmdSpliceAnalysis.P_VALUE_THRESHOLD) {
                // Fixed label: this line reports the "Up" test (it used to say "Down")
                report.append(String.format("p-value Up (%s) : %.4e\n", str, pUp));
            }

            return report.toString();
        }

        /** Sums the counts of every key except the excluded one, truncating to int at each step. */
        private int countExcluding(CountByType counts, String excluded) {
            int total = 0;
            for (String key : counts.keysSorted()) {
                if (!key.equals(excluded)) {
                    total = (int) (total + counts.get(key));
                }
            }
            return total;
        }

        /**
         * Renders this set as the cells of one HTML table row (without the
         * surrounding {@code <tr>} tags): name, update count, donor motif
         * logo, U12 matches with observed/expected ratio, acceptor motif logo,
         * length statistics, exon-type counts, exon-type p-values and gene names.
         *
         * The U12 cell background gets redder as the observed/expected ratio
         * passes 1.2, 2.0 and THRESHOLD_U12_OBSERVED_EXPECTED.
         */
        @Override
        public String toString() {
            StringBuilder html = new StringBuilder();

            // Name and count
            html.append("\t<td> <b>").append(name).append("</b> </td>\n");
            html.append("\t<td> ").append(updates).append("</td>\n");

            // Donor motif
            MotifLogo donorLogo = new MotifLogo(pwmDonor);
            html.append("\t<td>\n");
            html.append(donorLogo.toStringHtml(SnpEffCmdSpliceAnalysis.HTML_WIDTH, SnpEffCmdSpliceAnalysis.HTML_HEIGHT, EffectType.SPLICE_SITE_DONOR));
            html.append("\t</td>\n");

            // U12 matches, coloured by observed/expected ratio
            double obsExp = countU12ObsExp();
            String bgColor;
            if (obsExp > SnpEffCmdSpliceAnalysis.THRESHOLD_U12_OBSERVED_EXPECTED) {
                bgColor = "ff0000";
            } else if (obsExp > 2.0d) {
                bgColor = "ff8888";
            } else if (obsExp > 1.2d) {
                bgColor = "ffcccc";
            } else {
                bgColor = "ffffff";
            }
            html.append(String.format("\t<td bgcolor=%s> <center> %d (%1.2f) </center> </td>\n", bgColor, countU12, obsExp));

            // Acceptor motif
            MotifLogo acceptorLogo = new MotifLogo(pwmAcc);
            html.append("\t<td>\n");
            html.append(acceptorLogo.toStringHtml(SnpEffCmdSpliceAnalysis.HTML_WIDTH, SnpEffCmdSpliceAnalysis.HTML_HEIGHT, EffectType.SPLICE_SITE_ACCEPTOR));
            html.append("\t</td>\n");

            // Length statistics
            html.append("\t<td> <pre>\n");
            html.append(lenStats.toString());
            html.append("\t</pre></td>\n");

            // Exon type counts
            html.append("\t<td> <pre>\n");
            html.append(countExonTypes);
            html.append("\t</pre></td>\n");

            // Exon type p-values
            html.append("\t<td> <pre>\n");
            html.append(pExonTypes());
            html.append("\t</pre></td>\n");

            // Gene names
            html.append("\t<td> <textarea rows=10 cols=120 readonly> \n");
            html.append(geneNames());
            html.append("\t</textarea> </td>\n");

            return html.toString();
        }

        /**
         * Counts one observation and feeds the given sequences into the
         * position weight matrices. The update counter is incremented even
         * when both sequences are null.
         *
         * @param str  acceptor sequence, or null to leave the acceptor PWM untouched
         * @param str2 donor sequence, or null to leave the donor PWM untouched
         */
        public void update(String str, String str2) {
            updates += 1;

            boolean hasAcceptor = (str != null);
            if (hasAcceptor) {
                pwmAcc.updateCounts(str);
            }

            boolean hasDonor = (str2 != null);
            if (hasDonor) {
                pwmDonor.updateCounts(str2);
            }
        }
    }

    /**
     * Counts the donor/acceptor pairs known to {@code spliceTypes} whose
     * donor sequence starts with the given prefix and whose acceptor sequence
     * ends with the given suffix.
     *
     * @param str  required donor prefix
     * @param str2 required acceptor suffix
     * @return number of matching pairs
     */
    int countDonorAcc(String str, String str2) {
        int matches = 0;
        int pairIdx = 0;
        while (pairIdx < spliceTypes.getDonorAccPairSize()) {
            String donorSeq = spliceTypes.getDonor(pairIdx);
            String acceptorSeq = spliceTypes.getAcceptor(pairIdx);
            boolean donorOk = donorSeq.startsWith(str);
            if (donorOk && acceptorSeq.endsWith(str2)) {
                matches++;
            }
            pairIdx++;
        }
        return matches;
    }

    /**
     * Picks the entropy found at the THRESHOLD_ENTROPY (5%) position of the
     * ascending-sorted list returned by {@code acgtTree.entropyAll(THRESHOLD_COUNT)}.
     * The list is sorted in place. An empty list results in an
     * IndexOutOfBoundsException.
     *
     * @param acgtTree tree supplying the entropy values
     * @return entropy value at the 5% position
     */
    double findEntropyThreshold(AcgtTree acgtTree) {
        List<Double> entropies = acgtTree.entropyAll(THRESHOLD_COUNT);
        Collections.sort(entropies);
        int position = (int) (entropies.size() * THRESHOLD_ENTROPY);
        return entropies.get(position);
    }

    /**
     * Picks the value found at the THRESHOLD_P (95%) position of the
     * ascending-sorted list returned by {@code acgtTree.pAll(THRESHOLD_COUNT)}.
     * The list is sorted in place. An empty list results in an
     * IndexOutOfBoundsException.
     *
     * @param acgtTree tree supplying the probability values
     * @return value at the 95% position
     */
    double findPthreshold(AcgtTree acgtTree) {
        List<Double> probabilities = acgtTree.pAll(THRESHOLD_COUNT);
        Collections.sort(probabilities);
        int position = (int) (probabilities.size() * THRESHOLD_P);
        return probabilities.get(position);
    }

    /**
     * Looks up the PWM set stored under the given name in
     * {@code pwmSetsByName}, creating and registering an empty one when the
     * name is not present yet.
     *
     * @param str name of the set
     * @return the existing or newly created set, never null
     */
    PwmSet getPwmSet(String str) {
        PwmSet existing = pwmSetsByName.get(str);
        if (existing != null) {
            return existing;
        }
        PwmSet created = new PwmSet(str);
        pwmSetsByName.put(str, created);
        return created;
    }

    /**
     * Looks up the PWM set stored under the given exon-type name in
     * {@code pwmSetsExonTypeByName}, creating and registering an empty one
     * when the name is not present yet.
     *
     * @param str exon-type name of the set
     * @return the existing or newly created set, never null
     */
    PwmSet getPwmSetExonType(String str) {
        PwmSet existing = pwmSetsExonTypeByName.get(str);
        if (existing != null) {
            return existing;
        }
        PwmSet created = new PwmSet(str);
        pwmSetsExonTypeByName.put(str, created);
        return created;
    }

    /**
     * Appends the string form of the given object, followed by a line
     * separator, to the report buffer.
     *
     * @param obj object to print; must not be null (toString() is called on it)
     */
    void out(Object obj) {
        String line = obj.toString();
        this.out.append(line).append(IOUtils.LINE_SEPARATOR_UNIX);
    }

    /**
     * Parses the command line. The only recognised option is "-s" (save
     * database). The first non-option argument seen while no genome version
     * is set becomes the genome version; later non-option arguments are
     * ignored. usage() is invoked when there are no arguments, on an unknown
     * option, or when no genome version was given.
     *
     * @param strArr command line arguments
     */
    @Override // org.snpeff.SnpEff, org.snpeff.snpEffect.commandLine.CommandLine
    public void parseArgs(String[] strArr) {
        if (strArr.length == 0) {
            usage(null);
        }

        for (String arg : strArr) {
            if (!isOpt(arg)) {
                // Positional argument: only the first one is used
                if (genomeVer == null || genomeVer.isEmpty()) {
                    genomeVer = arg;
                }
                continue;
            }

            if (arg.equals("-s")) {
                saveDb = true;
            } else {
                usage("Unknown option '" + arg + "'");
            }
        }

        boolean genomeMissing = (genomeVer == null || genomeVer.isEmpty());
        if (genomeMissing) {
            usage("Missing argument: Genome version");
        }
    }

    /**
     * Runs the whole splice analysis: loads configuration and database,
     * builds the filtered transcript set, runs the splice type analysis,
     * derives the U12 score threshold, writes the splice FASTA files, performs
     * the PWM analysis, saves the HTML report and, if requested with "-s",
     * saves the database.
     *
     * @return always true
     * @throws RuntimeException when the configuration has no genome FASTA file name
     */
    @Override // org.snpeff.SnpEff, org.snpeff.snpEffect.commandLine.CommandLine
    public boolean run() {
        // Configuration and reference genome
        loadConfig();
        genomeFasta = config.getFileNameGenomeFasta();
        if (genomeFasta == null) {
            throw new RuntimeException("Cannot find reference genome: " + config.getFileListGenomeFasta());
        }
        outputDir = config.getDirData() + "/spliceSites";
        loadDb();

        // Transcripts to analyse
        transcriptSet = new TranscriptSet(config.getGenome());
        transcriptSet.setVerbose(verbose);
        transcriptSet.setDebug(debug);
        transcriptSet.filter();

        // Splice type analysis
        spliceTypes = new SpliceTypes(config);
        spliceTypes.setVerbose(verbose);
        spliceTypes.setDebug(debug);
        spliceTypes.setTranscriptSet(transcriptSet);
        spliceTypes.setGenomeFasta(genomeFasta);
        spliceTypes.analyzeAndCreate();
        thresholdU12Score = spliceTypes.branchU12Threshold(THRESHOLD_BRANCH_U12_PERCENTILE);
        spliceTypes.createSpliceFasta(outputDir);

        // PWM analysis and HTML report
        splicePwmAnalysis();
        String reportFile = outputDir + "/" + getClass().getSimpleName() + "_" + genomeVer + ".html";
        if (verbose) {
            Timer.showStdErr("Saving output to: " + reportFile);
        }
        Gpr.toFile(reportFile, out);

        // Optionally persist the database
        if (!saveDb) {
            if (verbose) {
                Timer.showStdErr("Not saving database.");
            }
        } else {
            if (verbose) {
                Timer.showStdErr("Saving database to file: " + config.getFileSnpEffectPredictor());
            }
            config.getSnpEffectPredictor().save(config);
            if (verbose) {
                Timer.showStdErr("Done.");
            }
        }

        if (verbose) {
            Timer.showStdErr("Finished!");
        }
        return true;
    }

    /**
     * Runs the PWM analysis over every sequence of the genome FASTA file and
     * produces the outputs derived from it:
     * <ol>
     * <li>per-chromosome analysis (appended to the report inside a {@code <pre>} block),</li>
     * <li>a BED file with all collected introns plus the U12 branch sites of
     * over-represented donor-acceptor pairs (those branch sites are also added
     * to their transcripts),</li>
     * <li>two HTML tables in the report: by donor-acceptor pair and by exon-type pair.</li>
     * </ol>
     */
    void splicePwmAnalysis() {
        if (verbose) {
            Timer.showStdErr("Splice analysis (PWM). Reading fasta file: " + genomeFasta);
        }

        // Analyse each chromosome sequence
        FastaFileIterator fasta = new FastaFileIterator(genomeFasta);
        out("<pre>\n");
        Iterator<String> sequences = fasta.iterator();
        while (sequences.hasNext()) {
            // NOTE(review): getName() is evaluated before next(), so this relies on
            // hasNext() having already loaded the upcoming record's name — confirm
            // against FastaFileIterator.
            splicePwmAnalysis(fasta.getName(), sequences.next());
        }
        out("</pre>\n");

        writeIntronsAndBranchU12Bed();

        if (verbose) {
            Timer.showStdErr("Filter out low count splice sites. Exons: " + countIntrons + "\tThreshold: " + THRESHOLD_COUNT);
        }

        // Table 1: by donor-acceptor pair
        out("<p><center><h3>Analysis by Donnor-Acceptor type</h3></center><p><table border=1>\n");
        out("<p><b>U12 PWM score threshold:</b> " + thresholdU12Score + "<p>\n");
        out("<tr> <th> Rank </th> <th> Donor-Acceptor </th>  <th> Count </th>  <th> Donor Motif </th> <th> U12 matches (Observed / Expected) </th> <th> Acceptor Motif </th> <th> Intron length </th> <th> Intron Type Count </th> <th> Intron Type p-values </th><th> Genes </th> </tr>\n");
        outRankedPwmSetRows(pwmSetsByName);
        out("</table>\n");

        // Table 2: by exon-type pair
        out("<p><hr><p><center><h3>Analysis by Exon-Exon types</h3></center><p><table border=1>\n");
        out("<tr> <th> Rank </th> <th> Exon_Type --- Exon_Type</th>  <th> Count </th>  <th> Donor Motif </th> <th> U12 matches (Observed / Expected) </th> <th> Acceptor Motif </th> <th> Intron length </th> <th> Intron Type Count </th> <th> Intron Type p-values </th><th> Genes </th> </tr>\n");
        outRankedPwmSetRows(pwmSetsExonTypeByName);
        out("</table>\n");
    }

    /**
     * Adds the U12 branch sites of every frequent, U12-enriched donor-acceptor
     * pair to their transcripts and writes them, together with all collected
     * introns, to a BED file in the output directory.
     */
    private void writeIntronsAndBranchU12Bed() {
        String bedFile = outputDir + "/" + getClass().getSimpleName() + "_" + genomeVer + "_introns_branchSitesU12.bed";
        if (verbose) {
            Timer.showStdErr("Writing Introns and SpliceSitesBranchU12 file to BED file: '" + bedFile + "'");
        }

        Markers markers = new Markers();
        int branchSitesAdded = 0;
        for (String pairName : pwmSetsByName.keySet()) {
            PwmSet pwmSet = getPwmSet(pairName);
            if (pwmSet.updates >= THRESHOLD_COUNT && pwmSet.countU12ObsExp() > THRESHOLD_U12_OBSERVED_EXPECTED) {
                for (SpliceSiteBranchU12 branchU12 : spliceTypes.getBranchU12(pairName)) {
                    Transcript transcript = (Transcript) branchU12.getParent();
                    // Explicit casts keep the same add(...) overloads selected
                    transcript.add((SpliceSite) branchU12);
                    markers.add((Marker) branchU12);
                    branchSitesAdded++;
                    if (verbose) {
                        System.out.println("\tAdding BranchU12 '" + branchU12 + "' to transcript " + transcript.getId() + "\tDonor-acceptor pair: " + pairName + "\tObs/Expected: " + pwmSet.countU12ObsExp());
                    }
                }
            }
        }

        markers.addAll(intronsByStr.values());
        markers.sort(false, false);

        // NOTE(review): both start and end are shifted by +1; the BED format uses a
        // zero-based start. Confirm the coordinate convention of Marker before changing.
        StringBuilder bed = new StringBuilder();
        for (Marker marker : markers) {
            String label = (marker instanceof SpliceSiteBranch) ? marker.getType().toString() : marker.getId();
            bed.append(marker.getChromosomeName() + "\t" + (marker.getStart() + 1) + "\t" + (marker.getEnd() + 1) + "\t" + label + IOUtils.LINE_SEPARATOR_UNIX);
        }
        Gpr.toFile(bedFile, bed);

        if (verbose) {
            Timer.showStdErr("Added " + branchSitesAdded + " branch U12 to genome.");
        }
    }

    /**
     * Appends one HTML table row per PWM set that reached THRESHOLD_COUNT
     * updates, in PwmSet natural order, numbering the rows from 0.
     */
    private void outRankedPwmSetRows(HashMap<String, PwmSet> pwmSets) {
        // Work on a copy: rendering a row may register the " ALL" set in pwmSetsByName
        List<PwmSet> ranked = new ArrayList<>(pwmSets.values());
        Collections.sort(ranked);

        int rank = 0;
        for (PwmSet pwmSet : ranked) {
            if (pwmSet.updates >= THRESHOLD_COUNT) {
                out("<tr> <td> " + rank + " </td> " + pwmSet + "</tr>\n");
                rank++;
            }
        }
    }

    /**
     * Analyses all introns of one chromosome. For every transcript on the
     * chromosome, consecutive exons (in strand order) delimit an intron; each
     * distinct intron (by chromosome and coordinates) is passed to updatePwm()
     * once and stored in {@code intronsByStr}.
     *
     * @param str  chromosome name
     * @param str2 chromosome sequence
     */
    void splicePwmAnalysis(String str, String str2) {
        int exonCount = 0;
        int transcriptCount = 0;
        HashSet<String> seenIntrons = new HashSet<>();

        for (Transcript transcript : transcriptSet.getByChromo(str)) {
            Exon previous = null;
            for (Exon current : transcript.sortedStrand()) {
                exonCount++;
                if (previous != null) {
                    // Genomic order of the two exons depends on the strand
                    boolean plusStrand = transcript.isStrandPlus();
                    Exon leftExon = plusStrand ? previous : current;
                    Exon rightExon = plusStrand ? current : previous;
                    int intronStart = leftExon.getEnd();
                    int intronEnd = rightExon.getStart();

                    // DEFAULT_OPT_PREFIX is just used as the "-" separator here
                    String exonTypes = previous.getSpliceType().toString() + HelpFormatter.DEFAULT_OPT_PREFIX + current.getSpliceType().toString();
                    String intronKey = str + ":" + intronStart + HelpFormatter.DEFAULT_OPT_PREFIX + intronEnd;

                    boolean alreadySeen = seenIntrons.contains(intronKey);
                    if (!alreadySeen) {
                        updatePwm(transcript, str2, intronStart, intronEnd, exonTypes);
                        seenIntrons.add(intronKey);
                        Intron intron = new Intron(transcript, intronStart, intronEnd, false, exonTypes, previous, current);
                        intronsByStr.put(intron.toString(), intron);
                    }
                }
                previous = current;
            }
            transcriptCount++;
        }

        if (verbose) {
            Timer.showStdErr("\tChromosome: " + str + "\tTranscripts: " + transcriptCount + "\tExons: " + exonCount);
        }
    }

    /**
     * Feeds one intron into the PWM statistics.
     *
     * Introns shorter than 2 * MAX_SPLICE_SIZE are ignored, as are introns
     * whose donor or acceptor consensus contains an 'N'. The intron is
     * classified by the longest matching donor/acceptor pair known to
     * {@code spliceTypes} (falling back to the raw consensus bases), and then
     * recorded in three sets: the donor-acceptor pair set, the aggregate
     * " ALL" set and the exon-type set.
     *
     * @param transcript transcript the intron belongs to
     * @param str        chromosome sequence
     * @param i          intron start coordinate
     * @param i2         intron end coordinate
     * @param str2       exon-type pair label of the flanking exons
     */
    void updatePwm(Transcript transcript, String str, int i, int i2, String str2) {
        int intronLength = i2 - i;
        if (intronLength < 2 * SpliceTypes.MAX_SPLICE_SIZE) {
            return;
        }

        // Sequences around the donor and acceptor sites
        String seqDonor = spliceTypes.seqDonor(transcript, str, i, i2);
        String seqAcceptor = spliceTypes.seqAcceptor(transcript, str, i, i2);
        String donorTail = seqDonor.substring(SpliceTypes.MAX_SPLICE_SIZE + 1);
        String acceptorHead = seqAcceptor.substring(0, SpliceTypes.MAX_SPLICE_SIZE);
        countIntrons++;

        // Skip introns with ambiguous bases in either consensus
        String donorConsensus = seqDonor.substring(SpliceTypes.MAX_SPLICE_SIZE + 1, SpliceTypes.MAX_SPLICE_SIZE + 1 + SIZE_CONSENSUS_DONOR);
        if (donorConsensus.indexOf('N') >= 0) {
            return;
        }
        String acceptorConsensus = seqAcceptor.substring(SpliceTypes.MAX_SPLICE_SIZE - SIZE_CONSENSUS_ACCEPTOR, SpliceTypes.MAX_SPLICE_SIZE);
        // Fixed: this check used to re-test the donor consensus, so an 'N' in the
        // acceptor consensus was never detected
        if (acceptorConsensus.indexOf('N') >= 0) {
            return;
        }

        // Prefer the longest known donor/acceptor pair that matches this intron
        int bestLength = 0;
        for (int pairIdx = 0; pairIdx < spliceTypes.getDonorAccPairSize(); pairIdx++) {
            String donor = spliceTypes.getDonor(pairIdx);
            String acceptor = spliceTypes.getAcceptor(pairIdx);
            if (donorTail.startsWith(donor) && acceptorHead.endsWith(acceptor)) {
                int pairLength = donor.length() + acceptor.length();
                if (pairLength > bestLength) {
                    bestLength = pairLength;
                    donorConsensus = donor;
                    acceptorConsensus = acceptor;
                }
            }
        }

        String pairName = donorConsensus + "_" + acceptorConsensus;
        double u12Score = spliceTypes.addBestU12Score(transcript, str, pairName, i, i2).first.doubleValue();
        boolean isU12 = u12Score >= thresholdU12Score;

        // Donor-acceptor pair set
        PwmSet pairSet = getPwmSet(pairName);
        pairSet.update(seqAcceptor, seqDonor);
        pairSet.len(intronLength);
        pairSet.incExonTypes(str2);
        pairSet.addGene((Gene) transcript.getParent());
        if (isU12) {
            pairSet.incU12();
        }

        // Aggregate set (no genes and no U12 counts are tracked here)
        PwmSet allSet = getPwmSet(" ALL");
        allSet.update(seqAcceptor, seqDonor);
        allSet.incExonTypes(str2);
        allSet.len(intronLength);

        // Exon-type set
        PwmSet exonTypeSet = getPwmSetExonType(str2);
        exonTypeSet.update(seqAcceptor, seqDonor);
        exonTypeSet.len(intronLength);
        exonTypeSet.addGene((Gene) transcript.getParent());
        if (isU12) {
            exonTypeSet.incU12();
        }
    }

    /**
     * Prints an optional error message followed by the command usage to
     * standard error, then terminates the JVM with exit code -1.
     *
     * @param str error message to show first, or null for none
     */
    @Override // org.snpeff.SnpEff, org.snpeff.snpEffect.commandLine.CommandLine
    public void usage(String str) {
        boolean hasMessage = (str != null);
        if (hasMessage) {
            System.err.println("Error: " + str + IOUtils.LINE_SEPARATOR_UNIX);
        }

        String[] usageLines = {
            "Usage: snpEff  spliceAnalysis [options] genome_version",
            "Options:",
            "    -s      : Save database",
        };
        for (String usageLine : usageLines) {
            System.err.println(usageLine);
        }

        System.exit(-1);
    }
}
