package org.snpeff.spliceSites;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.io.IOUtils;
import org.forester.phylogeny.data.DomainArchitecture;
import org.snpeff.collections.AutoHashMap;
import org.snpeff.fileIterator.FastaFileIterator;
import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Exon;
import org.snpeff.interval.Intron;
import org.snpeff.interval.SpliceSiteBranchU12;
import org.snpeff.interval.Transcript;
import org.snpeff.motif.Pwm;
import org.snpeff.snpEffect.Config;
import org.snpeff.util.Gpr;
import org.snpeff.util.GprSeq;
import org.snpeff.util.Timer;
import org.snpeff.util.Tuple;

/* loaded from: input_file:org/snpeff/spliceSites/SpliceTypes.class */
/**
 * Splice site sequence conservation analysis.
 *
 * <p>The analysis ({@link #analyzeAndCreate()}) loads a U12 branch PWM and the transcript set,
 * reads the genome FASTA to collect donor / acceptor / branch sequences for every intron,
 * finds frequently co-occurring (conserved) donor-acceptor sequence pairs using
 * {@link AcgtTree}s, and finally asks each matching {@link Intron} to create donor and
 * acceptor splice sites sized after the conserved sequences.
 *
 * <p>Introns are identified throughout by a string key built as
 * {@code chromosome + ":" + start + HelpFormatter.DEFAULT_OPT_PREFIX + end}.
 *
 * <p>NOTE(review): the references to {@code HelpFormatter.DEFAULT_OPT_PREFIX},
 * {@code IOUtils.LINE_SEPARATOR_UNIX} and {@code DomainArchitecture.NHX_SEPARATOR} look like
 * decompiler constant substitution (presumably "-", "\n" and ">") — confirm and replace with
 * plain literals.
 */
public class SpliceTypes {
    /** Number of bases taken on each side of a splice junction. */
    public static int MAX_SPLICE_SIZE = 10;
    /** Size of the region, next to the acceptor end of the intron, searched for a U12 branch. */
    public static int SIZE_BRANCH = 60;
    /** Quantile of the entropy distribution used as entropy threshold. */
    public static final double THRESHOLD_ENTROPY = 0.05d;
    /** Minimum number of occurrences for a sequence / pair to be considered. */
    public static final int THRESHOLD_COUNT = 100;
    /** Quantile of the probability distribution used as probability threshold. */
    public static final double THRESHOLD_P = 0.95d;

    /** Genome FASTA file name; when null it is taken from {@link #config}. */
    String genomeFasta;
    Config config;
    /** U12 branch PWM, loaded by {@link #load()}. */
    Pwm pwmU12;
    TranscriptSet transcriptSet;
    double thresholdPDonor;
    double thresholdEntropyDonor;
    double thresholdPAcc;
    double thresholdEntropyAcc;
    double thresholdU12Score;
    boolean verbose = false;
    boolean debug = false;
    /** Intronic part of the donor sequence, by intron key. */
    HashMap<String, String> donorsByIntron = new HashMap<>();
    /** Intronic part of the acceptor sequence, by intron key. */
    HashMap<String, String> acceptorsByIntron = new HashMap<>();
    /** Branch region sequence, by intron key. */
    HashMap<String, String> branchByIntron = new HashMap<>();
    /** Conserved donor sequences; element i pairs with element i of {@link #donorAccPairAcc}. */
    ArrayList<String> donorAccPairDonor = new ArrayList<>();
    /** Conserved acceptor sequences; element i pairs with element i of {@link #donorAccPairDonor}. */
    ArrayList<String> donorAccPairAcc = new ArrayList<>();
    /** U12 branch sites grouped by a donor-acceptor key supplied by the caller. */
    AutoHashMap<String, List<SpliceSiteBranchU12>> branchU12ByDonorAcc = new AutoHashMap<String, List<SpliceSiteBranchU12>>(new ArrayList<SpliceSiteBranchU12>());
    /** Number of introns per formatted "donor TAB acceptor" key (see {@link #add(String, String)}). */
    HashMap<String, Integer> donorAcc = new HashMap<>();
    AcgtTree acgtTreeDonors = new AcgtTree();
    /** Tree of acceptor sequences, stored reversed (see {@link #spliceDonoAcceptorPairs()}). */
    AcgtTree acgtTreeAcc = new AcgtTree();

    /**
     * Creates an analysis bound to the given configuration.
     *
     * @param config configuration used to locate data files, the genome and the predictor
     */
    public SpliceTypes(Config config) {
        this.config = config;
    }

    /**
     * Finds conserved acceptor sequences among the introns whose donor starts with {@code str}
     * and registers each resulting donor-acceptor pair through {@link #add(String, String)}.
     *
     * @param str conserved donor sequence (prefix of the stored donor sequences)
     */
    void acc4donor(String str) {
        AcgtTree acceptorTree = new AcgtTree();
        for (String intronKey : this.donorsByIntron.keySet()) {
            if (!this.donorsByIntron.get(intronKey).startsWith(str)) {
                continue;
            }
            // Acceptors are added reversed, so tree node names are reversed acceptor suffixes.
            String reversedAcceptor = GprSeq.reverse(this.acceptorsByIntron.get(intronKey));
            if (reversedAcceptor.indexOf('N') < 0) {
                acceptorTree.add(reversedAcceptor);
            }
        }
        for (String reversedConserved : acceptorTree.findNodeNames(this.thresholdEntropyAcc, this.thresholdPAcc, THRESHOLD_COUNT)) {
            if (reversedConserved.length() > 1) {
                add(str, GprSeq.reverse(reversedConserved));
            }
        }
    }

    /**
     * Records a donor-acceptor pair when at least {@link #THRESHOLD_COUNT} introns match it.
     *
     * @param str  donor sequence (matched as a prefix)
     * @param str2 acceptor sequence (matched as a suffix)
     */
    void add(String str, String str2) {
        int occurrences = countDonorAcc(str, str2);
        if (occurrences >= THRESHOLD_COUNT) {
            // The key is later split on whitespace in spliceDonoAcceptorPairs().
            this.donorAcc.put(String.format("%-10s\t%10s", str, str2), Integer.valueOf(occurrences));
        }
    }

    /**
     * Scores the branch region of an intron against the U12 PWM and stores a
     * {@link SpliceSiteBranchU12} located at the best scoring position.
     *
     * @param transcript transcript owning the intron
     * @param str        chromosome sequence
     * @param str2       key under which the branch site is stored
     * @param i          intron start
     * @param i2         intron end
     * @return the (best score, best position) tuple from {@link #bestU12Score(String)}
     */
    public Tuple<Double, Integer> addBestU12Score(Transcript transcript, String str, String str2, int i, int i2) {
        Tuple<Double, Integer> best = bestU12Score(seqBranch(transcript, str, i, i2));
        int offset = best.second.intValue();
        int pwmLength = this.pwmU12.length();
        int siteStart;
        if (transcript.isStrandPlus()) {
            siteStart = (i2 - SIZE_BRANCH) + offset;
        } else {
            siteStart = (((i + SIZE_BRANCH) - offset) - pwmLength) - 1;
        }
        int siteEnd = siteStart + pwmLength;
        addBranchU12(str2, new SpliceSiteBranchU12(transcript.findIntron(siteStart), siteStart, siteEnd, transcript.isStrandMinus(), ""));
        return best;
    }

    /**
     * Appends a branch site to the list stored under the given key.
     *
     * @param str                 key
     * @param spliceSiteBranchU12 branch site to store
     */
    void addBranchU12(String str, SpliceSiteBranchU12 spliceSiteBranchU12) {
        this.branchU12ByDonorAcc.getOrCreate(str).add(spliceSiteBranchU12);
    }

    /**
     * Runs the whole analysis: load data, collect splice sequences, find conserved
     * donor-acceptor pairs and create the splice sites.
     *
     * @return always {@code true}
     */
    public boolean analyzeAndCreate() {
        if (this.verbose) {
            Timer.showStdErr("Splice site sequence conservation analysis: Start");
        }
        load();
        spliceSequences();
        spliceDonoAcceptorPairs();
        createSpliceSites();
        if (this.verbose) {
            Timer.showStdErr("Splice site sequence conservation analysis: Done.");
        }
        return true;
    }

    /**
     * Finds the conserved donor-acceptor pair that best matches the given sequences. A pair
     * matches when the donor starts with the conserved donor and the acceptor ends with the
     * conserved acceptor; among matches the longest combined length wins (first one on ties).
     *
     * @param str  donor sequence, may be null
     * @param str2 acceptor sequence, may be null
     * @return index into the conserved pair lists, or -1 when nothing matches or an argument is null
     */
    int bestMatchIndex(String str, String str2) {
        if (str == null || str2 == null) {
            return -1;
        }
        int bestLength = -1;
        int bestIndex = -1;
        for (int idx = 0; idx < this.donorAccPairDonor.size(); idx++) {
            String conservedDonor = this.donorAccPairDonor.get(idx);
            String conservedAcc = this.donorAccPairAcc.get(idx);
            if (!str.startsWith(conservedDonor) || !str2.endsWith(conservedAcc)) {
                continue;
            }
            int combinedLength = conservedDonor.length() + conservedAcc.length();
            if (combinedLength > bestLength) {
                bestLength = combinedLength;
                bestIndex = idx;
            }
        }
        return bestIndex;
    }

    /**
     * Slides the U12 PWM along a sequence and returns the best score and its position.
     * Windows containing 'N' are skipped. Only scores strictly greater than 0 are retained.
     *
     * <p>NOTE(review): window starts run over {@code [0, str.length() - pwmLength)}, so the
     * last full-length window is never scored — confirm whether that is intended.
     *
     * @param str sequence to scan
     * @return tuple (best score, start index); (0.0, -1) when no window scored above 0
     */
    public Tuple<Double, Integer> bestU12Score(String str) {
        int pwmLength = this.pwmU12.length();
        int windowLimit = str.length() - pwmLength;
        double bestScore = 0.0d;
        int bestStart = -1;
        for (int start = 0; start < windowLimit; start++) {
            String window = str.substring(start, start + pwmLength);
            if (window.indexOf('N') >= 0) {
                continue;
            }
            double score = this.pwmU12.score(window);
            if (score > bestScore) {
                bestScore = score;
                bestStart = start;
            }
        }
        return new Tuple<>(Double.valueOf(bestScore), Integer.valueOf(bestStart));
    }

    /**
     * Computes the best U12 score of every stored branch sequence and sets
     * {@link #thresholdU12Score} to the requested quantile of that distribution.
     *
     * @param d quantile; the resulting index is clamped to the valid range, so 1.0 selects
     *          the largest score instead of failing
     * @return the new threshold
     * @throws IndexOutOfBoundsException if no branch sequence has been collected
     */
    public double branchU12Threshold(double d) {
        Timer.showStdErr("Finding U12 PWM score distribution and threshold.");
        List<Double> scores = new ArrayList<Double>(this.branchByIntron.size());
        for (String branchSeq : this.branchByIntron.values()) {
            scores.add(bestU12Score(branchSeq).first);
        }
        Collections.sort(scores);
        this.thresholdU12Score = quantile(scores, d);
        return this.thresholdU12Score;
    }

    /**
     * Counts the introns whose donor starts with {@code str} and whose acceptor ends with
     * {@code str2}.
     *
     * @param str  donor prefix
     * @param str2 acceptor suffix
     * @return number of matching introns
     */
    int countDonorAcc(String str, String str2) {
        int matches = 0;
        for (String intronKey : this.donorsByIntron.keySet()) {
            boolean donorMatches = this.donorsByIntron.get(intronKey).startsWith(str);
            if (donorMatches && this.acceptorsByIntron.get(intronKey).endsWith(str2)) {
                matches++;
            }
        }
        return matches;
    }

    /**
     * Writes one FASTA file per conserved donor-acceptor pair into the given directory.
     *
     * @param str output directory
     */
    public void createSpliceFasta(String str) {
        if (this.verbose) {
            Timer.showStdErr("Creating FASTA files for each dono-acceptor pair.");
        }
        int pairs = getDonorAccPairSize();
        for (int idx = 0; idx < pairs; idx++) {
            String donor = getDonor(idx);
            String acceptor = getAcceptor(idx);
            String fileName = str + "/" + this.config.getGenome().getId() + "." + donor + HelpFormatter.DEFAULT_OPT_PREFIX + acceptor + ".fa";
            createSpliceFasta(fileName, donor, acceptor);
        }
    }

    /**
     * Writes to a file the donor and acceptor sequences of every intron matching the given
     * donor prefix and acceptor suffix, one record per intron.
     *
     * @param str  output file name
     * @param str2 donor prefix
     * @param str3 acceptor suffix
     */
    void createSpliceFasta(String str, String str2, String str3) {
        StringBuilder fasta = new StringBuilder();
        for (String intronKey : getIntronKeySet()) {
            String donorSeq = getDonorByIntron(intronKey);
            String acceptorSeq = getAcceptorsByIntron(intronKey);
            if (donorSeq.startsWith(str2) && acceptorSeq.endsWith(str3)) {
                fasta.append(DomainArchitecture.NHX_SEPARATOR).append(intronKey).append(IOUtils.LINE_SEPARATOR_UNIX)
                        .append(donorSeq).append(HelpFormatter.DEFAULT_OPT_PREFIX).append(acceptorSeq).append(IOUtils.LINE_SEPARATOR_UNIX);
            }
        }
        if (this.verbose) {
            Timer.showStdErr("\tWriting fasta sequences to file: " + str);
        }
        Gpr.toFile(str, fasta);
    }

    /**
     * Creates splice sites for every intron of every transcript in the transcript set and,
     * in verbose mode, reports how many were created.
     */
    void createSpliceSites() {
        if (this.verbose) {
            Timer.showStdErr("\tCreating splice sites.");
        }
        int created = 0;
        Iterator<Transcript> transcripts = this.transcriptSet.iterator();
        while (transcripts.hasNext()) {
            Iterator<Intron> introns = transcripts.next().introns().iterator();
            while (introns.hasNext()) {
                created += createSpliceSites(introns.next());
            }
        }
        if (this.verbose) {
            Timer.showStdErr("\tCreated : " + created + " splice sites.");
        }
    }

    /**
     * Creates donor / acceptor splice sites on an intron when its sequences match a conserved
     * donor-acceptor pair. A site is only created for a conserved sequence longer than 2 bases
     * and its size is capped at {@code end - start - 1}.
     *
     * @param intron intron to annotate
     * @return number of splice sites created (0, 1 or 2)
     */
    int createSpliceSites(Intron intron) {
        int start = intron.getStart();
        int end = intron.getEnd();
        String intronKey = intron.getChromosomeName() + ":" + start + HelpFormatter.DEFAULT_OPT_PREFIX + end;
        String donorSeq = this.donorsByIntron.get(intronKey);
        String acceptorSeq = this.acceptorsByIntron.get(intronKey);
        if (donorSeq == null || acceptorSeq == null) {
            return 0;
        }
        int pairIndex = bestMatchIndex(donorSeq, acceptorSeq);
        // bestMatchIndex() signals "no match" with -1; index 0 is a valid (the most frequent) pair.
        if (pairIndex < 0) {
            return 0;
        }
        int maxSize = (end - start) - 1;
        String conservedDonor = this.donorAccPairDonor.get(pairIndex);
        String conservedAcc = this.donorAccPairAcc.get(pairIndex);
        if (this.debug) {
            System.err.println("\tCreating splice sites:\t" + donorSeq + HelpFormatter.DEFAULT_OPT_PREFIX + acceptorSeq + "\tConserved:\t" + conservedDonor + HelpFormatter.DEFAULT_OPT_PREFIX + conservedAcc);
        }
        int created = 0;
        if (conservedDonor.length() > 2) {
            intron.createSpliceSiteDonor(Math.min(conservedDonor.length(), maxSize));
            created++;
        }
        if (conservedAcc.length() > 2) {
            intron.createSpliceSiteAcceptor(Math.min(conservedAcc.length(), maxSize));
            created++;
        }
        return created;
    }

    /**
     * Finds conserved donor sequences among the introns whose acceptor ends with {@code str}
     * and registers each resulting donor-acceptor pair through {@link #add(String, String)}.
     *
     * @param str conserved acceptor sequence in non-reversed form (suffix of the stored
     *            acceptor sequences), as passed by {@link #spliceDonoAcceptorPairs()}
     */
    void donor4acc(String str) {
        AcgtTree donorTree = new AcgtTree();
        for (String intronKey : this.acceptorsByIntron.keySet()) {
            // str is a suffix of the (non-reversed) acceptor, exactly as tested in
            // countDonorAcc() and bestMatchIndex(); the acceptor must not be reversed here.
            if (!this.acceptorsByIntron.get(intronKey).endsWith(str)) {
                continue;
            }
            String donorSeq = this.donorsByIntron.get(intronKey);
            if (donorSeq.indexOf('N') < 0) {
                donorTree.add(donorSeq);
            }
        }
        for (String conservedDonor : donorTree.findNodeNames(this.thresholdEntropyDonor, this.thresholdPDonor, THRESHOLD_COUNT)) {
            if (conservedDonor.length() > 1) {
                add(conservedDonor, str);
            }
        }
    }

    /**
     * Returns the {@link #THRESHOLD_ENTROPY} quantile of the tree's entropy values.
     * The list returned by the tree is sorted in place.
     *
     * @param acgtTree tree to inspect
     * @return entropy threshold
     */
    double findEntropyThreshold(AcgtTree acgtTree) {
        List<Double> entropies = acgtTree.entropyAll(THRESHOLD_COUNT);
        Collections.sort(entropies);
        return quantile(entropies, THRESHOLD_ENTROPY);
    }

    /**
     * Returns the {@link #THRESHOLD_P} quantile of the tree's probability values.
     * The list returned by the tree is sorted in place.
     *
     * @param acgtTree tree to inspect
     * @return probability threshold
     */
    double findPthreshold(AcgtTree acgtTree) {
        List<Double> probabilities = acgtTree.pAll(THRESHOLD_COUNT);
        Collections.sort(probabilities);
        return quantile(probabilities, THRESHOLD_P);
    }

    /**
     * Picks the element at position {@code fraction * size} of a sorted list, clamping the
     * position to the valid index range.
     *
     * @param sortedValues values sorted in ascending order
     * @param fraction     quantile, normally within [0, 1]
     * @return the selected value
     * @throws IndexOutOfBoundsException if the list is empty
     */
    private static double quantile(List<Double> sortedValues, double fraction) {
        int lastIndex = sortedValues.size() - 1;
        int index = (int) (fraction * sortedValues.size());
        index = Math.max(0, Math.min(index, lastIndex));
        return sortedValues.get(index).doubleValue();
    }

    /**
     * @param i index of a conserved donor-acceptor pair
     * @return the acceptor sequence of that pair
     */
    public String getAcceptor(int i) {
        return this.donorAccPairAcc.get(i);
    }

    /**
     * @param str intron key
     * @return the stored acceptor sequence, or null when the key is unknown
     */
    public String getAcceptorsByIntron(String str) {
        return this.acceptorsByIntron.get(str);
    }

    /**
     * @param str intron key
     * @return the stored branch sequence, or null when the key is unknown
     */
    public String getBranchByIntron(String str) {
        return this.branchByIntron.get(str);
    }

    /**
     * @param str key used when the branch sites were added
     * @return the branch sites stored under the key, obtained via {@code getOrCreate}
     */
    public List<SpliceSiteBranchU12> getBranchU12(String str) {
        return this.branchU12ByDonorAcc.getOrCreate(str);
    }

    /**
     * @param i index of a conserved donor-acceptor pair
     * @return the donor sequence of that pair
     */
    public String getDonor(int i) {
        return this.donorAccPairDonor.get(i);
    }

    /**
     * @return number of conserved donor-acceptor pairs
     */
    public int getDonorAccPairSize() {
        return this.donorAccPairDonor.size();
    }

    /**
     * @param str intron key
     * @return the stored donor sequence, or null when the key is unknown
     */
    public String getDonorByIntron(String str) {
        return this.donorsByIntron.get(str);
    }

    /**
     * @return the keys of all introns for which sequences were collected (live view of the
     *         donor map's key set)
     */
    public Set<String> getIntronKeySet() {
        return this.donorsByIntron.keySet();
    }

    /**
     * Loads the U12 PWM, the SnpEff predictor (only when the configuration has none yet) and
     * builds a filtered transcript set (only when none was supplied).
     */
    void load() {
        String pwmFile = this.config.getDirData() + "/spliceSites/u12_branch.pwm";
        if (this.verbose) {
            Timer.showStdErr("\tLoading U12 PWM form file '" + pwmFile + "'");
        }
        this.pwmU12 = new Pwm(pwmFile);

        if (this.config.getSnpEffectPredictor() == null) {
            if (this.verbose) {
                Timer.showStdErr("\tLoading: " + this.config.getGenome().getGenomeName());
            }
            this.config.loadSnpEffectPredictor();
            if (this.verbose) {
                Timer.showStdErr("\tdone.");
            }
        }

        if (this.transcriptSet == null) {
            TranscriptSet loaded = new TranscriptSet(this.config.getGenome());
            this.transcriptSet = loaded;
            loaded.setVerbose(this.verbose);
            loaded.setDebug(this.debug);
            loaded.filter();
        }
    }

    /**
     * Extracts the upper-cased sequence of {@code 2 * MAX_SPLICE_SIZE + 1} bases centered on
     * the acceptor end of an intron, reverse-complemented for minus-strand transcripts.
     * (A decompiler note in the original listing says access was widened from package-private.)
     *
     * @param transcript transcript owning the intron
     * @param str        chromosome sequence
     * @param i          intron start
     * @param i2         intron end
     * @return the sequence, or "" when the intron is shorter than {@link #MAX_SPLICE_SIZE}
     */
    public String seqAcceptor(Transcript transcript, String str, int i, int i2) {
        if (i2 - i < MAX_SPLICE_SIZE) {
            return "";
        }
        if (transcript.isStrandPlus()) {
            return str.substring(i2 - MAX_SPLICE_SIZE, i2 + MAX_SPLICE_SIZE + 1).toUpperCase();
        }
        return GprSeq.reverseWc(str.substring(i - MAX_SPLICE_SIZE, i + MAX_SPLICE_SIZE + 1).toUpperCase());
    }

    /**
     * Extracts the upper-cased branch region ({@code SIZE_BRANCH - 1} bases inside the intron,
     * next to its acceptor end), reverse-complemented for minus-strand transcripts.
     *
     * @param transcript transcript owning the intron
     * @param str        chromosome sequence
     * @param i          intron start
     * @param i2         intron end
     * @return the sequence, or "" when the intron is shorter than {@link #SIZE_BRANCH}
     */
    String seqBranch(Transcript transcript, String str, int i, int i2) {
        if (i2 - i < SIZE_BRANCH) {
            return "";
        }
        if (transcript.isStrandPlus()) {
            return str.substring((i2 - SIZE_BRANCH) + 1, i2).toUpperCase();
        }
        return GprSeq.reverseWc(str.substring(i + 1, i + SIZE_BRANCH).toUpperCase());
    }

    /**
     * Extracts the upper-cased sequence of {@code 2 * MAX_SPLICE_SIZE + 1} bases centered on
     * the donor end of an intron, reverse-complemented for minus-strand transcripts.
     * (A decompiler note in the original listing says access was widened from package-private.)
     *
     * @param transcript transcript owning the intron
     * @param str        chromosome sequence
     * @param i          intron start
     * @param i2         intron end
     * @return the sequence, or "" when the intron is shorter than {@link #MAX_SPLICE_SIZE}
     */
    public String seqDonor(Transcript transcript, String str, int i, int i2) {
        if (i2 - i < MAX_SPLICE_SIZE) {
            return "";
        }
        if (transcript.isStrandPlus()) {
            return str.substring(i - MAX_SPLICE_SIZE, i + MAX_SPLICE_SIZE + 1).toUpperCase();
        }
        return GprSeq.reverseWc(str.substring(i2 - MAX_SPLICE_SIZE, i2 + MAX_SPLICE_SIZE + 1).toUpperCase());
    }

    /** @param z whether to print debug output */
    public void setDebug(boolean z) {
        this.debug = z;
    }

    /** @param str genome FASTA file to read instead of the one named by the configuration */
    public void setGenomeFasta(String str) {
        this.genomeFasta = str;
    }

    /** @param transcriptSet transcript set to analyze instead of the one built by {@link #load()} */
    public void setTranscriptSet(TranscriptSet transcriptSet) {
        this.transcriptSet = transcriptSet;
    }

    /** @param z whether to print progress messages */
    public void setVerbose(boolean z) {
        this.verbose = z;
    }

    /**
     * Finds the conserved donor-acceptor pairs: builds the donor and (reversed) acceptor
     * trees, derives entropy / probability thresholds, collects candidate pairs from both
     * directions (acceptors for each conserved donor, donors for each conserved acceptor) and
     * stores the pairs, most frequent first, in the conserved pair lists.
     */
    void spliceDonoAcceptorPairs() {
        if (this.verbose) {
            Timer.showStdErr("\tFinding donor-acceptor pairs: Creating quaternary trees");
        }
        for (String donorSeq : this.donorsByIntron.values()) {
            if (donorSeq.indexOf('N') < 0) {
                this.acgtTreeDonors.add(donorSeq);
            }
        }
        for (String acceptorSeq : this.acceptorsByIntron.values()) {
            if (acceptorSeq.indexOf('N') < 0) {
                // Reversed so that the tree grows from the splice junction outwards.
                this.acgtTreeAcc.add(GprSeq.reverse(acceptorSeq));
            }
        }

        if (this.verbose) {
            Timer.showStdErr("\tCalculate thresholds");
        }
        this.thresholdPDonor = findPthreshold(this.acgtTreeDonors);
        this.thresholdEntropyDonor = findEntropyThreshold(this.acgtTreeDonors);
        this.thresholdPAcc = findPthreshold(this.acgtTreeAcc);
        this.thresholdEntropyAcc = findEntropyThreshold(this.acgtTreeAcc);

        if (this.verbose) {
            Timer.showStdErr("\tDonors Thresholds:\t\tEntropy: " + this.thresholdEntropyDonor + "\t\tProbability: " + this.thresholdPDonor);
        }
        for (String conservedDonor : this.acgtTreeDonors.findNodeNames(this.thresholdEntropyDonor, this.thresholdPDonor, THRESHOLD_COUNT)) {
            if (conservedDonor.length() > 1) {
                acc4donor(conservedDonor);
            }
        }

        if (this.verbose) {
            Timer.showStdErr("\tFind acceptors");
            Timer.showStdErr("\tAcceptors Thresholds:\t\tEntropy: " + this.thresholdEntropyAcc + "\t\tProbability: " + this.thresholdPAcc);
        }
        for (String reversedAcceptor : this.acgtTreeAcc.findNodeNames(this.thresholdEntropyAcc, this.thresholdPAcc, THRESHOLD_COUNT)) {
            if (reversedAcceptor.length() > 1) {
                donor4acc(GprSeq.reverse(reversedAcceptor));
            }
        }

        if (this.verbose) {
            Timer.showStdErr("\tAdd Donor - Acceptors pairs: ");
        }
        List<String> pairKeys = new ArrayList<String>(this.donorAcc.keySet());
        // Most frequent pairs first.
        Collections.sort(pairKeys, new Comparator<String>() {
            @Override
            public int compare(String left, String right) {
                return Integer.compare(SpliceTypes.this.donorAcc.get(right).intValue(), SpliceTypes.this.donorAcc.get(left).intValue());
            }
        });
        for (String pairKey : pairKeys) {
            // NOTE(review): add() stores pairs with count >= THRESHOLD_COUNT but this test is
            // strict, so a pair seen exactly THRESHOLD_COUNT times is dropped — confirm intent.
            if (this.donorAcc.get(pairKey).intValue() > THRESHOLD_COUNT) {
                String[] donorAndAcceptor = pairKey.trim().split("\\s+");
                this.donorAccPairDonor.add(donorAndAcceptor[0]);
                this.donorAccPairAcc.add(donorAndAcceptor[1]);
                if (this.verbose) {
                    Timer.showStdErr("\t\t\t" + this.donorAcc.get(pairKey) + "\t" + pairKey);
                }
            }
        }
    }

    /**
     * Reads the genome FASTA file (from {@link #genomeFasta}, defaulting to the configured
     * one) and collects splice sequences for every sequence in it.
     */
    void spliceSequences() {
        if (this.genomeFasta == null) {
            this.genomeFasta = this.config.getFileNameGenomeFasta();
        }
        if (this.verbose) {
            Timer.showStdErr("\tFinding splice sequences. Reading fasta file: " + this.genomeFasta);
        }
        FastaFileIterator fasta = new FastaFileIterator(this.genomeFasta);
        Iterator<String> sequences = fasta.iterator();
        while (sequences.hasNext()) {
            // The name is read before advancing the iterator, matching the evaluation order
            // of the original single-statement call.
            String chromosomeName = Chromosome.simpleName(fasta.getName());
            String sequence = sequences.next();
            spliceSequences(chromosomeName, sequence);
        }
    }

    /**
     * Collects splice sequences for every pair of consecutive exons (in strand order) of
     * every transcript on a chromosome.
     *
     * @param str  chromosome name
     * @param str2 chromosome sequence
     */
    void spliceSequences(String str, String str2) {
        int exonCount = 0;
        int transcriptCount = 0;
        for (Transcript transcript : this.transcriptSet.getByChromo(str)) {
            Exon previous = null;
            for (Exon current : transcript.sortedStrand()) {
                exonCount++;
                if (previous != null) {
                    int intronStart;
                    int intronEnd;
                    if (transcript.isStrandPlus()) {
                        intronStart = previous.getEnd();
                        intronEnd = current.getStart();
                    } else {
                        // Exons come in strand order, so genomic order is swapped here.
                        intronStart = current.getEnd();
                        intronEnd = previous.getStart();
                    }
                    spliceSequences(transcript, str, str2, intronStart, intronEnd);
                }
                previous = current;
            }
            transcriptCount++;
        }
        if (this.verbose) {
            Timer.showStdErr("\t\tChromosome: " + str + "\tTranscripts: " + transcriptCount + "\tExons: " + exonCount + "\tTotal Splice sites: " + this.donorsByIntron.size());
        }
    }

    /**
     * Stores the donor, acceptor and branch sequences of one intron, unless the intron key is
     * already known. Only the part after the first {@code MAX_SPLICE_SIZE + 1} bases of the
     * donor window and the first {@code MAX_SPLICE_SIZE} bases of the acceptor window are kept.
     *
     * @param transcript transcript owning the intron
     * @param str        chromosome name
     * @param str2       chromosome sequence
     * @param i          intron start
     * @param i2         intron end
     */
    void spliceSequences(Transcript transcript, String str, String str2, int i, int i2) {
        String intronKey = str + ":" + i + HelpFormatter.DEFAULT_OPT_PREFIX + i2;
        if (this.donorsByIntron.containsKey(intronKey)) {
            return;
        }
        String donorWindow = seqDonor(transcript, str2, i, i2);
        String acceptorWindow = seqAcceptor(transcript, str2, i, i2);
        String branchSeq = seqBranch(transcript, str2, i, i2);

        String donorSeq = "";
        if (!donorWindow.isEmpty()) {
            donorSeq = donorWindow.substring(MAX_SPLICE_SIZE + 1);
        }
        String acceptorSeq = "";
        if (!acceptorWindow.isEmpty()) {
            acceptorSeq = acceptorWindow.substring(0, MAX_SPLICE_SIZE);
        }

        this.donorsByIntron.put(intronKey, donorSeq);
        this.acceptorsByIntron.put(intronKey, acceptorSeq);
        this.branchByIntron.put(intronKey, branchSeq);
    }
}
