package org.forester.io.parsers.nexus;

import htsjdk.variant.vcf.VCFConstants;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.io.parsers.IteratingPhylogenyParser;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nhx.NHXFormatException;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.parsers.util.PhylogenyParserException;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.DomainArchitecture;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;

/* loaded from: input_file:org/forester/io/parsers/nexus/NexusPhylogeniesParser.class */
/**
 * Line-oriented parser that extracts phylogenies from a Nexus source.
 *
 * <p>The source is read one line at a time. Lines are classified by their
 * (lower-cased) prefix into block starts, block ends, tree statements, taxon
 * labels, translate tables and alignment rows. Each completed tree statement
 * is handed to an {@link NHXParser}; the resulting {@link Phylogeny} is then
 * post-processed (naming, rootedness, leaf renaming, sequence attachment) and
 * exposed through {@link #hasNext()} / {@link #next()}.
 *
 * <p>The parser always holds one pre-fetched phylogeny in {@code _next}; it is
 * (re)filled by {@code getNext()} from {@link #reset()} and {@link #next()}.
 */
public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser {

    // Lower-cased statement prefixes; input lines are lower-cased before comparison.
    private static final String begin_trees = toLower(NexusConstants.BEGIN_TREES);
    private static final String end = toLower(NexusConstants.END);
    private static final String endblock = "endblock";
    private static final String taxlabels = toLower(NexusConstants.TAXLABELS);
    private static final String translate = toLower(NexusConstants.TRANSLATE);
    private static final String data = toLower(NexusConstants.BEGIN_DATA);
    private static final String characters = toLower(NexusConstants.BEGIN_CHARACTERS);
    private static final String tree = toLower(NexusConstants.TREE);
    private static final String utree = toLower(NexusConstants.UTREE);

    // Captures the rootedness marker of a tree statement: "= [&R]" or "= [&U]".
    private static final Pattern ROOTEDNESS_PATTERN = Pattern.compile(".+=\\s*\\[&([RU])\\].*");
    private static final Pattern TITLE_PATTERN = Pattern.compile("TITLE.?\\s+([^;]+)", Pattern.CASE_INSENSITIVE);
    private static final Pattern TREE_NAME_PATTERN = Pattern.compile("\\s*.?Tree\\s+(.+?)\\s*=.+", Pattern.CASE_INSENSITIVE);
    // One translate entry: key, whitespace, value.
    private static final Pattern TRANSLATE_PATTERN = Pattern.compile("([0-9A-Za-z]+)\\s+(.+)");
    // One alignment row: identifier, whitespace, residues.
    private static final Pattern ALN_PATTERN = Pattern.compile("(.+)\\s+([A-Za-z-_\\*\\?]+)");
    private static final Pattern DATATYPE_PATTERN = Pattern.compile("datatype\\s?.\\s?([a-z]+)");

    // Characters stripped from taxon labels, translate values and tree names.
    private static final String QUOTES_REGEX = "['\"]+";

    private BufferedReader _br;
    private Object _nexus_source;

    // Line-classification state carried from one input line to the next.
    private boolean _in_taxalabels;
    private boolean _in_translate;
    private boolean _in_tree;
    private boolean _in_trees_block;
    private boolean _in_data_block;

    // State of the tree statement currently being accumulated.
    private StringBuilder _nh;
    private String _name;
    private String _title;
    private boolean _rooted_info_present;
    private boolean _is_rooted;

    // Data collected from non-tree statements.
    private String _datatype;
    private List<String> _taxlabels;
    private Map<String, String> _translate_map;
    private StringBuilder _translate_sb;
    private Map<String, MolecularSequence> _seqs;

    // The pre-fetched phylogeny returned by the next call to next(); null when exhausted.
    private Phylogeny _next;

    // Options forwarded to the NHXParser / applied during post-processing.
    private boolean _ignore_quotes_in_nh_data = false;
    private boolean _replace_underscores = false;
    private NHXParser.TAXONOMY_EXTRACTION _taxonomy_extraction = NHXParser.TAXONOMY_EXTRACTION.NO;
    private final boolean _add_sequences = true;

    /**
     * @return the fixed display name of this parser
     */
    @Override // org.forester.io.parsers.PhylogenyParser
    public String getName() {
        return "Nexus Phylogenies Parser";
    }

    /**
     * @return {@code true} if a pre-fetched phylogeny is available
     */
    @Override // org.forester.io.parsers.IteratingPhylogenyParser
    public final boolean hasNext() {
        return this._next != null;
    }

    /**
     * Returns the pre-fetched phylogeny and reads ahead to the following one.
     *
     * @return the current phylogeny, or {@code null} if none was available
     * @throws NHXFormatException propagated from the read-ahead
     * @throws IOException if reading the source fails
     */
    @Override // org.forester.io.parsers.IteratingPhylogenyParser
    public final Phylogeny next() throws NHXFormatException, IOException {
        final Phylogeny current = this._next;
        getNext();
        return current;
    }

    /**
     * Collects every remaining phylogeny and then calls {@link #reset()}.
     *
     * @return the remaining phylogenies in source order (possibly empty)
     * @throws IOException if reading or parsing fails
     */
    @Override // org.forester.io.parsers.PhylogenyParser
    public final Phylogeny[] parse() throws IOException {
        final List<Phylogeny> phylogenies = new ArrayList<Phylogeny>();
        while (hasNext()) {
            phylogenies.add(next());
        }
        reset();
        return phylogenies.toArray(new Phylogeny[phylogenies.size()]);
    }

    /**
     * Clears all parsing state, creates a fresh reader for the current source
     * and pre-fetches the first phylogeny.
     *
     * @throws FileNotFoundException declared for reader creation
     * @throws IOException if reading the source fails
     */
    @Override // org.forester.io.parsers.IteratingPhylogenyParser
    public final void reset() throws FileNotFoundException, IOException {
        this._taxlabels = new ArrayList<String>();
        this._translate_map = new HashMap<String, String>();
        this._seqs = new HashMap<String, MolecularSequence>();
        this._nh = new StringBuilder();
        this._name = "";
        this._title = "";
        this._translate_sb = null;
        this._next = null;
        this._in_trees_block = false;
        this._in_taxalabels = false;
        this._in_translate = false;
        this._in_tree = false;
        // Data-block state must be cleared as well, otherwise a previous
        // (possibly aborted) parse leaks into the next one.
        this._in_data_block = false;
        this._datatype = null;
        this._rooted_info_present = false;
        this._is_rooted = false;
        // NOTE(review): the previous reader is not closed here; closing it could
        // also close a caller-supplied stream that createReader is about to wrap again.
        this._br = ParserUtils.createReader(this._nexus_source);
        getNext();
    }

    /**
     * @param z value forwarded to {@code NHXParser.setIgnoreQuotes} for each tree
     */
    public final void setIgnoreQuotes(boolean z) {
        this._ignore_quotes_in_nh_data = z;
    }

    /**
     * @param z whether underscores in names are replaced by spaces
     */
    public final void setReplaceUnderscores(boolean z) {
        this._replace_underscores = z;
    }

    /**
     * Sets the source to parse and immediately resets the parser on it.
     *
     * @param obj the Nexus source; must not be {@code null}
     * @throws PhylogenyParserException if {@code obj} is {@code null}
     * @throws IOException if reading the source fails
     */
    @Override // org.forester.io.parsers.IteratingPhylogenyParser, org.forester.io.parsers.PhylogenyParser
    public final void setSource(Object obj) throws PhylogenyParserException, IOException {
        if (obj == null) {
            throw new PhylogenyParserException("attempt to parse null object");
        }
        this._nexus_source = obj;
        reset();
    }

    /**
     * @param taxonomy_extraction mode forwarded to the NHX parser and used when
     *        post-processing leaf names
     */
    public final void setTaxonomyExtraction(NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction) {
        this._taxonomy_extraction = taxonomy_extraction;
    }

    /**
     * Parses one accumulated tree statement and stores the result in {@code _next}.
     *
     * @param title title of the enclosing trees block (may be empty)
     * @param name name of the tree statement (may be empty)
     * @param nhx the accumulated tree text handed to the {@link NHXParser}
     * @param rooted_info_present whether an explicit rootedness marker was seen
     * @param is_rooted the rootedness to apply when {@code rooted_info_present}
     * @throws IOException if the NHX parser fails or yields no phylogeny
     */
    private void createPhylogeny(String title, String name, StringBuilder nhx, boolean rooted_info_present, boolean is_rooted) throws IOException {
        this._next = null;
        final NHXParser parser = new NHXParser();
        parser.setTaxonomyExtraction(this._taxonomy_extraction);
        parser.setReplaceUnderscores(this._replace_underscores);
        parser.setIgnoreQuotes(this._ignore_quotes_in_nh_data);
        if (rooted_info_present) {
            // Explicit [&R]/[&U] information wins over any guess.
            parser.setGuessRootedness(false);
        }
        parser.setSource(nhx);
        final Phylogeny phylogeny = parser.next();
        if (phylogeny == null) {
            throw new PhylogenyParserException("failed to create phylogeny");
        }

        final String displayName = composeName(title, name);
        if (!ForesterUtil.isEmpty(displayName)) {
            phylogeny.setName(displayName);
        }
        if (rooted_info_present) {
            phylogeny.setRooted(is_rooted);
        }

        if (!this._taxlabels.isEmpty() || !this._translate_map.isEmpty()) {
            final PhylogenyNodeIterator leaves = phylogeny.iteratorExternalForward();
            while (leaves.hasNext()) {
                postProcessLeaf(leaves.next());
            }
        }
        this._next = phylogeny;
    }

    /** Combines block title and tree name into "title (name)", or whichever part is present. */
    private static String composeName(String title, String name) {
        final boolean hasTitle = !ForesterUtil.isEmpty(title);
        final boolean hasName = !ForesterUtil.isEmpty(name);
        if (hasTitle && hasName) {
            return title.replace('_', ' ').trim() + " (" + name.trim() + ")";
        }
        if (hasTitle) {
            return title.replace('_', ' ').trim();
        }
        if (hasName) {
            return name.trim();
        }
        return null;
    }

    /** Renames one external node via translate table / taxon labels and attaches a matching sequence. */
    private void postProcessLeaf(PhylogenyNode leaf) {
        if (!this._translate_map.isEmpty() && this._translate_map.containsKey(leaf.getName())) {
            leaf.setName(this._translate_map.get(leaf.getName()).replaceAll(QUOTES_REGEX, ""));
        } else if (!this._taxlabels.isEmpty()) {
            int index = -1;
            try {
                index = Integer.parseInt(leaf.getName());
            } catch (NumberFormatException ignored) {
                // Not a numeric reference into the taxon labels; the name is kept as-is.
            }
            // Taxon references are 1-based; a number beyond the label list is
            // left untouched instead of failing with IndexOutOfBoundsException.
            if (index > 0 && index <= this._taxlabels.size()) {
                leaf.setName(this._taxlabels.get(index - 1).replaceAll(QUOTES_REGEX, ""));
            }
        }
        if (!this._replace_underscores && this._taxonomy_extraction != NHXParser.TAXONOMY_EXTRACTION.NO) {
            ParserUtils.extractTaxonomyDataFromNodeName(leaf, this._taxonomy_extraction);
        } else if (this._replace_underscores && !ForesterUtil.isEmpty(leaf.getName())) {
            leaf.setName(leaf.getName().replace('_', ' ').trim());
        }
        if (this._add_sequences && this._seqs.containsKey(leaf.getName())) {
            final Sequence sequence = new Sequence(this._seqs.get(leaf.getName()));
            sequence.setMolecularSequenceAligned(true);
            leaf.getNodeData().addSequence(sequence);
        }
    }

    /**
     * Builds a phylogeny from the accumulated tree text and clears the per-tree
     * state so that the same text is never emitted twice.
     */
    private void flushTree() throws IOException {
        createPhylogeny(this._title, this._name, this._nh, this._rooted_info_present, this._is_rooted);
        this._nh = new StringBuilder();
        this._name = "";
        this._rooted_info_present = false;
        this._is_rooted = false;
    }

    /** @return whether the lower-cased line starts with an end-of-block keyword */
    private static boolean isBlockEnd(String lineLc) {
        return lineLc.startsWith(end) || lineLc.startsWith(endblock);
    }

    /**
     * Reads lines until the next phylogeny is complete and stores it in
     * {@code _next}; leaves {@code _next} as {@code null} when the source is
     * exhausted.
     *
     * @throws IOException if reading or tree creation fails
     * @throws NHXFormatException propagated from tree creation
     */
    private void getNext() throws IOException, NHXFormatException {
        this._next = null;
        String raw;
        while ((raw = this._br.readLine()) != null) {
            final String trimmed = raw.trim();
            // Skip blank lines and lines starting with "#" or the NHX separator
            // (presumably a decompiler-substituted string literal - TODO confirm).
            if (trimmed.length() == 0 || trimmed.startsWith("#") || trimmed.startsWith(DomainArchitecture.NHX_SEPARATOR)) {
                continue;
            }
            String line = removeWhiteSpaceBeforeSemicolon(ForesterUtil.collapseWhiteSpace(trimmed));
            final String lineLc = toLower(line);

            if (lineLc.startsWith(begin_trees)) {
                this._in_trees_block = true;
                this._in_taxalabels = false;
                this._in_translate = false;
                this._in_data_block = false;
                this._datatype = null;
                this._title = "";
            } else if (lineLc.startsWith(taxlabels)) {
                this._in_trees_block = false;
                this._in_taxalabels = true;
                this._in_translate = false;
                this._in_data_block = false;
                this._datatype = null;
            } else if (lineLc.startsWith(translate)) {
                // A translate statement does not leave the trees block.
                this._translate_sb = new StringBuilder();
                this._in_taxalabels = false;
                this._in_translate = true;
                this._in_data_block = false;
                this._datatype = null;
            } else if (lineLc.startsWith(data) || lineLc.startsWith(characters)) {
                this._in_taxalabels = false;
                this._in_trees_block = false;
                this._in_translate = false;
                this._in_data_block = true;
                this._datatype = null;
            } else if (this._in_trees_block) {
                final boolean isTitle = lineLc.startsWith("title");
                final boolean isLink = lineLc.startsWith("link");
                final boolean isEnd = isBlockEnd(lineLc);
                if (isTitle) {
                    final Matcher titleMatcher = TITLE_PATTERN.matcher(line);
                    if (titleMatcher.lookingAt()) {
                        this._title = titleMatcher.group(1);
                    }
                } else if (isLink) {
                    // LINK statements are recognised only so that they are not
                    // mistaken for tree data; their content is not used.
                } else if (isEnd) {
                    this._in_trees_block = false;
                    this._in_tree = false;
                    this._in_translate = false;
                    if (this._nh.length() > 0) {
                        flushTree();
                        if (this._next != null) {
                            return;
                        }
                    }
                } else if (lineLc.startsWith(tree) || lineLc.startsWith(utree)) {
                    // A new tree statement terminates a still-pending one.
                    boolean flushedPrevious = false;
                    if (this._nh.length() > 0) {
                        flushedPrevious = true;
                        flushTree();
                    }
                    this._in_tree = true;
                    // Tree text starts at '='; if it is missing on this line the
                    // statement is assumed to continue on the following lines.
                    final int eq = line.indexOf('=');
                    if (eq >= 0) {
                        this._nh.append(line.substring(eq));
                    }
                    final Matcher nameMatcher = TREE_NAME_PATTERN.matcher(line);
                    if (nameMatcher.matches()) {
                        this._name = nameMatcher.group(1).replaceAll(QUOTES_REGEX, "");
                    }
                    final Matcher rootednessMatcher = ROOTEDNESS_PATTERN.matcher(line);
                    if (rootednessMatcher.matches()) {
                        final String marker = rootednessMatcher.group(1);
                        // Only affects the local line used by the sections below;
                        // the text already appended to _nh keeps the marker.
                        line = line.replaceAll("\\[\\&.\\]", "");
                        this._rooted_info_present = true;
                        if ("R".equalsIgnoreCase(marker)) {
                            this._is_rooted = true;
                        }
                    }
                    if (flushedPrevious && this._next != null) {
                        // The previous tree is ready; the new one stays pending in _nh.
                        return;
                    }
                } else if (this._in_tree && !this._in_translate) {
                    this._nh.append(line);
                }
                // A ';' at the end of a tree (continuation) line completes the statement.
                if (!isTitle && !isLink && !isEnd && !this._in_translate && lineLc.endsWith(";")) {
                    this._in_tree = false;
                    // Nothing to build for ';'-terminated statements that are not trees.
                    if (this._nh.length() > 0) {
                        flushTree();
                        if (this._next != null) {
                            return;
                        }
                    }
                }
            }

            if (this._in_taxalabels) {
                if (isBlockEnd(lineLc)) {
                    this._in_taxalabels = false;
                } else {
                    for (String token : line.split("\\s+")) {
                        if (toLower(token).equals(taxlabels)) {
                            continue;
                        }
                        if (token.endsWith(";")) {
                            // The terminator may be glued to the last label.
                            this._in_taxalabels = false;
                            token = token.substring(0, token.length() - 1);
                        }
                        if (token.length() > 0) {
                            this._taxlabels.add(token);
                        }
                    }
                }
            }

            if (this._in_translate) {
                if (isBlockEnd(lineLc)) {
                    this._in_translate = false;
                } else {
                    this._translate_sb.append(" ");
                    this._translate_sb.append(line.trim());
                    if (line.endsWith(";")) {
                        this._in_translate = false;
                        setTranslateKeyValuePairs(this._translate_sb);
                    }
                }
            }

            if (this._in_data_block) {
                if (isBlockEnd(lineLc)) {
                    this._in_data_block = false;
                    this._datatype = null;
                } else if (lineLc.startsWith("link")) {
                    // LINK statements carry nothing used by this parser.
                } else {
                    final Matcher datatypeMatcher = DATATYPE_PATTERN.matcher(lineLc);
                    if (datatypeMatcher.find()) {
                        this._datatype = datatypeMatcher.group(1);
                    } else if (isSupportedDatatype(this._datatype)) {
                        if (line.endsWith(";")) {
                            this._in_data_block = false;
                            line = line.substring(0, line.length() - 1);
                        }
                        final Matcher rowMatcher = ALN_PATTERN.matcher(line);
                        if (rowMatcher.matches()) {
                            final String id = rowMatcher.group(1);
                            final String residues = rowMatcher.group(2);
                            final MolecularSequence sequence;
                            if (this._datatype.equals(PhyloXmlUtil.SEQ_TYPE_PROTEIN)) {
                                sequence = BasicSequence.createAaSequence(id, residues);
                            } else if (this._datatype.equals(PhyloXmlUtil.SEQ_TYPE_DNA)) {
                                sequence = BasicSequence.createDnaSequence(id, residues);
                            } else {
                                sequence = BasicSequence.createRnaSequence(id, residues);
                            }
                            this._seqs.put(id, sequence);
                        }
                    }
                }
            }
        }

        // End of input: emit a tree that was never terminated. flushTree() clears
        // _nh, so subsequent calls at end of input leave _next null instead of
        // producing the same tree again without end.
        if (this._nh.length() > 0) {
            flushTree();
        }
    }

    /** @return whether alignment rows of the given datatype are turned into sequences */
    private static boolean isSupportedDatatype(String datatype) {
        return datatype != null
                && (datatype.equals(PhyloXmlUtil.SEQ_TYPE_PROTEIN)
                        || datatype.equals(PhyloXmlUtil.SEQ_TYPE_DNA)
                        || datatype.equals(PhyloXmlUtil.SEQ_TYPE_RNA));
    }

    /**
     * Splits an accumulated translate statement into comma-separated
     * {@code key value} pairs and stores them in the translate map.
     *
     * @param sb the full statement text, including the leading keyword
     * @throws IOException if a pair does not look like {@code key value}
     */
    private void setTranslateKeyValuePairs(StringBuilder sb) throws IOException {
        String statement = sb.toString().trim();
        if (statement.endsWith(";")) {
            statement = statement.substring(0, statement.length() - 1).trim();
        }
        final String[] pairs = statement.split(",");
        for (int i = 0; i < pairs.length; i++) {
            String pair = pairs[i];
            if (i == 0) {
                // The keyword only ever precedes the first pair; stripping it from
                // later pairs would mangle taxon names that contain "translate".
                final int keywordAt = toLower(pair).indexOf("translate");
                if (keywordAt > -1) {
                    pair = pair.substring(keywordAt + "translate".length());
                }
            }
            final Matcher matcher = TRANSLATE_PATTERN.matcher(pair);
            if (!matcher.find()) {
                throw new IOException("ill-formatted translate values: " + pair);
            }
            final String key = matcher.group(1);
            String value = matcher.group(2).replace("'", "").replace("\"", "").trim();
            if (value.endsWith(";")) {
                value = value.substring(0, value.length() - 1);
            }
            this._translate_map.put(key, value);
        }
    }

    /** Removes any whitespace directly preceding a semicolon. */
    private static String removeWhiteSpaceBeforeSemicolon(String str) {
        return str.replaceAll("\\s+;", ";");
    }

    /** Locale-independent lower-casing, so keyword matching does not depend on the default locale. */
    private static String toLower(String s) {
        return s.toLowerCase(Locale.ROOT);
    }
}
