/*
 * Decompiled with CFR 0.152.
 */
package fork.lib.bio.seq.parser.gtfgff;

import fork.lib.base.file.FileName;
import fork.lib.bio.seq.parser.gtfgff.GtfGffLine;
import fork.lib.bio.seq.parser.gtfgff.GtfLine;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;

/**
 * Splits a GTF annotation file into one GTF file per biotype category.
 *
 * <p>The input is read line by line. Lines starting with {@code #} are kept as
 * header lines and copied to the top of every output file opened afterwards.
 * Every other non-blank line is wrapped in a {@link GtfLine}. Records are
 * buffered until a gene record with a different gene id is seen; the buffer is
 * then written to the output file of the category of the gene that started it.
 * The category of a gene is derived from its biotype field via
 * {@link #category(String)}.
 *
 * <p>Instances hold mutable state (collected headers, open writers); nothing in
 * this class synchronizes access to it.
 */
public class GtfSplitter {
    /**
     * Maps a category name (e.g. {@code "protein-coding"}) to the set of biotype
     * strings that belong to it. Populated by the static initializer below.
     */
    public static HashMap<String, HashSet<String>> catstr = new HashMap<String, HashSet<String>>();
    /** Header lines (starting with {@code #}) collected from the input file so far. */
    protected ArrayList<String> head = new ArrayList<String>();
    /** The GTF file to split. */
    protected File f;
    /** Writers that are currently open, keyed by category name. */
    protected HashMap<String, BufferedWriter> bws = new HashMap<String, BufferedWriter>();

    static {
        register("protein-coding",
                "IG_C_gene", "IG_D_gene", "IG_J_gene", "IG_LV_gene", "IG_M_gene",
                "IG_V_gene", "IG_Z_gene", "nonsense_mediated_decay", "nontranslating_CDS",
                "non_stop_decay", "polymorphic_pseudogene", "protein_coding",
                "TR_C_gene", "TR_D_gene", "TR_gene", "TR_J_gene", "TR_V_gene");
        register("pseudogene",
                "disrupted_domain", "IG_C_pseudogene", "IG_J_pseudogene", "IG_pseudogene",
                "IG_V_pseudogene", "processed_pseudogene", "pseudogene",
                "transcribed_processed_pseudogene", "transcribed_unprocessed_pseudogene",
                "translated_processed_pseudogene", "translated_unprocessed_pseudogene",
                "TR_J_pseudogene", "TR_V_pseudogene", "unitary_pseudogene",
                "unprocessed_pseudogene");
        register("long-nc",
                "3prime_overlapping_ncrna", "ambiguous_orf", "antisense", "lincRNA",
                "ncrna_host", "non_coding", "processed_transcript", "retained_intron",
                "sense_intronic", "sense_overlapping");
        // "tmRNA" was previously registered as "tmRNA," (stray comma), so it never matched.
        register("short-nc",
                "miRNA", "miRNA_pseudogene", "misc_RNA", "misc_RNA_pseudogene", "Mt_rRNA",
                "Mt_tRNA", "Mt_tRNA_pseudogene", "ncRNA", "pre_miRNA", "RNase_MRP_RNA",
                "RNase_P_RNA", "rRNA", "rRNA_pseudogene", "scRNA_pseudogene", "snlRNA",
                "snoRNA", "snoRNA_pseudogene", "snRNA", "snRNA_pseudogene", "SRP_RNA",
                "tmRNA", "tRNA", "tRNA_pseudogene");
    }

    /**
     * Creates a splitter for the given GTF file. The file is not opened until
     * {@link #writeToOutdir(File)} is called.
     *
     * @param f the GTF file to split
     * @throws Exception never thrown here; declared for subclasses
     */
    public GtfSplitter(File f) throws Exception {
        this.f = f;
    }

    /**
     * Extension hook; does nothing in this class and is not called by it.
     *
     * @throws Exception declared for subclasses
     */
    protected void init() throws Exception {
    }

    /**
     * Splits the input file into per-category GTF files inside {@code od}.
     *
     * <p>The directory is created if necessary. Each output file is named
     * {@code <input base name>_<category>.gtf} and starts with the header lines
     * read so far plus a {@code #!biotype:<category>} line. The name of each
     * newly opened category is printed to standard output.
     *
     * <p>The reader and all writers are closed when this method returns,
     * whether it completes normally or throws.
     *
     * @param od the output directory
     * @throws IllegalStateException if the input contains records but no gene
     *         feature, so no category can be determined for them
     * @throws Exception on any I/O or parsing failure
     */
    public void writeToOutdir(File od) throws Exception {
        od.getAbsoluteFile().mkdirs();
        BufferedReader br = new BufferedReader(new FileReader(this.f));
        boolean completed = false;
        try {
            this.split(br, od);
            completed = true;
        } finally {
            // When the split itself failed, keep that exception and do not let a
            // secondary close failure replace it.
            this.closeAll(br, !completed);
        }
    }

    /** Reads all records from {@code br} and routes them to the per-category writers. */
    private void split(BufferedReader br, File od) throws Exception {
        String l;
        ArrayList<GtfLine> buff = new ArrayList<GtfLine>();
        GtfLine prev = null;
        while ((l = br.readLine()) != null) {
            if (l.trim().length() == 0) {
                // Blank lines carry no record; charAt(0) below would fail on "".
                continue;
            }
            if (l.charAt(0) == '#') {
                this.head.add(l);
                continue;
            }
            GtfLine gl = new GtfLine(l);
            if (gl.feature().equals(GtfLine.FEATURE_GENE)) {
                if (prev == null) {
                    prev = gl;
                } else if (!gl.getField(GtfLine.FIELD_GENE_ID).equals(prev.getField(GtfLine.FIELD_GENE_ID))) {
                    // A new gene starts: everything buffered belongs to the previous gene.
                    this.flush(buff, this.bws.get(GtfSplitter.category(prev.getField(GtfLine.FIELD_GENE_BIOTYPE))));
                    prev = gl;
                }
                String gtype = GtfSplitter.category(gl.getField(GtfLine.FIELD_GENE_BIOTYPE));
                if (!this.bws.containsKey(gtype)) {
                    this.openWriter(od, gtype);
                }
            }
            buff.add(gl);
        }
        if (!buff.isEmpty()) {
            if (prev == null) {
                throw new IllegalStateException(
                        "No gene feature found in " + this.f + "; cannot assign records to a category");
            }
            this.flush(buff, this.bws.get(GtfSplitter.category(prev.getField(GtfLine.FIELD_GENE_BIOTYPE))));
        }
    }

    /** Opens the output file for {@code gtype}, registers its writer and writes the header lines. */
    private void openWriter(File od, String gtype) throws Exception {
        System.out.println(gtype);
        File out = new File(od, FileName.baseName(this.f) + "_" + gtype + ".gtf");
        BufferedWriter bw = new BufferedWriter(new FileWriter(out));
        // Register before writing so the writer is closed even if a write below fails.
        this.bws.put(gtype, bw);
        for (String h : this.head) {
            bw.write(h + "\n");
        }
        bw.write("#!biotype:" + gtype + "\n");
    }

    /**
     * Closes the reader and every open writer, then forgets the writers so the
     * instance can be used again. The first close failure is rethrown unless
     * {@code suppress} is set.
     */
    private void closeAll(BufferedReader br, boolean suppress) throws Exception {
        Exception first = null;
        try {
            br.close();
        } catch (Exception e) {
            first = e;
        }
        for (BufferedWriter bw : this.bws.values()) {
            try {
                bw.close();
            } catch (Exception e) {
                if (first == null) {
                    first = e;
                }
            }
        }
        this.bws.clear();
        if (first != null && !suppress) {
            throw first;
        }
    }

    /** Writes every buffered record (after {@link #toOutputLine}) to {@code bw} and empties the buffer. */
    private void flush(ArrayList<GtfLine> buff, BufferedWriter bw) throws Exception {
        for (GtfLine l : buff) {
            bw.write(this.toOutputLine(l).toString() + "\n");
        }
        buff.clear();
    }

    /**
     * Hook for transforming a record before it is written; its {@code toString()}
     * is what ends up in the output file. This implementation returns the record unchanged.
     *
     * @param gl the record about to be written
     * @return the record to write
     * @throws Exception declared for subclasses
     */
    protected GtfLine toOutputLine(GtfLine gl) throws Exception {
        return gl;
    }

    /**
     * Looks up the category a biotype belongs to.
     *
     * @param type the biotype string, e.g. {@code "protein_coding"}
     * @return the name of the category in {@link #catstr} whose set contains
     *         {@code type}, or {@code "other"} if no category contains it
     */
    public static String category(String type) {
        for (java.util.Map.Entry<String, HashSet<String>> e : catstr.entrySet()) {
            if (e.getValue().contains(type)) {
                return e.getKey();
            }
        }
        return "other";
    }

    /** Registers {@code types} as the biotypes of {@code category} in {@link #catstr}. */
    private static void register(String category, String... types) {
        HashSet<String> set = new HashSet<String>();
        for (String t : types) {
            set.add(t);
        }
        catstr.put(category, set);
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code GtfSplitter [input.gtf [outputDir]]}. Without arguments
     * the original hard-coded input file is used. Without an output directory,
     * {@code <input base name>_split} next to the input file is used.
     *
     * @param args optional input file and output directory
     * @throws Exception on any failure while splitting
     */
    public static void main(String[] args) throws Exception {
        File f;
        File dir;
        if (args != null && args.length >= 1) {
            f = new File(args[0]);
            dir = f.getAbsoluteFile().getParentFile();
        } else {
            dir = new File("C:\\muxingu\\genome/human/hg19\\ass");
            f = new File(dir + "/Homo_sapiens.GRCh37.87.gtf");
        }
        File od = args != null && args.length >= 2
                ? new File(args[1])
                : new File(dir + "/" + FileName.baseName(f) + "_split");
        GtfSplitter gg = new GtfSplitter(f);
        gg.writeToOutdir(od);
    }
}

