/*
 * Decompiled with CFR 0.152.
 */
package picard.sam;

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceDictionaryCodec;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.AsciiWriter;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Md5CalculatingOutputStream;
import htsjdk.samtools.util.StringUtil;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.programgroups.ReferenceProgramGroup;
import picard.nio.PicardHtsPath;
import picard.util.SequenceDictionaryUtils;

@DocumentedFeature
@CommandLineProgramProperties(summary="Creates a sequence dictionary for a reference sequence.  This tool creates a sequence dictionary file (with \".dict\" extension) from a reference sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a header but no SAMRecords, and the header contains only sequence records.<br /><br />The reference sequence can be gzipped (both .fasta and .fasta.gz are supported).<h4>Usage example:</h4><pre>java -jar picard.jar CreateSequenceDictionary \\ <br />      R=reference.fasta \\ <br />      O=reference.dict</pre><hr />", oneLineSummary="Creates a sequence dictionary for a reference sequence.  ", programGroup=ReferenceProgramGroup.class)
public class CreateSequenceDictionary
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Creates a sequence dictionary for a reference sequence.  ";
    static final String USAGE_DETAILS = "This tool creates a sequence dictionary file (with \".dict\" extension) from a reference sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a header but no SAMRecords, and the header contains only sequence records.<br /><br />The reference sequence can be gzipped (both .fasta and .fasta.gz are supported).<h4>Usage example:</h4><pre>java -jar picard.jar CreateSequenceDictionary \\ <br />      R=reference.fasta \\ <br />      O=reference.dict</pre><hr />";
    private static final Log logger = Log.getInstance(CreateSequenceDictionary.class);
    @Argument(doc="Output SAM file containing only the sequence dictionary. By default it will use the base name of the input reference with the .dict extension", shortName="O", optional=true)
    public PicardHtsPath OUTPUT;
    @Argument(shortName="AS", doc="Put into AS field of sequence dictionary entry if supplied", optional=true)
    public String GENOME_ASSEMBLY;
    @Argument(shortName="UR", doc="Put into UR field of sequence dictionary entry.  If not supplied, input reference file is used", optional=true)
    public String URI;
    @Argument(shortName="SP", doc="Put into SP field of sequence dictionary entry", optional=true)
    public String SPECIES;
    @Argument(doc="Make sequence name the first word from the > line in the fasta file.  By default the entire contents of the > line is used, excluding leading and trailing whitespace.")
    public boolean TRUNCATE_NAMES_AT_WHITESPACE = true;
    @Argument(doc="Stop after writing this many sequences.  For testing.")
    public int NUM_SEQUENCES = Integer.MAX_VALUE;
    @Argument(shortName="AN", doc="Optional file containing the alternative names for the contigs. Tools may use this information to consider different contig notations as identical (e.g: 'chr1' and '1'). The alternative names will be put into the appropriate @AN annotation for each contig. No header. First column is the original name, the second column is an alternative name. One contig may have more than one alternative name.", optional=true)
    public File ALT_NAMES = null;
    private final MessageDigest md5;
    private static final Pattern ALTERNATIVE_CONTIG_NAME_PATTERN = Pattern.compile("[0-9A-Za-z][0-9A-Za-z\\*\\+@\\|\\-]*");
    private static final String AN_ATTRIBUTE = "AN";

    public CreateSequenceDictionary() {
        try {
            this.md5 = MessageDigest.getInstance("MD5");
        }
        catch (NoSuchAlgorithmException e) {
            throw new PicardException("MD5 algorithm not found", e);
        }
    }

    public SAMSequenceDictionary makeSequenceDictionary(File referenceFile) {
        Iterable<SAMSequenceRecord> samSequenceRecordsIterable = this.getSamSequenceRecordsIterable();
        ArrayList<SAMSequenceRecord> ret = new ArrayList<SAMSequenceRecord>();
        HashSet<String> sequenceNames = new HashSet<String>();
        for (SAMSequenceRecord rec : samSequenceRecordsIterable) {
            if (sequenceNames.contains(rec.getSequenceName())) {
                throw new PicardException("Sequence name appears more than once in reference: " + rec.getSequenceName());
            }
            sequenceNames.add(rec.getSequenceName());
            ret.add(rec);
        }
        return new SAMSequenceDictionary(ret);
    }

    private Iterable<SAMSequenceRecord> getSamSequenceRecordsIterable() {
        return () -> {
            SequenceDictionaryUtils.SamSequenceRecordsIterator iterator = new SequenceDictionaryUtils.SamSequenceRecordsIterator(this.referenceSequence.getReferencePath(), this.TRUNCATE_NAMES_AT_WHITESPACE);
            iterator.setGenomeAssembly(this.GENOME_ASSEMBLY);
            iterator.setSpecies(this.SPECIES);
            iterator.setUri(this.URI);
            return iterator;
        };
    }

    @Override
    protected String[] customCommandLineValidation() {
        if (this.URI == null) {
            this.URI = this.referenceSequence.getHtsPath().getURIString();
        }
        if (this.OUTPUT == null) {
            Path outputPath = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(this.referenceSequence.getReferencePath());
            this.OUTPUT = new PicardHtsPath(outputPath.toString());
            logger.info("Output dictionary will be written in ", this.OUTPUT);
        }
        return super.customCommandLineValidation();
    }

    @Override
    protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
        return new CreateSeqDictReferenceArgumentCollection();
    }

    @Override
    protected int doWork() {
        int sequencesWritten = 0;
        if (Files.exists(this.OUTPUT.toPath(), new LinkOption[0])) {
            throw new PicardException(this.OUTPUT.getURIString() + " already exists.  Delete this file and try again, or specify a different output file.");
        }
        if (this.OUTPUT.getScheme().equals("file")) {
            IOUtil.assertFileIsWritable(this.OUTPUT.toPath().toFile());
        }
        Map<String, Set<String>> aliasesByContig = this.loadContigAliasesMap();
        try (BufferedWriter writer = this.makeWriter();){
            Iterable<SAMSequenceRecord> samSequenceRecordIterable = this.getSamSequenceRecordsIterable();
            SAMSequenceDictionaryCodec samDictCodec = new SAMSequenceDictionaryCodec(writer);
            samDictCodec.encodeHeaderLine(false);
            for (SAMSequenceRecord samSequenceRecord : samSequenceRecordIterable) {
                Set<String> aliases = aliasesByContig.get(samSequenceRecord.getSequenceName());
                if (aliases != null) {
                    samSequenceRecord.setAttribute(AN_ATTRIBUTE, String.join((CharSequence)",", aliases));
                }
                samDictCodec.encodeSequenceRecord(samSequenceRecord);
                if (++sequencesWritten < this.NUM_SEQUENCES) continue;
                break;
            }
        }
        catch (IOException e) {
            throw new PicardException("Can't write to or close output file " + this.OUTPUT.getURIString(), e);
        }
        catch (IllegalArgumentException e) {
            if (Files.exists(this.OUTPUT.toPath(), new LinkOption[0])) {
                try {
                    Files.delete(this.OUTPUT.toPath());
                }
                catch (IOException e2) {
                    throw new PicardException("Unknown problem encountered, and failed to delete the incomplete output. " + e2.getMessage(), e);
                }
            }
            throw new PicardException("Unknown problem. Partial dictionary file was deleted.", e);
        }
        return 0;
    }

    private BufferedWriter makeWriter() throws IOException {
        return new BufferedWriter(new AsciiWriter(this.CREATE_MD5_FILE ? new Md5CalculatingOutputStream(Files.newOutputStream(this.OUTPUT.toPath(), StandardOpenOption.CREATE_NEW), new PicardHtsPath(this.OUTPUT.getURIString() + ".md5").toPath()) : Files.newOutputStream(this.OUTPUT.toPath(), StandardOpenOption.CREATE_NEW)));
    }

    private Map<String, Set<String>> loadContigAliasesMap() throws PicardException {
        if (this.ALT_NAMES == null) {
            return Collections.emptyMap();
        }
        HashMap<String, Set<String>> aliasesByContig = new HashMap<String, Set<String>>();
        try {
            for (String line : IOUtil.slurpLines(this.ALT_NAMES)) {
                if (StringUtil.isBlank(line)) continue;
                int tab = line.indexOf(9);
                if (tab == -1) {
                    throw new IOException("tabulation missing in " + line);
                }
                String contigName = line.substring(0, tab);
                String altName = line.substring(tab + 1);
                if (StringUtil.isBlank(contigName)) {
                    throw new IOException("empty contig in " + line);
                }
                if (StringUtil.isBlank(altName)) {
                    throw new IOException("empty alternative name in " + line);
                }
                if (altName.equals(contigName)) continue;
                try {
                    SAMSequenceRecord.validateSequenceName(altName);
                }
                catch (SAMException exception) {
                    throw new IOException("Illegal alternative reference sequence name in " + line, exception);
                }
                if (aliasesByContig.containsKey(altName)) {
                    throw new IOException("alternate name " + altName + " previously defined as a contig in " + line);
                }
                if (aliasesByContig.keySet().stream().filter(K2 -> !K2.equals(contigName)).anyMatch(K2 -> ((Set)aliasesByContig.get(K2)).contains(contigName))) {
                    throw new IOException("contig  " + contigName + " previously defined as an alternate name in " + line);
                }
                if (!aliasesByContig.containsKey(contigName)) {
                    aliasesByContig.put(contigName, new HashSet());
                }
                ((Set)aliasesByContig.get(contigName)).add(altName);
            }
            return aliasesByContig;
        }
        catch (IOException e) {
            throw new PicardException("Can't read alias file " + String.valueOf(this.ALT_NAMES), e);
        }
    }

    public static class CreateSeqDictReferenceArgumentCollection
    implements ReferenceArgumentCollection {
        @Argument(doc="Input reference fasta or fasta.gz", shortName="R")
        public PicardHtsPath REFERENCE;

        @Override
        public PicardHtsPath getHtsPath() {
            return this.REFERENCE;
        }

        @Override
        public File getReferenceFile() {
            return ReferenceArgumentCollection.getFileSafe(this.REFERENCE, logger);
        }
    }
}

