/*
 * Decompiled with CFR 0.152.
 */
package org.forester.application;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;
import org.forester.io.parsers.FastaParser;
import org.forester.io.parsers.GeneralMsaParser;
import org.forester.msa.DeleteableMsa;
import org.forester.msa.Msa;
import org.forester.msa.MsaInferrer;
import org.forester.msa.MsaMethods;
import org.forester.msa_compactor.Chart;
import org.forester.msa_compactor.MsaCompactor;
import org.forester.msa_compactor.MsaProperties;
import org.forester.util.CommandLineArguments;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;

/**
 * Command-line front end for {@link MsaCompactor}.
 * <p>
 * Usage: {@code msa_compactor [options] <msa input file> [output file base]}.
 * Depending on the options given, the program prints basic information about
 * the input alignment, removes sequences below a minimal length, removes
 * sequences until a target (count, gap-ratio, or MSA length) is met, or, when
 * none of these is requested, only produces a chart.
 */
public class msa_compactor {
    // Number formats used for the summary printed by printMsaInfo.
    private static final NumberFormat NF_1 = new DecimalFormat("0.#");
    private static final NumberFormat NF_4 = new DecimalFormat("0.####");

    static {
        NF_1.setRoundingMode(RoundingMode.HALF_UP);
        NF_4.setRoundingMode(RoundingMode.HALF_UP);
    }

    // Command line option names (see printHelp for their meaning).
    private static final String HELP_OPTION_1 = "help";
    private static final String HELP_OPTION_2 = "h";
    private static final String REMOVE_WORST_OFFENDERS_OPTION = "r";
    private static final String AV_GAPINESS_OPTION = "g";
    private static final String STEP_OPTION = "s";
    private static final String LENGTH_OPTION = "l";
    private static final String REALIGN_OPTION = "a";
    private static final String INFO_ONLY_OPTION = "i";
    private static final String STEP_FOR_DIAGNOSTICS_OPTION = "sd";
    private static final String MIN_LENGTH_OPTION = "ml";
    private static final String GAP_RATIO_LENGTH_OPTION = "gr";
    private static final String REPORT_ENTROPY = "e";
    private static final String OUTPUT_FORMAT_OPTION = "f";
    private static final String OUTPUT_REMOVED_SEQS_OPTION = "ro";
    private static final String MAFFT_OPTIONS = "mo";
    private static final String PERFORM_PHYLOGENETIC_INFERENCE = "t";
    private static final String PATH_TO_MAFFT_OPTION = "mafft";
    private static final String DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn";

    // Program identification, printed in the banner.
    private static final String PRG_NAME = "msa_compactor";
    private static final String PRG_DESC = "multiple sequence alignment compactor";
    private static final String PRG_VERSION = "0.3";
    private static final String PRG_DATE = "140508";
    private static final String E_MAIL = "czmasek@sanfordburham.org";
    private static final String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";

    /**
     * Program entry point: parses and validates the command line, reads the
     * input alignment, echoes the effective settings and runs the requested
     * operation on a {@link MsaCompactor}.
     * <p>
     * Invalid option values are reported through
     * {@code ForesterUtil.fatalError}; conflicting option combinations print
     * the help text and exit.
     *
     * @param args the command line: options, the input MSA file and, optionally,
     *             the output file base
     */
    public static void main(String[] args) {
        try {
            final CommandLineArguments cla = new CommandLineArguments(args);
            if (cla.isOptionSet(HELP_OPTION_1) || cla.isOptionSet(HELP_OPTION_2)
                    || cla.getNumberOfNames() < 1 || cla.getNumberOfNames() > 2) {
                printHelp();
                System.exit(0);
            }
            final File infile = cla.getFile(0);
            File outfile = null;
            if (cla.getNumberOfNames() > 1) {
                outfile = cla.getFile(1);
            }

            // Defaults; negative values mean "option not given".
            int worstRemove = -1;
            double avGap = -1.0;
            int length = -1;
            int step = 1;
            String pathToMafft = null;
            int stepForDiagnostics = 1;
            int minLength = -1;
            double gapRatio = -1.0;
            Msa.MSA_FORMAT outputFormat = Msa.MSA_FORMAT.FASTA;
            File removedSeqsOutBase = null;
            String mafftOptions = "--auto";

            final ArrayList<String> allowedOptions = new ArrayList<String>();
            allowedOptions.add(REMOVE_WORST_OFFENDERS_OPTION);
            allowedOptions.add(AV_GAPINESS_OPTION);
            allowedOptions.add(LENGTH_OPTION);
            allowedOptions.add(REALIGN_OPTION);
            allowedOptions.add(DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION);
            allowedOptions.add(STEP_OPTION);
            allowedOptions.add(PATH_TO_MAFFT_OPTION);
            allowedOptions.add(STEP_FOR_DIAGNOSTICS_OPTION);
            allowedOptions.add(MIN_LENGTH_OPTION);
            allowedOptions.add(GAP_RATIO_LENGTH_OPTION);
            allowedOptions.add(REPORT_ENTROPY);
            allowedOptions.add(OUTPUT_FORMAT_OPTION);
            allowedOptions.add(OUTPUT_REMOVED_SEQS_OPTION);
            allowedOptions.add(MAFFT_OPTIONS);
            allowedOptions.add(PERFORM_PHYLOGENETIC_INFERENCE);
            allowedOptions.add(INFO_ONLY_OPTION);
            final String disallowed = cla.validateAllowedOptionsAsString(allowedOptions);
            if (disallowed.length() > 0) {
                ForesterUtil.fatalError(PRG_NAME, "unknown option(s): " + disallowed);
            }

            // Read the alignment; the stream is closed as soon as parsing is done
            // so the file handle is not held for the rest of the run.
            DeleteableMsa msa;
            final FileInputStream is = new FileInputStream(infile);
            try {
                msa = FastaParser.isLikelyFasta(infile)
                        ? DeleteableMsa.createInstance(FastaParser.parseMsa(is))
                        : DeleteableMsa.createInstance(GeneralMsaParser.parseMsa(is));
            }
            finally {
                is.close();
            }
            final DescriptiveStatistics initialStats = MsaMethods.calculateEffectiveLengthStatistics(msa);

            if (cla.isOptionSet(INFO_ONLY_OPTION)) {
                printInfo(infile, msa, initialStats);
                System.exit(0);
            }

            // Without any of the removal options only a chart is produced.
            final boolean chartOnly = !cla.isOptionSet(LENGTH_OPTION)
                    && !cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)
                    && !cla.isOptionSet(AV_GAPINESS_OPTION)
                    && !cla.isOptionSet(MIN_LENGTH_OPTION);
            if (!chartOnly && outfile == null) {
                ForesterUtil.fatalError(PRG_NAME, "outfile file missing");
            }

            if (cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)) {
                worstRemove = cla.getOptionValueAsInt(REMOVE_WORST_OFFENDERS_OPTION);
                if (worstRemove < 1 || worstRemove >= msa.getNumberOfSequences() - 1) {
                    ForesterUtil.fatalError(PRG_NAME,
                            "number of worst offender sequences to remove is out of range: " + worstRemove);
                }
            }
            if (cla.isOptionSet(AV_GAPINESS_OPTION)) {
                if (cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)) {
                    printHelp();
                    System.exit(0);
                }
                avGap = cla.getOptionValueAsDouble(AV_GAPINESS_OPTION);
                if (avGap < 0.0 || avGap >= 1.0) {
                    ForesterUtil.fatalError(PRG_NAME, "target gap-ratio is out of range: " + avGap);
                }
            }
            if (cla.isOptionSet(LENGTH_OPTION)) {
                if (cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION) || cla.isOptionSet(AV_GAPINESS_OPTION)) {
                    printHelp();
                    System.exit(0);
                }
                length = cla.getOptionValueAsInt(LENGTH_OPTION);
                if (length >= msa.getLength()) {
                    ForesterUtil.fatalError(PRG_NAME, "target length is out of range [longer than MSA ("
                            + msa.getLength() + ")]: " + length);
                }
                else if ((double) length < initialStats.getMin()) {
                    ForesterUtil.fatalError(PRG_NAME,
                            "target length is out of range [shorter than the shortest sequence ("
                                    + initialStats.getMin() + ") ]: " + length);
                }
            }
            if (cla.isOptionSet(MIN_LENGTH_OPTION)) {
                // Minimal-length filtering is a stand-alone mode and cannot be
                // combined with any of the following options.
                if (cla.isOptionSet(LENGTH_OPTION)
                        || cla.isOptionSet(REMOVE_WORST_OFFENDERS_OPTION)
                        || cla.isOptionSet(AV_GAPINESS_OPTION)
                        || cla.isOptionSet(STEP_OPTION)
                        || cla.isOptionSet(REALIGN_OPTION)
                        || cla.isOptionSet(PATH_TO_MAFFT_OPTION)
                        || cla.isOptionSet(STEP_FOR_DIAGNOSTICS_OPTION)
                        || cla.isOptionSet(REPORT_ENTROPY)
                        || cla.isOptionSet(OUTPUT_REMOVED_SEQS_OPTION)
                        || cla.isOptionSet(PERFORM_PHYLOGENETIC_INFERENCE)) {
                    printHelp();
                    System.exit(0);
                }
                minLength = cla.getOptionValueAsInt(MIN_LENGTH_OPTION);
                if (minLength < 2 || (double) minLength > initialStats.getMax()) {
                    ForesterUtil.fatalError(PRG_NAME,
                            "value for minimal sequence length is out of range: " + minLength);
                }
            }
            if (cla.isOptionSet(STEP_OPTION)) {
                step = cla.getOptionValueAsInt(STEP_OPTION);
                if (step < 1 || step > msa.getNumberOfSequences()
                        || (worstRemove > 0 && step > worstRemove)) {
                    ForesterUtil.fatalError(PRG_NAME, "value for step is out of range: " + step);
                }
            }
            final boolean realign = cla.isOptionSet(REALIGN_OPTION);
            if (cla.isOptionSet(PATH_TO_MAFFT_OPTION)) {
                if (!realign) {
                    ForesterUtil.fatalError(PRG_NAME, "no need to indicate path to MAFFT without realigning");
                }
                pathToMafft = cla.getOptionValueAsCleanString(PATH_TO_MAFFT_OPTION);
            }
            final boolean norm = !cla.isOptionSet(DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION);
            if (cla.isOptionSet(STEP_FOR_DIAGNOSTICS_OPTION)) {
                stepForDiagnostics = cla.getOptionValueAsInt(STEP_FOR_DIAGNOSTICS_OPTION);
                if (stepForDiagnostics < 1 || stepForDiagnostics > msa.getNumberOfSequences()
                        || (worstRemove > 0 && stepForDiagnostics > worstRemove)) {
                    ForesterUtil.fatalError(PRG_NAME,
                            "value for diagnostic step is out of range: " + stepForDiagnostics);
                }
            }
            // NOTE(review): gapRatio is validated and echoed below, but nothing in
            // this method hands it to MsaCompactor -- confirm whether deleting
            // columns by gap ratio was meant to be wired in here.
            if (cla.isOptionSet(GAP_RATIO_LENGTH_OPTION)) {
                gapRatio = cla.getOptionValueAsDouble(GAP_RATIO_LENGTH_OPTION);
                if (gapRatio < 0.0 || gapRatio > 1.0) {
                    ForesterUtil.fatalError(PRG_NAME, "gap ratio is out of range: " + gapRatio);
                }
            }
            final boolean reportEntropy = cla.isOptionSet(REPORT_ENTROPY);
            if (cla.isOptionSet(OUTPUT_FORMAT_OPTION)) {
                final String formatCode = cla.getOptionValueAsCleanString(OUTPUT_FORMAT_OPTION);
                if (formatCode.equalsIgnoreCase("p")) {
                    outputFormat = Msa.MSA_FORMAT.PHYLIP;
                }
                else if (formatCode.equalsIgnoreCase("f")) {
                    outputFormat = Msa.MSA_FORMAT.FASTA;
                }
                else if (formatCode.equalsIgnoreCase("n")) {
                    outputFormat = Msa.MSA_FORMAT.NEXUS;
                }
                else {
                    ForesterUtil.fatalError(PRG_NAME, "illegal or empty output format option: " + formatCode);
                }
            }
            if (cla.isOptionSet(OUTPUT_REMOVED_SEQS_OPTION)) {
                removedSeqsOutBase = new File(cla.getOptionValueAsCleanString(OUTPUT_REMOVED_SEQS_OPTION));
            }
            if (realign) {
                if (ForesterUtil.isEmpty(pathToMafft)) {
                    pathToMafft = MsaCompactor.guessPathToMafft();
                }
                checkPathToMafft(pathToMafft);
                if (cla.isOptionSet(MAFFT_OPTIONS)) {
                    mafftOptions = cla.getOptionValueAsCleanString(MAFFT_OPTIONS);
                    if (ForesterUtil.isEmpty(mafftOptions) || mafftOptions.length() < 3) {
                        ForesterUtil.fatalError(PRG_NAME, "illegal or empty MAFFT options: " + mafftOptions);
                    }
                }
            }
            else if (cla.isOptionSet(MAFFT_OPTIONS)) {
                ForesterUtil.fatalError(PRG_NAME, "no need to indicate MAFFT options without realigning");
            }
            final boolean performPhylogeneticInference = cla.isOptionSet(PERFORM_PHYLOGENETIC_INFERENCE);
            if (chartOnly) {
                if (outfile != null || removedSeqsOutBase != null) {
                    ForesterUtil.fatalError(PRG_NAME,
                            "chart only, no outfile(s) produced, thus no need to indicate output file(s)");
                }
                if (!realign && cla.isOptionSet(STEP_OPTION)) {
                    ForesterUtil.fatalError(PRG_NAME,
                            "chart only, no re-aligning, thus no need to use step for output and re-aligning; use -sd instead");
                }
            }
            if (performPhylogeneticInference && stepForDiagnostics != 1) {
                ForesterUtil.fatalError(PRG_NAME,
                        "step for diagnostics reports needs to be set to 1 for tree calculation");
            }

            // Echo the effective settings.
            printInfo(infile, msa, initialStats);
            if (!chartOnly) {
                System.out.println("Output                               : " + outfile);
            }
            if (removedSeqsOutBase != null) {
                System.out.println("Write removed sequences to           : " + removedSeqsOutBase);
            }
            if (worstRemove > 0) {
                System.out.println("Number of worst offenders to remove  : " + worstRemove);
            }
            // 0.0 is accepted by the range check above, so "set" is >= 0.0
            // (the "not given" default is -1.0).
            if (avGap >= 0.0) {
                System.out.println("Target gap-ratio                     : " + avGap);
            }
            if (length > 0) {
                System.out.println("Target MSA length                    : " + length);
            }
            if (minLength > 1) {
                System.out.println("Minimal effective sequence length    : " + minLength);
            }
            if (gapRatio > -1.0) {
                System.out.println("Maximum allowed gap ratio per column : " + gapRatio);
            }
            if (outfile != null || removedSeqsOutBase != null) {
                System.out.print("Output format                        : ");
                if (outputFormat == Msa.MSA_FORMAT.FASTA) {
                    System.out.println("fasta");
                }
                else if (outputFormat == Msa.MSA_FORMAT.PHYLIP) {
                    System.out.println("phylip");
                }
                else if (outputFormat == Msa.MSA_FORMAT.NEXUS) {
                    System.out.println("nexus");
                }
            }
            if (minLength == -1) {
                if (chartOnly && !realign) {
                    System.out.println("Step for output and re-aligning      : n/a");
                }
                else if (chartOnly) {
                    System.out.println("Step for re-aligning                 : " + step);
                }
                else {
                    System.out.println("Step for output and re-aligning      : " + step);
                }
                System.out.println("Step for diagnostics reports         : " + stepForDiagnostics);
                System.out.println("Calculate normalized Shannon Entropy : " + reportEntropy);
                if (norm) {
                    System.out.println("Normalize                            : with individual, effective sequence lengths");
                }
                else {
                    System.out.println("Normalize                            : with MSA length");
                }
                System.out.println("Realign with MAFFT                   : " + realign);
                if (realign) {
                    System.out.println("MAFFT options                        : " + mafftOptions);
                }
                System.out.println("Simple tree (Kimura distances, NJ)   : " + performPhylogeneticInference);
            }
            System.out.println();

            // Run the requested operation.
            final int initialNumberOfSeqs = msa.getNumberOfSequences();
            final MsaCompactor mc = new MsaCompactor(msa);
            mc.setInfileName(infile.getName());
            if (worstRemove > 0 || avGap >= 0.0 || length > 0 || minLength != -1) {
                mc.setOutputFormat(outputFormat);
                mc.setOutFileBase(outfile);
            }
            if (minLength != -1) {
                mc.removeSequencesByMinimalLength(minLength);
            }
            else {
                mc.setPeformPhylogenticInference(performPhylogeneticInference);
                if (removedSeqsOutBase != null) {
                    mc.setRemovedSeqsOutBase(removedSeqsOutBase);
                }
                mc.setNorm(norm);
                mc.setRealign(realign);
                if (realign) {
                    mc.setPathToMafft(pathToMafft);
                    mc.setMafftOptions(mafftOptions);
                }
                mc.setStep(step);
                mc.setStepForDiagnostics(stepForDiagnostics);
                mc.setCalculateNormalizedShannonEntropy(reportEntropy);

                final List<MsaProperties> msaProps;
                if (worstRemove > 0) {
                    msaProps = mc.removeWorstOffenders(worstRemove);
                }
                else if (avGap >= 0.0) {
                    msaProps = mc.removeViaGapAverage(avGap);
                }
                else if (length > 0) {
                    msaProps = mc.removeViaLength(length);
                }
                else {
                    msaProps = mc.chart(step, realign, norm);
                }
                Chart.display(msaProps, initialNumberOfSeqs, reportEntropy, infile.getName());
                System.out.println();
                System.out.println("Final MSA properties");
                printMsaInfo(msa, MsaMethods.calculateEffectiveLengthStatistics(msa));
            }
        }
        catch (IllegalArgumentException e) {
            ForesterUtil.fatalError(PRG_NAME, e.getMessage());
        }
        catch (IOException e) {
            ForesterUtil.fatalError(PRG_NAME, e.getMessage());
        }
        catch (Exception e) {
            ForesterUtil.unexpectedFatalError(PRG_NAME, e);
        }
    }

    /**
     * Prints the program banner, the input file and the MSA summary.
     *
     * @param file                  the input MSA file (only echoed)
     * @param deleteableMsa         the alignment to summarize
     * @param descriptiveStatistics sequence length statistics for {@code deleteableMsa}
     */
    private static void printInfo(File file, DeleteableMsa deleteableMsa, DescriptiveStatistics descriptiveStatistics) {
        ForesterUtil.printProgramInformation(PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW,
                ForesterUtil.getForesterLibraryInformation());
        System.out.println("Input MSA                            : " + file);
        printMsaInfo(deleteableMsa, descriptiveStatistics);
    }

    /**
     * Prints length, sequence count, length statistics, gap statistics and
     * normalized Shannon entropy of an alignment to standard output.
     *
     * @param deleteableMsa         the alignment to summarize
     * @param descriptiveStatistics statistics used for the median/mean/max/min length lines
     */
    private static void printMsaInfo(DeleteableMsa deleteableMsa, DescriptiveStatistics descriptiveStatistics) {
        System.out.println("MSA length                           : " + deleteableMsa.getLength());
        System.out.println("Number of sequences                  : " + deleteableMsa.getNumberOfSequences());
        System.out.println("Median sequence length               : " + NF_1.format(descriptiveStatistics.median()));
        System.out.println("Mean sequence length                 : " + NF_1.format(descriptiveStatistics.arithmeticMean()));
        System.out.println("Max sequence length                  : " + (int) descriptiveStatistics.getMax());
        System.out.println("Min sequence length                  : " + (int) descriptiveStatistics.getMin());
        System.out.println("Gap ratio                            : " + NF_4.format(MsaMethods.calcGapRatio(deleteableMsa)));
        System.out.println("Mean gap count per sequence          : " + NF_1.format(MsaMethods.calcNumberOfGapsStats(deleteableMsa).arithmeticMean()));
        System.out.println("Normalized Shannon Entropy (entn7)   : " + NF_4.format(MsaMethods.calcNormalizedShannonsEntropy(7, deleteableMsa)));
        System.out.println("Normalized Shannon Entropy (entn21)  : " + NF_4.format(MsaMethods.calcNormalizedShannonsEntropy(21, deleteableMsa)));
    }

    /**
     * Reports a fatal error if no MAFFT path is given or if
     * {@code MsaInferrer.isInstalled} rejects the given path.
     *
     * @param string the path to the MAFFT executable; may be null or empty
     */
    private static void checkPathToMafft(String string) {
        if (ForesterUtil.isEmpty(string)) {
            ForesterUtil.fatalError(PRG_NAME, "no MAFFT executable found, use -\"mafft=<path to MAFFT>\" option");
        }
        else if (!MsaInferrer.isInstalled(string)) {
            ForesterUtil.fatalError(PRG_NAME, "no MAFFT executable at \"" + string + "\"");
        }
    }

    /**
     * Prints the program banner and the usage/option summary to standard
     * output. The line for the realign option also shows the MAFFT path
     * guessed by {@code MsaCompactor.guessPathToMafft()}, if any.
     */
    private static void printHelp() {
        ForesterUtil.printProgramInformation(PRG_NAME, PRG_DESC, PRG_VERSION, PRG_DATE, E_MAIL, WWW,
                ForesterUtil.getForesterLibraryInformation());
        final String guessedPath = MsaCompactor.guessPathToMafft();
        final String mafftHint;
        if (!ForesterUtil.isEmpty(guessedPath)) {
            mafftHint = " (using " + guessedPath + ")";
        }
        else {
            mafftHint = " (no path to MAFFT found, use -\"mafft=<path to MAFFT>\" option)";
        }
        System.out.println("Usage:");
        System.out.println();
        System.out.println("msa_compactor [options] <msa input file> [output file base]");
        System.out.println();
        System.out.println(" options: ");
        System.out.println();
        System.out.println("   -i             to only display some basic information about the MSA");
        System.out.println("   -r=<integer>   number of worst offender sequences to remove");
        System.out.println("   -l=<integer>   target MSA length");
        System.out.println("   -g=<decimal>   target gap-ratio (0.0-1.0)");
        System.out.println("   -a             to realign using MAFFT" + mafftHint);
        System.out.println("   -mo=<string>   options for MAFFT (default: --auto)");
        System.out.println("   -s=<integer>   step for output and re-aligning (default: 1)");
        System.out.println("   -sd=<integer>  step for diagnostics reports (default: 1)");
        System.out.println("   -e             to calculate normalized Shannon Entropy (not recommended for very large alignments)");
        System.out.println("   -f=<f|p|n>     format for output alignments: f for fasta (default), p for phylip, or n for nexus");
        System.out.println("   -ro=<file>     to output the removed sequences");
        System.out.println("   -ml=<integer>  minimal effective sequence length (for deleting of shorter sequences)");
        System.out.println("   -gr=<decimal>  maximal allowed gap ratio per column (for deleting of columns) (0.0-1.0)");
        System.out.println("   -t             to calculate a simple phylogenetic tree (Kimura distances, NJ)");
        System.out.println("   -nn            to normalize gap-contributions with MSA length, instead of individual effective sequence lengths");
        System.out.println();
        System.out.println();
        System.out.println();
    }
}

