/*
 * Decompiled with CFR 0.152.
 */
package net.java.sen.compiler;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.ShortBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Vector;
import net.java.sen.compiler.CostMatrixBuilder;
import net.java.sen.compiler.StringCTokenTuple;
import net.java.sen.compiler.VirtualTupleList;
import net.java.sen.dictionary.CToken;
import net.java.sen.dictionary.DictionaryUtil;
import net.java.sen.trie.TrieBuilder;
import net.java.sen.util.CSVData;
import net.java.sen.util.CSVParser;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Compiles the CSV dictionary sources into the binary ".sen" data files.
 *
 * <p>All work is done by the constructor. It reads {@code connection.csv} and
 * {@code dictionary.csv} (plus any custom dictionary CSV files) and writes the
 * connection cost matrix, the part-of-speech data and index, the token file,
 * the trie file and a header file. Every file name is a relative path, so the
 * files are resolved against the JVM's current working directory.
 */
public class DictionaryBuilder {
    private static final String DICTIONARY_CSV_FILENAME = "dictionary.csv";
    private static final String CONNECTION_CSV_FILENAME = "connection.csv";
    private static final String CONNECTION_COST_DATA_FILENAME = "connectionCost.sen";
    private static final String PART_OF_SPEECH_DATA_FILENAME = "partOfSpeech.sen";
    private static final String PART_OF_SPEECH_INDEX_FILENAME = "posIndex.sen";
    private static final String TOKEN_DATA_FILENAME = "token.sen";
    private static final String TRIE_DATA_FILENAME = "trie.sen";
    private static final String HEADER_DATA_FILENAME = "header.sen";

    /** Cost stored in every connection matrix cell that no CSV rule overrides. */
    private static final short DEFAULT_CONNECTION_COST = 10000;

    /** Index of the first part-of-speech column in a dictionary CSV row. */
    private static final int PART_OF_SPEECH_START = 2;

    /** Number of columns, starting at {@link #PART_OF_SPEECH_START}, that form the connection key. */
    private static final int PART_OF_SPEECH_SIZE = 7;

    /**
     * Number of columns, counted from the part-of-speech start column, that
     * {@code createPartOfSpeechDataFile} actually reads: four part-of-speech
     * levels, conjugational type, conjugational form, basic form, reading and
     * pronunciation (offsets 0 to 8).
     */
    private static final int REQUIRED_FEATURE_FIELDS = 9;

    /** Size of the connection cost file header: three 16-bit dimension values. */
    private static final int CONNECTION_COST_HEADER_BYTES = 6;

    /** Largest number of 16-bit cells a single mapped region (at most Integer.MAX_VALUE bytes) can hold. */
    private static final long MAX_CONNECTION_COST_CELLS = Integer.MAX_VALUE / 2;

    // Connection keys of the three synthetic tokens (beginning of sentence,
    // end of sentence, unknown word) looked up in the cost matrix builders.
    private static final String BOS_PART_OF_SPEECH = "\u6587\u982d,*,*,*,*,*,*";
    private static final String EOS_PART_OF_SPEECH = "\u6587\u672b,*,*,*,*,*,*";
    private static final String UNKNOWN_PART_OF_SPEECH = "\u540d\u8a5e,\u30b5\u5909\u63a5\u7d9a,*,*,*,*,*";

    /**
     * Returns a larger copy of the given array (about 1.5 times the length).
     *
     * <p>The result is always at least one element longer than the input, so
     * a caller that grows the array and then stores one more value can never
     * overflow, even for arrays of length 0 or 1.
     *
     * @param current the array to grow
     * @return a new array holding the same leading values
     */
    private static short[] resize(short[] current) {
        int grownLength = Math.max(current.length + 1, (int)((double)current.length * 1.5));
        short[] tmp = new short[grownLength];
        System.arraycopy(current, 0, tmp, 0, current.length);
        return tmp;
    }

    /**
     * Expands a compound field of the form <code>{a/b}tail</code> into its
     * alternatives ({@code "atail"}, {@code "btail"}).
     *
     * <p>A field that does not start with '{' or has no '}' after the first
     * character is returned unchanged as a single-element list. The tail is
     * only honoured when the field splits into exactly three pieces around
     * the braces; otherwise it is treated as empty.
     *
     * @param compoundField the raw CSV field
     * @return the list of alternatives
     */
    private List<String> splitCompoundField(String compoundField) {
        boolean compound = compoundField.length() > 0
                && compoundField.charAt(0) == '{'
                && compoundField.indexOf('}') > 0;
        if (!compound) {
            List<String> single = new ArrayList<String>(1);
            single.add(compoundField);
            return single;
        }
        String[] parts = compoundField.split("[{}]");
        // "{}" splits into an empty array (trailing empty strings are dropped),
        // so guard the head access instead of indexing blindly.
        String head = parts.length > 1 ? parts[1] : "";
        String tail = parts.length == 3 ? parts[2] : "";
        String[] heads = head.split("/");
        List<String> alternatives = new ArrayList<String>(4);
        for (int i = 0; i < heads.length; ++i) {
            alternatives.add(heads[i] + tail);
        }
        return alternatives;
    }

    /**
     * Returns true when every character of every string lies in the range
     * U+30A0 to U+30FF (the range the original encoding check tests for).
     */
    private static boolean isKatakanaOnly(List<String> values) {
        for (String value : values) {
            for (int i = 0; i < value.length(); ++i) {
                char ch = value.charAt(i);
                if (ch < '\u30a0' || ch > '\u30ff') {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Writes a reading or pronunciation string using the record's encoding:
     * {@code DictionaryUtil.writeKatakana} when {@code encoding} is 0, raw
     * UTF-16 chars otherwise.
     */
    private static void writeEncodedText(DataOutputStream out, String text, int encoding) throws IOException {
        if (encoding == 0) {
            DictionaryUtil.writeKatakana(out, text);
        } else {
            out.writeChars(text);
        }
    }

    /**
     * Returns the position of {@code value} in {@code table}, appending it
     * first when it is not present yet.
     */
    private static int internIndex(List<String> table, String value) {
        int index = table.indexOf(value);
        if (index < 0) {
            index = table.size();
            table.add(value);
        }
        return index;
    }

    /** Writes a string table as a 16-bit entry count followed by each entry in modified UTF-8. */
    private static void writeStringTable(DataOutputStream out, List<String> table) throws IOException {
        out.writeChar(table.size());
        for (String entry : table) {
            out.writeUTF(entry);
        }
    }

    /** Creates a token whose three connection attributes are looked up for the given connection key. */
    private static CToken createConnectionToken(CostMatrixBuilder[] matrixBuilders, String connectionKey) {
        CToken token = new CToken();
        token.rcAttr2 = (short)matrixBuilders[0].getDicId(connectionKey);
        token.rcAttr1 = (short)matrixBuilders[1].getDicId(connectionKey);
        token.lcAttr = (short)matrixBuilders[2].getDicId(connectionKey);
        return token;
    }

    /**
     * Reads all dictionary CSV files, writes the part-of-speech data file and
     * its index file, fills {@code dictionaryList} with one token per CSV row
     * (sorted before returning) and stores the BOS, EOS and unknown-word
     * tokens in {@code standardCTokens[0..2]}.
     *
     * <p>Each row must have at least
     * {@code partOfSpeechStart + max(partOfSpeechSize, 9)} fields: column 0 is
     * the surface form, column 1 the cost, and the columns from
     * {@code partOfSpeechStart} hold the features described at
     * {@link #REQUIRED_FEATURE_FIELDS}.
     *
     * @param dictionaryCSVFilenames dictionary CSV files, processed in order
     * @param partOfSpeechDataFilename output file for the per-token feature records
     * @param partOfSpeechIndexFilename output file for the three string tables
     * @param matrixBuilders the three cost matrix builders used to resolve connection ids
     * @param partOfSpeechStart index of the first part-of-speech column
     * @param partOfSpeechSize number of columns forming the connection key
     * @param charset charset of the CSV files
     * @param bosPartOfSpeech connection key of the beginning-of-sentence token
     * @param eosPartOfSpeech connection key of the end-of-sentence token
     * @param unknownPartOfSpeech connection key of the unknown-word token
     * @param dictionaryList receives every (surface form, token) pair
     * @param standardCTokens receives the BOS, EOS and unknown tokens at indices 0, 1, 2
     * @throws IOException if a file cannot be read or written
     * @throws RuntimeException if a CSV row has too few fields, or fewer pronunciations than readings
     */
    private void createPartOfSpeechDataFile(List<String> dictionaryCSVFilenames, String partOfSpeechDataFilename, String partOfSpeechIndexFilename, CostMatrixBuilder[] matrixBuilders, int partOfSpeechStart, int partOfSpeechSize, String charset, String bosPartOfSpeech, String eosPartOfSpeech, String unknownPartOfSpeech, VirtualTupleList dictionaryList, CToken[] standardCTokens) throws IOException {
        CSVData connectionKeyData = new CSVData();
        List<String> posIndex = new ArrayList<String>();
        List<String> conjTypeIndex = new ArrayList<String>();
        List<String> conjFormIndex = new ArrayList<String>();
        // The code below reads up to offset 8 from partOfSpeechStart, so the
        // row must be long enough for that as well as for the connection key.
        int requiredFields = partOfSpeechStart + Math.max(partOfSpeechSize, REQUIRED_FEATURE_FIELDS);

        DataOutputStream outputStream = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(partOfSpeechDataFilename)));
        try {
            for (String dictionaryCSVFilename : dictionaryCSVFilenames) {
                FileInputStream csvStream = new FileInputStream(dictionaryCSVFilename);
                try {
                    CSVParser parser = new CSVParser(csvStream, charset);
                    String[] csvValues;
                    while ((csvValues = parser.nextTokens()) != null) {
                        if (csvValues.length < requiredFields) {
                            throw new RuntimeException("format error:" + parser.currentLine());
                        }
                        List<String> readings = this.splitCompoundField(csvValues[partOfSpeechStart + 7]);
                        List<String> pronunciations = this.splitCompoundField(csvValues[partOfSpeechStart + 8]);
                        // Every reading is paired with the pronunciation at the same
                        // position; surplus pronunciations are ignored.
                        if (pronunciations.size() < readings.size()) {
                            throw new RuntimeException("format error (fewer pronunciations than readings):" + parser.currentLine());
                        }

                        connectionKeyData.clear();
                        for (int i = partOfSpeechStart; i < partOfSpeechStart + partOfSpeechSize; ++i) {
                            connectionKeyData.append(csvValues[i]);
                        }
                        CToken ctoken = createConnectionToken(matrixBuilders, connectionKeyData.toString());
                        // Byte offset of this token's record in the part-of-speech data file.
                        ctoken.partOfSpeechIndex = outputStream.size();
                        ctoken.length = (short)csvValues[0].length();
                        ctoken.cost = (short)Integer.parseInt(csvValues[1]);
                        dictionaryList.add(csvValues[0], ctoken);

                        // Join the non-"*" part-of-speech levels with '-'.
                        StringBuilder partOfSpeechBuilder = new StringBuilder();
                        for (int i = partOfSpeechStart; i < partOfSpeechStart + 4; ++i) {
                            if (csvValues[i].equals("*")) {
                                continue;
                            }
                            partOfSpeechBuilder.append(csvValues[i]);
                            partOfSpeechBuilder.append("-");
                        }
                        // Drop the trailing '-'; the builder is empty when all four levels are "*".
                        String partOfSpeech = partOfSpeechBuilder.length() > 0
                                ? partOfSpeechBuilder.substring(0, partOfSpeechBuilder.length() - 1)
                                : "";
                        String conjugationalType = csvValues[partOfSpeechStart + 4];
                        String conjugationalForm = csvValues[partOfSpeechStart + 5];
                        String basicForm = csvValues[partOfSpeechStart + 6];

                        DictionaryUtil.writeVInt(outputStream, internIndex(posIndex, partOfSpeech));
                        DictionaryUtil.writeVInt(outputStream, internIndex(conjTypeIndex, conjugationalType));
                        DictionaryUtil.writeVInt(outputStream, internIndex(conjFormIndex, conjugationalForm));

                        // A basic form equal to the surface form is stored as length 0.
                        if (basicForm.equals(csvValues[0])) {
                            DictionaryUtil.writeVInt(outputStream, 0);
                        } else {
                            DictionaryUtil.writeVInt(outputStream, basicForm.length());
                            outputStream.writeChars(basicForm);
                        }

                        // Encoding 0 (katakana writer) is used only when every reading
                        // and pronunciation consists solely of U+30A0..U+30FF characters.
                        int encoding = (isKatakanaOnly(readings) && isKatakanaOnly(pronunciations)) ? 0 : 1;
                        DictionaryUtil.writeVInt(outputStream, (readings.size() << 1) | encoding);
                        for (int i = 0; i < readings.size(); ++i) {
                            String reading = readings.get(i);
                            String pronunciation = pronunciations.get(i);
                            // Low bit of the length flags whether a separate pronunciation follows.
                            boolean hasOwnPronunciation = !pronunciation.equals(reading);
                            DictionaryUtil.writeVInt(outputStream, (reading.length() << 1) | (hasOwnPronunciation ? 1 : 0));
                            writeEncodedText(outputStream, reading, encoding);
                            if (hasOwnPronunciation) {
                                DictionaryUtil.writeVInt(outputStream, pronunciation.length());
                                writeEncodedText(outputStream, pronunciation, encoding);
                            }
                        }
                    }
                } finally {
                    csvStream.close();
                }
            }
        } finally {
            outputStream.close();
        }

        DataOutputStream indexStream = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(partOfSpeechIndexFilename)));
        try {
            writeStringTable(indexStream, posIndex);
            writeStringTable(indexStream, conjTypeIndex);
            writeStringTable(indexStream, conjFormIndex);
        } finally {
            indexStream.close();
        }

        dictionaryList.sort();

        standardCTokens[0] = createConnectionToken(matrixBuilders, bosPartOfSpeech);
        standardCTokens[1] = createConnectionToken(matrixBuilders, eosPartOfSpeech);
        CToken unknownCToken = createConnectionToken(matrixBuilders, unknownPartOfSpeech);
        unknownCToken.partOfSpeechIndex = -1;
        standardCTokens[2] = unknownCToken;
    }

    /**
     * Reads the connection CSV and writes the connection cost matrix file.
     *
     * <p>Each CSV row supplies three rule strings (columns 0 to 2) and a cost
     * (column 3). The output file starts with the three matrix dimensions as
     * 16-bit values, followed by {@code size1 * size2 * size3} 16-bit costs.
     * Every cell is first set to {@code defaultCost}; the rules are then
     * applied in file order, so a later rule overwrites an earlier one.
     *
     * @param connectionCSVFilename the connection CSV to read
     * @param connectionCostDataFilename the matrix file to write
     * @param defaultCost cost for cells that no rule covers
     * @param charset charset of the CSV file
     * @return the three built cost matrix builders
     * @throws IOException if a row has fewer than four fields, the matrix is
     *         too large to map, or a file cannot be read or written
     */
    private CostMatrixBuilder[] createConnectionCostFile(String connectionCSVFilename, String connectionCostDataFilename, short defaultCost, String charset) throws IOException {
        CostMatrixBuilder[] matrixBuilders = new CostMatrixBuilder[]{new CostMatrixBuilder(), new CostMatrixBuilder(), new CostMatrixBuilder()};
        List<String> rule1 = new ArrayList<String>();
        List<String> rule2 = new ArrayList<String>();
        List<String> rule3 = new ArrayList<String>();
        short[] scores = new short[30000];
        int ruleCount = 0;

        FileInputStream csvStream = new FileInputStream(connectionCSVFilename);
        try {
            CSVParser parser = new CSVParser(csvStream, charset);
            String[] t;
            while ((t = parser.nextTokens()) != null) {
                if (t.length < 4) {
                    throw new IOException("Connection cost CSV format error");
                }
                matrixBuilders[0].add(t[0]);
                rule1.add(t[0]);
                matrixBuilders[1].add(t[1]);
                rule2.add(t[1]);
                matrixBuilders[2].add(t[2]);
                rule3.add(t[2]);
                if (ruleCount == scores.length) {
                    scores = DictionaryBuilder.resize(scores);
                }
                scores[ruleCount++] = (short)Integer.parseInt(t[3]);
            }
        } finally {
            csvStream.close();
        }

        matrixBuilders[0].build();
        matrixBuilders[1].build();
        matrixBuilders[2].build();
        int size1 = matrixBuilders[0].size();
        int size2 = matrixBuilders[1].size();
        int size3 = matrixBuilders[2].size();

        // Compute the cell count in long arithmetic so an oversized matrix is
        // reported instead of silently wrapping around in int arithmetic.
        long cellCount = (long)size1 * size2;
        if (cellCount <= MAX_CONNECTION_COST_CELLS) {
            cellCount *= size3;
        }
        if (cellCount > MAX_CONNECTION_COST_CELLS) {
            throw new IOException("Connection cost matrix too large: " + size1 + " x " + size2 + " x " + size3);
        }
        long matrixSizeBytes = cellCount * 2L;

        MappedByteBuffer buffer;
        RandomAccessFile file = new RandomAccessFile(connectionCostDataFilename, "rw");
        try {
            file.setLength(0L);
            file.writeShort(size1);
            file.writeShort(size2);
            file.writeShort(size3);
            file.setLength(CONNECTION_COST_HEADER_BYTES + matrixSizeBytes);
            FileChannel indexChannel = file.getChannel();
            buffer = indexChannel.map(FileChannel.MapMode.READ_WRITE, CONNECTION_COST_HEADER_BYTES, matrixSizeBytes);
        } finally {
            // Closing the file also closes its channel; an established mapping
            // stays valid afterwards.
            file.close();
        }

        ShortBuffer shortBuffer = buffer.asShortBuffer();
        int cells = (int)cellCount;
        for (int i = 0; i < cells; ++i) {
            shortBuffer.put(i, defaultCost);
        }
        for (int i = 0; i < ruleCount; ++i) {
            Vector<Integer> r1 = matrixBuilders[0].getRuleIdList(rule1.get(i));
            Vector<Integer> r2 = matrixBuilders[1].getRuleIdList(rule2.get(i));
            Vector<Integer> r3 = matrixBuilders[2].getRuleIdList(rule3.get(i));
            for (int ii1 : r1) {
                for (int ii2 : r2) {
                    for (int ii3 : r3) {
                        int position = size3 * (size2 * ii1 + ii2) + ii3;
                        shortBuffer.put(position, scores[i]);
                    }
                }
            }
        }
        buffer.force();
        return matrixBuilders;
    }

    /**
     * Writes the token file and collects the key/value pairs for the trie.
     *
     * <p>The file starts with the three standard tokens, followed by every
     * token of {@code tupleList} in list order. Consecutive tuples with the
     * same key form one run; each run yields one trie entry whose value is
     * {@code runLength + (runStart << 8)}.
     *
     * @param tokenDataFilename the token file to write
     * @param standardCTokens the BOS, EOS and unknown tokens (indices 0 to 2)
     * @param tupleList the sorted dictionary entries
     * @return the trie keys and values; only the first {@code size} entries are used
     * @throws IOException if the token file cannot be written
     */
    private TrieData createTokenFile(String tokenDataFilename, CToken[] standardCTokens, VirtualTupleList tupleList) throws IOException {
        int tupleCount = tupleList.size();
        TrieData trieData = new TrieData();
        trieData.values = new int[tupleCount];
        trieData.keys = new String[tupleCount];
        trieData.size = 0;
        int runStart = 0;
        int runLength = 0;
        String previousKey = "";
        DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tokenDataFilename)));
        try {
            CToken.write(out, standardCTokens[0]);
            CToken.write(out, standardCTokens[1]);
            CToken.write(out, standardCTokens[2]);
            for (int i = 0; i < tupleCount; ++i) {
                StringCTokenTuple tuple = tupleList.get(i);
                String key = tuple.key;
                if (i != 0 && !previousKey.equals(key)) {
                    // The key changed: emit the run that just ended.
                    // NOTE(review): the run length shares the int with the shifted start
                    // index, so a run longer than 255 entries would spill into the
                    // start index bits - confirm against the reader.
                    trieData.keys[trieData.size] = tupleList.get(runStart).key;
                    trieData.values[trieData.size] = runLength + (runStart << 8);
                    ++trieData.size;
                    runLength = 1;
                    runStart = i;
                } else {
                    ++runLength;
                }
                previousKey = key;
                CToken.write(out, tuple.value);
            }
            out.flush();
        } finally {
            out.close();
        }
        // Emit the final run; an empty dictionary has none.
        if (tupleCount > 0) {
            trieData.keys[trieData.size] = tupleList.get(runStart).key;
            trieData.values[trieData.size] = runLength + (runStart << 8);
            ++trieData.size;
        }
        return trieData;
    }

    /**
     * Builds the trie from the collected keys and values and writes it to the given file.
     *
     * @param trieDataFilename the trie file to write
     * @param trieData keys, values and entry count produced by {@code createTokenFile}
     * @throws IOException if the trie file cannot be written
     */
    private void createTrieFile(String trieDataFilename, TrieData trieData) throws IOException {
        TrieBuilder builder = new TrieBuilder(trieData.keys, trieData.values, trieData.size);
        builder.build(trieDataFilename);
    }

    /**
     * Writes the header file: the lengths (as 32-bit ints) of the connection
     * cost, part-of-speech, token and trie data files, in that order. The
     * files are looked up under their default names.
     *
     * @param headerFilename the header file to write
     * @throws IOException if the header file cannot be written
     */
    private void createHeaderFile(String headerFilename) throws IOException {
        DataOutputStream os = new DataOutputStream(new FileOutputStream(headerFilename));
        try {
            os.writeInt((int)new File(CONNECTION_COST_DATA_FILENAME).length());
            os.writeInt((int)new File(PART_OF_SPEECH_DATA_FILENAME).length());
            os.writeInt((int)new File(TOKEN_DATA_FILENAME).length());
            os.writeInt((int)new File(TRIE_DATA_FILENAME).length());
        } finally {
            os.close();
        }
    }

    /**
     * Builds all dictionary data files.
     *
     * <p>The standard {@code dictionary.csv} is always processed first,
     * followed by the custom dictionaries in the given order. All CSV files
     * are read as UTF-8.
     *
     * @param customDictionaryCSVFilenames additional dictionary CSV files;
     *        {@code null} is treated as "none"
     * @throws IOException if any input cannot be read or any output cannot be written
     */
    public DictionaryBuilder(String[] customDictionaryCSVFilenames) throws IOException {
        List<String> dictionaryCSVFilenames = new ArrayList<String>();
        dictionaryCSVFilenames.add(DICTIONARY_CSV_FILENAME);
        if (customDictionaryCSVFilenames != null) {
            dictionaryCSVFilenames.addAll(Arrays.asList(customDictionaryCSVFilenames));
        }
        String charset = "UTF-8";
        CostMatrixBuilder[] matrixBuilders = this.createConnectionCostFile(CONNECTION_CSV_FILENAME, CONNECTION_COST_DATA_FILENAME, DEFAULT_CONNECTION_COST, charset);
        VirtualTupleList dictionaryList = new VirtualTupleList();
        CToken[] standardCTokens = new CToken[3];
        this.createPartOfSpeechDataFile(dictionaryCSVFilenames, PART_OF_SPEECH_DATA_FILENAME, PART_OF_SPEECH_INDEX_FILENAME, matrixBuilders, PART_OF_SPEECH_START, PART_OF_SPEECH_SIZE, charset, BOS_PART_OF_SPEECH, EOS_PART_OF_SPEECH, UNKNOWN_PART_OF_SPEECH, dictionaryList, standardCTokens);
        // Drop references that are no longer needed so they can be collected
        // before the remaining build steps run.
        matrixBuilders = null;
        TrieData trieData = this.createTokenFile(TOKEN_DATA_FILENAME, standardCTokens, dictionaryList);
        dictionaryList = null;
        this.createTrieFile(TRIE_DATA_FILENAME, trieData);
        this.createHeaderFile(HEADER_DATA_FILENAME);
    }

    /**
     * Parallel key/value arrays handed to the trie builder. Only the first
     * {@code size} slots of each array are populated.
     */
    private static class TrieData {
        public String[] keys;
        public int[] values;
        public int size;

        private TrieData() {
        }
    }
}

