package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.WordSegmenter;
import edu.stanford.nlp.process.WordSegmentingTokenizer;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.international.pennchinese.ChineseEscaper;
import edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InvalidClassException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.StreamCorruptedException;
import java.io.Writer;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ChineseLexiconAndWordSegmenter.class */
public class ChineseLexiconAndWordSegmenter implements Lexicon, WordSegmenter {
    // Lexicon that the Lexicon methods of this class delegate to.
    private final ChineseLexicon chineseLexicon;
    // Segmenter that segmentWords delegates to; it is also trained in train(Collection).
    private final WordSegmenter wordSegmenter;
    // Only assigned by the (String, Options) constructor, which copies it from the loaded instance.
    private Options op;
    // Version identifier used by Java serialization for this class.
    private static final long serialVersionUID = -6554995189795187918L;

    /**
     * Wraps an existing lexicon and word segmenter, and installs a tokenizer factory
     * built from the segmenter on {@link ChineseTreebankLanguagePack}.
     *
     * @param chineseLexicon lexicon that the Lexicon methods delegate to
     * @param wordSegmenter segmenter that segmentWords delegates to
     */
    public ChineseLexiconAndWordSegmenter(ChineseLexicon chineseLexicon, WordSegmenter wordSegmenter) {
        this.wordSegmenter = wordSegmenter;
        this.chineseLexicon = chineseLexicon;
        // Static, process-wide setting: the language pack tokenizes with this segmenter from now on.
        ChineseTreebankLanguagePack.setTokenizerFactory(WordSegmentingTokenizer.factory(this.wordSegmenter));
    }

    /**
     * Segments the given string by delegating to the wrapped word segmenter.
     *
     * @param str text to segment
     * @return whatever the wrapped segmenter returns for {@code str}
     */
    @Override
    public ArrayList<Word> segmentWords(String str) {
        return wordSegmenter.segmentWords(str);
    }

    /**
     * Delegates the known-word check for an int argument to the wrapped lexicon.
     *
     * @param i value passed unchanged to the wrapped lexicon
     * @return the wrapped lexicon's answer
     */
    @Override
    public boolean isKnown(int i) {
        return chineseLexicon.isKnown(i);
    }

    /**
     * Delegates the known-word check for a string argument to the wrapped lexicon.
     *
     * @param str value passed unchanged to the wrapped lexicon
     * @return the wrapped lexicon's answer
     */
    @Override
    public boolean isKnown(String str) {
        return chineseLexicon.isKnown(str);
    }

    /**
     * Returns the wrapped lexicon's rule iterator for the given two int arguments.
     *
     * @param i first int argument, forwarded unchanged
     * @param i2 second int argument, forwarded unchanged
     * @param str not forwarded; the wrapped lexicon always receives {@code null} here
     * @return the iterator produced by the wrapped lexicon
     */
    @Override
    public Iterator<IntTaggedWord> ruleIteratorByWord(int i, int i2, String str) {
        // NOTE(review): str is deliberately or accidentally dropped in favour of null - confirm intent.
        return chineseLexicon.ruleIteratorByWord(i, i2, null);
    }

    /**
     * Reports the rule count of the wrapped lexicon.
     *
     * @return the value returned by the wrapped lexicon's {@code numRules()}
     */
    @Override
    public int numRules() {
        return chineseLexicon.numRules();
    }

    /**
     * Trains both wrapped components on the same trees: the lexicon first, then the segmenter.
     *
     * @param collection training trees handed to both components
     */
    @Override
    public void train(Collection<Tree> collection) {
        chineseLexicon.train(collection);
        wordSegmenter.train(collection);
    }

    /**
     * Delegates scoring to the wrapped lexicon.
     *
     * @param intTaggedWord tagged word forwarded unchanged
     * @param i int argument forwarded unchanged
     * @return the score computed by the wrapped lexicon
     */
    @Override
    public float score(IntTaggedWord intTaggedWord, int i) {
        return chineseLexicon.score(intTaggedWord, i);
    }

    /**
     * Not supported by this class.
     *
     * @param str ignored
     * @throws UnsupportedOperationException always
     */
    @Override
    public void loadSegmenter(String str) {
        throw new UnsupportedOperationException();
    }

    /**
     * Lets the wrapped lexicon read its data from the given reader; the segmenter is not involved.
     *
     * @param bufferedReader source handed to the wrapped lexicon
     * @throws IOException propagated from the wrapped lexicon
     */
    @Override
    public void readData(BufferedReader bufferedReader) throws IOException {
        chineseLexicon.readData(bufferedReader);
    }

    /**
     * Lets the wrapped lexicon write its data to the given writer; the segmenter is not involved.
     *
     * @param writer destination handed to the wrapped lexicon
     * @throws IOException propagated from the wrapped lexicon
     */
    @Override
    public void writeData(Writer writer) throws IOException {
        chineseLexicon.writeData(writer);
    }

    /**
     * Serialization hook: restores the default-serialized state and then installs a tokenizer
     * factory built from the restored segmenter on {@link ChineseTreebankLanguagePack},
     * mirroring what the two-argument constructor does.
     *
     * @param objectInputStream stream the instance is being read from
     * @throws IOException propagated from {@code defaultReadObject}
     * @throws ClassNotFoundException propagated from {@code defaultReadObject}
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        // Must run after defaultReadObject so that wordSegmenter has been populated.
        ChineseTreebankLanguagePack.setTokenizerFactory(WordSegmentingTokenizer.factory(wordSegmenter));
    }

    /**
     * Counts the arguments that follow position {@code i} and do not start with {@code '-'},
     * stopping at the first one that does (or at the end of the array).
     *
     * <p>An empty-string argument is counted as a sub-argument; the previous
     * {@code charAt(0)} test threw {@code StringIndexOutOfBoundsException} on it.
     *
     * @param strArr full argument array
     * @param i index of the option whose sub-arguments are counted
     * @return number of consecutive non-option arguments directly after index {@code i}
     */
    private static int numSubArgs(String[] strArr, int i) {
        int last = i;
        while (last + 1 < strArr.length && !strArr[last + 1].startsWith("-")) {
            last++;
        }
        return last - i;
    }

    public ChineseLexiconAndWordSegmenter(Treebank treebank, Options options) {
        ChineseLexiconAndWordSegmenter segmenterDataFromTreebank = getSegmenterDataFromTreebank(treebank, options);
        this.chineseLexicon = segmenterDataFromTreebank.chineseLexicon;
        this.wordSegmenter = segmenterDataFromTreebank.wordSegmenter;
    }

    /**
     * Annotates and binarizes every tree of the treebank, then trains a fresh
     * {@code ChineseLexiconAndWordSegmenter} (obtained from {@code options.tlpParams.lex})
     * on the transformed trees. Progress is reported on standard output / standard error.
     *
     * @param treebank trees to train on
     * @param options supplies the language-pack parameters, {@code forceCNF} and the lexicon options
     * @return the trained instance
     * @throws ClassCastException if {@code options.tlpParams.lex(...)} does not produce a
     *         {@code ChineseLexiconAndWordSegmenter}
     */
    private static ChineseLexiconAndWordSegmenter getSegmenterDataFromTreebank(Treebank treebank, Options options) {
        System.out.println("Currently " + new Date());
        Timing.startTime();
        TreebankLangParserParams tlpParams = options.tlpParams;
        if (Test.verbose) {
            System.out.print("Training ");
            System.out.println(treebank.textualSummary());
        }

        System.out.print("Binarizing trees...");
        TreeAnnotatorAndBinarizer binarizer;
        if (Train.leftToRight) {
            binarizer = new TreeAnnotatorAndBinarizer(tlpParams.headFinder(), new LeftHeadFinder(), tlpParams, options.forceCNF, !Train.outsideFactor(), true);
        } else {
            binarizer = new TreeAnnotatorAndBinarizer(tlpParams, options.forceCNF, !Train.outsideFactor(), true);
        }

        // Only created when the collinsPunc training flag is on; null means "skip that step".
        CollinsPuncTransformer collinsPuncTransformer = null;
        if (Train.collinsPunc) {
            collinsPuncTransformer = new CollinsPuncTransformer(tlpParams.treebankLanguagePack());
        }

        // Typed collection instead of a raw ArrayList; train(Collection<Tree>) consumes it below.
        Collection<Tree> binarizedTrees = new ArrayList<Tree>();

        if (Train.selectiveSplit) {
            Train.splitters = ParentAnnotationStats.getSplitCategories(treebank, true, 0, Train.selectiveSplitCutOff, Train.tagSelectiveSplitCutOff, tlpParams.treebankLanguagePack());
            if (Test.verbose) {
                System.err.println("Parent split categories: " + Train.splitters);
            }
        }
        if (Train.selectivePostSplit) {
            Train.postSplitters = ParentAnnotationStats.getSplitCategories(treebank.transform(new TreeAnnotator(tlpParams.headFinder(), tlpParams)), true, 0, Train.selectivePostSplitCutOff, Train.tagSelectivePostSplitCutOff, tlpParams.treebankLanguagePack());
            if (Test.verbose) {
                System.err.println("Parent post annotation split categories: " + Train.postSplitters);
            }
        }

        if (Train.hSelSplit) {
            // Preliminary pass with selective splitting disabled; the transformed trees are discarded.
            // NOTE(review): presumably this pass lets the binarizer gather statistics - confirm.
            binarizer.setDoSelectiveSplit(false);
            for (Iterator<Tree> it = treebank.iterator(); it.hasNext();) {
                Tree tree = it.next();
                if (collinsPuncTransformer != null) {
                    tree = collinsPuncTransformer.transformTree(tree);
                }
                binarizer.transformTree(tree);
            }
            binarizer.setDoSelectiveSplit(true);
        }

        // Main pass: collect the transformed trees for training.
        for (Iterator<Tree> it = treebank.iterator(); it.hasNext();) {
            Tree tree = it.next();
            if (collinsPuncTransformer != null) {
                tree = collinsPuncTransformer.transformTree(tree);
            }
            binarizedTrees.add(binarizer.transformTree(tree));
        }
        Timing.tick("done.");
        if (Test.verbose) {
            binarizer.dumpStats();
        }

        System.out.print("Extracting Lexicon...");
        ChineseLexiconAndWordSegmenter trained = (ChineseLexiconAndWordSegmenter) options.tlpParams.lex(options.lexOptions);
        trained.train(binarizedTrees);
        Timing.tick("done.");
        return trained;
    }

    /**
     * Writes one line to the stream: a fixed header followed by every argument,
     * each preceded by a single space.
     *
     * @param strArr arguments to echo
     * @param printStream destination stream
     */
    private static void printArgs(String[] strArr, PrintStream printStream) {
        StringBuilder line = new StringBuilder("ChineseLexiconAndWordSegmenter invoked with arguments:");
        for (int k = 0; k < strArr.length; k++) {
            line.append(" ").append(strArr[k]);
        }
        printStream.println(line.toString());
    }

    /**
     * Serializes the given instance to the named destination using
     * {@code IOUtils.writeStreamFromString}. I/O failures are reported with a stack trace
     * on standard error and otherwise ignored (best effort).
     *
     * @param chineseLexiconAndWordSegmenter object to write
     * @param str destination understood by {@code IOUtils.writeStreamFromString}
     */
    static void saveSegmenterDataToSerialized(ChineseLexiconAndWordSegmenter chineseLexiconAndWordSegmenter, String str) {
        try {
            System.err.print("Writing segmenter in serialized format to file " + str + " ");
            ObjectOutputStream out = IOUtils.writeStreamFromString(str);
            try {
                out.writeObject(chineseLexiconAndWordSegmenter);
            } finally {
                // Close even when writeObject fails, so the underlying stream is not leaked.
                out.close();
            }
            System.err.println("done.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the lexicon data in text form to the named file: a {@code BEGIN LEXICON} line,
     * the output of {@code writeData} (skipped when the instance is {@code null}), and a blank line.
     * The output is gzip-compressed when the file name ends in {@code .gz}.
     * I/O failures are reported on standard error and otherwise ignored (best effort).
     *
     * @param chineseLexiconAndWordSegmenter instance whose data is written; may be {@code null}
     * @param str output file name
     */
    static void saveSegmenterDataToText(ChineseLexiconAndWordSegmenter chineseLexiconAndWordSegmenter, String str) {
        try {
            System.err.print("Writing parser in text grammar format to file " + str);
            FileOutputStream fileOut = new FileOutputStream(str);
            PrintWriter printWriter = null;
            try {
                // NOTE(review): PrintWriter over a raw stream uses the platform default charset.
                printWriter = new PrintWriter(str.endsWith(".gz")
                        ? new BufferedOutputStream(new GZIPOutputStream(fileOut))
                        : new BufferedOutputStream(fileOut));
                printWriter.println("BEGIN LEXICON");
                if (chineseLexiconAndWordSegmenter != null) {
                    chineseLexiconAndWordSegmenter.writeData(printWriter);
                }
                printWriter.println();
                System.err.print(".");
                printWriter.flush();
            } finally {
                // Release the file handle on every path, including a failing writeData or gzip setup.
                if (printWriter != null) {
                    printWriter.close();
                } else {
                    fileOut.close();
                }
            }
            System.err.println("done.");
        } catch (IOException e) {
            System.err.println("Trouble saving segmenter data to ASCII format.");
            e.printStackTrace();
        }
    }

    /**
     * Creates a memory treebank from {@code options.tlpParams} and loads trees into it from the
     * given path, restricted by the file filter when one is supplied. Progress goes to standard error.
     *
     * @param str treebank path to load
     * @param options supplies the language-pack parameters that create the treebank
     * @param fileFilter optional filter; {@code null} loads the path unfiltered
     * @return the loaded treebank
     */
    private static Treebank makeTreebank(String str, Options options, FileFilter fileFilter) {
        System.err.println("Training a segmenter from treebank dir: " + str);
        MemoryTreebank trees = options.tlpParams.memoryTreebank();
        System.err.print("Reading trees...");
        if (fileFilter != null) {
            trees.loadPath(str, fileFilter);
        } else {
            trees.loadPath(str);
        }
        Timing.tick("done [read " + trees.size() + " trees].");
        return trees;
    }

    /**
     * Loads a previously serialized instance via {@code getSegmenterDataFromFile} and copies
     * its options, lexicon and segmenter into this object.
     *
     * @param str location of the serialized segmenter, passed to {@code getSegmenterDataFromFile}
     * @param options passed to {@code getSegmenterDataFromFile}
     * @throws IllegalArgumentException if the loader returned {@code null}, i.e. no segmenter
     *         could be read (previously this surfaced as a {@code NullPointerException})
     */
    public ChineseLexiconAndWordSegmenter(String str, Options options) {
        ChineseLexiconAndWordSegmenter loaded = getSegmenterDataFromFile(str, options);
        if (loaded == null) {
            // The loader reports some failures by returning null rather than throwing.
            throw new IllegalArgumentException("Could not load segmenter from serialized file " + str);
        }
        this.op = loaded.op;
        this.chineseLexicon = loaded.chineseLexicon;
        this.wordSegmenter = loaded.wordSegmenter;
    }

    /**
     * Loads a segmenter from a serialized file by delegating to
     * {@code getSegmenterDataFromSerializedFile}.
     *
     * @param str location of the serialized segmenter
     * @param options currently unused
     * @return whatever {@code getSegmenterDataFromSerializedFile(str)} returns
     */
    public static ChineseLexiconAndWordSegmenter getSegmenterDataFromFile(String str, Options options) {
        final ChineseLexiconAndWordSegmenter loaded = getSegmenterDataFromSerializedFile(str);
        return loaded;
    }

    /**
     * Deserializes a {@code ChineseLexiconAndWordSegmenter} from a local file, or from a URL when
     * {@code str} starts with {@code http://}. The stream is gunzipped when {@code str} ends in
     * {@code .gz}. Progress is reported on standard error.
     *
     * <p>SECURITY NOTE(review): this performs Java-native deserialization, including of data
     * fetched over plain HTTP. Only use it with trusted sources.
     *
     * @param str file name or {@code http://} URL of the serialized segmenter
     * @return the deserialized instance, or {@code null} if the stream is corrupted or any other
     *         non-fatal error occurs while loading
     * @throws RuntimeException wrapping a {@code FileNotFoundException} or
     *         {@code InvalidClassException} raised while loading
     */
    protected static ChineseLexiconAndWordSegmenter getSegmenterDataFromSerializedFile(String str) {
        // Declared as the common supertype: the URL branch does not yield a FileInputStream.
        java.io.InputStream rawStream = null;
        try {
            System.err.print("Loading segmenter from serialized file " + str + " ...");
            if (str.startsWith("http://")) {
                rawStream = new URL(str).openConnection().getInputStream();
            } else {
                rawStream = new FileInputStream(str);
            }
            java.io.InputStream payload = str.endsWith(".gz") ? new GZIPInputStream(rawStream) : rawStream;
            ObjectInputStream objectInputStream = new ObjectInputStream(new BufferedInputStream(payload));
            ChineseLexiconAndWordSegmenter loaded = (ChineseLexiconAndWordSegmenter) objectInputStream.readObject();
            objectInputStream.close();
            System.err.println(" done.");
            return loaded;
        } catch (FileNotFoundException e) {
            System.err.println();
            throw new RuntimeException(e);
        } catch (InvalidClassException e) {
            System.err.println();
            throw new RuntimeException(e);
        } catch (StreamCorruptedException e) {
            // Still reported to the caller as null, but no longer silently.
            System.err.println();
            System.err.println("Serialized segmenter stream is corrupted: " + e.getMessage());
            return null;
        } catch (Exception e) {
            System.err.println();
            e.printStackTrace();
            return null;
        } finally {
            // Release the underlying stream on every path; closing twice is harmless.
            if (rawStream != null) {
                try {
                    rawStream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Command-line entry point: either trains a segmenter from a treebank ({@code -train}) or
     * loads a serialized one, then optionally saves it in serialized and/or text form.
     *
     * <p>Options recognised here (case-insensitive):
     * <ul>
     *   <li>{@code -train treebankPath ranges} or {@code -train treebankPath low high}</li>
     *   <li>{@code -encoding enc}: sets both input and output encoding</li>
     *   <li>{@code -loadFromSerializedFile path}</li>
     *   <li>{@code -saveToSerializedFile path}</li>
     *   <li>{@code -saveToTextFile path}</li>
     *   <li>{@code -treebank [testPath] ranges} or {@code -treebank testPath low high}</li>
     * </ul>
     * Any other option is offered to {@code options.tlpParams.setOptionFlag}; if that does not
     * consume it, it is reported as unknown and skipped.
     *
     * <p>Fixes relative to the previous version: sub-arguments of {@code -train} and
     * {@code -treebank} are now read before the index is advanced (they used to be read one
     * position too far), empty arguments no longer crash the option scan, and a trailing
     * {@code -treebank} without a path fails with a descriptive exception.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        boolean train = false;
        boolean saveToSerializedFile = false;
        boolean saveToTextFile = false;
        String serializedInputPath = null;
        String serializedOutputPath = null;
        String textOutputPath = null;
        String treebankPath = null;
        MemoryTreebank testTreebank = null;
        String testPath = null;
        FileFilter testFilter = null;
        FileFilter trainFilter = null;
        // The instance was never used; construction is kept in case it has side effects.
        new ChineseEscaper();
        int argIndex = 0;
        if (strArr.length < 1) {
            // NOTE(review): this usage text names LexicalizedParser, not this class.
            System.err.println("usage: java edu.stanford.nlp.parser.lexparser.LexicalizedParser parserFileOrUrl filename*");
            System.exit(1);
        }
        Options options = new Options();
        options.tlpParams = new ChineseTreebankParserParams();

        // Consume leading options; startsWith is safe for empty arguments, unlike charAt(0).
        while (argIndex < strArr.length && strArr[argIndex].startsWith("-")) {
            String flag = strArr[argIndex];
            if (flag.equalsIgnoreCase("-train")) {
                train = true;
                saveToSerializedFile = true;
                int subArgCount = numSubArgs(strArr, argIndex);
                argIndex++;
                if (subArgCount <= 1) {
                    throw new RuntimeException("Error: -train option must have treebankPath as first argument.");
                }
                treebankPath = strArr[argIndex];
                argIndex++;
                if (subArgCount == 2) {
                    // Read the ranges expression first, then step past it.
                    trainFilter = new NumberRangesFileFilter(strArr[argIndex], true);
                    argIndex++;
                } else if (subArgCount >= 3) {
                    try {
                        trainFilter = new NumberRangeFileFilter(Integer.parseInt(strArr[argIndex]), Integer.parseInt(strArr[argIndex + 1]), true);
                        argIndex += 2;
                    } catch (NumberFormatException e) {
                        // Not "low high": fall back to a single ranges expression.
                        trainFilter = new NumberRangesFileFilter(strArr[argIndex], true);
                        argIndex++;
                    }
                }
            } else if (flag.equalsIgnoreCase("-encoding")) {
                String encoding = strArr[argIndex + 1];
                options.tlpParams.setInputEncoding(encoding);
                options.tlpParams.setOutputEncoding(encoding);
                argIndex += 2;
            } else if (flag.equalsIgnoreCase("-loadFromSerializedFile")) {
                serializedInputPath = strArr[argIndex + 1];
                argIndex += 2;
            } else if (flag.equalsIgnoreCase("-saveToSerializedFile")) {
                saveToSerializedFile = true;
                serializedOutputPath = strArr[argIndex + 1];
                argIndex += 2;
            } else if (flag.equalsIgnoreCase("-saveToTextFile")) {
                saveToTextFile = true;
                textOutputPath = strArr[argIndex + 1];
                argIndex += 2;
            } else if (flag.equalsIgnoreCase("-treebank")) {
                int subArgCount = numSubArgs(strArr, argIndex);
                argIndex++;
                if (subArgCount == 1) {
                    // Only a ranges expression; the test path defaults to the train path later.
                    testFilter = new NumberRangesFileFilter(strArr[argIndex], true);
                    argIndex++;
                } else if (subArgCount > 1) {
                    testPath = strArr[argIndex];
                    argIndex++;
                    if (subArgCount == 2) {
                        testFilter = new NumberRangesFileFilter(strArr[argIndex], true);
                        argIndex++;
                    } else {
                        try {
                            testFilter = new NumberRangeFileFilter(Integer.parseInt(strArr[argIndex]), Integer.parseInt(strArr[argIndex + 1]), true);
                            argIndex += 2;
                        } catch (NumberFormatException e) {
                            // Not "low high": fall back to a single ranges expression.
                            testFilter = new NumberRangesFileFilter(strArr[argIndex], true);
                            argIndex++;
                        }
                    }
                }
            } else {
                int nextIndex = options.tlpParams.setOptionFlag(strArr, argIndex);
                if (nextIndex == argIndex) {
                    System.err.println("Unknown option ignored: " + strArr[argIndex]);
                    nextIndex++;
                }
                argIndex = nextIndex;
            }
        }

        TreebankLangParserParams tlpParams = options.tlpParams;
        ChineseLexiconAndWordSegmenter segmenter = null;
        if (!train && Test.verbose) {
            System.out.println("Currently " + new Date());
            printArgs(strArr, System.out);
        }
        if (train) {
            printArgs(strArr, System.out);
            // Defensive: the -train branch above always sets treebankPath or throws.
            if (treebankPath == null) {
                treebankPath = strArr[argIndex];
                argIndex++;
                if (strArr.length > argIndex + 1) {
                    try {
                        trainFilter = new NumberRangeFileFilter(Integer.parseInt(strArr[argIndex]), Integer.parseInt(strArr[argIndex + 1]), true);
                        argIndex += 2;
                    } catch (NumberFormatException e) {
                        trainFilter = new NumberRangesFileFilter(strArr[argIndex], true);
                        argIndex++;
                    }
                }
            }
            segmenter = new ChineseLexiconAndWordSegmenter(makeTreebank(treebankPath, options, trainFilter), options);
        } else {
            if (serializedInputPath == null) {
                // First positional argument names the serialized segmenter.
                serializedInputPath = strArr[argIndex];
                argIndex++;
            }
            try {
                segmenter = new ChineseLexiconAndWordSegmenter(serializedInputPath, options);
            } catch (IllegalArgumentException e) {
                System.err.println("Error loading segmenter, exiting...");
                // NOTE(review): exit status 0 on a load failure looks unintended - confirm.
                System.exit(0);
            }
        }

        Test.treePrint(tlpParams);
        if (testFilter != null) {
            if (testPath == null) {
                if (treebankPath == null) {
                    throw new RuntimeException("No test treebank path specified...");
                }
                System.err.println("No test treebank path specified.  Using train path: \"" + treebankPath + "\"");
                testPath = treebankPath;
            }
            testTreebank = tlpParams.testMemoryTreebank();
            testTreebank.loadPath(testPath, testFilter);
        }
        Train.sisterSplitters = new HashSet(Arrays.asList(tlpParams.sisterSplitters()));
        if (Test.verbose) {
            System.err.println("Lexicon is " + segmenter.getClass().getName());
        }
        tlpParams.pw();
        tlpParams.pw(System.err);

        if (saveToTextFile) {
            if (textOutputPath != null) {
                saveSegmenterDataToText(segmenter, textOutputPath);
            } else {
                System.err.println("Usage: must specify a text segmenter data output path");
            }
        }
        if (saveToSerializedFile) {
            if (serializedOutputPath == null && argIndex < strArr.length) {
                // Fall back to the next positional argument as the output file.
                serializedOutputPath = strArr[argIndex];
                argIndex++;
            }
            if (serializedOutputPath != null) {
                saveSegmenterDataToSerialized(segmenter, serializedOutputPath);
            } else if (textOutputPath == null && testTreebank == null) {
                System.err.println("usage: java edu.stanford.nlp.parser.lexparser.ChineseLexiconAndWordSegmenter-train trainFilesPath [start stop] serializedParserFilename");
            }
        }

        // A trailing "-treebank path [low high]" after the positional arguments is only
        // honoured when no test treebank was loaded above. The loaded treebank is not used further.
        if (testTreebank == null && argIndex < strArr.length && strArr[argIndex].equalsIgnoreCase("-treebank")) {
            MemoryTreebank trailingTreebank = tlpParams.testMemoryTreebank();
            if (argIndex + 1 >= strArr.length) {
                throw new RuntimeException("No test treebank path specified...");
            }
            if (strArr.length < argIndex + 4) {
                trailingTreebank.loadPath(strArr[argIndex + 1]);
            } else {
                trailingTreebank.loadPath(strArr[argIndex + 1], new NumberRangeFileFilter(Integer.parseInt(strArr[argIndex + 2]), Integer.parseInt(strArr[argIndex + 3]), true));
            }
        }
    }

    /**
     * Returns the unknown-word model held by the wrapped lexicon.
     *
     * @return the wrapped lexicon's unknown-word model
     */
    @Override
    public UnknownWordModel getUnknownWordModel() {
        return chineseLexicon.getUnknownWordModel();
    }

    /**
     * Replaces the unknown-word model of the wrapped lexicon.
     *
     * @param unknownWordModel model handed to the wrapped lexicon
     */
    @Override
    public void setUnknownWordModel(UnknownWordModel unknownWordModel) {
        chineseLexicon.setUnknownWordModel(unknownWordModel);
    }
}
