package edu.stanford.nlp.sequences;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.XMLBeginEndIterator;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/sequences/TrueCasingDocumentReaderAndWriter.class */
public class TrueCasingDocumentReaderAndWriter implements DocumentReaderAndWriter<CoreLabel> {
    private static final long serialVersionUID = 1731527027473052481L;
    private static final Pattern sgml = Pattern.compile("<[^>]*>");
    private static final Pattern allLower = Pattern.compile("[^A-Z]*?[a-z]+[^A-Z]*?");
    private static final Pattern allUpper = Pattern.compile("[^a-z]*?[A-Z]+[^a-z]*?");
    private static final Pattern startUpper = Pattern.compile("[A-Z].*");
    private static WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<>();
    public static Set<String> knownWords;

    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void init(SeqClassifierFlags seqClassifierFlags) {
    }

    public static boolean known(String str) {
        return knownWords.contains(str.toLowerCase());
    }

    /* JADX WARN: Multi-variable type inference failed */
    @Override // edu.stanford.nlp.objectbank.IteratorFromReaderFactory
    public Iterator<List<CoreLabel>> getIterator(Reader reader) {
        ArrayList<List> arrayList = new ArrayList();
        String slurpReader = IOUtils.slurpReader(reader);
        HashSet hashSet = new HashSet();
        HashSet hashSet2 = new HashSet();
        XMLBeginEndIterator xMLBeginEndIterator = new XMLBeginEndIterator(new StringReader(slurpReader), "TEXT");
        while (xMLBeginEndIterator.hasNext()) {
            PTBTokenizer<CoreLabel> newPTBTokenizer = PTBTokenizer.newPTBTokenizer(new StringReader((String) xMLBeginEndIterator.next()), false, true);
            ArrayList arrayList2 = new ArrayList();
            HashSet<String> hashSet3 = new HashSet();
            while (newPTBTokenizer.hasNext()) {
                CoreLabel coreLabel = (CoreLabel) newPTBTokenizer.next();
                hashSet3.add(coreLabel.word().toLowerCase());
                if (!sgml.matcher(coreLabel.word()).matches()) {
                    arrayList2.add(coreLabel);
                } else if (arrayList2.size() > 0) {
                    arrayList.addAll(wts.process(arrayList2));
                    arrayList2 = new ArrayList();
                }
            }
            if (arrayList2.size() > 0) {
                arrayList.addAll(wts.process(arrayList2));
            }
            for (String str : hashSet3) {
                if (!hashSet2.contains(str)) {
                    if (hashSet.contains(str)) {
                        hashSet.remove(str);
                        hashSet2.add(str);
                    } else {
                        hashSet.add(str);
                    }
                }
            }
        }
        XMLBeginEndIterator xMLBeginEndIterator2 = new XMLBeginEndIterator(new StringReader(slurpReader), "TXT");
        while (xMLBeginEndIterator2.hasNext()) {
            PTBTokenizer<CoreLabel> newPTBTokenizer2 = PTBTokenizer.newPTBTokenizer(new StringReader((String) xMLBeginEndIterator2.next()), false, true);
            ArrayList arrayList3 = new ArrayList();
            HashSet<String> hashSet4 = new HashSet();
            while (newPTBTokenizer2.hasNext()) {
                CoreLabel coreLabel2 = (CoreLabel) newPTBTokenizer2.next();
                hashSet4.add(coreLabel2.word().toLowerCase());
                if (!sgml.matcher(coreLabel2.word()).matches()) {
                    arrayList3.add(coreLabel2);
                } else if (arrayList3.size() > 0) {
                    arrayList.addAll(wts.process(arrayList3));
                    arrayList3 = new ArrayList();
                }
            }
            if (arrayList3.size() > 0) {
                arrayList.addAll(wts.process(arrayList3));
            }
            for (String str2 : hashSet4) {
                if (!hashSet2.contains(str2)) {
                    if (hashSet.contains(str2)) {
                        hashSet.remove(str2);
                        hashSet2.add(str2);
                    } else {
                        hashSet.add(str2);
                    }
                }
            }
        }
        knownWords = hashSet2;
        knownWords.addAll(hashSet);
        ArrayList arrayList4 = new ArrayList();
        for (List<CoreLabel> list : arrayList) {
            System.err.println(list);
            ArrayList arrayList5 = new ArrayList();
            int i = 0;
            for (CoreLabel coreLabel3 : list) {
                CoreLabel coreLabel4 = new CoreLabel();
                if (allLower.matcher(coreLabel3.word()).matches()) {
                    coreLabel4.set(CoreAnnotations.AnswerAnnotation.class, "LOWER");
                } else if (allUpper.matcher(coreLabel3.word()).matches()) {
                    coreLabel4.set(CoreAnnotations.AnswerAnnotation.class, "UPPER");
                } else if (startUpper.matcher(coreLabel3.word()).matches()) {
                    coreLabel4.set(CoreAnnotations.AnswerAnnotation.class, "INIT_UPPER");
                } else {
                    coreLabel4.set(CoreAnnotations.AnswerAnnotation.class, "O");
                }
                coreLabel4.setWord(coreLabel3.word().toLowerCase());
                coreLabel4.set(CoreAnnotations.UnknownAnnotation.class, hashSet.contains(coreLabel3.word().toLowerCase()) ? "true" : "false");
                coreLabel4.set(CoreAnnotations.PositionAnnotation.class, Integer.toString(i));
                if (((String) coreLabel4.get(CoreAnnotations.UnknownAnnotation.class)).equals("true")) {
                    System.err.println(String.valueOf(coreLabel4.word()) + " :: " + ((String) coreLabel4.get(CoreAnnotations.UnknownAnnotation.class)) + " :: " + ((String) coreLabel4.get(CoreAnnotations.PositionAnnotation.class)));
                }
                arrayList5.add(coreLabel4);
                i++;
            }
            System.err.println();
            arrayList4.add(arrayList5);
        }
        return arrayList4.iterator();
    }

    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void printAnswers(List<CoreLabel> list, PrintWriter printWriter) {
        for (CoreLabel coreLabel : list) {
            printWriter.print((String) coreLabel.get(CoreAnnotations.BeforeAnnotation.class));
            String word = coreLabel.word();
            if (((String) coreLabel.get(CoreAnnotations.AnswerAnnotation.class)).equals("UPPER")) {
                printWriter.print(word.toUpperCase());
            } else if (((String) coreLabel.get(CoreAnnotations.AnswerAnnotation.class)).equals("LOWER")) {
                printWriter.print(word.toLowerCase());
            } else if (((String) coreLabel.get(CoreAnnotations.AnswerAnnotation.class)).equals("INIT_UPPER")) {
                printWriter.print(word.substring(0, 1).toUpperCase());
                printWriter.print(word.substring(1));
            } else {
                printWriter.print(word);
            }
            printWriter.print((String) coreLabel.get(CoreAnnotations.AfterAnnotation.class));
        }
        printWriter.println();
    }
}
