package edu.stanford.nlp.ie.regexp;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBLexer;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter;
import edu.stanford.nlp.trees.international.negra.NegraLabel;
import edu.stanford.nlp.util.PaddedList;
import edu.stanford.nlp.util.StringUtils;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/ie/regexp/NumberSequenceClassifier.class */
public class NumberSequenceClassifier extends AbstractSequenceClassifier<CoreLabel> {
    private static final boolean DEBUG = false;

    /**
     * Full English month names and their abbreviations, each abbreviation with an optional
     * trailing period ("May" has no abbreviated form; September accepts "Sep" and "Sept").
     */
    public static final Pattern MONTH_PATTERN = Pattern.compile("January|Jan\\.?|February|Feb\\.?|March|Mar\\.?|April|Apr\\.?|May|June|Jun\\.?|July|Jul\\.?|August|Aug\\.?|September|Sept?\\.?|October|Oct\\.?|November|Nov\\.?|December|Dec\\.?");

    /** A four-digit year whose first digit is 1-3, or a two-digit year with an optional leading apostrophe. */
    public static final Pattern YEAR_PATTERN = Pattern.compile("[1-3][0-9]{3}|'?[0-9]{2}");

    /** A day-of-month number from 1 to 31 (no leading zero), optionally followed by "st", "nd" or "rd". */
    public static final Pattern DAY_PATTERN = Pattern.compile("(?:[1-9]|[12][0-9]|3[01])(?:st|nd|rd)?");

    /** A slash-separated date shaped like two digits, two digits, four-digit year (e.g. "01/02/2003"). */
    public static final Pattern DATE_PATTERN = Pattern.compile("[0-3][0-9]/[0-3][0-9]/[1-9][0-9]{3}");

    /** A year-first date with "-" or "/" separators (e.g. "2003-02-01"). */
    public static final Pattern DATE_PATTERN2 = Pattern.compile("[12][0-9]{3}[-/][01][0-9][-/][0-3][0-9]");

    /** A clock time with one or two hour digits and two minute digits (e.g. "9:30", "21:05"). */
    public static final Pattern TIME_PATTERN = Pattern.compile("[0-2]?[0-9]:[0-5][0-9]");

    /** A clock time with two-digit hours, minutes and seconds (e.g. "21:05:59"). */
    public static final Pattern TIME_PATTERN2 = Pattern.compile("[0-2][0-9]:[0-5][0-9]:[0-5][0-9]");

    /** Currency words: dollar/cent/euro/pound (optionally plural), penny, pence, yen, yuan. Case-sensitive. */
    public static final Pattern CURRENCY_WORD_PATTERN = Pattern.compile("(?:dollar|cent|euro|pound)s?|penny|pence|yen|yuan");

    /** Ordinals, matched case-insensitively: digit forms such as "21st" or "100th", and a fixed list of spelled-out forms. */
    public static final Pattern ORDINAL_PATTERN = Pattern.compile("(?i)[2-9]?1st|[2-9]?2nd|[2-9]?3rd|1[0-9]th|[2-9]?[04-9]th|100+th|zeroth|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth|hundredth|thousandth|millionth");

    /** Exactly four digits, the first of which is 0 (e.g. "0630"). */
    public static final Pattern ARMY_TIME_MORNING = Pattern.compile("0([0-9])([0-9]){2}");

    /** Single lowercase words naming a general time of day (plus "day"). */
    public static final Pattern GENERIC_TIME_WORDS = Pattern.compile("(morning|evening|night|noon|midnight|teatime|lunchtime|dinnertime|suppertime|afternoon|midday|dusk|dawn|sunup|sundown|daybreak|day)");

    public NumberSequenceClassifier() {
        this(new Properties());
        if (CURRENCY_WORD_PATTERN.matcher("pounds").matches()) {
            return;
        }
        System.err.println("NumberSequence: Currency pattern broken");
    }

    /**
     * Creates a classifier from the given properties, which are handed unchanged
     * to the superclass constructor.
     *
     * @param properties configuration passed to {@code AbstractSequenceClassifier}
     */
    public NumberSequenceClassifier(Properties properties) {
        super(properties);
    }

    /**
     * Labels every token of {@code list} in place by writing its
     * {@code AnswerAnnotation}, then returns the same list.
     *
     * <p>Each token is first reset to {@code flags.backgroundSymbol} and is then
     * possibly relabelled as "MONEY", "TIME", "DATE", "NUMBER" or "ORDINAL" based
     * on its own word and part-of-speech tag and on its neighbours (one token to
     * the left, up to two to the right). Tokens are processed left to right, so
     * rules that read the previous token's answer see the label assigned on the
     * preceding iteration.
     *
     * @param list the tokens to label; every token is expected to carry a word
     *             and a part-of-speech annotation
     * @return {@code list} itself, with answers set
     */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public List<CoreLabel> classify(List<CoreLabel> list) {
        // The padded view is relied upon to answer out-of-range indices (i - 2 .. i + 2)
        // with the pad label instead of throwing.
        List<CoreLabel> padded = new PaddedList<CoreLabel>(list, (CoreLabel) this.pad);
        int size = padded.size();
        for (int i = 0; i < size; i++) {
            CoreLabel me = padded.get(i);
            CoreLabel prev = padded.get(i - 1);
            CoreLabel next = padded.get(i + 1);
            CoreLabel next2 = padded.get(i + 2);
            me.set(CoreAnnotations.AnswerAnnotation.class, this.flags.backgroundSymbol);

            String word = me.word();
            String tag = me.getString(CoreAnnotations.PartOfSpeechAnnotation.class);

            if (isCurrencySymbol(word)
                    && (prev.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD")
                        || next.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD"))) {
                // A currency symbol directly next to a number.
                me.set(CoreAnnotations.AnswerAnnotation.class, "MONEY");
            } else if (tag.equals("CD")) {
                me.set(CoreAnnotations.AnswerAnnotation.class, cardinalLabel(padded, i));
            } else if (tag.equals(",")) {
                // The comma in "<date> , <year>" is part of the date.
                String nextWord = next.word();
                if (prev.getString(CoreAnnotations.AnswerAnnotation.class).equals("DATE")
                        && nextWord != null
                        && YEAR_PATTERN.matcher(nextWord).matches()) {
                    me.set(CoreAnnotations.AnswerAnnotation.class, "DATE");
                }
            } else if (tag.equals("NNP")) {
                // A month name continuing a date, or followed by a number.
                if (MONTH_PATTERN.matcher(word).matches()
                        && (prev.getString(CoreAnnotations.AnswerAnnotation.class).equals("DATE")
                            || next.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD"))) {
                    me.set(CoreAnnotations.AnswerAnnotation.class, "DATE");
                }
            } else if (tag.equals("NN") || tag.equals("NNS")) {
                if (CURRENCY_WORD_PATTERN.matcher(word).matches()) {
                    // Currency word closing a money amount ("five dollars").
                    if (prev.getString(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD")
                            && prev.getString(CoreAnnotations.AnswerAnnotation.class).equals("MONEY")) {
                        me.set(CoreAnnotations.AnswerAnnotation.class, "MONEY");
                    }
                } else if (word.equals("m") || word.equals("b")) {
                    // "m" / "b" suffix tokens inherit MONEY from the left, else count as NUMBER.
                    if (prev.getString(CoreAnnotations.AnswerAnnotation.class).equals("MONEY")) {
                        me.set(CoreAnnotations.AnswerAnnotation.class, "MONEY");
                    } else {
                        me.set(CoreAnnotations.AnswerAnnotation.class, "NUMBER");
                    }
                } else if (ORDINAL_PATTERN.matcher(word).matches()) {
                    if (isFollowedByMonth(next, next2)) {
                        me.set(CoreAnnotations.AnswerAnnotation.class, "DATE");
                    }
                } else if (GENERIC_TIME_WORDS.matcher(word).matches()) {
                    me.set(CoreAnnotations.AnswerAnnotation.class, "TIME");
                }
            } else if (tag.equals("JJ")) {
                if (isFollowedByMonth(next, next2)) {
                    me.set(CoreAnnotations.AnswerAnnotation.class, "DATE");
                } else if (ORDINAL_PATTERN.matcher(word).matches()) {
                    me.set(CoreAnnotations.AnswerAnnotation.class, "ORDINAL");
                }
            } else if (tag.equals("IN")) {
                // The "of" in "<ordinal> of <month>".
                String prevText = (String) prev.get(CoreAnnotations.TextAnnotation.class);
                String nextText = (String) next.get(CoreAnnotations.TextAnnotation.class);
                if (word.equalsIgnoreCase("of")
                        && prevText != null && ORDINAL_PATTERN.matcher(prevText).matches()
                        && nextText != null && MONTH_PATTERN.matcher(nextText).matches()) {
                    me.set(CoreAnnotations.AnswerAnnotation.class, "DATE");
                }
            } else if (tag.equals("CC")) {
                // The "and" in "<hundred|thousand|...> and <number>".
                if ("CD".equals(prev.get(CoreAnnotations.PartOfSpeechAnnotation.class))
                        && "CD".equals(next.get(CoreAnnotations.PartOfSpeechAnnotation.class))
                        && "and".equalsIgnoreCase((String) me.get(CoreAnnotations.TextAnnotation.class))) {
                    String prevWord = prev.word();
                    if (prevWord.equalsIgnoreCase("hundred") || prevWord.equalsIgnoreCase("thousand")
                            || prevWord.equalsIgnoreCase("million") || prevWord.equalsIgnoreCase("billion")
                            || prevWord.equalsIgnoreCase("trillion")) {
                        me.set(CoreAnnotations.AnswerAnnotation.class, "NUMBER");
                    }
                }
            }
        }
        return list;
    }

    /** Returns true when {@code word} is one of the currency-symbol tokens recognised by {@link #classify}. */
    private static boolean isCurrencySymbol(String word) {
        // NOTE(review): NegraLabel.FEATURE_SEP looks like a decompiler substitution for a
        // plain string constant - confirm its value before replacing it with a literal.
        return word.equals("$") || word.equals("&#163") || word.equals("£") || word.equals("¥")
                || word.equals(NegraLabel.FEATURE_SEP) || word.equals("€")
                || word.equals("US$") || word.equals("HK$") || word.equals("A$");
    }

    /** Returns true when {@code next} is a month name, or is "of" and {@code next2} is a month name. */
    private static boolean isFollowedByMonth(CoreLabel next, CoreLabel next2) {
        String nextWord = next.word();
        if (nextWord == null) {
            return false;
        }
        if (MONTH_PATTERN.matcher(nextWord).matches()) {
            return true;
        }
        if (!nextWord.equalsIgnoreCase("of")) {
            return false;
        }
        String afterOf = next2.word();
        return afterOf != null && MONTH_PATTERN.matcher(afterOf).matches();
    }

    /** Chooses the answer label for the token at index {@code i}, which the caller has established is tagged "CD". */
    private static String cardinalLabel(List<CoreLabel> padded, int i) {
        CoreLabel me = padded.get(i);
        CoreLabel prev = padded.get(i - 1);
        CoreLabel next = padded.get(i + 1);
        String word = me.word();

        if (TIME_PATTERN.matcher(word).matches() || TIME_PATTERN2.matcher(word).matches()) {
            return "TIME";
        }
        if (DATE_PATTERN.matcher(word).matches() || DATE_PATTERN2.matcher(word).matches()) {
            return "DATE";
        }

        // A day number directly before or after a month name.
        String text = (String) me.get(CoreAnnotations.TextAnnotation.class);
        if (text != null && DAY_PATTERN.matcher(text).matches()) {
            String nextText = (String) next.get(CoreAnnotations.TextAnnotation.class);
            if (nextText != null && MONTH_PATTERN.matcher(nextText).matches()) {
                return "DATE";
            }
            String prevText = (String) prev.get(CoreAnnotations.TextAnnotation.class);
            if (prevText != null && MONTH_PATTERN.matcher(prevText).matches()) {
                return "DATE";
            }
        }

        if (rightScanFindsMoneyWord(padded, i) && !leftScanFindsWeightWord(padded, i)) {
            return "MONEY";
        }
        if (ARMY_TIME_MORNING.matcher(word).matches()) {
            return "TIME";
        }

        String prevAnswer = prev.getString(CoreAnnotations.AnswerAnnotation.class);
        // A year continues a date when it follows a month, or a DATE token that itself follows a DATE.
        // The i - 2 comparison is constant-first so a label without an answer cannot cause an NPE.
        if (YEAR_PATTERN.matcher(word).matches()
                && prevAnswer.equals("DATE")
                && (MONTH_PATTERN.matcher(prev.word()).matches()
                    || "DATE".equals(padded.get(i - 2).get(CoreAnnotations.AnswerAnnotation.class)))) {
            return "DATE";
        }
        if (prevAnswer.equals("MONEY")) {
            return "MONEY";
        }
        return "NUMBER";
    }

    /**
     * Looks at up to three tokens immediately to the left of index {@code i}
     * (never below index 0) for a word beginning with "weigh".
     *
     * @param list the token sequence
     * @param i    index of the token whose left context is inspected
     * @return true if any inspected token's word starts with "weigh"
     */
    private static boolean leftScanFindsWeightWord(List<CoreLabel> list, int i) {
        int lowest = Math.max(0, i - 3);
        int cursor = i - 1;
        while (cursor >= lowest) {
            String candidate = list.get(cursor).word();
            if (candidate.startsWith("weigh")) {
                return true;
            }
            cursor--;
        }
        return false;
    }

    /**
     * Skips the run of "CD"-tagged tokens starting at index {@code i} and tests
     * the first token after that run.
     *
     * @param list the token sequence
     * @param i    index at which the scan starts
     * @return true if the first non-"CD" token is tagged "NN" or "NNS" and its
     *         word matches {@link #CURRENCY_WORD_PATTERN}; false otherwise,
     *         including when the run of "CD" tokens reaches the end of the list
     */
    private static boolean rightScanFindsMoneyWord(List<CoreLabel> list, int i) {
        int total = list.size();
        for (int cursor = i; cursor < total; cursor++) {
            CoreLabel token = list.get(cursor);
            String tag = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
            if (tag.equals("CD")) {
                continue;
            }
            // First token after the number run decides the outcome.
            boolean nounTag = tag.equals("NN") || tag.equals("NNS");
            return nounTag && CURRENCY_WORD_PATTERN.matcher(token.word()).matches();
        }
        return false;
    }

    /**
     * Does nothing: this override has an empty body.
     * NOTE(review): presumably because classification here relies only on the
     * fixed patterns in this class - confirm.
     *
     * @param collection ignored
     */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void train(Collection<List<CoreLabel>> collection) {
    }

    /**
     * Does nothing: this override has an empty body and prints no output.
     *
     * @param list ignored
     */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void printProbsDocument(List<CoreLabel> list) {
    }

    /**
     * Writes a "Serializing classifier to ..." progress line followed by "done."
     * to stderr. Nothing is written to {@code str} by this method.
     *
     * @param str the target path; used only in the progress message
     */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void serializeClassifier(String str) {
        String notice = "Serializing classifier to " + str + PTBLexer.ptb3EllipsisStr;
        System.err.print(notice);
        System.err.println("done.");
    }

    /**
     * Does nothing: this override has an empty body, so the stream is not read
     * and the properties are not applied.
     *
     * @param objectInputStream ignored
     * @param properties        ignored
     */
    @Override // edu.stanford.nlp.ie.AbstractSequenceClassifier
    public void loadClassifier(ObjectInputStream objectInputStream, Properties properties) throws IOException, ClassCastException, ClassNotFoundException {
    }

    /**
     * Command-line entry point. Arguments are converted to properties with
     * {@code StringUtils.argsToProperties} and drive these steps, in order:
     * <ol>
     *   <li>if {@code flags.loadClassifier} is set, load from it and re-apply the
     *       properties; otherwise, if {@code flags.trainFile} is set, train on it;</li>
     *   <li>if {@code flags.serializeTo} is set, call {@link #serializeClassifier};</li>
     *   <li>if {@code flags.testFile} is set, classify it and write the answers;</li>
     *   <li>if {@code flags.textFile} is set, classify it with a
     *       {@code PlainTextDocumentReaderAndWriter} temporarily installed.</li>
     * </ol>
     * All flag values are read once, before any loading takes place.
     *
     * @param strArr command-line arguments
     * @throws Exception if any of the invoked steps fails
     */
    public static void main(String[] strArr) throws Exception {
        Properties props = StringUtils.argsToProperties(strArr);
        NumberSequenceClassifier classifier = new NumberSequenceClassifier(props);
        String trainFile = classifier.flags.trainFile;
        String testFile = classifier.flags.testFile;
        String textFile = classifier.flags.textFile;
        String loadPath = classifier.flags.loadClassifier;
        String serializeTo = classifier.flags.serializeTo;

        if (loadPath != null) {
            classifier.loadClassifierNoExceptions(loadPath);
            classifier.flags.setProperties(props);
        } else if (trainFile != null) {
            classifier.train(trainFile);
        }
        if (serializeTo != null) {
            classifier.serializeClassifier(serializeTo);
        }
        if (testFile != null) {
            classifier.classifyAndWriteAnswers(testFile);
        }
        if (textFile != null) {
            // The superclass type variable is not in scope here; for this subclass it is bound to CoreLabel.
            DocumentReaderAndWriter<CoreLabel> previous = classifier.readerAndWriter;
            classifier.readerAndWriter = new PlainTextDocumentReaderAndWriter();
            try {
                classifier.classifyAndWriteAnswers(textFile);
            } finally {
                // Put the original reader/writer back even if classification throws.
                classifier.readerAndWriter = previous;
            }
        }
    }
}
