package edu.stanford.nlp.dcoref;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.util.CoreMap;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/dcoref/MUCMentionExtractor.class */
/**
 * Mention extractor for MUC-style SGML corpora.
 *
 * <p>The constructor reads the whole file named by the {@code Constants.MUC_PROP} property into
 * memory. Each call to {@link #nextDoc()} consumes the next {@code <DOC>...</DOC>} element of that
 * file, tokenizes its sentences, collects the gold {@code <COREF>} mentions, runs the configured
 * {@link StanfordCoreNLP} pipeline over the sentences and parses them.
 */
public class MUCMentionExtractor extends MentionExtractor {
    /** {@code CASE_INSENSITIVE | DOTALL}; numerically 34, the literal used for the document-level patterns. */
    private static final int DOC_PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

    /** Matches one whole document; group 1 is the document body. */
    private static final Pattern DOC_PATTERN = Pattern.compile("<DOC>(.*?)</DOC>", DOC_PATTERN_FLAGS);

    /** Matches one sentence-like unit inside a document; group 2 is its content. */
    private static final Pattern SENTENCE_PATTERN =
            Pattern.compile("(<s>|<hl>|<dd>|<DATELINE>)(.*?)(</s>|</hl>|</dd>|</DATELINE>)", DOC_PATTERN_FLAGS);

    /** Matches the document number element; group 1 is the identifier. */
    private static final Pattern DOC_NO_PATTERN = Pattern.compile("<DOCNO>(.*?)</DOCNO>", DOC_PATTERN_FLAGS);

    /** Extracts the label from an opening named-entity tag token. */
    private static final Pattern OPEN_TAG_PATTERN = Pattern.compile("<(.*?)>");

    /** Extracts the label from a closing named-entity tag token. */
    private static final Pattern CLOSE_TAG_PATTERN = Pattern.compile("</(.*?)>");

    /** Extracts the {@code ID} attribute of a {@code <COREF ...>} tag token. */
    private static final Pattern COREF_ID_PATTERN = Pattern.compile("ID=\\\"(.*?)\\\"");

    /** Extracts the optional {@code REF} attribute of a {@code <COREF ...>} tag token. */
    private static final Pattern COREF_REF_PATTERN = Pattern.compile("REF=\\\"(.*?)\\\"");

    private TokenizerFactory<CoreLabel> tokenizerFactory;
    private LexicalizedParser parser;
    private final StanfordCoreNLP stanfordProcessor;
    private final String fileContents;
    /** Offset into {@link #fileContents} just past the last document returned by {@link #nextDoc()}. */
    private int currentOffset;

    /**
     * Installs the parser and creates the tokenizer factory, but only if no parser is set yet.
     *
     * @param lexicalizedParser parser to use for the sentences of every document
     */
    private void setParser(LexicalizedParser lexicalizedParser) {
        if (this.parser == null) {
            this.tokenizerFactory = PTBTokenizer.factory(false, (LexedTokenFactory) new CoreLabelTokenFactory(false));
            this.parser = lexicalizedParser;
        }
    }

    /**
     * Creates an extractor over the corpus file named by the {@code Constants.MUC_PROP} property.
     *
     * @param lexicalizedParser parser applied to every sentence
     * @param dictionaries dictionaries handed to the superclass constructor
     * @param properties configuration; supplies the corpus path and configures the
     *     {@link StanfordCoreNLP} pipeline
     * @throws IOException if the corpus file cannot be read
     */
    public MUCMentionExtractor(LexicalizedParser lexicalizedParser, Dictionaries dictionaries, Properties properties) throws IOException {
        super(dictionaries);
        this.parser = null;
        setParser(lexicalizedParser);
        this.fileContents = IOUtils.slurpFile(properties.getProperty(Constants.MUC_PROP));
        this.currentOffset = 0;
        this.stanfordProcessor = new StanfordCoreNLP(properties, false);
    }

    /**
     * Reads the next document from the corpus and extracts its mentions.
     *
     * @return the value produced by {@code arrange(...)} for this document, or {@code null} when the
     *     corpus contains no further {@code <DOC>} element
     * @throws RuntimeException if no parser is set, if the SGML markup is malformed (bad tag,
     *     unmatched named-entity or {@code </COREF>} tag, dangling or cyclic {@code REF}), or if the
     *     pipeline output does not line up token-for-token with the gold sentences
     */
    @Override
    public List<List<Mention>> nextDoc() {
        if (this.parser == null) {
            // NOTE(review): the message mentions initialize(), which is not visible in this class.
            throw new RuntimeException("Must call MUCMentionExtractor.initialize() before actually using this extractor!");
        }

        Matcher docMatcher = DOC_PATTERN.matcher(this.fileContents);
        if (!docMatcher.find(this.currentOffset)) {
            return null;
        }
        this.currentOffset = docMatcher.end();
        String doc = docMatcher.group(1);

        Matcher docNoMatcher = DOC_NO_PATTERN.matcher(doc);
        if (docNoMatcher.find()) {
            this.currentDocumentID = docNoMatcher.group(1);
        } else {
            this.currentDocumentID = "documentAfter " + this.currentDocumentID;
        }

        List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
        List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
        List<CoreMap> sentences = new ArrayList<CoreMap>();

        // The open named-entity label deliberately lives outside the sentence loop, so a label
        // left open at the end of one sentence carries over into the next one.
        String ner = null;
        Matcher sentenceMatcher = SENTENCE_PATTERN.matcher(doc);
        while (sentenceMatcher.find()) {
            List<CoreLabel> tokens =
                    this.tokenizerFactory.getTokenizer(new StringReader(sentenceMatcher.group(2))).tokenize();
            fixTokenization(tokens);

            List<CoreLabel> sentence = new ArrayList<CoreLabel>();
            // Annotation-free copies of the words; these are what the pipeline gets as input.
            List<CoreLabel> bareSentence = new ArrayList<CoreLabel>();
            List<Mention> goldMentions = new ArrayList<Mention>();
            Stack<Mention> openMentions = new Stack<Mention>();
            allWords.add(sentence);
            allGoldMentions.add(goldMentions);

            for (CoreLabel token : tokens) {
                String text = token.get(CoreAnnotations.TextAnnotation.class);
                if (!text.startsWith("<") && text.contains("\\/") && text.lastIndexOf("\\/") != text.length() - 2) {
                    // Token carries a "\/" separator: keep only what precedes the last one.
                    token.set(CoreAnnotations.TextAnnotation.class, text.substring(0, text.lastIndexOf("\\/")));
                    addWord(token, ner, sentence, bareSentence);
                } else if (text.startsWith("<") && !text.startsWith("<COREF") && !text.startsWith("</")) {
                    // Opening named-entity tag.
                    ner = requireGroup(OPEN_TAG_PATTERN, text);
                } else if (text.startsWith("</") && !text.startsWith("</COREF")) {
                    // Closing named-entity tag; it must match the currently open label, if any.
                    String closed = requireGroup(CLOSE_TAG_PATTERN, text);
                    if (ner != null && !ner.equals(closed)) {
                        throw new RuntimeException("Unmatched NE labels in MUC file: " + ner + " v. " + closed);
                    }
                    ner = null;
                } else if (text.startsWith("<COREF")) {
                    // Opening mention tag: remember where the mention starts and its gold ids.
                    Mention mention = new Mention();
                    mention.originalStartIndex = sentence.size();
                    mention.originalID = Integer.parseInt(requireGroup(COREF_ID_PATTERN, text));
                    Matcher refMatcher = COREF_REF_PATTERN.matcher(text);
                    if (refMatcher.find()) {
                        mention.originalRef = Integer.parseInt(refMatcher.group(1));
                    }
                    openMentions.push(mention);
                } else if (text.equals("</COREF>")) {
                    if (openMentions.isEmpty()) {
                        throw new RuntimeException(
                                "Unmatched </COREF> in MUC document " + this.currentDocumentID);
                    }
                    Mention mention = openMentions.pop();
                    mention.originalEndIndex = sentence.size();
                    goldMentions.add(mention);
                } else {
                    addWord(token, ner, sentence, bareSentence);
                }
            }

            Annotation sentenceAnnotation = new Annotation(joinText(sentence));
            sentenceAnnotation.set(CoreAnnotations.TokensAnnotation.class, bareSentence);
            sentences.add(sentenceAnnotation);
        }

        resolveCorefChains(allGoldMentions);

        Annotation document = new Annotation("");
        document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
        this.stanfordProcessor.annotate(document);
        copyNamedEntityTags(sentences, allWords);

        // A raw list is passed through, so the call is unchecked by construction.
        @SuppressWarnings("unchecked")
        List<List<Mention>> arranged = arrange(allWords, parseSentences(allWords), allGoldMentions, this.parser, true);
        return arranged;
    }

    /**
     * Repairs two tokenizer artefacts in place: a {@code "$"} token following a token ending in
     * {@code PRP} or {@code WP} is glued onto that token, and a stand-alone {@code "\/"} token is
     * merged with both of its neighbours unless the previous token is {@code </COREF>}.
     */
    private static void fixTokenization(List<CoreLabel> tokens) {
        for (int i = 0; i < tokens.size(); i++) {
            CoreLabel token = tokens.get(i);
            String word = token.word();
            if (i > 0 && word.equals("$")) {
                CoreLabel previous = tokens.get(i - 1);
                if (previous.word().endsWith("PRP") || previous.word().endsWith("WP")) {
                    previous.set(CoreAnnotations.TextAnnotation.class, previous.word() + "$");
                    tokens.remove(i);
                    i--; // re-examine the token that moved into slot i
                }
            } else if (word.equals("\\/") && i > 0 && i + 1 < tokens.size()
                    && !tokens.get(i - 1).word().equals("</COREF>")) {
                // The bounds checks avoid indexing outside the list when the separator is the
                // first or last token of the sentence.
                token.set(CoreAnnotations.TextAnnotation.class,
                        tokens.get(i - 1).word() + "\\/" + tokens.get(i + 1).word());
                tokens.remove(i + 1);
                tokens.remove(i - 1);
            }
        }
    }

    /**
     * Appends {@code token} to {@code sentence} (tagged with {@code ner} when a label is open) and
     * appends a fresh label carrying only the token text to {@code bareSentence}.
     */
    private static void addWord(CoreLabel token, String ner, List<CoreLabel> sentence, List<CoreLabel> bareSentence) {
        token.remove(CoreAnnotations.CurrentAnnotation.class);
        if (ner != null) {
            token.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
        }
        sentence.add(token);

        String text = token.get(CoreAnnotations.TextAnnotation.class);
        CoreLabel bare = new CoreLabel();
        bare.setWord(text);
        bare.set(CoreAnnotations.TextAnnotation.class, text);
        bareSentence.add(bare);
    }

    /** Returns group 1 of the first match of {@code pattern} in {@code tag}, or fails with a clear message. */
    private static String requireGroup(Pattern pattern, String tag) {
        Matcher matcher = pattern.matcher(tag);
        if (!matcher.find()) {
            throw new RuntimeException("Malformed SGML tag in MUC file: " + tag);
        }
        return matcher.group(1);
    }

    /** Joins the text of the given tokens with single spaces. */
    private static String joinText(List<CoreLabel> sentence) {
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < sentence.size(); i++) {
            if (i > 0) {
                text.append(" ");
            }
            text.append(sentence.get(i).getString(CoreAnnotations.TextAnnotation.class));
        }
        return text.toString();
    }

    /**
     * Assigns {@code originalCorefChainID} to every mention that still has the value {@code -1}, by
     * following {@code originalRef} links until a mention with a known chain id, or one without a
     * reference (which then starts a chain under its own id), is reached.
     */
    private void resolveCorefChains(List<List<Mention>> goldMentions) {
        HashMap<Integer, Mention> mentionsById = new HashMap<Integer, Mention>();
        for (List<Mention> sentenceMentions : goldMentions) {
            for (Mention mention : sentenceMentions) {
                mentionsById.put(Integer.valueOf(mention.originalID), mention);
            }
        }

        for (List<Mention> sentenceMentions : goldMentions) {
            for (Mention mention : sentenceMentions) {
                if (mention.originalCorefChainID != -1) {
                    continue;
                }
                if (mention.originalRef == -1) {
                    mention.originalCorefChainID = mention.originalID;
                    continue;
                }
                int ref = mention.originalRef;
                int hops = 0;
                while (true) {
                    Mention antecedent = mentionsById.get(Integer.valueOf(ref));
                    if (antecedent == null) {
                        throw new RuntimeException("COREF REF=\"" + ref + "\" has no matching ID in MUC document "
                                + this.currentDocumentID);
                    }
                    if (antecedent.originalCorefChainID != -1) {
                        mention.originalCorefChainID = antecedent.originalCorefChainID;
                        break;
                    }
                    if (antecedent.originalRef == -1) {
                        antecedent.originalCorefChainID = antecedent.originalID;
                        mention.originalCorefChainID = antecedent.originalCorefChainID;
                        break;
                    }
                    // An acyclic chain cannot be longer than the number of known mentions.
                    if (++hops > mentionsById.size()) {
                        throw new RuntimeException("Cyclic COREF REF chain starting at ID=\"" + mention.originalID
                                + "\" in MUC document " + this.currentDocumentID);
                    }
                    ref = antecedent.originalRef;
                }
            }
        }
    }

    /**
     * Copies the named-entity tag of every pipeline token onto the gold token at the same position,
     * after checking that sentences and tokens line up exactly.
     */
    private void copyNamedEntityTags(List<CoreMap> annotatedSentences, List<List<CoreLabel>> allWords) {
        if (annotatedSentences.size() != allWords.size()) {
            throw new RuntimeException("Sentence count mismatch in MUC document " + this.currentDocumentID + ": "
                    + annotatedSentences.size() + " v. " + allWords.size());
        }
        for (int s = 0; s < annotatedSentences.size(); s++) {
            List<CoreLabel> annotated = annotatedSentences.get(s).get(CoreAnnotations.TokensAnnotation.class);
            List<CoreLabel> gold = allWords.get(s);
            if (annotated.size() != gold.size()) {
                throw new RuntimeException("Token count mismatch in sentence " + s + " of MUC document "
                        + this.currentDocumentID + ": " + annotated.size() + " v. " + gold.size());
            }
            for (int t = 0; t < annotated.size(); t++) {
                CoreLabel annotatedToken = annotated.get(t);
                CoreLabel goldToken = gold.get(t);
                if (!annotatedToken.get(CoreAnnotations.TextAnnotation.class)
                        .equals(goldToken.get(CoreAnnotations.TextAnnotation.class))) {
                    throw new RuntimeException("Token text mismatch at sentence " + s + ", token " + t
                            + " of MUC document " + this.currentDocumentID);
                }
                goldToken.set(CoreAnnotations.NamedEntityTagAnnotation.class,
                        annotatedToken.get(CoreAnnotations.NamedEntityTagAnnotation.class));
            }
        }
    }

    /**
     * Applies the parser to every sentence, in order.
     *
     * <p>The result is a raw list because the parser's result type is not imported in this file.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private List parseSentences(List<List<CoreLabel>> allWords) {
        List parses = new ArrayList(allWords.size());
        for (List<CoreLabel> sentence : allWords) {
            // The explicit Object cast keeps the apply(Object) call of the original code.
            parses.add(this.parser.apply((Object) sentence));
        }
        return parses;
    }
}
