package edu.stanford.nlp.trees.international.arabic;

import edu.stanford.nlp.international.arabic.pipeline.DefaultLexicalMapper;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.international.french.FrenchTreeReader;
import edu.stanford.nlp.trees.tregex.ParseException;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.util.Filter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/trees/international/arabic/ArabicTreeNormalizer.class */
/**
 * Tree normalizer for Arabic treebank trees, built on {@link BobChrisTreeNormalizer}.
 *
 * <p>Besides the label clean-up inherited from the superclass, it can optionally retain the
 * {@code NP-TMP}, {@code NP-SBJ} and {@code PP-CLR} labels, mark verbs that are sisters of a
 * {@code -PRD} constituent, map terminals through a {@link DefaultLexicalMapper}, and split
 * morphological annotations (separated from the word by {@code "~#"}) out of leaf values.
 */
public class ArabicTreeNormalizer extends BobChrisTreeNormalizer {
    private static final long serialVersionUID = -1592231121068698494L;

    /** Separator between the word and its morphological annotation inside a leaf value. */
    private static final String MORPHO_MARK = "~#";

    private final boolean retainNPTmp;
    private final boolean retainNPSbj;
    private final boolean markPRDverb;
    private final boolean changeNoLabels;
    private final boolean retainPPClr;
    private final DefaultLexicalMapper lexMapper;
    /** Matches labels of the form {@code [A-Z]+-PRD}; such labels are kept when marking PRD verbs. */
    private final Pattern prdPattern;
    /** Matches a {@code V*} node (not {@code VP}) under a VP that has a {@code -PRD} sister. */
    private final TregexPattern prdVerbPattern;
    /** Matches {@code NP-SBJ} nodes that are not directly dominated by a VP. */
    private final TregexPattern npSbjPattern;
    private final String rootLabel;

    /* loaded from: input_file:edu/stanford/nlp/trees/international/arabic/ArabicTreeNormalizer$ArabicEmptyFilter.class */
    /**
     * Filter that rejects empty elements: preterminals tagged {@code PRON_1S} or {@code PRP}
     * whose word is one of a fixed set of null-pronoun strings, and preterminals whose tag
     * equals {@link FrenchTreeReader#EMPTY_LEAF}. Every other node is accepted.
     */
    public static class ArabicEmptyFilter implements Filter<Tree> {
        private static final long serialVersionUID = 7417844982953945964L;

        /**
         * @param tree the node to test
         * @return {@code false} if the node is an empty element that should be pruned,
         *         {@code true} otherwise
         */
        @Override // edu.stanford.nlp.util.Filter
        public boolean accept(Tree tree) {
            if (!tree.isPreTerminal()) {
                return true;
            }
            // Constant-first comparisons keep the filter safe for nodes with a null value.
            String tag = tree.value();
            if ("PRON_1S".equals(tag) || "PRP".equals(tag)) {
                String word = tree.firstChild().value();
                if ("nullp".equals(word) || "نللة".equals(word) || "-~a".equals(word)) {
                    return false;
                }
            }
            return !(tag != null && tag.equals(FrenchTreeReader.EMPTY_LEAF));
        }
    }

    /**
     * Creates a normalizer with every option specified.
     *
     * @param z  retain {@code NP-TMP} labels
     * @param z2 mark verbs that have a {@code -PRD} sister with {@code -PRDverb} and keep
     *           {@code X-PRD} labels
     * @param z3 leave nonterminal and terminal labels unchanged
     * @param z4 retain {@code NP-SBJ} labels (only on nodes directly under a VP)
     * @param z5 retain {@code PP-CLR} labels
     * @throws RuntimeException if one of the built-in tregex patterns fails to compile; the
     *         original {@link ParseException} is attached as the cause
     */
    public ArabicTreeNormalizer(boolean z, boolean z2, boolean z3, boolean z4, boolean z5) {
        super(new ArabicTreebankLanguagePack());
        this.retainNPTmp = z;
        this.retainNPSbj = z4;
        this.markPRDverb = z2;
        this.changeNoLabels = z3;
        this.retainPPClr = z5;
        this.rootLabel = this.tlp.startSymbol();
        try {
            this.prdVerbPattern = TregexPattern.compile("/^V[^P]/ > VP $ /-PRD$/=prd");
            this.prdPattern = Pattern.compile("^[A-Z]+-PRD");
            this.npSbjPattern = TregexPattern.compile("/^NP-SBJ/ !> @VP");
            this.emptyFilter = new ArabicEmptyFilter();
            this.lexMapper = new DefaultLexicalMapper();
        } catch (ParseException e) {
            // Chain the cause so the failing pattern is visible in the caller's stack trace.
            throw new RuntimeException("Unable to compile tregex patterns for " + getClass().getName(), e);
        }
    }

    /**
     * Creates a normalizer that does not retain {@code NP-SBJ} or {@code PP-CLR}.
     *
     * @param z  retain {@code NP-TMP} labels
     * @param z2 mark PRD verbs
     * @param z3 leave labels unchanged
     */
    public ArabicTreeNormalizer(boolean z, boolean z2, boolean z3) {
        this(z, z2, z3, false, false);
    }

    /**
     * Creates a normalizer that changes labels and does not retain {@code NP-SBJ} or {@code PP-CLR}.
     *
     * @param z  retain {@code NP-TMP} labels
     * @param z2 mark PRD verbs
     */
    public ArabicTreeNormalizer(boolean z, boolean z2) {
        this(z, z2, false);
    }

    /**
     * Creates a normalizer with only the {@code NP-TMP} option configurable.
     *
     * @param z retain {@code NP-TMP} labels
     */
    public ArabicTreeNormalizer(boolean z) {
        this(z, false);
    }

    /** Creates a normalizer with every option switched off. */
    public ArabicTreeNormalizer() {
        this(false);
    }

    /**
     * Normalizes a nonterminal label according to the configured options.
     *
     * @param str the raw label; may be {@code null}
     * @return the interned normalized label; when labels are left unchanged a {@code null}
     *         input is returned as {@code null}
     */
    @Override // edu.stanford.nlp.trees.BobChrisTreeNormalizer, edu.stanford.nlp.trees.TreeNormalizer
    public String normalizeNonterminal(String str) {
        if (this.changeNoLabels) {
            // Every other branch tolerates null, so pass it through here instead of failing on intern().
            return str == null ? null : str.intern();
        }
        if (str != null) {
            if (this.retainNPTmp && str.startsWith("NP-TMP")) {
                return "NP-TMP".intern();
            }
            if (this.retainNPSbj && str.startsWith("NP-SBJ")) {
                return "NP-SBJ".intern();
            }
            if (this.retainPPClr && str.startsWith("PP-CLR")) {
                return "PP-CLR".intern();
            }
            if (this.markPRDverb && this.prdPattern.matcher(str).matches()) {
                // Keep X-PRD intact for now; normalizeWholeTree strips it after marking the verb.
                return str.intern();
            }
        }
        return super.normalizeNonterminal(str);
    }

    /**
     * Normalizes a terminal. Unless labels are left unchanged, terminals that carry no
     * morphological annotation are passed through the lexical mapper.
     *
     * @param str the raw terminal string
     * @return the interned normalized terminal
     */
    @Override // edu.stanford.nlp.trees.BobChrisTreeNormalizer, edu.stanford.nlp.trees.TreeNormalizer
    public String normalizeTerminal(String str) {
        String normalized = str;
        if (!this.changeNoLabels && !normalized.contains(MORPHO_MARK)) {
            normalized = this.lexMapper.map(null, normalized);
        }
        return normalized.intern();
    }

    /**
     * Normalizes a whole tree: prunes empty elements, splices out A-over-A nodes, separates
     * morphological annotations from leaves, inserts {@code DUMMYTAG} preterminals above bare
     * leaves, applies the PRD-verb and NP-SBJ options, and makes sure the result is rooted at
     * the language pack's start symbol.
     *
     * @param tree        the tree to normalize
     * @param treeFactory factory used to build any new nodes
     * @return the normalized tree, or {@code null} if nothing remains after pruning, splicing
     *         and removing empty wrapper nodes
     */
    @Override // edu.stanford.nlp.trees.BobChrisTreeNormalizer, edu.stanford.nlp.trees.TreeNormalizer
    public Tree normalizeWholeTree(Tree tree, TreeFactory treeFactory) {
        Tree pruned = tree.prune(this.emptyFilter, treeFactory);
        if (pruned == null) {
            // The whole tree consisted of empty elements.
            return null;
        }
        Tree result = pruned.spliceOut(this.aOverAFilter, treeFactory);
        if (result == null) {
            return null;
        }

        for (Tree node : result) {
            if (node.isLeaf()) {
                storeMorphAnalysis(node);
            } else if (!node.isPreTerminal()) {
                wrapBareLeaves(node, treeFactory);
            } else if (isNullOrEmpty(node.value())) {
                System.err.printf("%s: missing tag for\n%s\n", getClass().getName(), node.pennString());
            }
        }

        if (this.markPRDverb) {
            markPredicateVerbs(result);
        }
        if (this.retainNPSbj) {
            // NP-SBJ is only kept directly under a VP; everywhere else it collapses to NP.
            TregexMatcher sbjMatcher = this.npSbjPattern.matcher(result);
            while (sbjMatcher.find()) {
                sbjMatcher.getMatch().label().setValue("NP");
            }
        }

        if (result.isPreTerminal()) {
            result = wrapBareTaggedWord(result, treeFactory);
        }

        // Strip empty wrapper nodes; a "tree" made only of brackets ends up as null.
        while (result != null && isNullOrEmpty(result.value()) && result.numChildren() <= 1) {
            result = result.firstChild();
        }
        if (result != null) {
            // A null-valued node with several children survives the loop above, so compare null-safely.
            String rootValue = result.value();
            if (rootValue == null || !rootValue.equals(this.rootLabel)) {
                result = treeFactory.newTreeNode(this.rootLabel, Collections.singletonList(result));
            }
        }
        return result;
    }

    /** Returns whether the given label value is {@code null} or the empty string. */
    private static boolean isNullOrEmpty(String value) {
        return value == null || value.equals("");
    }

    /** Splits a {@code word~#analysis} leaf value and stores both parts on the leaf's CoreLabel. */
    private void storeMorphAnalysis(Tree leaf) {
        String value = leaf.value();
        if (value == null || !value.contains(MORPHO_MARK)) {
            return;
        }
        String[] parts = value.split(MORPHO_MARK);
        if (parts.length != 2) {
            System.err.printf("%s: Word contains malformed morph annotation: %s%n", getClass().getName(), leaf.value());
        } else if (leaf.label() instanceof CoreLabel) {
            CoreLabel label = (CoreLabel) leaf.label();
            String word = parts[0].trim().intern();
            label.setValue(word);
            label.setWord(word);
            label.setCurrent(parts[1].trim().intern());
        } else {
            System.err.printf("%s: Cannot store morph analysis in non-CoreLabel: %s%n", getClass().getName(), leaf.label().getClass().getName());
        }
    }

    /** Inserts a {@code DUMMYTAG} preterminal above every leaf that hangs directly off a phrasal node. */
    private void wrapBareLeaves(Tree node, TreeFactory treeFactory) {
        int numChildren = node.numChildren();
        List<Tree> children = new ArrayList<Tree>(numChildren);
        for (int i = 0; i < numChildren; i++) {
            Tree child = node.getChild(i);
            if (child.isLeaf()) {
                System.err.printf("%s: Splicing in DUMMYTAG for%n%s%n", getClass().getName(), node.toString());
                children.add(treeFactory.newTreeNode("DUMMYTAG", Collections.singletonList(child)));
            } else {
                children.add(child);
            }
        }
        node.setChildren(children);
    }

    /** Appends {@code -PRDverb} to verbs with a {@code -PRD} sister and normalizes that sister's label. */
    private void markPredicateVerbs(Tree root) {
        TregexMatcher matcher = this.prdVerbPattern.matcher(root);
        Tree previous = null;
        while (matcher.find()) {
            // The same verb can match once per PRD sister; identity comparison ensures it is marked once.
            if (matcher.getMatch() != previous) {
                previous = matcher.getMatch();
                previous.label().setValue(previous.label().value() + "-PRDverb");
                Tree prd = matcher.getNode("prd");
                prd.label().setValue(super.normalizeNonterminal(prd.label().value()));
            }
        }
    }

    /** Wraps a bare conjunction or punctuation preterminal in {@code FRAG}; other bare tags are only reported. */
    private Tree wrapBareTaggedWord(Tree preterminal, TreeFactory treeFactory) {
        String tag = preterminal.label().value();
        boolean needsFrag = "CC".equals(tag)
                || (tag != null && tag.startsWith(ATBTreeUtils.puncTag))
                || "CONJ".equals(tag);
        if (needsFrag) {
            System.err.printf("%s: Bare tagged word being wrapped in FRAG\n%s\n", getClass().getName(), preterminal.pennString());
            return treeFactory.newTreeNode("FRAG", Collections.singletonList(preterminal));
        }
        System.err.printf("%s: Bare tagged word\n%s\n", getClass().getName(), preterminal.pennString());
        return preterminal;
    }
}
