package de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.inception.recommendation.api.evaluation.DataSplitter;
import de.tudarmstadt.ukp.inception.recommendation.api.evaluation.EvaluationResult;
import de.tudarmstadt.ukp.inception.recommendation.api.evaluation.LabelPair;
import de.tudarmstadt.ukp.inception.recommendation.api.model.Recommender;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngine;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationException;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommenderContext;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.gazeteer.GazeteerService;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.Gazeteer;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.model.GazeteerEntry;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.trie.Trie;
import de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.trie.WhitespaceNormalizingSanitizer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/tudarmstadt/ukp/inception/recommendation/imls/stringmatch/StringMatchingRecommender.class */
public class StringMatchingRecommender extends RecommendationEngine {
    /** Context key under which the trie model is stored by pretrain()/train() and read by predict(). */
    public static final RecommenderContext.Key<Trie<DictEntry>> KEY_MODEL = new RecommenderContext.Key<>("model");
    /** Placeholder stored by learn() whenever the label to learn is blank. */
    private static final String UNKNOWN_LABEL = "unknown";
    /** Label used by evaluate() for tokens that have no predicted and/or no gold label. */
    private static final String NO_LABEL = "O";
    /** Logger; assigned in the constructor from getClass(). */
    private final Logger log;
    /** Traits handed to the constructor; not read anywhere else in this file. */
    private final StringMatchingRecommenderTraits traits;
    /** Gazeteer source consulted by train(); null when the two-argument constructor was used. */
    private final GazeteerService gazeteerService;

    /* renamed from: de.tudarmstadt.ukp.inception.recommendation.imls.stringmatch.StringMatchingRecommender$1, reason: invalid class name */
    /* loaded from: input_file:de/tudarmstadt/ukp/inception/recommendation/imls/stringmatch/StringMatchingRecommender$1.class */
    /**
     * Lookup table that maps the ordinal of a {@code DataSplitter.TargetSet} constant to the case
     * number used by the switch statement in evaluate(): TRAIN maps to 1 and TEST maps to 2.
     * Slots that are never assigned keep the default value 0 and therefore match no case.
     */
    static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$de$tudarmstadt$ukp$inception$recommendation$api$evaluation$DataSplitter$TargetSet = new int[DataSplitter.TargetSet.values().length];

        static {
            // Each constant is registered in its own try block so that a constant missing at
            // runtime (NoSuchFieldError) is skipped without affecting the other entries.
            try {
                $SwitchMap$de$tudarmstadt$ukp$inception$recommendation$api$evaluation$DataSplitter$TargetSet[DataSplitter.TargetSet.TRAIN.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$de$tudarmstadt$ukp$inception$recommendation$api$evaluation$DataSplitter$TargetSet[DataSplitter.TargetSet.TEST.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/tudarmstadt/ukp/inception/recommendation/imls/stringmatch/StringMatchingRecommender$DictEntry.class */
    /**
     * Value stored in the trie for one surface string: the labels that were observed for that
     * string together with how often each label was seen.
     */
    public static class DictEntry {
        private String key;
        /** Observed labels; {@code null} until the first call to {@link #put(String)}. */
        private String[] labels;
        /** Observation counts, index-aligned with {@link #labels}. */
        private int[] counts;

        /**
         * @param str the surface string this entry belongs to (only used by {@link #toString()})
         */
        public DictEntry(String str) {
            this.key = str;
        }

        /**
         * Records one observation of the given label, either incrementing the counter of an
         * already known label or appending the label with a count of one.
         *
         * @param str the observed label
         */
        public void put(String str) {
            if (labels == null) {
                labels = new String[] {str};
                counts = new int[] {1};
                return;
            }
            int index = Arrays.asList(labels).indexOf(str);
            if (index != -1) {
                counts[index]++;
                return;
            }
            // Unknown label: grow both parallel arrays by one slot and append.
            labels = Arrays.copyOf(labels, labels.length + 1);
            counts = Arrays.copyOf(counts, counts.length + 1);
            labels[labels.length - 1] = str;
            counts[counts.length - 1] = 1;
        }

        /**
         * Returns the most frequently observed labels, ordered by descending count.
         *
         * @param i the maximum number of labels to return
         * @return at most {@code i} label statistics; empty if nothing has been recorded yet
         */
        public List<LabelStats> getBest(int i) {
            if (labels == null) {
                // Nothing recorded yet; avoid dereferencing the uninitialised arrays.
                return new ArrayList<>();
            }
            int sum = IntStream.of(counts).sum();
            List<LabelStats> stats = new ArrayList<>(labels.length);
            for (int k = 0; k < labels.length; k++) {
                // Cast before dividing: int / int would truncate the relative frequency to 0 or 1.
                stats.add(new LabelStats(labels[k], counts[k], (double) counts[k] / sum));
            }
            return stats.stream()
                    .sorted(Comparator.comparingInt(LabelStats::getCount).reversed())
                    .limit(i)
                    .collect(Collectors.toList());
        }

        @Override
        public String toString() {
            return "DictEntry [key=" + key + "]";
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/tudarmstadt/ukp/inception/recommendation/imls/stringmatch/StringMatchingRecommender$LabelStats.class */
    /** Immutable triple of a label, its absolute count and its relative frequency. */
    public static class LabelStats {
        private final String label;
        private final int count;
        private final double relFreq;

        /**
         * @param label the label text
         * @param count how often the label was observed
         * @param relFreq the relative frequency of the label
         */
        public LabelStats(String label, int count, double relFreq) {
            this.label = label;
            this.count = count;
            this.relFreq = relFreq;
        }

        /** @return the label text */
        public String getLabel() {
            return label;
        }

        /** @return the absolute observation count */
        public int getCount() {
            return count;
        }

        /** @return the relative frequency */
        public double getRelFreq() {
            return relFreq;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/tudarmstadt/ukp/inception/recommendation/imls/stringmatch/StringMatchingRecommender$Sample.class */
    /**
     * One unit of text (built per sentence annotation by the callers in this file) together with
     * the offsets of its tokens and the labelled spans found inside it.
     */
    public static class Sample {
        private final int docNo;
        private final int begin;
        private final int end;
        private final String text;
        private final List<TokenSpan> tokens;
        private final List<Span> spans;

        /**
         * @param docNo number of the document the sample was taken from
         * @param begin begin offset of the sample
         * @param end end offset of the sample
         * @param text text of the sample
         * @param tokenAnnotations token annotations; only their begin/end offsets are retained
         * @param labelledSpans spans belonging to the sample; copied into a fixed-size list
         */
        public Sample(int docNo, int begin, int end, String text, Collection<AnnotationFS> tokenAnnotations, Collection<Span> labelledSpans) {
            this.docNo = docNo;
            this.begin = begin;
            this.end = end;
            this.text = text;
            List<TokenSpan> offsets = new ArrayList<>();
            for (AnnotationFS token : tokenAnnotations) {
                offsets.add(new TokenSpan(token.getBegin(), token.getEnd()));
            }
            this.tokens = offsets;
            Span[] copy = labelledSpans.toArray(new Span[labelledSpans.size()]);
            this.spans = Arrays.asList(copy);
        }

        /**
         * Finds the first span that fully covers the given offset range.
         *
         * @param i begin offset of the range
         * @param i2 end offset of the range
         * @return the first covering span, or an empty optional if there is none
         */
        public Optional<Span> getCoveringSpan(int i, int i2) {
            for (Span candidate : spans) {
                if (candidate.getBegin() <= i && candidate.getEnd() >= i2) {
                    return Optional.of(candidate);
                }
            }
            return Optional.empty();
        }

        public int getDocNo() {
            return docNo;
        }

        public int getBegin() {
            return begin;
        }

        public int getEnd() {
            return end;
        }

        /** @return the difference between end and begin offset */
        public int getLength() {
            return end - begin;
        }

        public String getText() {
            return text;
        }

        public List<TokenSpan> getTokens() {
            return tokens;
        }

        public List<Span> getSpans() {
            return spans;
        }

        /**
         * @param i an offset
         * @return whether any token of this sample ends exactly at the given offset
         */
        public boolean hasTokenEndingAt(int i) {
            return tokens.stream().anyMatch(token -> token.getEnd() == i);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/tudarmstadt/ukp/inception/recommendation/imls/stringmatch/StringMatchingRecommender$Span.class */
    /** Immutable labelled text range: offsets, covered text, label and score. */
    public static class Span {
        private final int begin;
        private final int end;
        private final String text;
        private final String label;
        private final double score;

        /**
         * @param begin begin offset
         * @param end end offset
         * @param text covered text
         * @param label label of the span
         * @param score score of the span
         */
        public Span(int begin, int end, String text, String label, double score) {
            this.begin = begin;
            this.end = end;
            this.text = text;
            this.label = label;
            this.score = score;
        }

        /** @return the begin offset */
        public int getBegin() {
            return begin;
        }

        /** @return the end offset */
        public int getEnd() {
            return end;
        }

        /** @return the covered text */
        public String getText() {
            return text;
        }

        /** @return the label */
        public String getLabel() {
            return label;
        }

        /** @return the score */
        public double getScore() {
            return score;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/tudarmstadt/ukp/inception/recommendation/imls/stringmatch/StringMatchingRecommender$TokenSpan.class */
    /** Immutable pair of begin and end offsets of a token. */
    public static class TokenSpan {
        private final int begin;
        private final int end;

        /**
         * @param begin begin offset of the token
         * @param end end offset of the token
         */
        public TokenSpan(int begin, int end) {
            this.begin = begin;
            this.end = end;
        }

        /** @return the begin offset */
        public int getBegin() {
            return begin;
        }

        /** @return the end offset */
        public int getEnd() {
            return end;
        }
    }

    /**
     * Creates a recommender without a gazeteer service; delegates to the three-argument
     * constructor with {@code null} as the service.
     *
     * @param recommender the recommender configuration passed on to the superclass
     * @param traits the traits of this recommender
     */
    public StringMatchingRecommender(Recommender recommender, StringMatchingRecommenderTraits traits) {
        this(recommender, traits, null);
    }

    /**
     * Creates a recommender.
     *
     * @param recommender the recommender configuration passed on to the superclass
     * @param recommenderTraits the traits of this recommender
     * @param service the gazeteer service consulted during training; may be {@code null}, in
     *            which case train() skips gazeteer loading
     */
    public StringMatchingRecommender(Recommender recommender, StringMatchingRecommenderTraits recommenderTraits, GazeteerService service) {
        super(recommender);
        log = LoggerFactory.getLogger(getClass());
        traits = recommenderTraits;
        gazeteerService = service;
    }

    /**
     * Tells whether a model is available in the given context.
     *
     * @param recommenderContext the context to look the model up in
     * @return {@code true} if a non-null model is stored under {@link #KEY_MODEL}
     */
    public boolean isReadyForPrediction(RecommenderContext recommenderContext) {
        Boolean modelAvailable = (Boolean) recommenderContext.get(KEY_MODEL).map(Objects::nonNull).orElse(false);
        return modelAvailable.booleanValue();
    }

    /**
     * Feeds gazeteer entries into the model held by the context, creating a new model if the
     * context has none yet, and stores the model back into the context.
     *
     * @param list the gazeteer entries to learn; {@code null} is treated as "no entries"
     * @param recommenderContext the context holding the model
     */
    public void pretrain(List<GazeteerEntry> list, RecommenderContext recommenderContext) {
        Trie<DictEntry> model = (Trie) recommenderContext.get(KEY_MODEL).orElseGet(this::createTrie);
        if (list != null) {
            list.forEach(entry -> learn(model, entry.text, entry.label));
        }
        recommenderContext.put(KEY_MODEL, model);
    }

    /**
     * Creates an empty trie configured with the sanitizer factory obtained from
     * {@code WhitespaceNormalizingSanitizer.factory()}.
     *
     * @param <T> type of the values stored in the trie
     * @return a new, empty trie
     */
    private <T> Trie<T> createTrie() {
        return new Trie<T>(WhitespaceNormalizingSanitizer.factory());
    }

    /**
     * Trains the dictionary model: first from all gazeteers of this recommender (if a gazeteer
     * service is available), then from the annotations of the predicted type in the given CASes.
     * The resulting model is stored in the context under {@link #KEY_MODEL}.
     *
     * @param recommenderContext the context holding the model
     * @param list the CASes to learn from
     * @throws RecommendationException declared by the signature; not thrown directly here
     */
    public void train(RecommenderContext recommenderContext, List<CAS> list) throws RecommendationException {
        if (gazeteerService != null) {
            for (Gazeteer gazeteer : gazeteerService.listGazeteers(this.recommender)) {
                try {
                    List<GazeteerEntry> entries = gazeteerService.readGazeteerFile(gazeteer);
                    pretrain(entries, recommenderContext);
                } catch (IOException e) {
                    // A gazeteer that cannot be read is logged and skipped; training continues.
                    log.info("Unable to load gazeteer [{}] for recommender [{}]({}) in project [{}]({})",
                            gazeteer.getName(),
                            gazeteer.getRecommender().getName(),
                            gazeteer.getRecommender().getId(),
                            gazeteer.getRecommender().getProject().getName(),
                            gazeteer.getRecommender().getProject().getId(),
                            e);
                }
            }
        }

        // Continue with the model that pretraining may have put into the context.
        Trie<DictEntry> model = (Trie) recommenderContext.get(KEY_MODEL).orElseGet(this::createTrie);
        for (CAS cas : list) {
            Type annotationType = getPredictedType(cas);
            Feature labelFeature = getPredictedFeature(cas);
            for (AnnotationFS annotation : CasUtil.select(cas, annotationType)) {
                String coveredText = annotation.getCoveredText();
                String label = annotation.getFeatureValueAsString(labelFeature);
                learn(model, coveredText, label);
            }
        }
        recommenderContext.put(KEY_MODEL, model);
        log.debug("Learned dictionary model with {} entries", model.size());
    }

    /**
     * Runs the dictionary model over the CAS and adds one annotation of the predicted type per
     * matched span, carrying the label, the score and the is-prediction flag.
     *
     * @param recommenderContext the context holding the model
     * @param cas the CAS to annotate
     * @throws RecommendationException if the context contains no model
     */
    public void predict(RecommenderContext recommenderContext, CAS cas) throws RecommendationException {
        Trie<DictEntry> model = (Trie) recommenderContext.get(KEY_MODEL).orElseThrow(
                () -> new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));

        Type annotationType = getPredictedType(cas);
        Feature labelFeature = getPredictedFeature(cas);
        Feature predictionFlagFeature = getIsPredictionFeature(cas);
        Feature scoreFeature = getScoreFeature(cas);

        for (Sample sample : predict(0, cas, model)) {
            for (Span span : sample.getSpans()) {
                AnnotationFS suggestion = cas.createAnnotation(annotationType, span.getBegin(), span.getEnd());
                suggestion.setStringValue(labelFeature, span.getLabel());
                suggestion.setDoubleValue(scoreFeature, span.getScore());
                suggestion.setBooleanValue(predictionFlagFeature, true);
                cas.addFsToIndexes(suggestion);
            }
        }
    }

    /**
     * Looks up every token start of every sentence in the trie and collects, per sentence, the
     * spans for matches that end exactly on a token boundary of the same sentence.
     *
     * @param i document number recorded in the produced samples
     * @param cas the CAS providing sentences, tokens and the document text
     * @param trie the dictionary model
     * @return one sample per sentence, in the order the sentences are selected from the CAS
     */
    private List<Sample> predict(int i, CAS cas, Trie<DictEntry> trie) {
        Type sentenceType = CasUtil.getType(cas, Sentence.class);
        Type tokenType = CasUtil.getType(cas, Token.class);
        String documentText = cas.getDocumentText();
        List<Sample> samples = new ArrayList<>();

        for (AnnotationFS sentence : CasUtil.select(cas, sentenceType)) {
            List<AnnotationFS> tokens = CasUtil.selectCovered(tokenType, sentence);
            List<Span> spans = new ArrayList<>();

            for (AnnotationFS token : tokens) {
                Trie<DictEntry>.Node match = trie.getNode(documentText, token.getBegin());
                if (match == null) {
                    continue;
                }
                int begin = token.getBegin();
                int end = begin + match.level;
                // Discard matches that stop in the middle of a token.
                boolean endsOnTokenBoundary = tokens.stream().anyMatch(other -> other.getEnd() == end);
                if (!endsOnTokenBoundary) {
                    continue;
                }
                for (LabelStats stats : match.value.getBest(this.maxRecommendations)) {
                    // Reference comparison on purpose: learn() stores the UNKNOWN_LABEL constant
                    // itself for blank labels. NOTE(review): presumably this keeps a user label
                    // with the same text from being turned into null - confirm before changing.
                    String label = stats.getLabel() == UNKNOWN_LABEL ? null : stats.getLabel();
                    spans.add(new Span(begin, end, documentText.substring(begin, end), label, stats.getRelFreq()));
                }
            }

            samples.add(new Sample(i, sentence.getBegin(), sentence.getEnd(), sentence.getCoveredText(), tokens, spans));
        }
        return samples;
    }

    /**
     * Evaluates the recommender: splits the extracted samples into a training and a test set
     * using the given splitter, learns a fresh model from the training spans and compares, token
     * by token, the predicted label with the gold label on the test set.
     *
     * @param list the CASes to extract the samples from
     * @param dataSplitter decides for every sample whether it is used for training, for testing
     *            or not at all
     * @return the evaluation result; marked as skipped when either set has fewer than two samples
     */
    public EvaluationResult evaluate(List<CAS> list, DataSplitter dataSplitter) {
        List<Sample> allSamples = extractData(list, this.layerName, this.featureName);
        List<Sample> trainingSet = new ArrayList<>();
        List<Sample> testSet = new ArrayList<>();
        for (Sample sample : allSamples) {
            switch (dataSplitter.getTargetSet(sample)) {
                case TRAIN:
                    trainingSet.add(sample);
                    break;
                case TEST:
                    testSet.add(sample);
                    break;
                default:
                    // Samples assigned to any other target set are left out.
                    break;
            }
        }

        int trainingSetSize = trainingSet.size();
        int testSetSize = testSet.size();
        // Share of the training set among all samples that were not put into the test set.
        double nonTestSize = allSamples.size() - testSetSize;
        double trainRatio = nonTestSize > 0.0d ? trainingSetSize / nonTestSize : 0.0d;

        if (trainingSetSize < 2 || testSetSize < 2) {
            String message = String.format("Not enough training data: training set [%s] items, test set [%s] of total [%s].", trainingSetSize, testSetSize, allSamples.size());
            log.info(message);
            EvaluationResult skipped = new EvaluationResult(trainingSetSize, testSetSize, trainRatio);
            skipped.setEvaluationSkipped(true);
            skipped.setErrorMsg(message);
            return skipped;
        }

        log.info("Training on [{}] items, predicting on [{}] of total [{}].", trainingSetSize, testSetSize, allSamples.size());

        Trie<DictEntry> model = createTrie();
        for (Sample sample : trainingSet) {
            for (Span span : sample.getSpans()) {
                learn(model, span.getText(), span.getLabel());
            }
        }

        List<LabelPair> labelPairs = new ArrayList<>();
        for (Sample sample : testSet) {
            for (TokenSpan token : sample.getTokens()) {
                // Token offsets are looked up relative to the start of the sample text.
                Trie<DictEntry>.Node match = model.getNode(sample.getText(), token.getBegin() - sample.getBegin());

                String predictedLabel = NO_LABEL;
                if (match != null && sample.hasTokenEndingAt(token.getBegin() + match.level)) {
                    List<LabelStats> best = match.value.getBest(1);
                    if (!best.isEmpty()) {
                        predictedLabel = best.get(0).getLabel();
                    }
                }

                Optional<Span> goldSpan = sample.getCoveringSpan(token.getBegin(), token.getEnd());
                String goldLabel = goldSpan.isPresent() ? goldSpan.get().getLabel() : NO_LABEL;

                labelPairs.add(new LabelPair(goldLabel, predictedLabel));
            }
        }

        return labelPairs.stream().collect(EvaluationResult.collector(trainingSetSize, testSetSize, trainRatio, new String[]{NO_LABEL}));
    }

    /**
     * Records one observation of a label for a surface string, creating the trie entry for the
     * string on first sight. Blank labels are recorded as the {@link #UNKNOWN_LABEL} constant.
     *
     * @param trie the model to update
     * @param str the surface string
     * @param str2 the label observed for the string; may be blank
     */
    private void learn(Trie<DictEntry> trie, String str, String str2) {
        String label = str2;
        if (StringUtils.isBlank(str2)) {
            label = UNKNOWN_LABEL;
        }

        DictEntry entry = trie.get(str);
        if (entry == null) {
            entry = new DictEntry(str);
            trie.put(str, entry);
        }
        entry.put(label);
    }

    /**
     * Builds one sample per sentence of every CAS, containing the sentence's tokens and those
     * annotations of the given layer whose feature value is not empty.
     *
     * @param list the CASes to read; their position in the list becomes the document number
     * @param str name of the annotation type (layer) to extract
     * @param str2 base name of the feature holding the label
     * @return the extracted samples
     */
    private List<Sample> extractData(List<CAS> list, String str, String str2) {
        long startedAt = System.currentTimeMillis();
        List<Sample> samples = new ArrayList<>();
        int docNo = 0;
        for (CAS cas : list) {
            Type sentenceType = CasUtil.getType(cas, Sentence.class);
            Type tokenType = CasUtil.getType(cas, Token.class);
            Type annotationType = CasUtil.getType(cas, str);
            Feature labelFeature = annotationType.getFeatureByBaseName(str2);

            for (AnnotationFS sentence : CasUtil.select(cas, sentenceType)) {
                List<Span> spans = new ArrayList<>();
                for (AnnotationFS annotation : CasUtil.selectCovered(annotationType, sentence)) {
                    String label = annotation.getFeatureValueAsString(labelFeature);
                    if (StringUtils.isEmpty(label)) {
                        // Annotations without a label value are not used.
                        continue;
                    }
                    spans.add(new Span(annotation.getBegin(), annotation.getEnd(), annotation.getCoveredText(), label, -1.0d));
                }
                List<AnnotationFS> tokens = CasUtil.selectCovered(tokenType, sentence);
                samples.add(new Sample(docNo, sentence.getBegin(), sentence.getEnd(), sentence.getCoveredText(), tokens, spans));
            }
            docNo++;
        }
        log.trace("Extracting data took {}ms", System.currentTimeMillis() - startedAt);
        return samples;
    }
}
