package org.dkpro.tc.core.task.uima;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.uima.UIMAException;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.tc.api.features.Feature;
import org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase;
import org.dkpro.tc.api.features.Instance;
import org.dkpro.tc.api.type.JCasId;
import org.dkpro.tc.api.type.TextClassificationOutcome;
import org.dkpro.tc.api.type.TextClassificationSequence;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.dkpro.tc.core.Constants;
import org.dkpro.tc.core.io.DataWriter;

/* loaded from: input_file:org/dkpro/tc/core/task/uima/ExtractFeaturesConnector.class */
/**
 * UIMA annotator that extracts feature {@link Instance}s from every processed CAS and hands
 * them to a reflectively instantiated {@link DataWriter}.
 *
 * <p>Life cycle as implemented here:
 * <ul>
 *   <li>{@link #initialize(UimaContext)} creates the helper objects and the data writer;</li>
 *   <li>{@link #process(JCas)} extracts the instances of one CAS and writes them either in the
 *       writer's generic format or directly in the classifier format;</li>
 *   <li>{@link #collectionProcessComplete()} applies the configured feature filters, converts
 *       the generic file if one was written, and closes the writer and the meta logger.</li>
 * </ul>
 */
public class ExtractFeaturesConnector extends JCasAnnotator_ImplBase implements ConnectorConstants {
    public static final String PARAM_OUTPUT_DIRECTORY = "outputDirectory";

    /** Directory that receives the written data, the feature metadata and the document meta log. */
    @ConfigurationParameter(name = PARAM_OUTPUT_DIRECTORY, mandatory = true)
    private File outputDirectory;
    public static final String PARAM_ADD_INSTANCE_ID = "addInstanceId";

    /** Passed through unchanged to the {@code InstanceExtractor}. */
    @ConfigurationParameter(name = PARAM_ADD_INSTANCE_ID, mandatory = true, defaultValue = {"true"})
    private boolean addInstanceId;

    /** Filter definitions; a non-empty array forces the generic format and a filter pass at the end. */
    @ConfigurationParameter(name = "featureFilters", mandatory = true)
    private String[] featureFilters;

    /** Outcome values handed to the data writer on initialization. */
    @ConfigurationParameter(name = ConnectorConstants.PARAM_OUTCOMES, mandatory = true)
    private String[] outcomes;

    /** Forwarded to both the instance extraction in {@link #process(JCas)} and the data writer. */
    @ConfigurationParameter(name = "useSparseFeatures", mandatory = true)
    private boolean useSparseFeatures;

    /** Fully qualified name of the {@link DataWriter} implementation to instantiate. */
    @ConfigurationParameter(name = ConnectorConstants.PARAM_DATA_WRITER_CLASS, mandatory = true)
    private String dataWriterClass;

    @ConfigurationParameter(name = "learningMode", mandatory = true, defaultValue = {Constants.LM_SINGLE_LABEL})
    private String learningMode;

    @ConfigurationParameter(name = "featureMode", mandatory = true, defaultValue = {Constants.FM_DOCUMENT})
    private String featureMode;

    @ConfigurationParameter(name = ConnectorConstants.PARAM_APPLY_WEIGHTING, mandatory = true, defaultValue = {"false"})
    private boolean applyWeighting;

    /**
     * When {@code true}, the known feature names are read from {@code featureNames.txt} in the
     * output directory during initialization, and {@link #enforceMatchingFeatures(List)} is
     * allowed to drop features.
     */
    @ConfigurationParameter(name = ConnectorConstants.PARAM_IS_TESTING, mandatory = true)
    private boolean isTesting;

    /**
     * Optional entries of the form {@code extractorName|typeName|typeName...}; see
     * {@link #checkRequiredTypes(JCas)}.
     */
    @ConfigurationParameter(name = ConnectorConstants.PARAM_REQUIRED_TYPES, mandatory = false)
    private Set<String> requiredTypes;

    /** Enables the feature-name check performed by {@link #enforceMatchingFeatures(List)}. */
    @ConfigurationParameter(name = "enforceMatchingFeatures", mandatory = false)
    private boolean enforceMatchingFeatures;

    /** Feature extractors used to build the instances; must not be empty. */
    @ExternalResource(key = ConnectorConstants.PARAM_FEATURE_EXTRACTORS, mandatory = true)
    protected FeatureExtractorResource_ImplBase[] featureExtractors;
    DataWriter dsw;
    boolean writeFeatureNames = true;
    private InstanceExtractor instanceExtractor;
    private FeatureMetaData featureMeta;
    private DocumentMetaLogger documentMetaLogger;

    /**
     * Creates the meta logger, the instance extractor, the feature metadata holder and the data
     * writer.
     *
     * @param uimaContext the UIMA context forwarded to the superclass
     * @throws ResourceInitializationException if no feature extractor is configured or any part
     *         of the setup fails; the underlying exception is attached as the cause
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            // Validate first so that nothing is created for a configuration that cannot work.
            if (this.featureExtractors.length == 0) {
                LogFactory.getLog(getClass()).error("No feature extractors have been defined.");
                throw new ResourceInitializationException();
            }
            this.documentMetaLogger = new DocumentMetaLogger(this.outputDirectory);
            this.instanceExtractor = new InstanceExtractor(this.featureMode, this.featureExtractors, this.addInstanceId);
            this.featureMeta = new FeatureMetaData();
            if (this.isTesting) {
                File featureNameFile = new File(this.outputDirectory, "featureNames.txt");
                this.featureMeta.setFeatureNames(new TreeSet<>(FileUtils.readLines(featureNameFile, StandardCharsets.UTF_8)));
            }
            // Class.newInstance() is deprecated; going through the no-arg constructor is the
            // documented replacement. Reflection failures are wrapped by the catch below.
            this.dsw = (DataWriter) Class.forName(this.dataWriterClass).getDeclaredConstructor().newInstance();
            this.dsw.init(this.outputDirectory, this.useSparseFeatures, this.learningMode, this.featureMode, this.applyWeighting, this.outcomes);
        } catch (Exception e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Extracts the instances of one CAS and writes them with the data writer.
     *
     * <p>The generic format is used when feature filters are configured or the writer cannot
     * stream; otherwise the instances are written directly in the classifier format.
     *
     * @param jCas the CAS to extract instances from
     * @throws AnalysisEngineProcessException if a required type is missing or collecting the
     *         feature names fails
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        checkRequiredTypes(jCas);
        this.documentMetaLogger.writeMeta(jCas);
        if (!this.featureMeta.didCollect()) {
            getFeatureNames(jCas);
        }
        LogFactory.getLog(getClass()).debug("--- feature extraction for CAS with id [" + JCasUtil.selectSingle(jCas, JCasId.class).getId() + "] ---");
        List<Instance> instances = this.instanceExtractor.getInstances(jCas, this.useSparseFeatures);
        LogFactory.getLog(getClass()).trace("--- Extracted [" + instances.size() + " feature instances] ---");
        if (this.enforceMatchingFeatures) {
            instances = enforceMatchingFeatures(instances);
        }
        if (isFilteringRequestedOrNoStreamingAvailable()) {
            this.dsw.writeGenericFormat(instances);
        } else {
            this.dsw.writeClassifierFormat(instances);
        }
    }

    /**
     * Verifies that every type listed in {@code requiredTypes} occurs in the CAS.
     *
     * <p>Each entry is split on {@code |}: the first token is used only in the error message as
     * the feature extractor name, every further token is loaded as a class and looked up in the
     * CAS. Nothing is checked when {@code requiredTypes} is {@code null} or empty.
     *
     * @param jCas the CAS to inspect
     * @throws AnalysisEngineProcessException wrapping the {@link IllegalStateException} raised
     *         for a missing type, or any failure while loading a type class
     */
    private void checkRequiredTypes(JCas jCas) throws AnalysisEngineProcessException {
        if (this.requiredTypes == null || this.requiredTypes.isEmpty()) {
            return;
        }
        try {
            for (String entry : this.requiredTypes) {
                String[] tokens = entry.split("\\|");
                String extractorName = tokens[0];
                for (int i = 1; i < tokens.length; i++) {
                    String typeName = tokens[i];
                    // NOTE(review): Class.forName yields Class<?>; confirm this satisfies the
                    // type parameter of JCasUtil.exists in the uimaFIT version in use.
                    if (!JCasUtil.exists(jCas, Class.forName(typeName))) {
                        throw new IllegalStateException("The feature extractor [" + extractorName + "] requires the annotation of the type [" + typeName + "] which was not found, did you forget to configure a tokenizer, PoS tagger, etc. in your pre-processing setup?");
                    }
                }
            }
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * @return {@code true} if at least one feature filter is configured or the data writer
     *         reports that it cannot stream
     */
    private boolean isFilteringRequestedOrNoStreamingAvailable() {
        return this.featureFilters.length > 0 || !this.dsw.canStream();
    }

    /**
     * Collects the feature metadata from instances extracted out of a mock CAS and writes the
     * metadata to the output directory.
     *
     * <p>The mock CAS takes its language from the {@link DocumentMetaData} of the given CAS.
     * The extraction passes {@code false} for the flag that {@link #process(JCas)} fills with
     * {@code useSparseFeatures}.
     *
     * @param jCas the CAS whose document metadata seeds the mock CAS
     * @throws AnalysisEngineProcessException wrapping any failure during collection or writing
     */
    private void getFeatureNames(JCas jCas) throws AnalysisEngineProcessException {
        LogFactory.getLog(getClass()).debug("--- collecting feature names ---");
        try {
            DocumentMetaData sourceMeta = JCasUtil.selectSingle(jCas, DocumentMetaData.class);
            JCas mockCas = buildMockCAS(sourceMeta);
            this.featureMeta.collectMetaData(this.instanceExtractor.getInstances(mockCas, false));
            this.featureMeta.writeMetaData(this.outputDirectory);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Builds a throw-away CAS with a base view plus the {@code PART_ONE} and {@code PART_TWO}
     * views. Every view gets the same dummy text, one sequence, one target and one outcome
     * annotation spanning that text, and a {@link JCasId} of {@link Integer#MIN_VALUE}.
     *
     * @param documentMetaData metadata whose language is copied to the mock CAS
     * @return the base view of the newly created CAS
     * @throws UIMAException if the CAS or one of its views cannot be created
     */
    private JCas buildMockCAS(DocumentMetaData documentMetaData) throws UIMAException {
        final String dummyText = "dummyText";
        JCas baseView = JCasFactory.createJCas();
        DocumentMetaData mockMeta = new DocumentMetaData(baseView);
        mockMeta.setLanguage(documentMetaData.getLanguage());
        // The current time serves as document id for the mock document.
        mockMeta.setDocumentId(String.valueOf(System.currentTimeMillis()));
        mockMeta.addToIndexes();
        JCas[] views = {baseView, baseView.createView(Constants.PART_ONE), baseView.createView(Constants.PART_TWO)};
        for (JCas view : views) {
            view.setDocumentText(dummyText);
            new TextClassificationSequence(view, 0, dummyText.length()).addToIndexes();
            new TextClassificationTarget(view, 0, dummyText.length()).addToIndexes();
            new TextClassificationOutcome(view, 0, dummyText.length()).addToIndexes();
            JCasId id = new JCasId(view);
            id.setId(Integer.MIN_VALUE);
            id.addToIndexes();
        }
        return baseView;
    }

    /**
     * In testing mode, removes from every instance all features whose name is not contained in
     * the feature names held by the feature metadata. Outside testing mode the list is returned
     * untouched.
     *
     * <p>The instances are modified in place; the returned list contains the same instance
     * objects in their original order.
     *
     * @param list the instances to restrict
     * @return {@code list} itself when not testing, otherwise a new list of the same instances
     */
    private List<Instance> enforceMatchingFeatures(List<Instance> list) {
        if (!this.isTesting) {
            return list;
        }
        List<Instance> restricted = new ArrayList<>(list.size());
        for (Instance instance : list) {
            List<Feature> known = new ArrayList<>();
            for (Feature feature : instance.getFeatures()) {
                if (this.featureMeta.getFeatureNames().contains(feature.getName())) {
                    known.add(feature);
                }
            }
            instance.setFeatures(known);
            restricted.add(instance);
        }
        return restricted;
    }

    /**
     * Finishes the written data: applies the feature filters to the generic file when filters
     * are configured, lets the writer convert from the generic format when that format was
     * used, and finally closes the data writer and the document meta logger.
     *
     * <p>Both close calls are attempted even if an earlier step failed, so that neither the
     * writer nor the logger is left open. The first failure is reported as the cause; later
     * ones are attached to it as suppressed exceptions.
     *
     * @throws AnalysisEngineProcessException wrapping the first failure of any step
     */
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        Exception failure = null;
        try {
            if (this.featureFilters.length > 0) {
                applyFilter(new File(this.outputDirectory, this.dsw.getGenericFileName()));
            }
            if (isFilteringRequestedOrNoStreamingAvailable()) {
                this.dsw.transformFromGeneric();
            }
        } catch (Exception e) {
            failure = e;
        }
        try {
            this.dsw.close();
        } catch (Exception e) {
            failure = chain(failure, e);
        }
        try {
            this.documentMetaLogger.close();
        } catch (Exception e) {
            failure = chain(failure, e);
        }
        if (failure != null) {
            throw new AnalysisEngineProcessException(failure);
        }
    }

    /**
     * Keeps the first failure as the primary one and records any later failure as suppressed.
     *
     * @param first the failure seen so far, or {@code null} if there was none
     * @param next the failure that just occurred
     * @return the exception to report
     */
    private static Exception chain(Exception first, Exception next) {
        if (first == null) {
            return next;
        }
        // An exception must not suppress itself; addSuppressed rejects that case.
        if (first != next) {
            first.addSuppressed(next);
        }
        return first;
    }

    /**
     * Runs an {@code InstanceFilter}, built from the configured filters and the testing flag,
     * over the given file.
     *
     * @param file the generic-format file to filter
     * @throws AnalysisEngineProcessException if filtering fails
     */
    private void applyFilter(File file) throws AnalysisEngineProcessException {
        new InstanceFilter(this.featureFilters, this.isTesting).filter(file);
    }
}
