package dkpro.similarity.experiments.sts2013;

import de.tudarmstadt.ukp.dkpro.core.api.resources.DkproContext;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Document;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.gate.GateLemmatizer;
import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;
import dkpro.similarity.algorithms.lexical.string.LongestCommonSubsequenceComparator;
import dkpro.similarity.algorithms.lexical.string.LongestCommonSubsequenceNormComparator;
import dkpro.similarity.algorithms.lexical.string.LongestCommonSubstringComparator;
import dkpro.similarity.experiments.sts2013.Pipeline;
import dkpro.similarity.experiments.sts2013.util.CharacterNGramIdfValuesGenerator;
import dkpro.similarity.experiments.sts2013.util.StopwordFilter;
import dkpro.similarity.experiments.sts2013.util.WordIdfValuesGenerator;
import dkpro.similarity.ml.FeatureConfig;
import dkpro.similarity.ml.io.SimilarityScoreWriter;
import dkpro.similarity.uima.annotator.SimilarityScorer;
import dkpro.similarity.uima.io.CombinationReader;
import dkpro.similarity.uima.io.SemEvalCorpusReader;
import dkpro.similarity.uima.resource.SimpleTextSimilarityResource;
import dkpro.similarity.uima.resource.lexical.ngrams.CharacterNGramResource;
import dkpro.similarity.uima.resource.lexical.ngrams.WordNGramContainmentResource;
import dkpro.similarity.uima.resource.lexical.ngrams.WordNGramJaccardResource;
import dkpro.similarity.uima.resource.lexical.string.GreedyStringTilingMeasureResource;
import dkpro.similarity.uima.resource.lexsub.TWSISubstituteWrapperResource;
import dkpro.similarity.uima.resource.lsr.ResnikRelatednessResource;
import dkpro.similarity.uima.resource.lsr.aggregate.MCS06AggregateResource;
import dkpro.similarity.uima.resource.vsm.VectorIndexSourceRelatednessResource;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.io.FileUtils;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;

/* loaded from: input_file:dkpro/similarity/experiments/sts2013/FeatureGeneration.class */
/**
 * Generates similarity feature files for the STS 2013 experiments and merges
 * the feature files of several datasets into one combined dataset.
 *
 * <p>All output is written below {@code target/features/<mode>/<dataset>/},
 * where {@code <mode>} is the lower-cased name of the pipeline mode.
 */
public class FeatureGeneration {
    /** View names handed to {@link AggregateBuilder#add} and to the similarity scorer. */
    private static final String INITIAL_VIEW = "_InitialView";
    private static final String VIEW_1 = "View1";
    private static final String VIEW_2 = "View2";

    /** Character n-gram sizes for which IDF values are computed and features are generated. */
    private static final int[] CHARACTER_NGRAM_SIZES = {2, 3, 4};

    /**
     * Computes the IDF value files, then runs one similarity pipeline per
     * configured feature and writes its scores to
     * {@code target/features/<mode>/<dataset>/<targetPath>/<measureName>.txt}.
     * A feature whose output file already exists is skipped.
     *
     * @param dataset the dataset whose input file is read
     * @param mode    the pipeline mode; its lower-cased name selects the sub-directories
     * @throws Exception if IDF computation, engine creation or pipeline execution fails
     */
    public static void generateFeatures(Pipeline.Dataset dataset, Pipeline.Mode mode) throws Exception {
        for (int n : CHARACTER_NGRAM_SIZES) {
            CharacterNGramIdfValuesGenerator.computeIdfScores(mode, dataset, n);
        }
        WordIdfValuesGenerator.computeIdfScores(mode, dataset);

        String modeDir = mode.toString().toLowerCase();
        String datasetName = dataset.toString();

        for (FeatureConfig config : buildFeatureConfigs(modeDir, datasetName)) {
            System.out.println(config.getMeasureName());
            File outputFile = new File("target/features/" + modeDir + "/" + datasetName + "/" + config.getTargetPath() + "/" + config.getMeasureName() + ".txt");
            if (outputFile.exists()) {
                System.out.println(" - skipped, feature already generated");
                continue;
            }
            runFeaturePipeline(config, outputFile, modeDir, datasetName);
            System.out.println(" - done");
        }
        System.out.println("Successful.");
    }

    /**
     * Builds the list of feature configurations in the order in which they are processed.
     *
     * <p>Parameters are passed as explicit {@code Object[]} arrays so that the
     * factory overload taking a parameter array is the one selected.
     *
     * @param modeDir     lower-cased mode name used in the IDF file paths
     * @param datasetName dataset name used in the IDF file paths
     * @return the feature configurations: string, n-gram, word-sim and ESA measures
     * @throws Exception if a resource description cannot be created or the DKPro workspace cannot be resolved
     */
    private static ArrayList<FeatureConfig> buildFeatureConfigs(String modeDir, String datasetName) throws Exception {
        ArrayList<FeatureConfig> configs = new ArrayList<FeatureConfig>();
        String documentType = Document.class.getName();
        String tokenType = Token.class.getName();
        String lemmaValuePath = Lemma.class.getName() + "/value";
        String wordIdfFile = "target/utils/word-idf/" + modeDir + "/" + datasetName + ".txt";

        // String measures computed on the whole document.
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(GreedyStringTilingMeasureResource.class, new Object[]{"MinMatchLength", "3"}), documentType, false, "string", "GreedyStringTiling_3"));
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(SimpleTextSimilarityResource.class, new Object[]{"Mode", "text", "TextSimilarityMeasure", LongestCommonSubsequenceComparator.class.getName()}), documentType, false, "string", "LongestCommonSubsequenceComparator"));
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(SimpleTextSimilarityResource.class, new Object[]{"Mode", "text", "TextSimilarityMeasure", LongestCommonSubsequenceNormComparator.class.getName()}), documentType, false, "string", "LongestCommonSubsequenceNormComparator"));
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(SimpleTextSimilarityResource.class, new Object[]{"Mode", "text", "TextSimilarityMeasure", LongestCommonSubstringComparator.class.getName()}), documentType, false, "string", "LongestCommonSubstringComparator"));

        // Character n-grams, each pointing at the IDF file computed for the same n.
        for (int n : CHARACTER_NGRAM_SIZES) {
            configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(CharacterNGramResource.class, new Object[]{"N", Integer.toString(n), "IdfValuesFile", "target/utils/character-ngrams-idf/" + modeDir + "/" + n + "/" + datasetName + ".txt"}), documentType, false, "n-grams", "CharacterNGramMeasure_" + n));
        }

        // Word n-gram containment on tokens, stopword-filtered.
        for (int n : new int[]{1, 2}) {
            configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(WordNGramContainmentResource.class, new Object[]{"N", Integer.toString(n)}), tokenType, true, "n-grams", "WordNGramContainmentMeasure_" + n + "_stopword-filtered"));
        }

        // Word n-gram Jaccard on tokens, unfiltered and stopword-filtered variants.
        for (int n : new int[]{1, 3, 4}) {
            configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(WordNGramJaccardResource.class, new Object[]{"N", Integer.toString(n)}), tokenType, false, "n-grams", "WordNGramJaccardMeasure_" + n));
        }
        for (int n : new int[]{2, 4}) {
            configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(WordNGramJaccardResource.class, new Object[]{"N", Integer.toString(n)}), tokenType, true, "n-grams", "WordNGramJaccardMeasure_" + n + "_stopword-filtered"));
        }

        // MCS06 aggregation over Resnik/WordNet, plain and wrapped in the TWSI substitute resource.
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(MCS06AggregateResource.class, new Object[]{"TermSimilarityMeasure", ExternalResourceFactory.createExternalResourceDescription(ResnikRelatednessResource.class, new Object[]{"LsrResourceName", "wordnet", "LSRResourceLanguage", "en"}), "IdfValuesFile", wordIdfFile}), lemmaValuePath, false, "word-sim", "MCS06_Resnik_WordNet"));
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(TWSISubstituteWrapperResource.class, new Object[]{"TextSimilarityMeasure", ExternalResourceFactory.createExternalResourceDescription(MCS06AggregateResource.class, new Object[]{"TermSimilarityMeasure", ExternalResourceFactory.createExternalResourceDescription(ResnikRelatednessResource.class, new Object[]{"LsrResourceName", "wordnet", "LSRResourceLanguage", "en"}), "IdfValuesFile", wordIdfFile})}), "word-sim", "TWSI_MCS06_Resnik_WordNet"));

        // ESA measures backed by vector indexes below the DKPro workspace directory.
        String esaIndexRoot = DkproContext.getContext().getWorkspace().getAbsolutePath() + "/ESA/VectorIndexes/";
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(VectorIndexSourceRelatednessResource.class, new Object[]{"modelLocation", esaIndexRoot + "wordnet_eng_lem_nc_c"}), lemmaValuePath, false, "esa", "ESA_WordNet"));
        configs.add(new FeatureConfig(ExternalResourceFactory.createExternalResourceDescription(VectorIndexSourceRelatednessResource.class, new Object[]{"modelLocation", esaIndexRoot + "wiktionary_en"}), lemmaValuePath, false, "esa", "ESA_Wiktionary"));
        return configs;
    }

    /**
     * Runs the pipeline for a single feature: reader, segmenter, POS tagger,
     * lemmatizer, optional stopword filter, similarity scorer and score writer.
     *
     * @param config      the feature to compute
     * @param outputFile  file the score writer is configured to write to
     * @param modeDir     lower-cased mode name used in the input file location
     * @param datasetName dataset name used in the input file location
     * @throws Exception if a component cannot be created or the pipeline fails
     */
    private static void runFeaturePipeline(FeatureConfig config, File outputFile, String modeDir, String datasetName) throws Exception {
        CollectionReader reader = CollectionReaderFactory.createReader(SemEvalCorpusReader.class, new Object[]{"InputFile", "classpath:/datasets/sts-2013/" + modeDir + "/STS.input." + datasetName + ".txt", "CombinationStrategy", CombinationReader.CombinationStrategy.SAME_ROW_ONLY.toString()});

        ArrayList<AnalysisEngine> engines = new ArrayList<AnalysisEngine>();
        engines.add(createPerViewEngine(AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class, new Object[0])));
        engines.add(createPerViewEngine(AnalysisEngineFactory.createEngineDescription(OpenNlpPosTagger.class, new Object[]{"language", "en"})));
        engines.add(createPerViewEngine(AnalysisEngineFactory.createEngineDescription(GateLemmatizer.class, new Object[0])));
        if (config.filterStopwords()) {
            // The filter engine is only built for features that actually use it.
            engines.add(createPerViewEngine(AnalysisEngineFactory.createEngineDescription(StopwordFilter.class, new Object[]{StopwordFilter.PARAM_STOPWORD_LIST, "classpath:/stopwords/stopwords_english_punctuation.txt", StopwordFilter.PARAM_ANNOTATION_TYPE_NAME, Lemma.class.getName(), StopwordFilter.PARAM_STRING_REPRESENTATION_METHOD_NAME, "getValue"})));
        }
        engines.add(AnalysisEngineFactory.createEngine(SimilarityScorer.class, new Object[]{"NameView1", VIEW_1, "NameView2", VIEW_2, "SegmentFeaturePath", config.getSegmentFeaturePath(), "TextRelatednessResource", config.getResource()}));
        engines.add(AnalysisEngineFactory.createEngine(SimilarityScoreWriter.class, new Object[]{"OutputFile", outputFile.getAbsolutePath(), "OutputScoresOnly", true}));

        SimplePipeline.runPipeline(reader, engines.toArray(new AnalysisEngine[0]));
    }

    /**
     * Wraps a component description in an aggregate that contains it twice,
     * once added with the view names ({@code _InitialView}, {@code View1}) and
     * once with ({@code _InitialView}, {@code View2}).
     *
     * @param description the component to add for both views
     * @return the instantiated aggregate engine
     * @throws Exception if the aggregate cannot be created
     */
    private static AnalysisEngine createPerViewEngine(AnalysisEngineDescription description) throws Exception {
        AggregateBuilder builder = new AggregateBuilder();
        // NOTE(review): the name pairs are presumably view mappings from the component's initial view - confirm against uimaFIT docs.
        builder.add(description, new String[]{INITIAL_VIEW, VIEW_1});
        builder.add(description, new String[]{INITIAL_VIEW, VIEW_2});
        return builder.createAggregate();
    }

    /**
     * Concatenates the feature files of several source datasets into the
     * feature directory of a combined dataset.
     *
     * <p>The existing target directory is deleted first. Every {@code .txt}
     * file found (recursively) below the first source dataset's directory is
     * combined only if a file with the same relative path exists for all other
     * source datasets; contents are appended in the order the datasets were passed.
     *
     * @param mode       the pipeline mode; its lower-cased name selects the feature directory
     * @param dataset    the combined dataset to write
     * @param datasetArr the source datasets to concatenate; at least one is required
     * @throws IOException              if a feature file cannot be read, written or deleted
     * @throws IllegalArgumentException if no source dataset is given
     */
    public static void combineFeatureSets(Pipeline.Mode mode, Pipeline.Dataset dataset, Pipeline.Dataset... datasetArr) throws IOException {
        // Validate before touching the file system so a bad call cannot wipe the target directory.
        if (datasetArr == null || datasetArr.length == 0) {
            throw new IllegalArgumentException("At least one source dataset is required to combine feature sets");
        }
        System.out.println("Combining feature sets");
        String featureRoot = "target/features/" + mode.toString().toLowerCase() + "/";
        File targetDir = new File(featureRoot + dataset.toString());
        if (targetDir.exists()) {
            System.out.println(" - cleaned target directory");
            FileUtils.deleteDirectory(targetDir);
        }

        File[] sourceDirs = new File[datasetArr.length];
        for (int i = 0; i < datasetArr.length; i++) {
            sourceDirs[i] = new File(featureRoot + datasetArr[i].toString());
        }

        // Files are matched by their path relative to the dataset directory rather than by
        // substituting the dataset name in the absolute path, which would also rewrite any
        // other occurrence of that name (e.g. in a parent directory or file name).
        int relativeStart = sourceDirs[0].getAbsolutePath().length() + 1;
        for (File featureFile : FileUtils.listFiles(sourceDirs[0], new String[]{"txt"}, true)) {
            if (featureFile.isDirectory()) {
                continue;
            }
            String relativePath = featureFile.getAbsolutePath().substring(relativeStart);

            boolean presentEverywhere = true;
            for (int i = 1; i < sourceDirs.length && presentEverywhere; i++) {
                presentEverywhere = new File(sourceDirs[i], relativePath).exists();
            }
            if (!presentEverywhere) {
                continue;
            }

            System.out.println(" - processing " + featureFile.getName());
            StringBuilder combined = new StringBuilder();
            for (File sourceDir : sourceDirs) {
                combined.append(FileUtils.readFileToString(new File(sourceDir, relativePath)));
            }
            FileUtils.writeStringToFile(new File(targetDir, relativePath), combined.toString());
        }
        System.out.println(" - done");
    }
}
