package dkpro.similarity.experiments.rte.util;

import dkpro.similarity.algorithms.lexical.ngrams.CharacterNGramMeasure;
import dkpro.similarity.experiments.rte.Pipeline;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:dkpro/similarity/experiments/rte/util/CharacterNGramIdfValuesGenerator.class */
/**
 * Writes inverse-document-frequency (IDF) tables for character n-grams found in the
 * plain-text files of a dataset.
 *
 * <p>For a dataset and an n-gram length, the table is stored at
 * {@code target/utils/character-ngrams-idf/<n>/<dataset>.txt}, one
 * {@code <n-gram> TAB <idf>} entry per line.
 */
public class CharacterNGramIdfValuesGenerator {
    /** Platform line separator used to terminate each line of the generated table. */
    static final String LF = System.getProperty("line.separator");

    /**
     * Computes IDF scores for all character n-grams of length {@code i} occurring in the
     * {@code .txt} files directly inside {@code target/utils/plaintexts/<dataset>} (no
     * recursion into subdirectories) and writes them to the IDF table file. Nothing is
     * computed when the table file already exists.
     *
     * <p>The score of an n-gram is {@code log10(N / df)}, where {@code N} is the number of
     * files read and {@code df} is the number of files whose n-gram set contains the n-gram.
     * The order of the lines in the output file is the iteration order of a {@link HashMap}
     * and therefore unspecified.
     *
     * @param dataset the dataset whose common name selects the input directory and the
     *                output file name
     * @param i       the n-gram length passed to {@link CharacterNGramMeasure}
     * @throws Exception if listing, reading or writing files fails, or if n-gram extraction fails
     */
    public static void computeIdfScores(Pipeline.Dataset dataset, int i) throws Exception {
        System.out.println("Computing character " + i + "-grams");

        String datasetName = RteUtil.getCommonDatasetName(dataset);
        File outputFile = new File("target/utils/character-ngrams-idf/" + i + "/" + datasetName + ".txt");
        if (outputFile.exists()) {
            System.out.println(" - skipping, already exists");
            return;
        }

        Collection<File> documents =
                FileUtils.listFiles(new File("target/utils/plaintexts/" + datasetName), new String[]{"txt"}, false);

        // The measure is only used to extract n-grams here; it is handed an empty map
        // (presumably its IDF table, which does not exist yet -- TODO confirm).
        CharacterNGramMeasure measure = new CharacterNGramMeasure(i, new HashMap<String, Double>());

        // Document frequency: number of documents whose n-gram set contains the n-gram.
        // Each document contributes at most once per n-gram because getNGrams yields a Set,
        // so a single pass over the documents is enough and no per-document set has to be
        // kept in memory.
        Map<String, Integer> documentFrequency = new HashMap<String, Integer>();
        for (File document : documents) {
            // NOTE(review): the file is read without an explicit charset, as before;
            // confirm this matches how the plain-text files were written.
            Set<String> ngrams = measure.getNGrams(FileUtils.readFileToString(document));
            for (String ngram : ngrams) {
                Integer seen = documentFrequency.get(ngram);
                documentFrequency.put(ngram, seen == null ? 1 : seen + 1);
            }
        }

        // Held as a double so that the division below is a floating-point division.
        double documentCount = documents.size();

        StringBuilder table = new StringBuilder();
        for (Map.Entry<String, Integer> entry : documentFrequency.entrySet()) {
            double idf = Math.log10(documentCount / entry.getValue());
            table.append(entry.getKey()).append("\t").append(idf).append(LF);
        }

        FileUtils.writeStringToFile(outputFile, table.toString());
        System.out.println(" - done");
    }
}
