package de.tudarmstadt.ukp.clarin.webanno.api.dao;

import de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService;
import de.tudarmstadt.ukp.clarin.webanno.api.CasStorageService;
import de.tudarmstadt.ukp.clarin.webanno.api.ImportExportService;
import de.tudarmstadt.ukp.clarin.webanno.api.RepositoryProperties;
import de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil;
import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport;
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature;
import de.tudarmstadt.ukp.clarin.webanno.model.Mode;
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
import de.tudarmstadt.ukp.clarin.webanno.model.TagSet;
import de.tudarmstadt.ukp.clarin.webanno.support.ZipUtils;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.ClassUtils;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.CasFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.factory.ConfigurationParameterFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.util.CasUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Lazy;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.context.event.EventListener;
import org.springframework.core.annotation.AnnotationAwareOrderComparator;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;

/**
 * Spring component that imports documents into CASes and exports CASes to files using the
 * {@link FormatSupport} implementations available in the application context.
 *
 * <p>The set of formats is (re)built by {@link #init()} whenever a
 * {@link ContextRefreshedEvent} is received.</p>
 */
@Component("importExportService")
public class ImportExportServiceImpl implements ImportExportService {
    private final Logger log = LoggerFactory.getLogger(getClass());
    private final RepositoryProperties repositoryProperties;
    private final CasStorageService casStorageService;
    private final AnnotationSchemaService annotationService;
    /** Injected (lazy, optional) list of format supports; may be {@code null} if none exist. */
    private final List<FormatSupport> formatsProxy;
    /** Formats by ID in sorted order; populated by {@link #init()}. */
    private Map<String, FormatSupport> formats;

    /**
     * Creates the service.
     *
     * @param repositoryProperties provides the repository root path used when exporting.
     * @param list the available format supports; optional and lazily resolved.
     * @param casStorageService used to locate the folder holding serialized CAS files.
     * @param annotationSchemaService provides the project type system, the annotation features
     *        and the export preparation of a CAS.
     */
    public ImportExportServiceImpl(@Autowired RepositoryProperties repositoryProperties, @Autowired(required = false) @Lazy List<FormatSupport> list, @Autowired CasStorageService casStorageService, @Autowired AnnotationSchemaService annotationSchemaService) {
        this.repositoryProperties = repositoryProperties;
        this.casStorageService = casStorageService;
        this.annotationService = annotationSchemaService;
        this.formatsProxy = list;
    }

    /** Rebuilds the format registry when the application context has been refreshed. */
    @EventListener({ContextRefreshedEvent.class})
    public void onContextRefreshedEvent() {
        init();
    }

    /**
     * Sorts the injected format supports with {@link AnnotationAwareOrderComparator}, registers
     * them by ID (a later format with the same ID replaces an earlier one) and publishes the
     * result as an unmodifiable map.
     *
     * @throws IllegalStateException if a format is neither readable nor writable.
     */
    void init() {
        Map<String, FormatSupport> discovered = new LinkedHashMap<>();
        if (this.formatsProxy != null) {
            List<FormatSupport> sorted = new ArrayList<>(this.formatsProxy);
            AnnotationAwareOrderComparator.sort(sorted);
            for (FormatSupport format : sorted) {
                discovered.put(format.getId(), format);
                this.log.info("Found format: {} ({}, {})", ClassUtils.getAbbreviatedName(format.getClass(), 20), format.getId(), readWriteMsg(format));
            }
        }
        this.formats = Collections.unmodifiableMap(discovered);
    }

    /**
     * Describes the capabilities of a format for logging.
     *
     * @return {@code "read only"}, {@code "write only"} or {@code "read/write"}.
     * @throws IllegalStateException if the format is neither readable nor writable.
     */
    private String readWriteMsg(FormatSupport formatSupport) {
        boolean readable = formatSupport.isReadable();
        boolean writable = formatSupport.isWritable();
        if (readable && writable) {
            return "read/write";
        }
        if (readable) {
            return "read only";
        }
        if (writable) {
            return "write only";
        }
        throw new IllegalStateException("Format [" + formatSupport.getId() + "] must be at least readable or writable.");
    }

    /**
     * @return an unmodifiable snapshot of the registered formats in registration order.
     */
    public List<FormatSupport> getFormats() {
        return Collections.unmodifiableList(new ArrayList<>(this.formats.values()));
    }

    /**
     * Exports an annotation document, stripping the extension from the target file name.
     * Delegates to the six-argument overload with {@code true} as the last argument.
     */
    @Transactional
    public File exportAnnotationDocument(SourceDocument sourceDocument, String str, FormatSupport formatSupport, String str2, Mode mode) throws UIMAException, IOException, ClassNotFoundException {
        return exportAnnotationDocument(sourceDocument, str, formatSupport, str2, mode, true);
    }

    /**
     * Loads the serialized CAS of a document from the annotation folder and exports it.
     *
     * @param sourceDocument the document whose annotations are exported.
     * @param str the user name; for the modes {@code ANNOTATION}, {@code AUTOMATION} and
     *        {@code CORRECTION} the CAS is read from {@code <str>.ser}, otherwise from
     *        {@code CURATION_USER.ser}.
     * @param formatSupport the format to write.
     * @param str2 the document ID / file name handed to {@link #exportCasToFile}.
     * @param mode selects which serialized CAS file is read.
     * @param z passed to the writer as its {@code stripExtension} parameter.
     * @return the exported file.
     * @throws FileNotFoundException if the serialized CAS file does not exist.
     */
    @Transactional
    public File exportAnnotationDocument(SourceDocument sourceDocument, String str, FormatSupport formatSupport, String str2, Mode mode, boolean z) throws UIMAException, IOException, ClassNotFoundException {
        File annotationFolder = this.casStorageService.getAnnotationFolder(sourceDocument);
        boolean perUserCas = mode.equals(Mode.ANNOTATION) || mode.equals(Mode.AUTOMATION) || mode.equals(Mode.CORRECTION);
        String serializedCasName = perUserCas ? str + ".ser" : "CURATION_USER.ser";
        File serializedCas = new File(annotationFolder, serializedCasName);
        if (!serializedCas.exists()) {
            throw new FileNotFoundException("CAS file [" + serializedCasName + "] not found in [" + annotationFolder + "]");
        }

        CAS cas = WebAnnoCasUtil.createCas();
        CasPersistenceUtils.readSerializedCas(cas, serializedCas);
        File exportedFile = exportCasToFile(cas, sourceDocument, str2, formatSupport, z);

        Project project = sourceDocument.getProject();
        // The project ID is only put into the logging context for the duration of this message.
        try (MDC.MDCCloseable ignored = MDC.putCloseable("projectId", String.valueOf(project.getId()))) {
            this.log.info("Exported annotations [{}]({}) for user [{}] from project [{}]({}) using format [{}]", sourceDocument.getName(), sourceDocument.getId(), str, project.getName(), project.getId(), formatSupport.getId());
        }
        return exportedFile;
    }

    /**
     * Reads a file into a new CAS that uses the full type system of the given project. If the
     * reader produced no sentences and/or no tokens, they are generated with
     * {@link #splitSentences(CAS)} and {@link #tokenize(CAS)}.
     *
     * @param file the file to import.
     * @param project the project whose type system is used for the CAS.
     * @param str the ID of the format to read.
     * @return the populated CAS.
     * @throws FileNotFoundException if the reader does not find the file.
     * @throws IOException if no readable format with the given ID exists, or if the document
     *         contains no tokens or no sentences even after segmentation.
     */
    public CAS importCasFromFile(File file, Project project, String str) throws UIMAException, IOException {
        CAS cas = CasFactory.createCas(this.annotationService.getFullProjectTypeSystem(project));

        FormatSupport format = getReadableFormatById(str).orElseThrow(() -> new IOException("No reader available for format [" + str + "]"));
        CollectionReaderDescription readerDescription = format.getReaderDescription();
        // Resolve the parent through the absolute file so that a bare relative file name
        // (which has no parent) does not cause a NullPointerException.
        String sourceLocation = file.getAbsoluteFile().getParentFile().getAbsolutePath();
        ConfigurationParameterFactory.addConfigurationParameters(readerDescription, "sourceLocation", sourceLocation, "patterns", new String[]{"[+]" + file.getName()});

        CollectionReader reader = CollectionReaderFactory.createReader(readerDescription);
        try {
            if (!reader.hasNext()) {
                throw new FileNotFoundException("Source file [" + file.getName() + "] not found in [" + file.getPath() + "]");
            }
            reader.getNext(cas);
        } finally {
            // Release whatever the reader holds; the CAS content is not affected by this.
            reader.destroy();
        }

        // Whether tokens exist is determined before sentences are generated.
        boolean hasTokens = WebAnnoCasUtil.exists(cas, CasUtil.getType(cas, Token.class));
        if (!WebAnnoCasUtil.exists(cas, CasUtil.getType(cas, Sentence.class))) {
            splitSentences(cas);
        }
        if (!hasTokens) {
            tokenize(cas);
        }

        boolean segmented = WebAnnoCasUtil.exists(cas, CasUtil.getType(cas, Token.class)) && WebAnnoCasUtil.exists(cas, CasUtil.getType(cas, Sentence.class));
        if (!segmented) {
            throw new IOException("The document appears to be empty. Unable to detect any tokens or sentences. Empty documents cannot be imported.");
        }
        return cas;
    }

    /**
     * Adds a sentence annotation for every non-empty (after trimming) segment that the
     * {@link Locale#US} sentence {@link BreakIterator} finds in the document text. Does nothing
     * if the CAS has no document text.
     */
    public static void splitSentences(CAS cas) {
        String text = cas.getDocumentText();
        if (text == null) {
            return;
        }
        BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.US);
        sentenceBreaks.setText(text);
        int begin = sentenceBreaks.first();
        for (int end = sentenceBreaks.next(); end != BreakIterator.DONE; begin = end, end = sentenceBreaks.next()) {
            int[] span = {begin, end};
            trim(text, span);
            if (!isEmpty(span[0], span[1])) {
                cas.addFsToIndexes(WebAnnoCasUtil.createSentence(cas, span[0], span[1]));
            }
        }
    }

    /**
     * Adds a token annotation for every non-empty (after trimming) segment that the
     * {@link Locale#US} word {@link BreakIterator} finds within each sentence of the CAS.
     */
    public static void tokenize(CAS cas) {
        BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.US);
        for (AnnotationFS sentence : WebAnnoCasUtil.selectSentences(cas)) {
            String sentenceText = sentence.getCoveredText();
            int sentenceBegin = sentence.getBegin();
            wordBreaks.setText(sentenceText);
            int begin = wordBreaks.first();
            // The loop must end at DONE; otherwise processing would never leave the sentence.
            for (int end = wordBreaks.next(); end != BreakIterator.DONE; begin = end, end = wordBreaks.next()) {
                int[] span = {begin, end};
                trim(sentenceText, span);
                if (!isEmpty(span[0], span[1])) {
                    // Break offsets are relative to the sentence, tokens are document-relative.
                    cas.addFsToIndexes(WebAnnoCasUtil.createToken(cas, span[0] + sentenceBegin, span[1] + sentenceBegin));
                }
            }
        }
    }

    /**
     * Shrinks a span in place so that it neither starts nor ends with a character accepted by
     * {@link #trimChar(char)}.
     *
     * <p>The backward scan for the new end is limited only by index 0, not by the span begin.
     * For a span without any content the resulting end can therefore be less than or equal to
     * the begin; use {@link #isEmpty(int, int)} on the result.</p>
     *
     * @param str the text the span refers to.
     * @param iArr a two-element array {@code {begin, end}} (end exclusive); updated in place.
     */
    public static void trim(String str, int[] iArr) {
        int begin = iArr[0];
        int last = iArr[1] - 1;

        // Move the last index backwards over trailing trim characters (stops at index 0).
        while (last > 0 && trimChar(str.charAt(last))) {
            last--;
        }
        int end = last + 1;

        // Move the begin forwards over leading trim characters, never past the new end.
        while (begin < end && trimChar(str.charAt(begin))) {
            begin++;
        }

        iArr[0] = begin;
        iArr[1] = end;
    }

    /**
     * @return {@code true} if the span {@code [i, i2)} contains no characters, i.e. if the
     *         begin is not smaller than the end.
     */
    public static boolean isEmpty(int i, int i2) {
        return i >= i2;
    }

    /**
     * Decides whether a character is removed by {@link #trim(String, int[])}.
     *
     * @return {@code true} for tab, line feed, carriage return, U+200E, U+200F, U+2028, U+2029
     *         and for every character for which {@link Character#isWhitespace(char)} holds.
     */
    public static boolean trimChar(char c) {
        switch (c) {
            case '\t':
            case '\n':
            case '\r':
            case '\u200E':
            case '\u200F':
            case '\u2028':
            case '\u2029':
                return true;
            default:
                return Character.isWhitespace(c);
        }
    }

    /**
     * Writes a CAS to a file using the writer of the given format.
     *
     * <p>The CAS is first prepared for export, its document metadata is pointed at the
     * document's source folder in the repository, and tagset descriptions are added for all
     * non-chain features that have a tagset. The writer output goes to a temporary directory.
     * If the writer produced more than one file, the directory is zipped to
     * {@code <directory>.zip}; otherwise the single file is copied next to the directory. The
     * temporary directory is removed afterwards.</p>
     *
     * <p>A failure to create the ZIP file is logged but not propagated; the returned file may
     * not exist in that case.</p>
     *
     * @param cas the CAS to export.
     * @param sourceDocument the document the CAS belongs to.
     * @param str the document ID / file name set in the document metadata.
     * @param formatSupport the format providing the writer.
     * @param z passed to the writer as its {@code stripExtension} parameter.
     * @return the exported file or ZIP archive.
     * @throws IOException if the temporary directory cannot be created or the writer did not
     *         produce any file.
     */
    public File exportCasToFile(CAS cas, SourceDocument sourceDocument, String str, FormatSupport formatSupport, boolean z) throws IOException, UIMAException {
        CAS exportCas = this.annotationService.prepareCasForExport(cas, sourceDocument);
        Project project = sourceDocument.getProject();

        File sourceFolder = new File(this.repositoryProperties.getPath().getAbsolutePath() + "/project/" + project.getId() + "/document/" + sourceDocument.getId() + "/source");
        String sourceFolderUri = sourceFolder.toURI().toURL().toExternalForm();
        DocumentMetaData metaData = DocumentMetaData.get(exportCas.getJCas());
        metaData.setDocumentBaseUri(sourceFolderUri);
        metaData.setDocumentUri(new File(sourceFolder, str).toURI().toURL().toExternalForm());
        metaData.setCollectionId(sourceFolderUri);
        metaData.setDocumentId(str);

        for (AnnotationFeature feature : this.annotationService.listAnnotationFeature(project)) {
            TagSet tagset = feature.getTagset();
            if (tagset != null && !"chain".equals(feature.getLayer().getType())) {
                updateCasWithTagSet(exportCas, feature.getLayer().getName(), tagset.getName());
            }
        }

        File exportTempDir = File.createTempFile("webanno", "export");
        try {
            // Turn the unique temporary file into a directory of the same name.
            if (!exportTempDir.delete() || !exportTempDir.mkdirs()) {
                throw new IOException("Unable to create temporary export directory [" + exportTempDir + "]");
            }

            AnalysisEngineDescription writer = formatSupport.getWriterDescription(sourceDocument.getProject(), exportCas);
            ConfigurationParameterFactory.addConfigurationParameters(writer, "useDocumentId", true, "escapeFilename", false, "targetLocation", exportTempDir, "stripExtension", z);
            SimplePipeline.runPipeline(exportCas, writer);

            File[] writtenFiles = exportTempDir.listFiles();
            if (writtenFiles == null || writtenFiles.length == 0) {
                throw new IOException("Writer for format [" + formatSupport.getId() + "] did not produce any file in [" + exportTempDir + "]");
            }

            File exportFile;
            if (writtenFiles.length > 1) {
                exportFile = new File(exportTempDir.getAbsolutePath() + ".zip");
                try {
                    ZipUtils.zipFolder(exportTempDir, exportFile);
                } catch (Exception e) {
                    // Best effort: the failure is reported in the log, not to the caller.
                    try (MDC.MDCCloseable ignored = MDC.putCloseable("projectId", String.valueOf(project.getId()))) {
                        this.log.error("Unable to create zip File", e);
                    }
                }
            } else {
                exportFile = new File(exportTempDir.getParent(), writtenFiles[0].getName());
                FileUtils.copyFile(writtenFiles[0], exportFile);
            }
            return exportFile;
        } finally {
            // Quiet deletion so that a cleanup problem neither hides an export error nor fails
            // an export that has already succeeded.
            FileUtils.deleteQuietly(exportTempDir);
        }
    }

    /**
     * Makes sure the CAS contains a {@link TagsetDescription} that maps the given layer to the
     * given tagset name. Existing descriptions for the layer get their name updated if it
     * differs; if there is none for the layer, a new description is created and indexed.
     *
     * @param cas the CAS to update.
     * @param str the layer name.
     * @param str2 the tagset name.
     */
    private static void updateCasWithTagSet(CAS cas, String str, String str2) {
        Type tagsetType = CasUtil.getType(cas, TagsetDescription.class);
        Feature layerFeature = tagsetType.getFeatureByBaseName("layer");
        Feature nameFeature = tagsetType.getFeatureByBaseName("name");

        boolean layerKnown = false;
        // Iterate over a snapshot because feature structures are (re-)added to the indexes below.
        List<FeatureStructure> descriptions = new ArrayList<FeatureStructure>(CasUtil.select(cas, tagsetType));
        for (FeatureStructure description : descriptions) {
            // Comparison starts from the argument so an unset layer feature simply does not match.
            if (!str.equals(description.getStringValue(layerFeature))) {
                continue;
            }
            if (!str2.equals(description.getStringValue(nameFeature))) {
                description.setStringValue(nameFeature, str2);
                cas.addFsToIndexes(description);
            }
            layerKnown = true;
        }
        if (layerKnown) {
            return;
        }

        FeatureStructure created = cas.createFS(tagsetType);
        created.setStringValue(layerFeature, str);
        created.setStringValue(nameFeature, str2);
        cas.addFsToIndexes(created);
    }
}
