package org.alfresco.repo.content.transform;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.alfresco.cmis.PropertyFilter;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.model.ContentModel;
import org.alfresco.repo.action.ParameterDefinitionImpl;
import org.alfresco.repo.action.executer.ActionExecuterAbstractBase;
import org.alfresco.repo.forms.processor.node.FormFieldConstants;
import org.alfresco.service.cmr.action.Action;
import org.alfresco.service.cmr.action.ParameterDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.repository.ContentService;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.NodeRef;
import org.alfresco.service.cmr.repository.NodeService;
import org.alfresco.service.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.ContainerExtractor;
import org.apache.tika.extractor.EmbeddedResourceHandler;
import org.apache.tika.extractor.ParserContainerExtractor;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;

/* loaded from: input_file:org/alfresco/repo/content/transform/TikaPoweredContainerExtractor.class */
/**
 * Pulls the resources embedded in a node's content out with Apache Tika and
 * stores each of them as a new content node in the source node's primary
 * parent folder.
 *
 * <p>The node service, content service and Tika configuration are injected
 * through the setters; {@link #setTikaConfig(TikaConfig)} also builds the
 * detector and parser used by {@link #extract(NodeRef, List)}.
 */
public class TikaPoweredContainerExtractor {
    private static final Log logger = LogFactory.getLog(TikaPoweredContainerExtractor.class);
    private NodeService nodeService;
    private ContentService contentService;
    private TikaConfig config;
    private AutoDetectParser parser;
    private Detector detector;

    /**
     * Tika callback that saves every embedded resource it is handed as a new
     * content node below a target folder, optionally filtering by media type.
     */
    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/alfresco/repo/content/transform/TikaPoweredContainerExtractor$Extractor.class */
    public class Extractor implements EmbeddedResourceHandler {
        /** Nodes created so far by {@link #handle(String, MediaType, InputStream)}. */
        private final List<NodeRef> extracted;
        /** Media types to keep; {@code null} means every type is accepted. */
        private final Set<MediaType> acceptTypes;
        /** Folder that receives the newly created nodes. */
        private final NodeRef folder;
        /** Number of unnamed resources seen so far, used to generate file names. */
        private int anonymousCount;

        /**
         * @param nodeRef folder in which extracted resources are created
         * @param list    media type strings to accept; {@code null} or empty
         *                disables filtering
         */
        private Extractor(NodeRef nodeRef, List<String> list) {
            this.anonymousCount = 0;
            this.folder = nodeRef;
            this.extracted = new ArrayList<NodeRef>();

            Set<MediaType> wanted = null;
            if (list != null && !list.isEmpty()) {
                wanted = new HashSet<MediaType>();
                for (String typeName : list) {
                    MediaType parsed = MediaType.parse(typeName);
                    if (parsed == null) {
                        // An unparseable entry can never match a detected type, so
                        // keep it out of the set and tell the operator about it.
                        logger.warn("Ignoring unparseable media type '" + typeName + "'");
                        continue;
                    }
                    wanted.add(parsed);
                }
            }
            this.acceptTypes = wanted;
        }

        /**
         * Stores one embedded resource as a new content node in the target
         * folder and records the created node.
         *
         * @param str         name of the embedded resource, or {@code null} when
         *                    it has none (a name is then generated)
         * @param mediaType   media type of the resource
         * @param inputStream content of the resource
         */
        public void handle(String str, MediaType mediaType, InputStream inputStream) {
            if (this.acceptTypes != null && !this.acceptTypes.contains(mediaType)) {
                logger.info("Skipping embedded " + mediaType + " entry " + str);
                return;
            }
            if (str == null) {
                // Unnamed resource: build "embedded<N><dot><subtype>" from a running counter.
                this.anonymousCount++;
                str = "embedded" + this.anonymousCount + FormFieldConstants.DOT_CHARACTER + mediaType.getSubtype();
            }
            logger.info("Extracting embedded " + mediaType + " entry " + str);

            Map<QName, Serializable> properties = new HashMap<QName, Serializable>();
            properties.put(ContentModel.PROP_NAME, str);
            NodeRef childRef = nodeService.createNode(
                    this.folder,
                    ContentModel.ASSOC_CONTAINS,
                    QName.createQName(str),
                    ContentModel.TYPE_CONTENT,
                    properties).getChildRef();

            ContentWriter writer = contentService.getWriter(childRef, ContentModel.PROP_CONTENT, true);
            writer.setMimetype(mediaType.toString());
            writer.putContent(inputStream);

            // Record the node so extract() can report what was created.
            this.extracted.add(childRef);
        }
    }

    /**
     * Action executer that runs the container extractor against the actioned
     * node, optionally restricted to the media types given in the
     * {@link #PARAM_MIME_TYPES} parameter.
     */
    /* loaded from: input_file:org/alfresco/repo/content/transform/TikaPoweredContainerExtractor$ExtractorActionExecutor.class */
    public static class ExtractorActionExecutor extends ActionExecuterAbstractBase {
        public static final String NAME = "extractEmbeddedResources";
        public static final String PARAM_MIME_TYPES = "mime-types";
        private TikaPoweredContainerExtractor extractor;

        /**
         * Injects the extractor this action delegates to.
         *
         * @param tikaPoweredContainerExtractor extractor used by {@link #executeImpl(Action, NodeRef)}
         */
        public void setTikaPoweredContainerExtractor(TikaPoweredContainerExtractor tikaPoweredContainerExtractor) {
            this.extractor = tikaPoweredContainerExtractor;
        }

        /**
         * Declares the single, optional text parameter {@link #PARAM_MIME_TYPES}.
         *
         * @param list parameter definition list to append to
         */
        @Override // org.alfresco.repo.action.ParameterizedItemAbstractBase
        protected void addParameterDefinitions(List<ParameterDefinition> list) {
            list.add(new ParameterDefinitionImpl(PARAM_MIME_TYPES, DataTypeDefinition.TEXT, false, getParamDisplayLabel(PARAM_MIME_TYPES)));
        }

        /**
         * Splits the {@link #PARAM_MIME_TYPES} parameter into trimmed, non-blank
         * tokens and hands them to the extractor together with the node.
         *
         * @param action  action carrying the optional media type parameter
         * @param nodeRef node whose embedded resources are extracted
         */
        @Override // org.alfresco.repo.action.executer.ActionExecuterAbstractBase
        protected void executeImpl(Action action, NodeRef nodeRef) {
            List<String> mimeTypes = null;
            String str = (String) action.getParameterValue(PARAM_MIME_TYPES);
            if (str != null && str.length() > 0) {
                mimeTypes = new ArrayList<String>();
                // NOTE(review): the delimiter constant is borrowed from the CMIS
                // PropertyFilter - presumably a comma; confirm.
                StringTokenizer tokens = new StringTokenizer(str, PropertyFilter.PROPERTY_NAME_TOKENS_DELIMITER);
                while (tokens.hasMoreTokens()) {
                    String token = tokens.nextToken().trim();
                    // Blank tokens carry no media type; keeping them would make the
                    // filter reject every resource.
                    if (token.length() > 0) {
                        mimeTypes.add(token);
                    }
                }
            }
            this.extractor.extract(nodeRef, mimeTypes);
        }
    }

    /**
     * @param nodeService service used to look up parents and create nodes
     */
    public void setNodeService(NodeService nodeService) {
        this.nodeService = nodeService;
    }

    /**
     * @param contentService service used to read source content and write extracted content
     */
    public void setContentService(ContentService contentService) {
        this.contentService = contentService;
    }

    /**
     * Stores the Tika configuration and builds the detector and auto-detect
     * parser from its MIME repository.
     *
     * @param tikaConfig Tika configuration to use
     */
    public void setTikaConfig(TikaConfig tikaConfig) {
        this.config = tikaConfig;
        this.detector = new DefaultDetector(this.config.getMimeRepository());
        this.parser = new AutoDetectParser(this.detector);
    }

    /**
     * Extracts the resources embedded in the content of the given node and
     * saves them as new content nodes in the node's primary parent folder.
     * The source content stream is always closed, whether or not extraction
     * succeeds.
     *
     * @param nodeRef node whose content is scanned for embedded resources
     * @param list    media type strings to extract; {@code null} or empty
     *                extracts every embedded resource
     * @return the nodes created for the extracted resources
     * @throws AlfrescoRuntimeException if Tika or the underlying I/O fails
     */
    public List<NodeRef> extract(NodeRef nodeRef, List<String> list) {
        NodeRef parentRef = this.nodeService.getPrimaryParent(nodeRef).getParentRef();
        TikaInputStream tikaInputStream = TikaInputStream.get(this.contentService.getReader(nodeRef, ContentModel.PROP_CONTENT).getContentInputStream());
        try {
            Extractor extractor = new Extractor(parentRef, list);
            ParserContainerExtractor containerExtractor = new ParserContainerExtractor(this.parser, this.detector);
            logger.info("Beginning extraction of " + nodeRef.toString());
            containerExtractor.extract(tikaInputStream, (ContainerExtractor) null, extractor);
            logger.info("Completed extraction of " + nodeRef.toString());
            return extractor.extracted;
        } catch (TikaException e) {
            throw new AlfrescoRuntimeException("Extraction Failed", e);
        } catch (IOException e) {
            throw new AlfrescoRuntimeException("Extraction Failed", e);
        } finally {
            // Release the stream on every path; a failed close must not mask
            // the extraction result or the original exception.
            try {
                tikaInputStream.close();
            } catch (IOException closeFailure) {
                logger.warn("Failed to close content stream of " + nodeRef, closeFailure);
            }
        }
    }
}
