package org.alfresco.repo.content.metadata;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.repo.content.filestore.FileContentReader;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.repository.datatype.TypeConversionException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.embedder.Embedder;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
import org.joda.time.format.DateTimeParser;
import org.joda.time.format.DateTimePrinter;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

@AlfrescoPublicApi
/* loaded from: input_file:org/alfresco/repo/content/metadata/TikaPoweredMetadataExtracter.class */
public abstract class TikaPoweredMetadataExtracter extends AbstractMappingMetadataExtracter implements MetadataEmbedder {
    /** Shared commons-logging logger, created for this class. */
    protected static Log logger = LogFactory.getLog(TikaPoweredMetadataExtracter.class);
    // Key names for the common properties; extractRaw() stores values in the raw map under these names.
    protected static final String KEY_AUTHOR = "author";
    protected static final String KEY_TITLE = "title";
    protected static final String KEY_SUBJECT = "subject";
    protected static final String KEY_CREATED = "created";
    protected static final String KEY_DESCRIPTION = "description";
    protected static final String KEY_COMMENTS = "comments";
    protected static final String KEY_TAGS = "dc:subject";
    /** Formatter for timestamps carrying a zone designator; built in the constructor with its zone set to UTC. */
    private DateTimeFormatter tikaUTCDateFormater;
    /** Formatter for the zone-less date patterns; built in the constructor. */
    private DateTimeFormatter tikaDateFormater;
    /** Optional selector placed into the ParseContext by buildParseContext(); may be null. */
    protected DocumentSelector documentSelector;
    /** Value passed as the first argument of the three-argument constructor; may be null. */
    private String extractorContext;
    /** Separator handed to String.split() in getMetadataValues(); the constructor sets it to ",". */
    private String metadataSeparator;

    /**
     * Decorator that wraps the supplied handler in a {@link MatchingContentHandler} driven by
     * the XPath expression {@code /xhtml:html/xhtml:head/descendant:node()}, i.e. the
     * descendants of the XHTML {@code head} element.
     */
    protected static class HeadContentHandler extends ContentHandlerDecorator {
        // XPath parser with the "xhtml" prefix bound to the XHTML namespace.
        private static final XPathParser PARSER = new XPathParser("xhtml", "http://www.w3.org/1999/xhtml");
        // Matcher selecting everything below <head>.
        private static final Matcher MATCHER = PARSER.parse("/xhtml:html/xhtml:head/descendant:node()");

        /**
         * @param contentHandler the handler placed behind the matching filter
         */
        protected HeadContentHandler(ContentHandler contentHandler) {
            super(new MatchingContentHandler(contentHandler, MATCHER));
        }
    }

    /**
     * SAX handler that collects what it sees into the {@link #tags} map:
     * <ul>
     *   <li>every attribute of every started element, keyed by the attribute's qualified name;</li>
     *   <li>the character data seen between an element's start and the next end event,
     *       keyed by the qualified name of the element that ended (only when non-empty).</li>
     * </ul>
     * Later entries overwrite earlier ones with the same key. The text buffer is reset on every
     * {@code startElement} and dropped on every {@code endElement}, so character data that
     * follows a nested child's end tag is not recorded.
     */
    protected static class MapCaptureContentHandler implements ContentHandler {
        /** Captured attribute values and element texts, keyed by qualified name. */
        protected Map<String, String> tags = new HashMap<>();
        /** Text of the element currently being read; null when no element is open for capture. */
        private StringBuilder text;

        protected MapCaptureContentHandler() {
        }

        /** Appends character data to the current buffer; ignored when no buffer is open. */
        @Override // org.xml.sax.ContentHandler
        public void characters(char[] cArr, int i, int i2) {
            if (this.text != null) {
                this.text.append(cArr, i, i2);
            }
        }

        /** Stores the buffered text (if any) under the element's qualified name and closes the buffer. */
        @Override // org.xml.sax.ContentHandler
        public void endElement(String str, String str2, String str3) {
            if (this.text != null && this.text.length() > 0) {
                this.tags.put(str3, this.text.toString());
            }
            this.text = null;
        }

        /** Records all attributes of the element and opens a fresh text buffer. */
        @Override // org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) {
            for (int i = 0; i < attributes.getLength(); i++) {
                this.tags.put(attributes.getQName(i), attributes.getValue(i));
            }
            this.text = new StringBuilder();
        }

        // The remaining ContentHandler callbacks are intentionally no-ops.

        @Override // org.xml.sax.ContentHandler
        public void endDocument() throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void endPrefixMapping(String str) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void ignorableWhitespace(char[] cArr, int i, int i2) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void processingInstruction(String str, String str2) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void setDocumentLocator(Locator locator) {
        }

        @Override // org.xml.sax.ContentHandler
        public void skippedEntity(String str) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void startDocument() throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void startPrefixMapping(String str, String str2) throws SAXException {
        }
    }

    /**
     * {@link ContentHandler} whose callbacks all do nothing. extractRaw() passes an instance
     * to the parser when needHeaderContents() returns false.
     */
    protected static class NullContentHandler implements ContentHandler {
        protected NullContentHandler() {
        }

        @Override // org.xml.sax.ContentHandler
        public void characters(char[] cArr, int i, int i2) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void endDocument() throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void endElement(String str, String str2, String str3) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void endPrefixMapping(String str) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void ignorableWhitespace(char[] cArr, int i, int i2) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void processingInstruction(String str, String str2) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void setDocumentLocator(Locator locator) {
        }

        @Override // org.xml.sax.ContentHandler
        public void skippedEntity(String str) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void startDocument() throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void startPrefixMapping(String str, String str2) throws SAXException {
        }
    }

    /**
     * @return the separator currently used by getMetadataValues() to split metadata values
     */
    public String getMetadataSeparator() {
        return this.metadataSeparator;
    }

    /**
     * Replaces the separator used to split multi-part metadata values.
     * Note: the value is passed straight to {@link String#split(String)} in
     * getMetadataValues(), so it is interpreted as a regular expression.
     *
     * @param str the new separator
     */
    public void setMetadataSeparator(String str) {
        this.metadataSeparator = str;
    }

    /**
     * Builds the list of supported mimetypes from an explicit list plus everything the given
     * parsers report through {@code getSupportedTypes(new ParseContext())}.
     * Duplicates are dropped and first-seen order is kept (explicit entries first, then the
     * parsers' types in the order given).
     * <p>
     * Decompiler note carried over from the original: access modifiers changed from protected.
     *
     * @param strArr    explicitly supported mimetypes; must not be null
     * @param parserArr parsers whose supported media types are appended; may be null or empty
     * @return a new list of distinct mimetype strings
     */
    public static ArrayList<String> buildSupportedMimetypes(String[] strArr, Parser... parserArr) {
        // A LinkedHashSet removes duplicates in O(1) per entry while preserving insertion order.
        LinkedHashSet<String> mimetypes = new LinkedHashSet<>(Arrays.asList(strArr));
        if (parserArr != null) {
            for (Parser parser : parserArr) {
                for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) {
                    mimetypes.add(mediaType.toString());
                }
            }
        }
        return new ArrayList<>(mimetypes);
    }

    /**
     * Creates an extracter with an extractor context and a list of supported mimetypes.
     *
     * @param str       the extractor context, later returned by getExtractorContext()
     * @param arrayList supported mimetypes; copied into a set, must not be null
     */
    public TikaPoweredMetadataExtracter(String str, ArrayList<String> arrayList) {
        this(str, new HashSet<>(arrayList), null);
    }

    /**
     * Creates an extracter for the given supported mimetypes, without an extractor context.
     *
     * @param arrayList supported mimetypes; copied into a set, must not be null
     */
    public TikaPoweredMetadataExtracter(ArrayList<String> arrayList) {
        this(null, new HashSet<>(arrayList), null);
    }

    /**
     * Creates an extracter from two mimetype lists, without an extractor context. Both lists
     * are copied into sets and forwarded, in this order, to the superclass constructor.
     *
     * @param arrayList  first mimetype list; must not be null
     * @param arrayList2 second mimetype list; must not be null
     */
    public TikaPoweredMetadataExtracter(ArrayList<String> arrayList, ArrayList<String> arrayList2) {
        this(null, new HashSet<>(arrayList), new HashSet<>(arrayList2));
    }

    /**
     * Creates an extracter for the given set of supported mimetypes, without an extractor context.
     *
     * @param hashSet supported mimetypes
     */
    public TikaPoweredMetadataExtracter(HashSet<String> hashSet) {
        this(null, hashSet, null);
    }

    /**
     * Creates an extracter from two mimetype sets, without an extractor context. Both sets
     * are forwarded, in this order, to the superclass constructor.
     *
     * @param hashSet  first mimetype set
     * @param hashSet2 second mimetype set
     */
    public TikaPoweredMetadataExtracter(HashSet<String> hashSet, HashSet<String> hashSet2) {
        this(null, hashSet, hashSet2);
    }

    /**
     * Primary constructor; all other constructors delegate here.
     * Sets the default metadata separator (","), stores the extractor context and builds the
     * two date formatters used by makeDate(): one (zone fixed to UTC) for timestamps with a
     * zone designator, one for the zone-less patterns.
     *
     * @param str      the extractor context; may be null
     * @param hashSet  first mimetype set, forwarded to the superclass
     * @param hashSet2 second mimetype set, forwarded to the superclass; may be null
     */
    public TikaPoweredMetadataExtracter(String str, HashSet<String> hashSet, HashSet<String> hashSet2) {
        super(hashSet, hashSet2);
        this.metadataSeparator = ",";
        this.extractorContext = str;

        DateTimeParser[] zonedParsers = parsersFor(
                "yyyy-MM-dd'T'HH:mm:ss'Z'",
                "yyyy-MM-dd'T'HH:mm:ssZ");
        DateTimeParser[] plainParsers = parsersFor(
                "yyyy-MM-dd'T'HH:mm:ss",
                "yyyy-MM-dd",
                "yyyy/MM/dd HH:mm:ss",
                "yyyy/MM/dd",
                "EEE MMM dd hh:mm:ss zzz yyyy");

        // Parse-only formatters: no printer is supplied.
        DateTimeFormatterBuilder zonedBuilder = new DateTimeFormatterBuilder();
        zonedBuilder.append((DateTimePrinter) null, zonedParsers);
        this.tikaUTCDateFormater = zonedBuilder.toFormatter().withZone(DateTimeZone.UTC);

        DateTimeFormatterBuilder plainBuilder = new DateTimeFormatterBuilder();
        plainBuilder.append((DateTimePrinter) null, plainParsers);
        this.tikaDateFormater = plainBuilder.toFormatter();
    }

    /** Turns each pattern into the Joda parser for that pattern, keeping the given order. */
    private static DateTimeParser[] parsersFor(String... patterns) {
        DateTimeParser[] parsers = new DateTimeParser[patterns.length];
        for (int i = 0; i < patterns.length; i++) {
            parsers[i] = DateTimeFormat.forPattern(patterns[i]).getParser();
        }
        return parsers;
    }

    /**
     * @return the extractor context supplied to the constructor, or null if none was given
     */
    protected String getExtractorContext() {
        return this.extractorContext;
    }

    /**
     * Converts a date string to a {@link Date}, trying in order: the UTC formatter, the UTC
     * formatter with the US locale, the zone-less formatter, the zone-less formatter with the
     * US locale. If every attempt fails with an {@link IllegalArgumentException}, the
     * superclass implementation is used.
     * <p>
     * Decompiler note carried over from the original: access modifiers changed from protected.
     *
     * @param str the date text to parse
     * @return the parsed date
     */
    @Override // org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter
    public Date makeDate(String str) {
        // Attempts 0/1 use the UTC formatter, 2/3 the zone-less one; odd attempts add Locale.US.
        for (int attempt = 0; attempt < 4; attempt++) {
            try {
                DateTimeFormatter formatter = attempt < 2 ? this.tikaUTCDateFormater : this.tikaDateFormater;
                if (attempt % 2 == 1) {
                    formatter = formatter.withLocale(Locale.US);
                }
                return formatter.parseDateTime(str).toDate();
            } catch (IllegalArgumentException ignored) {
                // This format did not match; fall through to the next attempt.
            }
        }
        return super.makeDate(str);
    }

    /**
     * Supplies the Tika parser that extractRaw() uses to parse the content.
     *
     * @return the parser to use
     */
    protected abstract Parser getParser();

    /**
     * Supplies the Tika embedder used by embedInternal(). This default returns null, in
     * which case embedInternal() does nothing.
     *
     * @return the embedder, or null when embedding is not supported
     */
    protected Embedder getEmbedder() {
        return null;
    }

    /**
     * Tells extractRaw() whether to capture the document's head contents. When this returns
     * true, extractRaw() parses with a HeadContentHandler wrapping a MapCaptureContentHandler
     * and hands the captured map to extractSpecific(); otherwise a NullContentHandler is used.
     *
     * @return false in this default implementation
     */
    protected boolean needHeaderContents() {
        return false;
    }

    /**
     * Hook called at the end of extractRaw(); its return value becomes extractRaw()'s result.
     * This default returns the raw map unchanged.
     *
     * @param metadata the Tika metadata produced by the parse
     * @param map      the raw properties collected so far
     * @param map2     the captured head contents, or null when needHeaderContents() is false
     * @return the raw properties to hand back to the caller
     */
    protected Map<String, Serializable> extractSpecific(Metadata metadata, Map<String, Serializable> map, Map<String, String> map2) {
        return map;
    }

    /**
     * Opens the input stream that is handed to Tika for the given reader.
     * <ul>
     *   <li>A {@link FileContentReader} is opened directly on its file via {@link TikaInputStream}.</li>
     *   <li>For {@code image/jpeg} and {@code image/tiff} content the stream is wrapped in a
     *       {@link TikaInputStream} and {@code getFile()} is called on it before returning
     *       (presumably so the image parsers get file-backed access; confirm against the
     *       Tika version in use).</li>
     *   <li>Any other content is returned as the reader's plain content stream.</li>
     * </ul>
     * If the {@code getFile()} call fails, the stream is closed before the exception is
     * rethrown so that it does not leak.
     *
     * @param contentReader the reader to open
     * @return the stream to parse; the caller is responsible for closing it
     * @throws IOException if the file cannot be opened or spooling fails
     */
    protected InputStream getInputStream(ContentReader contentReader) throws IOException {
        if (contentReader instanceof FileContentReader) {
            return TikaInputStream.get(((FileContentReader) contentReader).getFile());
        }
        InputStream contentInputStream = contentReader.getContentInputStream();
        String mimetype = contentReader.getMimetype();
        if (!"image/jpeg".equals(mimetype) && !"image/tiff".equals(mimetype)) {
            return contentInputStream;
        }
        TikaInputStream tikaInputStream = TikaInputStream.get(contentInputStream, new TemporaryResources());
        try {
            tikaInputStream.getFile();
        } catch (IOException | RuntimeException e) {
            // Do not leak the already-opened stream when spooling fails.
            try {
                tikaInputStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        return tikaInputStream;
    }

    /**
     * Sets the selector that buildParseContext() places into the parse context
     * (through getDocumentSelector()).
     *
     * @param documentSelector the selector to use; may be null
     */
    public void setDocumentSelector(DocumentSelector documentSelector) {
        this.documentSelector = documentSelector;
    }

    /**
     * Returns the selector to use for a parse. This implementation ignores both arguments
     * and returns the configured {@code documentSelector} field.
     *
     * @param metadata the metadata of the document about to be parsed (unused here)
     * @param str      the second argument given to buildParseContext(); extractRaw() passes the content mimetype (unused here)
     * @return the configured selector, or null if none was set
     */
    protected DocumentSelector getDocumentSelector(Metadata metadata, String str) {
        return this.documentSelector;
    }

    /**
     * Creates the {@link ParseContext} for a parse. The context carries the
     * {@link DocumentSelector} returned by getDocumentSelector() when that is non-null and is
     * otherwise empty.
     *
     * @param metadata the metadata of the document about to be parsed
     * @param str      passed through to getDocumentSelector(); extractRaw() supplies the content mimetype
     * @return a new parse context
     */
    protected ParseContext buildParseContext(Metadata metadata, String str) {
        final ParseContext context = new ParseContext();
        final DocumentSelector selector = getDocumentSelector(metadata, str);
        if (selector == null) {
            return context;
        }
        context.set(DocumentSelector.class, selector);
        return context;
    }

    /**
     * Parses the content with the Tika parser from getParser() and collects the raw properties.
     * <ol>
     *   <li>Every metadata name reported by Tika is stored under its own name.</li>
     *   <li>The common keys (author, title, comments, tags) are stored under the KEY_* names.</li>
     *   <li>Subject and description fall back to each other when only one of them is present.</li>
     *   <li>"created" comes from the MS Office creation date, else from the generic date.</li>
     *   <li>extractSpecific() gets the final say over the returned map.</li>
     * </ol>
     * The input stream is always closed; failures while closing are ignored.
     *
     * @param contentReader the content to parse
     * @return the raw properties as returned by extractSpecific()
     * @throws Throwable anything thrown while opening or parsing the content
     */
    @Override // org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter
    protected Map<String, Serializable> extractRaw(ContentReader contentReader) throws Throwable {
        Map<String, Serializable> rawProperties = newRawMap();
        InputStream inputStream = null;
        try {
            inputStream = getInputStream(contentReader);
            Parser parser = getParser();
            Metadata metadata = new Metadata();
            metadata.add("Content-Type", contentReader.getMimetype());
            ParseContext parseContext = buildParseContext(metadata, contentReader.getMimetype());

            // Typed as ContentHandler because NullContentHandler is not a ContentHandlerDecorator.
            ContentHandler handler;
            Map<String, String> headers = null;
            if (needHeaderContents()) {
                MapCaptureContentHandler headerCapture = new MapCaptureContentHandler();
                headers = headerCapture.tags;
                handler = new HeadContentHandler(headerCapture);
            } else {
                handler = new NullContentHandler();
            }
            parser.parse(inputStream, handler, metadata, parseContext);

            // Everything Tika reported, under its own name.
            for (String name : metadata.names()) {
                putRawValue(name, getMetadataValue(metadata, name), rawProperties);
            }

            // The common properties, under the well-known keys.
            putRawValue(KEY_AUTHOR, getMetadataValue(metadata, "Author"), rawProperties);
            putRawValue(KEY_TITLE, getMetadataValue(metadata, "title"), rawProperties);
            putRawValue(KEY_COMMENTS, getMetadataValue(metadata, "Comments"), rawProperties);
            putRawValue(KEY_TAGS, getMetadataValues(metadata, KEY_TAGS), rawProperties);

            // Subject and description stand in for each other when only one is available.
            String subject = getMetadataValue(metadata, "subject");
            String description = getMetadataValue(metadata, "description");
            if (subject != null || description != null) {
                putRawValue(KEY_DESCRIPTION, description != null ? description : subject, rawProperties);
                putRawValue(KEY_SUBJECT, subject != null ? subject : description, rawProperties);
            }

            // Prefer the MS Office creation date, else the generic date.
            String created = metadata.get(MSOffice.CREATION_DATE);
            if (created == null) {
                created = metadata.get(Metadata.DATE);
            }
            if (created != null) {
                putRawValue(KEY_CREATED, created, rawProperties);
            }

            return extractSpecific(metadata, rawProperties, headers);
        } finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException ignored) {
                    // Best effort: a failure to close must not hide the parse outcome.
                }
            }
        }
    }

    /**
     * Embeds the given properties into the content using the embedder from getEmbedder().
     * Does nothing when no embedder is available. Null values are skipped, collection values
     * contribute one metadata entry per element, and values that cannot be converted to a
     * String are logged at info level and skipped.
     * <p>
     * The input stream opened for the embed is closed afterwards. The writer's output stream
     * is deliberately left as before (not closed here).
     *
     * @param map           the properties to embed, keyed by metadata name
     * @param contentReader source of the existing content
     * @param contentWriter destination for the content with embedded metadata
     * @throws Throwable anything thrown while opening the streams or embedding
     */
    @Override // org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter
    protected void embedInternal(Map<String, Serializable> map, ContentReader contentReader, ContentWriter contentWriter) throws Throwable {
        Embedder embedder = getEmbedder();
        if (embedder == null) {
            return;
        }
        Metadata metadata = new Metadata();
        for (Map.Entry<String, Serializable> entry : map.entrySet()) {
            String name = entry.getKey();
            Serializable value = entry.getValue();
            if (value == null) {
                continue;
            }
            if (value instanceof Collection) {
                for (Object element : (Collection<?>) value) {
                    addConvertedMetadataValue(metadata, name, element);
                }
            } else {
                addConvertedMetadataValue(metadata, name, value);
            }
        }
        InputStream inputStream = getInputStream(contentReader);
        try {
            embedder.embed(metadata, inputStream, contentWriter.getContentOutputStream(), (ParseContext) null);
        } finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException ignored) {
                    // Best effort: a failure to close must not hide the embed outcome.
                }
            }
        }
    }

    /** Converts one value to a String and adds it to the metadata; conversion failures are logged and skipped. */
    private void addConvertedMetadataValue(Metadata metadata, String name, Object value) {
        try {
            metadata.add(name, (String) DefaultTypeConverter.INSTANCE.convert(String.class, value));
        } catch (TypeConversionException e) {
            logger.info("Could not convert " + name + ": " + e.getMessage());
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v9, types: [java.lang.String[], java.io.Serializable] */
    private Serializable getMetadataValues(Metadata metadata, String str) {
        LinkedHashSet linkedHashSet = new LinkedHashSet();
        for (String str2 : metadata.getValues(str)) {
            for (String str3 : str2.split(this.metadataSeparator)) {
                linkedHashSet.add(str3.trim());
            }
        }
        Object[] array = linkedHashSet.toArray();
        ?? r0 = (String[]) Arrays.copyOf(array, array.length, String[].class);
        if (r0.length == 0) {
            return null;
        }
        return r0.length == 1 ? r0[0] : r0;
    }

    /**
     * Reads a metadata entry as a single string. A single-valued entry is returned as is
     * (possibly null). A multi-valued entry is de-duplicated in first-seen order and joined
     * with {@code ", "}.
     *
     * @param metadata the Tika metadata to read from
     * @param str      the metadata name
     * @return the value, the joined distinct values, or null when a single-valued entry is absent
     */
    private String getMetadataValue(Metadata metadata, String str) {
        if (metadata.isMultiValued(str)) {
            LinkedHashSet<String> distinct = new LinkedHashSet<>(Arrays.asList(metadata.getValues(str)));
            StringBuilder joined = new StringBuilder();
            Iterator<String> remaining = distinct.iterator();
            if (remaining.hasNext()) {
                joined.append(remaining.next());
            }
            while (remaining.hasNext()) {
                joined.append(", ").append(remaining.next());
            }
            return joined.toString();
        }
        return metadata.get(str);
    }

    /**
     * Returns the leading run of digit characters of the given text, e.g. {@code "1024"} for
     * {@code "1024 pixels"}. The result is empty when the text does not start with a digit.
     * <p>
     * Decompiler note carried over from the original: access modifiers changed from protected.
     *
     * @param str the text to scan; must not be null
     * @return the leading digits, possibly empty
     */
    public String extractSize(String str) {
        int end = 0;
        final int length = str.length();
        while (end < length && Character.isDigit(str.charAt(end))) {
            end++;
        }
        return str.substring(0, end);
    }
}
