package org.apache.tika.parser.microsoft;

import com.gargoylesoftware.htmlunit.html.HtmlParagraph;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:lib/tika-parsers-1.1.jar:org/apache/tika/parser/microsoft/OfficeParser.class */
/**
 * Parser for OLE2 (POIFS) based Microsoft Office documents.
 *
 * <p>The root directory of the OLE2 container is inspected to work out which
 * kind of document it holds (see {@link POIFSDocumentType}); the matching
 * extractor is then run and its output is written to an
 * {@link XHTMLContentHandler}.</p>
 */
public class OfficeParser extends AbstractParser {
    private static final long serialVersionUID = 7393462244028653479L;

    /** XHTML element name used to wrap each piece of extracted plain text. */
    private static final String PARAGRAPH_TAG = "p";

    /**
     * Media types reported by {@link #getSupportedTypes(ParseContext)}.
     * {@link POIFSDocumentType#WORKS} is not listed here, and
     * {@link #parse(DirectoryNode, ParseContext, Metadata, XHTMLContentHandler)}
     * has no extractor for it either.
     */
    private static final Set<MediaType> SUPPORTED_TYPES =
            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
                    POIFSDocumentType.WORKBOOK.type,
                    POIFSDocumentType.OLE10_NATIVE.type,
                    POIFSDocumentType.WORDDOCUMENT.type,
                    POIFSDocumentType.UNKNOWN.type,
                    POIFSDocumentType.ENCRYPTED.type,
                    POIFSDocumentType.POWERPOINT.type,
                    POIFSDocumentType.PUBLISHER.type,
                    POIFSDocumentType.PROJECT.type,
                    POIFSDocumentType.VISIO.type,
                    POIFSDocumentType.XLR.type,
                    POIFSDocumentType.OUTLOOK.type)));

    /* loaded from: input_file:lib/tika-parsers-1.1.jar:org/apache/tika/parser/microsoft/OfficeParser$POIFSDocumentType.class */
    /**
     * The kinds of document that can live inside an OLE2 container, each paired
     * with a file extension and a media type.
     */
    public enum POIFSDocumentType {
        WORKBOOK("xls", MediaType.application("vnd.ms-excel")),
        OLE10_NATIVE("ole", MediaType.application("x-tika-msoffice-embedded")),
        WORDDOCUMENT("doc", MediaType.application("msword")),
        UNKNOWN("unknown", MediaType.application("x-tika-msoffice")),
        ENCRYPTED("ole", MediaType.application("x-tika-ooxml-protected")),
        POWERPOINT("ppt", MediaType.application("vnd.ms-powerpoint")),
        PUBLISHER("pub", MediaType.application("x-mspublisher")),
        PROJECT("mpp", MediaType.application("vnd.ms-project")),
        VISIO("vsd", MediaType.application("vnd.visio")),
        WORKS("wps", MediaType.application("vnd.ms-works")),
        XLR("xlr", MediaType.application("x-tika-msworks-spreadsheet")),
        OUTLOOK("msg", MediaType.application("vnd.ms-outlook"));

        private final String extension;
        private final MediaType type;

        POIFSDocumentType(String extension, MediaType type) {
            this.extension = extension;
            this.type = type;
        }

        /**
         * @return the file extension associated with this document type
         */
        public String getExtension() {
            return this.extension;
        }

        /**
         * @return the media type associated with this document type
         */
        public MediaType getType() {
            return this.type;
        }

        /**
         * Detects the document type from the root of the given file system.
         *
         * @param pOIFSFileSystem the OLE2 file system to inspect
         * @return the detected type, or {@link #UNKNOWN} if none matches
         */
        public static POIFSDocumentType detectType(POIFSFileSystem pOIFSFileSystem) {
            return detectType(pOIFSFileSystem.getRoot());
        }

        /**
         * Detects the document type from the root of the given file system.
         *
         * @param nPOIFSFileSystem the OLE2 file system to inspect
         * @return the detected type, or {@link #UNKNOWN} if none matches
         */
        public static POIFSDocumentType detectType(NPOIFSFileSystem nPOIFSFileSystem) {
            return detectType(nPOIFSFileSystem.getRoot());
        }

        /**
         * Detects the document type of a directory by collecting the names of
         * its immediate entries, asking {@link POIFSContainerDetector} for the
         * media type, and mapping that media type back to an enum constant.
         *
         * @param directoryEntry the directory whose entries are examined
         * @return the first constant (in declaration order) whose media type
         *         equals the detected one, or {@link #UNKNOWN} if none does
         */
        public static POIFSDocumentType detectType(DirectoryEntry directoryEntry) {
            Set<String> entryNames = new HashSet<String>();
            for (Entry entry : directoryEntry) {
                entryNames.add(entry.getName());
            }
            MediaType detected = POIFSContainerDetector.detect(entryNames, directoryEntry);
            for (POIFSDocumentType candidate : values()) {
                if (detected.equals(candidate.type)) {
                    return candidate;
                }
            }
            return UNKNOWN;
        }
    }

    /**
     * @param parseContext not consulted by this implementation
     * @return the fixed, unmodifiable set of media types this parser declares
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses an OLE2 document from a stream: starts the XHTML document,
     * resolves the container's root directory, delegates to
     * {@link #parse(DirectoryNode, ParseContext, Metadata, XHTMLContentHandler)}
     * and finally ends the XHTML document.
     *
     * @param inputStream    the document stream
     * @param contentHandler receiver of the generated XHTML SAX events
     * @param metadata       document metadata, read and updated during parsing
     * @param parseContext   parse context handed on to the extractors
     * @throws IOException   if the container cannot be read
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if the document cannot be processed
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        DirectoryNode root = openRoot(inputStream);
        parse(root, parseContext, metadata, xhtml);
        xhtml.endDocument();
    }

    /**
     * Resolves the root directory of the OLE2 container behind a stream,
     * reusing a container already attached to a {@link TikaInputStream} when
     * there is one.
     *
     * NOTE(review): the NPOIFSFileSystem instances created here are never
     * explicitly closed by this class - confirm who owns their lifetime.
     */
    private static DirectoryNode openRoot(InputStream inputStream) throws IOException {
        TikaInputStream tikaStream = TikaInputStream.cast(inputStream);
        if (tikaStream == null) {
            // No TikaInputStream available: read the container from the raw
            // stream, wrapped in a CloseShieldInputStream.
            return new NPOIFSFileSystem(new CloseShieldInputStream(inputStream)).getRoot();
        }

        Object container = tikaStream.getOpenContainer();
        if (container instanceof NPOIFSFileSystem) {
            return ((NPOIFSFileSystem) container).getRoot();
        }
        if (container instanceof DirectoryNode) {
            return (DirectoryNode) container;
        }
        if (tikaStream.hasFile()) {
            // A backing file exists, so open the container via its channel.
            return new NPOIFSFileSystem(tikaStream.getFileChannel()).getRoot();
        }
        return new NPOIFSFileSystem(new CloseShieldInputStream(tikaStream)).getRoot();
    }

    /**
     * Parses the document stored under the given directory node.
     *
     * <p>Summary information is extracted first. The document type is then
     * detected; unless it is {@link POIFSDocumentType#UNKNOWN} the content
     * type in {@code metadata} is overwritten with the detected type. Finally
     * the extractor matching the type is run. Types without an extractor
     * produce no body content.</p>
     *
     * @param directoryNode       root directory of the document
     * @param parseContext        source of the optional {@link Locale} and
     *                            {@link PasswordProvider}; also passed to the
     *                            extractors
     * @param metadata            document metadata, read and updated
     * @param xHTMLContentHandler receiver of the extracted content
     * @throws IOException   if the container cannot be read
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if the document cannot be processed, including
     *                       {@link EncryptedDocumentException} when an
     *                       encrypted document cannot be decrypted
     */
    protected void parse(DirectoryNode directoryNode, ParseContext parseContext, Metadata metadata, XHTMLContentHandler xHTMLContentHandler) throws IOException, SAXException, TikaException {
        new SummaryExtractor(metadata).parseSummaries(directoryNode);

        POIFSDocumentType documentType = POIFSDocumentType.detectType(directoryNode);
        if (documentType != POIFSDocumentType.UNKNOWN) {
            setType(metadata, documentType.getType());
        }

        switch (documentType) {
            case PUBLISHER:
                xHTMLContentHandler.element(PARAGRAPH_TAG, new PublisherTextExtractor(directoryNode).getText());
                break;
            case WORDDOCUMENT:
                new WordExtractor(parseContext).parse(directoryNode, xHTMLContentHandler);
                break;
            case POWERPOINT:
                new HSLFExtractor(parseContext).parse(directoryNode, xHTMLContentHandler);
                break;
            case WORKBOOK:
            case XLR:
                Locale locale = parseContext.get(Locale.class, Locale.getDefault());
                new ExcelExtractor(parseContext).parse(directoryNode, xHTMLContentHandler, locale);
                break;
            case VISIO:
                for (String text : new VisioTextExtractor(directoryNode).getAllText()) {
                    xHTMLContentHandler.element(PARAGRAPH_TAG, text);
                }
                break;
            case OUTLOOK:
                new OutlookExtractor(directoryNode, parseContext).parse(xHTMLContentHandler, metadata);
                break;
            case ENCRYPTED:
                parseEncrypted(directoryNode, parseContext, metadata, xHTMLContentHandler);
                break;
            case PROJECT:
                // No text extractor is invoked for Project files; only the
                // summary metadata and content type set above are reported.
                break;
            default:
                // Remaining types (UNKNOWN, OLE10_NATIVE, WORKS) have no
                // extractor here either.
                break;
        }
    }

    /**
     * Decrypts a password-protected OOXML document stored in an OLE2 container
     * and parses the decrypted stream with {@link OOXMLParser}.
     *
     * <p>The password comes from the {@link PasswordProvider} in the parse
     * context. If there is no provider, or the provider returns {@code null},
     * {@link Decryptor#DEFAULT_PASSWORD} is tried instead, so that a missing
     * password results in an {@link EncryptedDocumentException} rather than a
     * null being passed to the decryptor.</p>
     */
    private void parseEncrypted(DirectoryNode directoryNode, ParseContext parseContext, Metadata metadata, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
        Decryptor decryptor = Decryptor.getInstance(new EncryptionInfo(directoryNode));
        try {
            String password = null;
            PasswordProvider passwordProvider = parseContext.get(PasswordProvider.class);
            if (passwordProvider != null) {
                password = passwordProvider.getPassword(metadata);
            }
            if (password == null) {
                password = Decryptor.DEFAULT_PASSWORD;
            }

            if (!decryptor.verifyPassword(password)) {
                throw new EncryptedDocumentException();
            }

            ContentHandler embeddedHandler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));
            new OOXMLParser().parse(decryptor.getDataStream(directoryNode), embeddedHandler, metadata, parseContext);
        } catch (GeneralSecurityException e) {
            throw new EncryptedDocumentException(e);
        }
    }

    /** Stores the given media type as the "Content-Type" metadata value. */
    private void setType(Metadata metadata, MediaType mediaType) {
        metadata.set("Content-Type", mediaType.toString());
    }
}
