package org.alfresco.transformer.executors;

import com.google.common.collect.ImmutableList;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import java.util.regex.Pattern;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.parser.pkg.PackageParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.ccil.cowan.tagsoup.XMLWriter;
import org.slf4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:BOOT-INF/lib/alfresco-transform-tika-2.5.6.jar:org/alfresco/transformer/executors/Tika.class */
/**
 * Command-line style front end that runs one of several Apache Tika parsers over a source file
 * and serialises the extracted content as plain text, HTML, XHTML/XML or CSV.
 *
 * <p>Expected arguments (see {@link #transform(String[])}):
 * <pre>
 *   &lt;transform&gt; &lt;sourceFile&gt; &lt;targetFile&gt; --targetMimetype=&lt;mimetype&gt; --targetEncoding=&lt;charset&gt;
 *       [--includeContents] [--notExtractBookmarksText]
 * </pre>
 * where {@code <transform>} is one of {@link #TRANSFORM_NAMES}.
 */
public class Tika {
    // Option prefixes recognised on the command line.
    public static final String TARGET_MIMETYPE = "--targetMimetype=";
    public static final String TARGET_ENCODING = "--targetEncoding=";
    public static final String INCLUDE_CONTENTS = "--includeContents";
    public static final String NOT_EXTRACT_BOOKMARKS_TEXT = "--notExtractBookmarksText";

    // Short type names; not referenced inside this class.
    public static final String CSV = "csv";
    public static final String DOC = "doc";
    public static final String DOCX = "docx";
    public static final String HTML = "html";
    public static final String MSG = "msg";
    public static final String PDF = "pdf";
    public static final String PPTX = "pptx";
    public static final String TXT = "txt";
    public static final String XHTML = "xhtml";
    // NOTE(review): the value is spelled "xslx"; kept unchanged because it is public API.
    // Confirm whether "xlsx" was intended.
    public static final String XSLX = "xslx";
    public static final String XML = "xml";
    public static final String ZIP = "zip";

    // Transform names accepted as the first positional argument.
    public static final String ARCHIVE = "Archive";
    public static final String OUTLOOK_MSG = "OutlookMsg";
    public static final String PDF_BOX = "PdfBox";
    public static final String POI_OFFICE = "Office";
    public static final String POI = "Poi";
    public static final String POI_OO_XML = "OOXML";
    public static final String TIKA_AUTO = "TikaAuto";
    public static final String TEXT_MINING = "TextMining";
    public static final List<String> TRANSFORM_NAMES = ImmutableList.of(ARCHIVE, OUTLOOK_MSG, PDF_BOX, POI_OFFICE, POI, POI_OO_XML, TIKA_AUTO, TEXT_MINING);

    /**
     * Selector used with the {@link #PDF_BOX} transform: rejects embedded documents whose
     * {@code Content-Type} is JPEG, TIFF or PNG and accepts everything else, including
     * documents with a missing or empty content type.
     */
    public static final DocumentSelector pdfBoxEmbededDocumentSelector = new DocumentSelector() {
        private final List<String> disabledMediaTypes = ImmutableList.of("image/jpeg", "image/tiff", "image/png");

        @Override
        public boolean select(Metadata metadata) {
            String contentType = metadata.get("Content-Type");
            if (contentType == null || contentType.isEmpty()) {
                return true;
            }
            return !this.disabledMediaTypes.contains(contentType);
        }
    };

    private final Parser packageParser = new PackageParser();
    private final Parser pdfParser = new PDFParser();
    private final Parser officeParser = new OfficeParser();
    private final Parser ooXmlParser = new OOXMLParser();
    private final Parser tikaOfficeDetectParser = new TikaOfficeDetectParser();
    private final PDFParserConfig pdfParserConfig = new PDFParserConfig();
    private final Parser autoDetectParser;

    /**
     * Creates the transformer and builds the auto-detect parser from {@code tika-config.xml}.
     *
     * <p>The checked exceptions come from reading the Tika configuration; a field initializer
     * cannot propagate them, so the parser is created here instead.
     *
     * @throws TikaException if the configuration cannot be interpreted
     * @throws IOException if the configuration cannot be read
     * @throws SAXException if the configuration is not well-formed XML
     */
    public Tika() throws TikaException, IOException, SAXException {
        this.autoDetectParser = new AutoDetectParser(readTikaConfig());
    }

    /**
     * Body content handler that turns table cells ({@code td} elements) into CSV fields.
     *
     * <p>Cells of a row are separated by commas. Cell text consisting only of digits,
     * {@code .}, {@code -} and {@code +} is written verbatim; any other cell text is wrapped
     * in double quotes with embedded double quotes doubled.
     */
    public static class CsvContentHandler extends BodyContentHandler {
        private static final char[] COMMA = {','};
        private static final Pattern ALL_NUMS = Pattern.compile("[\\d\\.\\-\\+]+");

        /** True while between the start and end of a {@code td} element. */
        private boolean inCell;
        /** True once a cell has been closed on the current row, so the next cell needs a separator. */
        private boolean needsComma;

        /**
         * @param writer destination for the generated CSV text
         */
        protected CsvContentHandler(Writer writer) {
            super(writer);
            this.inCell = false;
            this.needsComma = false;
        }

        /**
         * Drops a lone tab character and forwards all other ignorable whitespace.
         */
        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            // Inspect the character at the supplied offset, not the first element of the buffer.
            if (length == 1 && ch[start] == '\t') {
                return;
            }
            super.ignorableWhitespace(ch, start, length);
        }

        /**
         * Forwards text unchanged outside a cell or when it is purely numeric; otherwise emits
         * it as a quoted CSV field.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (!this.inCell) {
                super.characters(ch, start, length);
                return;
            }
            String text = new String(ch, start, length);
            if (ALL_NUMS.matcher(text).matches()) {
                super.characters(ch, start, length);
                return;
            }
            // Double every embedded quote, including adjacent ones, then wrap the field in quotes.
            char[] quoted = ("\"" + text.replace("\"", "\"\"") + "\"").toCharArray();
            super.characters(quoted, 0, quoted.length);
        }

        /**
         * Opens a cell on {@code td}, writing a separating comma first when a previous cell on
         * the row has already been closed; other elements are forwarded.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (!localName.equals("td")) {
                super.startElement(uri, localName, qName, attributes);
                return;
            }
            this.inCell = true;
            if (this.needsComma) {
                super.characters(COMMA, 0, 1);
            }
        }

        /**
         * Closes a cell on {@code td}; on {@code tr} resets the separator state for the next row.
         * Elements other than {@code td} are forwarded.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (localName.equals("td")) {
                this.needsComma = true;
                this.inCell = false;
                return;
            }
            if (localName.equals("tr")) {
                this.needsComma = false;
            }
            super.endElement(uri, localName, qName);
        }
    }

    /**
     * Reads {@code tika-config.xml} from the class path, logging instead of throwing on failure.
     *
     * @param logger receives an error entry when the configuration cannot be read
     * @return the parsed configuration, or {@code null} if reading it failed
     */
    public static TikaConfig readTikaConfig(Logger logger) {
        try {
            return readTikaConfig();
        } catch (Exception e) {
            logger.error("Failed to read tika-config.xml", e);
            return null;
        }
    }

    /** Loads {@code tika-config.xml} through this class's class loader. */
    private static TikaConfig readTikaConfig() throws TikaException, IOException, SAXException {
        return new TikaConfig(Tika.class.getClassLoader().getResource("tika-config.xml"));
    }

    /**
     * Command-line entry point. Exits with status -1 for invalid arguments and -2 for
     * failures while setting up or running the transform; prints the elapsed time on success.
     *
     * @param args see {@link #transform(String[])}
     */
    public static void main(String[] args) {
        long startedAt = System.currentTimeMillis();
        try {
            new Tika().transform(args);
        } catch (IOException | IllegalStateException | TikaException | SAXException e) {
            System.err.println("ERROR " + e.getMessage());
            e.printStackTrace();
            System.exit(-2);
        } catch (IllegalArgumentException e) {
            System.err.println("ERROR " + e.getMessage());
            System.exit(-1);
        }
        System.out.println("Finished in " + (System.currentTimeMillis() - startedAt) + "ms");
    }

    /**
     * Parses the argument list and runs the requested transform.
     *
     * <p>Arguments starting with {@code --} are options; the remaining arguments are, in
     * order, the transform name, the source file and the target file.
     *
     * @param args options and the three positional arguments
     * @throws IllegalArgumentException if an option is unknown, duplicated or malformed, if
     *         there are too few or too many positional arguments, if the transform name is
     *         not recognised, or if the target mimetype is not supported
     * @throws IllegalStateException if reading, parsing or writing fails
     */
    public void transform(String[] args) {
        String transformName = null;
        String sourceFilename = null;
        String targetFilename = null;
        String targetMimetype = null;
        String targetEncoding = null;
        Boolean includeContents = null;
        Boolean notExtractBookmarksText = null;

        for (String arg : args) {
            if (arg.startsWith("--")) {
                // The argument must start with the option name (not the other way round),
                // otherwise abbreviations such as "--" would be accepted and then fail in getValue.
                if (arg.startsWith(INCLUDE_CONTENTS)) {
                    getValue(arg, false, includeContents, INCLUDE_CONTENTS);
                    includeContents = Boolean.TRUE;
                } else if (arg.startsWith(TARGET_ENCODING)) {
                    targetEncoding = getValue(arg, true, targetEncoding, TARGET_ENCODING);
                } else if (arg.startsWith(TARGET_MIMETYPE)) {
                    targetMimetype = getValue(arg, true, targetMimetype, TARGET_MIMETYPE);
                } else if (arg.startsWith(NOT_EXTRACT_BOOKMARKS_TEXT)) {
                    getValue(arg, false, notExtractBookmarksText, NOT_EXTRACT_BOOKMARKS_TEXT);
                    notExtractBookmarksText = Boolean.TRUE;
                } else {
                    throw new IllegalArgumentException("Unexpected argument " + arg);
                }
            } else if (transformName == null) {
                transformName = arg;
            } else if (sourceFilename == null) {
                sourceFilename = arg;
            } else if (targetFilename == null) {
                targetFilename = arg;
            } else {
                throw new IllegalArgumentException("Unexpected argument " + arg);
            }
        }

        // The target file is the last positional argument, so its absence means one is missing.
        if (targetFilename == null) {
            throw new IllegalArgumentException("Missing arguments");
        }
        transform(transformName, includeContents != null && includeContents.booleanValue(),
                notExtractBookmarksText != null && notExtractBookmarksText.booleanValue(),
                sourceFilename, targetFilename, targetMimetype, targetEncoding);
    }

    /**
     * Validates one option and returns the text following its name.
     *
     * @param arg the raw argument, which starts with {@code optionName}
     * @param valueExpected whether the option must carry a value ({@code true}) or must not
     * @param previousValue value recorded from an earlier occurrence, or {@code null} if none
     * @param optionName the option prefix, used for slicing and in error messages
     * @return the trimmed value, empty when {@code valueExpected} is {@code false}
     * @throws IllegalArgumentException on a duplicate option, an unexpected value or a missing value
     */
    private String getValue(String arg, boolean valueExpected, Object previousValue, String optionName) {
        if (previousValue != null) {
            throw new IllegalArgumentException("Duplicate " + optionName);
        }
        String value = arg.substring(optionName.length()).trim();
        if (valueExpected) {
            if (value.isEmpty()) {
                throw new IllegalArgumentException("Expected value with " + optionName);
            }
        } else if (!value.isEmpty()) {
            throw new IllegalArgumentException("Unexpected value with " + optionName);
        }
        return value;
    }

    /**
     * Picks the parser (and, for PDFs, the embedded document selector) for the named transform
     * and runs it.
     *
     * @throws IllegalArgumentException if {@code transformName} is not one of {@link #TRANSFORM_NAMES}
     */
    private void transform(String transformName, boolean includeContents, boolean notExtractBookmarksText,
            String sourceFilename, String targetFilename, String targetMimetype, String targetEncoding) {
        final Parser parser;
        DocumentSelector documentSelector = null;
        switch (transformName) {
            case ARCHIVE:
                parser = this.packageParser;
                break;
            case OUTLOOK_MSG:
            case POI_OFFICE:
            case TEXT_MINING:
                parser = this.officeParser;
                break;
            case PDF_BOX:
                parser = this.pdfParser;
                documentSelector = pdfBoxEmbededDocumentSelector;
                break;
            case POI:
                parser = this.tikaOfficeDetectParser;
                break;
            case POI_OO_XML:
                parser = this.ooXmlParser;
                break;
            case TIKA_AUTO:
                parser = this.autoDetectParser;
                break;
            default:
                // Fail before any file is opened rather than hitting a null parser later.
                throw new IllegalArgumentException("Unknown transform " + transformName);
        }
        transform(parser, documentSelector, includeContents, notExtractBookmarksText,
                sourceFilename, targetFilename, targetMimetype, targetEncoding);
    }

    /**
     * Parses the source file with the given parser and writes the result to the target file
     * using the requested mimetype and encoding.
     *
     * @throws IllegalStateException wrapping any I/O, Tika or SAX failure
     */
    private void transform(Parser parser, DocumentSelector documentSelector, boolean includeContents,
            boolean notExtractBookmarksText, String sourceFilename, String targetFilename,
            String targetMimetype, String targetEncoding) {
        // try-with-resources closes writer, output stream and input stream (in that order)
        // on both success and failure.
        try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(sourceFilename));
                FileOutputStream output = new FileOutputStream(targetFilename);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output, targetEncoding))) {
            ContentHandler handler = getContentHandler(targetMimetype, writer);
            ParseContext context = buildParseContext(documentSelector, includeContents, notExtractBookmarksText);
            parser.parse(input, handler, new Metadata(), context);
        } catch (IOException | TikaException | SAXException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * Returns the SAX handler that serialises parser events for the target mimetype.
     *
     * @param targetMimetype one of {@code text/plain}, {@code text/csv}, {@code text/html},
     *        {@code application/xhtml+xml} or {@code text/xml}
     * @param output destination writer
     * @throws IllegalArgumentException if the mimetype is not one of the above
     * @throws IllegalStateException if the XML transformer cannot be configured
     */
    private ContentHandler getContentHandler(String targetMimetype, Writer output) {
        if ("text/plain".equals(targetMimetype)) {
            return new BodyContentHandler(output);
        }
        if ("text/csv".equals(targetMimetype)) {
            return new CsvContentHandler(output);
        }
        try {
            if ("text/html".equals(targetMimetype)) {
                return new ExpandedTitleContentHandler(newSerializingHandler(output, "html"));
            }
            if ("application/xhtml+xml".equals(targetMimetype) || "text/xml".equals(targetMimetype)) {
                return newSerializingHandler(output, "xml");
            }
        } catch (TransformerConfigurationException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
        throw new IllegalArgumentException("Invalid target mimetype " + targetMimetype);
    }

    /**
     * Creates an indenting identity transformer handler that writes to {@code output} using
     * the given output method.
     */
    private static TransformerHandler newSerializingHandler(Writer output, String method)
            throws TransformerConfigurationException {
        TransformerHandler handler = ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
        // NOTE(review): the call order (indent, setResult, method) is kept exactly as before;
        // confirm how the TrAX implementation in use treats properties set after setResult
        // before reordering these calls.
        handler.getTransformer().setOutputProperty(XMLWriter.INDENT, "yes");
        handler.setResult(new StreamResult(output));
        handler.getTransformer().setOutputProperty("method", method);
        return handler;
    }

    /**
     * Builds the parse context: registers the optional document selector, disables PDF
     * bookmark text extraction when requested, and registers the parser used for embedded
     * content (the auto-detect parser when contents are included, an empty parser otherwise).
     */
    private ParseContext buildParseContext(DocumentSelector documentSelector, boolean includeContents,
            boolean notExtractBookmarksText) {
        ParseContext context = new ParseContext();
        if (documentSelector != null) {
            context.set(DocumentSelector.class, documentSelector);
        }
        if (notExtractBookmarksText) {
            this.pdfParserConfig.setExtractBookmarksText(false);
            context.set(PDFParserConfig.class, this.pdfParserConfig);
        }
        context.set(Parser.class, includeContents ? this.autoDetectParser : new EmptyParser());
        return context;
    }
}
