package org.apache.tika.cli;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.FileUtils;
import org.apache.tika.exception.TikaException;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/cli/TikaCLITest.class */
/**
 * Tests that call {@code TikaCLI.main} with different command-line arguments and
 * inspect what was written to {@code System.out}.
 *
 * <p>{@link #setUp()} replaces {@code System.out} with an in-memory buffer and
 * {@link #tearDown()} restores the original stream. The buffer is created once per
 * test and is never reset inside a test, so when a test calls {@code TikaCLI.main}
 * more than once, later assertions see the accumulated output of every run so far.
 */
public class TikaCLITest {
    /** Name of the test document most of the tests pass to the CLI. */
    private static final String ALICE = "alice.cli.test";

    /** Name of the document used by the recursive/JSON tests. */
    private static final String RECURSIVE_DOCX = "test_recursive_embedded.docx";

    /** Name of the HTML document used by the JSON metadata tests. */
    private static final String JSON_HTML = "testJsonMultipleInts.html";

    private final File testDataFile = new File("src/test/resources/test-data");
    private final URI testDataURI = this.testDataFile.toURI();

    /** Buffer that receives everything printed to {@code System.out} during a test. */
    private ByteArrayOutputStream outContent = null;

    /** The {@code System.out} that was installed before the test started. */
    private PrintStream stdout = null;

    /** String form of {@link #testDataURI}; resource names are appended to it directly. */
    private String resourcePrefix;

    /**
     * Remembers the current {@code System.out} and redirects it to a fresh UTF-8 buffer.
     */
    @Before
    public void setUp() throws Exception {
        // Capture the real stream first so tearDown() can always restore it.
        this.stdout = System.out;
        this.outContent = new ByteArrayOutputStream();
        this.resourcePrefix = this.testDataURI.toString();
        System.setOut(new PrintStream(this.outContent, true, StandardCharsets.UTF_8.name()));
    }

    /**
     * Restores the {@code System.out} saved by {@link #setUp()}.
     */
    @After
    public void tearDown() throws Exception {
        // If setUp() failed before saving the stream, leave System.out untouched.
        if (this.stdout != null) {
            System.setOut(this.stdout);
        }
    }

    /** Checks that {@code --list-parser-detail} output mentions an OpenDocument web-text type. */
    @Test
    public void testListParserDetail() throws Exception {
        TikaCLI.main(new String[]{"--list-parser-detail"});
        assertContains("application/vnd.oasis.opendocument.text-web", capturedOutput());
    }

    /** Smoke test: {@code --list-parser} must complete without throwing; output is not checked. */
    @Test
    public void testListParsers() throws Exception {
        TikaCLI.main(new String[]{"--list-parser"});
    }

    /** Checks the {@code -x} output for an XML declaration and, with a digest option, a SHA256 meta tag. */
    @Test
    public void testXMLOutput() throws Exception {
        TikaCLI.main(new String[]{"-x", resource(ALICE)});
        assertContains("?xml version=\"1.0\" encoding=\"UTF-8\"?", capturedOutput());

        TikaCLI.main(new String[]{"-x", "--digest=SHA256", resource(ALICE)});
        assertContains("<meta name=\"X-TIKA:digest:SHA256\" content=\"e90779adbac09c4ee", capturedOutput());
    }

    /** Checks the {@code -h} output for the XHTML namespace, an expanded title element and a SHA384 meta tag. */
    @Test
    public void testHTMLOutput() throws Exception {
        TikaCLI.main(new String[]{"-h", resource(ALICE)});
        String firstRun = capturedOutput();
        assertContains("html xmlns=\"http://www.w3.org/1999/xhtml", firstRun);
        Assert.assertTrue("Expanded <title></title> element should be present",
                firstRun.contains("<title></title>"));

        TikaCLI.main(new String[]{"-h", "--digest=SHA384", resource(ALICE)});
        assertContains("<meta name=\"X-TIKA:digest:SHA384\" content=\"c69ea023f5da95a026", capturedOutput());
    }

    /** Checks that the {@code -t} output contains a phrase from the test document. */
    @Test
    public void testTextOutput() throws Exception {
        TikaCLI.main(new String[]{"-t", resource(ALICE)});
        assertContains("finished off the cake", capturedOutput());
    }

    /** Checks the {@code -m} output for the content type and, with a digest option, a SHA512 entry. */
    @Test
    public void testMetadataOutput() throws Exception {
        TikaCLI.main(new String[]{"-m", resource(ALICE)});
        assertContains("text/plain", capturedOutput());

        TikaCLI.main(new String[]{"-m", "--digest=SHA512", resource(ALICE)});
        String bothRuns = capturedOutput();
        assertContains("text/plain", bothRuns);
        assertContains("X-TIKA:digest:SHA512: dd459d99bc19ff78fd31fbae46e0", bothRuns);
    }

    /** Checks compact {@code --json} output: a multi-valued field, key ordering and an MD2 digest key. */
    @Test
    public void testJsonMetadataOutput() throws Exception {
        TikaCLI.main(new String[]{"--json", "--digest=MD2", resource(JSON_HTML)});
        String json = capturedOutput();
        assertContains("\"fb:admins\":\"1,2,3,4\",", json);
        assertJsonKeyOrder(json);
        assertContains("\"X-TIKA:digest:MD2\":", json);
    }

    /** Checks pretty-printed {@code --json -r} output: an indented array value and key ordering. */
    @Test
    public void testJsonMetadataPrettyPrintOutput() throws Exception {
        TikaCLI.main(new String[]{"--json", "-r", resource(JSON_HTML)});
        String json = capturedOutput();
        assertContains("  \"X-Parsed-By\": [\n    \"org.apache.tika.parser.DefaultParser\",\n    \"org.apache.tika.parser.html.HtmlParser\"\n  ],\n", json);
        assertJsonKeyOrder(json);
    }

    /** Checks that the {@code -l} output contains {@code "en"}. */
    @Test
    public void testLanguageOutput() throws Exception {
        TikaCLI.main(new String[]{"-l", resource(ALICE)});
        assertContains("en", capturedOutput());
    }

    /** Checks that the {@code -d} output contains {@code "text/plain"}. */
    @Test
    public void testDetectOutput() throws Exception {
        TikaCLI.main(new String[]{"-d", resource(ALICE)});
        assertContains("text/plain", capturedOutput());
    }

    /** Checks that the {@code --list-met-models} output contains {@code "text/plain"}. */
    @Test
    public void testListMetModels() throws Exception {
        TikaCLI.main(new String[]{"--list-met-models", resource(ALICE)});
        assertContains("text/plain", capturedOutput());
    }

    /** Checks that the {@code --list-supported-types} output lists a type with the octet-stream supertype. */
    @Test
    public void testListSupportedTypes() throws Exception {
        TikaCLI.main(new String[]{"--list-supported-types", resource(ALICE)});
        assertContains("supertype: application/octet-stream", capturedOutput());
    }

    /** Runs {@code -z} on {@code coffee.xls} and checks that the expected files appear in the extract directory. */
    @Test
    public void testExtract() throws Exception {
        assertFilesExtracted("coffee.xls",
                "MBD002B040A.cdx",
                "file4.png",
                "MBD002B0FA6_file5.bin",
                "MBD00262FE3.txt",
                "file0.emf");
    }

    /**
     * Asserts that {@code file} exists, is not a directory and is non-empty.
     *
     * @param file the file that should have been written
     * @param str  description of the directory contents, used in the "not found" message
     */
    protected static void assertExtracted(File file, String str) {
        Assert.assertTrue("File " + file.getName() + " not found in " + str, file.exists());
        Assert.assertFalse("File " + file.getName() + " is a directory!", file.isDirectory());
        Assert.assertTrue("File " + file.getName() + " wasn't extracted with contents", file.length() > 0);
    }

    /** Checks that every expected {@code sheetNames} value is printed by {@code -m}. */
    @Test
    public void testMultiValuedMetadata() throws Exception {
        TikaCLI.main(new String[]{"-m", resource("testMultipleSheets.numbers")});
        String metadata = capturedOutput();
        assertContains("sheetNames: Checking", metadata);
        assertContains("sheetNames: Secon sheet", metadata);
        assertContains("sheetNames: Logical Sheet 3", metadata);
        assertContains("sheetNames: Sheet 4", metadata);
    }

    /** Runs {@code -z --extract-dir=target} on a zip with a subdirectory and checks the progress message. */
    @Test
    public void testZipWithSubdirs() throws Exception {
        // The run extracts into "target", so that is where leftovers must be removed.
        File extractedFile = new File("target/subdir/foo.txt");
        File extractedDir = new File("target/subdir");
        // Best-effort removal of leftovers from an earlier run; results are deliberately ignored.
        extractedFile.delete();
        extractedDir.delete();
        try {
            TikaCLI.main(new String[]{"-z", "--extract-dir=target", resource("testWithSubdirs.zip")});
            assertContains("Extracting 'subdir/foo.txt'", capturedOutput());
        } finally {
            // Clean up even when the assertion fails.
            extractedFile.delete();
            extractedDir.delete();
        }
    }

    /** Runs {@code -z} on a PDF with attachments and checks that the expected files appear. */
    @Test
    public void testExtractInlineImages() throws Exception {
        assertFilesExtracted("testPDF_childAttachments.pdf",
                "image0.jpg",
                "Press Quality(1).joboptions",
                "Unit10.doc");
    }

    /** Expects a {@link TikaException} when {@code bad_xml.xml} is processed without a custom config. */
    @Test
    public void testDefaultConfigException() throws Exception {
        try {
            TikaCLI.main(new String[]{resource("bad_xml.xml")});
            Assert.fail("Expected a TikaException for bad_xml.xml");
        } catch (TikaException expected) {
            // This is the outcome the test is looking for.
        }
    }

    /** Checks that with {@code tika-config1.xml} the same file is processed and reports the HTML parser. */
    @Test
    public void testConfig() throws Exception {
        TikaCLI.main(new String[]{configArg("tika-config1.xml"), resource("bad_xml.xml")});
        String output = capturedOutput();
        assertContains("apple", output);
        assertContains("org.apache.tika.parser.html.HtmlParser", output);
    }

    /** Checks that the TIKA-2389 config still yields embedded-document text. */
    @Test
    public void testConfigIgnoreInit() throws Exception {
        TikaCLI.main(new String[]{configArg("TIKA-2389-ignore-init-problems.xml"), resource(RECURSIVE_DOCX)});
        assertContains("embed_1a", capturedOutput());
    }

    /** Checks that {@code -m -J -r} prints metadata for embedded resources but no content field. */
    @Test
    public void testJsonRecursiveMetadataParserMetadataOnly() throws Exception {
        TikaCLI.main(new String[]{"-m", "-J", "-r", resource(RECURSIVE_DOCX)});
        String json = capturedOutput();
        assertContains("[\n  {\n    \"Application-Name\": \"Microsoft Office Word\",\n    \"Application-Version\": \"15.0000\",\n    \"Character Count\": \"28\",\n    \"Character-Count-With-Spaces\": \"31\",", json);
        assertContains("\"X-TIKA:embedded_resource_path\": \"/embed1.zip\"", json);
        assertNotContains("X-TIKA:content", json);
    }

    /** Checks that {@code -J -r} without a content option emits an escaped XHTML content field. */
    @Test
    public void testJsonRecursiveMetadataParserDefault() throws Exception {
        TikaCLI.main(new String[]{"-J", "-r", resource(RECURSIVE_DOCX)});
        assertContains("\"X-TIKA:content\": \"\\u003chtml xmlns\\u003d\\\"http://www.w3.org/1999/xhtml", capturedOutput());
    }

    /** Checks that {@code -J -r -t} output contains newline-escaped text of embedded documents. */
    @Test
    public void testJsonRecursiveMetadataParserText() throws Exception {
        TikaCLI.main(new String[]{"-J", "-r", "-t", resource(RECURSIVE_DOCX)});
        String json = capturedOutput();
        assertContains("\\n\\nembed_4\\n", json);
        assertContains("\\n\\nembed_0", json);
    }

    /** Checks that {@code --digest=MD5} adds the two expected MD5 values to the recursive JSON. */
    @Test
    public void testDigestInJson() throws Exception {
        TikaCLI.main(new String[]{"-J", "-r", "-t", "--digest=MD5", resource(RECURSIVE_DOCX)});
        String json = capturedOutput();
        assertContains("\"X-TIKA:digest:MD5\": \"59f626e09a8c16ab6dbc2800c685f772\",", json);
        assertContains("\"X-TIKA:digest:MD5\": \"f9627095ef86c482e61d99f0cc1cf87d\"", json);
    }

    /** Checks that both the static and the current config dumps list a known detector and parser. */
    @Test
    public void testConfigSerializationStaticAndCurrent() throws Exception {
        final String detector = "<detector class=\"org.apache.tika.parser.microsoft.POIFSContainerDetector\"/>";
        final String parser = "<parser class=\"org.apache.tika.parser.executable.ExecutableParser\"/>";

        TikaCLI.main(new String[]{"--dump-static-config"});
        String staticDump = capturedOutput();
        assertContains(detector, staticDump);
        assertContains(parser, staticDump);

        TikaCLI.main(new String[]{"--dump-current-config"});
        // NOTE(review): the buffer still holds the static dump, so these checks see both runs.
        String bothDumps = capturedOutput();
        assertContains(detector, bothDumps);
        assertContains(parser, bothDumps);
    }

    /** Checks the whitespace-normalised minimal dump of {@code tika-config2.xml}. */
    @Test
    public void testConfigSerializationCustomMinimal() throws Exception {
        TikaCLI.main(new String[]{configArg("tika-config2.xml"), "--dump-minimal-config"});
        // Collapse all whitespace runs so the comparison does not depend on indentation or line endings.
        String normalised = capturedOutput().replaceAll("[\r\n\t ]+", " ");
        assertContains("<parser class=\"org.apache.tika.parser.DefaultParser\"> <mime-exclude>application/pdf</mime-exclude> <mime-exclude>image/jpeg</mime-exclude> </parser> <parser class=\"org.apache.tika.parser.EmptyParser\"> <mime>application/pdf</mime> </parser>", normalised);
    }

    /** Checks that the static dump of {@code tika-config2.xml} does not mention the executable parser package. */
    @Test
    public void testConfigSerializationCustomStatic() throws Exception {
        TikaCLI.main(new String[]{configArg("tika-config2.xml"), "--dump-static-config"});
        assertNotContains("org.apache.tika.parser.executable.Executable", capturedOutput());
    }

    /** Returns everything captured from {@code System.out} so far, decoded as UTF-8. */
    private String capturedOutput() {
        return new String(this.outContent.toByteArray(), StandardCharsets.UTF_8);
    }

    /** Returns the URI string of the named file in the test-data directory. */
    private String resource(String name) {
        return this.resourcePrefix + name;
    }

    /** Builds a {@code --config=} argument that points at the named file in the test-data directory. */
    private String configArg(String configFileName) {
        return "--config=" + this.testDataFile.toString() + "/" + configFileName;
    }

    /** Fails with a descriptive message unless {@code output} contains {@code expected}. */
    private static void assertContains(String expected, String output) {
        Assert.assertTrue("Expected output to contain: " + expected, output.contains(expected));
    }

    /** Fails with a descriptive message if {@code output} contains {@code unexpected}. */
    private static void assertNotContains(String unexpected, String output) {
        Assert.assertFalse("Expected output not to contain: " + unexpected, output.contains(unexpected));
    }

    /**
     * Asserts that {@code "Content-Encoding"} appears before {@code fb:admins}, which in turn
     * appears before {@code "title"}, and that all three are present.
     */
    private static void assertJsonKeyOrder(String json) {
        int encodingAt = json.indexOf("\"Content-Encoding\"");
        int adminsAt = json.indexOf("fb:admins");
        int titleAt = json.indexOf("\"title\"");
        Assert.assertTrue(encodingAt > -1 && adminsAt > -1 && encodingAt < adminsAt);
        Assert.assertTrue(adminsAt > -1 && titleAt > -1 && adminsAt < titleAt);
    }

    /**
     * Runs {@code TikaCLI.main} with {@code -z} into a fresh temporary directory and asserts
     * that each expected file was written there. The directory is removed afterwards.
     *
     * @param resourceName  name of the input file in the test-data directory
     * @param expectedNames file names that must exist, non-empty, in the extract directory
     */
    private void assertFilesExtracted(String resourceName, String... expectedNames) throws Exception {
        File extractDir = createTempDir();
        try {
            TikaCLI.main(new String[]{"--extract-dir=" + extractDir.getAbsolutePath(), "-z", resource(resourceName)});
            String listing = describeContents(extractDir);
            for (String expectedName : expectedNames) {
                assertExtracted(new File(extractDir, expectedName), listing);
            }
        } finally {
            FileUtils.deleteDirectory(extractDir);
        }
    }

    /** Creates an empty directory with a unique {@code tika-test-} name in the default temp location. */
    private static File createTempDir() throws Exception {
        // createTempFile reserves a unique name; swap the placeholder file for a directory.
        File dir = File.createTempFile("tika-test-", "");
        Assert.assertTrue("Could not delete temp file " + dir, dir.delete());
        Assert.assertTrue("Could not create temp directory " + dir, dir.mkdir());
        return dir;
    }

    /** Returns the entry names of {@code dir} joined with {@code " : "}, or an empty string if none can be listed. */
    private static String describeContents(File dir) {
        StringBuilder joined = new StringBuilder();
        String[] names = dir.list();
        if (names != null) {
            for (String name : names) {
                if (joined.length() > 0) {
                    joined.append(" : ");
                }
                joined.append(name);
            }
        }
        return joined.toString();
    }
}
