package org.apache.tika.parser.microsoft.ooxml;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.StringWriter;
import java.util.Locale;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.tika.TikaTest;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.WordParserTest;
import org.apache.tika.sax.BodyContentHandler;
import org.junit.Assert;
import org.junit.Test;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.class */
public class OOXMLParserTest extends TikaTest {
    // Parser instance used by the tests in this class that parse through this.parser.
    private Parser parser = new AutoDetectParser();

    /**
     * Opens a fixture from the classpath folder {@code /test-documents/}.
     *
     * @param str file name of the fixture, appended to {@code /test-documents/}
     * @return the classpath resource stream passed through {@code TikaInputStream.get}
     */
    private InputStream getTestDocument(String str) {
        String resourcePath = "/test-documents/" + str;
        InputStream rawStream = OOXMLParserTest.class.getResourceAsStream(resourcePath);
        return TikaInputStream.get(rawStream);
    }

    /**
     * Parses {@code testEXCEL.xlsx} with the US locale and checks the reported
     * content type, the title/author metadata, the extracted text and the
     * {@code protected} metadata entry.
     */
    @Test
    public void testExcel() throws Exception {
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);

        InputStream input = getTestDocument("testEXCEL.xlsx");
        try {
            this.parser.parse(input, handler, metadata, context);

            Assert.assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", metadata.get("Content-Type"));
            Assert.assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
            Assert.assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
            Assert.assertEquals("Keith Bennett", metadata.get("Author"));

            String content = handler.toString();
            Assert.assertTrue(content.contains("Sample Excel Worksheet"));
            Assert.assertTrue(content.contains("Numbers and their Squares"));

            // Whole numbers must appear without a trailing ".0".
            Assert.assertTrue(content.contains("9"));
            Assert.assertFalse(content.contains("9.0"));
            Assert.assertTrue(content.contains("196"));
            Assert.assertFalse(content.contains("196.0"));

            Assert.assertEquals("false", metadata.get("protected"));
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Parses {@code testEXCEL-formats.xlsx} with the US locale and checks that
     * formatted cell values (numbers, currency, scientific notation,
     * percentages, times and dates) appear in the extracted text.
     */
    @Test
    public void testExcelFormats() throws Exception {
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);

        InputStream input = getTestDocument("testEXCEL-formats.xlsx");
        try {
            this.parser.parse(input, handler, metadata, context);

            Assert.assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", metadata.get("Content-Type"));
            String content = handler.toString();

            // Thousands-separated numbers, positive and negative.
            Assert.assertTrue(content.contains("1,599.99"));
            Assert.assertTrue(content.contains("-1,599.99"));

            // Currency values.
            Assert.assertTrue(content.contains("$1,599.99"));
            Assert.assertTrue(content.contains("$1,599.99)"));

            // Scientific notation; either exponent spelling is accepted.
            Assert.assertTrue(content.contains("1.98E08") || content.contains("1.98E+08"));
            Assert.assertTrue(content.contains("-1.98E08") || content.contains("-1.98E+08"));

            // Percentages.
            Assert.assertTrue(content.contains("2.50%"));
            // The expected whole-number percentage depends on the running Java
            // version: "2%" on 1.5 runtimes, "3%" otherwise.
            if (System.getProperty("java.version").startsWith("1.5")) {
                Assert.assertTrue(content.contains("2%"));
            } else {
                Assert.assertTrue(content.contains("3%"));
            }

            // Times and dates.
            Assert.assertTrue(content.contains("6:15"));
            Assert.assertTrue(content.contains("18:15"));
            Assert.assertTrue(content.contains("17-May-07"));

            // Currency again, including the parenthesised form.
            Assert.assertTrue(content.contains("$1,599.99"));
            Assert.assertTrue(content.contains("($1,599.99)"));
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Parses the {@code testPPT.*} fixture for each listed PowerPoint
     * extension and checks the reported content type, the title/author
     * metadata and the extracted slide text.
     */
    @Test
    public void testPowerPoint() throws Exception {
        // extensions[i] is expected to be reported as mimeTypes[i].
        String[] extensions = {"pptx", "pptm", "ppsm", "ppsx", "potm"};
        String[] mimeTypes = {"application/vnd.openxmlformats-officedocument.presentationml.presentation", "application/vnd.ms-powerpoint.presentation.macroenabled.12", "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "application/vnd.ms-powerpoint.template.macroenabled.12"};
        for (int i = 0; i < extensions.length; i++) {
            String filename = "testPPT." + extensions[i];
            AutoDetectParser autoParser = new AutoDetectParser();
            Metadata metadata = new Metadata();
            // The file name is supplied to the parser through the metadata.
            metadata.set("resourceName", filename);
            BodyContentHandler handler = new BodyContentHandler();
            ParseContext context = new ParseContext();

            InputStream input = getTestDocument(filename);
            try {
                autoParser.parse(input, handler, metadata, context);

                Assert.assertEquals("Mime-type checking for " + filename, mimeTypes[i], metadata.get("Content-Type"));
                Assert.assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
                Assert.assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));
                Assert.assertEquals("Rajiv", metadata.get("Author"));

                // None of the listed extensions is "thmx", so every fixture is
                // expected to carry the slide text.
                String content = handler.toString();
                String missingText = "Text missing for " + filename + "\n" + content;
                Assert.assertTrue(missingText, content.contains("Attachment Test"));
                Assert.assertTrue(missingText, content.contains("This is a test file data with the same content"));
                Assert.assertTrue(missingText, content.contains("content parsing"));
                Assert.assertTrue(missingText, content.contains("Different words to test against"));
                Assert.assertTrue(missingText, content.contains("Mystery"));
            } finally {
                input.close();
            }
        }
    }

    /**
     * For each listed PowerPoint extension, checks that the content type and
     * the title/author metadata are already set when the content handler
     * receives {@code startDocument()}.
     */
    @Test
    public void testPowerPointMetadataEarly() throws Exception {
        // extensions[i] is expected to be reported as mimeTypes[i].
        String[] extensions = {"pptx", "pptm", "ppsm", "ppsx", "potm"};
        final String[] mimeTypes = {"application/vnd.openxmlformats-officedocument.presentationml.presentation", "application/vnd.ms-powerpoint.presentation.macroenabled.12", "application/vnd.ms-powerpoint.slideshow.macroenabled.12", "application/vnd.openxmlformats-officedocument.presentationml.slideshow", "application/vnd.ms-powerpoint.template.macroenabled.12"};
        for (int i = 0; i < extensions.length; i++) {
            final String filename = "testPPT." + extensions[i];
            AutoDetectParser autoParser = new AutoDetectParser();
            final Metadata metadata = new Metadata();
            metadata.set("resourceName", filename);
            // Final copy of the loop index so the anonymous handler can read it.
            final int index = i;
            BodyContentHandler handler = new BodyContentHandler() {
                @Override
                public void startDocument() {
                    // The assertions run at the moment the parser starts
                    // emitting the document.
                    Assert.assertEquals("Mime-type checking for " + filename, mimeTypes[index], metadata.get("Content-Type"));
                    Assert.assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
                    Assert.assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));
                    Assert.assertEquals("Rajiv", metadata.get("Author"));
                }
            };
            ParseContext context = new ParseContext();

            InputStream input = getTestDocument(filename);
            try {
                autoParser.parse(input, handler, metadata, context);
            } finally {
                // Single close on both the success and the failure path.
                input.close();
            }
        }
    }

    /**
     * Parses the {@code testPPT.xps} and {@code testPPT.thmx} fixtures and
     * checks only the content type reported for each.
     */
    @Test
    public void testUnsupportedPowerPoint() throws Exception {
        // extensions[i] is expected to be reported as mimeTypes[i].
        String[] extensions = {"xps", "thmx"};
        String[] mimeTypes = {"application/vnd.ms-xpsdocument", "application/vnd.openxmlformats-officedocument"};
        for (int i = 0; i < extensions.length; i++) {
            String filename = "testPPT." + extensions[i];
            AutoDetectParser autoParser = new AutoDetectParser();
            Metadata metadata = new Metadata();
            metadata.set("resourceName", filename);
            BodyContentHandler handler = new BodyContentHandler();
            ParseContext context = new ParseContext();

            InputStream input = getTestDocument(filename);
            try {
                autoParser.parse(input, handler, metadata, context);
                Assert.assertEquals("Mime-type checking for " + filename, mimeTypes[i], metadata.get("Content-Type"));
            } finally {
                // Single close on both the success and the failure path.
                input.close();
            }
        }
    }

    /**
     * Parses {@code testWORD.docx} and checks the reported content type, the
     * title/author metadata and that the title text is extracted.
     */
    @Test
    public void testWord() throws Exception {
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        InputStream input = getTestDocument("testWORD.docx");
        try {
            this.parser.parse(input, handler, metadata, context);

            Assert.assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", metadata.get("Content-Type"));
            Assert.assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
            Assert.assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
            Assert.assertEquals("Keith Bennett", metadata.get("Author"));
            Assert.assertTrue(handler.toString().contains("Sample Word Document"));
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Parses {@code footnotes.docx} and checks the reported content type and
     * that the text {@code snoska} appears in the extracted content.
     */
    @Test
    public void testWordFootnote() throws Exception {
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        InputStream input = getTestDocument("footnotes.docx");
        try {
            this.parser.parse(input, handler, metadata, context);

            Assert.assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", metadata.get("Content-Type"));
            Assert.assertTrue(handler.toString().contains("snoska"));
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Checks the XML produced by {@code getXML} for several Word fixtures:
     * metadata and structural markup for {@code testWORD.docx}, image
     * references for {@code testWORD_3imgs.docx}, and bold runs for the two
     * {@code testWORD_bold_character_runs*.docx} files.
     */
    @Test
    public void testWordHTML() throws Exception {
        TikaTest.XMLResult wordResult = getXML("testWORD.docx");
        String wordXml = wordResult.xml;
        Metadata wordMetadata = wordResult.metadata;

        // Metadata of the main fixture.
        Assert.assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", wordMetadata.get("Content-Type"));
        Assert.assertEquals("Sample Word Document", wordMetadata.get(TikaCoreProperties.TITLE));
        Assert.assertEquals("Keith Bennett", wordMetadata.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Keith Bennett", wordMetadata.get("Author"));

        // Title and headings.
        Assert.assertTrue(wordXml.contains("Sample Word Document"));
        Assert.assertTrue(wordXml.contains("<h1 class=\"title\">"));
        Assert.assertTrue(wordXml.contains("<h1>Heading Level 1</h1>"));
        Assert.assertTrue(wordXml.contains("<h2>Heading Level 2</h2>"));
        Assert.assertTrue(wordXml.contains("<h3><a name=\"OnLevel3\" />Heading Level 3</h3>"));

        // Inline formatting.
        Assert.assertTrue(wordXml.contains("<b>BOLD</b>"));
        Assert.assertTrue(wordXml.contains("<i>ITALIC</i>"));

        // Table markup.
        Assert.assertTrue(wordXml.contains("<table>"));
        Assert.assertTrue(wordXml.contains("<td>"));

        // External and internal links.
        Assert.assertTrue(wordXml.contains("<a href=\"http://tika.apache.org/\">Tika</a>"));
        Assert.assertTrue(wordXml.contains("<a href=\"#OnMainHeading\">The Main Heading Bookmark</a>"));

        // Paragraph carrying a class attribute.
        Assert.assertTrue(wordXml.contains("<p class=\"signature\">This one"));

        // Embedded images.
        String imagesXml = getXML("testWORD_3imgs.docx").xml;
        String imageMissing = "Image not found in:\n" + imagesXml;
        Assert.assertTrue(imageMissing, imagesXml.contains("<img src=\"embedded:image2.png\" alt=\"A description...\" />"));
        Assert.assertTrue(imageMissing, imagesXml.contains("<img src=\"embedded:image3.jpeg\" alt=\"A description...\" />"));
        Assert.assertTrue(imageMissing, imagesXml.contains("<img src=\"embedded:image4.png\" alt=\"A description...\" />"));
        Assert.assertTrue(imagesXml.contains("<p>The end!</p>"));

        // Both bold-run fixtures must yield the same run structure.
        String[] boldFixtures = {"testWORD_bold_character_runs.docx", "testWORD_bold_character_runs2.docx"};
        for (String boldFixture : boldFixtures) {
            String boldXml = getXML(boldFixture).xml;
            Assert.assertTrue("Bold text wasn't contiguous: " + boldXml, boldXml.contains("F<b>oob</b>a<b>r</b>"));
        }
    }

    /**
     * Parses {@code headerPic.docx} into an XML serializer and checks the
     * reported content type and that the output contains an {@code <img}
     * element.
     */
    @Test
    public void testWordPicturesInHeader() throws Exception {
        Metadata metadata = new Metadata();
        ParseContext context = new ParseContext();

        // Serialize the SAX events produced by the parser as indented XML.
        StringWriter xmlOutput = new StringWriter();
        SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty("method", "xml");
        handler.getTransformer().setOutputProperty("indent", "yes");
        handler.setResult(new StreamResult(xmlOutput));

        InputStream input = getTestDocument("headerPic.docx");
        try {
            this.parser.parse(input, handler, metadata, context);

            String xml = xmlOutput.toString();
            Assert.assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", metadata.get("Content-Type"));
            Assert.assertTrue(xml.contains("<img"));
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Parses {@code protectedSheets.xlsx} and checks the reported content type
     * and that the {@code protected} metadata entry is {@code "true"}.
     */
    @Test
    public void testProtectedExcelSheets() throws Exception {
        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/protectedSheets.xlsx");
        AutoDetectParser autoParser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        try {
            autoParser.parse(input, new BodyContentHandler(), metadata, new ParseContext());

            Assert.assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", metadata.get("Content-Type"));
            Assert.assertEquals("true", metadata.get("protected"));
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Parses {@code protectedFile.xlsx} and checks the reported content type,
     * that the {@code protected} metadata entry is {@code "true"}, and that
     * the extracted text contains {@code Office}.
     */
    @Test
    public void testProtectedExcelFile() throws Exception {
        AutoDetectParser autoParser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        InputStream input = getTestDocument("protectedFile.xlsx");
        try {
            autoParser.parse(input, handler, metadata, context);

            Assert.assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", metadata.get("Content-Type"));
            Assert.assertEquals("true", metadata.get("protected"));
            Assert.assertTrue(handler.toString().contains("Office"));
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Parses {@code NullHeader.docx} and checks that parsing completes and
     * yields non-empty text.
     */
    @Test
    public void testNullHeaders() throws Exception {
        AutoDetectParser autoParser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        InputStream input = getTestDocument("NullHeader.docx");
        try {
            autoParser.parse(input, handler, metadata, context);
            Assert.assertFalse(handler.toString().length() == 0);
        } finally {
            // Single close on both the success and the failure path.
            input.close();
        }
    }

    /**
     * Parses {@code testWORD_various.docx} and checks that text from many
     * document features (footnotes, header/footer, text box, formatting,
     * tables, lists, keywords/subject, non-Latin scripts) is extracted, and
     * that the keywords and subject metadata are set.
     */
    @Test
    public void testVarious() throws Exception {
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testWORD_various.docx");
        try {
            new AutoDetectParser().parse(input, handler, metadata, new ParseContext());
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        String content = handler.toString();
        // Whitespace-collapsed copy used for the multi-cell table checks.
        String collapsed = content.replaceAll("\\s+", " ");

        assertContains("Footnote appears here", content);
        assertContains("This is a footnote.", content);
        assertContains("This is the header text.", content);
        assertContains("This is the footer text.", content);
        assertContains("Here is a text box", content);
        assertContains("Bold", content);
        assertContains("italic", content);
        assertContains("underline", content);
        assertContains("superscript", content);
        assertContains("subscript", content);
        assertContains("Here is a citation:", content);
        assertContains("Figure 1 This is a caption for Figure 1", content);
        assertContains("(Kramer)", content);
        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", collapsed);
        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", collapsed);
        assertContains("This is a hyperlink", content);

        assertContains("Here is a list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Bullet " + bullet, content);
        }
        assertContains("Here is a numbered list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Number bullet " + bullet, content);
        }

        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, content);
            }
        }

        assertContains("Keyword1 Keyword2", content);
        Assert.assertEquals("Keyword1 Keyword2", metadata.get("Keywords"));
        assertContains("Subject is here", content);
        Assert.assertEquals("Subject is here", metadata.get("subject"));
        Assert.assertEquals("Subject is here", metadata.get(OfficeOpenXMLCore.SUBJECT));

        assertContains("Suddenly some Japanese text:", content);
        assertContains("（ＧＨＱ）", content);
        assertContains("ゾルゲと尾崎、淡々と最期", content);

        assertContains("And then some Gothic text:", content);
        // Gothic letters U+10332 U+1033F U+10344 U+10339 U+10343 U+1033A,
        // written as escaped surrogate pairs so the literal survives
        // re-encoding of this file.
        // NOTE(review): reconstructed from a run of twelve U+FFFD characters
        // in the previous source; confirm against the fixture.
        assertContains("\uD800\uDF32\uD800\uDF3F\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
    }

    /**
     * Parses {@code testPPT_various.pptx} and checks that text from many
     * document features (footnotes, header/footer, text box, formatting,
     * tables, lists, keywords/subject, non-Latin scripts) is extracted, and
     * that the keywords and subject metadata are set.
     */
    @Test
    public void testVariousPPTX() throws Exception {
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testPPT_various.pptx");
        try {
            new AutoDetectParser().parse(input, handler, metadata, new ParseContext());
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        String content = handler.toString();
        // Whitespace-collapsed copy used for the multi-cell table checks.
        String collapsed = content.replaceAll("\\s+", " ");

        assertContains("Footnote appears here", content);
        assertContains("This is a footnote.", content);
        assertContains("This is the header text.", content);
        assertContains("This is the footer text.", content);
        assertContains("Here is a text box", content);
        assertContains("Bold", content);
        assertContains("italic", content);
        assertContains("underline", content);
        assertContains("superscript", content);
        assertContains("subscript", content);
        assertContains("Here is a citation:", content);
        assertContains("Figure 1 This is a caption for Figure 1", content);
        assertContains("(Kramer)", content);
        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", collapsed);
        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", collapsed);
        assertContains("This is a hyperlink", content);

        assertContains("Here is a list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Bullet " + bullet, content);
        }
        assertContains("Here is a numbered list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Number bullet " + bullet, content);
        }

        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, content);
            }
        }

        assertContains("Keyword1 Keyword2", content);
        Assert.assertEquals("Keyword1 Keyword2", metadata.get("Keywords"));
        assertContains("Subject is here", content);
        Assert.assertEquals("Subject is here", metadata.get("subject"));
        Assert.assertEquals("Subject is here", metadata.get(OfficeOpenXMLCore.SUBJECT));

        assertContains("Suddenly some Japanese text:", content);
        assertContains("（ＧＨＱ）", content);
        assertContains("ゾルゲと尾崎、淡々と最期", content);

        assertContains("And then some Gothic text:", content);
        // Gothic letters U+10332 U+1033F U+10344 U+10339 U+10343 U+1033A,
        // written as escaped surrogate pairs so the literal survives
        // re-encoding of this file.
        // NOTE(review): reconstructed from a run of twelve U+FFFD characters
        // in the previous source; confirm against the fixture.
        assertContains("\uD800\uDF32\uD800\uDF3F\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
    }

    /**
     * Parses {@code testPPT_masterFooter.pptx} and checks that the text
     * {@code Master footer is here} is extracted.
     */
    @Test
    public void testMasterFooter() throws Exception {
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testPPT_masterFooter.pptx");
        try {
            new AutoDetectParser().parse(input, handler, metadata, new ParseContext());
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        assertContains("Master footer is here", handler.toString());
    }

    /**
     * Parses {@code testWordArt.pptx} and checks that the text
     * {@code Here is some red word Art} is extracted.
     */
    @Test
    public void testWordArt() throws Exception {
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testWordArt.pptx");
        try {
            new AutoDetectParser().parse(input, handler, metadata, new ParseContext());
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        assertContains("Here is some red word Art", handler.toString());
    }

    /**
     * Parses {@code testEXCEL_custom_props.xlsx} directly with
     * {@code OOXMLParser} (US locale) and checks the core, extended and
     * {@code custom:}-prefixed metadata entries.
     */
    @Test
    public void testExcelCustomProperties() throws Exception {
        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testEXCEL_custom_props.xlsx");
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler(-1);
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        try {
            new OOXMLParser().parse(input, handler, metadata, context);
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        Assert.assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", metadata.get("Content-Type"));

        // This fixture is expected to have no creator or modifier.
        Assert.assertNull(metadata.get(TikaCoreProperties.CREATOR));
        Assert.assertNull(metadata.get(TikaCoreProperties.MODIFIER));

        Assert.assertEquals("2006-09-12T15:06:44Z", metadata.get(TikaCoreProperties.CREATED));
        Assert.assertEquals("2006-09-12T15:06:44Z", metadata.get(Metadata.CREATION_DATE));
        Assert.assertEquals("2011-08-22T14:24:38Z", metadata.get(Metadata.LAST_MODIFIED));
        Assert.assertEquals("2011-08-22T14:24:38Z", metadata.get(TikaCoreProperties.MODIFIED));
        Assert.assertEquals("2011-08-22T14:24:38Z", metadata.get(Metadata.DATE));

        Assert.assertEquals("Microsoft Excel", metadata.get("Application-Name"));
        Assert.assertEquals("Microsoft Excel", metadata.get(OfficeOpenXMLExtended.APPLICATION));

        // Custom properties are read back under the "custom:" prefix.
        Assert.assertEquals("true", metadata.get("custom:myCustomBoolean"));
        Assert.assertEquals("3", metadata.get("custom:myCustomNumber"));
        Assert.assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
        Assert.assertEquals("2010-12-30T22:00:00Z", metadata.get("custom:MyCustomDate"));
        Assert.assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
    }

    /**
     * Parses {@code testWORD_custom_props.docx} directly with
     * {@code OOXMLParser} (US locale) and checks the core, extended and
     * {@code custom:}-prefixed metadata entries.
     */
    @Test
    public void testWordCustomProperties() throws Exception {
        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testWORD_custom_props.docx");
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler(-1);
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        try {
            new OOXMLParser().parse(input, handler, metadata, context);
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        Assert.assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", metadata.get("Content-Type"));

        // Authorship.
        Assert.assertEquals("EJ04325S", metadata.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Etienne Jouvin", metadata.get(TikaCoreProperties.MODIFIER));
        Assert.assertEquals("Etienne Jouvin", metadata.get("Last-Author"));

        // Creation and modification timestamps.
        Assert.assertEquals("2011-07-29T16:52:00Z", metadata.get(TikaCoreProperties.CREATED));
        Assert.assertEquals("2011-07-29T16:52:00Z", metadata.get(Metadata.CREATION_DATE));
        Assert.assertEquals("2012-01-03T22:14:00Z", metadata.get(TikaCoreProperties.MODIFIED));
        Assert.assertEquals("2012-01-03T22:14:00Z", metadata.get(Metadata.DATE));

        // Application and document statistics.
        Assert.assertEquals("Microsoft Office Word", metadata.get("Application-Name"));
        Assert.assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
        Assert.assertEquals("1", metadata.get(Office.PAGE_COUNT));
        Assert.assertEquals("2", metadata.get(Office.WORD_COUNT));

        // Descriptive properties.
        Assert.assertEquals("My Title", metadata.get(TikaCoreProperties.TITLE));
        Assert.assertEquals("My Keyword", metadata.get(TikaCoreProperties.KEYWORDS));
        Assert.assertEquals("Normal.dotm", metadata.get("Template"));
        Assert.assertEquals("Normal.dotm", metadata.get(OfficeOpenXMLExtended.TEMPLATE));
        Assert.assertEquals("My subject", metadata.get("subject"));
        Assert.assertEquals("My subject", metadata.get(OfficeOpenXMLCore.SUBJECT));
        Assert.assertEquals("EDF-DIT", metadata.get(TikaCoreProperties.PUBLISHER));

        // Custom properties are read back under the "custom:" prefix.
        Assert.assertEquals("true", metadata.get("custom:myCustomBoolean"));
        Assert.assertEquals("3", metadata.get("custom:myCustomNumber"));
        Assert.assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
        Assert.assertEquals("2010-12-30T23:00:00Z", metadata.get("custom:MyCustomDate"));
        Assert.assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
    }

    /**
     * Parses {@code testPPT_custom_props.pptx} directly with
     * {@code OOXMLParser} (US locale) and checks the core, extended and
     * {@code custom:}-prefixed metadata entries.
     */
    @Test
    public void testPowerPointCustomProperties() throws Exception {
        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testPPT_custom_props.pptx");
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler(-1);
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        try {
            new OOXMLParser().parse(input, handler, metadata, context);
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        Assert.assertEquals("application/vnd.openxmlformats-officedocument.presentationml.presentation", metadata.get("Content-Type"));

        // Authorship.
        Assert.assertEquals("JOUVIN ETIENNE", metadata.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("EJ04325S", metadata.get(TikaCoreProperties.MODIFIER));
        Assert.assertEquals("EJ04325S", metadata.get("Last-Author"));

        // Creation and modification timestamps.
        Assert.assertEquals("2011-08-22T13:30:53Z", metadata.get(TikaCoreProperties.CREATED));
        Assert.assertEquals("2011-08-22T13:30:53Z", metadata.get(Metadata.CREATION_DATE));
        Assert.assertEquals("2011-08-22T13:32:49Z", metadata.get(TikaCoreProperties.MODIFIED));
        Assert.assertEquals("2011-08-22T13:32:49Z", metadata.get(Metadata.DATE));

        // Document statistics and title.
        Assert.assertEquals("1", metadata.get(Office.SLIDE_COUNT));
        Assert.assertEquals("3", metadata.get(Office.WORD_COUNT));
        Assert.assertEquals("Test extraction properties pptx", metadata.get(TikaCoreProperties.TITLE));

        // Custom properties are read back under the "custom:" prefix.
        Assert.assertEquals("true", metadata.get("custom:myCustomBoolean"));
        Assert.assertEquals("3", metadata.get("custom:myCustomNumber"));
        Assert.assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
        Assert.assertEquals("2010-12-30T22:00:00Z", metadata.get("custom:MyCustomDate"));
        Assert.assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
    }

    /**
     * Parses {@code testWORD_embedded_pdf.docx} into an XML serializer and
     * checks that the two embedded-object placeholders appear in the output
     * interleaved with the surrounding text in the expected order.
     */
    @Test
    public void testEmbeddedPDF() throws Exception {
        InputStream input = OOXMLParserTest.class.getResourceAsStream("/test-documents/testWORD_embedded_pdf.docx");
        Metadata metadata = new Metadata();

        // Serialize the SAX events as unindented XML so positions can be compared.
        StringWriter xmlOutput = new StringWriter();
        SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty("method", "xml");
        handler.getTransformer().setOutputProperty("indent", "no");
        handler.setResult(new StreamResult(xmlOutput));

        try {
            new OOXMLParser().parse(input, handler, metadata, new ParseContext());
        } finally {
            // Close exactly once, whether or not parsing succeeded.
            input.close();
        }

        String xml = xmlOutput.toString();
        int introPos = xml.indexOf("Here is the pdf file:");
        int firstEmbedPos = xml.indexOf("<div class=\"embedded\" id=\"rId5\"/>");
        int byePos = xml.indexOf("Bye Bye");
        int secondEmbedPos = xml.indexOf("<div class=\"embedded\" id=\"rId6\"/>");
        int finalPos = xml.indexOf("Bye for real.");

        // Every marker must be present...
        Assert.assertTrue(introPos != -1);
        Assert.assertTrue(firstEmbedPos != -1);
        Assert.assertTrue(byePos != -1);
        Assert.assertTrue(secondEmbedPos != -1);
        Assert.assertTrue(finalPos != -1);

        // ...and they must occur in document order.
        Assert.assertTrue(introPos < firstEmbedPos);
        Assert.assertTrue(firstEmbedPos < byePos);
        Assert.assertTrue(byePos < secondEmbedPos);
        Assert.assertTrue(secondEmbedPos < finalPos);
    }

    /**
     * Checks the XML produced for {@code test_embedded_zip.pptx}: the embedded placeholders
     * with ids {@code slide1_rId3} and {@code slide2_rId4} and the two text markers must all
     * be present and must occur in the expected order.
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testEmbeddedZipInPPTX() throws Exception {
        final String xml = getXML("test_embedded_zip.pptx").xml;

        final int firstEmbedded = xml.indexOf("<div class=\"embedded\" id=\"slide1_rId3\" />");
        final int noteText = xml.indexOf("Send me a note");
        final int secondEmbedded = xml.indexOf("<div class=\"embedded\" id=\"slide2_rId4\" />");
        final int untitledParagraph = xml.indexOf("<p>No title</p>");

        // indexOf yields -1 for a missing marker, so a non-negative offset means "found".
        Assert.assertTrue(firstEmbedded >= 0);
        Assert.assertTrue(noteText >= 0);
        Assert.assertTrue(secondEmbedded >= 0);
        Assert.assertTrue(untitledParagraph >= 0);

        // Each marker must come strictly after the previous one.
        Assert.assertTrue(noteText > firstEmbedded);
        Assert.assertTrue(secondEmbedded > noteText);
        Assert.assertTrue(untitledParagraph > secondEmbedded);
    }

    /**
     * Checks that the XML produced for {@code testWORD_null_style.docx} contains the phrase
     * "Test av styrt dokument".
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testWordNullStyle() throws Exception {
        final String expectedPhrase = "Test av styrt dokument";
        final String xml = getXML("testWORD_null_style.docx").xml;
        assertContains(expectedPhrase, xml);
    }

    /**
     * Parses {@code testWORD_no_format.docx} with {@code OOXMLParser} and checks that the
     * extracted body text contains the expected sentence.
     *
     * @throws Exception if the document cannot be read or parsed
     */
    @Test
    public void testNoFormat() throws Exception {
        BodyContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        // try-with-resources closes the stream exactly once, whether parsing succeeds or
        // fails, and keeps a close() failure from hiding the original exception.
        try (InputStream input = WordParserTest.class.getResourceAsStream("/test-documents/testWORD_no_format.docx")) {
            Assert.assertNotNull("testWORD_no_format.docx is missing from the test classpath", input);
            new OOXMLParser().parse(input, handler, metadata, new ParseContext());
        }

        assertContains("This is a piece of text that causes an exception", handler.toString());
    }

    /**
     * Checks that the XML produced for {@code testWORD_text_box.docx} contains the body text
     * as well as the text-box sentences for the body, header, and footer.
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testTextInsideTextBox() throws Exception {
        final String xml = getXML("testWORD_text_box.docx").xml;
        final String[] expectedSentences = {
            "This text is directly in the body of the document.",
            "This text is inside of a text box in the body of the document.",
            "This text is inside of a text box in the header of the document.",
            "This text is inside of a text box in the footer of the document.",
        };
        // Checked in declaration order, so the first missing sentence is the one reported.
        for (String sentence : expectedSentences) {
            assertContains(sentence, xml);
        }
    }

    /**
     * Checks that the XML produced for {@code testPPT_embedded_two_slides.pptx} contains an
     * embedded placeholder for both {@code slide1_rId7} and {@code slide2_rId7}.
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testEmbeddedPPTXTwoSlides() throws Exception {
        final String xml = getXML("testPPT_embedded_two_slides.pptx").xml;
        final String[] expectedPlaceholders = {
            "<div class=\"embedded\" id=\"slide1_rId7\" />",
            "<div class=\"embedded\" id=\"slide2_rId7\" />",
        };
        for (String placeholder : expectedPlaceholders) {
            assertContains(placeholder, xml);
        }
    }

    /**
     * Parses {@code testWORD_missing_text.docx} through the auto-detecting parser and checks
     * the detected content type and that the words "BigCompany" and "Seasoned" are present
     * in the extracted text.
     *
     * @throws Exception if the document cannot be read or parsed
     */
    @Test
    public void testMissingText() throws Exception {
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        // try-with-resources replaces the manual close-and-rethrow so that a failing
        // close() cannot hide a parse or assertion failure.
        try (InputStream input = getTestDocument("testWORD_missing_text.docx")) {
            this.parser.parse(input, handler, metadata, context);
            Assert.assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", metadata.get("Content-Type"));

            // assertContains reports the missing text on failure, unlike a bare assertTrue.
            String text = handler.toString();
            assertContains("BigCompany", text);
            assertContains("Seasoned", text);
        }
    }

    /**
     * Parses {@code testEXCEL_textbox.xlsx} with the {@code TIKA_PARSER_PARSE_SHAPES}
     * metadata entry set to {@code "true"} and checks that the extracted text contains
     * "some autoshape".
     *
     * @throws Exception if the document cannot be read or parsed
     */
    @Test
    public void testExcelTextBox() throws Exception {
        Metadata metadata = new Metadata();
        metadata.add("TIKA_PARSER_PARSE_SHAPES", "true");
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        // The input stream was previously never closed; scope it so it is always released.
        try (InputStream input = getTestDocument("testEXCEL_textbox.xlsx")) {
            this.parser.parse(input, handler, metadata, context);
        }

        assertContains("some autoshape", handler.toString());
    }

    /**
     * Parses each listed document while {@code System.err} is redirected to an in-memory
     * buffer and checks that nothing was written to it during parsing.
     *
     * @throws Exception if a document cannot be read or parsed
     */
    @Test
    public void testWordMissingOOXMLBeans() throws Exception {
        final String[] testFiles = {"testWORD_missing_ooxml_bean1.docx"};
        final PrintStream originalErr = System.err;

        for (String fileName : testFiles) {
            Metadata metadata = new Metadata();
            BodyContentHandler handler = new BodyContentHandler();
            ParseContext context = new ParseContext();
            ByteArrayOutputStream capturedErr = new ByteArrayOutputStream();

            try (InputStream input = getTestDocument(fileName)) {
                PrintStream errCapture = new PrintStream(capturedErr);
                System.setErr(errCapture);
                try {
                    this.parser.parse(input, handler, metadata, context);
                } finally {
                    // Always restore the real System.err, even when parsing throws;
                    // otherwise every later test would keep writing into this buffer.
                    errCapture.flush();
                    System.setErr(originalErr);
                }
            }

            // Comparing against the empty string makes a failure show what was printed.
            Assert.assertEquals("Unexpected output on System.err while parsing " + fileName, "", capturedErr.toString());
        }
    }

    /**
     * Checks that the XML produced for {@code testPPT_autodate.pptx} contains the "Now"
     * paragraph immediately followed by the paragraph holding the date text.
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testPPTXAutodate() throws Exception {
        final String expectedFragment = "<p>Now</p>\n<p>2011-12-19 10:20:04 AM</p>\n";
        final String xml = getXML("testPPT_autodate.pptx").xml;
        assertContains(expectedFragment, xml);
    }

    /**
     * Checks the XML produced for {@code testDOCX_Thumbnail.docx}: the document text, the
     * embedded placeholder for {@code thumbnail_0.emf}, and its package-entry heading must
     * all be present, in that order.
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testDOCXThumbnail() throws Exception {
        final String xml = getXML("testDOCX_Thumbnail.docx").xml;

        final int bodyText = xml.indexOf("This file contains a thumbnail");
        final int embeddedPlaceholder = xml.indexOf("<div class=\"embedded\" id=\"thumbnail_0.emf\" />");
        final int packageEntry = xml.indexOf("<div class=\"package-entry\"><h1>thumbnail_0.emf</h1></div>");

        // A non-negative offset means the marker was found.
        Assert.assertTrue(bodyText >= 0);
        Assert.assertTrue(embeddedPlaceholder >= 0);
        Assert.assertTrue(packageEntry >= 0);

        // Markers must appear in document order.
        Assert.assertTrue(embeddedPlaceholder > bodyText);
        Assert.assertTrue(packageEntry > embeddedPlaceholder);
    }

    /**
     * Checks the XML produced for {@code testXLSX_Thumbnail.xlsx}: the sheet text, the
     * embedded placeholder for {@code thumbnail_0.wmf}, and its package-entry heading must
     * all be present, in that order.
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testXLSXThumbnail() throws Exception {
        final String xml = getXML("testXLSX_Thumbnail.xlsx").xml;

        final int bodyText = xml.indexOf("This file contains an embedded thumbnail by default");
        final int embeddedPlaceholder = xml.indexOf("<div class=\"embedded\" id=\"thumbnail_0.wmf\" />");
        final int packageEntry = xml.indexOf("<div class=\"package-entry\"><h1>thumbnail_0.wmf</h1></div>");

        // A non-negative offset means the marker was found.
        Assert.assertTrue(bodyText >= 0);
        Assert.assertTrue(embeddedPlaceholder >= 0);
        Assert.assertTrue(packageEntry >= 0);

        // Markers must appear in document order.
        Assert.assertTrue(embeddedPlaceholder > bodyText);
        Assert.assertTrue(packageEntry > embeddedPlaceholder);
    }

    /**
     * Checks the XML produced for {@code testPPTX_Thumbnail.pptx}: the opening body
     * paragraph, the embedded placeholder for {@code thumbnail_0.jpeg}, and its
     * package-entry heading must all be present, in that order.
     *
     * @throws Exception if the document cannot be converted to XML
     */
    @Test
    public void testPPTXThumbnail() throws Exception {
        final String xml = getXML("testPPTX_Thumbnail.pptx").xml;

        final int bodyStart = xml.indexOf("<body><p>This file contains an embedded thumbnail</p>");
        final int embeddedPlaceholder = xml.indexOf("<div class=\"embedded\" id=\"thumbnail_0.jpeg\" />");
        final int packageEntry = xml.indexOf("<div class=\"package-entry\"><h1>thumbnail_0.jpeg</h1></div>");

        // A non-negative offset means the marker was found.
        Assert.assertTrue(bodyStart >= 0);
        Assert.assertTrue(embeddedPlaceholder >= 0);
        Assert.assertTrue(packageEntry >= 0);

        // Markers must appear in document order.
        Assert.assertTrue(embeddedPlaceholder > bodyStart);
        Assert.assertTrue(packageEntry > embeddedPlaceholder);
    }
}
