package org.apache.tika.parser.rtf;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import org.apache.tika.Tika;
import org.apache.tika.TikaTest;
import org.apache.tika.extractor.ParserContainerExtractor;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.RTFMetadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.WriteOutContentHandler;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/parser/rtf/RTFParserTest.class */
public class RTFParserTest extends TikaTest {
    // Shared Tika facade; the tests below use it to obtain a parser and to extract plain text.
    private Tika tika = new Tika();

    /**
     * Simple holder that pairs the text written out by a parse run with the
     * {@link Metadata} instance that was handed to the parser.
     */
    public static class Result {
        /** Text captured from the content handler. */
        public final String text;
        /** Metadata object passed to the parser for the same run. */
        public final Metadata metadata;

        /**
         * @param str      text captured from the content handler
         * @param metadata metadata object passed to the parser
         */
        public Result(String str, Metadata metadata) {
            this.metadata = metadata;
            this.text = str;
        }
    }

    /**
     * Parses {@code testRTF.rtf} through the shared Tika parser and checks the
     * reported content type plus two fragments of the written-out text.
     */
    @Test
    public void testBasicExtraction() throws Exception {
        File rtfFile = getResourceAsFile("/test-documents/testRTF.rtf");
        Metadata metadata = new Metadata();
        StringWriter extracted = new StringWriter();
        InputStream input = new FileInputStream(rtfFile);
        try {
            this.tika.getParser().parse(input, new WriteOutContentHandler(extracted), metadata, new ParseContext());
        } finally {
            // The stream is opened here, so release the file handle here even when parsing throws.
            input.close();
        }
        String content = extracted.toString();
        Assert.assertEquals("application/rtf", metadata.get("Content-Type"));
        assertContains("Test", content);
        assertContains("indexation Word", content);
    }

    /**
     * Checks that {@code testRTFUmlautSpaces2.rtf}, once all whitespace is
     * stripped, yields exactly "Übersicht".
     */
    @Test
    public void testUmlautSpacesExtraction2() throws Exception {
        String content = getText("testRTFUmlautSpaces2.rtf");
        String withoutWhitespace = content.replaceAll("\\s+", "");
        Assert.assertEquals("Übersicht", withoutWhitespace);
    }

    /**
     * Checks that the expected characters are extracted and that the character
     * 年 does not appear twice in a row.
     */
    @Test
    public void testUnicodeUCNControlWordCharacterDoublingExtraction() throws Exception {
        String content = getText("testRTFUnicodeUCNControlWordCharacterDoubling.rtf");

        assertContains("年", content);
        assertContains("念", content);
        assertContains("0 ", content);
        assertContains("abc", content);

        boolean doubled = content.contains("年年");
        Assert.assertFalse("Doubled character 年", doubled);
    }

    /** Checks that the word "ESPÍRITO" is present in the text of {@code testRTFHexEscapeInsideWord.rtf}. */
    @Test
    public void testHexEscapeInsideWord() throws Exception {
        String content = getText("testRTFHexEscapeInsideWord.rtf");
        assertContains("ESPÍRITO", content);
    }

    /** Checks lower- and upper-case Polish text extracted from {@code testRTFWindowsCodepage1250.rtf}. */
    @Test
    public void testWindowsCodepage1250() throws Exception {
        String content = getText("testRTFWindowsCodepage1250.rtf");

        assertContains("zażółć gęślą jaźń", content);
        assertContains("ZAŻÓŁĆ GĘŚLĄ JAŹŃ", content);
    }

    /**
     * Checks that the cells of {@code testRTFTableCellSeparation.rtf} come out
     * separated by whitespace (runs of whitespace are collapsed to one space
     * before comparing).
     */
    @Test
    public void testTableCellSeparation() throws Exception {
        File rtfFile = getResourceAsFile("/test-documents/testRTFTableCellSeparation.rtf");
        String content = this.tika.parseToString(rtfFile);
        String normalized = content.replaceAll("\\s+", " ");

        Assert.assertTrue(normalized.contains("a b c d ä ë ö ü"));
        assertContains("a b c d ä ë ö ü", normalized);
    }

    /** Checks that "Station Fax" appears in the whitespace-normalized text of {@code testRTFTableCellSeparation2.rtf}. */
    @Test
    public void testTableCellSeparation2() throws Exception {
        String content = getText("testRTFTableCellSeparation2.rtf");
        String normalized = content.replaceAll("\\s+", " ");
        assertContains("Station Fax", normalized);
    }

    /** Checks two Czech phrases in the text extracted from {@code testRTFWordPadCzechCharacters.rtf}. */
    @Test
    public void testWordPadCzechCharactersExtraction() throws Exception {
        File rtfFile = getResourceAsFile("/test-documents/testRTFWordPadCzechCharacters.rtf");
        String content = this.tika.parseToString(rtfFile);

        Assert.assertTrue(content.contains("Článek týdne"));
        Assert.assertTrue(content.contains("starověké židovské náboženské texty"));
    }

    /** Checks two Czech phrases in the text extracted from {@code testRTFWord2010CzechCharacters.rtf}. */
    @Test
    public void testWord2010CzechCharactersExtraction() throws Exception {
        File rtfFile = getResourceAsFile("/test-documents/testRTFWord2010CzechCharacters.rtf");
        String content = this.tika.parseToString(rtfFile);

        Assert.assertTrue(content.contains("Článek týdne"));
        Assert.assertTrue(content.contains("starověké židovské náboženské texty"));
    }

    /** Checks both the body text and the title metadata of {@code testRTF-ms932.rtf}. */
    @Test
    public void testMS932Extraction() throws Exception {
        File rtfFile = getResourceAsFile("/test-documents/testRTF-ms932.rtf");
        String content = this.tika.parseToString(rtfFile);
        Assert.assertTrue(content.contains("こんにちは"));

        Result parsed = getResult("testRTF-ms932.rtf");
        Assert.assertEquals("タイトル", parsed.metadata.get(TikaCoreProperties.TITLE));
    }

    /** Checks that "Übersicht" appears in the text extracted from {@code testRTFUmlautSpaces.rtf}. */
    @Test
    public void testUmlautSpacesExtraction() throws Exception {
        File rtfFile = getResourceAsFile("/test-documents/testRTFUmlautSpaces.rtf");
        String content = this.tika.parseToString(rtfFile);
        Assert.assertTrue(content.contains("Übersicht"));
    }

    /**
     * Checks that supplementary-plane (Gothic) text is extracted from
     * {@code testRTFUnicodeGothic.rtf}.
     */
    @Test
    public void testGothic() throws Exception {
        String content = getText("testRTFUnicodeGothic.rtf");
        // The expected value previously consisted of twelve U+FFFD replacement characters,
        // i.e. six surrogate pairs that were lost in transcoding. Spelled as escapes so the
        // literal survives any source-file encoding.
        // NOTE(review): code points restored as the Gothic word U+10332 U+1033F U+10344
        // U+10339 U+10343 U+1033A - confirm against the fixture document.
        assertContains("\uD800\uDF32\uD800\uDF3F\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
    }

    /** Checks title, author and comment metadata plus two text fragments of {@code testRTFJapanese.rtf}. */
    @Test
    public void testJapaneseText() throws Exception {
        Result parsed = getResult("testRTFJapanese.rtf");
        Metadata metadata = parsed.metadata;

        Assert.assertEquals("ゾルゲと尾崎、淡々と最期\u3000", metadata.get(TikaCoreProperties.TITLE));
        Assert.assertEquals("VMazel", metadata.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("VMazel", metadata.get("Author"));
        Assert.assertEquals("StarWriter", metadata.get(TikaCoreProperties.COMMENTS));

        String content = parsed.text;
        assertContains("（ＧＨＱ）", content);
        assertContains("東京都三鷹市", content);
    }

    /**
     * Checks the length of the string returned by {@code parseToString}: more
     * than 500 characters by default, at most 200 after
     * {@code setMaxStringLength(200)}, and at most 100 when 100 is passed as
     * the explicit limit argument.
     */
    @Test
    public void testMaxLength() throws Exception {
        File japaneseRtf = getResourceAsFile("/test-documents/testRTFJapanese.rtf");
        Metadata metadata = new Metadata();
        TikaInputStream firstStream = TikaInputStream.get(japaneseRtf, metadata);
        // A dedicated instance, so changing the limit does not affect the shared field.
        Tika localTika = new Tika();

        String unlimited = localTika.parseToString(firstStream, metadata);
        Assert.assertTrue(unlimited.length() > 500);

        localTika.setMaxStringLength(200);
        String cappedByInstance = localTika.parseToString(TikaInputStream.get(japaneseRtf, metadata), metadata);
        Assert.assertTrue(cappedByInstance.length() <= 200);

        String cappedByArgument = localTika.parseToString(TikaInputStream.get(japaneseRtf, metadata), metadata, 100);
        Assert.assertTrue(cappedByArgument.length() <= 100);
    }

    /** Checks that literal curly braces survive in the text of {@code testRTFWithCurlyBraces.rtf}. */
    @Test
    public void testTextWithCurlyBraces() throws Exception {
        String content = getText("testRTFWithCurlyBraces.rtf");
        assertContains("{ some text inside curly brackets }", content);
    }

    /**
     * Checks the characters produced for dashes, hyphens, spaces and quotes in
     * {@code testRTFControls.rtf}.
     */
    @Test
    public void testControls() throws Exception {
        Result parsed = getResult("testRTFControls.rtf");
        String content = parsed.text;

        // Dashes, hyphens and spaces.
        assertContains("Thiswordhasanem—dash", content);
        assertContains("Thiswordhasanen–dash", content);
        assertContains("Thiswordhasanon‑breakinghyphen", content);
        assertContains("Thiswordhasanonbreaking space", content);
        assertContains("Thiswordhasanoptional\u00adhyphen", content);

        // Typographic quotes.
        assertContains("‘Single quoted text’", content);
        assertContains("“Double quoted text”", content);
        assertContains("“Double quoted text again”", content);
    }

    /**
     * Checks the text extracted from {@code testRTFInvalidUnicode.rtf}; the
     * expected strings contain the replacement character where the document
     * has unpaired or mismatched surrogates.
     */
    @Test
    public void testInvalidUnicode() throws Exception {
        Result parsed = getResult("testRTFInvalidUnicode.rtf");
        String content = parsed.text;

        assertContains("Unpaired hi � here", content);
        assertContains("Unpaired lo � here", content);
        assertContains("Mismatched pair �� here", content);
    }

    /**
     * Broad extraction check on {@code testRTFVarious.rtf}: footnotes,
     * header/footer, text box, styled runs, tables, lists, keyword and subject
     * metadata, and Japanese and Gothic text.
     */
    @Test
    public void testVarious() throws Exception {
        Result parsed = getResult("testRTFVarious.rtf");
        String content = parsed.text;
        // Table assertions compare against a copy with whitespace runs collapsed to one space.
        String normalized = content.replaceAll("\\s+", " ");

        assertContains("Footnote appears here", content);
        assertContains("This is a footnote.", content);
        assertContains("This is the header text.", content);
        assertContains("This is the footer text.", content);
        assertContains("Here is a text box", content);
        assertContains("Bold", content);
        assertContains("italic", content);
        assertContains("underline", content);
        assertContains("superscript", content);
        assertContains("subscript", content);
        assertContains("Here is a citation:", content);
        assertContains("Figure 1 This is a caption for Figure 1", content);
        assertContains("(Kramer)", content);
        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", normalized);
        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", normalized);
        assertContains("This is a hyperlink", content);

        assertContains("Here is a list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Bullet " + bullet, content);
        }
        assertContains("Here is a numbered list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Number bullet " + bullet, content);
        }
        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, content);
            }
        }

        assertContains("Keyword1 Keyword2", content);
        Assert.assertEquals("Keyword1 Keyword2", parsed.metadata.get(TikaCoreProperties.KEYWORDS));
        assertContains("Subject is here", content);
        Assert.assertEquals("Subject is here", parsed.metadata.get(OfficeOpenXMLCore.SUBJECT));
        Assert.assertEquals("Subject is here", parsed.metadata.get("subject"));

        assertContains("Suddenly some Japanese text:", content);
        assertContains("（ＧＨＱ）", content);
        assertContains("ゾルゲと尾崎、淡々と最期", content);

        assertContains("And then some Gothic text:", content);
        // The expected value previously consisted of twelve U+FFFD replacement characters,
        // i.e. six surrogate pairs that were lost in transcoding.
        // NOTE(review): code points restored as U+10332 U+1033F U+10344 U+10339 U+10343
        // U+1033A - confirm against the fixture document.
        assertContains("\uD800\uDF32\uD800\uDF3F\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
    }

    /** Checks that bold and italic runs of {@code testRTFVarious.rtf} are wrapped in {@code <b>}/{@code <i>} in the XML output. */
    @Test
    public void testVariousStyle() throws Exception {
        TikaTest.XMLResult result = getXML("testRTFVarious.rtf");
        String xml = result.xml;

        assertContains("<b>Bold</b>", xml);
        assertContains("<i>italic</i>", xml);
    }

    /** Checks the nesting of {@code <b>} and {@code <i>} elements in the XML output for {@code testRTFBoldItalic.rtf}. */
    @Test
    public void testBoldItalic() throws Exception {
        TikaTest.XMLResult result = getXML("testRTFBoldItalic.rtf");
        String xml = result.xml;

        assertContains("<b>bold</b>", xml);
        assertContains("<b>bold </b><b><i>italic</i></b>", xml);
        assertContains("<b><i>italic </i></b><b>bold</b>", xml);
        assertContains("<i>italic</i>", xml);
        assertContains("<b>bold then </b><b><i>italic then</i></b><i> not bold</i>", xml);
        assertContains("<i>italic then </i><b><i>bold then</i></b><b> not italic</b>", xml);
    }

    /**
     * Checks that the hyperlink in {@code testRTFHyperlink.rtf} becomes an
     * {@code <a href>} element and that no paragraph consisting only of two
     * tabs is emitted.
     */
    @Test
    public void testHyperlink() throws Exception {
        // Keep the XML in a local: the second assertion referenced an undefined name (r0).
        String xml = getXML("testRTFHyperlink.rtf").xml;
        assertContains("our most <a href=\"http://r.office.microsoft.com/r/rlidwelcomeFAQ?clid=1033\">frequently asked questions</a>", xml);
        Assert.assertEquals(-1, xml.indexOf("<p>\t\t</p>"));
    }

    /** Checks the paragraph produced for {@code testRTFIgnoredControlWord.rtf}. */
    @Test
    public void testIgnoredControlWord() throws Exception {
        String xml = getXML("testRTFIgnoredControlWord.rtf").xml;
        assertContains("<p>The quick brown fox jumps over the lazy dog</p>", xml);
    }

    /** Checks that the Cyrillic greeting is present in the XML output for {@code testFontAfterBufferedText.rtf}. */
    @Test
    public void testFontAfterBufferedText() throws Exception {
        String xml = getXML("testFontAfterBufferedText.rtf").xml;
        assertContains("Уважаемый клиент!", xml);
    }

    /** Checks ordered and unordered list markup in the XML output for {@code testRTFListMicrosoftWord.rtf}. */
    @Test
    public void testListMicrosoftWord() throws Exception {
        TikaTest.XMLResult result = getXML("testRTFListMicrosoftWord.rtf");
        String xml = result.xml;

        // Ordered list.
        assertContains("<ol>\t<li>one</li>", xml);
        assertContains("</ol>", xml);
        // Unordered list.
        assertContains("<ul>\t<li>first</li>", xml);
        assertContains("</ul>", xml);
    }

    /** Checks ordered and unordered list markup in the XML output for {@code testRTFListLibreOffice.rtf}. */
    @Test
    public void testListLibreOffice() throws Exception {
        TikaTest.XMLResult result = getXML("testRTFListLibreOffice.rtf");
        String xml = result.xml;

        // Ordered list.
        assertContains("<ol>\t<li>one</li>", xml);
        assertContains("</ol>", xml);
        // Unordered list.
        assertContains("<ul>\t<li>first</li>", xml);
        assertContains("</ul>", xml);
    }

    /**
     * Extracts the embedded resources of {@code testBinControlWord.rtf} and
     * checks that exactly one 10-byte payload is captured, with the expected
     * values at offsets 4 and 9.
     */
    @Test
    public void testBinControlWord() throws Exception {
        TikaTest.ByteCopyingHandler embedded = new TikaTest.ByteCopyingHandler();
        ParserContainerExtractor extractor = new ParserContainerExtractor();
        // Open before the try so the finally block never sees a null stream.
        TikaInputStream stream = TikaInputStream.get(getResourceAsStream("/test-documents/testBinControlWord.rtf"));
        try {
            Assert.assertTrue(extractor.isSupported(stream));
            extractor.extract(stream, extractor, embedded);
        } finally {
            // Closed exactly once, whether or not extraction succeeded.
            stream.close();
        }

        Assert.assertEquals(1L, embedded.bytes.size());
        byte[] payload = embedded.bytes.get(0);
        Assert.assertEquals(10L, payload.length);
        Assert.assertEquals(125L, payload[4]);
        Assert.assertEquals(-1L, payload[9]);
    }

    /** Checks page, word and character counts and the creation date prefix for {@code test_embedded_package.rtf}. */
    @Test
    public void testMetaDataCounts() throws Exception {
        TikaTest.XMLResult result = getXML("test_embedded_package.rtf");
        Metadata metadata = result.metadata;

        Assert.assertEquals("1", metadata.get(Office.PAGE_COUNT));
        Assert.assertEquals("7", metadata.get(Office.WORD_COUNT));
        Assert.assertEquals("36", metadata.get(Office.CHARACTER_COUNT));

        String created = metadata.get(Office.CREATION_DATE);
        Assert.assertTrue(created.startsWith("2012-09-02T"));
    }

    /** Checks that "Body" appears in the text extracted from {@code testRTFListOverride.rtf}. */
    @Test
    public void testListOverride() throws Exception {
        Result parsed = getResult("testRTFListOverride.rtf");
        assertContains("Body", parsed.text);
    }

    /**
     * Extracts the embedded resources of {@code testRTFEmbeddedFiles.rtf}
     * twice: first with a handler constructed with the EMF/WMF media types,
     * comparing every reported file name and media type against the expected
     * lists; then with a default handler, checking the total count and the
     * last two file names.
     */
    @Test
    public void testEmbeddedMonster() throws Exception {
        HashSet<MediaType> skippedTypes = new HashSet<MediaType>();
        skippedTypes.add(MediaType.parse("application/x-emf"));
        skippedTypes.add(MediaType.parse("application/x-msmetafile"));

        // Expected names and types are parallel lists: entry i of one describes entry i of the other.
        List<String> expectedNames = Arrays.asList(
                "file_0.doc",
                "Hw.txt",
                "file_1.xlsx",
                "test-zip-of-zip_普林斯顿.zip",
                "html-within-zip.zip",
                "text.html",
                "testHTML_utf8_普林斯顿.html",
                "testJPEG_普林斯顿.jpg",
                "file_2.xls",
                "testMSG_普林斯顿.msg",
                "file_3.pdf",
                "file_4.ppt",
                "file_5.pptx",
                "thumbnail_0.jpeg",
                "file_6.doc",
                "file_7.doc",
                "file_8.docx",
                "testJPEG_普林斯顿.jpg");
        List<String> expectedTypes = Arrays.asList(
                "application/msword",
                "text/plain",
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                "application/zip",
                "application/zip",
                "text/html",
                "text/html",
                "image/jpeg",
                "application/vnd.ms-excel",
                "application/vnd.ms-outlook",
                "application/pdf",
                "application/vnd.ms-powerpoint",
                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                "image/jpeg",
                "application/msword",
                "application/msword",
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                "image/jpeg");

        // First pass: handler constructed with the EMF/WMF types.
        TikaTest.TrackingHandler filtered = new TikaTest.TrackingHandler(skippedTypes);
        ParserContainerExtractor filteredExtractor = new ParserContainerExtractor();
        TikaInputStream filteredStream =
                TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedFiles.rtf"));
        try {
            Assert.assertTrue(filteredExtractor.isSupported(filteredStream));
            filteredExtractor.extract(filteredStream, filteredExtractor, filtered);
        } finally {
            filteredStream.close();
        }

        Assert.assertEquals(expectedNames.size(), filtered.filenames.size());
        Assert.assertEquals(expectedTypes.size(), filtered.mediaTypes.size());
        for (int i = 0; i < filtered.filenames.size(); i++) {
            String expectedName = expectedNames.get(i);
            if (expectedName == null) {
                Assert.assertNull(filtered.filenames.get(i));
            } else {
                Assert.assertNotNull(filtered.filenames.get(i));
                // Compare only the last path segment of the reported name.
                Assert.assertEquals("filename equals ", expectedName,
                        FilenameUtils.getName(filtered.filenames.get(i)));
            }
            Assert.assertEquals(expectedTypes.get(i), filtered.mediaTypes.get(i).toString());
        }

        // Second pass: default handler.
        TikaTest.TrackingHandler unfiltered = new TikaTest.TrackingHandler();
        ParserContainerExtractor unfilteredExtractor = new ParserContainerExtractor();
        TikaInputStream unfilteredStream =
                TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedFiles.rtf"));
        try {
            Assert.assertTrue(unfilteredExtractor.isSupported(unfilteredStream));
            unfilteredExtractor.extract(unfilteredStream, unfilteredExtractor, unfiltered);
        } finally {
            unfilteredStream.close();
        }

        Assert.assertEquals(47L, unfiltered.filenames.size());
        Assert.assertEquals("thumbnail_26.emf", unfiltered.filenames.get(45));
        Assert.assertEquals("thumbnail_27.wmf", unfiltered.filenames.get(46));
    }

    /**
     * Parses {@code testRTFRegularImages.rtf} with a recursive metadata parser
     * and checks the metadata collected at positions 0 and 2: subject values,
     * comments, the thumbnail flag and the number of metadata names.
     *
     * <p>NOTE(review): this method has no {@code @Test} annotation, so a
     * JUnit 4 runner presumably never executes it. The expected name counts
     * below are therefore unverified - confirm them before enabling the test.
     */
    public void testRegularImages() throws Exception {
        AutoDetectParser baseParser = new AutoDetectParser();
        ParseContext parseContext = new ParseContext();
        TikaTest.RecursiveMetadataParser recursiveParser = new TikaTest.RecursiveMetadataParser(baseParser, false);
        parseContext.set(Parser.class, recursiveParser);

        BodyContentHandler handler = new BodyContentHandler();
        Metadata rootMetadata = new Metadata();
        rootMetadata.add("resourceName", "testRTFRegularImages.rtf");

        // Open before the try so the finally block never sees a null stream.
        InputStream input = TikaInputStream.get(getResourceAsStream("/test-documents/testRTFRegularImages.rtf"));
        try {
            recursiveParser.parse(input, handler, rootMetadata, parseContext);
        } finally {
            input.close();
        }

        List<Metadata> collected = recursiveParser.getAllMetadata();
        Metadata first = collected.get(0);
        Metadata third = collected.get(2);
        Assert.assertTrue(first != null);
        Assert.assertTrue(third != null);

        Assert.assertTrue(Arrays.asList(first.getValues("dc:subject")).contains("serbor"));
        Assert.assertTrue(third.get("Comments").contains("Licensed to the Apache"));
        // The subject seen on the first entry must not show up on the third one.
        Assert.assertFalse(Arrays.asList(third.getValues("dc:subject")).contains("serbor"));
        Assert.assertEquals("false", third.get(RTFMetadata.THUMBNAIL));
        Assert.assertEquals("false", first.get(RTFMetadata.THUMBNAIL));

        Assert.assertEquals(40L, third.names().length);
        // Both counts used to be asserted on the same object, which can never hold;
        // the second count is now checked on the other metadata entry.
        Assert.assertEquals(105L, first.names().length);
    }

    /**
     * Extracts the embedded resources of {@code testRTFEmbeddedLink.rtf}
     * twice: with a handler constructed with the EMF/WMF media types (expects
     * no file names) and with a default handler (expects two).
     */
    @Test
    public void testEmbeddedLinkedDocument() throws Exception {
        HashSet<MediaType> skippedTypes = new HashSet<MediaType>();
        skippedTypes.add(MediaType.parse("application/x-emf"));
        skippedTypes.add(MediaType.parse("application/x-msmetafile"));

        // First pass: handler constructed with the EMF/WMF types.
        TikaTest.TrackingHandler filtered = new TikaTest.TrackingHandler(skippedTypes);
        ParserContainerExtractor filteredExtractor = new ParserContainerExtractor();
        TikaInputStream filteredStream =
                TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedLink.rtf"));
        try {
            Assert.assertTrue(filteredExtractor.isSupported(filteredStream));
            filteredExtractor.extract(filteredStream, filteredExtractor, filtered);
        } finally {
            // Close even when an assertion or the extraction fails.
            filteredStream.close();
        }
        Assert.assertEquals(0L, filtered.filenames.size());

        // Second pass: default handler.
        TikaTest.TrackingHandler unfiltered = new TikaTest.TrackingHandler();
        ParserContainerExtractor unfilteredExtractor = new ParserContainerExtractor();
        TikaInputStream unfilteredStream =
                TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedLink.rtf"));
        try {
            Assert.assertTrue(unfilteredExtractor.isSupported(unfilteredStream));
            unfilteredExtractor.extract(unfilteredStream, unfilteredExtractor, unfiltered);
        } finally {
            unfilteredStream.close();
        }
        Assert.assertEquals(2L, unfiltered.filenames.size());
    }

    /**
     * Parses a document from {@code /test-documents/} with the shared Tika
     * parser and returns the written-out text together with the metadata
     * object handed to the parser.
     *
     * @param str file name of the test document, relative to {@code /test-documents/}
     * @return the captured text and the metadata used for the parse
     * @throws Exception if the resource cannot be opened or parsing fails
     */
    private Result getResult(String str) throws Exception {
        File document = getResourceAsFile("/test-documents/" + str);
        Metadata metadata = new Metadata();
        StringWriter extracted = new StringWriter();
        InputStream input = new FileInputStream(document);
        try {
            this.tika.getParser().parse(input, new WriteOutContentHandler(extracted), metadata, new ParseContext());
        } finally {
            // The stream is opened here, so release the file handle here even when parsing throws.
            input.close();
        }
        return new Result(extracted.toString(), metadata);
    }

    /**
     * Convenience wrapper around {@code getResult} that returns only the text.
     *
     * @param str file name of the test document, relative to {@code /test-documents/}
     * @return the text captured while parsing the document
     */
    private String getText(String str) throws Exception {
        Result parsed = getResult(str);
        return parsed.text;
    }
}
