package org.apache.tika.parser.pdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.tika.TikaTest;
import org.apache.tika.extractor.ParserContainerExtractor;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.sax.BodyContentHandler;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/parser/pdf/PDFParserTest.class */
/**
 * Tests for PDF parsing: text extraction, metadata, annotations, embedded
 * documents and the options exposed through {@link PDFParserConfig}.
 *
 * <p>All documents are loaded from the {@code /test-documents} classpath directory.
 */
public class PDFParserTest extends TikaTest {
    public static final MediaType TYPE_TEXT = MediaType.TEXT_PLAIN;
    public static final MediaType TYPE_EMF = MediaType.application("x-emf");
    public static final MediaType TYPE_PDF = MediaType.application("pdf");
    public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
    public static final MediaType TYPE_DOC = MediaType.application("msword");

    /**
     * Regex for a run of whitespace that also covers the non-breaking space (U+00A0).
     * The character is written as an escape so the pattern survives source re-encoding.
     */
    private static final String WHITESPACE_RUN = "[\\s\u00a0]+";

    /** Checks content type, core metadata and body text of a plain PDF. */
    @Test
    public void testPdfParsing() throws Exception {
        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        InputStream stream = PDFParserTest.class.getResourceAsStream("/test-documents/testPDF.pdf");
        String content = getText(stream, parser, metadata);

        Assert.assertEquals("application/pdf", metadata.get("Content-Type"));
        Assert.assertEquals("Bertrand Delacrétaz", metadata.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Bertrand Delacrétaz", metadata.get("Author"));
        Assert.assertEquals("Firefox", metadata.get(TikaCoreProperties.CREATOR_TOOL));
        Assert.assertEquals("Apache Tika - Apache Tika", metadata.get(TikaCoreProperties.TITLE));

        Assert.assertTrue(content.contains("Apache Tika"));
        Assert.assertTrue(content.contains("Tika - Content Analysis Toolkit"));
        Assert.assertTrue(content.contains("incubator"));
        Assert.assertTrue(content.contains("Apache Software Foundation"));
        Assert.assertFalse("should have word boundary after headline", content.contains("ToolkitApache"));
        Assert.assertFalse("should have word boundary between paragraphs", content.contains("libraries.Apache"));
    }

    /** Checks that custom (non-standard) document properties, including arrays, reach the metadata. */
    @Test
    public void testCustomMetadata() throws Exception {
        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        InputStream stream = PDFParserTest.class.getResourceAsStream("/test-documents/testPDF-custommetadata.pdf");
        String content = getText(stream, parser, metadata);

        Assert.assertEquals("application/pdf", metadata.get("Content-Type"));
        Assert.assertEquals("Document author", metadata.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Document author", metadata.get("Author"));
        Assert.assertEquals("Document title", metadata.get(TikaCoreProperties.TITLE));
        Assert.assertEquals("Custom Value", metadata.get("Custom Property"));
        Assert.assertEquals("Array Entry 1", metadata.get("Custom Array"));

        String[] customArray = metadata.getValues("Custom Array");
        Assert.assertEquals(2, customArray.length);
        Assert.assertEquals("Array Entry 1", customArray[0]);
        Assert.assertEquals("Array Entry 2", customArray[1]);

        Assert.assertTrue(content.contains("Hello World!"));
    }

    /**
     * Parses a protected PDF twice: once without a {@link PasswordProvider} and once with a
     * provider that returns the empty string. Both passes must yield the same metadata and text.
     */
    @Test
    public void testProtectedPDF() throws Exception {
        Parser parser = new AutoDetectParser();

        // Pass 1: no password provider in the context.
        BodyContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        InputStream stream = PDFParserTest.class.getResourceAsStream("/test-documents/testPDF_protected.pdf");
        try {
            parser.parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }
        Assert.assertEquals("The Bank of England", metadata.get("Author"));
        assertProtectedPdfResult(metadata, handler.toString());

        // Pass 2: explicit provider handing out an empty password.
        handler = new BodyContentHandler();
        metadata = new Metadata();
        ParseContext context = new ParseContext();
        context.set(PasswordProvider.class, new PasswordProvider() {
            @Override
            public String getPassword(Metadata ignored) {
                return "";
            }
        });
        stream = PDFParserTest.class.getResourceAsStream("/test-documents/testPDF_protected.pdf");
        try {
            parser.parse(stream, handler, metadata, context);
        } finally {
            stream.close();
        }
        // Check the text produced by THIS pass, not the text of the first pass.
        assertProtectedPdfResult(metadata, handler.toString());
    }

    /** Shared expectations for {@code testPDF_protected.pdf}: metadata plus three text snippets. */
    private static void assertProtectedPdfResult(Metadata metadata, String content) {
        Assert.assertEquals("application/pdf", metadata.get("Content-Type"));
        Assert.assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Speeches by Andrew G Haldane", metadata.get(OfficeOpenXMLCore.SUBJECT));
        Assert.assertEquals("Speeches by Andrew G Haldane", metadata.get("subject"));
        Assert.assertEquals("Rethinking the Financial Network, Speech by Andrew G Haldane, Executive Director, Financial Stability delivered at the Financial Student Association, Amsterdam on 28 April 2009", metadata.get(TikaCoreProperties.TITLE));
        Assert.assertTrue(content.contains("RETHINKING THE FINANCIAL NETWORK"));
        Assert.assertTrue(content.contains("On 16 November 2002"));
        Assert.assertTrue(content.contains("In many important respects"));
    }

    /** Text of two side-by-side text boxes must come out box by box, not interleaved. */
    @Test
    public void testTwoTextBoxes() throws Exception {
        InputStream stream = PDFParserTest.class.getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf");
        String content = getText(stream, new AutoDetectParser()).replaceAll("\\s+", " ");
        Assert.assertTrue(content.contains("Left column line 1 Left column line 2 Right column line 1 Right column line 2"));
    }

    /** Broad smoke test over a document with footnotes, tables, lists, metadata and CJK text. */
    @Test
    public void testVarious() throws Exception {
        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        InputStream stream = PDFParserTest.class.getResourceAsStream("/test-documents/testPDFVarious.pdf");
        String content = getText(stream, parser, metadata);
        String singleSpaced = content.replaceAll("\\s+", " ");

        assertContains("Footnote appears here", content);
        assertContains("This is a footnote.", content);
        assertContains("This is the header text.", content);
        assertContains("This is the footer text.", content);
        assertContains("Here is a text box", content);
        assertContains("Bold", content);
        assertContains("italic", content);
        assertContains("underline", content);
        assertContains("superscript", content);
        assertContains("subscript", content);
        assertContains("Here is a citation:", content);
        assertContains("Figure 1 This is a caption for Figure 1", content);
        assertContains("(Kramer)", content);
        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", singleSpaced);
        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", singleSpaced);
        assertContains("This is a hyperlink", content);
        assertContains("Here is a list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Bullet " + bullet, content);
        }
        assertContains("Here is a numbered list:", content);
        for (int number = 1; number <= 3; number++) {
            assertContains(number + ") Number bullet " + number, content);
        }
        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, content);
            }
        }

        assertContains("Keyword1 Keyword2", content);
        Assert.assertEquals("Keyword1 Keyword2", metadata.get("Keywords"));
        assertContains("Subject is here", content);
        Assert.assertEquals("Subject is here", metadata.get(OfficeOpenXMLCore.SUBJECT));
        Assert.assertEquals("Subject is here", metadata.get("subject"));

        assertContains("Suddenly some Japanese text:", content);
        assertContains("（ＧＨＱ）", content);
        assertContains("ゾルゲと尾崎、淡々と最期", content);
        assertContains("And then some Gothic text:", content);
    }

    /**
     * Annotation text is extracted by default and is suppressed when
     * {@code setExtractAnnotationText(false)} is set, either on the parser's own config
     * or on a config supplied through the {@link ParseContext}.
     */
    @Test
    public void testAnnotations() throws Exception {
        Parser autoParser = new AutoDetectParser();
        String content = getText(getResourceAsStream("/test-documents/testAnnotations.pdf"), autoParser)
                .replaceAll(WHITESPACE_RUN, " ");
        assertContains("Here is some text", content);
        assertContains("Here is a comment", content);

        // Annotation extraction switched off on the parser's own config.
        PDFParser pdfParser = new PDFParser();
        pdfParser.getPDFParserConfig().setExtractAnnotationText(false);
        content = getText(getResourceAsStream("/test-documents/testAnnotations.pdf"), pdfParser)
                .replaceAll(WHITESPACE_RUN, " ");
        assertContains("Here is some text", content);
        Assert.assertEquals(-1, content.indexOf("Here is a comment"));

        // Annotation extraction switched off through the parse context.
        ParseContext context = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractAnnotationText(false);
        context.set(PDFParserConfig.class, config);
        content = getText(getResourceAsStream("/test-documents/testAnnotations.pdf"), autoParser, context)
                .replaceAll(WHITESPACE_RUN, " ");
        assertContains("Here is some text", content);
        Assert.assertEquals(-1, content.indexOf("Here is a comment"));

        // Every opened paragraph element must be closed again.
        String xml = getXML("testAnnotations.pdf").xml;
        Assert.assertEquals(substringCount("<p>", xml), substringCount("</p>", xml));
    }

    /** Text and author of a popup annotation must both be extracted. */
    @Test
    public void testPopupAnnotation() throws Exception {
        String content = getText(getResourceAsStream("/test-documents/testPopupAnnotation.pdf"), new AutoDetectParser());
        assertContains("this is the note", content);
        assertContains("igalsh", content);
    }

    /** Content of PDFs embedded in a PDF package must show up in the XHTML output. */
    @Test
    public void testEmbeddedPDFs() throws Exception {
        String xml = getXML("testPDFPackage.pdf").xml;
        assertContains("PDF1", xml);
        assertContains("PDF2", xml);
    }

    /**
     * Counts occurrences of {@code needle} in {@code haystack}. The search resumes one
     * character after each hit, so overlapping occurrences are counted.
     *
     * @return the number of occurrences; 0 for an empty needle (which would otherwise
     *         match forever at the end of the haystack)
     */
    private static int substringCount(String needle, String haystack) {
        if (needle.length() == 0) {
            return 0;
        }
        int count = 0;
        for (int hit = haystack.indexOf(needle); hit != -1; hit = haystack.indexOf(needle, hit + 1)) {
            count++;
        }
        return count;
    }

    /** The page number must be emitted as its own paragraph. */
    @Test
    public void testPageNumber() throws Exception {
        String xml = getXML("testPageNumber.pdf").xml.replaceAll("\\s+", "");
        assertContains("<p>1</p>", xml);
    }

    /** Link annotations must be rendered as anchors inside an annotation div. */
    @Test
    public void testLinks() throws Exception {
        String xml = getXML("testPDFVarious.pdf").xml;
        assertContains("<div class=\"annotation\"><a href=\"http://tika.apache.org/\" /></div>", xml);
    }

    /**
     * For {@code testExtraSpaces.pdf} the expected sentence is only found when auto-spacing
     * is disabled, whether the option is set on the parser or passed through the context.
     */
    @Test
    public void testDisableAutoSpace() throws Exception {
        PDFParser pdfParser = new PDFParser();
        pdfParser.getPDFParserConfig().setEnableAutoSpace(false);
        String content = getText(getResourceAsStream("/test-documents/testExtraSpaces.pdf"), pdfParser)
                .replaceAll(WHITESPACE_RUN, " ");
        assertContains("Here is some formatted text", content);

        pdfParser.getPDFParserConfig().setEnableAutoSpace(true);
        content = getText(getResourceAsStream("/test-documents/testExtraSpaces.pdf"), pdfParser)
                .replaceAll(WHITESPACE_RUN, " ");
        Assert.assertEquals(-1, content.indexOf("Here is some formatted text"));

        // Same two checks, this time driven by a config in the parse context.
        Parser autoParser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        context.set(PDFParserConfig.class, config);
        content = getText(getResourceAsStream("/test-documents/testExtraSpaces.pdf"), autoParser, context)
                .replaceAll(WHITESPACE_RUN, " ");
        Assert.assertEquals(-1, content.indexOf("Here is some formatted text"));

        config.setEnableAutoSpace(false);
        // pdfParser itself still has auto-spacing enabled here; the context config is what disables it.
        content = getText(getResourceAsStream("/test-documents/testExtraSpaces.pdf"), pdfParser, context)
                .replaceAll(WHITESPACE_RUN, " ");
        assertContains("Here is some formatted text", content);
    }

    /** Overlapping duplicate text is kept by default and dropped when suppression is enabled. */
    @Test
    public void testDuplicateOverlappingText() throws Exception {
        PDFParser pdfParser = new PDFParser();
        String content = getText(getResourceAsStream("/test-documents/testOverlappingText.pdf"), pdfParser);
        assertContains("Text the first timeText the second time", content);

        pdfParser.getPDFParserConfig().setSuppressDuplicateOverlappingText(true);
        content = getText(getResourceAsStream("/test-documents/testOverlappingText.pdf"), pdfParser);
        assertContains("Text the first timesecond time", content);

        // Same two checks through the parse context.
        Parser autoParser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        context.set(PDFParserConfig.class, config);
        content = getText(getResourceAsStream("/test-documents/testOverlappingText.pdf"), autoParser, context);
        assertContains("Text the first timeText the second time", content);

        config.setSuppressDuplicateOverlappingText(true);
        content = getText(getResourceAsStream("/test-documents/testOverlappingText.pdf"), autoParser, context);
        assertContains("Text the first timesecond time", content);
    }

    /**
     * With sort-by-position enabled the two text boxes are read line by line across the
     * page instead of box by box. Checked on the parser's own config and through the context.
     */
    @Test
    public void testSortByPosition() throws Exception {
        PDFParser pdfParser = new PDFParser();
        pdfParser.getPDFParserConfig().setEnableAutoSpace(false);
        String content = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), pdfParser)
                .replaceAll("\\s+", " ");
        assertContains("Left column line 1 Left column line 2 Right column line 1 Right column line 2", content);

        pdfParser.getPDFParserConfig().setSortByPosition(true);
        content = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), pdfParser)
                .replaceAll("\\s+", " ");
        // The stray space in "colu mn" is what the parser produces with these settings.
        assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", content);

        // Default config through the context: box-by-box order.
        Parser autoParser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        context.set(PDFParserConfig.class, config);
        content = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), autoParser, context)
                .replaceAll("\\s+", " ");
        assertContains("Left column line 1 Left column line 2 Right column line 1 Right column line 2", content);

        // Sorted order through the context. Auto-spacing is disabled as well so the context
        // config mirrors the standalone parser's settings above and the same expected string applies.
        // NOTE(review): assumes the context config fully overrides the parser's own config - confirm.
        config.setEnableAutoSpace(false);
        config.setSortByPosition(true);
        context.set(PDFParserConfig.class, config);
        content = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), autoParser, context)
                .replaceAll("\\s+", " ");
        assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", content);
    }

    /** Bookmark text must be present and must appear inside the body element. */
    @Test
    public void testBookmarks() throws Exception {
        String xml = getXML("testPDF_bookmarks.pdf").xml;
        int bookmarkAt = xml.indexOf("Denmark bookmark is here");
        int bodyEndAt = xml.indexOf("</body>");
        Assert.assertTrue(bookmarkAt != -1);
        Assert.assertTrue(bodyEndAt != -1);
        Assert.assertTrue(bookmarkAt < bodyEndAt);
    }

    /**
     * A DOCX that embeds a PDF which in turn embeds another document: the text of all three
     * levels must appear in nesting order, and the container extractor must report the
     * three embedded resources with the expected names and types.
     */
    @Test
    public void testEmbeddedPDFEmbeddingAnotherDocument() throws Exception {
        Parser parser = new AutoDetectParser();
        BodyContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();
        // Presumably needed so embedded documents are parsed with the same parser.
        context.set(Parser.class, parser);

        String content;
        InputStream stream = getResourceAsStream("/test-documents/testPDFEmbeddingAndEmbedded.docx");
        try {
            parser.parse(stream, handler, new Metadata(), context);
            content = handler.toString();
        } finally {
            stream.close();
        }

        int outerAt = content.indexOf("Outer_haystack");
        int pdfAt = content.indexOf("pdf_haystack");
        int needleAt = content.indexOf("Needle");
        Assert.assertTrue(outerAt > -1);
        Assert.assertTrue(pdfAt > -1);
        Assert.assertTrue(needleAt > -1);
        Assert.assertTrue(needleAt > pdfAt && pdfAt > outerAt);

        TikaTest.TrackingHandler tracker = new TikaTest.TrackingHandler();
        ParserContainerExtractor extractor = new ParserContainerExtractor();
        TikaInputStream tis = TikaInputStream.get(getResourceAsStream("/test-documents/testPDFEmbeddingAndEmbedded.docx"));
        try {
            extractor.extract(tis, extractor, tracker);
            // Must run before the stream is closed: isSupported inspects the stream.
            Assert.assertTrue(extractor.isSupported(tis));
        } finally {
            tis.close();
        }

        Assert.assertEquals(3, tracker.filenames.size());
        Assert.assertEquals(3, tracker.mediaTypes.size());
        Assert.assertEquals("image1.emf", tracker.filenames.get(0));
        Assert.assertNull(tracker.filenames.get(1));
        Assert.assertEquals("My first attachment", tracker.filenames.get(2));
        Assert.assertEquals(TYPE_EMF, tracker.mediaTypes.get(0));
        Assert.assertEquals(TYPE_PDF, tracker.mediaTypes.get(1));
        Assert.assertEquals(TYPE_DOCX, tracker.mediaTypes.get(2));
    }

    /**
     * Parses every {@code .pdf} in the test-documents directory with the default settings and
     * with {@code setUseNonSequentialParser(true)}, and requires identical text and (except
     * for a short exclusion list) identical metadata.
     */
    @Test
    public void testSequentialParser() throws Exception {
        Parser defaultParser = new AutoDetectParser();
        Parser nonSequentialParser = new AutoDetectParser();
        ParseContext nonSequentialContext = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        config.setUseNonSequentialParser(true);
        nonSequentialContext.set(PDFParserConfig.class, config);

        File testDocs = new File(getClass().getResource("/test-documents").toURI());
        File[] candidates = testDocs.listFiles();
        Assert.assertNotNull("unable to list " + testDocs, candidates);

        // Files whose metadata is not compared between the two parser modes.
        HashSet<String> knownMetadataDiffs = new HashSet<String>();
        knownMetadataDiffs.add("testAnnotations.pdf");
        knownMetadataDiffs.add("test_acroForm2.pdf");
        // Files whose text is expected to differ; currently none.
        HashSet<String> knownContentDiffs = new HashSet<String>();

        int pdfCount = 0;
        for (File candidate : candidates) {
            String name = candidate.getName();
            if (!name.toLowerCase().endsWith(".pdf")) {
                continue;
            }
            pdfCount++;

            Metadata defaultMetadata = new Metadata();
            InputStream defaultStream = new FileInputStream(candidate);
            String defaultContent = getText(defaultStream, defaultParser, defaultMetadata);

            Metadata nonSequentialMetadata = new Metadata();
            InputStream nonSequentialStream = new FileInputStream(candidate);
            String nonSequentialContent =
                    getText(nonSequentialStream, nonSequentialParser, nonSequentialContext, nonSequentialMetadata);

            if (knownContentDiffs.contains(name)) {
                Assert.assertFalse(name, defaultContent.equals(nonSequentialContent));
            } else {
                Assert.assertEquals(name, defaultContent, nonSequentialContent);
            }
            if (!knownMetadataDiffs.contains(name)) {
                testMetadataEquality(name, defaultMetadata, nonSequentialMetadata);
            }
        }
        Assert.assertTrue("Number of pdf files tested >= 15 in non-sequential parser test", pdfCount >= 15);
    }

    /** The AcroForm text field must be listed in the XHTML output of a signed form. */
    @Test
    public void testSignatureInAcroForm() throws Exception {
        String xml = getXML("/testPDF_acroform3.pdf").xml;
        Assert.assertTrue("found", xml.indexOf("<li>aTextField: TIKA-1226</li>") > -1);
    }

    /** Attachments hanging off child nodes must be extracted with the right names and types. */
    @Test
    public void testEmbeddedFilesInChildren() throws Exception {
        Assert.assertTrue(getXML("/testPDF_childAttachments.pdf").xml.contains("regressiveness"));

        TikaTest.TrackingHandler tracker = new TikaTest.TrackingHandler();
        ParserContainerExtractor extractor = new ParserContainerExtractor();
        TikaInputStream tis = null;
        try {
            tis = TikaInputStream.get(getResourceAsStream("/test-documents/testPDF_childAttachments.pdf"));
            extractor.extract(tis, extractor, tracker);
        } finally {
            if (tis != null) {
                tis.close();
            }
        }

        Assert.assertEquals(4, tracker.filenames.size());
        Assert.assertEquals(4, tracker.mediaTypes.size());
        Assert.assertNull(tracker.filenames.get(0));
        Assert.assertNull(tracker.filenames.get(1));
        Assert.assertEquals("Press Quality(1).joboptions", tracker.filenames.get(2));
        Assert.assertEquals("Unit10.doc", tracker.filenames.get(3));
        Assert.assertEquals(MediaType.image("jpeg"), tracker.mediaTypes.get(0));
        Assert.assertEquals(MediaType.image("tiff"), tracker.mediaTypes.get(1));
        Assert.assertEquals(TYPE_TEXT, tracker.mediaTypes.get(2));
        Assert.assertEquals(TYPE_DOC, tracker.mediaTypes.get(3));
    }

    /**
     * Checks the reported PDF version, {@code dc:format} and extension level for a set of
     * sample files named {@code testPDF_Version.<key>.pdf}, plus the PDF/A identification
     * of the PDF/A-1b sample.
     */
    @Test
    public void testVersions() throws Exception {
        Map<String, String> dcFormats = new HashMap<String, String>();
        dcFormats.put("4.x", "application/pdf; version=1.3");
        dcFormats.put("5.x", "application/pdf; version=1.4");
        dcFormats.put("6.x", "application/pdf; version=1.5");
        dcFormats.put("7.x", "application/pdf; version=1.6");
        dcFormats.put("8.x", "application/pdf; version=1.7");
        dcFormats.put("9.x", "application/pdf; version=1.7");
        dcFormats.put("10.x", "application/pdf; version=1.7");
        dcFormats.put("11.x.PDFA-1b", "application/pdf; version=1.7");

        Map<String, String> pdfVersions = new HashMap<String, String>();
        pdfVersions.put("4.x", "1.3");
        pdfVersions.put("5.x", "1.4");
        pdfVersions.put("6.x", "1.5");
        pdfVersions.put("7.x", "1.6");
        pdfVersions.put("8.x", "1.7");
        pdfVersions.put("9.x", "1.7");
        pdfVersions.put("10.x", "1.7");
        pdfVersions.put("11.x.PDFA-1b", "1.7");

        // Only some samples carry an extension level.
        Map<String, String> extensionVersions = new HashMap<String, String>();
        extensionVersions.put("9.x", "1.7 Adobe Extension Level 3");
        extensionVersions.put("10.x", "1.7 Adobe Extension Level 8");
        extensionVersions.put("11.x.PDFA-1b", "1.7 Adobe Extension Level 8");

        Parser parser = new AutoDetectParser();
        for (Map.Entry<String, String> expected : dcFormats.entrySet()) {
            String key = expected.getKey();
            String expectedFormat = expected.getValue();
            Metadata metadata = parseForMetadata(parser, "/test-documents/" + ("testPDF_Version." + key + ".pdf"));

            boolean formatFound = Arrays.asList(metadata.getValues("dc:format")).contains(expectedFormat);
            Assert.assertTrue("dc:format ::" + expectedFormat, formatFound);

            String expectedExtension = extensionVersions.get(key);
            if (expectedExtension != null) {
                Assert.assertEquals("pdf:PDFExtensionVersion :: " + expectedExtension,
                        expectedExtension, metadata.get("pdf:PDFExtensionVersion"));
            }
            Assert.assertEquals("pdf:PDFVersion", pdfVersions.get(key), metadata.get("pdf:PDFVersion"));
        }

        // The PDF/A sample must report all three dc:format variants and its PDF/A identification.
        Metadata pdfaMetadata = parseForMetadata(parser, "/test-documents/testPDF_Version.11.x.PDFA-1b.pdf");
        HashSet<String> formats = new HashSet<String>(Arrays.asList(pdfaMetadata.getValues("dc:format")));
        String[] expectedFormats = {
            "application/pdf; version=1.7",
            "application/pdf; version=\"A-1b\"",
            "application/pdf; version=\"1.7 Adobe Extension Level 8\""
        };
        for (String expectedFormat : expectedFormats) {
            Assert.assertTrue(expectedFormat, formats.contains(expectedFormat));
        }
        // JUnit argument order is (message, expected, actual).
        Assert.assertEquals("pdfaid:conformance", "B", pdfaMetadata.get("pdfaid:conformance"));
        Assert.assertEquals("pdfaid:part", "1", pdfaMetadata.get("pdfaid:part"));
    }

    /** Both authors of a two-author document must be reported under every author key. */
    @Test
    public void testMultipleAuthors() throws Exception {
        Metadata metadata = parseForMetadata(new AutoDetectParser(), "/test-documents/testPDF_twoAuthors.pdf");

        String[] authorKeys = {"dc:creator", "meta:author", "creator", "Author"};
        for (String key : authorKeys) {
            String[] authors = metadata.getValues(key);
            Assert.assertEquals("number of authors == 2 for key: " + key, 2, authors.length);
            List<String> authorList = Arrays.asList(authors);
            Assert.assertTrue("Sample Author 1", authorList.contains("Sample Author 1"));
            Assert.assertTrue("Sample Author 2", authorList.contains("Sample Author 2"));
        }
    }

    /**
     * Parses a classpath resource, discarding the text, and returns the collected metadata.
     * The stream is closed even when parsing fails.
     *
     * @param parser       parser to run
     * @param resourcePath absolute classpath path of the document
     * @return the metadata filled in by the parser
     * @throws Exception if parsing or closing the stream fails
     */
    private static Metadata parseForMetadata(Parser parser, String resourcePath) throws Exception {
        InputStream stream = PDFParserTest.class.getResourceAsStream(resourcePath);
        Assert.assertNotNull("missing test resource: " + resourcePath, stream);
        Metadata metadata = new Metadata();
        try {
            parser.parse(stream, new BodyContentHandler(), metadata, new ParseContext());
        } finally {
            stream.close();
        }
        return metadata;
    }

    /**
     * Asserts that two metadata objects carry the same number of keys and, for every key of
     * the first one, equal values. {@code pdf:PDFVersion} and {@code dc:format} are skipped.
     *
     * @param fileName name of the parsed file, used in failure messages
     * @param first    metadata from the first parse
     * @param second   metadata from the second parse
     */
    private void testMetadataEquality(String fileName, Metadata first, Metadata second) {
        String[] firstNames = first.names();
        String[] secondNames = second.names();
        Assert.assertTrue("metadata null test: " + fileName, (firstNames == null) == (secondNames == null));
        if (firstNames == null) {
            // Both are null: nothing left to compare.
            return;
        }
        Assert.assertEquals("metadata length: " + fileName, firstNames.length, secondNames.length);

        for (String key : firstNames) {
            if (key.equals("pdf:PDFVersion") || key.equals("dc:format")) {
                continue;
            }
            boolean firstMulti = first.isMultiValued(key);
            boolean secondMulti = second.isMultiValued(key);
            if (firstMulti && secondMulti) {
                testEqualMetadataValue(fileName, first.getValues(key), second.getValues(key));
            } else if (firstMulti || secondMulti) {
                Assert.fail("one multivalued, other isn't: " + fileName);
            } else {
                Assert.assertEquals("unequal single values for key " + key + ": " + fileName,
                        first.get(key), second.get(key));
            }
        }
    }

    /**
     * Asserts that two value arrays have the same length and that every value of the first
     * one also occurs in the second one (order is ignored).
     *
     * @param fileName name of the parsed file, used in failure messages
     * @param first    values from the first parse
     * @param second   values from the second parse
     */
    private void testEqualMetadataValue(String fileName, String[] first, String[] second) {
        Assert.assertTrue("null equality of metadata values: " + fileName, (first == null) == (second == null));
        if (first == null) {
            // Both are null: nothing left to compare.
            return;
        }
        Assert.assertEquals("metadata values length: " + fileName, first.length, second.length);

        List<String> secondValues = Arrays.asList(second);
        for (String value : first) {
            if (!secondValues.contains(value)) {
                Assert.fail("metadata values of " + fileName + " don't contain: " + value);
            }
        }
    }
}
