package org.apache.tika;

import java.io.File;
import java.io.FileInputStream;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/apache/tika/TestParsers.class */
/**
 * Smoke tests that run the parser obtained from a default-configured {@link Tika}
 * facade against sample documents under {@code /test-documents/} and check the
 * extracted title metadata and text content.
 */
public class TestParsers extends TikaTest {
    private TikaConfig tc;
    private Tika tika;

    /**
     * Builds a fresh {@link Tika} facade from the default {@link TikaConfig}
     * before every test.
     *
     * @throws Exception if the configuration or the facade cannot be created
     */
    @Before
    public void setUp() throws Exception {
        this.tc = TikaConfig.getDefaultConfig();
        this.tika = new Tika(this.tc);
    }

    /**
     * Parses the sample Word document and checks the title reported in its metadata.
     *
     * @throws Exception if the resource cannot be located, read or parsed
     */
    @Test
    public void testWORDxtraction() throws Exception {
        File wordFile = getResourceAsFile("/test-documents/testWORD.doc");
        Metadata metadata = extractMetadata(wordFile);
        Assert.assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
    }

    /**
     * Checks both the extracted text and the title metadata of the sample Excel workbook.
     *
     * @throws Exception if the resource cannot be located, read or parsed
     */
    @Test
    public void testEXCELExtraction() throws Exception {
        File excelFile = getResourceAsFile("/test-documents/testEXCEL.xls");

        String text = this.tika.parseToString(excelFile);
        Assert.assertTrue("Text does not contain 'Numbers and their Squares'", text.contains("Numbers and their Squares"));

        Metadata metadata = extractMetadata(excelFile);
        Assert.assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
    }

    /**
     * Checks, for each supported file type, that the text around an optional hyphen
     * is extracted in one of the accepted forms: the hyphen removed, kept as a soft
     * hyphen (U+00AD), turned into a zero-width space (U+200B), or rendered as a
     * hyphenation-point character directly after "optional".
     *
     * @throws Exception if a resource cannot be located, read or parsed
     */
    @Test
    public void testOptionalHyphen() throws Exception {
        String[] extensions = {"ppt", "pptx", "doc", "docx", "rtf", "pdf"};
        for (String extension : extensions) {
            File document = getResourceAsFile("/test-documents/testOptionalHyphen." + extension);
            String content = this.tika.parseToString(document);
            boolean handled = content.contains("optionalhyphen")
                    || content.contains("optional\u00adhyphen")
                    || content.contains("optional\u200bhyphen")
                    || content.contains("optional‧");
            Assert.assertTrue("optional hyphen was not handled for '" + extension + "' file type: " + content, handled);
        }
    }

    /**
     * Asserts that both the body text and the comment text are present in the
     * content extracted from {@code /test-documents/<baseName>.<extension>}.
     *
     * @param extension file extension of the test document (also used as the message prefix)
     * @param baseName  file name of the test document without its extension
     * @throws Exception if the resource cannot be located, read or parsed
     */
    private void verifyComment(String extension, String baseName) throws Exception {
        File document = getResourceAsFile("/test-documents/" + baseName + "." + extension);
        String content = this.tika.parseToString(document);
        Assert.assertTrue(extension + ": content=" + content + " did not extract text", content.contains("Here is some text"));
        Assert.assertTrue(extension + ": content=" + content + " did not extract comment", content.contains("Here is a comment"));
    }

    /**
     * Runs {@link #verifyComment(String, String)} on the {@code testComment}
     * document for every supported file type.
     *
     * @throws Exception if a resource cannot be located, read or parsed
     */
    @Test
    public void testComment() throws Exception {
        String[] extensions = {"ppt", "pptx", "doc", "docx", "pdf", "rtf"};
        for (String extension : extensions) {
            verifyComment(extension, "testComment");
        }
    }

    /**
     * Parses {@code document} with the facade's parser, discarding content events,
     * and returns the metadata object handed to the parser.
     *
     * @param document file to parse
     * @return the {@link Metadata} instance passed to the parser for this parse
     * @throws Exception if the file cannot be opened, parsed or closed
     */
    private Metadata extractMetadata(File document) throws Exception {
        Parser parser = this.tika.getParser();
        Metadata metadata = new Metadata();
        // try-with-resources closes the stream exactly once and keeps a parse
        // failure as the primary exception if close() fails as well.
        try (FileInputStream stream = new FileInputStream(document)) {
            parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
        }
        return metadata;
    }
}
