package org.apache.tika.parser.microsoft.ooxml;

import com.gargoylesoftware.htmlunit.html.HtmlBold;
import com.gargoylesoftware.htmlunit.html.HtmlImage;
import com.gargoylesoftware.htmlunit.html.HtmlItalic;
import com.gargoylesoftware.htmlunit.html.HtmlParagraph;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableBody;
import com.gargoylesoftware.htmlunit.html.HtmlTableDataCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
import org.apache.poi.xwpf.usermodel.BodyType;
import org.apache.poi.xwpf.usermodel.IBody;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFHeaderFooter;
import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFStyles;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.WordExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xalan.templates.Constants;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:lib/tika-parsers-1.1.jar:org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.class */
/**
 * Emits the content of a Word (.docx) document as XHTML SAX events.
 *
 * <p>Output order is: document-level headers, the body elements (paragraphs
 * and tables, in document order), then document-level footers. A paragraph
 * that carries its own section properties additionally gets that section's
 * headers emitted before it and that section's footers emitted after it.
 */
public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
    private final XWPFDocument document;

    /** Style table taken from the document; treated as possibly absent when resolving paragraph styles. */
    private final XWPFStyles styles;

    /**
     * @param parseContext      parse context handed through to the superclass
     * @param xWPFWordExtractor POI extractor whose underlying document is rendered
     */
    public XWPFWordExtractorDecorator(ParseContext parseContext, XWPFWordExtractor xWPFWordExtractor) {
        super(parseContext, xWPFWordExtractor);
        this.document = (XWPFDocument) xWPFWordExtractor.getDocument();
        this.styles = this.document.getStyles();
    }

    /**
     * Writes headers, body and footers of the document to the given handler.
     * Both header and footer extraction tolerate a missing (null) policy.
     */
    @Override // org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor
    protected void buildXHTML(XHTMLContentHandler xHTMLContentHandler) throws SAXException, XmlException, IOException {
        XWPFHeaderFooterPolicy headerFooterPolicy = this.document.getHeaderFooterPolicy();
        extractHeaders(xHTMLContentHandler, headerFooterPolicy);
        extractIBodyText(this.document, xHTMLContentHandler);
        extractFooters(xHTMLContentHandler, headerFooterPolicy);
    }

    /**
     * Walks the elements of a body (the document itself or a table cell) and
     * emits each paragraph or table; other element kinds are ignored.
     */
    private void extractIBodyText(IBody body, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
        for (IBodyElement element : body.getBodyElements()) {
            if (element instanceof XWPFParagraph) {
                extractParagraph((XWPFParagraph) element, xhtml);
            } else if (element instanceof XWPFTable) {
                extractTable((XWPFTable) element, xhtml);
            }
        }
    }

    /**
     * Emits one paragraph: optional section headers, the opening tag (with a
     * style class when the paragraph style resolves to one), bookmark anchors,
     * the runs, comment text, footnote text, the closing tag, and finally the
     * optional section footers.
     */
    private void extractParagraph(XWPFParagraph paragraph, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
        // Section properties on the paragraph select a dedicated header/footer policy.
        XWPFHeaderFooterPolicy sectionPolicy = null;
        if (paragraph.getCTP().getPPr() != null) {
            CTSectPr sectPr = paragraph.getCTP().getPPr().getSectPr();
            if (sectPr != null) {
                sectionPolicy = new XWPFHeaderFooterPolicy(this.document, sectPr);
                extractHeaders(xhtml, sectionPolicy);
            }
        }

        // Default to a plain paragraph; a resolvable style name may override tag and class.
        String tag = HtmlParagraph.TAG_NAME;
        String styleClass = null;
        String styleName = lookupStyleName(paragraph.getStyleID());
        if (styleName != null) {
            WordExtractor.TagAndStyle tagAndStyle = WordExtractor.buildParagraphTagAndStyle(
                    styleName, paragraph.getPartType() == BodyType.TABLECELL);
            tag = tagAndStyle.getTag();
            styleClass = tagAndStyle.getStyleClass();
        }
        if (styleClass == null) {
            xhtml.startElement(tag);
        } else {
            xhtml.startElement(tag, "class", styleClass);
        }

        // Each bookmark becomes an empty named anchor at the start of the paragraph.
        for (CTBookmark bookmark : paragraph.getCTP().getBookmarkStartList()) {
            xhtml.startElement("a", "name", bookmark.getName());
            xhtml.endElement("a");
        }

        extractRuns(paragraph, xhtml);

        String commentText = new XWPFCommentsDecorator(paragraph, null).getCommentText();
        if (commentText != null && commentText.length() > 0) {
            xhtml.characters(commentText);
        }
        String footnoteText = paragraph.getFootnoteText();
        if (footnoteText != null && footnoteText.length() > 0) {
            xhtml.characters(footnoteText + "\n");
        }
        xhtml.endElement(tag);

        extractFooters(xhtml, sectionPolicy);
    }

    /**
     * Resolves a paragraph style ID to the style's name.
     *
     * @return the style name, or null when the ID is null, the document has
     *         no style table, the ID matches no style, or the style is unnamed
     */
    private String lookupStyleName(String styleId) {
        // Guard every step: previously a missing table or unknown ID caused a NullPointerException.
        // The style is looked up twice rather than stored so that no additional type is referenced here.
        if (styleId == null || this.styles == null || this.styles.getStyle(styleId) == null) {
            return null;
        }
        return this.styles.getStyle(styleId).getName();
    }

    /**
     * Emits the runs of a paragraph, wrapping text in bold/italic elements as
     * the formatting changes from run to run, and closing whatever is still
     * open after the last run.
     */
    private void extractRuns(XWPFParagraph paragraph, XHTMLContentHandler xhtml) throws SAXException {
        boolean boldOpen = false;
        boolean italicOpen = false;
        for (XWPFRun run : paragraph.getRuns()) {
            if (run.isBold() != boldOpen) {
                // Italic is nested inside bold, so it must be closed before bold is toggled;
                // the italic check below reopens it if this run is italic.
                if (italicOpen) {
                    xhtml.endElement(HtmlItalic.TAG_NAME);
                    italicOpen = false;
                }
                if (run.isBold()) {
                    xhtml.startElement(HtmlBold.TAG_NAME);
                } else {
                    xhtml.endElement(HtmlBold.TAG_NAME);
                }
                boldOpen = run.isBold();
            }
            if (run.isItalic() != italicOpen) {
                if (run.isItalic()) {
                    xhtml.startElement(HtmlItalic.TAG_NAME);
                } else {
                    xhtml.endElement(HtmlItalic.TAG_NAME);
                }
                italicOpen = run.isItalic();
            }

            boolean linkOpen = startHyperlink(run, xhtml);
            xhtml.characters(run.toString());
            extractPictures(paragraph, run, xhtml);
            if (linkOpen) {
                xhtml.endElement("a");
            }
        }
        if (italicOpen) {
            xhtml.endElement(HtmlItalic.TAG_NAME);
        }
        if (boldOpen) {
            xhtml.endElement(HtmlBold.TAG_NAME);
        }
    }

    /**
     * Opens an anchor element when the run is a hyperlink run with either a
     * resolvable URL or a non-empty in-document anchor.
     *
     * @return true when an anchor element was opened and the caller must close it
     */
    private boolean startHyperlink(XWPFRun run, XHTMLContentHandler xhtml) throws SAXException {
        if (!(run instanceof XWPFHyperlinkRun)) {
            return false;
        }
        XWPFHyperlinkRun linkRun = (XWPFHyperlinkRun) run;
        XWPFHyperlink hyperlink = linkRun.getHyperlink(this.document);
        if (hyperlink != null && hyperlink.getURL() != null) {
            xhtml.startElement("a", Constants.ATTRNAME_HREF, hyperlink.getURL());
            return true;
        }
        if (linkRun.getAnchor() != null && linkRun.getAnchor().length() > 0) {
            // No external target: link to the bookmark inside this document.
            xhtml.startElement("a", Constants.ATTRNAME_HREF, "#" + linkRun.getAnchor());
            return true;
        }
        return false;
    }

    /**
     * Emits an empty image element for every embedded picture of the run that
     * has picture data, provided the paragraph is attached to a document.
     */
    private void extractPictures(XWPFParagraph paragraph, XWPFRun run, XHTMLContentHandler xhtml) throws SAXException {
        for (XWPFPicture picture : run.getEmbeddedPictures()) {
            if (paragraph.getDocument() == null) {
                continue;
            }
            XWPFPictureData pictureData = picture.getPictureData();
            if (pictureData == null) {
                continue;
            }
            AttributesImpl attributes = new AttributesImpl();
            attributes.addAttribute("", "src", "src", "CDATA", "embedded:" + pictureData.getFileName());
            attributes.addAttribute("", "alt", "alt", "CDATA", picture.getDescription());
            xhtml.startElement(HtmlImage.TAG_NAME, attributes);
            xhtml.endElement(HtmlImage.TAG_NAME);
        }
    }

    /**
     * Emits a table as table/body/row/cell elements; each cell's content is
     * rendered by treating the cell as a nested body.
     */
    private void extractTable(XWPFTable table, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
        xhtml.startElement(HtmlTable.TAG_NAME);
        xhtml.startElement(HtmlTableBody.TAG_NAME);
        for (XWPFTableRow row : table.getRows()) {
            xhtml.startElement(HtmlTableRow.TAG_NAME);
            for (XWPFTableCell cell : row.getTableCells()) {
                xhtml.startElement(HtmlTableDataCell.TAG_NAME);
                extractIBodyText(cell, xhtml);
                xhtml.endElement(HtmlTableDataCell.TAG_NAME);
            }
            xhtml.endElement(HtmlTableRow.TAG_NAME);
        }
        xhtml.endElement(HtmlTableBody.TAG_NAME);
        xhtml.endElement(HtmlTable.TAG_NAME);
    }

    /**
     * Emits the first-page, even-page and default footers of the policy, in
     * that order, skipping any that are absent. A null policy emits nothing,
     * matching {@link #extractHeaders}.
     */
    private void extractFooters(XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy policy) throws SAXException, XmlException, IOException {
        if (policy == null) {
            return;
        }
        if (policy.getFirstPageFooter() != null) {
            extractHeaderText(xhtml, policy.getFirstPageFooter());
        }
        if (policy.getEvenPageFooter() != null) {
            extractHeaderText(xhtml, policy.getEvenPageFooter());
        }
        if (policy.getDefaultFooter() != null) {
            extractHeaderText(xhtml, policy.getDefaultFooter());
        }
    }

    /**
     * Emits the first-page, even-page and default headers of the policy, in
     * that order, skipping any that are absent. A null policy emits nothing.
     */
    private void extractHeaders(XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy policy) throws SAXException, XmlException, IOException {
        if (policy == null) {
            return;
        }
        if (policy.getFirstPageHeader() != null) {
            extractHeaderText(xhtml, policy.getFirstPageHeader());
        }
        if (policy.getEvenPageHeader() != null) {
            extractHeaderText(xhtml, policy.getEvenPageHeader());
        }
        if (policy.getDefaultHeader() != null) {
            extractHeaderText(xhtml, policy.getDefaultHeader());
        }
    }

    /**
     * Emits the content of a single header or footer: all of its paragraphs
     * first, then all of its tables.
     */
    private void extractHeaderText(XHTMLContentHandler xhtml, XWPFHeaderFooter headerFooter) throws SAXException, XmlException, IOException {
        for (XWPFParagraph paragraph : headerFooter.getParagraphs()) {
            extractParagraph(paragraph, xhtml);
        }
        for (XWPFTable table : headerFooter.getTables()) {
            extractTable(table, xhtml);
        }
    }

    /**
     * @return a new single-element list holding the package part of the document
     */
    @Override // org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor
    protected List<PackagePart> getMainDocumentParts() {
        List<PackagePart> parts = new ArrayList<PackagePart>();
        parts.add(this.document.getPackagePart());
        return parts;
    }
}
