package org.apache.tika.parser.pdf;

import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.io.IOExceptionWithCause;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.Matrix;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.parser.pdf.image.ImageGraphicsEngine;
import org.apache.tika.renderer.PageRangeRequest;
import org.apache.tika.renderer.RenderResult;
import org.apache.tika.renderer.RenderResults;
import org.apache.tika.renderer.Renderer;
import org.apache.tika.renderer.pdf.pdfbox.PDFRenderingState;
import org.glassfish.jaxb.runtime.v2.runtime.reflect.opt.Const;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:BOOT-INF/lib/tika-parser-pdf-module-2.9.2.jar:org/apache/tika/parser/pdf/PDF2XHTML.class */
public class PDF2XHTML extends AbstractPDF2XHTML {
    /**
     * Inline-image bookkeeping keyed by the image's {@link COSStream}. The map is created once in
     * the constructor and handed to every {@code ImageGraphicsEngine} built in
     * {@code extractImages}, so the same instance is shared by all pages of the document.
     * NOTE(review): presumably maps each already-processed stream to the counter value it was
     * assigned - confirm against ImageGraphicsEngine.
     */
    private Map<COSStream, Integer> processedInlineImages;
    /**
     * Counter, starting at 0, that is passed alongside {@link #processedInlineImages} to every
     * {@code ImageGraphicsEngine} built in {@code extractImages}.
     * NOTE(review): presumably used to number extracted inline images - confirm against
     * ImageGraphicsEngine.
     */
    private AtomicInteger inlineImageCounter;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:BOOT-INF/lib/tika-parser-pdf-module-2.9.2.jar:org/apache/tika/parser/pdf/PDF2XHTML$AngleCollector.class */
    /**
     * Text stripper that does not emit any text; it only records the rotation angle of every
     * {@link TextPosition} it is handed. Angles are stored in whole degrees, normalised to the
     * range 0-359, and duplicates collapse because they are kept in a set.
     */
    public static class AngleCollector extends PDFTextStripper {
        /** Distinct angles (whole degrees, 0-359) seen so far. */
        Set<Integer> angles = new HashSet<>();

        AngleCollector() throws IOException {
        }

        /**
         * @return the live set of angles collected so far (not a copy)
         */
        public Set<Integer> getAngles() {
            return this.angles;
        }

        /**
         * Records the angle of the given text instead of buffering the text itself.
         *
         * @param textPosition the text fragment whose orientation is measured
         */
        @Override
        public void processTextPosition(TextPosition textPosition) {
            // The font matrix is folded into the text matrix first; the angle is then derived
            // from the shear/scale components of the combined matrix.
            Matrix matrix = textPosition.getTextMatrix();
            matrix.concatenate(textPosition.getFont().getFontMatrix());
            int degrees = (int) Math.round(Math.toDegrees(Math.atan2(matrix.getShearY(), matrix.getScaleY())));
            // atan2 yields values in (-180, 180]; shift into 0-359.
            this.angles.add((degrees + 360) % 360);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:BOOT-INF/lib/tika-parser-pdf-module-2.9.2.jar:org/apache/tika/parser/pdf/PDF2XHTML$AngleDetectingPDF2XHTML.class */
    /**
     * {@link PDF2XHTML} variant that copes with rotated text. For every page it first collects the
     * distinct text angles with an {@link AngleCollector}, then processes the page once per angle
     * with a compensating rotation prepended to the page content, keeping only the text that ends
     * up at angle 0 in each pass.
     */
    public static class AngleDetectingPDF2XHTML extends PDF2XHTML {
        private AngleDetectingPDF2XHTML(PDDocument pDDocument, ContentHandler contentHandler, ParseContext parseContext, Metadata metadata, PDFParserConfig pDFParserConfig) throws IOException {
            super(pDDocument, contentHandler, parseContext, metadata, pDFParserConfig);
        }

        /**
         * Deliberately empty: a page is processed several times (once per angle), so the real
         * start-of-page work is invoked exactly once from {@link #processPage(PDPage)} via
         * {@code super.startPage}.
         */
        @Override
        public void startPage(PDPage pDPage) throws IOException {
            // no-op, see processPage
        }

        /**
         * Deliberately empty for the same reason as {@link #startPage(PDPage)}; the real
         * end-of-page work is invoked once from {@link #processPage(PDPage)} via
         * {@code super.endPage}. Declared {@code public} because the overridden method in the
         * enclosing class is {@code public}.
         */
        @Override
        public void endPage(PDPage pDPage) throws IOException {
            // no-op, see processPage
        }

        /**
         * Runs the start-of-page logic once, processes the page for every detected angle, and
         * always runs the end-of-page logic afterwards.
         *
         * @param pDPage the page to process
         * @throws IOException if {@code handleCatchableIOE} decides to rethrow, or if the
         *                     end-of-page logic fails
         */
        @Override
        public void processPage(PDPage pDPage) throws IOException {
            try {
                super.startPage(pDPage);
                detectAnglesAndProcessPage(pDPage);
            } catch (IOException e) {
                handleCatchableIOE(e);
            } finally {
                super.endPage(pDPage);
            }
        }

        /**
         * Collects the text angles of the current page and re-processes the page once per angle.
         * The page rotation is forced to 0 for the duration and restored afterwards, even when an
         * exception propagates.
         *
         * @param pDPage the page to process
         * @throws IOException if angle collection fails, the compensating content stream cannot
         *                     be written, or {@code handleCatchableIOE} rethrows
         */
        private void detectAnglesAndProcessPage(PDPage pDPage) throws IOException {
            AngleCollector angleCollector = new AngleCollector();
            angleCollector.setStartPage(getCurrentPageNo());
            angleCollector.setEndPage(getCurrentPageNo());
            angleCollector.getText(this.document);

            int rotation = pDPage.getRotation();
            pDPage.setRotation(0);
            try {
                for (int angle : angleCollector.getAngles()) {
                    if (angle == 0) {
                        try {
                            super.processPage(pDPage);
                        } catch (IOException e) {
                            handleCatchableIOE(e);
                        }
                        continue;
                    }

                    // Prepend a content stream that rotates the page by -angle so that text at
                    // this angle becomes horizontal for this pass.
                    try (PDPageContentStream rotationStream = new PDPageContentStream(this.document, pDPage, PDPageContentStream.AppendMode.PREPEND, false)) {
                        rotationStream.transform(Matrix.getRotateInstance(-Math.toRadians(angle), 0f, 0f));
                    }

                    try {
                        super.processPage(pDPage);
                    } catch (IOException e) {
                        handleCatchableIOE(e);
                    } finally {
                        // Drop the prepended rotation stream again so the next pass (and any
                        // later consumer of the document) sees the original page content.
                        ((COSArray) pDPage.getCOSObject().getItem(COSName.CONTENTS)).remove(0);
                    }
                }
            } finally {
                pDPage.setRotation(rotation);
            }
        }

        /**
         * Forwards only text whose effective angle is 0 degrees; text at any other angle is
         * skipped in this pass.
         *
         * @param textPosition the text fragment under consideration
         */
        @Override
        public void processTextPosition(TextPosition textPosition) {
            Matrix matrix = textPosition.getTextMatrix();
            matrix.concatenate(textPosition.getFont().getFontMatrix());
            int degrees = (int) Math.round(Math.toDegrees(Math.atan2(matrix.getShearY(), matrix.getScaleY())));
            if (degrees == 0) {
                super.processTextPosition(textPosition);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a converter for the given document and initialises the inline-image bookkeeping
     * (an empty map and a counter starting at 0).
     *
     * @param pDDocument      the PDF document to convert
     * @param contentHandler  the SAX handler that receives the XHTML events
     * @param parseContext    the parse context, passed on to the superclass
     * @param metadata        the metadata object, passed on to the superclass
     * @param pDFParserConfig the parser configuration, passed on to the superclass
     * @throws IOException if the superclass constructor fails
     */
    public PDF2XHTML(PDDocument pDDocument, ContentHandler contentHandler, ParseContext parseContext, Metadata metadata, PDFParserConfig pDFParserConfig) throws IOException {
        super(pDDocument, contentHandler, parseContext, metadata, pDFParserConfig);
        this.processedInlineImages = new HashMap<>();
        this.inlineImageCounter = new AtomicInteger(0);
    }

    /**
     * Converts the given PDF document to XHTML SAX events on {@code contentHandler}.
     * <p>
     * An {@link AngleDetectingPDF2XHTML} is used when {@code pDFParserConfig.isDetectAngles()} is
     * true, a plain {@link PDF2XHTML} otherwise. The converter is driven through
     * {@code writeText} with a {@link Writer} that discards everything it is given.
     *
     * @param pDDocument      the PDF document to convert
     * @param contentHandler  the SAX handler that receives the output
     * @param parseContext    the parse context
     * @param metadata        the metadata object
     * @param pDFParserConfig the parser configuration; also applied to the converter via
     *                        {@code configure}
     * @throws SAXException  if an {@link IOException} raised during conversion has a
     *                       {@code SAXException} as its cause; that cause is rethrown
     * @throws TikaException if conversion fails with any other {@link IOException}, or if the
     *                       converter recorded at least one exception (the first one is the cause)
     */
    public static void process(PDDocument pDDocument, ContentHandler contentHandler, ParseContext parseContext, Metadata metadata, PDFParserConfig pDFParserConfig) throws SAXException, TikaException {
        try {
            PDF2XHTML converter;
            if (pDFParserConfig.isDetectAngles()) {
                converter = new AngleDetectingPDF2XHTML(pDDocument, contentHandler, parseContext, metadata, pDFParserConfig);
            } else {
                converter = new PDF2XHTML(pDDocument, contentHandler, parseContext, metadata, pDFParserConfig);
            }
            pDFParserConfig.configure(converter);

            // The writer is a sink that ignores all output.
            Writer discardingWriter = new Writer() {
                @Override
                public void write(char[] cArr, int i, int i2) {
                }

                @Override
                public void flush() {
                }

                @Override
                public void close() {
                }
            };
            converter.writeText(pDDocument, discardingWriter);

            if (!converter.exceptions.isEmpty()) {
                // Report the first recorded exception.
                throw new TikaException("Unable to extract PDF content", converter.exceptions.get(0));
            }
        } catch (IOException ioe) {
            Throwable cause = ioe.getCause();
            if (cause instanceof SAXException) {
                throw (SAXException) cause;
            }
            throw new TikaException("Unable to extract PDF content", ioe);
        }
    }

    /**
     * Processes one page through the superclass. If that fails with an {@link IOException}, the
     * exception is passed to {@code handleCatchableIOE} and, provided that call returns normally,
     * {@link #endPage(PDPage)} is invoked for the page.
     *
     * @param pDPage the page to process
     * @throws IOException if {@code handleCatchableIOE} or {@code endPage} throws
     */
    @Override
    public void processPage(PDPage pDPage) throws IOException {
        try {
            super.processPage(pDPage);
        } catch (IOException pageFailure) {
            handleCatchableIOE(pageFailure);
            endPage(pDPage);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Finishes a page: closes the current paragraph, extracts inline images, renders the page if
     * configured, and then delegates to the superclass.
     * <p>
     * An {@link IOException} from image extraction or rendering is passed to
     * {@code handleCatchableIOE} without skipping the superclass call (unless that handler
     * rethrows). Any other {@link IOException} raised in this method is likewise passed to
     * {@code handleCatchableIOE}.
     *
     * @param pDPage the page that has just been processed
     * @throws IOException if {@code handleCatchableIOE} rethrows, or wrapping a
     *                     {@link SAXException} raised while ending the page
     */
    @Override
    public void endPage(PDPage pDPage) throws IOException {
        try {
            writeParagraphEnd();
            try {
                extractImages(pDPage);
                renderPage(pDPage);
            } catch (IOException imageFailure) {
                handleCatchableIOE(imageFailure);
            }
            super.endPage(pDPage);
        } catch (SAXException saxFailure) {
            throw new IOException("Unable to end a page", saxFailure);
        } catch (IOException pageFailure) {
            handleCatchableIOE(pageFailure);
        }
    }

    /* JADX WARN: Failed to calculate best type for var: r16v2 ??
    java.lang.NullPointerException
     */
    /* JADX WARN: Failed to calculate best type for var: r17v0 ??
    java.lang.NullPointerException
     */
    /* JADX WARN: Multi-variable type inference failed. Error: java.lang.NullPointerException
     */
    /* JADX WARN: Not initialized variable reg: 16, insn: 0x0155: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r16 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) A[TRY_LEAVE], block:B:70:0x0155 */
    /* JADX WARN: Not initialized variable reg: 17, insn: 0x015a: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r17 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]), block:B:72:0x015a */
    /* JADX WARN: Type inference failed for: r16v2, types: [org.apache.tika.renderer.RenderResults] */
    /* JADX WARN: Type inference failed for: r17v0, types: [java.lang.Throwable] */
    private void renderPage(PDPage pDPage) throws IOException {
        ?? r16;
        ?? r17;
        if (this.config.getImageStrategy() != PDFParserConfig.IMAGE_STRATEGY.RENDER_PAGES_AT_PAGE_END) {
            return;
        }
        TikaInputStream tikaInputStream = ((PDFRenderingState) this.context.get(PDFRenderingState.class)).getTikaInputStream();
        Renderer renderer = this.config.getRenderer();
        PageRangeRequest pageRangeRequest = new PageRangeRequest(getCurrentPageNo(), getCurrentPageNo());
        Metadata metadata = new Metadata();
        metadata.set(TikaCoreProperties.TYPE, PDFParser.MEDIA_TYPE.toString());
        try {
            try {
                RenderResults render = renderer.render(tikaInputStream, metadata, this.context, pageRangeRequest);
                Throwable th = null;
                for (RenderResult renderResult : render.getResults()) {
                    if (renderResult.getStatus() == RenderResult.STATUS.SUCCESS && this.embeddedDocumentExtractor.shouldParseEmbedded(renderResult.getMetadata())) {
                        InputStream inputStream = renderResult.getInputStream();
                        Throwable th2 = null;
                        try {
                            try {
                                this.embeddedDocumentExtractor.parseEmbedded(inputStream, this.xhtml, renderResult.getMetadata(), true);
                                if (inputStream != null) {
                                    if (0 != 0) {
                                        try {
                                            inputStream.close();
                                        } catch (Throwable th3) {
                                            th2.addSuppressed(th3);
                                        }
                                    } else {
                                        inputStream.close();
                                    }
                                }
                            } catch (Throwable th4) {
                                th2 = th4;
                                throw th4;
                            }
                        } catch (Throwable th5) {
                            if (inputStream != null) {
                                if (th2 != null) {
                                    try {
                                        inputStream.close();
                                    } catch (Throwable th6) {
                                        th2.addSuppressed(th6);
                                    }
                                } else {
                                    inputStream.close();
                                }
                            }
                            throw th5;
                        }
                    }
                }
                if (render != null) {
                    if (0 != 0) {
                        try {
                            render.close();
                        } catch (Throwable th7) {
                            th.addSuppressed(th7);
                        }
                    } else {
                        render.close();
                    }
                }
            } catch (Throwable th8) {
                if (r16 != 0) {
                    if (r17 != 0) {
                        try {
                            r16.close();
                        } catch (Throwable th9) {
                            r17.addSuppressed(th9);
                        }
                    } else {
                        r16.close();
                    }
                }
                throw th8;
            }
        } catch (SecurityException e) {
            throw e;
        } catch (Exception e2) {
            handleCatchableIOE(new IOExceptionWithCause(e2));
        }
    }

    /**
     * Runs an {@link ImageGraphicsEngine} over the page when the configuration asks for inline
     * images or for inline-image metadata only; otherwise does nothing.
     * <p>
     * If the engine recorded exceptions, the first one is removed from the engine's list and
     * thrown. When {@code config.isCatchIntermediateIOExceptions()} is true, the remaining ones
     * are appended to this converter's exception list before throwing.
     *
     * @param pDPage the page whose images are extracted
     * @throws SAXException declared for the engine/handler interaction
     * @throws IOException  the first exception recorded by the engine, if any
     */
    void extractImages(PDPage pDPage) throws SAXException, IOException {
        if (!this.config.isExtractInlineImages() && !this.config.isExtractInlineImageMetadataOnly()) {
            return;
        }
        ImageGraphicsEngine engine = this.config.getImageGraphicsEngineFactory().newEngine(
                pDPage,
                getCurrentPageNo(),
                this.embeddedDocumentExtractor,
                this.config,
                this.processedInlineImages,
                this.inlineImageCounter,
                this.xhtml,
                this.metadata,
                this.context);
        engine.run();

        List<IOException> engineExceptions = engine.getExceptions();
        if (engineExceptions.isEmpty()) {
            return;
        }
        IOException first = engineExceptions.remove(0);
        if (this.config.isCatchIntermediateIOExceptions()) {
            this.exceptions.addAll(engineExceptions);
        }
        throw first;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Runs the superclass paragraph-start logic and then opens a {@code p} element on the XHTML
     * handler.
     *
     * @throws IOException if the superclass fails, or wrapping the {@link SAXException} raised
     *                     while opening the element
     */
    @Override
    public void writeParagraphStart() throws IOException {
        super.writeParagraphStart();
        try {
            this.xhtml.startElement("p");
        } catch (SAXException saxFailure) {
            throw new IOException("Unable to start a paragraph", saxFailure);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Runs the superclass paragraph-end logic and then closes the {@code p} element on the XHTML
     * handler.
     *
     * @throws IOException if the superclass fails, or wrapping the {@link SAXException} raised
     *                     while closing the element
     */
    @Override
    public void writeParagraphEnd() throws IOException {
        super.writeParagraphEnd();
        try {
            this.xhtml.endElement("p");
        } catch (SAXException saxFailure) {
            throw new IOException("Unable to end a paragraph", saxFailure);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Passes the given text to the XHTML handler as character data.
     *
     * @param str the text to emit
     * @throws IOException wrapping the {@link SAXException} raised by the handler; the message
     *                     includes the text that could not be written
     */
    @Override
    public void writeString(String str) throws IOException {
        try {
            this.xhtml.characters(str);
        } catch (SAXException saxFailure) {
            throw new IOException("Unable to write a string: " + str, saxFailure);
        }
    }

    /**
     * Passes the Unicode text of the given position to the XHTML handler as character data.
     *
     * @param textPosition the text fragment to emit
     * @throws IOException wrapping the {@link SAXException} raised by the handler; the message
     *                     includes the text that could not be written
     */
    @Override
    protected void writeCharacters(TextPosition textPosition) throws IOException {
        try {
            this.xhtml.characters(textPosition.getUnicode());
        } catch (SAXException saxFailure) {
            String message = "Unable to write a character: " + textPosition.getUnicode();
            throw new IOException(message, saxFailure);
        }
    }

    /**
     * Passes the configured word separator ({@code getWordSeparator()}) to the XHTML handler as
     * character data.
     *
     * @throws IOException wrapping the {@link SAXException} raised by the handler
     */
    @Override
    protected void writeWordSeparator() throws IOException {
        try {
            this.xhtml.characters(getWordSeparator());
        } catch (SAXException saxFailure) {
            throw new IOException("Unable to write a space character", saxFailure);
        }
    }

    /**
     * Asks the XHTML handler to emit a newline in place of the stripper's line separator.
     *
     * @throws IOException wrapping the {@link SAXException} raised by the handler
     */
    @Override
    protected void writeLineSeparator() throws IOException {
        try {
            this.xhtml.newline();
        } catch (SAXException saxFailure) {
            throw new IOException("Unable to write a newline character", saxFailure);
        }
    }
}
