/*
 * Decompiled with CFR 0.152.
 */
package com.adobe.internal.pdftoolkit.services.textextraction;

import com.adobe.internal.pdftoolkit.core.exceptions.PDFFontException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFIOException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFInvalidDocumentException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFInvalidStructureException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFSecurityException;
import com.adobe.internal.pdftoolkit.core.fontset.PDFFontSet;
import com.adobe.internal.pdftoolkit.core.types.ASName;
import com.adobe.internal.pdftoolkit.pdf.content.processor.MarkedContentObject;
import com.adobe.internal.pdftoolkit.pdf.content.processor.TextObjectList;
import com.adobe.internal.pdftoolkit.pdf.document.PDFDocument;
import com.adobe.internal.pdftoolkit.pdf.graphics.optionalcontent.PDFOCObject;
import com.adobe.internal.pdftoolkit.pdf.graphics.xobject.PDFXObject;
import com.adobe.internal.pdftoolkit.pdf.graphics.xobject.PDFXObjectMap;
import com.adobe.internal.pdftoolkit.pdf.interchange.structure.PDFStructureUtils;
import com.adobe.internal.pdftoolkit.pdf.page.PDFPage;
import com.adobe.internal.pdftoolkit.services.interchange.structure.StructureFinder;
import com.adobe.internal.pdftoolkit.services.optionalcontent.OCManager;
import com.adobe.internal.pdftoolkit.services.textextraction.TextExtractionOptions;
import com.adobe.internal.pdftoolkit.services.textextraction.Word;
import com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator;
import com.adobe.internal.pdftoolkit.services.textextraction.impl.Base14FontSetUtil;
import com.adobe.internal.pdftoolkit.services.textextraction.impl.TEContentStreamHandler;
import com.adobe.internal.pdftoolkit.services.textextraction.impl.Wordafier;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

/**
 * Extracts words and sentences from the pages of a {@link PDFDocument}.
 *
 * <p>Instances are created through the {@code newInstance} factories, all of which return
 * {@code null} when the document or the font set is {@code null}. Text can be pulled either
 * page by page ({@link #getWordsIterator(PDFPage, int)} and friends) or for the whole document
 * ({@link #getWordsIterator()} and friends).
 *
 * <p>When structure use is requested and the catalog's {@code MarkInfo} dictionary flags the
 * document as marked, pages that carry structure-parent information are extracted through a
 * {@link StructureFinder}; all other pages are extracted from their plain text objects.
 */
public class TextExtractor {
    private final PDFDocument pdfDoc;
    private final PDFFontSet fontSet;
    /** Only set when structure-based extraction is enabled for this document. */
    private StructureFinder finder;
    private boolean foundstructure = false;
    private boolean checkSuperscriptsSubscripts = false;
    private static final TextExtractionOptions defaultExtractionOptions = TextExtractionOptions.newInstance();
    private boolean honourSpaces = defaultExtractionOptions.isHonourSpaces();
    private boolean ignoreArtifact = defaultExtractionOptions.isIgnoreArtifacts();
    private boolean honourClipPath = defaultExtractionOptions.isHonourClipPath();
    private List<PDFOCObject> ocGroup = defaultExtractionOptions.getOptionalContentObjects();
    private boolean ignoreBackgroundContent = defaultExtractionOptions.isIgnoreBackgroundContent();
    private boolean ignoreErrors = defaultExtractionOptions.ignoreErrors();
    private boolean honourSpecialCharacter = defaultExtractionOptions.isHonourSpecialCharacter();
    /** Copied onto every {@link Wordafier} created by the reading-order (ROTE) extraction. */
    public boolean workFlowOfInterest;
    /**
     * Overwritten with the {@link Wordafier}'s own {@code bLog} flag during reading-order (ROTE)
     * extraction; when {@code true} a debug line is printed to standard output.
     */
    public boolean bLog;

    /**
     * Builds an extractor from fully expanded options.
     *
     * <p>If {@code extractDefaultOptionalContent} is {@code true} the {@code ocGroup} argument is
     * ignored: the visible optional-content objects reported by an {@link OCManager} built from
     * the catalog's OC properties are used instead, or the default options' list when no manager
     * could be created.
     */
    private TextExtractor(PDFDocument pdfDoc, PDFFontSet clientFontSet, boolean useStructure, boolean shouldHonourSpaces, boolean ignoreArtifacts, boolean honourClipPath, List<PDFOCObject> ocGroup, boolean extractDefaultOptionalContent, boolean ignoreBackgroundContent, boolean ignoreErrors, boolean checkSuperscriptsSubscripts, boolean honourSpecialCharacter) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        this.pdfDoc = pdfDoc;
        this.fontSet = Base14FontSetUtil.buildBase14FontSet(clientFontSet, pdfDoc);
        this.honourSpaces = shouldHonourSpaces;
        this.ignoreArtifact = ignoreArtifacts;
        this.honourClipPath = honourClipPath;
        this.ignoreBackgroundContent = ignoreBackgroundContent;
        this.ignoreErrors = ignoreErrors;
        this.checkSuperscriptsSubscripts = checkSuperscriptsSubscripts;
        this.honourSpecialCharacter = honourSpecialCharacter;
        if (extractDefaultOptionalContent) {
            OCManager ocMgr = OCManager.newInstance(pdfDoc.requireCatalog().getOCProperties());
            if (ocMgr != null) {
                this.ocGroup = ocMgr.getVisibleOCObjects();
            }
        } else {
            this.ocGroup = ocGroup;
        }
        // Structure is only honoured when the catalog's MarkInfo dictionary exists, has a
        // Marked entry, and that entry is true.
        if (useStructure && pdfDoc.requireCatalog().getDictionaryDictionaryValue(ASName.k_MarkInfo) != null && pdfDoc.requireCatalog().getDictionaryDictionaryValue(ASName.k_MarkInfo).containsKey(ASName.k_Marked) && pdfDoc.requireCatalog().getDictionaryDictionaryValue(ASName.k_MarkInfo).getBoolean(ASName.k_Marked).booleanValue()) {
            this.foundstructure = true;
            this.finder = StructureFinder.newInstance(pdfDoc);
        }
    }

    /**
     * Creates an extractor that uses the default {@link TextExtractionOptions}.
     *
     * @return the extractor, or {@code null} if {@code pdfDoc} or {@code clientFontSet} is {@code null}
     */
    public static TextExtractor newInstance(PDFDocument pdfDoc, PDFFontSet clientFontSet) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        if (pdfDoc == null || clientFontSet == null) {
            return null;
        }
        return TextExtractor.newInstance(pdfDoc, clientFontSet, defaultExtractionOptions);
    }

    /**
     * Creates an extractor with fresh default options and the given structure preference.
     *
     * @param useStructure whether structure information should be used when the document is marked
     * @return the extractor, or {@code null} if {@code pdfDoc} or {@code clientFontSet} is {@code null}
     */
    public static TextExtractor newInstance(PDFDocument pdfDoc, PDFFontSet clientFontSet, boolean useStructure) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        if (pdfDoc == null || clientFontSet == null) {
            return null;
        }
        TextExtractionOptions options = TextExtractionOptions.newInstance();
        options.setUseStructure(useStructure);
        return TextExtractor.newInstance(pdfDoc, clientFontSet, options);
    }

    /**
     * Creates an extractor configured from {@code options}.
     *
     * @param options extraction options; {@code null} selects the shared default options
     * @return the extractor, or {@code null} if {@code pdfDoc} or {@code clientFontSet} is {@code null}
     */
    public static TextExtractor newInstance(PDFDocument pdfDoc, PDFFontSet clientFontSet, TextExtractionOptions options) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        if (pdfDoc == null || clientFontSet == null) {
            return null;
        }
        TextExtractionOptions effective = options == null ? defaultExtractionOptions : options;
        return new TextExtractor(pdfDoc, clientFontSet, effective.isUseStructure(), effective.isHonourSpaces(), effective.isIgnoreArtifacts(), effective.isHonourClipPath(), effective.getOptionalContentObjects(), effective.isExtractDefaultOptionalContent(), effective.isIgnoreBackgroundContent(), effective.ignoreErrors(), effective.isCheckSuperscriptsSubscripts(), effective.isHonourSpecialCharacter());
    }

    /**
     * Creates an extractor with fresh default options, the given structure preference and the
     * given superscript/subscript checking preference.
     *
     * @return the extractor, or {@code null} if {@code pdfDoc} or {@code clientFontSet} is {@code null}
     */
    public static TextExtractor newInstance(PDFDocument pdfDoc, PDFFontSet clientFontSet, boolean useStructure, boolean checkSuperscriptSubscript) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        if (pdfDoc == null || clientFontSet == null) {
            return null;
        }
        TextExtractionOptions options = TextExtractionOptions.newInstance();
        options.setUseStructure(useStructure);
        options.setCheckSuperscriptsSubscripts(checkSuperscriptSubscript);
        return TextExtractor.newInstance(pdfDoc, clientFontSet, options);
    }

    /**
     * Creates a content-stream handler carrying the clip-path, optional-content and
     * background-content settings shared by every extraction path.
     */
    private TEContentStreamHandler newContentStreamHandler() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        TEContentStreamHandler handler = new TEContentStreamHandler(this.fontSet, this.ignoreErrors);
        handler.setHonourClipPath(this.honourClipPath);
        handler.setReferenceOCGroups(this.ocGroup);
        handler.setIgnoreBackgroundContent(this.ignoreBackgroundContent);
        return handler;
    }

    /** Tells whether {@code page} should be extracted through the structure finder. */
    private boolean usesStructure(PDFPage page) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return this.foundstructure && this.finder != null && this.hasStructParent(page);
    }

    /** Extracts the words of {@code page} without special-character handling. */
    private List<Word> extractWords(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        return this.extractWords(page, pageIndex, false);
    }

    /**
     * Extracts the words of {@code page} from its text objects.
     *
     * @param considerSpacialCharecter when {@code true}, special-character handling is switched on
     *        in the {@link Wordafier}
     */
    private List<Word> extractWords(PDFPage page, int pageIndex, boolean considerSpacialCharecter) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        List<Word> words = new ArrayList<Word>();
        List<TextObjectList> textObjectLists = this.newContentStreamHandler().extractTextObjects(page);
        for (TextObjectList textObjects : textObjectLists) {
            Wordafier wordafier = new Wordafier(pageIndex, textObjects, page, this.ignoreErrors);
            wordafier.setHonourSpaces(this.honourSpaces);
            wordafier.setCheckSuperscriptsSubscripts(this.checkSuperscriptsSubscripts);
            if (considerSpacialCharecter) {
                wordafier.setConsiderSpecialCharacter(true);
            }
            // Each text-object list is prepended, so later lists end up first in the result.
            words.addAll(0, wordafier.buildWordList());
        }
        return words;
    }

    /**
     * Returns the words of {@code page}, extracted from its text objects without
     * special-character handling and without consulting document structure.
     */
    public List<Word> getExtractedWordsList(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        return this.extractWords(page, pageIndex);
    }

    /**
     * Returns the words of {@code page}, extracted from its text objects without consulting
     * document structure.
     *
     * @param considerSpacialCharecter whether special-character handling is switched on
     */
    public List<Word> getExtractedWordsLists(PDFPage page, int pageIndex, boolean considerSpacialCharecter) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        return this.extractWords(page, pageIndex, considerSpacialCharecter);
    }

    /** Extracts the sentences of {@code page} from its text objects. */
    private List<Word> extractSentences(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        List<Word> sentences = new ArrayList<Word>();
        List<TextObjectList> textObjectLists = this.newContentStreamHandler().extractTextObjects(page);
        for (TextObjectList textObjects : textObjectLists) {
            Wordafier wordafier = new Wordafier(pageIndex, textObjects, page, this.ignoreErrors);
            sentences.addAll(0, wordafier.buildSentenses());
        }
        return sentences;
    }

    /**
     * Extracts the words of {@code page} from its marked-content objects using {@code finder}.
     * If the structure turns out to be invalid, falls back to plain text-object extraction.
     */
    private List<Word> extractWords(PDFPage page, int pageIndex, StructureFinder finder) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        // Declared as List (not ArrayList) so the fallback result below can be assigned.
        List<Word> words = new ArrayList<Word>();
        TEContentStreamHandler teStreamHandler = this.newContentStreamHandler();
        teStreamHandler.setIgnoreArtifact(this.ignoreArtifact);
        HashMap<Integer, MarkedContentObject> mcObjectList = teStreamHandler.extractMarkedContentObjects(page);
        try {
            Wordafier wordafier = new Wordafier(pageIndex, mcObjectList, page, finder, this.ignoreErrors);
            wordafier.setHonourSpaces(this.honourSpaces);
            words.addAll(0, wordafier.buildWords());
        }
        catch (PDFInvalidStructureException invalidStructure) {
            // Deliberate best-effort: a broken structure tree must not prevent extraction.
            words = this.extractWords(page, pageIndex, this.honourSpecialCharacter);
        }
        return words;
    }

    /**
     * Extracts the words of {@code page} in reading order from its text objects.
     * Propagates {@link #workFlowOfInterest} to each {@link Wordafier} and mirrors the
     * {@link Wordafier}'s {@code bLog} flag back into {@link #bLog}.
     */
    private List<Word> extractROTEWords(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        List<Word> words = new ArrayList<Word>();
        List<TextObjectList> textObjectLists = this.newContentStreamHandler().extractTextObjects(page);
        int lc = 0;
        for (TextObjectList textObjects : textObjectLists) {
            ++lc;
            Wordafier wordafier = new Wordafier(pageIndex, textObjects, page, this.ignoreErrors);
            wordafier.workFlowOfInterest = this.workFlowOfInterest;
            wordafier.setHonourSpaces(this.honourSpaces);
            // The word list must be built before the reading-order list is requested.
            wordafier.buildWordList();
            this.bLog = wordafier.bLog;
            words.addAll(0, wordafier.getReadingOrderList());
            if (this.bLog) {
                System.out.println("TID: " + Thread.currentThread().getName() + Thread.currentThread().getId() + ". In extractROTEWords. Added the words. . lc: " + lc);
            }
        }
        return words;
    }

    /**
     * Extracts the sentences of {@code page} from its marked-content objects using {@code finder}.
     *
     * @throws PDFInvalidDocumentException if the structure is invalid (wraps the
     *         {@link PDFInvalidStructureException})
     */
    private List<Word> extractSentences(PDFPage page, int pageIndex, StructureFinder finder) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        List<Word> sentences = new ArrayList<Word>();
        HashMap<Integer, MarkedContentObject> mcObjectList = this.newContentStreamHandler().extractMarkedContentObjects(page);
        try {
            Wordafier wordafier = new Wordafier(pageIndex, mcObjectList, page, finder, this.ignoreErrors);
            sentences.addAll(0, wordafier.buildSentenses());
        }
        catch (PDFInvalidStructureException e) {
            throw new PDFInvalidDocumentException(e);
        }
        return sentences;
    }

    /** Returns an iterator over the words of every page of the document, in page order. */
    public WordsIterator getWordsIterator() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new DocumentWordsIterator();
    }

    /** Returns an iterator over the reading-order words of every page of the document. */
    public WordsIterator getROTEWordsIterator() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new DocumentROTEWordsIterator();
    }

    /** Returns an iterator over the sentences of every page of the document, in page order. */
    public WordsIterator getSentencesIterator() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new DocumentSentenceIterator();
    }

    /**
     * Returns an iterator over the words of a single page. Structure-based extraction is used
     * when it is enabled and the page has structure-parent information; otherwise the page's
     * text objects are used.
     */
    public WordsIterator getWordsIterator(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        List<Word> words = this.usesStructure(page)
                ? this.extractWords(page, pageIndex, this.finder)
                : this.extractWords(page, pageIndex, this.honourSpecialCharacter);
        return new WordListIterator(words);
    }

    /**
     * Returns an iterator over the words of a single page in reading order. Structure-based
     * extraction is used when it is enabled and the page has structure-parent information;
     * otherwise the reading-order extraction over text objects is used.
     */
    public WordsIterator getROTEWordsIterator(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        List<Word> words = this.usesStructure(page)
                ? this.extractWords(page, pageIndex, this.finder)
                : this.extractROTEWords(page, pageIndex);
        return new WordListIterator(words);
    }

    /** Returns an iterator over the sentences of a single page, with or without structure. */
    private WordsIterator getSentencesIterator(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        List<Word> sentences = this.usesStructure(page)
                ? this.extractSentences(page, pageIndex, this.finder)
                : this.extractSentences(page, pageIndex);
        return new WordListIterator(sentences);
    }

    /**
     * Returns an iterator over the structure-derived sentences of a single page.
     *
     * @return the iterator, or {@code null} when structure-based extraction does not apply to
     *         this page (structure disabled, or the page has no structure-parent information)
     */
    public WordsIterator getSentences(PDFPage page, int pageIndex) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        if (!this.usesStructure(page)) {
            return null;
        }
        return new WordListIterator(this.extractSentences(page, pageIndex, this.finder));
    }

    /**
     * Tells whether the page itself, or any XObject in its resources, carries a
     * structure-parent entry.
     */
    private boolean hasStructParent(PDFPage page) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        if (PDFStructureUtils.hasStructParent(page) || PDFStructureUtils.hasStructParents(page)) {
            return true;
        }
        PDFXObjectMap xObjMap = page.getResources().getXObjectMap();
        if (xObjMap == null) {
            return false;
        }
        for (ASName key : xObjMap.keySet()) {
            PDFXObject xObj = xObjMap.get(key);
            if (PDFStructureUtils.hasStructParent(xObj) || PDFStructureUtils.hasStructParents(xObj)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks the document page by page and yields each page's sentences. Page indices passed to
     * the extractor start at 1.
     */
    class DocumentSentenceIterator
    implements WordsIterator {
        private int pageIndex = 0;
        Iterator<PDFPage> pagesIter;
        /** Iterator of the current page; {@code null} if the page iterator yielded no page. */
        WordsIterator wordsIter;

        /** Eagerly loads the first page so extraction errors surface at creation time. */
        DocumentSentenceIterator() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            this.pagesIter = TextExtractor.this.pdfDoc.requirePages().iterator();
            if (this.pagesIter.hasNext()) {
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = TextExtractor.this.getSentencesIterator(page, this.pageIndex);
            }
        }

        /** Advances over empty pages until one with content is found or the pages run out. */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            // The null check guards against a page iterator that yielded no page at all.
            while (this.wordsIter == null || !this.wordsIter.hasNext()) {
                if (!this.pagesIter.hasNext()) {
                    return false;
                }
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = TextExtractor.this.getSentencesIterator(page, this.pageIndex);
            }
            return true;
        }

        /** Returns the next sentence, or {@code null} when the document is exhausted. */
        @Override
        public Word next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            if (this.hasNext()) {
                return this.wordsIter.next();
            }
            return null;
        }
    }

    /**
     * Walks the document page by page and yields each page's words in reading order. Page
     * indices passed to the extractor start at 1.
     */
    class DocumentROTEWordsIterator
    implements WordsIterator {
        private int pageIndex = 0;
        Iterator<PDFPage> pagesIter;
        /** Iterator of the current page; {@code null} if the page iterator yielded no page. */
        WordsIterator wordsIter;

        /** Eagerly loads the first page so extraction errors surface at creation time. */
        DocumentROTEWordsIterator() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            this.pagesIter = TextExtractor.this.pdfDoc.requirePages().iterator();
            if (this.pagesIter.hasNext()) {
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = TextExtractor.this.getROTEWordsIterator(page, this.pageIndex);
            }
        }

        /** Advances over empty pages until one with content is found or the pages run out. */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            // The null check guards against a page iterator that yielded no page at all.
            while (this.wordsIter == null || !this.wordsIter.hasNext()) {
                if (!this.pagesIter.hasNext()) {
                    return false;
                }
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                // Must be the ROTE variant for every page, matching the constructor.
                this.wordsIter = TextExtractor.this.getROTEWordsIterator(page, this.pageIndex);
            }
            return true;
        }

        /** Returns the next word, or {@code null} when the document is exhausted. */
        @Override
        public Word next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            if (this.hasNext()) {
                return this.wordsIter.next();
            }
            return null;
        }
    }

    /**
     * Walks the document page by page and yields each page's words. Page indices passed to the
     * extractor start at 1.
     */
    class DocumentWordsIterator
    implements WordsIterator {
        private int pageIndex = 0;
        Iterator<PDFPage> pagesIter;
        /** Iterator of the current page; {@code null} if the page iterator yielded no page. */
        WordsIterator wordsIter;

        /** Eagerly loads the first page so extraction errors surface at creation time. */
        DocumentWordsIterator() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            this.pagesIter = TextExtractor.this.pdfDoc.requirePages().iterator();
            if (this.pagesIter.hasNext()) {
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = TextExtractor.this.getWordsIterator(page, this.pageIndex);
            }
        }

        /** Advances over empty pages until one with content is found or the pages run out. */
        @Override
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            // The null check guards against a page iterator that yielded no page at all.
            while (this.wordsIter == null || !this.wordsIter.hasNext()) {
                if (!this.pagesIter.hasNext()) {
                    return false;
                }
                PDFPage page = this.pagesIter.next();
                ++this.pageIndex;
                this.wordsIter = TextExtractor.this.getWordsIterator(page, this.pageIndex);
            }
            return true;
        }

        /** Returns the next word, or {@code null} when the document is exhausted. */
        @Override
        public Word next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            if (this.hasNext()) {
                return this.wordsIter.next();
            }
            return null;
        }
    }

    /** Adapts a plain {@code List<Word>} to the {@link WordsIterator} interface. */
    static class WordListIterator
    implements WordsIterator {
        Iterator<Word> wordsIter;

        /**
         * @param wordList the words to iterate over; must not be {@code null}
         */
        WordListIterator(List<Word> wordList) {
            this.wordsIter = wordList.iterator();
        }

        @Override
        public boolean hasNext() {
            return this.wordsIter.hasNext();
        }

        /** Delegates to the underlying {@link Iterator#next()}. */
        @Override
        public Word next() {
            return this.wordsIter.next();
        }
    }
}

