/*
 * Decompiled with CFR 0.152.
 */
package org.jpedal.examples.text;

import java.awt.Rectangle;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.List;
import org.jpedal.examples.text.BaseTextExtraction;
import org.jpedal.exception.PdfException;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.Strip;

/**
 * Example that extracts the words of a PDF together with their bounding
 * coordinates.
 *
 * <p>The word lists handled here are flat {@code List<String>} values made of
 * consecutive groups of five entries: the word text followed by four numeric
 * strings (written out below as x1, y1, x2, y2). {@link #decodeFile(String)}
 * writes one {@code words-<page>.txt} file per page, one
 * {@code word,x1,y1,x2,y2} line per word.
 */
public class ExtractTextAsWordlist
extends BaseTextExtraction {
    /** Running total of words written by {@link #decodeFile(String)}. */
    private int wordsExtracted;

    /** Delimiter characters used when the caller does not supply its own set. */
    private static final String DEFAULT_DELIMITERS = "&:=()!;.,\\/\"\"''";

    /** Number of list entries that describe one word (text + 4 coordinates). */
    private static final int ENTRIES_PER_WORD = 5;

    /**
     * Creates an extractor for the given file (or directory) name.
     *
     * @param fileName path handed to the base class
     */
    public ExtractTextAsWordlist(String fileName) {
        super(fileName);
        this.init();
    }

    /**
     * Creates an extractor for PDF data held in memory.
     *
     * @param byteArray PDF data handed to the base class
     */
    public ExtractTextAsWordlist(byte[] byteArray) {
        super(byteArray);
        this.init();
    }

    /**
     * Opens {@code file_name} and writes one word-list text file per page into
     * {@code <output_dir>/<name>/}, where {@code <name>} is the file name
     * without its directory and extension.
     *
     * @param file_name path of the PDF to process
     * @throws PdfException if anything fails while extracting or writing; the
     *                      original exception is logged and its message reused
     */
    @Override
    void decodeFile(String file_name) throws PdfException {
        this.fileName = file_name;
        if (!this.openPDFFile()) {
            return;
        }

        String outputDir = this.output_dir + separator + this.outputNameFor(file_name) + separator;
        File outputPath = new File(outputDir);

        int lastPage = this.getPageCount();
        // NOTE(review): the cap is only applied to documents with more than 10
        // pages; kept as-is, confirm this is intentional.
        if (lastPage > 10 && this.maxCount > 0 && lastPage > this.maxCount) {
            lastPage = this.maxCount;
        }

        try {
            for (int page = 1; page <= lastPage; ++page) {
                this.selectPage(page);
                List<String> words = this.getWordsOnPage(page, DEFAULT_DELIMITERS);
                if (words == null) {
                    continue;
                }
                // Created lazily so no directory appears when nothing is extracted.
                if (!outputPath.exists()) {
                    outputPath.mkdirs();
                }
                this.wordsExtracted += words.size() / ENTRIES_PER_WORD;
                this.writeWords(words, outputDir + "words-" + page + ".txt");
            }
            this.decode_pdf.flushObjectValues(false);
        }
        catch (Exception e) {
            LogWriter.writeLog(e);
            // NOTE(review): the cause is not chained because PdfException's
            // constructors are not visible here; the full exception is logged above.
            throw new PdfException(e.getMessage());
        }
    }

    /**
     * Derives the per-document output folder name from a file path: the part
     * after the last separator with its extension removed, or {@code "demo"}
     * when the path contains no separator.
     */
    private String outputNameFor(String path) {
        int pointer = path.lastIndexOf(separator);
        if (pointer == -1) {
            return "demo";
        }
        String leaf = path.substring(pointer + separator.length());
        // Strip whatever extension is present instead of assuming ".pdf"
        // (4 characters), which mangled or crashed on other names.
        int dot = leaf.lastIndexOf('.');
        return dot >= 0 ? leaf.substring(0, dot) : leaf;
    }

    /**
     * Writes the flat word list to {@code target} in UTF-8, one
     * {@code word,x1,y1,x2,y2} line per word with coordinates truncated to int.
     */
    private void writeWords(List<String> words, String target) throws Exception {
        try (OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8)) {
            Iterator<String> it = words.iterator();
            while (it.hasNext()) {
                String word = Strip.convertToText(it.next(), this.decode_pdf.isXMLExtraction());
                int wx1 = (int)Float.parseFloat(it.next());
                int wy1 = (int)Float.parseFloat(it.next());
                int wx2 = (int)Float.parseFloat(it.next());
                int wy2 = (int)Float.parseFloat(it.next());
                out.write(word + ',' + wx1 + ',' + wy1 + ',' + wx2 + ',' + wy2 + '\n');
            }
        }
    }

    /**
     * Returns the words on the whole page using the default delimiters.
     *
     * @param page page number to read
     * @return flat list of word/coordinate groups as returned by the grouping code
     * @throws PdfException propagated from the underlying calls
     */
    public List<String> getWordsOnPage(int page) throws PdfException {
        return this.getWordsOnPage(page, DEFAULT_DELIMITERS);
    }

    /**
     * Returns the words on the whole page (its media box).
     *
     * @param page       page number to read
     * @param delimiters delimiter characters passed through to the grouping code
     * @return flat list of word/coordinate groups as returned by the grouping code
     * @throws PdfException propagated from the underlying calls
     */
    public List<String> getWordsOnPage(int page, String delimiters) throws PdfException {
        this.checkFileOpened();
        this.selectPage(page);
        int x1 = this.currentPageData.getMediaBoxX(page);
        int x2 = this.currentPageData.getMediaBoxWidth(page) + x1;
        // The vertical extent is derived from the media box Y origin, mirroring
        // the horizontal computation above (previously the X origin was used).
        int y2 = this.currentPageData.getMediaBoxY(page);
        int y1 = this.currentPageData.getMediaBoxHeight(page) + y2;
        return this.getWordsOnPage(page, x1, y1, x2, y2, delimiters);
    }

    /**
     * Returns the words found inside the given region of a page.
     *
     * @param page       page number to read
     * @param x1         first x coordinate of the region
     * @param y1         first y coordinate of the region
     * @param x2         second x coordinate of the region
     * @param y2         second y coordinate of the region
     * @param delimiters delimiter characters passed through to the grouping code
     * @return flat list of word/coordinate groups as returned by the grouping code
     * @throws PdfException propagated from the underlying calls
     */
    public List<String> getWordsOnPage(int page, int x1, int y1, int x2, int y2, String delimiters) throws PdfException {
        this.checkFileOpened();
        this.selectPage(page);
        return this.currentGrouping.extractTextAsWordlist(x1, y1, x2, y2, page, true, delimiters);
    }

    /**
     * Returns the words found inside {@code rectangle}; its {@code x}/{@code y}
     * are passed as the first corner and {@code x + width}/{@code y + height}
     * as the second.
     *
     * @param page       page number to read
     * @param rectangle  region to read; must not be {@code null}
     * @param delimiters delimiter characters passed through to the grouping code
     * @return flat list of word/coordinate groups as returned by the grouping code
     * @throws PdfException propagated from the underlying calls
     */
    public List<String> getWordsOnPage(int page, Rectangle rectangle, String delimiters) throws PdfException {
        return this.getWordsOnPage(page, rectangle.x, rectangle.y, rectangle.x + rectangle.width, rectangle.y + rectangle.height, delimiters);
    }

    /**
     * Command-line entry point: {@code <input file or directory> <output directory>}.
     * Prints usage when run without arguments.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        switch (args.length) {
            case 0: {
                System.out.println("Example takes 2 parameters");
                System.out.println("Value 1 is the file name or directory of PDF files to process");
                System.out.println("Value 2 is Directory for writing the data as text files");
                System.exit(0);
                break;
            }
            case 2: {
                try {
                    ExtractTextAsWordlist.writeAllWordlistsToDir(args[0], args[1], -1);
                }
                catch (PdfException e) {
                    LogWriter.writeLog(e);
                }
                break;
            }
            default: {
                // A single argument is too few, not too many.
                String problem = args.length < 2 ? "too few" : "too many";
                System.out.println(problem + " arguments entered - run with no values to see defaults");
                StringBuilder arguments = new StringBuilder();
                for (String arg : args) {
                    arguments.append(arg).append('\n');
                }
                System.out.println("you entered:\n" + arguments + "as the arguments");
                System.exit(0);
                break;
            }
        }
    }

    /** Selects the word-list extraction type before running the base initialisation. */
    @Override
    void init() {
        this.type = BaseTextExtraction.ExtractTypes.TEXT_AS_WORDLIST;
        super.init();
    }

    /**
     * Processes {@code inputDir} and writes the word lists below {@code outputDir}.
     *
     * @param inputDir  file name or directory of PDF files to process
     * @param password  password to set on the extractor, or {@code null} for none
     * @param outputDir directory the text files are written under
     * @param maxPages  page limit stored as the extractor's {@code maxCount}
     * @return number of words counted while writing
     * @throws PdfException propagated from processing
     */
    public static int writeAllWordlistsToDir(String inputDir, String password, String outputDir, int maxPages) throws PdfException {
        ExtractTextAsWordlist extract = new ExtractTextAsWordlist(inputDir);
        if (password != null) {
            extract.setPassword(password);
        }
        extract.setup(outputDir, maxPages);
        try {
            extract.processFiles(inputDir);
        }
        finally {
            // Release the file even when processing fails part-way through.
            extract.closePDFfile();
        }
        return extract.wordsExtracted;
    }

    /**
     * Same as {@link #writeAllWordlistsToDir(String, String, String, int)}
     * without a password.
     *
     * @param inputDir  file name or directory of PDF files to process
     * @param outputDir directory the text files are written under
     * @param maxPages  page limit stored as the extractor's {@code maxCount}
     * @return number of words counted while writing
     * @throws PdfException propagated from processing
     */
    public static int writeAllWordlistsToDir(String inputDir, String outputDir, int maxPages) throws PdfException {
        return ExtractTextAsWordlist.writeAllWordlistsToDir(inputDir, null, outputDir, maxPages);
    }

    /** Stores the output directory (forced to end with the separator) and the page limit. */
    private void setup(String outputDir, int maxCount) {
        this.output_dir = outputDir.endsWith(separator) ? outputDir : outputDir + separator;
        this.maxCount = maxCount;
    }
}

