Java 类com.itextpdf.text.pdf.parser.PdfReaderContentParser 实例源码

项目:presenca-vereadores-sjc    文件:PDFAttendanceParser.java   
/**
 * Extracts the plain text of every page of a PDF and returns it as one string.
 * The per-page texts are appended in page order without any separator.
 *
 * @param pdfFile name of the PDF, handed unchanged to {@link PdfReader}
 * @return the concatenated text of all pages
 * @throws IllegalArgumentException if reading the file raises an {@link IOException}
 */
private String getPdfContent(String pdfFile) {
    try {
        PdfReader reader = new PdfReader(pdfFile);
        try {
            StringBuilder text = new StringBuilder();
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            for (int page = 1; page <= reader.getNumberOfPages(); page++) {
                TextExtractionStrategy strategy = parser.processContent(page, new SimpleTextExtractionStrategy());
                text.append(strategy.getResultantText());
            }
            return text.toString();
        } finally {
            // Release the reader even when a page fails to parse.
            reader.close();
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Not able to read file " + pdfFile, e);
    }
}
项目:testarea-itext5    文件:PdfDenseMergeTool.java   
/**
 * Imports one source page into the target document, positioned below the
 * content already placed on the current target page.
 * <p>
 * The height reported by a {@link TextMarginFinder} for the source page decides
 * whether the page still fits: if it exceeds the remaining room above the bottom
 * margin a new target page is started, otherwise {@code gap} is added when the
 * current page is not empty.
 * </p>
 *
 * @param reader the source document
 * @param parser a content parser for {@code reader}
 * @param page   the page number in {@code reader} to import
 * @throws IOException propagated from content parsing
 * @throws IllegalArgumentException if the measured height exceeds the target
 *         page height minus the top and bottom margins
 */
void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException
{
    TextMarginFinder margins = parser.processContent(page, new TextMarginFinder());
    Rectangle sourcePageSize = reader.getPageSize(page);
    float requiredHeight = margins.getHeight();
    float availableHeight = pageSize.getHeight() - topMargin - bottomMargin;
    if (requiredHeight > availableHeight)
    {
        String message = String.format("Page %s content too large; height: %s, limit: %s.", page, requiredHeight, availableHeight);
        throw new IllegalArgumentException(message);
    }

    float remainingOnPage = yPosition - pageSize.getBottom(bottomMargin);
    boolean needsNewPage = requiredHeight > remainingOnPage;
    if (needsNewPage)
    {
        newPage();
    }
    else
    {
        if (!writer.isPageEmpty())
        {
            // Separate this import from the content already on the page.
            requiredHeight += gap;
        }
    }
    yPosition -= requiredHeight;

    PdfImportedPage template = writer.getImportedPage(reader, page);
    // Shift the template so that the lower bound found by the margin finder lands on yPosition.
    float lowerBoundOffset = margins.getLly() - sourcePageSize.getBottom();
    writer.getDirectContent().addTemplate(template, 0, yPosition - lowerBoundOffset);
}
项目:testarea-itext5    文件:ExtractCertifiedSchoolList.java   
/**
 * Feeds every page of the test resource {@code certified-school-list-9-16-2015.pdf}
 * to a {@link CertifiedSchoolListExtractionStrategy} constructed with two UTF-8
 * writers targeting {@code data.txt} and {@code non-data.txt} in {@code RESULT_FOLDER}.
 *
 * @throws IOException if a resource cannot be opened, read or written
 */
@Test
public void testCertifiedSchoolList_9_16_2015() throws IOException
{
    try (   Writer data = new OutputStreamWriter(new FileOutputStream(new File(RESULT_FOLDER, "data.txt")), "UTF-8");
            Writer nonData = new OutputStreamWriter(new FileOutputStream(new File(RESULT_FOLDER, "non-data.txt")), "UTF-8");
            InputStream resource = getClass().getResourceAsStream("certified-school-list-9-16-2015.pdf")    )
    {
        CertifiedSchoolListExtractionStrategy strategy = new CertifiedSchoolListExtractionStrategy(data, nonData);
        PdfReader reader = new PdfReader(resource);
        try
        {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            for (int page = 1; page <= reader.getNumberOfPages(); page++)
            {
                parser.processContent(page, strategy);
            }
            strategy.close();
        }
        finally
        {
            // The reader is not part of the try-with-resources header, so close it explicitly.
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:ExtractDrawnCheckboxes.java   
/**
 * <a href="http://stackoverflow.com/questions/40549977/reading-legacy-word-forms-checkboxes-converted-to-pdf">
 * Reading legacy Word forms checkboxes converted to PDF
 * </a>
 * <br>
 * <a href="https://www.dropbox.com/s/4z7ky3yy2yaj53i/Doc1.pdf?dl=0">
 * Doc1.pdf
 * </a>
 * <p>
 * This test shows how one can extract the drawn "checkboxes" from the sample
 * PDF provided by the OP: for every page it prints the start point of each
 * found box's diagonal together with its checked state.
 * </p>
 *
 * @throws IOException if the resource cannot be read
 */
@Test
public void testExtractDoc1() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("Doc1.pdf"))
    {
        PdfReader pdfReader = new PdfReader(resource);
        try
        {
            // One parser serves all pages of the same reader.
            PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);

            for (int page = 1; page <= pdfReader.getNumberOfPages(); page++)
            {
                System.out.printf("\nPage %s\n====\n", page);

                // A fresh strategy per page keeps the boxes of different pages apart.
                CheckBoxExtractionStrategy strategy = new CheckBoxExtractionStrategy();
                parser.processContent(page, strategy);

                for (Box box : strategy.getBoxes())
                {
                    Vector basePoint = box.getDiagonal().getStartPoint();
                    System.out.printf("at %s, %s - %s\n", basePoint.get(Vector.I1), basePoint.get(Vector.I2),
                            box.isChecked() ? "checked" : "unchecked");
                }
            }
        }
        finally
        {
            pdfReader.close();
        }
    }
}
项目:testarea-itext5    文件:DividerAndColorAwareTextExtraction.java   
/**
 * Extracts the text sections of a page range with a
 * {@link DividerAndColorAwareTextExtractionStrategy}, writes each section to its
 * own file and returns all section texts as one report string.
 *
 * @param reader      the document to read from
 * @param format      {@link String#format} pattern for the output file path; it is
 *                    given the page number and the zero-based section index
 * @param from        first page to process (inclusive)
 * @param to          last page to process (inclusive)
 * @param headerColor color passed on to the extraction strategy
 * @return for each page, every section preceded by a {@code "--"} line, followed
 *         by two line breaks per page
 * @throws IOException if parsing the page or writing a section file fails
 */
String extractAndStore(PdfReader reader, String format, int from, int to, BaseColor headerColor) throws IOException
{
    StringBuilder report = new StringBuilder();

    int page = from;
    while (page <= to)
    {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
        DividerAwareTextExtrationStrategy strategy = contentParser.processContent(page, new DividerAndColorAwareTextExtractionStrategy(810, 30, 20, 575, headerColor));

        int sectionIndex = 0;
        for (Section section : strategy.getSections())
        {
            String text = strategy.getResultantText(section);
            String fileName = String.format(format, page, sectionIndex);
            Files.write(Paths.get(fileName), text.getBytes("UTF8"));

            report.append("--\n");
            report.append(text);
            report.append('\n');
            sectionIndex++;
        }
        report.append("\n\n");
        page++;
    }

    return report.toString();
}
项目:testarea-itext5    文件:DividerAwareTextExtraction.java   
/**
 * Extracts the text sections of a page range with a
 * {@link DividerAwareTextExtrationStrategy}, writes each section to its own file
 * and returns all section texts as one report string.
 *
 * @param reader the document to read from
 * @param format {@link String#format} pattern for the output file path; it is
 *               given the page number and the zero-based section index
 * @param from   first page to process (inclusive)
 * @param to     last page to process (inclusive)
 * @return for each page, every section preceded by a {@code "--"} line, followed
 *         by two line breaks per page
 * @throws IOException if parsing the page or writing a section file fails
 */
String extractAndStore(PdfReader reader, String format, int from, int to) throws IOException
{
    StringBuilder report = new StringBuilder();

    int page = from;
    while (page <= to)
    {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
        DividerAwareTextExtrationStrategy strategy = contentParser.processContent(page, new DividerAwareTextExtrationStrategy(810, 30, 20, 575));

        int sectionIndex = 0;
        for (Section section : strategy.getSections())
        {
            String text = strategy.getResultantText(section);
            String fileName = String.format(format, page, sectionIndex);
            Files.write(Paths.get(fileName), text.getBytes("UTF8"));

            report.append("--\n");
            report.append(text);
            report.append('\n');
            sectionIndex++;
        }
        report.append("\n\n");
        page++;
    }

    return report.toString();
}
项目:preservation-tools    文件:PdfAnalysis.java   
/**
 * Extracts the text of every page of a PDF and splits it into lines.
 * A line break is appended after each page's text before splitting on
 * {@code "\n"}.
 *
 * @param PdfFile name of the PDF, handed unchanged to {@link PdfReader}
 * @return the extracted lines, or {@code null} if anything goes wrong while
 *         reading or parsing the file
 * @throws IOException declared for callers; failures inside this method are
 *         reported through the {@code null} return value instead
 */
public static String[] extractsPdfLines(String PdfFile) throws IOException {
    try {
        PdfReader reader = new PdfReader(PdfFile);
        try {
            StringBuilder text = new StringBuilder();
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);

            for (int page = 1; page <= reader.getNumberOfPages(); page++) {
                TextExtractionStrategy strategy = parser.processContent(page, new SimpleTextExtractionStrategy());
                text.append(strategy.getResultantText()).append('\n');
            }

            return text.toString().split("\n");
        } finally {
            // Close the reader on the failure path as well.
            reader.close();
        }
    } catch (Exception e) {
        // Best effort by contract: callers receive null for an unreadable PDF.
        return null;
    }
}
项目:pdftagger    文件:Main.java   
/**
 * Extracts the text of every page of {@code KLEE.pdf} in {@code RESOURCES_DIR}
 * with a {@link LocationTextExtractionStrategy} and writes it, one page per
 * {@code println}, to the file {@code "extracted text"} in the same directory.
 *
 * @return always {@code null}; no text chunks are collected
 * @throws IOException if the PDF cannot be read or the output file cannot be opened
 */
private static List<LocationTextExtractionStrategy.TextChunk> getTextChunks() throws IOException {
    PdfReader reader = new PdfReader(RESOURCES_DIR + "KLEE.pdf");
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        PrintWriter out = new PrintWriter(new FileOutputStream(RESOURCES_DIR + "extracted text"));
        try {
            for (int page = 1; page <= reader.getNumberOfPages(); page++) {
                TextExtractionStrategy strategy = parser.processContent(page, new LocationTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
            out.flush();
        } finally {
            // Do not leave the output file open when a page fails to parse.
            out.close();
        }
    } finally {
        reader.close();
    }
    return null;
}
项目:finances    文件:TextExtractor.java   
/**
 * Reads a PDF from the given stream and stores one entry per page in
 * {@code pageText}: the {@code text} gathered by a fresh
 * {@link ImportRenderListener} for that page.
 *
 * @param is stream providing the PDF; it is not closed here
 * @throws IOException if the PDF cannot be read or parsed
 */
public TextExtractor(InputStream is) throws IOException {
    PdfReader pdfReader = new PdfReader(is);
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);
        int pages = pdfReader.getNumberOfPages();
        for (int i = 1; i <= pages; i++) {
            ImportRenderListener renderListener = new ImportRenderListener();
            parser.processContent(i, renderListener);
            pageText.add(renderListener.text);
        }
    } finally {
        // The reader is only needed during construction.
        pdfReader.close();
    }
}
项目:spring-itext    文件:MicaiItextUtils.java   
/**
 * Scans every page of a PDF for text chunks containing {@code KEY_WORD}.
 * <p>
 * Each matching chunk overwrites {@code resu} with a new three-element array:
 * the x and y of the bounding rectangle of the chunk's baseline, and the current
 * page number. The last match therefore wins. Every chunk is also printed to
 * standard output, prefixed with its page number.
 * </p>
 * <p>
 * NOTE(review): the loop counter {@code i} and the result {@code resu} are
 * declared outside this method and are not reset here, so a call without any
 * match returns whatever {@code resu} held before — confirm this is intended.
 * </p>
 *
 * @param filePath name of the PDF, handed unchanged to {@link PdfReader}
 * @return the current value of {@code resu}
 */
public static float[] getKeyWords(String filePath) {
    try {
        PdfReader pdfReader = new PdfReader(filePath);
        try {
            int pageNum = pdfReader.getNumberOfPages();
            PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);
            // Page numbers start at 1.
            for (i = 1; i <= pageNum; i++) {
                pdfReaderContentParser.processContent(i, new RenderListener() {
                    public void renderText(TextRenderInfo textRenderInfo) {
                        String text = textRenderInfo.getText();
                        System.out.println(i + ":" +text);
                        if (null != text && text.contains(KEY_WORD)) {
                            Float boundingRectange = textRenderInfo.getBaseline().getBoundingRectange();
                            resu = new float[3];
                            resu[0] = boundingRectange.x;
                            resu[1] = boundingRectange.y;
                            resu[2] = i;
                        }
                    }
                    public void renderImage(ImageRenderInfo arg0) {
                        // Images are irrelevant for the keyword search.
                    }
                    public void endTextBlock() {
                        // Nothing to do.
                    }
                    public void beginTextBlock() {
                        // Nothing to do.
                    }
                });
            }
        } finally {
            // Release the reader even when a page fails to parse.
            pdfReader.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return resu;
}
项目:testarea-itext5    文件:PdfVeryDenseMergeTool.java   
/**
 * Merges all pages of the given document, one after the other, by delegating
 * to {@code merge(reader, parser, page)} with a single shared content parser.
 *
 * @param reader the source document
 * @throws IOException propagated from the per-page merge
 */
void merge(PdfReader reader) throws IOException
{
    PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
    int page = 1;
    while (page <= reader.getNumberOfPages())
    {
        merge(reader, contentParser, page);
        page++;
    }
}
项目:testarea-itext5    文件:PdfDenseMergeTool.java   
/**
 * Merges all pages of the given document, one after the other, by delegating
 * to {@code merge(reader, parser, page)} with a single shared content parser.
 *
 * @param reader the source document
 * @throws IOException propagated from the per-page merge
 */
void merge(PdfReader reader) throws IOException
{
    PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
    int page = 1;
    while (page <= reader.getNumberOfPages())
    {
        merge(reader, contentParser, page);
        page++;
    }
}
项目:testarea-itext5    文件:PageVerticalAnalysis.java   
/**
 * Runs a {@link PageVerticalAnalyzer} over every page of a PDF and reports the
 * collected {@code verticalFlips} pairwise ({@code "a - b"} per line) under a
 * {@code "Page n"} heading, or {@code "No content"} for a page without flips.
 * The report is printed to standard output and written to {@code target}.
 *
 * @param pdf    stream providing the PDF
 * @param target file receiving the report
 * @throws IOException if the PDF cannot be read or the report cannot be written
 */
void analyzeVertically(InputStream pdf, File target) throws IOException
{
    final PdfReader pdfReader = new PdfReader(pdf);

    try
    {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(pdfReader);
        StringBuilder report = new StringBuilder();
        int page = 1;
        while (page <= pdfReader.getNumberOfPages())
        {
            PageVerticalAnalyzer analyzer = contentParser.processContent(page, new PageVerticalAnalyzer());
            report.append("Page ");
            report.append(page);
            report.append('\n');
            if (analyzer.verticalFlips.isEmpty())
            {
                report.append("No content\n\n");
            }
            else
            {
                // Consume the flips two at a time; a trailing unpaired entry is ignored.
                int index = 0;
                while (index < analyzer.verticalFlips.size() - 1)
                {
                    report.append(String.format("%3.3f - %3.3f\n", analyzer.verticalFlips.get(index), analyzer.verticalFlips.get(index + 1)));
                    index += 2;
                }
                report.append('\n');
            }
            page++;
        }
        String sections = report.toString();
        System.out.print(sections);
        Files.write(target.toPath(), sections.getBytes());
    }
    finally
    {
        pdfReader.close();
    }
}
项目:testarea-itext5    文件:TextLocationExtraction.java   
/**
 * Copies a PDF from {@code input} to {@code output} and, on every page where a
 * {@link SearchTextLocationExtractionStrategy} built from {@code pattern}
 * reports locations, strokes a rectangle around each location on the page's
 * over-content using stroke color RGB (255, 255, 0).
 *
 * @param input   stream providing the source PDF
 * @param output  stream receiving the stamped PDF
 * @param pattern the pattern handed to the search strategy
 * @throws DocumentException propagated from the stamper
 * @throws IOException       if reading or writing fails
 */
void mark(InputStream input, OutputStream output, Pattern pattern) throws DocumentException, IOException
{
    PdfReader pdfReader = new PdfReader(input);
    PdfStamper pdfStamper = new PdfStamper(pdfReader, output);
    try {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(pdfReader);
        for (int pageNr = 1; pageNr <= pdfReader.getNumberOfPages(); pageNr++)
        {
            SearchTextLocationExtractionStrategy searchStrategy = new SearchTextLocationExtractionStrategy(pattern);
            // The resultant text itself is discarded; the call is made before the locations are queried.
            parser_result:
            {
                contentParser.processContent(pageNr, searchStrategy, Collections.emptyMap()).getResultantText();
            }
            Collection<TextRectangle> hits = searchStrategy.getLocations(null);
            if (!hits.isEmpty())
            {
                PdfContentByte overlay = pdfStamper.getOverContent(pageNr);
                overlay.setRGBColorStroke(255, 255, 0);
                for (TextRectangle hit : hits)
                {
                    overlay.rectangle(hit.getMinX(), hit.getMinY(), hit.getWidth(), hit.getHeight());
                }
                overlay.stroke();
            }
        }
        pdfStamper.close();
    } finally {
        pdfReader.close();
    }
}
项目:testarea-itext5    文件:FindFreeSpace.java   
/**
 * Runs a {@link FreeSpaceFinder} over one page, restricted to the page's crop
 * box, and returns the finder's {@code freeSpaces}.
 *
 * @param reader    the document to inspect
 * @param minWidth  minimum width passed to the finder
 * @param minHeight minimum height passed to the finder
 * @param page      the page number to inspect
 * @return the {@code freeSpaces} collection of the finder after processing
 * @throws IOException propagated from content parsing
 */
public Collection<Rectangle2D> find(PdfReader reader, float minWidth, float minHeight, int page) throws IOException
{
    Rectangle pageCropBox = reader.getCropBox(page);
    float left = pageCropBox.getLeft();
    float bottom = pageCropBox.getBottom();
    float width = pageCropBox.getWidth();
    float height = pageCropBox.getHeight();
    Rectangle2D searchArea = new Rectangle2D.Float(left, bottom, width, height);

    FreeSpaceFinder spaceFinder = new FreeSpaceFinder(searchArea, minWidth, minHeight);
    new PdfReaderContentParser(reader).processContent(page, spaceFinder);
    return spaceFinder.freeSpaces;
}
项目:testarea-itext5    文件:FindFreeSpace.java   
/**
 * Runs a {@link FreeSpaceFinderExt} over one page, restricted to the page's
 * crop box, and returns the finder's {@code freeSpaces}.
 *
 * @param reader    the document to inspect
 * @param minWidth  minimum width passed to the finder
 * @param minHeight minimum height passed to the finder
 * @param page      the page number to inspect
 * @return the {@code freeSpaces} collection of the finder after processing
 * @throws IOException propagated from content parsing
 */
public Collection<Rectangle2D> findExt(PdfReader reader, float minWidth, float minHeight, int page) throws IOException
{
    Rectangle pageCropBox = reader.getCropBox(page);
    float left = pageCropBox.getLeft();
    float bottom = pageCropBox.getBottom();
    float width = pageCropBox.getWidth();
    float height = pageCropBox.getHeight();
    Rectangle2D searchArea = new Rectangle2D.Float(left, bottom, width, height);

    FreeSpaceFinder spaceFinder = new FreeSpaceFinderExt(searchArea, minWidth, minHeight);
    new PdfReaderContentParser(reader).processContent(page, spaceFinder);
    return spaceFinder.freeSpaces;
}
项目:cvia    文件:PDFTextChunkReader.java   
/**
 * Reads all text chunks of a PDF file, page by page, using a
 * {@link TextChunkExtractionStrategy} constructed with the page number.
 *
 * @param file the PDF file to read
 * @return a {@link PDFReadResult} built from the file and the chunks of all
 *         pages in page order
 * @throws IOException if the file cannot be read or parsed
 */
public PDFReadResult readPDFFromFile(File file) throws IOException {
    PdfReader reader = new PdfReader(file.getAbsolutePath());
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        ArrayList<TextChunk> textChunks = new ArrayList<TextChunk>();
        for (int page = 1; page <= reader.getNumberOfPages(); page++) {
            TextChunkExtractionStrategy strategy = parser.processContent(page, new TextChunkExtractionStrategy(page));
            textChunks.addAll(strategy.getResultantTextChunks());
        }
        return new PDFReadResult(file, textChunks);
    } finally {
        // Release the reader even when a page fails to parse.
        reader.close();
    }
}
项目:FastReading    文件:PdfFileOpener.java   
/**
 * Reads the text of a PDF book and normalizes it into a single string.
 * <p>
 * Page texts are joined so that a page ending in {@code .}, {@code !},
 * {@code ?} or {@code :} is followed by a space and any other page by
 * {@code ". "}. Afterwards the text is trimmed, runs of whitespace are
 * collapsed to one space and runs of dots to one dot. Progress is reported
 * whenever the integer percentage of processed pages changes.
 * </p>
 *
 * @param file             the PDF file to open
 * @param percentSender    receives {@code (oldPercent, newPercent)} updates
 * @param readingEndSender run once after a non-empty text has been produced
 * @return the reading result, or {@code null} if the normalized text is empty
 *         or the file cannot be read
 */
@Override
public BookReadingResult open(@NonNull File file, @NonNull PercentSender percentSender, @NonNull Runnable readingEndSender) {

    try {
        PdfReader pdfReader = new PdfReader(file.getAbsolutePath());
        StringBuilder text = new StringBuilder();
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);
            int numberOfPages = pdfReader.getNumberOfPages();
            int oldPercent = 0, newPercent;

            for (int i = 1; i <= numberOfPages; i++) {
                TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
                String pageText = strategy.getResultantText();

                text.append(pageText);

                // endsWith takes a literal suffix, not a regex, so the dot must not be escaped.
                if (pageText.endsWith(".") ||
                        pageText.endsWith("!") ||
                        pageText.endsWith("?") ||
                        pageText.endsWith(":")) {
                    text.append(" ");
                } else {
                    text.append(". ");
                }

                newPercent = 100 * i / numberOfPages;
                if (newPercent != oldPercent) {
                    percentSender.refreshPercents(oldPercent, newPercent);
                    oldPercent = newPercent;
                }
            }
        } finally {
            // Release the reader even when a page fails to parse.
            pdfReader.close();
        }

        String resultText = text.toString();
        resultText = resultText.trim();                         // delete first and last space (if exist)
        resultText = resultText.replaceAll("\\s+", " ");        // delete all duplicate white spaces
        resultText = resultText.replaceAll("(\\.)+", "\\.");        // delete all duplicate dots

        if (resultText.length() < 1) {
            return null;
        }

        readingEndSender.run();
        return new BookReadingResult(resultText, InternalStorageFileHelper.fileNameWithoutExtension(file), "");

    } catch (IOException e) {
        e.printStackTrace();

        return null;
    }

}
项目:testarea-itext5    文件:PdfVeryDenseMergeTool.java   
/**
 * Imports one source page into the target document in vertical slices, so that
 * a slice which no longer fits on the current target page continues on a new one.
 * <p>
 * The slices are derived from the {@code verticalFlips} collected by a
 * {@link PageVerticalAnalyzer}; the list is walked from its last index downwards,
 * two entries at a time. NOTE(review): presumably the flips are pairs of y
 * coordinates delimiting content sections in ascending order — confirm against
 * {@code PageVerticalAnalyzer}.
 * </p>
 *
 * @param reader the source document
 * @param parser a content parser for {@code reader}
 * @param page   the page number in {@code reader} to import
 * @throws IOException propagated from content parsing
 * @throws IllegalArgumentException if nothing fits even right after starting a
 *         new target page
 */
void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException
{
    PdfImportedPage importedPage = writer.getImportedPage(reader, page);
    PdfContentByte directContent = writer.getDirectContent();

    PageVerticalAnalyzer finder = parser.processContent(page, new PageVerticalAnalyzer());
    // Fewer than two flips: no complete pair, nothing to import.
    if (finder.verticalFlips.size() < 2)
        return;
    Rectangle pageSizeToImport = reader.getPageSize(page);

    // startFlip indexes the flip at which the next slice begins; start at the last entry.
    int startFlip = finder.verticalFlips.size() - 1;
    boolean first = true;
    while (startFlip > 0)
    {
        // Every pass but the first begins on a fresh target page.
        if (!first)
            newPage();

        float freeSpace = yPosition - pageSize.getBottom(bottomMargin);
        int endFlip = startFlip + 1;
        // Move endFlip down by one pair for as long as the distance between
        // flip[startFlip] and flip[endFlip - 2] is smaller than the free space.
        while ((endFlip > 1) && (finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2) < freeSpace))
            endFlip -=2;
        if (endFlip < startFlip)
        {
            // At least one pair fits: import the span flip[endFlip]..flip[startFlip].
            float height = finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip);

            // Clip to a full-width band of that height directly below yPosition
            // so only this slice of the imported page is painted.
            directContent.saveState();
            directContent.rectangle(0, yPosition - height, pageSizeToImport.getWidth(), height);
            directContent.clip();
            directContent.newPath();

            // Shift the page so that flip[startFlip] lands on yPosition.
            writer.getDirectContent().addTemplate(importedPage, 0, yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom()));

            directContent.restoreState();
            yPosition -= height + gap;
            // Continue with the flip just below the slice that was placed.
            startFlip = endFlip - 1;
        }
        else if (!first) 
            throw new IllegalArgumentException(String.format("Page %s content sections too large.", page));
        // A failure on the first pass only triggers a retry on a new page.
        first = false;
    }
}
项目:testarea-itext5    文件:ExtractSuperAndSubInLine.java   
/**
 * Draws a horizontal line across the page at every vertical flip reported by a
 * {@link TextLineFinder} and prints the flip values to standard output. The
 * stamped copy is written to {@code RESULT_FOLDER} as
 * {@code <name>-lines-<startPage>-<endPage>.pdf}.
 *
 * @param resource  class path resource name of the source PDF
 * @param startPage first page to process (inclusive)
 * @param endPage   page at which processing stops (exclusive, per the loop condition)
 * @throws IOException       if reading or writing fails
 * @throws DocumentException propagated from the stamper
 */
void markLineBoundaries(String resource, int startPage, int endPage) throws IOException, DocumentException
{
    String name = new File(resource).getName();
    String target = String.format("%s-lines-%s-%s.pdf", name, startPage, endPage);
    InputStream resourceStream = getClass().getResourceAsStream(resource);
    try
    {
        PdfReader reader = new PdfReader(resourceStream);
        try
        {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            System.out.printf("\nLine boundaries in %s\n", name);

            PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(new File(RESULT_FOLDER, target)));

            for (int page = startPage; page < endPage; page++)
            {
                System.out.printf("\n   Page %s\n   ", page);

                TextLineFinder finder = new TextLineFinder();
                parser.processContent(page, finder);

                PdfContentByte over = stamper.getOverContent(page);
                Rectangle mediaBox = reader.getPageSize(page);

                for (float flip: finder.verticalFlips)
                {
                    System.out.printf(" %s", flip);
                    over.moveTo(mediaBox.getLeft(), flip);
                    over.lineTo(mediaBox.getRight(), flip);
                }

                System.out.println();
                over.stroke();
            }

            stamper.close();
        }
        finally
        {
            // Release the reader whether or not stamping succeeded.
            reader.close();
        }
    }
    finally
    {
        if (resourceStream != null)
            resourceStream.close();
    }
}
项目:preservation-tools    文件:TextSucheInOrdner.java   
/**
 * Searches one PDF file for {@code searchedString} (case-insensitively, line by
 * line) and writes the findings as XML-like elements to {@code outputfile}.
 * <p>
 * For every hit the page number and the complete line are written. Each page
 * is searched on its own, so a hit is reported once and under the page it
 * occurs on. Searching stops adding hits once {@code stringfound} reaches
 * {@code MAXIMAL_HITS}. Files rejected by {@code PdfAnalysis.testPdfOk} only
 * get their name written. Any exception during the search is written as a
 * {@code <Fehlermeldung>} element instead of being thrown.
 * </p>
 *
 * @param file the PDF file to search
 * @throws IOException declared for callers; search failures are reported to
 *         {@code outputfile} instead
 */
public static void searchforStringinPdfFiles(File file) throws IOException {

    outputfile.println("<Dateiname>" + (file.getName()) + "</Dateiname>");

    if (filetools.pdf.PdfAnalysis.testPdfOk(file)) {
        try {
            PdfReader reader = new PdfReader(file.toString());
            try {
                int pagesPdf = reader.getNumberOfPages();
                PdfReaderContentParser parser = new PdfReaderContentParser(reader);

                int trefferinDatei = 0;
                for (int i = 1; i <= pagesPdf; i++) {
                    TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
                    String pageText = strategy.getResultantText();
                    // Split only the current page; splitting an accumulated buffer would
                    // re-report lines of earlier pages under this page number.
                    // An empty page yields no lines (String.split would return one empty line).
                    String[] pageLines = pageText.isEmpty() ? new String[0] : pageText.split("\n");

                    for (int j = 0; (j < pageLines.length && (stringfound < MAXIMAL_HITS)); j++) {
                        String paragraph = pageLines[j].toLowerCase();
                        String searchStringlowerCase = searchedString.toLowerCase();
                        if (paragraph.contains(searchStringlowerCase)) {
                            trefferinDatei++;
                            stringfound++;
                            outputfile.println("<Seitenzahl>" + i + "</Seitenzahl>");
                            outputfile.println("<GanzeZeile>" + (pageLines[j]) + "</GanzeZeile>");
                        }
                    }
                }
                outputfile.println("<TextinDatei>" + trefferinDatei + "</TextinDatei>");
                outputfile.println("<Suchergebnis>" + trefferinDatei + " x " + "</Suchergebnis>");
            } finally {
                // Release the reader even when a page fails to parse.
                reader.close();
            }
        } catch (Exception e) {
            outputfile.println("<Fehlermeldung>" + e + "</Fehlermeldung>");
        }
    }

}
项目:testarea-itext5    文件:TestTrimPdfPage.java   
/**
 * Determines a page rectangle that excludes surrounding whitespace, using the
 * text bounds reported by a {@link TextMarginFinder} on all four sides.
 * <p>
 * The OP's code. The bounds found are also printed to standard output.
 *
 * @param pageSize the original page size (not used by this variant)
 * @param reader the pdf reader
 * @param page the number of the page to measure
 * @return a new page size which cuts out the whitespace
 * @throws IOException propagated from content parsing
 */
private Rectangle getOutputPageSize(Rectangle pageSize, PdfReader reader, int page) throws IOException
{
    TextMarginFinder margins = new PdfReaderContentParser(reader).processContent(page, new TextMarginFinder());
    Rectangle trimmed = new Rectangle(margins.getLlx(), margins.getLly(), margins.getUrx(), margins.getUry());
    System.out.printf(
            "Actual boundary: (%f;%f) to (%f;%f)\n",
            margins.getLlx(),
            margins.getLly(),
            margins.getUrx(),
            margins.getUry());
    return trimmed;
}
项目:testarea-itext5    文件:TestTrimPdfPage.java   
/**
 * Determines a page rectangle that excludes whitespace above and below the
 * text: the vertical bounds come from a {@link TextMarginFinder}, the
 * horizontal bounds are those of the original page.
 * <p>
 * The OP's code revised to use the whole page width. The bounds found are also
 * printed to standard output.
 *
 * @param pageSize the original page size, supplying the left and right edges
 * @param reader the pdf reader
 * @param page the number of the page to measure
 * @return a new page size which cuts out the whitespace
 * @throws IOException propagated from content parsing
 */
private Rectangle getOutputPageSize2(Rectangle pageSize, PdfReader reader, int page) throws IOException
{
    TextMarginFinder margins = new PdfReaderContentParser(reader).processContent(page, new TextMarginFinder());
    Rectangle trimmed = new Rectangle(pageSize.getLeft(), margins.getLly(), pageSize.getRight(), margins.getUry());
    System.out.printf(
            "Actual boundary: (%f;%f) to (%f;%f)\n",
            margins.getLlx(),
            margins.getLly(),
            margins.getUrx(),
            margins.getUry());
    return trimmed;
}
项目:testarea-itext5    文件:TestTrimPdfPage.java   
/**
 * Determines a page rectangle from the text bounds reported by a
 * {@link TextMarginFinder}, with the right edge moved out to
 * {@code 2 * urx - llx}.
 * <p>
 * The OP's code revised to use a width with equal margins left and right.
 * NOTE(review): confirm that the right-edge formula matches that description.
 * The bounds found are also printed to standard output.
 *
 * @param pageSize the original page size (not used by this variant)
 * @param reader the pdf reader
 * @param page the number of the page to measure
 * @return a new page size which cuts out the whitespace
 * @throws IOException propagated from content parsing
 */
private Rectangle getOutputPageSize3(Rectangle pageSize, PdfReader reader, int page) throws IOException
{
    TextMarginFinder margins = new PdfReaderContentParser(reader).processContent(page, new TextMarginFinder());
    float extendedRight = 2 * margins.getUrx() - margins.getLlx();
    Rectangle trimmed = new Rectangle(margins.getLlx(), margins.getLly(), extendedRight, margins.getUry());
    System.out.printf(
            "Actual boundary: (%f;%f) to (%f;%f)\n",
            margins.getLlx(),
            margins.getLly(),
            margins.getUrx(),
            margins.getUry());
    return trimmed;
}
项目:testarea-itext5    文件:TestTrimPdfPage.java   
/**
 * Determines a page rectangle that excludes surrounding whitespace, using the
 * bounds reported by a {@link MarginFinder} on all four sides.
 * <p>
 * The OP's code revised to use MarginFinder. The bounds found are also printed
 * to standard output.
 *
 * @param pageSize the original page size (not used by this variant)
 * @param reader the pdf reader
 * @param page the number of the page to measure
 * @return a new page size which cuts out the whitespace
 * @throws IOException propagated from content parsing
 */
private Rectangle getOutputPageSize4(Rectangle pageSize, PdfReader reader, int page) throws IOException
{
    MarginFinder margins = new PdfReaderContentParser(reader).processContent(page, new MarginFinder());
    Rectangle trimmed = new Rectangle(margins.getLlx(), margins.getLly(), margins.getUrx(), margins.getUry());
    System.out.printf(
            "Actual boundary: (%f;%f) to (%f;%f)\n",
            margins.getLlx(),
            margins.getLly(),
            margins.getUrx(),
            margins.getUry());
    return trimmed;
}
项目:testarea-itext5    文件:ImportPageWithoutFreeSpace.java   
/**
 * <p>
 * Restricts the media box of every page in the given {@link PdfReader} to the
 * bounds found by the {@link MarginFinder} extended render listener.
 * </p>
 * <p>
 * The approach is taken from the {@link TestTrimPdfPage} methods
 * {@link TestTrimPdfPage#testWithStamperExtFinder()} and
 * {@link TestTrimPdfPage#getOutputPageSize4(Rectangle, PdfReader, int)}.
 * Unlike the code there, this method changes the media box, because that is
 * the only box respected by {@link PdfWriter#getImportedPage(PdfReader, int)}.
 * </p>
 *
 * @param reader the document whose page dictionaries are modified in place
 * @throws IOException propagated from content parsing
 */
static void cropPdf(PdfReader reader) throws IOException
{
    int pageCount = reader.getNumberOfPages();
    int pageNumber = 1;
    while (pageNumber <= pageCount)
    {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
        MarginFinder margins = contentParser.processContent(pageNumber, new MarginFinder());
        Rectangle contentBox = new Rectangle(margins.getLlx(), margins.getLly(), margins.getUrx(), margins.getUry());

        float[] mediaBox = new float[]{contentBox.getLeft(), contentBox.getBottom(), contentBox.getRight(), contentBox.getTop()};
        PdfDictionary pageDictionary = reader.getPageN(pageNumber);
        pageDictionary.put(PdfName.MEDIABOX, new PdfArray(mediaBox));
        pageNumber++;
    }
}