/**
 * Extracts the text of every page of a PDF file and returns it as one string.
 *
 * @param pdfFile path of the PDF file to read
 * @return the text of all pages, concatenated in page order without separators
 * @throws IllegalArgumentException if the file cannot be read
 */
private String getPdfContent(String pdfFile) {
    try {
        PdfReader reader = new PdfReader(pdfFile);
        try {
            // No cross-thread sharing here, so the unsynchronized builder is sufficient.
            StringBuilder text = new StringBuilder();
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            int pageCount = reader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                TextExtractionStrategy strategy = parser.processContent(page, new SimpleTextExtractionStrategy());
                text.append(strategy.getResultantText());
            }
            return text.toString();
        } finally {
            // Release the reader even when text extraction fails.
            reader.close();
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Not able to read file " + pdfFile, e);
    }
}
/**
 * <a href="http://stackoverflow.com/questions/34394199/i-cant-rotate-my-page-from-existing-pdf">
 * I can't rotate my page from existing PDF
 * </a>
 * <p>
 * Switching between portrait and landscape like this obviously will cut off some parts of the page.
 * </p>
 * <p>
 * For every page the x and y coordinates of the media box and the crop box are swapped,
 * then the document is written out unchanged otherwise via a {@link PdfStamper}.
 * </p>
 */
@Test
public void testSwitchOrientation() throws DocumentException, IOException {
    try (InputStream resourceStream = getClass().getResourceAsStream("/mkl/testarea/itext5/extract/n2013.00849449.pdf")) {
        PdfReader reader = new PdfReader(resourceStream);
        try {
            int n = reader.getNumberOfPages();
            for (int i = 1; i <= n; i++) {
                Rectangle rect = reader.getPageSize(i);
                Rectangle crop = reader.getCropBox(i);
                PdfDictionary pageDict = reader.getPageN(i);
                // Deliberately swapped: (bottom, left, top, right) instead of (left, bottom, right, top).
                pageDict.put(PdfName.MEDIABOX, new PdfArray(new float[] {rect.getBottom(), rect.getLeft(), rect.getTop(), rect.getRight()}));
                pageDict.put(PdfName.CROPBOX, new PdfArray(new float[] {crop.getBottom(), crop.getLeft(), crop.getTop(), crop.getRight()}));
            }

            // The output stream is a managed resource so it is closed even if stamping fails.
            try (FileOutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "n2013.00849449-switch.pdf"))) {
                PdfStamper stamper = new PdfStamper(reader, result);
                stamper.close();
            }
        } finally {
            reader.close();
        }
    }
}
/**
 * <a href="http://stackoverflow.com/questions/28991291/how-to-remove-whitespace-on-merge">
 * How To Remove Whitespace on Merge
 * </a>
 * <p>
 * Runs {@link PdfDenseMergeTool} on the OP's header, body and footer files and
 * writes the merged document to <code>GrandizerMerge.pdf</code> in the result folder.
 * </p>
 */
@Test
public void testMergeGrandizerFiles() throws DocumentException, IOException {
    try (
        InputStream headerStream = getClass().getResourceAsStream("Header.pdf");
        InputStream bodyStream = getClass().getResourceAsStream("Body.pdf");
        InputStream footerStream = getClass().getResourceAsStream("Footer.pdf")
    ) {
        PdfDenseMergeTool mergeTool = new PdfDenseMergeTool(PageSize.A4, 18, 18, 5);
        PdfReader header = new PdfReader(headerStream);
        PdfReader body = new PdfReader(bodyStream);
        PdfReader footer = new PdfReader(footerStream);

        File mergedFile = new File(RESULT_FOLDER, "GrandizerMerge.pdf");
        try (FileOutputStream mergedOutput = new FileOutputStream(mergedFile)) {
            List<PdfReader> parts = Arrays.asList(header, body, footer);
            mergeTool.merge(mergedOutput, parts);
        } finally {
            // Readers are released whether or not the merge succeeded.
            header.close();
            body.close();
            footer.close();
        }
    }
}
/**
 * Loads the given file as an image and registers the result in {@code instances}.
 * <p>
 * A file whose name ends in ".pdf" (case-insensitive) is handled by importing its
 * first page through the writer behind {@code cb}; the reader is kept in the
 * returned instance. Any other file is loaded directly as an image, with no reader.
 * </p>
 *
 * @param cb   content byte whose writer imports the PDF page
 * @param file image or PDF file to load
 * @return the newly created and registered image instance
 * @throws IOException         if the file cannot be read
 * @throws BadElementException if the image cannot be created
 */
public ImageInstance getImageByFile(PdfContentByte cb, File file) throws IOException, BadElementException {
    final boolean isPdf = file.getName().toLowerCase().endsWith(".pdf");
    final String path = file.getAbsolutePath();

    final ImageInstance created;
    if (isPdf) {
        PdfReader pdfReader = new PdfReader(path);
        PdfImportedPage firstPage = cb.getPdfWriter().getImportedPage(pdfReader, 1);
        Image pageImage = Image.getInstance(firstPage);
        created = new ImageInstance(pageImage, pdfReader);
    } else {
        Image plainImage = Image.getInstance(path);
        created = new ImageInstance(plainImage, null);
    }

    instances.add(created);
    return created;
}
/**
 * Checks that the letter generated for {@code voter} contains the voter's e-mail
 * address on its second line and the password on its third line, each after a
 * colon-and-tab separator.
 * <p>
 * A missing or unreadable letter now fails the test; previously the
 * {@link IOException} was only printed, so the test passed without asserting anything.
 * </p>
 */
@Test
public void testWritePlainLetter() {
    try {
        PdfReader reader = new PdfReader("letters/" + voter.getNif() + ".pdf");
        try {
            String page = PdfTextExtractor.getTextFromPage(reader, 1);
            String[] lines = page.split("\n");
            String email = lines[1].split(":\t")[1];
            String password = lines[2].split(":\t")[1];
            assertEquals(voter.getEmail(), email);
            assertEquals(voter.getPassword(), password);
        } finally {
            reader.close();
        }
    } catch (IOException e) {
        // Fail loudly instead of silently passing when the letter cannot be read.
        throw new AssertionError("Archivo no encontrado", e);
    }
}
/**
 * Closes all of the passed PDF objects and file streams that are not {@code null}.
 * <p>
 * The file streams are closed in {@code finally} blocks, so they are released even
 * when closing one of the PDF objects (or the input stream) throws.
 * </p>
 *
 * @param reader           {@link PdfReader} to close, may be {@code null}
 * @param document         {@link Document} to close, may be {@code null}
 * @param contentByte      {@link PdfContentByte} whose current path is closed, may be {@code null}
 * @param writer           {@link PdfWriter} to close, may be {@code null}
 * @param fileInputStream  {@link FileInputStream} to close, may be {@code null}
 * @param fileOutputStream {@link FileOutputStream} to flush and close, may be {@code null}
 * @throws IOException if unable to close the input or output stream
 */
private static void closePassedStream(final PdfReader reader, final Document document,
        final PdfContentByte contentByte, final PdfWriter writer, final FileInputStream fileInputStream,
        final FileOutputStream fileOutputStream) throws IOException {
    try {
        if (null != reader) {
            reader.close();
        }
        if (null != document) {
            document.close();
        }
        if (null != contentByte) {
            // NOTE(review): closePath() looks like a drawing operation rather than a
            // resource release; kept for backward compatibility - confirm it is intended.
            contentByte.closePath();
        }
        if (null != writer) {
            writer.close();
        }
    } finally {
        try {
            if (null != fileInputStream) {
                fileInputStream.close();
            }
        } finally {
            if (null != fileOutputStream) {
                try {
                    fileOutputStream.flush();
                } finally {
                    fileOutputStream.close();
                }
            }
        }
    }
}
/**
 * Splits one source page into several output pages.
 * <p>
 * For each rectangle returned by {@code determineSplitRectangles} a new page is
 * started via {@code newPage}, the drawing area is clipped to that rectangle, and
 * the imported source page is drawn shifted by the negated lower-left corner of
 * the source page size.
 * </p>
 *
 * @param reader reader of the source document
 * @param page   number of the page to split
 * @throws IOException declared for the underlying page handling
 */
void split(PdfReader reader, int page) throws IOException {
    PdfImportedPage sourcePage = writer.getImportedPage(reader, page);
    Rectangle sourceSize = reader.getPageSize(page);
    Iterable<Rectangle> tiles = determineSplitRectangles(reader, page);

    for (Rectangle tile : tiles) {
        newPage(tile);

        PdfContentByte canvas = writer.getDirectContent();
        canvas.saveState();

        // Restrict drawing to the tile, then discard the path used for clipping.
        canvas.rectangle(tile.getLeft(), tile.getBottom(), tile.getWidth(), tile.getHeight());
        canvas.clip();
        canvas.newPath();

        writer.getDirectContent().addTemplate(sourcePage, -sourceSize.getLeft(), -sourceSize.getBottom());

        canvas.restoreState();
    }
}
/**
 * Merges all given source documents into one document written to the output stream.
 * <p>
 * The target document is opened first, every reader is merged in iteration order,
 * and the document is closed in any case, including when opening or merging fails.
 * </p>
 *
 * @param outputStream stream receiving the merged document
 * @param inputs       readers of the documents to merge, in the desired order
 * @throws DocumentException if the target document cannot be created or written
 * @throws IOException       if reading or writing fails
 */
public void merge(OutputStream outputStream, Iterable<PdfReader> inputs) throws DocumentException, IOException {
    try {
        openDocument(outputStream);
        for (final PdfReader source : inputs) {
            merge(source);
        }
    } finally {
        closeDocument();
    }
}
/**
 * Extracts the text of a range of pages of a classpath PDF resource, printing each
 * page's text to standard output and storing it in a text file in the result folder.
 * <p>
 * Output files are named <code>&lt;name&gt;-lines-&lt;page&gt;.txt</code>, or
 * <code>&lt;name&gt;-lines-v2-&lt;page&gt;.txt</code> when {@code isV2} is set, where
 * <code>&lt;name&gt;</code> is the last path segment of {@code resource}.
 * </p>
 *
 * @param resource  classpath resource name of the PDF
 * @param startPage first page to extract (inclusive)
 * @param endPage   page at which extraction stops (exclusive)
 * @param isV2      whether to use {@code extractV2} instead of {@code extract}
 */
void extract(String resource, int startPage, int endPage, boolean isV2)
        throws IOException, DocumentException, NoSuchFieldException, SecurityException {
    String name = new File(resource).getName();
    // "%%s" survives this first format call as "%s" and is filled with the page number later.
    String target = String.format(isV2 ? "%s-lines-v2-%%s.txt" : "%s-lines-%%s.txt", name);

    // try-with-resources replaces the manual finally block and tolerates a null stream.
    try (InputStream resourceStream = getClass().getResourceAsStream(resource)) {
        PdfReader reader = new PdfReader(resourceStream);
        try {
            System.out.printf("\nText by line in %s\n", name);
            for (int page = startPage; page < endPage; page++) {
                System.out.printf("\n Page %s\n", page);
                String pageText = isV2 ? extractV2(reader, page) : extract(reader, page);
                Files.write(new File(RESULT_FOLDER, String.format(target, page)).toPath(), pageText.getBytes("UTF8"));
                System.out.println(pageText);
            }
        } finally {
            reader.close();
        }
    }
}
/**
 * Extracts the text of every page using a fresh instance of the given strategy
 * class per page, stores each page's text in its own file, and returns all text.
 * <p>
 * If any filters are given, the strategy is wrapped in a
 * {@link FilteredTextRenderListener} using them. Page texts are joined with two
 * line feeds in the returned string.
 * </p>
 *
 * @param reader        reader of the document to extract from
 * @param format        file name pattern; its placeholder receives the page number
 * @param strategyClass strategy class, instantiated via its no-argument constructor
 * @param filters       optional render filters, may be {@code null} or empty
 * @return the text of all pages
 * @throws Exception if instantiation, extraction or writing fails
 */
<E extends TextExtractionStrategy> String extractAndStore(PdfReader reader, String format, Class<E> strategyClass, RenderFilter... filters) throws Exception {
    final boolean useFilters = filters != null && filters.length > 0;
    StringBuilder allPages = new StringBuilder();

    for (int pageNo = 1; pageNo <= reader.getNumberOfPages(); pageNo++) {
        TextExtractionStrategy pageStrategy = strategyClass.getConstructor().newInstance();
        if (useFilters) {
            pageStrategy = new FilteredTextRenderListener(pageStrategy, filters);
        }

        String text = extract(reader, pageNo, pageStrategy);
        Files.write(Paths.get(String.format(format, pageNo)), text.getBytes("UTF8"));

        if (pageNo != 1) {
            allPages.append("\n\n");
        }
        allPages.append(text);
    }

    return allPages.toString();
}
/** * <a href="http://stackoverflow.com/questions/38278816/remove-header-of-a-pdf-using-itext-pdfcleanupprocessor-does-not-work"> * Remove header of a pdf using iText PdfCleanUpProcessor does not work * </a> * <br/> * <a href="https://www.dropbox.com/s/4u8vupjqc4st3ib/love.pdf?dl=0"> * love.pdf * </a> * <p> * Cannot reproduce, I get a <code>org.apache.commons.imaging.ImageReadException: Invalid marker found in entropy data</code>. * </p> */ @Test public void testRedactLikeShiranSEkanayake() throws IOException, DocumentException { try ( InputStream resource = getClass().getResourceAsStream("love.pdf"); OutputStream result = new FileOutputStream(new File(OUTPUTDIR, "love-redacted.pdf")) ) { PdfReader reader = new PdfReader(resource); PdfStamper stamper = new PdfStamper(reader, result); List<PdfCleanUpLocation> cleanUpLocations = new ArrayList<PdfCleanUpLocation>(); for(int i=1; i<=reader.getNumberOfPages(); i++) { //System.out.println(i); Rectangle mediabox = reader.getPageSize(i); cleanUpLocations.add(new PdfCleanUpLocation(i, new Rectangle(0,800,1000,1000))); } PdfCleanUpProcessor cleaner = new PdfCleanUpProcessor(cleanUpLocations, stamper); cleaner.cleanUp(); stamper.close(); reader.close(); } }
/**
 * <a href="http://stackoverflow.com/questions/43511558/how-to-set-attributes-for-existing-pdf-that-contains-only-images-using-java-itex">
 * how to set attributes for existing pdf that contains only images using java itext?
 * </a>
 * <p>
 * The OP indicated in a comment that he searches a solution without a second file.
 * This test shows how to work with a single file, by first loading the file into a byte array.
 * </p>
 * <p>
 * The working copy is replaced if it already exists, so the test can be run repeatedly.
 * </p>
 */
@Test
public void testChangeTitleWithoutTempFile() throws IOException, DocumentException {
    File singleFile = new File(RESULT_FOLDER, "eg_01-singleFile.pdf");
    try (InputStream resource = getClass().getResourceAsStream("eg_01.pdf")) {
        // Without REPLACE_EXISTING the copy fails with FileAlreadyExistsException on every rerun.
        Files.copy(resource, singleFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
    }

    // Load the whole file into memory first so the same file can be overwritten below.
    byte[] original = Files.readAllBytes(singleFile.toPath());
    PdfReader reader = new PdfReader(original);
    try (FileOutputStream output = new FileOutputStream(singleFile)) {
        PdfStamper stamper = new PdfStamper(reader, output);

        Map<String, String> info = reader.getInfo();
        info.put("Title", "New title");
        info.put("CreationDate", new PdfDate().toString());
        stamper.setMoreInfo(info);

        // Keep the XMP metadata in sync with the info dictionary.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        XmpWriter xmp = new XmpWriter(baos, info);
        xmp.close();
        stamper.setXmpMetadata(baos.toByteArray());

        stamper.close();
    } finally {
        reader.close();
    }
}
/**
 * The OP's original code transformed into Java.
 * <p>
 * Stamps the text "DRAFT" followed by the current date, rotated by 45 degrees and
 * centered on half the rotated page width and height, over the first page.
 * </p>
 *
 * @param source stream of the PDF to stamp
 * @param target stream receiving the stamped PDF
 */
void stampTextOriginal(InputStream source, OutputStream target) throws DocumentException, IOException {
    final Date now = new Date();

    PdfReader pdfReader = new PdfReader(source);
    PdfStamper pdfStamper = new PdfStamper(pdfReader, target);
    BaseFont font = BaseFont.createFont(BaseFont.HELVETICA_BOLD, BaseFont.WINANSI, BaseFont.EMBEDDED);

    final int fontSize = 24;
    final String watermark = "DRAFT " + now;
    final int rotation = 45;
    final float centerY = pdfReader.getPageSizeWithRotation(1).getHeight() / 2;
    final float centerX = pdfReader.getPageSizeWithRotation(1).getWidth() / 2;

    PdfContentByte overContent = pdfStamper.getOverContent(1);
    overContent.setColorFill(new BaseColor(255, 200, 200));
    overContent.setFontAndSize(font, fontSize);

    overContent.beginText();
    overContent.showTextAligned(Element.ALIGN_CENTER, watermark, centerX, centerY, rotation);
    overContent.endText();

    pdfStamper.close();
    pdfReader.close();
}
/**
 * Extracts the text sections of the pages {@code from} to {@code to} (both
 * inclusive), stores every section in its own file, and returns all section texts.
 * <p>
 * In the returned string every section is introduced by a "--" line and every
 * page is terminated by two line feeds.
 * </p>
 *
 * @param reader      reader of the document to extract from
 * @param format      file name pattern receiving the page number and the section index
 * @param from        first page to process
 * @param to          last page to process
 * @param headerColor colour handed to the extraction strategy
 * @return the collected text of all sections
 * @throws IOException if parsing or writing fails
 */
String extractAndStore(PdfReader reader, String format, int from, int to, BaseColor headerColor) throws IOException {
    StringBuilder collected = new StringBuilder();

    for (int pageNo = from; pageNo <= to; pageNo++) {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
        DividerAwareTextExtrationStrategy pageStrategy = contentParser.processContent(pageNo,
                new DividerAndColorAwareTextExtractionStrategy(810, 30, 20, 575, headerColor));

        List<Section> pageSections = pageStrategy.getSections();
        for (int sectionNo = 0; sectionNo < pageSections.size(); sectionNo++) {
            String text = pageStrategy.getResultantText(pageSections.get(sectionNo));
            Files.write(Paths.get(String.format(format, pageNo, sectionNo)), text.getBytes("UTF8"));

            collected.append("--\n");
            collected.append(text);
            collected.append('\n');
        }

        collected.append("\n\n");
    }

    return collected.toString();
}
@Test public void sign50MBrunoPartialAppend() throws IOException, DocumentException, GeneralSecurityException { String filepath = "src/test/resources/mkl/testarea/itext5/signature/50m.pdf"; String digestAlgorithm = "SHA512"; CryptoStandard subfilter = CryptoStandard.CMS; // Creating the reader and the stamper PdfReader reader = new PdfReader(filepath, null, true); FileOutputStream os = new FileOutputStream(new File(RESULT_FOLDER, "50m-signedBrunoPartialAppend.pdf")); PdfStamper stamper = PdfStamper.createSignature(reader, os, '\0', RESULT_FOLDER, true); // Creating the appearance PdfSignatureAppearance appearance = stamper.getSignatureAppearance(); appearance.setReason("reason"); appearance.setLocation("location"); appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig"); // Creating the signature ExternalSignature pks = new PrivateKeySignature(pk, digestAlgorithm, "BC"); ExternalDigest digest = new BouncyCastleDigest(); MakeSignature.signDetached(appearance, digest, pks, chain, null, null, null, 0, subfilter); }
/**
 * <a href="http://stackoverflow.com/questions/35082653/adobe-reader-cant-display-unicode-font-of-pdf-added-with-itext">
 * Adobe Reader can't display unicode font of pdf added with iText
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/erkv9wot9d460dg/sampleOriginal.pdf?dl=0">
 * sampleOriginal.pdf
 * </a>
 * <p>
 * Indeed, just like in the iTextSharp version of the code, the resulting file has
 * issues in Adobe Reader. With a different starting file, though, it doesn't, cf.
 * {@link #testAddUnicodeStampEg_01()}.
 * </p>
 * <p>
 * As it eventually turns out, Adobe Reader treats PDF files with composite fonts
 * differently if they claim to be PDF-1.2 like the OP's sample file.
 * </p>
 * <p>
 * NOTE: the font is loaded from a fixed Windows path, so the test needs that file to exist.
 * </p>
 */
@Test
public void testAddUnicodeStampSampleOriginal() throws DocumentException, IOException {
    try (
        InputStream source = getClass().getResourceAsStream("sampleOriginal.pdf");
        OutputStream target = new FileOutputStream(new File(RESULT_FOLDER, "sampleOriginal-unicodeStamp.pdf"))
    ) {
        PdfReader pdfReader = new PdfReader(source);
        PdfStamper pdfStamper = new PdfStamper(pdfReader, target);

        BaseFont unicodeFont = BaseFont.createFont("c:/windows/fonts/arialuni.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
        PdfContentByte overContent = pdfStamper.getOverContent(1);

        Phrase stampText = new Phrase();
        stampText.setFont(new Font(unicodeFont, 25, Font.NORMAL, BaseColor.BLUE));
        stampText.add("Sample Text");

        ColumnText.showTextAligned(overContent, PdfContentByte.ALIGN_LEFT, stampText, 200, 200, 0);

        pdfStamper.close();
    }
}
@Test public void signCertify2gFix() throws IOException, DocumentException, GeneralSecurityException { String filepath = "src/test/resources/mkl/testarea/itext5/signature/2g-fix.pdf"; String digestAlgorithm = "SHA512"; CryptoStandard subfilter = CryptoStandard.CMS; // Creating the reader and the stamper PdfReader reader = new PdfReader(filepath, null, true); FileOutputStream os = new FileOutputStream(new File(RESULT_FOLDER, "2g-fix-certified.pdf")); PdfStamper stamper = PdfStamper.createSignature(reader, os, '\0', RESULT_FOLDER, true); // Creating the appearance PdfSignatureAppearance appearance = stamper.getSignatureAppearance(); appearance.setCertificationLevel(PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED); appearance.setReason("reason"); appearance.setLocation("location"); appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig"); // Creating the signature ExternalSignature pks = new PrivateKeySignature(pk, digestAlgorithm, "BC"); ExternalDigest digest = new BouncyCastleDigest(); MakeSignature.signDetached(appearance, digest, pks, chain, null, null, null, 0, subfilter); }
/**
 * <a href="http://stackoverflow.com/questions/32393858/why-result-of-getpagelabels-is-different-from-the-adobe-acrobat">
 * Why result of GetPageLabels is different from the Adobe Acrobat
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0Bxb0Du7de8igNmVPSUc3VzdPSjg/view?usp=sharing">
 * testHuangMeizai.pdf
 * </a>
 * <p>
 * Indeed, the labels are wrong. There is a small bug in {@link PdfPageLabels#getPageLabelFormats(PdfReader)}.
 * When encountering a new page label dictionary without a P (prefix) entry, it does not reset the current
 * prefix value.
 * </p>
 * <p>
 * The test prints the zero-based index and label of every page.
 * </p>
 */
@Test
public void testTestHuangMeizai() throws IOException {
    try (InputStream pdfStream = getClass().getResourceAsStream("testHuangMeizai.pdf")) {
        final PdfReader pdfReader = new PdfReader(pdfStream);
        String[] labels = PdfPageLabels.getPageLabels(pdfReader);

        System.out.println("page number:");
        if (labels == null) {
            // No labels to list.
            return;
        }
        for (int index = 0; index < labels.length; index++) {
            System.out.printf("%2d - %s\n", index, labels[index]);
        }
    }
}
/**
 * <a href="http://stackoverflow.com/questions/37726215/why-does-my-signature-revision-number-increment-by-2-in-itext-after-detached-s">
 * Why does my signature revision number increment by 2 (in itext) after detached signing?
 * </a>
 * <br/>
 * <a href="https://onedrive.live.com/redir?resid=2F03BFDA84B77A41!113&authkey=!ABPGZ7pxuxoE8A0&ithint=file%2Cpdf">
 * signedoutput.pdf
 * </a>
 * <p>
 * The issue cannot be reproduced. In particular the PDF contains only a single revision.
 * </p>
 * <p>
 * For every signature the test prints its name, coverage, revision, signer subject and
 * verification result.
 * </p>
 */
@Test
public void testVerifySignedOutput() throws IOException, GeneralSecurityException {
    System.out.println("\n\nsignedoutput.pdf\n================");

    try (InputStream pdfStream = getClass().getResourceAsStream("signedoutput.pdf")) {
        PdfReader pdfReader = new PdfReader(pdfStream);
        AcroFields fields = pdfReader.getAcroFields();

        for (String signatureName : fields.getSignatureNames()) {
            System.out.println("Signature name: " + signatureName);

            boolean coversWholeDocument = fields.signatureCoversWholeDocument(signatureName);
            System.out.println("Signature covers whole document: " + coversWholeDocument);

            System.out.println("Document revision: " + fields.getRevision(signatureName) + " of " + fields.getTotalRevisions());

            PdfPKCS7 signature = fields.verifySignature(signatureName);
            System.out.println("Subject: " + CertificateInfo.getSubjectFields(signature.getSigningCertificate()));
            System.out.println("Document verifies: " + signature.verify());
        }
    }

    System.out.println();
}
@Test public void signCertify2g() throws IOException, DocumentException, GeneralSecurityException { String filepath = "src/test/resources/mkl/testarea/itext5/signature/2g.pdf"; String digestAlgorithm = "SHA512"; CryptoStandard subfilter = CryptoStandard.CMS; // Creating the reader and the stamper PdfReader reader = new PdfReader(filepath, null, true); FileOutputStream os = new FileOutputStream(new File(RESULT_FOLDER, "2g-certified.pdf")); PdfStamper stamper = PdfStamper.createSignature(reader, os, '\0', RESULT_FOLDER, true); // Creating the appearance PdfSignatureAppearance appearance = stamper.getSignatureAppearance(); appearance.setCertificationLevel(PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED); appearance.setReason("reason"); appearance.setLocation("location"); appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig"); // Creating the signature ExternalSignature pks = new PrivateKeySignature(pk, digestAlgorithm, "BC"); ExternalDigest digest = new BouncyCastleDigest(); MakeSignature.signDetached(appearance, digest, pks, chain, null, null, null, 0, subfilter); }
public void C2_01_SignHelloWorld_sign(String src, String dest, Certificate[] chain, PrivateKey pk, String digestAlgorithm, String provider, CryptoStandard subfilter, String reason, String location) throws GeneralSecurityException, IOException, DocumentException { // Creating the reader and the stamper PdfReader reader = new PdfReader(src); FileOutputStream os = new FileOutputStream(dest); PdfStamper stamper = PdfStamper.createSignature(reader, os, '\0'); // Creating the appearance PdfSignatureAppearance appearance = stamper.getSignatureAppearance(); appearance.setReason(reason); appearance.setLocation(location); appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig"); // Creating the signature ExternalDigest digest = new BouncyCastleDigest(); ExternalSignature signature = new PrivateKeySignature(pk, digestAlgorithm, provider); MakeSignature.signDetached(appearance, digest, signature, chain, null, null, null, 0, subfilter); }
/**
 * <a href="https://stackoverflow.com/questions/45662544/itextsharp-library-does-not-extract-text-from-my-file">
 * iTextSharp library does not extract text from my file
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/n4ws2fhr72xaa3s/Text%20Extraction%20-%20Colddishes_C.pdf?dl=0">
 * Text Extraction - Colddishes_C.pdf
 * </a>
 * <p>
 * The PDF declarations of the Asian fonts allow derival of a
 * character code to Unicode map only by means of their ROS
 * values. Thus, itext-asian.jar is required for text extraction.
 * </p>
 */
@Test
public void testTextExtractionColddishesC() throws IOException, DocumentException {
    // try-with-resources replaces the manual finally block and tolerates a null stream.
    try (InputStream resourceStream = getClass().getResourceAsStream("Text Extraction - Colddishes_C.pdf")) {
        PdfReader reader = new PdfReader(resourceStream);
        try {
            String content = extractAndStoreSimple(reader, new File(RESULT_FOLDER, "Text Extraction - Colddishes_C.%s.txt").toString());

            System.out.println("\nText Extraction - Colddishes_C.pdf\n************************");
            System.out.println(content);
            System.out.println("************************");
        } finally {
            reader.close();
        }
    }
}
@Test public void testCertifiedSchoolList_9_16_2015() throws IOException { try ( Writer data = new OutputStreamWriter(new FileOutputStream(new File(RESULT_FOLDER, "data.txt")), "UTF-8"); Writer nonData = new OutputStreamWriter(new FileOutputStream(new File(RESULT_FOLDER, "non-data.txt")), "UTF-8"); InputStream resource = getClass().getResourceAsStream("certified-school-list-9-16-2015.pdf") ) { CertifiedSchoolListExtractionStrategy strategy = new CertifiedSchoolListExtractionStrategy(data, nonData); PdfReader reader = new PdfReader(resource); PdfReaderContentParser parser = new PdfReaderContentParser(reader); for (int page = 1; page <= reader.getNumberOfPages(); page++) parser.processContent(page, strategy); // parser.processContent(28, strategy); strategy.close(); } }
/**
 * <a href="https://stackoverflow.com/questions/46730760/no-fields-were-printed-on-console-after-verifying-if-form-is-using-acroform-or-x">
 * No fields were printed on console after verifying if form is using Acroform or XFA technology?
 * </a>
 * <br/>
 * <a href="http://blogs.adobe.com/formfeed/files/formfeed/Samples/multiview.pdf">
 * multiview.pdf
 * </a>
 * from
 * <a href="http://blogs.adobe.com/formfeed/2011/02/multiple-top-level-subforms.html">
 * Multiple Top Level Subforms
 * </a>
 * <p>
 * The OP's observation can be reproduced using this sample PDF.
 * </p>
 * <p>
 * The test prints whether the form is XFA or AcroForm, followed by the names of
 * all AcroForm fields. Standard output is flushed but deliberately not closed,
 * because closing it would silence all later output of the same JVM.
 * </p>
 */
@Test
public void testReadFieldsFromMultiview() throws IOException {
    try (InputStream resource = getClass().getResourceAsStream("multiview.pdf")) {
        PdfReader reader = new PdfReader(resource);
        try {
            AcroFields form = reader.getAcroFields();
            XfaForm xfa = form.getXfa();
            System.out.println(xfa.isXfaPresent() ? "XFA form" : "AcroForm");

            Set<String> fields = form.getFields().keySet();
            for (String key : fields) {
                System.out.println(key);
            }
            System.out.flush();
        } finally {
            reader.close();
        }
    }
}
/**
 * <a href="http://stackoverflow.com/questions/35344982/itext-extracted-text-from-pdf-file-using-locationtextextractionstrategy-is-in-w">
 * iText: Extracted text from pdf file using LocationTextExtractionStrategy is in wrong order
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/kl2s6038u51gx2q/location_text_extraction_test.pdf?dl=0">
 * location_text_extraction_test.pdf
 * </a>
 * <p>
 * Indeed, the {@link LocationTextExtractionStrategy} returns the headers in the wrong order.
 * This is due to slightly different y coordinates of them.
 * </p>
 * <p>
 * The {@link HorizontalTextExtractionStrategy2} returns the headers and actually the whole table
 * correctly. Unfortunately it fails where there are overlapping lines in side-by-side columns,
 * in this case e.g. for the invoice recipient address.
 * </p>
 * <p>
 * The two extraction runs are stored under different file names so that the output of
 * the horizontal strategy does not overwrite the output of the location strategy.
 * </p>
 */
@Test
public void testLocation_text_extraction_test() throws Exception {
    // try-with-resources replaces the manual finally block and tolerates a null stream.
    try (InputStream resourceStream = getClass().getResourceAsStream("location_text_extraction_test.pdf")) {
        PdfReader reader = new PdfReader(resourceStream);
        try {
            String content = extractAndStore(reader,
                    new File(RESULT_FOLDER, "location_text_extraction_test.%s.txt").toString());
            // Distinct file name pattern: both runs used to write to the very same files.
            String horizontalContent = extractAndStore(reader,
                    new File(RESULT_FOLDER, "location_text_extraction_test.horizontal.%s.txt").toString(),
                    HorizontalTextExtractionStrategy2.class);

            System.out.println("\nText (location strategy) location_text_extraction_test.pdf \n************************");
            System.out.println(content);
            System.out.println("\nText (horizontal strategy) location_text_extraction_test.pdf \n************************");
            System.out.println(horizontalContent);
            System.out.println("************************");
        } finally {
            reader.close();
        }
    }
}
/**
 * Builds an in-memory two-page PDF from the first page of the document produced by
 * {@code createSimpleTextPdf()}.
 * <p>
 * The first page shows the imported page under the transformation matrix
 * (.7, .7, -.7, .7, 400, -200); the second page shows it translated by the
 * lower-left corner of the source page size.
 * </p>
 *
 * @return the bytes of the generated PDF
 */
static byte[] createRotatedIndirectTextPdf() throws DocumentException, IOException {
    ByteArrayOutputStream pdfBytes = new ByteArrayOutputStream();
    Document target = new Document();
    PdfWriter pdfWriter = PdfWriter.getInstance(target, pdfBytes);
    target.open();

    PdfReader sourceReader = new PdfReader(createSimpleTextPdf());
    PdfImportedPage sourcePage = pdfWriter.getImportedPage(sourceReader, 1);
    Rectangle sourceSize = sourceReader.getPageSize(1);

    // Page 1: transformed copy.
    pdfWriter.getDirectContent().addTemplate(sourcePage, .7f, .7f, -.7f, .7f, 400, -200);

    // Page 2: translated copy.
    target.newPage();
    pdfWriter.getDirectContent().addTemplate(sourcePage, sourceSize.getLeft(), sourceSize.getBottom());

    target.close();
    return pdfBytes.toByteArray();
}
/**
 * <a href="http://stackoverflow.com/questions/43205385/trying-to-draw-an-ellipse-annotation-and-the-border-on-the-edges-goes-thin-and-t">
 * Trying to draw an ellipse annotation and the border on the edges goes thin and thik when i try to roatate pdf itext5
 * </a>
 * <p>
 * This test creates an ellipse annotation without appearance on a page with rotation.
 * The ellipse form looks ok but it is moved to the right of the actual appearance rectangle when viewed in Adobe Reader.
 * This is caused by iText creating a non-standard rectangle, the lower left not being the lower left etc.
 * </p>
 * @see #testCreateEllipse()
 * @see #testCreateEllipseAppearance()
 * @see #testCreateEllipseAppearanceOnRotated()
 * @see #testCreateCorrectEllipseAppearanceOnRotated()
 */
@Test
public void testCreateEllipseOnRotated() throws IOException, DocumentException {
    try (
        InputStream source = getClass().getResourceAsStream("/mkl/testarea/itext5/merge/testA4.pdf");
        OutputStream target = new FileOutputStream(new File(RESULT_FOLDER, "testA4-rotated-ellipse.pdf"))
    ) {
        PdfReader pdfReader = new PdfReader(source);
        // Rotate the first page by 90 degrees before stamping.
        pdfReader.getPageN(1).put(PdfName.ROTATE, new PdfNumber(90));

        PdfStamper pdfStamper = new PdfStamper(pdfReader, target);

        Rectangle ellipseBox = new Rectangle(202 + 6f, 300, 200 + 100, 300 + 150);
        PdfAnnotation ellipse = PdfAnnotation.createSquareCircle(pdfStamper.getWriter(), ellipseBox, null, false);
        ellipse.setFlags(PdfAnnotation.FLAGS_PRINT);
        ellipse.setColor(BaseColor.RED);
        ellipse.setBorderStyle(new PdfBorderDictionary(3.5f, PdfBorderDictionary.STYLE_SOLID));
        pdfStamper.addAnnotation(ellipse, 1);

        pdfStamper.close();
        pdfReader.close();
    }
}
/**
 * <a href="http://itext.2136553.n4.nabble.com/iText-help-resources-tt4660980.html">
 * [iText-questions] iText help resources?
 * </a>
 * <br/>
 * <a href="http://itext.2136553.n4.nabble.com/attachment/4660980/0/testin.pdf">
 * testin.pdf
 * </a>
 * <p>
 * Indeed, the tables cannot be extracted. Further analysis shows that the text
 * in the tables uses type 3 fonts with an ad-hoc encoding missing any mapping
 * to Unicode.
 * </p>
 */
@Test
public void testTestin() throws IOException, DocumentException {
    // try-with-resources replaces the manual finally block and tolerates a null stream.
    try (InputStream resourceStream = getClass().getResourceAsStream("testin.pdf")) {
        PdfReader reader = new PdfReader(resourceStream);
        try {
            String content = extractAndStoreSimple(reader, new File(RESULT_FOLDER, "testin.%s.txt").toString());

            System.out.println("\nText testin.pdf\n************************");
            System.out.println(content);
            System.out.println("************************");
        } finally {
            reader.close();
        }
    }
}
/**
 * <a href="https://stackoverflow.com/questions/46466747/how-to-split-a-pdf-page-in-java">
 * How to split a PDF page in java?
 * </a>
 * <p>
 * This test shows how to split the pages of a document into tiles of A6
 * size using the {@link Abstract2DPdfPageSplittingTool}.
 * </p>
 * <p>
 * Tiles are laid out row by row from the top-left corner of the page; a further
 * row or column is started only if more than 5 units of the page remain.
 * </p>
 */
@Test
public void testSplitDocumentA6() throws IOException, DocumentException {
    try (
        InputStream source = getClass().getResourceAsStream("document.pdf");
        OutputStream target = new FileOutputStream(new File(RESULT_FOLDER, "document-A6.pdf"))
    ) {
        Abstract2DPdfPageSplittingTool splitter = new Abstract2DPdfPageSplittingTool() {
            @Override
            protected Iterable<Rectangle> determineSplitRectangles(PdfReader reader, int page) {
                final Rectangle tileSize = PageSize.A6;
                final Rectangle pageSize = reader.getPageSize(page);
                final List<Rectangle> tiles = new ArrayList<>();

                float top = pageSize.getTop();
                while (top > pageSize.getBottom() + 5) {
                    float left = pageSize.getLeft();
                    while (left < pageSize.getRight() - 5) {
                        tiles.add(new Rectangle(left, top - tileSize.getHeight(), left + tileSize.getWidth(), top));
                        left += tileSize.getWidth();
                    }
                    top -= tileSize.getHeight();
                }

                return tiles;
            }
        };

        splitter.split(target, new PdfReader(source));
    }
}
/**
 * <a href="http://stackoverflow.com/questions/43870545/filling-a-pdf-with-itextsharp-and-then-hiding-the-base-layer">
 * Filling a PDF with iTextsharp and then hiding the base layer
 * </a>
 * <p>
 * This test shows how to cover all content using a white rectangle: on every page
 * a white filled rectangle of the page size is drawn into the over content.
 * </p>
 */
@Test
public void testHideContenUnderRectangle() throws IOException, DocumentException {
    try (
        InputStream source = getClass().getResourceAsStream("document.pdf");
        OutputStream target = new FileOutputStream(new File(RESULT_FOLDER, "document-hiddenContent.pdf"))
    ) {
        PdfReader reader = new PdfReader(source);
        PdfStamper stamper = new PdfStamper(reader, target);

        int pageNo = 1;
        while (pageNo <= reader.getNumberOfPages()) {
            Rectangle box = reader.getPageSize(pageNo);
            PdfContentByte cover = stamper.getOverContent(pageNo);

            cover.setColorFill(BaseColor.WHITE);
            cover.rectangle(box.getLeft(), box.getBottom(), box.getWidth(), box.getHeight());
            cover.fill();

            pageNo++;
        }

        stamper.close();
    }
}
/**
 * Extracts the text of every page via {@code extractRemapped}, stores each page's
 * text in its own file, and returns the text of all pages joined by two line feeds.
 *
 * @param reader reader of the document to extract from
 * @param format file name pattern; its placeholder receives the page number
 * @return the text of all pages
 */
String extractAndStoreRemapped(PdfReader reader, String format) throws IOException, NoSuchFieldException, SecurityException {
    StringBuilder allPages = new StringBuilder();

    for (int pageNo = 1; pageNo <= reader.getNumberOfPages(); pageNo++) {
        String text = extractRemapped(reader, pageNo);
        Files.write(Paths.get(String.format(format, pageNo)), text.getBytes("UTF8"));

        // Separator goes between pages only, never before the first one.
        if (pageNo != 1) {
            allPages.append("\n\n");
        }
        allPages.append(text);
    }

    return allPages.toString();
}
/**
 * Collects the folders of a PDF portfolio (collection) into a map.
 * <p>
 * The folders dictionary is looked up under {@code FOLDERS} in the catalog's
 * Collection dictionary and handed to {@code collectFolders} together with the
 * base directory.
 * </p>
 *
 * @param reader  reader of the portfolio document
 * @param baseDir base directory passed on to {@code collectFolders}
 * @return map filled by {@code collectFolders}
 * @throws DocumentException if the document has no Collection dictionary or the
 *                           collection has no folders dictionary
 */
static Map<Integer, File> retrieveFolders(PdfReader reader, File baseDir) throws DocumentException {
    PdfDictionary catalog = reader.getCatalog();

    PdfDictionary collectionDict = catalog.getAsDict(PdfName.COLLECTION);
    if (collectionDict == null) {
        throw new DocumentException("Document has no Collection dictionary");
    }

    PdfDictionary foldersDict = collectionDict.getAsDict(FOLDERS);
    if (foldersDict == null) {
        throw new DocumentException("Document collection has no folders dictionary");
    }

    Map<Integer, File> foldersById = new HashMap<Integer, File>();
    collectFolders(foldersById, foldersDict, baseDir);
    return foldersById;
}
/**
 * <a href="http://stackoverflow.com/questions/35374110/how-do-i-use-itext-to-have-a-landscaped-pdf-on-half-of-a-a4-back-to-portrait-and">
 * How do i use iText to have a landscaped PDF on half of a A4 back to portrait and full size on A4
 * </a>
 * <p>
 * This sample shows how to rotate and enlarge the upper half of an A4 page to fit into a new A4 page.
 * </p>
 * <p>
 * The imported page is drawn with a matrix that rotates by 90 degrees and scales by
 * the square root of 2.
 * </p>
 */
@Test
public void testRotateAndZoomUpperHalfPage() throws IOException, DocumentException {
    try (
        InputStream source = getClass().getResourceAsStream("/mkl/testarea/itext5/extract/test.pdf");
        OutputStream target = new FileOutputStream(new File(RESULT_FOLDER, "test-upperHalf.pdf"))
    ) {
        PdfReader sourceReader = new PdfReader(source);
        Document targetDocument = new Document(PageSize.A4);
        PdfWriter targetWriter = PdfWriter.getInstance(targetDocument, target);
        targetDocument.open();

        final double scale = Math.sqrt(2);
        Rectangle sourceSize = sourceReader.getPageSize(1);
        PdfImportedPage sourcePage = targetWriter.getImportedPage(sourceReader, 1);

        final double shiftX = sourceSize.getTop() * scale;
        final double shiftY = -sourceSize.getLeft() * scale;
        targetWriter.getDirectContent().addTemplate(sourcePage, 0, scale, -scale, 0, shiftX, shiftY);

        targetDocument.close();
    }
}
/**
 * <a href="https://stackoverflow.com/questions/45027712/invalid-signature-when-signing-an-existing-sigrature-field-with-cosign-sapi">
 * Invalid signature when signing an existing sigrature field with CoSign SAPI
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/j6eme53lleaok13/test_signed.pdf?dl=0">
 * test_signed-1.pdf
 * </a>
 * <p>
 * Validation shows verification success while both Adobe and SD DSS fail.
 * Embedded certificates have issues (emailAddress RDN is typed PrintableString
 * which is wrong - specified is IA5String - and does not even make sense as
 * there is no '@' in PrintableString), but does this explain it?
 * </p>
 */
@Test
public void testVerifyTestSigned1() throws IOException, GeneralSecurityException {
    System.out.println("\n\ntest_signed-1.pdf\n===================");

    try (InputStream pdfStream = getClass().getResourceAsStream("test_signed-1.pdf")) {
        PdfReader pdfReader = new PdfReader(pdfStream);
        AcroFields fields = pdfReader.getAcroFields();
        List<String> signatureNames = fields.getSignatureNames();

        for (String signatureName : signatureNames) {
            System.out.println("Signature name: " + signatureName);
            System.out.println("Signature covers whole document: " + fields.signatureCoversWholeDocument(signatureName));

            int revision = fields.getRevision(signatureName);
            System.out.println("Document revision: " + revision + " of " + fields.getTotalRevisions());

            PdfPKCS7 signature = fields.verifySignature(signatureName);
            System.out.println("Subject: " + CertificateInfo.getSubjectFields(signature.getSigningCertificate()));
            System.out.println("Document verifies: " + signature.verify());
        }
    }

    System.out.println();
}
/**
 * Applies a {@link TransparentGraphicsRemover} content stream editor to every page of
 * <code>transparency.pdf</code> and stores the stamped result as
 * <code>transparency-noTransparency.pdf</code> in the result folder.
 *
 * @throws IOException if the source cannot be read or the result cannot be written
 * @throws DocumentException if iText fails while stamping the document
 */
@Test
public void testRemoveTransparentGraphicsTransparency() throws IOException, DocumentException {
    try (
        InputStream resource = getClass().getResourceAsStream("transparency.pdf");
        OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "transparency-noTransparency.pdf"))
    ) {
        PdfReader pdfReader = new PdfReader(resource);
        try {
            PdfStamper pdfStamper = new PdfStamper(pdfReader, result);
            PdfContentStreamEditor editor = new TransparentGraphicsRemover();

            int pageCount = pdfReader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                editor.editPage(pdfStamper, page);
            }

            pdfStamper.close();
        } finally {
            // The stamper works on the reader, so release the reader last.
            pdfReader.close();
        }
    }
}
/** * These two methods ({@link #extractAttachments(PdfReader, String)} and * {@link #extractAttachment(PdfReader, File, PdfString, PdfDictionary)}) * essentially are the OP's original code posted in his question. They * extract files without the folder structure. */ public static void extractAttachments(PdfReader reader, String dir) throws IOException { File folder = new File(dir); folder.mkdirs(); PdfDictionary root = reader.getCatalog(); PdfDictionary names = root.getAsDict(PdfName.NAMES); System.out.println("" + names.getKeys().toString()); PdfDictionary embedded = names.getAsDict(PdfName.EMBEDDEDFILES); System.out.println("" + embedded.toString()); PdfArray filespecs = embedded.getAsArray(PdfName.NAMES); //System.out.println(filespecs.getAsString(root1)); for (int i = 0; i < filespecs.size();) { extractAttachment(reader, folder, filespecs.getAsString(i++), filespecs.getAsDict(i++)); } }
/**
 * <a href="https://stackoverflow.com/questions/46346144/digital-signature-verification-with-itext-not-working">
 * Digital Signature Verification with itext not working
 * </a>
 * <br/>
 * <a href="https://drive.google.com/open?id=0B1XKjvoeoyPZWnk5bzc5T3VSQUk">
 * test_dsp.pdf
 * </a>
 * <p>
 * The issue is that the signature uses ECDSA and iText 5 does not (yet)
 * support ECDSA. "Support" here actually means that iText cannot find
 * the name ECDSA for the OID 1.2.840.10045.4.3.2 (SHA256withECDSA) to
 * build a proper algorithm name to use for verification.
 * </p>
 * <p>
 * Adding a mapping "1.2.840.10045.4.3.2" to "ECDSA" resolves the issue.
 * </p>
 * <p>
 * The mapping is added via reflection to the static <code>algorithmNames</code>
 * map of {@link EncryptionAlgorithms}; this test does not remove it again.
 * </p>
 * @see #testVerify20180115an_signed_original()
 */
@Test
public void testVerifyTestDsp() throws IOException, GeneralSecurityException, NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException {
    Field algorithmNamesField = EncryptionAlgorithms.class.getDeclaredField("algorithmNames");
    algorithmNamesField.setAccessible(true);
    @SuppressWarnings("unchecked")
    HashMap<String, String> algorithmNames = (HashMap<String, String>) algorithmNamesField.get(null);
    algorithmNames.put("1.2.840.10045.4.3.2", "ECDSA");

    System.out.println("\n\ntest_dsp.pdf\n===================");

    try (InputStream resource = getClass().getResourceAsStream("test_dsp.pdf")) {
        PdfReader reader = new PdfReader(resource);
        try {
            AcroFields acroFields = reader.getAcroFields();

            List<String> names = acroFields.getSignatureNames();
            for (String name : names) {
                System.out.println("Signature name: " + name);
                System.out.println("Signature covers whole document: " + acroFields.signatureCoversWholeDocument(name));
                System.out.println("Document revision: " + acroFields.getRevision(name) + " of " + acroFields.getTotalRevisions());
                PdfPKCS7 pk = acroFields.verifySignature(name);
                System.out.println("Subject: " + CertificateInfo.getSubjectFields(pk.getSigningCertificate()));
                System.out.println("Document verifies: " + pk.verify());
            }
        } finally {
            // Release the reader even when verification throws.
            reader.close();
        }
    }

    System.out.println();
}
/**
 * <a href="http://stackoverflow.com/questions/39932311/itext-java-not-parsing-text-properly-from-pdf">
 * iText java not parsing text properly from PDF
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/vc9it3c7856ejli/testPDF.pdf?dl=0">
 * testPDF.pdf
 * </a>
 * <p>
 * Indeed, the 1.2 is located minutely below the SUBMITTALS. The
 * {@link HorizontalTextExtractionStrategy2} can be used to fix this.
 * </p>
 * <p>
 * The text is extracted twice, once without an explicit strategy class and once with
 * {@link HorizontalTextExtractionStrategy2}, and both results are printed for comparison.
 * </p>
 *
 * @throws Exception if reading the document or extracting its text fails
 */
@Test
public void testTestPDF() throws Exception {
    // try-with-resources skips closing a null resource, like the explicit null check it replaces.
    try (InputStream resourceStream = getClass().getResourceAsStream("testPDF.pdf")) {
        PdfReader reader = new PdfReader(resourceStream);
        try {
            String content = extractAndStore(reader, new File(RESULT_FOLDER, "testPDF.%s.txt").toString());
            String horizontalContent = extractAndStore(reader, new File(RESULT_FOLDER, "testPDF.HOR.%s.txt").toString(), HorizontalTextExtractionStrategy2.class);

            System.out.println("\nText (location strategy) testPDF.pdf \n************************");
            System.out.println(content);
            System.out.println("\nText (horizontal strategy) testPDF.pdf \n************************");
            System.out.println(horizontalContent);
            System.out.println("************************");
        } finally {
            // Release the reader even when extraction throws.
            reader.close();
        }
    }
}
/**
 * <a href="http://stackoverflow.com/questions/28991291/how-to-remove-whitespace-on-merge">
 * How To Remove Whitespace on Merge
 * </a>
 * <p>
 * Merges the OP's header, body and footer documents with a {@link PdfVeryDenseMergeTool}
 * and stores the outcome as <code>GrandizerMerge-veryDense.pdf</code> in the result folder.
 * </p>
 */
@Test
public void testMergeGrandizerFiles() throws DocumentException, IOException {
    try (
        InputStream headerStream = getClass().getResourceAsStream("Header.pdf");
        InputStream bodyStream = getClass().getResourceAsStream("Body.pdf");
        InputStream footerStream = getClass().getResourceAsStream("Footer.pdf")
    ) {
        PdfVeryDenseMergeTool mergeTool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 5);

        // The readers are created in merge order: header, body, footer.
        List<PdfReader> readers = Arrays.asList(
                new PdfReader(headerStream),
                new PdfReader(bodyStream),
                new PdfReader(footerStream));

        File target = new File(RESULT_FOLDER, "GrandizerMerge-veryDense.pdf");
        try (FileOutputStream output = new FileOutputStream(target)) {
            mergeTool.merge(output, readers);
        } finally {
            for (PdfReader each : readers) {
                each.close();
            }
        }
    }
}
/**
 * <a href="http://stackoverflow.com/questions/28991291/how-to-remove-whitespace-on-merge">
 * How To Remove Whitespace on Merge
 * </a>
 * <p>
 * Merges the OP's header, body and footer documents with a {@link PdfVeryDenseMergeTool}
 * using a gap of 10, the value the OP reported as losing lines. The loss could not be
 * reproduced. The outcome is stored as <code>GrandizerMerge-veryDense-gap10.pdf</code>
 * in the result folder.
 * </p>
 */
@Test
public void testMergeGrandizerFilesGap10() throws DocumentException, IOException {
    try (
        InputStream headerStream = getClass().getResourceAsStream("Header.pdf");
        InputStream bodyStream = getClass().getResourceAsStream("Body.pdf");
        InputStream footerStream = getClass().getResourceAsStream("Footer.pdf")
    ) {
        PdfVeryDenseMergeTool mergeTool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 10);

        // The readers are created in merge order: header, body, footer.
        List<PdfReader> readers = Arrays.asList(
                new PdfReader(headerStream),
                new PdfReader(bodyStream),
                new PdfReader(footerStream));

        File target = new File(RESULT_FOLDER, "GrandizerMerge-veryDense-gap10.pdf");
        try (FileOutputStream output = new FileOutputStream(target)) {
            mergeTool.merge(output, readers);
        } finally {
            for (PdfReader each : readers) {
                each.close();
            }
        }
    }
}