/**
 * Checks the letter PDF stored at {@code letters/<NIF>.pdf} for the current {@code voter}.
 *
 * <p>The text of page 1 is split into lines; the second and third lines are each split on
 * {@code ":\t"} and the part after the separator is compared with the voter's e-mail and
 * password respectively.
 *
 * <p>An {@link IOException} while opening or reading the PDF fails the test instead of being
 * printed and ignored; otherwise a missing letter would let the test pass without checking
 * anything.
 */
@Test
public void testWritePlainLetter() {
    PdfReader reader = null;
    try {
        reader = new PdfReader("letters/" + voter.getNif() + ".pdf");
        String page = PdfTextExtractor.getTextFromPage(reader, 1);
        String[] lines = page.split("\n");
        // Second and third lines carry "<label>:<TAB><value>"; keep only the value part.
        String email = lines[1].split(":\t")[1];
        String password = lines[2].split(":\t")[1];
        assertEquals(voter.getEmail(), email);
        assertEquals(voter.getPassword(), password);
    } catch (IOException e) {
        // Surface the failure to the test runner and keep the original cause attached.
        throw new AssertionError("Archivo no encontrado", e);
    } finally {
        // Release the reader whether the assertions passed or not.
        if (reader != null) {
            reader.close();
        }
    }
}
private String getNonWhiteSpacesFromPDF(byte[] pdfByteArray) throws IOException { String nonWhiteSpace; PdfReader pdfReader = null; try { pdfReader = new PdfReader(pdfByteArray); nonWhiteSpace = StringUtils.deleteWhitespace(PdfTextExtractor .getTextFromPage(pdfReader, 1)); // remove non-break space nonWhiteSpace = nonWhiteSpace.replace("\u00A0", ""); nonWhiteSpace = nonWhiteSpace.replace("\u00AD", "-"); } catch (IOException e) { throw e; } finally { if (pdfReader != null) { pdfReader.close(); } } return nonWhiteSpace; }
/**
 * Reads every page of the PDF at {@code filename} and stores the extracted text in
 * {@code pdfData}, keyed by the 1-based page number.
 *
 * <p>{@code pdfData} is replaced by a fresh map before the first page is read. If extraction
 * fails part-way, the map keeps the pages read so far and the exception propagates.
 * The reader is closed in all cases.
 *
 * @param filename location of the PDF, passed unchanged to {@code PdfReader}
 * @throws IOException if the PDF cannot be opened or a page cannot be read
 */
public void parsePdf(String filename) throws IOException {
    PdfReader reader = new PdfReader(filename);
    try {
        LOGGER.trace("Reading file " + filename);
        pdfData = new HashMap<Integer, String>();
        int numberOfPages = reader.getNumberOfPages();
        for (int page = 1; page <= numberOfPages; page++) {
            LOGGER.trace("Reading page " + page);
            String textFromPage = PdfTextExtractor.getTextFromPage(reader, page);
            pdfData.put(page, textFromPage);
        }
    } finally {
        // The original never closed the reader; release it on success and on failure.
        reader.close();
    }
}
public void getPDFExtractedText() { String parsedText=""; try { PdfReader reader = new PdfReader(FullscreenActivity.file.toString()); int n = reader.getNumberOfPages(); for (int i = 1; i<=n ; i++) { String text = detectAndImproveLine(PdfTextExtractor.getTextFromPage(reader, i)); parsedText = parsedText + text +"\n"; //Extracting the content from the different pages } reader.close(); } catch (Exception e) { Log.d("d","Error extracting text"); } foundText = PopUpEditSongFragment.parseToHTMLEntities(parsedText); }
/**
 * Extracts the text of one page with a {@code SimpleTextExtractionStrategy} that marks text
 * blocks with {@code <BLOCK>} / {@code </BLOCK>} tags.
 *
 * <p>Tags are only emitted once some text has been rendered. The opening tag of the first
 * tagged block is therefore not written by {@code beginTextBlock}; it is prepended to the
 * final result instead.
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the extracted text including the block markers
 * @throws IOException if the page cannot be read
 */
String extractSimple(PdfReader reader, int pageNo) throws IOException {
    SimpleTextExtractionStrategy blockTaggingStrategy = new SimpleTextExtractionStrategy() {
        /** Becomes true as soon as the first text is rendered. */
        boolean textRendered = false;

        @Override
        public void beginTextBlock() {
            if (textRendered) {
                appendTextChunk("<BLOCK>");
            }
            super.beginTextBlock();
        }

        @Override
        public void endTextBlock() {
            if (textRendered) {
                appendTextChunk("</BLOCK>\n");
            }
            super.endTextBlock();
        }

        @Override
        public String getResultantText() {
            String collected = super.getResultantText();
            // Supply the opening tag that beginTextBlock skipped before any text was rendered.
            return textRendered ? "<BLOCK>" + collected : collected;
        }

        @Override
        public void renderText(TextRenderInfo renderInfo) {
            textRendered = true;
            super.renderText(renderInfo);
        }
    };
    return PdfTextExtractor.getTextFromPage(reader, pageNo, blockTaggingStrategy);
}
private boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName) { int year = new DateTime().getDate().getYear(); int month = new DateTime().getDate().getMonth(); int day = new DateTime().getDate().getDay(); File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName); boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0); if (nonNullFileExistAnd) { long startCheck= System.currentTimeMillis(); FileInputStream fin = null; FileChannel ch = null; try { fin = new FileInputStream(pdfFile); ch = fin.getChannel(); byte fileContent[] = new byte[(int)pdfFile.length()]; fin.read(fileContent); fin.close(); ch.close(); long start = System.currentTimeMillis(); PdfReader pdfReader = new PdfReader(fileContent); String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1); long end = System.currentTimeMillis(); System.out.println("Reading first pdf page time : " + (new Float(end-start))/1000 + " seconds"); if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue())) { System.out.println(pdfFile + " first page content:"); System.out.println(textFromPdfFilePageOne); } long endCheck = System.currentTimeMillis(); System.out.println("Total time for checking if file is on the share : " + (new Float(endCheck - startCheck)) / 1000 + " seconds"); //If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue()) { archiveRtfFile(pdfFileName, rtfFileName); } } catch (Exception e) { e.printStackTrace(); return false; } } return nonNullFileExistAnd; }
/**
 * Extracts the text of a single page using the supplied extraction strategy.
 *
 * @param reader   the open PDF reader
 * @param pageNo   the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @param strategy the strategy that collects the text
 * @return the text produced by {@code PdfTextExtractor.getTextFromPage}
 * @throws IOException if the page cannot be read
 */
String extract(PdfReader reader, int pageNo, TextExtractionStrategy strategy) throws IOException {
    final String pageText = PdfTextExtractor.getTextFromPage(reader, pageNo, strategy);
    return pageText;
}
/**
 * Extracts one page with a fresh {@link HorizontalTextExtractionStrategy}; works for iText
 * before 5.5.9-SNAPSHOT, i.e. before commit 53526e4854fcb80c86cbc2e113f7a07401dc9a67
 * ("Refactor LocationTextExtractionStrategy...").
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the extracted page text
 */
@SuppressWarnings("deprecation")
String extract(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException {
    HorizontalTextExtractionStrategy horizontalStrategy = new HorizontalTextExtractionStrategy();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
/**
 * Extracts one page with a fresh {@link HorizontalTextExtractionStrategy2}; works for iText
 * since 5.5.9-SNAPSHOT, i.e. since commit 1ab350beae148be2a4bef5e663b3d67a004ff9f8
 * ("Make TextChunkLocation a Comparable<> class...").
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the extracted page text
 */
String extractV2(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException {
    HorizontalTextExtractionStrategy2 horizontalStrategy = new HorizontalTextExtractionStrategy2();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
/**
 * Extracts one page through a {@code RemappingExtractionFilter} that wraps a fresh
 * {@code LocationTextExtractionStrategy}.
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the text produced by the wrapped strategy
 */
String extractRemapped(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException {
    return PdfTextExtractor.getTextFromPage(
            reader,
            pageNo,
            new RemappingExtractionFilter(new LocationTextExtractionStrategy()));
}
public static boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName) { int year = new DateTime().getDate().getYear(); int month = new DateTime().getDate().getMonth(); int day = new DateTime().getDate().getDay(); File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName); boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0); if (nonNullFileExistAnd) { long startCheck= System.currentTimeMillis(); FileInputStream fin = null; FileChannel ch = null; try { fin = new FileInputStream(pdfFile); ch = fin.getChannel(); byte fileContent[] = new byte[(int)pdfFile.length()]; fin.read(fileContent); fin.close(); ch.close(); long start = System.currentTimeMillis(); PdfReader pdfReader = new PdfReader(fileContent); String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1); long end = System.currentTimeMillis(); System.out.println("Reading first pdf page time : " + (new Float(end-start))/1000 + " seconds"); if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue())) { System.out.println(pdfFile + " first page content:"); System.out.println(textFromPdfFilePageOne); } long endCheck = System.currentTimeMillis(); System.out.println("Total time for checking if file is on the share : " + (new Float(endCheck - startCheck)) / 1000 + " seconds"); //If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue()) { archiveRtfFile(pdfFileName, rtfFileName); } } catch (Exception e) { e.printStackTrace(); return false; } } return nonNullFileExistAnd; }