Java 类com.itextpdf.text.pdf.parser.PdfTextExtractor 实例源码

项目:Voting_2b    文件:PdfLetterTest.java   
/**
 * Checks the letter PDF written for {@code voter}: on page 1, the text after
 * the {@code ":\t"} separator on the second and third lines must equal the
 * voter's e-mail and password respectively.
 */
@Test
public void testWritePlainLetter() {
    PdfReader reader = null;
    try {
        reader = new PdfReader("letters/" + voter.getNif() + ".pdf");
        String page = PdfTextExtractor.getTextFromPage(reader, 1);
        String[] lines = page.split("\n");
        String email = lines[1].split(":\t")[1];
        String password = lines[2].split(":\t")[1];
        assertEquals(voter.getEmail(), email);
        assertEquals(voter.getPassword(), password);
    } catch (IOException e) {
        // A missing or unreadable letter must fail the test; merely printing
        // the problem would let the test pass without checking anything.
        throw new AssertionError("Archivo no encontrado", e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
项目:docserv    文件:IntegrationTest.java   
/**
 * Returns the text of page 1 of the given PDF with all whitespace deleted,
 * every U+00A0 character removed and every U+00AD character replaced by "-".
 *
 * @param pdfByteArray the PDF document as raw bytes
 * @return the normalised first-page text
 * @throws IOException if the PDF cannot be read or parsed
 */
private String getNonWhiteSpacesFromPDF(byte[] pdfByteArray)
        throws IOException {
    PdfReader pdfReader = null;
    try {
        pdfReader = new PdfReader(pdfByteArray);
        String firstPageText = PdfTextExtractor.getTextFromPage(pdfReader, 1);
        // Drop non-breaking spaces and turn soft hyphens into plain hyphens.
        return StringUtils.deleteWhitespace(firstPageText)
                .replace("\u00A0", "")
                .replace("\u00AD", "-");
    } finally {
        if (pdfReader != null) {
            pdfReader.close();
        }
    }
}
项目:satisfy    文件:PDFWords.java   
/**
 * Reads every page of the given PDF and stores the extracted text in
 * {@code pdfData}, keyed by 1-based page number. {@code pdfData} is replaced
 * by a fresh map on each call.
 *
 * @param filename path of the PDF file to read
 * @throws IOException if the file cannot be opened or a page cannot be parsed
 */
public void parsePdf(String filename) throws IOException {
    PdfReader reader = new PdfReader(filename);
    try {
        LOGGER.trace("Reading file " + filename);
        pdfData = new HashMap<Integer, String>();
        int numberOfPages = reader.getNumberOfPages();
        for (int page = 1; page <= numberOfPages; page++) {
            LOGGER.trace("Reading page " + page);
            String textFromPage = PdfTextExtractor.getTextFromPage(reader, page);
            pdfData.put(page, textFromPage);
        }
    } finally {
        // Release the reader even when a page fails to parse.
        reader.close();
    }
}
项目:OpenSongTablet    文件:PopUpPDFToTextFragment.java   
/**
 * Extracts the text of every page of {@code FullscreenActivity.file}, passes
 * each page through {@code detectAndImproveLine}, joins the pages with a
 * trailing newline each and stores the HTML-entity-encoded result in
 * {@code foundText}. If extraction fails, whatever was collected before the
 * failure is still stored.
 */
public void getPDFExtractedText() {
    StringBuilder parsedText = new StringBuilder();
    PdfReader reader = null;
    try {
        reader = new PdfReader(FullscreenActivity.file.toString());
        int n = reader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            String text = detectAndImproveLine(PdfTextExtractor.getTextFromPage(reader, i));
            // One page per line in the collected output.
            parsedText.append(text).append("\n");
        }
    } catch (Exception e) {
        // Best effort: keep the partial text, but log the cause.
        Log.d("d", "Error extracting text", e);
    } finally {
        // Close the reader on the failure path too.
        if (reader != null) {
            reader.close();
        }
    }
    foundText = PopUpEditSongFragment.parseToHTMLEntities(parsedText.toString());
}
项目:testarea-itext5    文件:TextExtraction.java   
/**
 * Extracts the text of one page with a {@link SimpleTextExtractionStrategy}
 * that additionally inserts {@code <BLOCK>} / {@code </BLOCK>} markers at the
 * text block boundaries reported by the parser, once some text has been rendered.
 *
 * @param reader the open PDF document
 * @param pageNo the page to extract
 * @return the extracted text including the block markers
 * @throws IOException if the page cannot be parsed
 */
String extractSimple(PdfReader reader, int pageNo) throws IOException
{
    SimpleTextExtractionStrategy blockMarkingStrategy = new SimpleTextExtractionStrategy()
    {
        // Set by the first renderText call; no markers are emitted before that.
        boolean sawText = false;

        @Override
        public void renderText(TextRenderInfo renderInfo)
        {
            sawText = true;
            super.renderText(renderInfo);
        }

        @Override
        public void beginTextBlock()
        {
            if (sawText)
            {
                appendTextChunk("<BLOCK>");
            }
            super.beginTextBlock();
        }

        @Override
        public void endTextBlock()
        {
            if (sawText)
            {
                appendTextChunk("</BLOCK>\n");
            }
            super.endTextBlock();
        }

        @Override
        public String getResultantText()
        {
            // beginTextBlock emits nothing until text has been seen, so the
            // opening marker is prepended here instead.
            String collected = super.getResultantText();
            return sawText ? "<BLOCK>" + collected : collected;
        }
    };

    return PdfTextExtractor.getTextFromPage(reader, pageNo, blockMarkingStrategy);
}
项目:AvoinApotti    文件:Logic.java   
/**
 * Checks that today's copy of the given PDF exists in the PDF store, is
 * non-empty, and that its first page can be parsed. When the
 * CORRESPONDENCE_ARCHIVE_RTF_FILES flag is set, {@code archiveRtfFile} is
 * called after a successful parse.
 *
 * @param pdfFileName file name of the PDF inside the year\month\day folder of the store
 * @param rtfFileName file name passed to {@code archiveRtfFile} together with {@code pdfFileName}
 * @return {@code true} if the file exists with non-zero length and was read,
 *         parsed and (if enabled) archived without an exception; {@code false} otherwise
 */
private boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName)
{
    // NOTE(review): the clock is sampled three times; the parts could disagree
    // if the calls straddle midnight - confirm whether that matters here.
    int year  = new DateTime().getDate().getYear();
    int month = new DateTime().getDate().getMonth();
    int day   = new DateTime().getDate().getDay();

    // NOTE(review): backslash separators are hard-coded; verify on non-Windows hosts.
    File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
    boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0);

    if (nonNullFileExistAnd)
    {
        long startCheck = System.currentTimeMillis();
        PdfReader pdfReader = null;
        try
        {
            byte[] fileContent = new byte[(int) pdfFile.length()];
            FileInputStream fin = new FileInputStream(pdfFile);
            try
            {
                // A single read() may return fewer bytes than requested, so loop
                // until the buffer is full.
                int offset = 0;
                while (offset < fileContent.length)
                {
                    int count = fin.read(fileContent, offset, fileContent.length - offset);
                    if (count < 0)
                    {
                        throw new java.io.IOException("Unexpected end of file while reading " + pdfFile);
                    }
                    offset += count;
                }
            }
            finally
            {
                // Closing the stream also releases its underlying channel.
                fin.close();
            }

            long start = System.currentTimeMillis();
            pdfReader = new PdfReader(fileContent);
            String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
            long end = System.currentTimeMillis();
            System.out.println("Reading first pdf page time : " + (end - start) / 1000f + " seconds");

            if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
            {
                System.out.println(pdfFile + " first page content:");
                System.out.println(textFromPdfFilePageOne);
            }

            long endCheck = System.currentTimeMillis();
            System.out.println("Total time for checking if file is on the share : " + (endCheck - startCheck) / 1000f + " seconds");

            //If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
            if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
            {
                archiveRtfFile(pdfFileName, rtfFileName);
            }
        }
        catch (Exception e)
        {
            // Any read, parse or archive failure is reported as "not available".
            e.printStackTrace();
            return false;
        }
        finally
        {
            if (pdfReader != null)
            {
                pdfReader.close();
            }
        }
    }

    return nonNullFileExistAnd;
}
项目:testarea-itext5    文件:TextExtraction.java   
/**
 * Extracts the text of one page using the supplied strategy.
 *
 * @param reader   the open PDF document
 * @param pageNo   the page to extract
 * @param strategy the text extraction strategy to apply
 * @return the text produced by {@code strategy} for that page
 * @throws IOException if the page cannot be parsed
 */
String extract(PdfReader reader, int pageNo, TextExtractionStrategy strategy) throws IOException
{
    String pageText = PdfTextExtractor.getTextFromPage(reader, pageNo, strategy);
    return pageText;
}
项目:testarea-itext5    文件:ExtractSuperAndSubInLine.java   
/**
 * Extracts one page with a {@link HorizontalTextExtractionStrategy}.
 * This variant works for iText before 5.5.9-SNAPSHOT
 * Commit 53526e4854fcb80c86cbc2e113f7a07401dc9a67 ("Refactor LocationTextExtractionStrategy...").
 *
 * @param reader the open PDF document
 * @param pageNo the page to extract
 * @return the extracted page text
 */
@SuppressWarnings("deprecation")
String extract(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    HorizontalTextExtractionStrategy horizontalStrategy = new HorizontalTextExtractionStrategy();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
项目:testarea-itext5    文件:ExtractSuperAndSubInLine.java   
/**
 * Extracts one page with a {@link HorizontalTextExtractionStrategy2}.
 * This variant works for iText since 5.5.9-SNAPSHOT
 * Commit 1ab350beae148be2a4bef5e663b3d67a004ff9f8 ("Make TextChunkLocation a Comparable<> class...").
 *
 * @param reader the open PDF document
 * @param pageNo the page to extract
 * @return the extracted page text
 */
String extractV2(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    HorizontalTextExtractionStrategy2 horizontalStrategy = new HorizontalTextExtractionStrategy2();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
项目:testarea-itext5    文件:RemappedExtraction.java   
/**
 * Extracts one page with a {@link LocationTextExtractionStrategy} wrapped in
 * a {@link RemappingExtractionFilter}.
 *
 * @param reader the open PDF document
 * @param pageNo the page to extract
 * @return the extracted page text
 */
String extractRemapped(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    LocationTextExtractionStrategy locationStrategy = new LocationTextExtractionStrategy();
    TextExtractionStrategy remappingStrategy = new RemappingExtractionFilter(locationStrategy);
    String pageText = PdfTextExtractor.getTextFromPage(reader, pageNo, remappingStrategy);
    return pageText;
}
项目:openMAXIMS    文件:Logic.java   
/**
 * Checks that today's copy of the given PDF exists in the PDF store, is
 * non-empty, and that its first page can be parsed. When the
 * CORRESPONDENCE_ARCHIVE_RTF_FILES flag is set, {@code archiveRtfFile} is
 * called after a successful parse.
 *
 * @param pdfFileName file name of the PDF inside the year\month\day folder of the store
 * @param rtfFileName file name passed to {@code archiveRtfFile} together with {@code pdfFileName}
 * @return {@code true} if the file exists with non-zero length and was read,
 *         parsed and (if enabled) archived without an exception; {@code false} otherwise
 */
private boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName)
{
    // NOTE(review): the clock is sampled three times; the parts could disagree
    // if the calls straddle midnight - confirm whether that matters here.
    int year  = new DateTime().getDate().getYear();
    int month = new DateTime().getDate().getMonth();
    int day   = new DateTime().getDate().getDay();

    // NOTE(review): backslash separators are hard-coded; verify on non-Windows hosts.
    File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
    boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0);

    if (nonNullFileExistAnd)
    {
        long startCheck = System.currentTimeMillis();
        PdfReader pdfReader = null;
        try
        {
            byte[] fileContent = new byte[(int) pdfFile.length()];
            FileInputStream fin = new FileInputStream(pdfFile);
            try
            {
                // A single read() may return fewer bytes than requested, so loop
                // until the buffer is full.
                int offset = 0;
                while (offset < fileContent.length)
                {
                    int count = fin.read(fileContent, offset, fileContent.length - offset);
                    if (count < 0)
                    {
                        throw new java.io.IOException("Unexpected end of file while reading " + pdfFile);
                    }
                    offset += count;
                }
            }
            finally
            {
                // Closing the stream also releases its underlying channel.
                fin.close();
            }

            long start = System.currentTimeMillis();
            pdfReader = new PdfReader(fileContent);
            String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
            long end = System.currentTimeMillis();
            System.out.println("Reading first pdf page time : " + (end - start) / 1000f + " seconds");

            if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
            {
                System.out.println(pdfFile + " first page content:");
                System.out.println(textFromPdfFilePageOne);
            }

            long endCheck = System.currentTimeMillis();
            System.out.println("Total time for checking if file is on the share : " + (endCheck - startCheck) / 1000f + " seconds");

            //If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
            if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
            {
                archiveRtfFile(pdfFileName, rtfFileName);
            }
        }
        catch (Exception e)
        {
            // Any read, parse or archive failure is reported as "not available".
            e.printStackTrace();
            return false;
        }
        finally
        {
            if (pdfReader != null)
            {
                pdfReader.close();
            }
        }
    }

    return nonNullFileExistAnd;
}
项目:openMAXIMS    文件:DocumentHelper.java   
/**
 * Checks that today's copy of the given PDF exists in the PDF store, is
 * non-empty, and that its first page can be parsed. When the
 * CORRESPONDENCE_ARCHIVE_RTF_FILES flag is set, {@code archiveRtfFile} is
 * called after a successful parse.
 *
 * @param pdfFileName file name of the PDF inside the year\month\day folder of the store
 * @param rtfFileName file name passed to {@code archiveRtfFile} together with {@code pdfFileName}
 * @return {@code true} if the file exists with non-zero length and was read,
 *         parsed and (if enabled) archived without an exception; {@code false} otherwise
 */
public static boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName)
{
    // NOTE(review): the clock is sampled three times; the parts could disagree
    // if the calls straddle midnight - confirm whether that matters here.
    int year  = new DateTime().getDate().getYear();
    int month = new DateTime().getDate().getMonth();
    int day   = new DateTime().getDate().getDay();

    // NOTE(review): backslash separators are hard-coded; verify on non-Windows hosts.
    File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
    boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0);

    if (nonNullFileExistAnd)
    {
        long startCheck = System.currentTimeMillis();
        PdfReader pdfReader = null;
        try
        {
            byte[] fileContent = new byte[(int) pdfFile.length()];
            FileInputStream fin = new FileInputStream(pdfFile);
            try
            {
                // A single read() may return fewer bytes than requested, so loop
                // until the buffer is full.
                int offset = 0;
                while (offset < fileContent.length)
                {
                    int count = fin.read(fileContent, offset, fileContent.length - offset);
                    if (count < 0)
                    {
                        throw new java.io.IOException("Unexpected end of file while reading " + pdfFile);
                    }
                    offset += count;
                }
            }
            finally
            {
                // Closing the stream also releases its underlying channel.
                fin.close();
            }

            long start = System.currentTimeMillis();
            pdfReader = new PdfReader(fileContent);
            String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
            long end = System.currentTimeMillis();
            System.out.println("Reading first pdf page time : " + (end - start) / 1000f + " seconds");

            if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
            {
                System.out.println(pdfFile + " first page content:");
                System.out.println(textFromPdfFilePageOne);
            }

            long endCheck = System.currentTimeMillis();
            System.out.println("Total time for checking if file is on the share : " + (endCheck - startCheck) / 1000f + " seconds");

            //If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
            if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
            {
                archiveRtfFile(pdfFileName, rtfFileName);
            }
        }
        catch (Exception e)
        {
            // Any read, parse or archive failure is reported as "not available".
            e.printStackTrace();
            return false;
        }
        finally
        {
            if (pdfReader != null)
            {
                pdfReader.close();
            }
        }
    }

    return nonNullFileExistAnd;
}
项目:openMAXIMS    文件:Logic.java   
/**
 * Checks that today's copy of the given PDF exists in the PDF store, is
 * non-empty, and that its first page can be parsed. When the
 * CORRESPONDENCE_ARCHIVE_RTF_FILES flag is set, {@code archiveRtfFile} is
 * called after a successful parse.
 *
 * @param pdfFileName file name of the PDF inside the year\month\day folder of the store
 * @param rtfFileName file name passed to {@code archiveRtfFile} together with {@code pdfFileName}
 * @return {@code true} if the file exists with non-zero length and was read,
 *         parsed and (if enabled) archived without an exception; {@code false} otherwise
 */
private boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName)
{
    // NOTE(review): the clock is sampled three times; the parts could disagree
    // if the calls straddle midnight - confirm whether that matters here.
    int year  = new DateTime().getDate().getYear();
    int month = new DateTime().getDate().getMonth();
    int day   = new DateTime().getDate().getDay();

    // NOTE(review): backslash separators are hard-coded; verify on non-Windows hosts.
    File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
    boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0);

    if (nonNullFileExistAnd)
    {
        long startCheck = System.currentTimeMillis();
        PdfReader pdfReader = null;
        try
        {
            byte[] fileContent = new byte[(int) pdfFile.length()];
            FileInputStream fin = new FileInputStream(pdfFile);
            try
            {
                // A single read() may return fewer bytes than requested, so loop
                // until the buffer is full.
                int offset = 0;
                while (offset < fileContent.length)
                {
                    int count = fin.read(fileContent, offset, fileContent.length - offset);
                    if (count < 0)
                    {
                        throw new java.io.IOException("Unexpected end of file while reading " + pdfFile);
                    }
                    offset += count;
                }
            }
            finally
            {
                // Closing the stream also releases its underlying channel.
                fin.close();
            }

            long start = System.currentTimeMillis();
            pdfReader = new PdfReader(fileContent);
            String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
            long end = System.currentTimeMillis();
            System.out.println("Reading first pdf page time : " + (end - start) / 1000f + " seconds");

            if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
            {
                System.out.println(pdfFile + " first page content:");
                System.out.println(textFromPdfFilePageOne);
            }

            long endCheck = System.currentTimeMillis();
            System.out.println("Total time for checking if file is on the share : " + (endCheck - startCheck) / 1000f + " seconds");

            //If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
            if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
            {
                archiveRtfFile(pdfFileName, rtfFileName);
            }
        }
        catch (Exception e)
        {
            // Any read, parse or archive failure is reported as "not available".
            e.printStackTrace();
            return false;
        }
        finally
        {
            if (pdfReader != null)
            {
                pdfReader.close();
            }
        }
    }

    return nonNullFileExistAnd;
}
项目:openmaxims-linux    文件:Logic.java   
/**
 * Checks that today's copy of the given PDF exists in the PDF store, is
 * non-empty, and that its first page can be parsed. When the
 * CORRESPONDENCE_ARCHIVE_RTF_FILES flag is set, {@code archiveRtfFile} is
 * called after a successful parse.
 *
 * @param pdfFileName file name of the PDF inside the year\month\day folder of the store
 * @param rtfFileName file name passed to {@code archiveRtfFile} together with {@code pdfFileName}
 * @return {@code true} if the file exists with non-zero length and was read,
 *         parsed and (if enabled) archived without an exception; {@code false} otherwise
 */
private boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName)
{
    // NOTE(review): the clock is sampled three times; the parts could disagree
    // if the calls straddle midnight - confirm whether that matters here.
    int year  = new DateTime().getDate().getYear();
    int month = new DateTime().getDate().getMonth();
    int day   = new DateTime().getDate().getDay();

    // NOTE(review): backslash separators are hard-coded; verify on non-Windows hosts.
    File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
    boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0);

    if (nonNullFileExistAnd)
    {
        long startCheck = System.currentTimeMillis();
        PdfReader pdfReader = null;
        try
        {
            byte[] fileContent = new byte[(int) pdfFile.length()];
            FileInputStream fin = new FileInputStream(pdfFile);
            try
            {
                // A single read() may return fewer bytes than requested, so loop
                // until the buffer is full.
                int offset = 0;
                while (offset < fileContent.length)
                {
                    int count = fin.read(fileContent, offset, fileContent.length - offset);
                    if (count < 0)
                    {
                        throw new java.io.IOException("Unexpected end of file while reading " + pdfFile);
                    }
                    offset += count;
                }
            }
            finally
            {
                // Closing the stream also releases its underlying channel.
                fin.close();
            }

            long start = System.currentTimeMillis();
            pdfReader = new PdfReader(fileContent);
            String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
            long end = System.currentTimeMillis();
            System.out.println("Reading first pdf page time : " + (end - start) / 1000f + " seconds");

            if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
            {
                System.out.println(pdfFile + " first page content:");
                System.out.println(textFromPdfFilePageOne);
            }

            long endCheck = System.currentTimeMillis();
            System.out.println("Total time for checking if file is on the share : " + (endCheck - startCheck) / 1000f + " seconds");

            //If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
            if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
            {
                archiveRtfFile(pdfFileName, rtfFileName);
            }
        }
        catch (Exception e)
        {
            // Any read, parse or archive failure is reported as "not available".
            e.printStackTrace();
            return false;
        }
        finally
        {
            if (pdfReader != null)
            {
                pdfReader.close();
            }
        }
    }

    return nonNullFileExistAnd;
}