Click here to Skip to main content
15,910,981 members
Please Sign up or sign in to vote.
1.00/5 (1 vote)
See more:
C#
/// <summary>
/// Extracts the text of every page of a PDF and re-paginates it into
/// fixed-size chunks of 2000 characters, formatted as an HTML fragment.
/// </summary>
/// <param name="pdfPath">Path of the PDF file handed to <c>PdfReader</c>.</param>
/// <returns>
/// One line per output page in the form
/// <c>Page N&lt;br /&gt;{text}&lt;br /&gt;&lt;br /&gt;</c>. Output pages are numbered
/// from 1 and are independent of the PDF's own page boundaries. The final
/// output page may hold fewer than 2000 characters.
/// </returns>
/// <remarks>
/// NOTE(review): the extracted text is written into the HTML fragment without
/// encoding. If the PDF content is untrusted and the result is rendered in a
/// browser, HTML-encode the text before display.
/// </remarks>
public string GetPDFText(String pdfPath)
{
    // Number of characters placed on each output page.
    const int CharsPerPage = 2000;

    PdfReader reader = new PdfReader(pdfPath);
    try
    {
        using (StringWriter output = new StringWriter())
        {
            // Text extracted so far that has not yet filled a whole output page.
            String pending = String.Empty;
            int pageNumber = 1;

            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                pending += PdfTextExtractor.GetTextFromPage(reader, i, new SimpleTextExtractionStrategy());

                // Emit every complete chunk currently buffered; the remainder
                // is carried over and joined with the next PDF page's text.
                int offset = 0;
                while (pending.Length - offset >= CharsPerPage)
                {
                    output.WriteLine("Page " + pageNumber + "<br />" + pending.Substring(offset, CharsPerPage) + "<br /><br />");
                    offset += CharsPerPage;
                    pageNumber++;
                }
                pending = pending.Substring(offset);
            }

            // Flush the trailing partial chunk. Without this the end of the
            // document (or all of a short document) would be silently dropped.
            if (pending.Length > 0)
            {
                output.WriteLine("Page " + pageNumber + "<br />" + pending + "<br /><br />");
            }

            return output.ToString();
        }
    }
    finally
    {
        // Release the reader even when extraction throws.
        reader.Close();
    }
}
Posted
Updated 5-Jun-13 2:28am
v4
Comments
Joezer BH 5-Jun-13 8:27am    
What are you using in this code?
What are the problems you are having?
Maciej Los 5-Jun-13 8:57am    
Is it a question?

1 solution

Check out

PDFsharp - http://pdfsharp.codeplex.com/[^]
and
PDFLib - http://pdflib.codeplex.com/[^]

Cheers,
Edo
 
Share this answer
 

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900