0

I have created a console application in Visual Studio, and I am trying to validate a PDF using the Coded UI (C#) feature provided in Microsoft Visual Studio by extracting all the contents of the PDF into a text file using IKVM and PDFBox. While doing so, I am getting a NullReferenceException at the line "return stripper.getText(doc)". Can anybody help me?

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using org.pdfbox.pdmodel;
using org.pdfbox.util;
using System.IO;
using System.Windows.Forms;
using System.Collections;
using java.io;
using iTextSharp.text;
using iTextSharp.text.pdf;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console program that extracts the text of a PDF with PDFBox (running on IKVM)
    /// and writes the extracted text to a plain-text file.
    /// </summary>
    class program
    {
        /// <summary>
        /// Entry point: extracts the text of the hard-coded input PDF into the hard-coded
        /// output text file, reports any failure on the console, then waits for Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string path = @"C:\Trishna Chopade\Packages\";
            string fileIn = path + "JAVA.pdf";
            string fileOut = path + "Trish.txt";
            try
            {
                // The output file is deliberately NOT opened here: it may not exist yet,
                // and ReadFile1 creates it once the text has been extracted.
                System.Text.Encoding fileInEnc = GetFileEncoding(fileIn);

                // Read from PDF and write the text file.
                ReadFile1(fileIn, fileOut, fileInEnc);
            }
            catch (System.IO.IOException ioe)
            {
                Console.WriteLine("error " + ioe);
            }
            catch (System.Exception ex)
            {
                // Exceptions raised inside the IKVM-compiled PDFBox code (including the
                // NullReferenceException from getText) are not System.IO.IOException;
                // report them with their stack trace instead of crashing the process.
                Console.WriteLine("error " + ex);
            }

            // Keep the console window open until the user presses Enter (once).
            Console.ReadLine();
        }

        /// <summary>
        /// Guesses a file's text encoding from its byte-order mark (BOM).
        /// </summary>
        /// <param name="fileIn">Path of the file to inspect.</param>
        /// <returns>
        /// The encoding matching the BOM found at the start of the file, or
        /// <see cref="Encoding.ASCII"/> when no known BOM is present.
        /// </returns>
        private static Encoding GetFileEncoding(string fileIn)
        {
            // Read up to the first four bytes; Stream.Read may return fewer than requested.
            var bom = new byte[4];
            int read = 0;
            using (var file = new FileStream(fileIn, FileMode.Open, FileAccess.Read))
            {
                int n;
                while (read < bom.Length && (n = file.Read(bom, read, bom.Length - read)) > 0)
                {
                    read += n;
                }
            }

            // UTF-32 LE (FF FE 00 00) must be tested before UTF-16 LE (FF FE) because it
            // shares the same two-byte prefix. The length check prevents the zero-filled
            // tail of the buffer from turning a 2-byte file "FF FE" into a false match.
            if (read >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
            {
                return Encoding.UTF32; // UTF-32LE
            }
            if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
            {
                return new UTF32Encoding(true, true); // UTF-32BE (Encoding.UTF32 is little-endian)
            }
            if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76)
            {
                return Encoding.UTF7;
            }
            if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
            {
                return Encoding.UTF8;
            }
            if (bom[0] == 0xff && bom[1] == 0xfe)
            {
                return Encoding.Unicode; // UTF-16LE
            }
            if (bom[0] == 0xfe && bom[1] == 0xff)
            {
                return Encoding.BigEndianUnicode; // UTF-16BE
            }
            return Encoding.ASCII;
        }

        /// <summary>
        /// Extracts the text of the PDF at <paramref name="fileIn"/> and writes it to
        /// <paramref name="fileOut"/> as UTF-8 text, replacing any previous content.
        /// </summary>
        /// <param name="fileIn">Path of the PDF to read.</param>
        /// <param name="fileOut">Path of the text file to create or overwrite.</param>
        /// <param name="fileInEnc">
        /// Not used; kept so existing callers keep compiling. The output is always UTF-8.
        /// </param>
        public static void ReadFile1(string fileIn, string fileOut, System.Text.Encoding fileInEnc)
        {
            // Extract first, so a failed extraction does not create or truncate the output file.
            string text = ParseUsingPDFBox(fileIn) ?? string.Empty;

            // FileMode.Create truncates an existing file (OpenOrCreate would leave stale
            // trailing bytes), and StreamWriter writes the characters only, whereas
            // BinaryWriter.Write(string) prepends a binary length prefix to the text.
            using (FileStream fs = new FileStream(fileOut, FileMode.Create, FileAccess.Write))
            using (StreamWriter writer = new StreamWriter(fs, new UTF8Encoding(false)))
            {
                writer.Write(text);
            }
        }

        /// <summary>
        /// Loads a PDF with PDFBox and returns the text produced by <c>PDFTextStripper</c>.
        /// </summary>
        /// <param name="input">Path of the PDF file to load.</param>
        /// <returns>The text returned by <c>PDFTextStripper.getText</c> for the document.</returns>
        private static string ParseUsingPDFBox(string input)
        {
            PDDocument doc = PDDocument.load(input);
            try
            {
                // NOTE(review): a NullReferenceException thrown from getText with this old
                // PDFBox/IKVM build is presumably caused by the runtime setup (e.g. the
                // FontBox and IKVM assemblies not being deployed next to the executable)
                // rather than by this code - confirm against the exception's stack trace.
                PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(doc);
            }
            finally
            {
                // Release the document's underlying file resources even when extraction fails.
                if (doc != null)
                {
                    doc.close();
                }
            }
        }
    }
}
C. Trish
  • 9
  • 1
  • Have a look at [How to handle a NullReferenceException](http://stackoverflow.com/questions/4660142/what-is-a-nullreferenceexception-and-how-do-i-fix-it) – croxy Jan 11 '16 at 10:30
  • You must be an archeologist. That version is almost 10 years old. – Tilman Hausherr Jan 11 '16 at 11:14

0 Answers0