0

I have this method to read from a .dbf file:

public DataTable ReadBulkDBF(string dbfFile, Dictionary<string, string> columnKeys, int maxRows, string dynamicValue, int nextId)
{
    long start = DateTime.Now.Ticks;
    DataTable dt = new DataTable();
    BinaryReader recReader;
    string number;
    string year;
    string month;
    string day;
    long lDate;
    long lTime;
    DataRow row;
    int fieldIndex;
    bool foundLastColumn = false;
    List<string> keys = new List<string>(columnKeys.Keys);
    List<string> values = new List<string>(columnKeys.Values);

    // For testing purposes
    int rowCount = 0;
    // If there isn't even a file, just return an empty DataTable
    if ((!File.Exists(dbfFile)))
    {
        return dt;
    }

    BinaryReader br = null;
    try
    {
        // Will allow shared open as long as the other application using it allows it too.
        // Read the header into a buffer
        br = new BinaryReader(File.Open(dbfFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));
        byte[] buffer = br.ReadBytes(Marshal.SizeOf(typeof(DBFHeader)));

        // Marshall the header into a DBFHeader structure
        GCHandle handle = GCHandle.Alloc(buffer, GCHandleType.Pinned);
        DBFHeader header = (DBFHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(DBFHeader));
        handle.Free();

        // Read in all the field descriptors. Per the spec, 13 (0D) marks the end of the field descriptors
        ArrayList fields = new ArrayList();
        while ((13 != br.PeekChar()))
        {
            buffer = br.ReadBytes(Marshal.SizeOf(typeof(FieldDescriptor)));
            handle = GCHandle.Alloc(buffer, GCHandleType.Pinned);
            fields.Add((FieldDescriptor)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(FieldDescriptor)));
            handle.Free();
        }

        // Read in the first row of records, we need this to help determine column types below
        ((FileStream)br.BaseStream).Seek(header.headerLen + 1, SeekOrigin.Begin);
        buffer = br.ReadBytes(header.recordLen);
        recReader = new BinaryReader(new MemoryStream(buffer));

        // Create the columns in our new DataTable
        DataColumn col = null;

        dt.Columns.Add(new DataColumn("updateId", typeof(int)));
        if (!dbfFile.Contains("con_compania")) { dt.Columns.Add(new DataColumn("dynamic", typeof(string))); }
        dt.Columns.Add(new DataColumn("fechasync", typeof(DateTime)));

        foreach (FieldDescriptor field in fields)
        {
            // Adds columns to DataTable dt
        }

        // Skip past the end of the header. 
        ((FileStream)br.BaseStream).Seek(header.headerLen, SeekOrigin.Begin);

        // Read in all the records
        for (int counter = 0; counter < header.numRecords && dt.Rows.Count < maxRows; counter++)
        {
            // First we'll read the entire record into a buffer and then read each field from the buffer
            // This helps account for any extra space at the end of each record and probably performs better
            buffer = br.ReadBytes(header.recordLen);
            recReader = new BinaryReader(new MemoryStream(buffer));

            // All dbf field records begin with a deleted flag field. Deleted - 0x2A (asterisk) else 0x20 (space)
            if (recReader.ReadChar() == '*')
            {
                continue;
            }

            // Loop through each field in a record
            fieldIndex = 2;


            rowCount = dt.Rows.Count;
            row = dt.NewRow();

    foreach (FieldDescriptor field in fields)
            {
                switch (field.fieldType)
                {
                    // Casts field's value according to its type and saves it in the dt.

                }
                fieldIndex++;
            }
            // Looks for key-value combination in every row until 
            // it finds it to know where to start reading the new rows.
            if (!foundLastColumn && columnKeys.Keys.Count > 0)
            {
                foundLastColumn = true;

                int i = 3;

                if (dbfFile.Contains("con_compania")) { i = 2; }

                for (; i < keys.Count && foundLastColumn; i++)
                {
                    if (!row[keys[i]].ToString().Equals(values[i]))
                    {
                        foundLastColumn = false;
                    }
                }
            }
            else
            {
                dt.Rows.Add(row);
                nextId++;
            }

        }
    }
    catch (Exception e)
    {
        throw e;
    }
    finally
    {
        if (null != br)
        {
            br.Close();
            br.Dispose();
        }
    }

    long count = DateTime.Now.Ticks - start;

    return dt;
}

The problem is somewhere I am leaving some kind of reference to this, so I'm getting OOM.

The method is called with something like:

DataTable dt = new ParseDBF().ReadBulkDBF(...);
//Use dt
dt.Dispose();
dt = null;

If I only call Dispose() it keeps the reference and if I call null dt becomes null, but the reference to the ParseDBF object is still there somewhere.

Any idea where the leak might be? I have looked all over the internet for ideas and tried calling Dispose() and Close(), and setting as null everything I can think of after I use it and it keeps happening.

Steven V
  • 15,061
  • 3
  • 56
  • 73
DidierFuentes
  • 87
  • 1
  • 16
  • _recReader = new BinaryReader(new MemoryStream(buffer));_ Where is the dispose? – TaW Mar 03 '15 at 20:12
  • @TaW Added it, same result. – DidierFuentes Mar 03 '15 at 20:16
  • Don't catch an exception and rethrow it `throw e;` instead "release" it `throw;`, but instead of that try-catch-finally just use a `using` that will handle calling `Dispose`. Also you don't need to call `Close` and `Dispose`, as they will ultimately do the same thing. – juharr Mar 03 '15 at 20:33
  • @juharr Thanks for the using suggestion, might try it if this works. About the exception, I'm doing something with it where the method is called so I need to send it. And finally I used `Close` and `Dispose` because I was trying to see if that changed anything. – DidierFuentes Mar 03 '15 at 20:38
  • `OutOfMemoryException` does not necessarily mean you have a memory leak. It just means you are running out of memory. How large is the DBF file that you are loading? – juharr Mar 03 '15 at 20:39
  • FYI, close and dispose free resources like file handles, they don't free memory. How big is the file you are trying to read? – Steve Wellens Mar 03 '15 at 20:40
  • @SteveWellens Isn't the Garbage Collector supposed to eventually get the memory after you call Dispose? – DidierFuentes Mar 03 '15 at 20:43
  • @Didier - The Garbage Collector will eventually free the memory whether or not you call dispose or release. How big is the file you are trying to read? – Steve Wellens Mar 03 '15 at 20:44
  • @juharr The problem is not with actually reading the file because if I limit the number of rows that it can write into the `DataTable` it runs the method a few times before I get the exception. – DidierFuentes Mar 03 '15 at 20:44
  • 1
    My recommendation is to use a memory profiler to help you determine where your memory is being used and if any of it can be freed. – juharr Mar 03 '15 at 20:50
  • @SteveWellens The file is 683MB, but if that was the problem, wouldn't it crash the first time? – DidierFuentes Mar 03 '15 at 21:11
  • @DidierFuentes - It wouldn't crash the first time but the DataTable you are putting it into could get too large. Plus, since you are pining and unpinning memory rapidly, it's **possible** the Garbage collector can't keep up and the memory is getting fragmented. I would try the method here: http://stackoverflow.com/questions/22361457/c-sharp-read-from-dbf-files-into-a-datatable – Steve Wellens Mar 03 '15 at 21:17
  • @SteveWellens There's a problem with this method, I need to insert some extra columns into the table to work with it and it doesn't look like I can. – DidierFuentes Mar 03 '15 at 21:51
  • It looks like your datatable `dt` just fills up until you pop (run out of memory). Is this what your memory profiler shows? I would expect the answer is yes. – StarPilot Mar 03 '15 at 22:43
  • @StarPilot I'm not 100% sure I understand the memory profiler, but the answer would anyway be no. I first limited the number of rows in `dt` to 500,000 and it read twice before the exception, changed it to 10,000 and read twice again and threw the exception. – DidierFuentes Mar 04 '15 at 02:29

1 Answers1

1

I notice that recreader may not be getting freed.

I would strongly suggest making use of using blocks within this code to ensure that IDisposable objects are cleaned when execution leaves the using scope.

RobStone
  • 187
  • 7
  • Added recReader.Dispose() to the method before returning, same result. – DidierFuentes Mar 03 '15 at 20:15
  • Where did you add it? It is created in the loop, so either dispose it there or move the creation out of the loop! – TaW Mar 03 '15 at 20:18
  • 1
    @TaW I originally added it right before the return statement of the method, but then I moved it to the end of the try, and then to the end of the loop. It's actually declared outside the loops and given value in every loop. Anyway, same result, OOM Exception. – DidierFuentes Mar 03 '15 at 20:35
  • You have a continue in there which might seem to skip it that disposal after the "*" check – RobStone Mar 03 '15 at 20:39
  • @RobStone Question, should I use the `using` inside or instead of the `try`? – DidierFuentes Mar 03 '15 at 20:41
  • 1
    Disposing and exception handling can be separate issues with using(){}. You probably won't need the try anymore as you aren't currently doing anything but disposal if an exception occurs. You should still use using blocks however. – RobStone Mar 03 '15 at 20:44
  • @DidierFuentes: See [this answer](http://stackoverflow.com/a/278924/1364007) to **using statement vs try finally**. I'd always use [`using`](http://msdn.microsoft.com/en-us/library/yh598w02) for [`IDispoable`](https://msdn.microsoft.com/en-us/library/system.idisposable.aspx) objects when I'm the owner (i.e. its use is entirely within my scope). – Wai Ha Lee Mar 03 '15 at 21:16
  • @RobStone Tried with a using statement for every IDisposable. Still OOM Exception. – DidierFuentes Mar 03 '15 at 21:52