What exactly is this method of contiguous C memory allocation doing under the hood?

Question

I came across this question while looking for an effective way to allocate large multi-dimensional arrays contiguously in memory. The accepted answer suggests that for a 3D array of size sz[0] x sz[1] x sz[2] one should use this method, which is currently melting my feeble brain:

int (*a)[sz[1]][sz[2]] = calloc(sz[0], sizeof(*a));
...
free(a)

The left-hand side of that statement looks like a 2D array of int * allocated on the stack. The right-hand side is a single (?!) call to calloc() that allocates int * on the heap. Since sizeof(*a)==sizeof(int *) (right?) this looks like too few allocations to make any sense, since it appears to allocate sz[0] x sizeof(int *) bytes, and yet it works to index over the full intended size of the array.

Can someone please help me understand how exactly this definition works to produce the intended result? Is the C compiler repeating the call to calloc for every entry in the table defined on the left? And if so, how does a single call to free() suffice to get rid of it? Does the resulting array reside entirely on the heap, or is it mixing a reference table on the stack that points to memory allocated on the heap?

`sizeof (*a) == sizeof (int [sz1][sz2])` \*a has "VLA" type array of sz1 arrays of sz2 ints — pmg, Aug 01 '19 at 15:13
Ah! Not enough coffee. Starting to make more sense now, I will continue to think about it. Thanks — KBriggs, Aug 01 '19 at 15:16
The quoted solution relies on VLAs being available. VLAs did not exist before C99, are an optional feature (so may not exist) in C11 and later, and do not exist in C++ at all. — alk, Aug 02 '19 at 16:15

score 2 · Accepted Answer · answered Aug 03 '19 at 06:19

Here is some code with a similar principle that maybe is easier to understand at first:

typedef int THING[5][6];    // THING names the type "array of 5 arrays of 6 ints": 30 ints stored contiguously

THING arr[4];               // arr is a contiguous array of 4 THINGs, i.e. 4*5*6 ints in one block
THING *first = &arr[0];     // first points to the first THING; the expression *first yields that THING

Hopefully you recognize the last two lines here as being common syntax for non-dynamic allocation of any array, and referring to the array's first element. That works just the same whether or not THING is itself an array.

Now, &arr[0] points to a memory location that is the start of a contiguous block of 4x5x6 ints. If you use dynamic allocation to make that block, it looks like:

THING *first = malloc( sizeof(int[4][5][6]) );

If we expand out the typedef in this last line it looks like:

int (*first)[5][6] = malloc( sizeof(int[4][5][6]) );

The code in your question is the same as this last line, except that:

it uses variables instead of hardcoded integers (which is allowed since C99).
it uses calloc instead of malloc.
it uses a more robust syntax for calculating the size to allocate, see here for explanation.

This is the explanation I was looking for, thanks. Could you please elaborate on the alignment issue you mention in your comment elsewhere? — KBriggs, Aug 04 '19 at 16:02

alk · Answer 2 · 2019-08-03T06:48:35.520

To avoid relying on VLAs but still use one contiguous region of memory, you could use this approach:

/*
 * Allocates an x * y * z "3D array" of int that can be indexed as
 * result[i][j][k], using exactly ONE malloc'ed block laid out as:
 *
 *   [ x pointers (int **) ][ x*y pointers (int *) ][ x*y*z ints ]
 *
 * The ints are contiguous and start at &result[0][0][0]; element (i, j, k)
 * lives at index (i*y + j)*z + k of that run. The ints are NOT initialised.
 *
 * Returns NULL if the total size does not fit in a size_t or if malloc()
 * fails. Release the whole structure with a single free(result).
 *
 * No padding is inserted between the three regions, i.e. this assumes that
 * the alignment of int* and int divides the size of the preceding tables.
 */
int *** int_array_3d_allocate(size_t x, size_t y, size_t z)
{
  const size_t size_max = (size_t) -1;
  int *** result;

  /* Element counts of the int* table and of the int data; reject overflow. */
  if (y != 0 && x > size_max / y)
  {
    return NULL;
  }
  size_t xy = x * y;

  if (z != 0 && xy > size_max / z)
  {
    return NULL;
  }
  size_t xyz = xy * z;

  /* Byte sizes of the three regions; reject overflow. */
  if (x > size_max / sizeof *result
      || xy > size_max / sizeof **result
      || xyz > size_max / sizeof ***result)
  {
    return NULL;
  }
  size_t level1_bytes = x * sizeof *result;    /* x pointers to int*  */
  size_t level2_bytes = xy * sizeof **result;  /* x*y pointers to int */
  size_t data_bytes = xyz * sizeof ***result;  /* x*y*z ints          */

  if (level1_bytes > size_max - level2_bytes
      || level1_bytes + level2_bytes > size_max - data_bytes)
  {
    return NULL;
  }
  size_t s = level1_bytes + level2_bytes + data_bytes;

  /* allocate it */

  result = malloc(s);
  if (result)
  {
    /* Do the byte arithmetic on char* FIRST, then convert to the typed
       pointer; casting before adding would scale the offset by the
       pointee size and point far outside the block. */
    int ** rows = (int **) ((char *) result + level1_bytes);
    int * data = (int *) ((char *) result + level1_bytes + level2_bytes);

    /* make the int** vector point to the int* vectors: */
    for (size_t i = 0; i < x; ++i)
    {
      result[i] = rows + i * y;
    }

    /* make the int* vectors point to the int vectors (each holds z ints): */
    for (size_t i = 0; i < xy; ++i)
    {
      rows[i] = data + i * z;
    }
  }

  return result;
}

Version of the above code that takes care of correct alignment of the int* and the int blocks:

  #include <stdalign.h>

  /*
   * Allocates an x * y * z "3D array" of int that can be indexed as
   * result[i][j][k], using exactly ONE malloc'ed block laid out as:
   *
   *   [ x pointers (int **) ][pad][ x*y pointers (int *) ][pad][ x*y*z ints ]
   *
   * Padding is inserted where needed so that the int* table and the int
   * data each start at an offset that is a multiple of their alignment.
   * The ints are contiguous and start at &result[0][0][0]; element
   * (i, j, k) lives at index (i*y + j)*z + k of that run. The ints are NOT
   * initialised.
   *
   * Returns NULL if the total size does not fit in a size_t or if malloc()
   * fails. Release the whole structure with a single free(result).
   */
  int *** int_array_3d_allocate(size_t x, size_t y, size_t z)
  {
    const size_t size_max = (size_t) -1;
    int *** result;

    /* Element counts of the int* table and of the int data; reject overflow. */
    if (y != 0 && x > size_max / y)
    {
      return NULL;
    }
    size_t xy = x * y;

    if (z != 0 && xy > size_max / z)
    {
      return NULL;
    }
    size_t xyz = xy * z;

    /* Byte sizes of the three regions; reject overflow. */
    if (x > size_max / sizeof *result
        || xy > size_max / sizeof **result
        || xyz > size_max / sizeof ***result)
    {
      return NULL;
    }
    size_t level2_bytes = xy * sizeof **result;  /* x*y pointers to int */
    size_t data_bytes = xyz * sizeof ***result;  /* x*y*z ints          */

    size_t s = x * sizeof *result;               /* x pointers to int*  */

    /* Padding so the int* table starts suitably aligned. alignof takes a
       parenthesised type name, hence the explicit types. */
    size_t y_off = s % alignof(int *)
      ? alignof(int *) - s % alignof(int *) : 0;
    if (y_off > size_max - s || level2_bytes > size_max - s - y_off)
    {
      return NULL;
    }
    size_t rows_at = s + y_off;                  /* byte offset of int* table */
    s = rows_at + level2_bytes;

    /* Padding so the int data starts suitably aligned. */
    size_t z_off = s % alignof(int)
      ? alignof(int) - s % alignof(int) : 0;
    if (z_off > size_max - s || data_bytes > size_max - s - z_off)
    {
      return NULL;
    }
    size_t data_at = s + z_off;                  /* byte offset of the ints */
    s = data_at + data_bytes;                    /* total, padding included */

    /* allocate it */

    result = malloc(s);
    if (result)
    {
      /* Do the byte arithmetic on char* FIRST, then convert to the typed
         pointer; casting before adding would scale the offset by the
         pointee size and point far outside the block. */
      int ** rows = (int **) ((char *) result + rows_at);
      int * data = (int *) ((char *) result + data_at);

      /* make the int** vector point to the int* vectors: */
      for (size_t i = 0; i < x; ++i)
      {
        result[i] = rows + i * y;
      }

      /* make the int* vectors point to the int vectors (each holds z ints): */
      for (size_t i = 0; i < xy; ++i)
      {
        rows[i] = data + i * z;
      }
    }

    return result;
  }

Use it like this:

#include <stdlib.h>
#include <stdio.h>

int *** int_array_3d_allocate(size_t x, size_t y, size_t z);

/*
 * Demo: allocates a 2 x 3 x 5 array through int_array_3d_allocate(), fills
 * every element with i*j*k via ordinary [i][j][k] indexing and releases the
 * whole structure with one free().
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the allocation failed.
 */
int main(void)
{
  const size_t x = 2;
  const size_t y = 3;
  const size_t z = 5;

  int *** int_array_3d = int_array_3d_allocate(x, y, z);
  if (!int_array_3d)
  {
    perror("int_array_3d_allocate() failed");
    return EXIT_FAILURE; /* do not report success when nothing was allocated */
  }

  for (size_t i = 0; i < x; ++i)
  {
    for (size_t j = 0; j < y; ++j)
    {
      for (size_t k = 0; k < z; ++k)
      {
        int_array_3d[i][j][k] = (int)(i*j*k);
      }
    }
  }

  /* do stuff with the contiguous array of ints.
     Just be aware that the 1st int is NOT located at int_array_3d itself
     (the block starts with the pointer tables) but at
     &int_array_3d[0][0][0].
  */

  free(int_array_3d);

  return EXIT_SUCCESS;
}

I see what you're going for, but there are a lot of compilation errors in the posted code. I got the gist of it, though, thanks. Something similar is done here: https://cboard.cprogramming.com/c-programming/127047-finding-indices-efficiently.html#post947162 — KBriggs, Aug 02 '19 at 17:22
The main difference between the code I show and the code you link is the number of allocations, and with this the number of *separate* memory areas: each of them is contiguous by itself, but in the linked code they are *not* contiguous with each other. The code above, in contrast, uses exactly *one* allocation, so the *whole* block describing this "3d-array" is *one contiguous area* of memory; in particular, *all* `int`s are accessible in *one contiguous block*. — alk, Aug 03 '19 at 05:23
This might have alignment issues, and also the extra pointers are a waste of space and time — M.M, Aug 03 '19 at 06:02
@M.M: Need to look into the alignment thing, right. But conceptually I see no other way to do this without VLAs. — alk, Aug 03 '19 at 06:06
Use a single `[]` operator and arithmetic to find the right index . With helper macros if desired. Your code consumes a lot of space and time, as well as introducing more possible things that can go wrong, for the "benefit" of slightly tidier syntax — M.M, Aug 03 '19 at 06:22

score -1 · Answer 3 · answered Aug 02 '19 at 21:12

If the array is handed off to functions, it decays into a pointer-to-pointer-to-pointer-to-int, making it unwieldy; one has to pass all the extra size information, too, or pass a pointer to a fixed size; see What is array decaying? A different way of handling arrays with multiple dimensions is an object which has the dimensions encoded within the object. This will compile in C90,

#include <stdlib.h> /* mallc, free, EXIT_ */
#include <errno.h>  /* errno */
#include <stdio.h>  /* perror, printf, fput[c|s] */

/** Header of a cube of `int`; the `x * y * z` numbers are stored in the same
 allocation, immediately after this header. */
struct IntCube { size_t x, y, z; /* C99 supports FAM; would be useful. */ };

/** Allocates a `struct IntCube` with `x`, `y`, `z` dimensions in a single
 block: the header followed by `x * y * z` uninitialised `int`s.
 @return The new cube, or null if any dimension is zero (`errno` is not
 touched), if the total size overflows `size_t` (`errno` is set to `ERANGE`),
 or if `malloc` fails. The caller is responsible for calling `free`. */
static struct IntCube *IntCube(const size_t x, const size_t y, const size_t z) {
    struct IntCube *cube;
    size_t xy_size, xyz_size, data_size, cube_size;

    if(!x || !y || !z) return 0;

    /* Check for overflow; <https://stackoverflow.com/q/1815367/2472827>.
     Unsigned arithmetic wraps, so each product is verified by dividing it
     back; the `||` chain stops before any division by a wrapped-to-zero
     product. */
    xy_size = x * y;
    xyz_size = xy_size * z;
    data_size = xyz_size * sizeof(int);
    /* `sizeof *cube` is the header; `sizeof cube` would only be the size of
     a pointer and leave the block too small for header plus data. */
    cube_size = sizeof *cube + data_size;
    if(xy_size / x != y
        || xyz_size / xy_size != z
        || data_size / xyz_size != sizeof(int)
        || cube_size < data_size) { errno = ERANGE; return 0; }

    /* Allocate memory. */
    if(!(cube = malloc(cube_size))) return 0; /* POSIX has defined errors. */
    cube->x = x;
    cube->y = y;
    cube->z = z;
    return cube;
}

/** Returns the address of the number at (`x`, `y`, `z`) in `cube`. The
 numbers are read from directly behind the header, with `x` varying fastest,
 then `y`, then `z`. No bounds checking is performed. */
static int *int_cube_get(const struct IntCube *cube,
    const size_t x, const size_t y, const size_t z) {
    int *const numbers = (int *)(cube + 1);
    const size_t row = z * cube->y + y;
    return numbers + (row * cube->x + x);
}

/** Callback type used by `IntCubeForEach` for each number: receives the
 `x`, `y`, `z` coordinates and `pnumber`, a pointer to the number at those
 coordinates, which the callback may read or overwrite. */
typedef void (*IntCubeAction)(const size_t x, const size_t y, const size_t z,
    int *pnumber);

/** Optional callback type used by `IntCubeForEach` to mark boundaries: it is
 called with `bin` 0 at the start and `bin` 1 at the end of a row or slab. */
typedef void (*BinaryAction)(int bin);

/** Calls the optional boundary callback `bin` with `flag`, if there is one. */
static void int_cube_signal(const BinaryAction bin, const int flag) {
    if(bin) bin(flag);
}

/** Visits every number in `cube`, with `z` outermost and `x` innermost, and
 calls `action` on each. If `bin` is not null, it is called with 0 before and
 with 1 after every `z` slab and every `y` row. Does nothing when `cube` or
 `action` is null. */
static void IntCubeForEach(struct IntCube *const cube,
    const IntCubeAction action, const BinaryAction bin) {
    size_t col, row, slab;
    int *pnumber;
    if(!cube) return;
    if(!action) return;
    for(slab = 0; slab < cube->z; slab++) {
        int_cube_signal(bin, 0);
        for(row = 0; row < cube->y; row++) {
            int_cube_signal(bin, 0);
            for(col = 0; col < cube->x; col++) {
                pnumber = int_cube_get(cube, col, row, slab);
                action(col, row, slab, pnumber);
            }
            int_cube_signal(bin, 1);
        }
        int_cube_signal(bin, 1);
    }
}

/** Stores the product of the one-based coordinates, `(x+1)(y+1)(z+1)`, in
 `*pnumber`.
 @implements IntCubeAction */
static void fill_with_xyz(const size_t x, const size_t y, const size_t z,
    int *pnumber) {
    const size_t product = (x + 1) * (y + 1) * (z + 1);
    *pnumber = (int)product;
}

/** Prints `*pnumber` to standard output, preceded by ", " unless it is the
 first number of its row (`x` is zero). `y` and `z` are ignored.
 @implements IntCubeAction */
static void print_cube(const size_t x, const size_t y, const size_t z,
    int *pnumber) {
    const char *const separator = x ? ", " : "";
    (void)y;
    (void)z;
    printf("%s%d", separator, *pnumber);
}

/** Prints an opening brace when `bin` is zero and a closing brace otherwise.
 @implements BinaryAction */
static void print_cube_corners(int bin) {
    const char *brace = "{ ";
    if(bin) brace = " }";
    printf("%s", brace);
}

/** Demo: builds a 4 x 3 x 3 cube, fills it with the product of the one-based
 coordinates, prints it with braces around rows and slabs, then frees it.
 @return `EXIT_SUCCESS`, or `EXIT_FAILURE` after reporting the error with
 `perror` if the cube could not be created. */
int main(void) {
    struct IntCube *const cube = IntCube(4, 3, 3);
    int status;

    if(cube) {
        IntCubeForEach(cube, &fill_with_xyz, 0);
        IntCubeForEach(cube, &print_cube, &print_cube_corners);
        fputc('\n', stdout);
        status = EXIT_SUCCESS;
    } else {
        perror("Cube");
        status = EXIT_FAILURE;
    }

    /* Single exit path; `free` accepts the null pointer of the failure case. */
    free(cube);
    return status;
}

{ { 1, 2, 3, 4 }{ 2, 4, 6, 8 }{ 3, 6, 9, 12 } }{ { 2, 4, 6, 8 }{ 4, 8, 12, 16 }{ 6, 12, 18, 24 } }{ { 3, 6, 9, 12 }{ 6, 12, 18, 24 }{ 9, 18, 27, 36 } }

This creates a dependence on struct IntCube, but with the dependence, one can calculate the size at runtime.

there's no pointer-to-pointer-to-pointer-to-int here or anything like it — M.M, Aug 03 '19 at 06:03
There's no `int***` in the question (and nothing that decays to `int***`) — M.M, Aug 04 '19 at 01:06
It was my impression that `int (*a)[sz[1]][sz[2]]` decays to `int ***`; is that not true? Also, in the link to the original question, `int ***calloc_3d_arr(int sizes[3])`. — Neil, Aug 04 '19 at 20:07

What exactly is this method of contiguous C memory allocation doing under the hood?

3 Answers