3

Consider the following struct and functions

/*
 * Request record with a fixed in-memory layout. The packed attribute removes
 * all padding, so the object occupies exactly 4 + 4 + 2 = 10 bytes.
 */
struct __attribute__((__packed__)) req_file {
  uint32_t start_pos;
  uint32_t byte_count;
  uint16_t name_len;
};
typedef struct req_file req_file;

/* Rewrites every field of *d from host to network byte order, in place. */
void req_file_hton(req_file *d) {
  const req_file net = {
    .start_pos = htonl(d->start_pos),
    .byte_count = htonl(d->byte_count),
    .name_len = htons(d->name_len),
  };
  *d = net;
}

/* Rewrites every field of *d from network to host byte order, in place. */
void req_file_ntoh(req_file *d) {
  const req_file host = {
    .start_pos = ntohl(d->start_pos),
    .byte_count = ntohl(d->byte_count),
    .name_len = ntohs(d->name_len),
  };
  *d = host;
}

The above code is tedious to write when there are many structs with many fields. I would like to specify the name and the fields of each struct once, and have the functions struct_name_hton and struct_name_ntoh generated for me. I have experimented with X-macros a little, but without success. A portable C preprocessor solution would be highly appreciated (not C++).

Mirko
  • 963
  • 5
  • 12
samvel1024
  • 1,007
  • 4
  • 15
  • 34
  • 2
    I really think an `unsigned char` buffer would be your best bet for portability. Reading and writing from a buffer is safer than assuming the packing and ordering of each field on a different system. – Myst Apr 13 '19 at 21:18
  • @Myst Not sure what you mean. The objects will already be in a buffer of `char` or so for transport; that's a given. If you mean to suggest some other logical serialisation mechanism, the fact that it involves a byte buffer surely isn't the differentiating factor? Can you clarify? – Lightness Races in Orbit Apr 13 '19 at 21:22
  • @Myst do you mean serialising integer 12 as a string "12" ? – samvel1024 Apr 13 '19 at 21:24
  • 1
    @samvel1024 _"do you mean serialising integer 12 as a string "12" ?"_ No, he means you should use serialization to fill an unsigned char buffer, rather than using `__attribute__((__packed__))`. – πάντα ῥεῖ Apr 13 '19 at 21:27
  • @samvel1024 I posted an answer that explains what I mean. (πάντα ῥεῖ understood me). I really think you should avoid casting the input/output buffer to a `req_file *`. You might run into issues with unaligned memory access if the code runs on a non x86 system. – Myst Apr 13 '19 at 23:34

4 Answers4

2

IMHO, you should use a raw buffer for input / output. This is by far more portable (and safer) than guessing the way a compiler will order the fields or the structure on each system.

In addition, this would allow you to pack/unpack data without worrying about byte order or memory alignment.

The macros in this example code were extracted from the facil.io framework header:

/*
 * Byte-wise (de)serialization helpers. They access the buffer one uint8_t at
 * a time, so they work on unaligned addresses and give the same result on
 * little- and big-endian hosts.
 *
 * NOTE: every macro evaluates its buffer argument (and the writers their value
 * argument) more than once -- do not pass expressions with side effects.
 */

/** Reads an unaligned network ordered byte stream to a 16 bit number. */
#define fio_str2u16(c)                                                         \
  ((uint16_t)(((uint16_t)(((uint8_t *)(c))[0]) << 8) |                         \
              (uint16_t)(((uint8_t *)(c))[1])))

/** Reads an unaligned network ordered byte stream to a 32 bit number. */
#define fio_str2u32(c)                                                         \
  ((uint32_t)(((uint32_t)(((uint8_t *)(c))[0]) << 24) |                        \
              ((uint32_t)(((uint8_t *)(c))[1]) << 16) |                        \
              ((uint32_t)(((uint8_t *)(c))[2]) << 8) |                         \
              (uint32_t)(((uint8_t *)(c))[3])))

/**
 * Writes a local 16 bit number to an unaligned buffer in network order.
 *
 * Expands to a single statement WITHOUT a trailing semicolon: the caller
 * supplies it, which keeps `if (x) fio_u2str16(b, i); else ...` valid.
 */
#define fio_u2str16(buffer, i)                                                 \
  do {                                                                         \
    ((uint8_t *)(buffer))[0] = ((uint16_t)(i) >> 8) & 0xFF;                    \
    ((uint8_t *)(buffer))[1] = ((uint16_t)(i)) & 0xFF;                         \
  } while (0)

/**
 * Writes a local 32 bit number to an unaligned buffer in network order.
 *
 * Expands to a single statement WITHOUT a trailing semicolon (see above).
 */
#define fio_u2str32(buffer, i)                                                 \
  do {                                                                         \
    ((uint8_t *)(buffer))[0] = ((uint32_t)(i) >> 24) & 0xFF;                   \
    ((uint8_t *)(buffer))[1] = ((uint32_t)(i) >> 16) & 0xFF;                   \
    ((uint8_t *)(buffer))[2] = ((uint32_t)(i) >> 8) & 0xFF;                    \
    ((uint8_t *)(buffer))[3] = ((uint32_t)(i)) & 0xFF;                         \
  } while (0)

/*
 * Serialized layout, in the same order as the fields of req_file:
 *   bytes 0-3  start_pos   (32 bit, network order)
 *   bytes 4-7  byte_count  (32 bit, network order)
 *   bytes 8-9  name_len    (16 bit, network order)
 */

/**
 * Decodes a req_file from `buffer` into `*d`.
 * Reads 10 bytes from `buffer`; no bounds checking is performed.
 */
void req_file_read(req_file *d, unsigned char * buffer){
  d->start_pos = fio_str2u32(buffer);
  d->byte_count = fio_str2u32(buffer + 4);
  d->name_len = fio_str2u16(buffer + 8);
}

/**
 * Encodes `*d` into `buffer`.
 * Writes 10 bytes to `buffer`; no bounds checking is performed.
 */
void req_file_write(unsigned char * buffer, req_file *d){
  fio_u2str32(buffer, d->start_pos);
  fio_u2str32(buffer + 4, d->byte_count);
  fio_u2str16(buffer + 8, d->name_len);
}

This makes it far easier to handle unaligned memory access as well as network byte ordering on any system. The binary based math makes this both portable and space efficient.

EDIT (X-macros)

As per the comments and concerns raised by Lightness Races in Orbit, here's a header file with X-macros that could be used to automatically create X_read / X_write inline functions.

The downside of serialization is that the byte offset for the raw buffer should be provided when declaring the struct using the macros.

In this example, the same header is included a number of times with different results. Also, the read/write functions don't have to be inlined, it's just an example.

Here's the header:

/*
 * NOTE: the header as a whole is intentionally NOT include-guarded -- it is
 * meant to be included several times. Only the shared helper macros in the
 * section below are guarded, so they are defined once.
 */
#ifndef H__FACIL_IO_MACROS
#define H__FACIL_IO_MACROS

/*
 * The helpers below access the buffer one uint8_t at a time, so they work on
 * unaligned addresses regardless of host byte order. Their arguments are
 * evaluated more than once -- do not pass expressions with side effects.
 */

/** Reads an unaligned network ordered byte stream to a 16 bit number. */
#define fio_str2u16(c)                                                         \
  ((uint16_t)(((uint16_t)(((uint8_t *)(c))[0]) << 8) |                         \
              (uint16_t)(((uint8_t *)(c))[1])))

/** Reads an unaligned network ordered byte stream to a 32 bit number. */
#define fio_str2u32(c)                                                         \
  ((uint32_t)(((uint32_t)(((uint8_t *)(c))[0]) << 24) |                        \
              ((uint32_t)(((uint8_t *)(c))[1]) << 16) |                        \
              ((uint32_t)(((uint8_t *)(c))[2]) << 8) |                         \
              (uint32_t)(((uint8_t *)(c))[3])))

/**
 * Writes a local 16 bit number to an unaligned buffer in network order.
 * Expands to one statement without a trailing ';' (the caller supplies it),
 * so the macro is safe inside an unbraced if/else.
 */
#define fio_u2str16(buffer, i)                                                 \
  do {                                                                         \
    ((uint8_t *)(buffer))[0] = ((uint16_t)(i) >> 8) & 0xFF;                    \
    ((uint8_t *)(buffer))[1] = ((uint16_t)(i)) & 0xFF;                         \
  } while (0)

/**
 * Writes a local 32 bit number to an unaligned buffer in network order.
 * Expands to one statement without a trailing ';' (the caller supplies it).
 */
#define fio_u2str32(buffer, i)                                                 \
  do {                                                                         \
    ((uint8_t *)(buffer))[0] = ((uint32_t)(i) >> 24) & 0xFF;                   \
    ((uint8_t *)(buffer))[1] = ((uint32_t)(i) >> 16) & 0xFF;                   \
    ((uint8_t *)(buffer))[2] = ((uint32_t)(i) >> 8) & 0xFF;                    \
    ((uint8_t *)(buffer))[3] = ((uint32_t)(i)) & 0xFF;                         \
  } while (0)

/* convert SERIAL_STRUCT_NAME to actual name: the extra level of indirection
 * makes the argument get macro-expanded before SERIAL_STRUCT_MAKE2 pastes it */
#define SERIAL_STRUCT_MAKE(struct_name) SERIAL_STRUCT_MAKE2(struct_name)

#endif
/*
 * Generator section. The includer defines SERIAL_STRUCT_NAME and
 * SERIAL_STRUCT_FIELDS, sets exactly one of SERIALIZE_TYPE / SERIALIZE_READ /
 * SERIALIZE_WRITE to 1, and includes this file. The selected branch redefines
 * SERIAL_STRUCT_FIELD and SERIAL_STRUCT_MAKE2, expands them once, and
 * undefines the selector it handled. For a struct named X this produces:
 *   SERIALIZE_TYPE  -> typedef struct { ... } X_s;
 *   SERIALIZE_READ  -> static inline void X_read(X_s *dest, unsigned char *src)
 *   SERIALIZE_WRITE -> static inline void X_write(unsigned char *dest, X_s *src)
 */
#if SERIALIZE_TYPE /* create the type */
#undef SERIALIZE_TYPE

#undef SERIAL_STRUCT_FIELD
/* a field becomes a member declaration; `pos` is unused for the type */
#define SERIAL_STRUCT_FIELD(name, bits, pos) uint##bits##_t name

#undef SERIAL_STRUCT_MAKE2
#define SERIAL_STRUCT_MAKE2(struct_name)                                       \
  typedef struct {                                                             \
    SERIAL_STRUCT_FIELDS;                                                      \
  } struct_name##_s;

/* perform macros */
SERIAL_STRUCT_MAKE(SERIAL_STRUCT_NAME)

#elif SERIALIZE_READ /* create reader function */
#undef SERIALIZE_READ

#undef SERIAL_STRUCT_FIELD
/* a field becomes: read `bits` bits at byte offset `pos` of src */
#define SERIAL_STRUCT_FIELD(name, bits, pos)                                   \
  dest->name = fio_str2u##bits((src + (pos)))

#undef SERIAL_STRUCT_MAKE2
/* `##` is required here: without it the function would literally be called
 * `struct_name_read` for every struct, and a second struct would redefine it */
#define SERIAL_STRUCT_MAKE2(struct_name)                                       \
  static inline void struct_name##_read(struct_name##_s *dest,                 \
                                        unsigned char *src) {                  \
    SERIAL_STRUCT_FIELDS;                                                      \
  }

/* perform macros */
SERIAL_STRUCT_MAKE(SERIAL_STRUCT_NAME)

#elif SERIALIZE_WRITE /* create writer function */
#undef SERIALIZE_WRITE

#undef SERIAL_STRUCT_FIELD
/* a field becomes: write `bits` bits at byte offset `pos` of dest */
#define SERIAL_STRUCT_FIELD(name, bits, pos)                                   \
  fio_u2str##bits((dest + (pos)), src->name)

#undef SERIAL_STRUCT_MAKE2
#define SERIAL_STRUCT_MAKE2(struct_name)                                       \
  static inline void struct_name##_write(unsigned char *dest,                  \
                                         struct_name##_s *src) {               \
    SERIAL_STRUCT_FIELDS;                                                      \
  }

/* perform macros */
SERIAL_STRUCT_MAKE(SERIAL_STRUCT_NAME)

#endif

In the implementation file, the information might look like this (again, the inline approach could be altered):

/*
 * Describes one serializable struct and then includes "serialize.h" once per
 * artifact to generate. Each field is listed as
 * SERIAL_STRUCT_FIELD(name, width in bits, byte offset in the serialized
 * buffer).
 */
/* will produce req_file_s as the struct name, but you can change that */
#define SERIAL_STRUCT_NAME req_file
#define SERIAL_STRUCT_FIELDS                                                   \
  SERIAL_STRUCT_FIELD(start_pos, 32, 0);                                       \
  SERIAL_STRUCT_FIELD(byte_count, 32, 4);                                      \
  SERIAL_STRUCT_FIELD(name_len, 16, 8)

/* Each selector is set right before the include that consumes it; the header
 * #undef's the selector it handled, so exactly one pass runs per include. */
/* pass 1: emit the struct typedef */
#define SERIALIZE_TYPE 1
#include "serialize.h"
/* pass 2: emit the inline reader function */
#define SERIALIZE_READ 1
#include "serialize.h"
/* pass 3: emit the inline writer function */
#define SERIALIZE_WRITE 1
#include "serialize.h"

This could be adjusted so that SERIALIZE_TYPE also declares the functions (without defining them) and the functions aren't inlined, so that only the implementation file includes the header three times per type.

Myst
  • 16,507
  • 2
  • 40
  • 61
  • Also refer to my previous complaint about your terminology. Both approaches involve "raw buffers". The question is what the buffers contain and how you populate/interpret them. – Lightness Races in Orbit Apr 13 '19 at 22:24
  • @LightnessRacesinOrbit - It might look like more code (I'm not sure about the assembly output code length), but it's more portable. Automating the code with X-macros is just as possible as it is with the `hton` approach. If we're assuming a "raw buffer" being cast to a `req_file *` (pointer), then this approach is safer due to possible memory alignment requirements on some systems. It also eliminates the `packed` keyword, which could result in excessive assembly output (compilers working around possible memory alignment errors). – Myst Apr 13 '19 at 22:31
  • It's true that adding a layer of abstraction to a serialisation mechanism is generally "better". You still have a "raw buffer" in both cases though. In fact, if anything, yours is "less raw". And, either way, this doesn't really answer the question directly posed, which is how to make less tedious/repetitive code while maintaining the ability to provide endianness portability. – Lightness Races in Orbit Apr 13 '19 at 22:34
  • @LightnessRacesinOrbit - yes, you are correct. I think my argument is that although this approach could be more tedious, it is better suited to the task. Automating code using the pre-processor is wonderful... but I'm not sure it would provide the best code for the job (especially if the code might end up on embedded systems). – Myst Apr 13 '19 at 22:51
  • Just to be clear again, I don't disagree with that :) – Lightness Races in Orbit Apr 13 '19 at 23:18
  • @LightnessRacesinOrbit - I think the update to the answer (the X-macros) might solve some of your misgivings. – Myst Apr 13 '19 at 23:30
1

X-macros work. The trick is to use token pasting, and to alias the conversion functions according to the field type:

/*
 * Aliases named hton<type> / ntoh<type>, so that DEF_FIELD can select the
 * right conversion function by pasting the field's type name.
 * 16-bit fields must use the *s variants: ntohl() on a 16-bit value swaps it
 * into the upper half of a 32-bit result, which is then truncated away.
 */
#define htonuint32_t htonl
#define htonuint16_t htons
#define ntohuint32_t ntohl
#define ntohuint16_t ntohs

/* Single source of truth for the fields; DEF_FIELD is redefined per use. */
#define DEF_FIELDS \
   DEF_FIELD(uint32_t,start_pos); \
   DEF_FIELD(uint32_t,byte_count); \
   DEF_FIELD(uint16_t,name_len)

/* 1st expansion: member declarations */
#define DEF_FIELD(t,v)  t v

typedef struct __attribute__((__packed__)) req_file {
    DEF_FIELDS;
} req_file;

/* 2nd expansion: host -> network conversion of each member */
#undef DEF_FIELD
#define DEF_FIELD(t,v) d->v = hton##t(d->v)

/* Converts all fields of *d to network byte order, in place. */
void req_file_hton(req_file *d) {
    DEF_FIELDS;
}

/* 3rd expansion: network -> host conversion of each member */
#undef DEF_FIELD
#define DEF_FIELD(t,v) d->v = ntoh##t(d->v)

/* Converts all fields of *d to host byte order, in place. */
void req_file_ntoh(req_file *d) {
    DEF_FIELDS;
}

pre-processed code (reformatted for clearer display):

/* Packed record: 4 + 4 + 2 bytes, no padding. */
typedef struct __attribute__((__packed__)) req_file {
 uint32_t start_pos;
 uint32_t byte_count;
 uint16_t name_len;
} req_file;


/* host -> network byte order, in place */
void req_file_hton(req_file *d) {
 d->start_pos = htonl(d->start_pos);
 d->byte_count = htonl(d->byte_count);
 d->name_len = htons(d->name_len);
}


/* network -> host byte order, in place (16-bit field uses ntohs) */
void req_file_ntoh(req_file *d) {
 d->start_pos = ntohl(d->start_pos);
 d->byte_count = ntohl(d->byte_count);
 d->name_len = ntohs(d->name_len);
}

If you have more than one structure, you can complexify the macro system to be able to generate all the structs & functions. Example with 2 different structures:

/*
 * Aliases named hton<type> / ntoh<type>, selected by pasting the field's type
 * name. 16-bit fields must use the *s variants: ntohl() on a 16-bit value
 * would move it into the upper half of a 32-bit result that is then truncated.
 */
#define htonuint32_t htonl
#define htonuint16_t htons
#define ntohuint32_t ntohl
#define ntohuint16_t ntohs

/* One field list per struct, named DEF_FIELDS_<struct name>. */
#define DEF_FIELDS_req_file \
   DEF_FIELD(uint32_t,start_pos); \
   DEF_FIELD(uint32_t,byte_count); \
   DEF_FIELD(uint16_t,name_len)

#define DEF_FIELDS_other_file \
   DEF_FIELD(uint32_t,foo_pos); \
   DEF_FIELD(uint32_t,char_count); \
   DEF_FIELD(uint16_t,bar_len)

/*
 * Generic skeleton: <opening> <fields>; <closing>. What it produces depends
 * on the current definitions of START_DECL / DEF_FIELD / END_DECL.
 * Invocations take no trailing ';' -- the struct pass puts the ';' inside
 * END_DECL, and a bare ';' after a function body is not valid ISO C at file
 * scope.
 */
#define STRUCT_DEF(s) \
    START_DECL(s) \
    DEF_FIELDS_##s; \
    END_DECL(s)


/* pass 1: the packed struct typedefs */
#define START_DECL(s) typedef struct __attribute__((__packed__)) s {
#define END_DECL(s) } s;
#define DEF_FIELD(t,v)  t v

STRUCT_DEF(req_file)
STRUCT_DEF(other_file)

/* pass 2: <struct>_hton(), host -> network, in place */
#undef DEF_FIELD
#undef START_DECL
#undef END_DECL
#define DEF_FIELD(t,v) d->v = hton##t(d->v)
#define START_DECL(s) void s##_hton(s *d) {
#define END_DECL(s) }

STRUCT_DEF(req_file)
STRUCT_DEF(other_file)

/* pass 3: <struct>_ntoh(), network -> host, in place (END_DECL is reused) */
#undef DEF_FIELD
#undef START_DECL
#define DEF_FIELD(t,v) d->v = ntoh##t(d->v)
#define START_DECL(s) void s##_ntoh(s *d) {

STRUCT_DEF(req_file)
STRUCT_DEF(other_file)

result:

/* Expanded output: two packed structs plus in-place hton/ntoh converters.
 * The 16-bit members go through htons/ntohs, the 32-bit ones through htonl/ntohl. */
typedef struct __attribute__((__packed__)) req_file { uint32_t start_pos; uint32_t byte_count; uint16_t name_len; } req_file;
typedef struct __attribute__((__packed__)) other_file { uint32_t foo_pos; uint32_t char_count; uint16_t bar_len; } other_file;

void req_file_hton(req_file *d) { d->start_pos = htonl(d->start_pos); d->byte_count = htonl(d->byte_count); d->name_len = htons(d->name_len); }
void other_file_hton(other_file *d) { d->foo_pos = htonl(d->foo_pos); d->char_count = htonl(d->char_count); d->bar_len = htons(d->bar_len); }

void req_file_ntoh(req_file *d) { d->start_pos = ntohl(d->start_pos); d->byte_count = ntohl(d->byte_count); d->name_len = ntohs(d->name_len); }
void other_file_ntoh(other_file *d) { d->foo_pos = ntohl(d->foo_pos); d->char_count = ntohl(d->char_count); d->bar_len = ntohs(d->bar_len); }
Jean-François Fabre
  • 126,787
  • 22
  • 103
  • 165
1

You could adapt Antony Polukhin's magic_get library, to be able to convert any (arbitrary) structure into a different byte order - just like it can now print any arbitrary structure to an ostream.

einpoklum
  • 86,754
  • 39
  • 223
  • 453
  • user just chameleoned you by removing the C++ tag. – Jean-François Fabre Apr 13 '19 at 21:33
  • the C++ tag caused confusion. Initially I added it since I thought C++ macros can apply here as well. Whereas solving this very problem in C++ is not as complicated as in C. – samvel1024 Apr 13 '19 at 21:39
  • 1
    @Jean-FrançoisFabre: Well, it's not a bad idea for readers to be aware of this fine hack (even if it's not C)... – einpoklum Apr 13 '19 at 21:47
  • @Jean-FrançoisFabre I edited for removal of c++ tag (someone peer reviewed), as I was about to answer but I got the vibe that he didn't wanted any macro-free, safe, c++ solution. – Mirko Apr 13 '19 at 23:08
1

Well, that's easy.

#include <stdint.h>
#include <arpa/inet.h>

/* the NETSTRUCT library ------------------------------- */
/*
 * NETSTRUCT(name, (type, field), ...) defines a packed `struct name` together
 * with name_hton() and name_ntoh(), which convert every field in place.
 * Each supported type provides three hooks, NETSTRUCT_{dec,hton,ntoh}_<type>;
 * the hton/ntoh hooks operate on a pointer called `t`.
 * Supports 1 to 8 fields; add NETSTRUCT9... and widen NETSTRUCT_GET for more.
 */

// for uint32_t
#define NETSTRUCT_dec_uint32_t(n)  uint32_t n;
#define NETSTRUCT_hton_uint32_t(n)  t->n = htonl(t->n);
#define NETSTRUCT_ntoh_uint32_t(n)  t->n = ntohl(t->n);

// for uint16_t
#define NETSTRUCT_dec_uint16_t(n)  uint16_t n;
#define NETSTRUCT_hton_uint16_t(n)  t->n = htons(t->n);
#define NETSTRUCT_ntoh_uint16_t(n)  t->n = ntohs(t->n);

// dec hton ntoh switch: dispatches to the per-type hook
#define NETSTRUCT_dec(type, name)  NETSTRUCT_dec_##type(name)
#define NETSTRUCT_hton(type, name) NETSTRUCT_hton_##type(name)
#define NETSTRUCT_ntoh(type, name) NETSTRUCT_ntoh_##type(name)

// calls NETSTRUCT_mod on every field; `a` is a parenthesised (type, name)
// pair, so `NETSTRUCT_##mod a` becomes the call NETSTRUCT_mod(type, name)
#define NETSTRUCT1(mod, a)       NETSTRUCT_##mod a
#define NETSTRUCT2(mod, a, ...)  NETSTRUCT1(mod, a) NETSTRUCT1(mod, __VA_ARGS__)
#define NETSTRUCT3(mod, a, ...)  NETSTRUCT1(mod, a) NETSTRUCT2(mod, __VA_ARGS__)
#define NETSTRUCT4(mod, a, ...)  NETSTRUCT1(mod, a) NETSTRUCT3(mod, __VA_ARGS__)
#define NETSTRUCT5(mod, a, ...)  NETSTRUCT1(mod, a) NETSTRUCT4(mod, __VA_ARGS__)
#define NETSTRUCT6(mod, a, ...)  NETSTRUCT1(mod, a) NETSTRUCT5(mod, __VA_ARGS__)
#define NETSTRUCT7(mod, a, ...)  NETSTRUCT1(mod, a) NETSTRUCT6(mod, __VA_ARGS__)
#define NETSTRUCT8(mod, a, ...)  NETSTRUCT1(mod, a) NETSTRUCT7(mod, __VA_ARGS__)
// TO DO: all up to NETSTRUCT64

// variadic macro overload: picks the 9th argument
#define NETSTRUCT_GET(_1,_2,_3,_4,_5,_6,_7,_8,NAME,...) NAME
// Overloads VA_ARGS with specified mod. The fields shift the candidate list so
// that NETSTRUCT<count> lands in the NAME slot. The trailing 0 guarantees
// that `...` of NETSTRUCT_GET is never empty (required before C23).
#define NETSTRUCT_IN(mod, ...) \
        NETSTRUCT_GET(__VA_ARGS__, NETSTRUCT8, NETSTRUCT7, NETSTRUCT6, \
                      NETSTRUCT5, NETSTRUCT4, NETSTRUCT3, NETSTRUCT2, \
                      NETSTRUCT1, 0) \
            (mod, __VA_ARGS__)

// entrypoint of our library
#define NETSTRUCT(name, ...)  \
    \
    struct name { \
        NETSTRUCT_IN(dec, __VA_ARGS__) \
    } __attribute__((__packed__)); \
    \
    void name##_hton(struct name *t) { \
        NETSTRUCT_IN(hton, __VA_ARGS__) \
    } \
    \
    void name##_ntoh(struct name *t) { \
        NETSTRUCT_IN(ntoh, __VA_ARGS__) \
    }

/* -------------------------------------------------------- */

// Custom field type "uint8_t_arr_8": an array of eight bytes.
// Single bytes have no byte order, so both conversion hooks are no-op
// statements.
#define NETSTRUCT_dec_uint8_t_arr_8(n)  uint8_t n[8];
#define NETSTRUCT_hton_uint8_t_arr_8(n) ((void)0);
#define NETSTRUCT_ntoh_uint8_t_arr_8(n) ((void)0);

/* Defines the packed `struct reg_file` plus reg_file_hton() / reg_file_ntoh().
 * No trailing ';': the expansion ends with a function body, and a bare ';'
 * at file scope is not valid ISO C. */
NETSTRUCT(reg_file,
    (uint32_t, start_pos),
    (uint32_t, byte_count),
    (uint16_t, name_len),
    (uint8_t_arr_8, example_custom_array)
)

int main() {
    struct reg_file t;
    reg_file_hton(&t);
    reg_file_ntoh(&t);
}

I have written the macros so that it's easy to add another function, most probably void name##serialize(char *in) and void name##deserialize(const char *out). The design can be slightly refactored so that the type callbacks NETSTRUCT_dec_* take two or even an arbitrary number of arguments, e.g. NETSTRUCT(name, (type_callback_suffix, (arguments, arguments2))).

@edit: added a custom array type example and reordered some lines.

KamilCuk
  • 69,546
  • 5
  • 27
  • 60
  • great solution. Once I have more than 4 fields I need to add the corresponding arguments, am I right ? if not then I don't get what are those NETSTRUCT_1, 2, 3, 4 for. – samvel1024 Apr 13 '19 at 23:03
  • 1
    see [this thread](https://stackoverflow.com/questions/11761703/overloading-macro-on-number-of-arguments). I think C by standard supports max 64 macro arguments, which is why it's common to generate up to 64. To support up to 5 members, you would need to add `NETSTRUCT5(...) ..` and similarly add `_5` in the proper places. – KamilCuk Apr 13 '19 at 23:06