-3

I have written a startup and linker script for my C++ application, running on an STM32F407VG.

The problem is that I have an array of structures in which the field str is always zero despite the initialization. The other fields in the struct are correctly initialized. I can't understand what I'm doing wrong; I guess some part of the initialization in the startup script is missing.

The array is declared like the following:

// One table entry: a 32-bit tag followed by two 32-bit values.
struct elem{
    uint32_t str;    // tag; filled from the bytes of a short string in the initializers of `array`
    uint32_t value;
    uint32_t value2;
};

// Table of entries. Each `str` initializer casts the address of a 4-byte
// string literal ("CMx" plus its terminating NUL) to uint32_t* and reads one
// 32-bit value through that pointer.
// NOTE(review): reading a char array through a uint32_t lvalue is not a
// constant expression and violates the aliasing rules, so the compiler is not
// obliged to emit a statically initialized table here -- confirm in the map
// file / disassembly how `array` actually gets its values.
const struct elem array[]{
    {(uint32_t)(*(uint32_t*)"CM1"), 1, 1},
    {(uint32_t)(*(uint32_t*)"CM2"), 2, 2},
    {(uint32_t)(*(uint32_t*)"CM3"), 3, 3}
};

relevant section of startup script:

// Calls every function stored in the pre-init table and then every function
// stored in the init table. Each table is walked from its start symbol up to,
// but not including, its end symbol.
inline void static_init()
{
  void (**preinit_entry)() = __preinit_array_start;
  while (preinit_entry < __preinit_array_end)
  {
    (*preinit_entry)();
    ++preinit_entry;
  }

  void (**init_entry)() = __init_array_start;
  while (init_entry < __init_array_end)
  {
    (*init_entry)();
    ++init_entry;
  }
}

// Reset entry point: fills in .data and .bss, runs the static initializer
// tables and the optional system setup, then calls main().
void reset_handler(void)
{

  unsigned long *source;
  unsigned long *destination;

  // Copy the initial image of .data, one unsigned long at a time, from
  // _data_flash into the range [_data_begin, _data_end).
  source = &_data_flash;
  for (destination = &_data_begin; destination < &_data_end;)
  {
      *(destination++) = *(source++);
  }

  // Zero-fill the range [_bss_begin, _bss_end) so that variables without an
  // explicit initializer start out as zero.
  for (destination = &_bss_begin; destination < &_bss_end;)
  {
      *(destination++) = 0;
  }

  // Call the functions listed in the pre-init and init tables.
  static_init();

  // System setup is compiled in unless __NO_SYSTEM_INIT is defined.
#ifndef __NO_SYSTEM_INIT
  SystemInit();
#endif

  // Hand control over to the application.
  main();
}

and the linker script:

/* Entry Point */
ENTRY(reset_handler)

_estack = 0x20010000; /* NOTE(review): this is ORIGIN(RAM) + 64K; the end of the 128K RAM region declared below would be 0x20020000 - confirm which one is intended */
/* Specify the memory areas */
/*
0x08000000 until 0x08010000 is reserved for BOOTLOADER! (64k)
*/
MEMORY
{
  EEPROM (rwx)    : ORIGIN = 0x08010000, LENGTH = 64K /* fake EEPROM: starts where the bootloader reservation ends */
  FLASH (rx)      : ORIGIN = 0x08020000, LENGTH = 896K /* vector table, code, constants and the load image of .data */
  RAM (xrw)       : ORIGIN = 0x20000000, LENGTH = 128K /* .data, .bss and the stack/heap reservation */
  RAM2 (rw)      : ORIGIN = 0x10000000, LENGTH = 64K /* receives the .ram2 section */
}

SECTIONS
{
  /* The vector table goes first into FLASH */
  .isr_vector :
  {
    . = ALIGN(4);
    __intvec_start__ = .;
    KEEP(*(.isr_vector)) /* KEEP: never garbage-collect the vector table */
    . = ALIGN(4);
  } >FLASH

  /* The program code and read-only data go into FLASH */
  .text :
  {
    . = ALIGN(4);
    _text = .;
      *(.text)           /* .text sections (code) */
    _text2 = .;
      *(.text*)          /* .text* sections (code) */
    _rodata = .;
      *(.rodata)         /* .rodata sections (constants, strings, etc.) */
      *(.rodata*)        /* .rodata* sections (constants, strings, etc.) */
      *(.glue_7)         /* glue arm to thumb code */
      *(.glue_7t)        /* glue thumb to arm code */
      *(.eh_frame)
    _init_data = .;
    KEEP (*(.init))
    KEEP (*(.fini))
    . = ALIGN(4);
      _etext = .;        /* global symbol marking the end of code */
  } > FLASH

  /* Exception unwinding tables */
  .ARM.extab   : { *(.ARM.extab* .gnu.linkonce.armextab.*) } >FLASH
  .ARM : {
    __exidx_start = .;
    *(.ARM.exidx*)
    __exidx_end = .;
  } >FLASH

  /* Function-pointer tables; static_init() in the startup code walks
     __preinit_array_start..__preinit_array_end and
     __init_array_start..__init_array_end */
  .preinit_array     :
  {
    PROVIDE_HIDDEN (__preinit_array_start = .);
    KEEP (*(.preinit_array*))
    PROVIDE_HIDDEN (__preinit_array_end = .);
  } >FLASH
  .init_array :
  {
    PROVIDE_HIDDEN (__init_array_start = .);
    KEEP (*(SORT(.init_array.*)))
    KEEP (*(.init_array*))
    PROVIDE_HIDDEN (__init_array_end = .);
  } >FLASH
  .fini_array :
  {
    PROVIDE_HIDDEN (__fini_array_start = .);
    KEEP (*(SORT(.fini_array.*)))
    KEEP (*(.fini_array*))
    PROVIDE_HIDDEN (__fini_array_end = .);
  } >FLASH

  /* Load address (in FLASH) of the .data image */
  _sidata = LOADADDR(.data);

  /* Alias of _sidata; this is the symbol reset_handler copies from */
  _data_flash = _sidata;

  /* Initialized data: addressed in RAM, initial image stored in FLASH after the code */
  .data :
  {
    . = ALIGN(4);
    _data_begin = .;
    *(.data)
    *(.data*)

    . = ALIGN(4);
    _data_end = .;
  } >RAM AT> FLASH


  /* Zero-initialized data; reset_handler clears [_bss_begin, _bss_end) */
  .bss (NOLOAD) :
  {
      . = ALIGN(4);
      _bss_begin = .;
      __bss_start__ = _bss_begin;
      *(.bss)
      *(.bss*)
      *(COMMON)
      . = ALIGN(4);
      _bss_end = .;
      __bss_end__ = _bss_end;
  } > RAM

  /* The stack takes the top stack_size bytes of RAM */
  stack_size = 1024;
  __stack_end__ = ORIGIN(RAM)+LENGTH(RAM);
  __stack_start__ = __stack_end__ - stack_size;

  /* The heap ends where the stack starts; with heap_size = 0 it is empty */
  heap_size = 0;
  __heap_end__ = __stack_start__;
  __heap_start__ = __heap_end__ - heap_size;

  /* Output section that spans the stack (and heap) sizes, starting at __stack_start__ */
  . = __stack_start__;
  ._stack :
  {
      PROVIDE ( end = . );
      . = . + stack_size;
      . = . + heap_size;
      . = ALIGN(4);
  } > RAM

  /* NOTE(review): .ram2 is marked NOLOAD yet is given a FLASH load region, and
     reset_handler does not copy anything from _siccmram - confirm whether
     initialized data is expected to work in this section */
  _siccmram = LOADADDR(.ram2);
  .ram2 (NOLOAD) :
  {
    . = ALIGN(4);
    *(.ram2);
          *(.ram2*);
    . = ALIGN(4);
  } > RAM2 AT> FLASH

  /* Discard every input section that comes from the standard libraries */
  /DISCARD/ :
  {
      libc.a ( * )
      libm.a ( * )
      libgcc.a ( * )
  }

  .ARM.attributes 0 : { *(.ARM.attributes) }
}
mauro
  • 59
  • 5
  • 1
    that struct has 2 elements named `str` – sp2danny Jan 19 '18 at 09:13
  • 2
    `*(*uint32_t)"CM1"` is clearly undefined behavior. Additionally there is a chance that that produces some alignment issues and it doesn't even work if the compiler is nice and tries to do what you meant. What did you actually want to do? – nwp Jan 19 '18 at 09:14
  • 1
    it should be `*(uint32_t*)` anyway – sp2danny Jan 19 '18 at 09:15
  • 3
    Which compiler compiled this code? – nwp Jan 19 '18 at 09:17
  • 1
    The struct has two members with the same name - `str`. And the initialisations use invalid syntax. That code will not compile, let alone initialise any members to zero. Try providing actual code that exhibits your problem in the form of an [mcve]. – Peter Jan 19 '18 at 09:17
  • `reinterpret_cast<uint32_t>("CM1")`. – Jarod42 Jan 19 '18 at 09:18
  • `main()` is called from `reset_handler`, are you sure this is c++ ? – sp2danny Jan 19 '18 at 09:20
  • 1
    @sp2danny this is standard practice in embedded programming. You compile without the compiler's standard wrappers (as they don't fit your non-existing OS). You then have to provide your own initialization of the data and BSS sections, and sometimes also of the RO section. – Shachar Shemesh Jan 19 '18 at 09:25
  • @Jarod42 that will give you an address, probably not what the OP wants – sp2danny Jan 19 '18 at 09:28
  • @sp2danny fixed, thanks, the first element is str – mauro Jan 19 '18 at 09:30
  • @nwp arm-gcc, while with IAR it work. the idea is the string can be long at most 4 char, and is stored inside a 32 bit variable. You said is a undefined behavior, is there any flag for arm-gcc-none-eabi to see this issue? – mauro Jan 19 '18 at 09:36
  • @Peter i had to change a couple of name here and there, but the substance is there. Fixed the little mistake. – mauro Jan 19 '18 at 09:36
  • @mauro you can use `std::memcpy` to copy the string into a 32 bit variable without UB. Although, you should be aware that the value will depend on the endianness of the processor. – eerorika Jan 19 '18 at 09:39
  • @sp2danny: Else using `0x434D3100` or using different endianness or not ASCII values. – Jarod42 Jan 19 '18 at 09:39
  • @user2079303 the array and its element are const, so no runtime initialization. – mauro Jan 19 '18 at 09:58
  • @Jarod42 yes, using a fixed value works, but something like "0x74657374" is not as readable as "test". Anyway the real point is to understand if the initialization is incorrect because undefined behavior or some other issue – mauro Jan 19 '18 at 10:01
  • @mauro in that case memcpy into a variable, and use that in initialization of the member. edit: sp2danny seems to have written an answer demonstrating that. – eerorika Jan 19 '18 at 10:19
  • or use multicharacter literal: [character_literal](http://en.cppreference.com/w/cpp/language/character_literal) (6.) (Btw: also implementation specific): 'CM1'. – Jarod42 Jan 19 '18 at 10:21

2 Answers

1

This should work and have no UB.
However, it's endian dependent.

#include <iostream>
#include <cstdint>
#include <cstring>

using namespace std;

// One table entry: a 32-bit tag followed by two 32-bit values.
struct elem {
    uint32_t str;    // tag; produced by makeint() in the initializers of `arr`
    uint32_t value;
    uint32_t value2;
};

/// Packs the first four bytes of @p str into a 32-bit integer.
///
/// The result equals what copying the four bytes straight into a uint32_t
/// would give on the target, so it still depends on the target's byte order.
/// Because the function is constexpr and does not call memcpy, a table whose
/// initializers call it with string literals is constant-initialized by the
/// compiler instead of being filled in by start-up code.
///
/// @param str  Pointer to at least four readable bytes, e.g. a three-character
///             string literal together with its terminating NUL.
/// @return     The four bytes combined into a native-endian uint32_t.
constexpr uint32_t makeint(const char str[4])
{
    // Each byte goes through unsigned char first so that a negative char
    // cannot sign-extend into the neighbouring bytes.
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    // Big-endian target: the first byte is the most significant one.
    return (static_cast<uint32_t>(static_cast<unsigned char>(str[0])) << 24)
         | (static_cast<uint32_t>(static_cast<unsigned char>(str[1])) << 16)
         | (static_cast<uint32_t>(static_cast<unsigned char>(str[2])) << 8)
         |  static_cast<uint32_t>(static_cast<unsigned char>(str[3]));
#else
    // Little-endian target: the first byte is the least significant one.
    return  static_cast<uint32_t>(static_cast<unsigned char>(str[0]))
         | (static_cast<uint32_t>(static_cast<unsigned char>(str[1])) << 8)
         | (static_cast<uint32_t>(static_cast<unsigned char>(str[2])) << 16)
         | (static_cast<uint32_t>(static_cast<unsigned char>(str[3])) << 24);
#endif
}

// Lookup table; each `str` is the value makeint() returns for a
// three-character tag (the literal's terminating NUL is the fourth byte).
const elem arr[] = {
    {makeint("CM1"), 1, 1},
    {makeint("CM2"), 2, 2},
    {makeint("CM3"), 3, 3}
};

// Prints the packed tag of every table entry, one per line, followed by a
// closing "done" marker.
int main()
{
    for (const auto& entry : arr)
    {
        cout << entry.str << endl;
    }

    cout << "\ndone\n";
}

See it here

sp2danny
  • 6,824
  • 2
  • 27
  • 49
0

You might use a multicharacter literal: see (6.) of character_literal.

Notice single quotes:

// Each tag is a multicharacter literal (note the single quotes). Its value is
// implementation-defined; GCC shifts the characters in one at a time, so
// there 'CM1' evaluates to 0x434D31.
const struct elem array[]{
    {'CM1', 1, 1},
    {'CM2', 2, 2},
    {'CM3', 3, 3}
};

You can see how gcc evaluates multicharacter literals:

https://gcc.gnu.org/onlinedocs/cpp/Implementation-defined-behavior.html#Implementation-defined-behavior

The compiler evaluates a multi-character character constant a character at a time, shifting the previous value left by the number of bits per target character, and then or-ing in the bit-pattern of the new character truncated to the width of a target character. The final bit-pattern is given type int, and is therefore signed, regardless of whether single characters are signed or not. If there are more characters in the constant than would fit in the target int the compiler issues a warning, and the excess leading characters are ignored.

Jarod42
  • 173,454
  • 13
  • 146
  • 250
  • I've always wondered the use case for multicharacter literals. Is this it? I.e. numeric value for a short string that I suppose can be used sort of like enum? – eerorika Jan 19 '18 at 10:29
  • @user2079303: problem with multicharacter literals is that values are implementation specific, so nothing forbid that 'Hello World' be equal to 'multicharacter literals' in some valid implementations. but once you restrict to one implementation, you might use it. – Jarod42 Jan 19 '18 at 10:48