1

after I've gotten some answers yesterday i was able to progress a bit but still can't figure out what's wrong, sorry if this is too much posting

im pretty new to coding and I have to do the following:

Write a program that wants you to input your ID-Card-Number (It's a 9 digit long Number, it can contain either Letters from A to Z or Numbers from 0 to 9) (Its based on ascii and apparently A=10, B=11 etc.)

the positions of the input are weighted differently (Pos 1,4,7 = weight 7; Pos 2,5,8 = weight 3; Pos 3,6,9 = weight 1)

So each digit is either multiplied by 7,3 or 1 All products are added up and the last digit of the sum of all products is the desired check-digit. (these calculations have to be in a subprogram)

The program should then output the input number and the checkdigit at the end so the output should be a 10 digit number (e.g. T22000129 (input) = T220001293 (output))

        global  main
        extern  printf

        section .text
main:
        mov     eax, [esp+4]                                                                                    ; argc   
        cmp     eax, 2                                                                                          ; 2 parameters = 1 argument
        jne     badCommand

        mov     eax, [esp+8]                                                                                    ; argv
        mov     eax, [eax+4]                                                                                    ; starting adress string



        
        push    eax
        call    checksum                                                                                    ; calling checksum              
        
        xor     eax, eax
        mov     eax, edx
        xor     edx, edx                                                                                        ; getting the last digit of the value thats saved in edx
        mov     ebx, 10
        div     ebx
        xor     eax, eax

        
        pop     eax



;; output ID Card number (input) + checkdigit ;;

        push    edx                                                                                             ; check digit
        push    eax                                                                                             ; string - ID-card-number
        push    stringFormat
        call    printf
        add     esp, 12                                                                             ; esp 3DWORD

        ret                                                                                                     ; end main


;; subprogram ;;

checksum:
        push    ebx
        push    esi                                                                                             ; Call-Saved Register - safe before use
        push    edi


        mov     esi, [esp+16]                                                                                   ; String-Adresse - get the parameter                                                             
        mov     edi, 9                                                                                          ; jump counter

start:        
        xor     ebx, ebx
        cmp     edi, 0
        je      finish

        mov     al, [esi]
        cmp     al, 65

        jge     letterweight
        jl      numberweight



letterweight:
        cmp     edi, 9
        je      letterseven
        cmp     edi, 8
        je      letterthree
        cmp     edi, 7
        je      letterone
        cmp     edi, 6
        je      letterseven
        cmp     edi, 5
        je      letterthree
        cmp     edi, 4
        je      letterone
        cmp     edi, 3
        je      letterseven
        cmp     edi, 2
        je      letterthree
        cmp     edi, 1
        je      letterone




numberweight:
        cmp     edi, 9
        je      numberseven
        cmp     edi, 8
        je      numberthree
        cmp     edi, 7
        je      numberone
        cmp     edi, 6
        je      numberseven
        cmp     edi, 5
        je      numberthree
        cmp     edi, 4
        je      numberone
        cmp     edi, 3
        je      numberseven
        cmp     edi, 2
        je      numberthree
        cmp     edi, 1
        je      numberone





letterseven:
        sub     al, 55
        mov     ebx, 7
        mul     ebx
        mov     ecx, eax
        add     edx, ecx
        jmp     ediesi

numberseven:
        mov     ebx, 7
        mul     ebx
        mov     ecx, eax
        add     edx, ecx
        jmp     ediesi




letterthree:
        sub     al, 55
        mov     ebx, 3
        mul     ebx
        mov     ecx, eax
        add     edx, ecx
        jmp     ediesi

numberthree:
        mov     ebx, 3
        mul     ebx
        mov     ecx, eax
        add     edx, ecx
        jmp     ediesi




letterone:
        sub     al, 55
        mov     ebx, 1
        mul     ebx
        mov     ecx, eax
        add     edx, ecx
        jmp     ediesi

numberone:
        mov     ebx, 1
        mul     ebx
        mov     ecx, eax
        add     edx, ecx
        jmp     ediesi

ediesi:
        dec     edi
        inc     esi
        jmp     start


finish:
        pop     ebx
        pop     esi                                                                                             ; Call-Saved Register - pop after use
        pop     edi

        ret





        
badCommand:
        push    badArgumentFormat
        call    printf
        add     esp, 4                                                                                  ; esp 1DWORD
        ret

    
        section .data
        
badArgumentFormat:
        db      'bad argument', 10, 0                                                                       ; 10 = LF


stringFormat:
        db      '"%s%1d"', 10, 0

I know this might not be the most optimal way of doing it but it kinda does what it should but it also doesnt.

My thought process maybe:

main function loads the input into eax so it can be the first part of the output later

pushing eax so i can work with it calling the subprogram to calculate

subprogram: pushing the call-saved registers before I use them in the subprogram

moving the input to esi so I can work with it setting edi to 9 so i can use it as a jump counter

setting ebx to 0 so i can work with it comparing edi with 0, if thats the case program ends

then I'm moving the first digit of the input into al comparing it to 65 because since A=65 in ascii its the first value that would be a letter

jumping to weight determination so I know whats the multiplier once i figured out what multiplier it is, it jumps in either the letter or number calculating scheme

if its a letter it does the following: subtract 55 from the value of digit from the input that i stored in al (do this because if it jumped to letter then the value must be atleast 65 so subtracting 55 will give me the value of the letter (A=65-55 = 10)

im then moving the multiplier into ebx each time so i can mul eax with ebx which is then stored into ecx ecx is then added to edx

after this I jump down to dec the jump counter (edi) and also inc esi so it gives me the value of the 2nd digit from my input

I do this until my edi is 0, after this I return back to the main program

the part starting and ending with xor eax,eax is giving me the last digit of the sum of the products

after this I pop eax to get my input back

printing eax and edx

Im sorry for the long write-up but for some reason I can't figure out why my results are wrong.

The output for my check digits with the code from above is: First time running the program its T220001297 Second time: T220001297 Third: T220001291 Fourth: T220001291 Fifth: T220001299

I'd appreciate any help, something in my thought process is wrong but I just can't figure out what it is.

Etti _
  • 21
  • 2
  • Lookup tables for data could tighten up the code where you do the same thing but with different immediate constants. You can also factor out letter->integer conversion and then use the same number-handling, it looks like. If it's not that simple, a jump table of code pointers could replace a long cmp/je chain. – Peter Cordes Dec 06 '20 at 01:31
  • Ok, gonna check it out. Any chance you know why the program isnt working properly? – Etti _ Dec 06 '20 at 02:05

1 Answers1

2

You're not restoring the stack correctly! The pop's need to come in reverse order to the pushes:

checksum:
    push    ebx
    push    esi
    push    edi
    ...
finish:
    pop     edi
    pop     esi
    pop     ebx
    ret

You are expecting that the checksum procedure returns a result in the EDX register but you forget that all of those multiplications that use the mul ebx instruction will have destroyed EDX. Remember the product will be in the combo EDX:EAX. This is a problem that you can resolve by using the immediate-operand form of the imul instruction.
Also these mul ebx instructions multiply the whole of EAX with EBX but you did not make sure that bits 8..31 of EAX are zero. A movzx eax, byte [esi] would do the trick.

Below is a my rewrite that remedies the above. I have improved the inefficient branching that you had before but this is by no means the only (good) solution to dispatch. The procedure now returns the result in the EAX register:

checksum:
    push    ebx
    push    esi
    push    edi
    mov     esi, [esp+16]
    mov     edi, 9
    xor     eax, eax         ; EAX will be result
start:
    movzx   ebx, byte [esi]  ; Expected [0,9] and [A,Z]
    cmp     bl, 65
    jb      numberweight
letterweight:
    sub     ebx, 55
numberweight:
    cmp     edi, 7
    je      one
    cmp     edi, 4
    je      one
    cmp     edi, 1
    je      one
    cmp     edi, 8
    je      three
    cmp     edi, 5
    je      three
    cmp     edi, 2
    je      three
    ; Here EDI just has got to be 9, 6, or 3!
    ; Therefore no need to compare and just fall through

seven:
    imul    ebx, 7      ; See below for an alternative (possible optimization)
    jmp     addit

three:
    imul    ebx, 3      ; See below for an optimization
    ;;jmp     addit     ; No longer needed if we drop the 'one' code.

one:
    ;;imul    ebx, 1    ; Keeping this would be silly.
addit:
    add     eax, ebx
    inc     esi
    dec     edi
    jnz     start

finish:
    pop     edi
    pop     esi
    pop     ebx
    ret

This is how you use the result from the new procedure:

    call    checksum ; -> EAX
    ; getting the last digit of the value that's saved in EAX
    xor     edx, edx
    mov     ebx, 10
    div     ebx      ; EDX:EAX / 10 -> EDX = [0,9], don't care about EAX

In the above code I wanted to stay close to your solution.
Below you'll find a couple of alternative solutions:

  • This version uses unrolling on the main loop by a factor of 3 (from 9 iterations to 3 iterations). Counterintuitively perhaps, but call .FetchDigit (as opposed to inlining it) does not necessarily make this slower:

    checksum:
      push    esi
      push    edi
      mov     esi, [esp+16]
      mov     edi, 3
      xor     eax, eax         ; EAX will be result
    .start:
      call    .FetchDigit      ; -> EDX=[0,35] ESI++
      imul    edx, 7           ; \ EQV to 'lea eax, [eax+edx*8]' 'sub eax, edx'
      add     eax, edx         ; /
      call    .FetchDigit      ; -> EDX=[0,35] ESI++
      lea     edx, [edx+edx*2] ; Faster then 'imul edx, 3'
      add     eax, edx
      call    .FetchDigit      ; -> EDX=[0,35] ESI++
      ;;imul    edx, 1
      add     eax, edx
      dec     edi
      jnz     .start
      pop     edi
      pop     esi
      ret
    ; - - - - - - - - - - - - -
    .FetchDigit:
      movzx   edx, byte [esi]  ; Expected [0,9] and [A,Z]
      cmp     dl, 65
      jb      .numberweight
    .letterweight:
      sub     edx, 55
    .numberweight:
      ret
    
  • This version uses nested loops and was build on the fact that the weights 7, 3, and 1 can be generated through a simple shift to the right:

    checksum:
      push    esi
      push    edi
      mov     esi, [esp+16]
      mov     edi, 3
      xor     eax, eax         ; EAX will be result
    .OuterLoop:
      mov     ecx, 7
    .InnerLoop:
      movzx   edx, byte [esi]  ; Expected [0,9] and [A,Z]
      cmp     dl, 65
      jb      .numberweight
    .letterweight:
      sub     edx, 55
    .numberweight:
      imul    edx, ecx         ; ECX={7,3,1}
      add     eax, edx
      shr     ecx, 1
      jnz     .InnerLoop
      dec     edi
      jnz     .OuterLoop
      pop     edi
      pop     esi
      ret
    
Sep Roland
  • 20,265
  • 3
  • 36
  • 58
  • 1
    `cdq` instead of `xor eax,eax` saves 1 byte of code size but is logically wrong: it's doing sign-extension before unsigned `div`. If you want to suggest doing taking advantage of EAX being signed-positive if interpreted as signed, also switch to `cdq` / `idiv`. That's actually more efficient on some CPUs as well, probably because the maximum absolute value of the dividend is smaller by 1 bit. e.g. `idiv r32` is 1 fewer uop than `div r32` on Sandybridge. (But not Skylake or Zen). If you actually wanted to optimize for speed, you'd use a multiplicative inverse for the division by 10. – Peter Cordes Dec 07 '20 at 00:30
  • 1
    But if you care about latency, `xor`-zero / `div` is probably best: `cdq` puts 1 extra cycle of latency before the EDX:EAX dividend is ready, but xor-zeroing is independent of the incoming EAX value. (`div` and `idiv` have equal latency on Skylake and Zen, or you could just use whichever is faster after zero-extending.) – Peter Cordes Dec 07 '20 at 00:34
  • @PeterCordes Thank you for your input on `cdq`. It seems that I can't let go easily some of my old 16-bit habits where codesize was so much important to me. I've edited the `xor edx, edx` back in. – Sep Roland Dec 07 '20 at 13:53
  • Heh, yeah x86 makes golfing for code size fun. My main objection was to combining `cdq` with `div` in an example for beginners. I'd be fine with golfing with `cdq` / `idiv`. But yeah, xor/div is probably best for consistency with other unsigned stuff. – Peter Cordes Dec 07 '20 at 14:00
  • `imul edx, 3` should be `lea edx, [edx + edx*2]`. `imul edx,7` / `add eax,edx` can be `lea eax, [eax + edx*8]` / `sub eax, edx`. – Peter Cordes Dec 07 '20 at 14:15
  • @PeterCordes I thought that these days multiplications weren't all that slow, and especially with very small numbers that offer an opportunity for an early out on the calculation. – Sep Roland Dec 07 '20 at 14:20
  • 1
    It's only a small difference, and perf isn't data-dependent: no early-out anywhere except divide. `imul reg,reg,imm` and `imul reg,reg` are 1 uop, 1/clock throughput, 3 cycle latency (Intel Sandybridge-family and AMD Zen, worse throughput and latency on Bulldozer). That `lea` is 1 cycle latency on Intel and can run on 2 execution ports, avoiding competition with anything else that can only run on port 1. (e.g. `imul` or some vector stuff on the other hyperthread... No other latency=3 integer instructions in this code.) On AMD a shifted-index makes it a "slow LEA" (2 cycle latency). – Peter Cordes Dec 07 '20 at 14:29
  • LEA is strictly better than `imul` for the `imul edx,3` case. The `imul edx,7`/`add` case ties the EDX dependency chain into EAX sooner, lengthening the loop-carried dependency chain through EAX, so `imul edx,7`/ADD could be better than LEA/SUB. – Peter Cordes Dec 07 '20 at 14:31
  • @PeterCordes You've convinced me. I'll edit it in the *should be* and comment about the *can be*. – Sep Roland Dec 07 '20 at 14:35
  • 1
    Just noticed another thing: `sub dl, 55` is no smaller than `sub edx,55` but is a performance problem on Intel P6-family: reading EDX later will cause a [partial register stall](https://stackoverflow.com/questions/41573502/why-doesnt-gcc-use-partial-registers). Avoid writing partial registers when possible, like you're avoiding with the `movzx` load. `cmp dl,55` is fine either way; reading partial regs is fine. – Peter Cordes Dec 07 '20 at 14:47
  • @PeterCordes Changed... Thank you so much for taking the time for looking at my code. – Sep Roland Dec 07 '20 at 14:53
  • Thanks for taking the time to wade through the OP's mess and turn it into something simple and easy to understand. :) – Peter Cordes Dec 07 '20 at 14:56